airbyte-cdk 6.26.0.dev4106__py3-none-any.whl → 6.26.0.dev4108__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/cli/source_declarative_manifest/_run.py +3 -3
- airbyte_cdk/connector_builder/connector_builder_handler.py +2 -2
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +7 -7
- airbyte_cdk/sources/declarative/auth/jwt.py +17 -11
- airbyte_cdk/sources/declarative/auth/oauth.py +22 -13
- airbyte_cdk/sources/declarative/auth/token.py +3 -8
- airbyte_cdk/sources/declarative/auth/token_provider.py +4 -5
- airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +19 -9
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +71 -34
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +33 -4
- airbyte_cdk/sources/declarative/declarative_stream.py +3 -1
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +93 -27
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +7 -6
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +5 -3
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +22 -5
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +138 -38
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +5 -5
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +4 -2
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +49 -25
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +4 -4
- airbyte_cdk/sources/declarative/requesters/http_requester.py +5 -1
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +6 -5
- airbyte_cdk/sources/declarative/requesters/request_option.py +83 -4
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +7 -6
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +6 -12
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +4 -1
- airbyte_cdk/sources/declarative/schema/__init__.py +2 -0
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +44 -5
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +18 -11
- airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +51 -0
- airbyte_cdk/sources/file_based/file_based_source.py +16 -55
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +19 -31
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +7 -7
- airbyte_cdk/sources/file_based/stream/identities_stream.py +5 -2
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +22 -13
- airbyte_cdk/sources/streams/core.py +6 -6
- airbyte_cdk/sources/streams/http/http.py +1 -2
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +231 -62
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +166 -83
- airbyte_cdk/sources/types.py +4 -2
- airbyte_cdk/sources/utils/transform.py +23 -2
- airbyte_cdk/utils/datetime_helpers.py +499 -0
- airbyte_cdk/utils/mapping_helpers.py +86 -27
- airbyte_cdk/utils/slice_hasher.py +8 -1
- airbyte_cdk-6.26.0.dev4108.dist-info/LICENSE_SHORT +1 -0
- {airbyte_cdk-6.26.0.dev4106.dist-info → airbyte_cdk-6.26.0.dev4108.dist-info}/METADATA +5 -5
- {airbyte_cdk-6.26.0.dev4106.dist-info → airbyte_cdk-6.26.0.dev4108.dist-info}/RECORD +50 -48
- {airbyte_cdk-6.26.0.dev4106.dist-info → airbyte_cdk-6.26.0.dev4108.dist-info}/WHEEL +1 -1
- airbyte_cdk/sources/file_based/config/permissions.py +0 -34
- {airbyte_cdk-6.26.0.dev4106.dist-info → airbyte_cdk-6.26.0.dev4108.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.26.0.dev4106.dist-info → airbyte_cdk-6.26.0.dev4108.dist-info}/entry_points.txt +0 -0
@@ -21,7 +21,6 @@ import pkgutil
|
|
21
21
|
import sys
|
22
22
|
import traceback
|
23
23
|
from collections.abc import Mapping
|
24
|
-
from datetime import datetime
|
25
24
|
from pathlib import Path
|
26
25
|
from typing import Any, cast
|
27
26
|
|
@@ -44,6 +43,7 @@ from airbyte_cdk.sources.declarative.concurrent_declarative_source import (
|
|
44
43
|
)
|
45
44
|
from airbyte_cdk.sources.declarative.yaml_declarative_source import YamlDeclarativeSource
|
46
45
|
from airbyte_cdk.sources.source import TState
|
46
|
+
from airbyte_cdk.utils.datetime_helpers import ab_datetime_now
|
47
47
|
|
48
48
|
|
49
49
|
class SourceLocalYaml(YamlDeclarativeSource):
|
@@ -101,7 +101,7 @@ def _get_local_yaml_source(args: list[str]) -> SourceLocalYaml:
|
|
101
101
|
type=Type.TRACE,
|
102
102
|
trace=AirbyteTraceMessage(
|
103
103
|
type=TraceType.ERROR,
|
104
|
-
emitted_at=
|
104
|
+
emitted_at=ab_datetime_now().to_epoch_millis(),
|
105
105
|
error=AirbyteErrorTraceMessage(
|
106
106
|
message=f"Error starting the sync. This could be due to an invalid configuration or catalog. Please contact Support for assistance. Error: {error}",
|
107
107
|
stack_trace=traceback.format_exc(),
|
@@ -191,7 +191,7 @@ def create_declarative_source(
|
|
191
191
|
type=Type.TRACE,
|
192
192
|
trace=AirbyteTraceMessage(
|
193
193
|
type=TraceType.ERROR,
|
194
|
-
emitted_at=
|
194
|
+
emitted_at=ab_datetime_now().to_epoch_millis(),
|
195
195
|
error=AirbyteErrorTraceMessage(
|
196
196
|
message=f"Error starting the sync. This could be due to an invalid configuration or catalog. Please contact Support for assistance. Error: {error}",
|
197
197
|
stack_trace=traceback.format_exc(),
|
@@ -3,7 +3,6 @@
|
|
3
3
|
#
|
4
4
|
|
5
5
|
import dataclasses
|
6
|
-
from datetime import datetime
|
7
6
|
from typing import Any, List, Mapping
|
8
7
|
|
9
8
|
from airbyte_cdk.connector_builder.message_grouper import MessageGrouper
|
@@ -21,6 +20,7 @@ from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
|
|
21
20
|
ModelToComponentFactory,
|
22
21
|
)
|
23
22
|
from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets
|
23
|
+
from airbyte_cdk.utils.datetime_helpers import ab_datetime_now
|
24
24
|
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
25
25
|
|
26
26
|
DEFAULT_MAXIMUM_NUMBER_OF_PAGES_PER_SLICE = 5
|
@@ -114,4 +114,4 @@ def resolve_manifest(source: ManifestDeclarativeSource) -> AirbyteMessage:
|
|
114
114
|
|
115
115
|
|
116
116
|
def _emitted_at() -> int:
|
117
|
-
return
|
117
|
+
return ab_datetime_now().to_epoch_millis()
|
@@ -437,10 +437,10 @@ class AsyncJobOrchestrator:
|
|
437
437
|
yield from self._process_running_partitions_and_yield_completed_ones()
|
438
438
|
self._wait_on_status_update()
|
439
439
|
except Exception as exception:
|
440
|
+
LOGGER.warning(
|
441
|
+
f"Caught exception that stops the processing of the jobs: {exception}. Traceback: {traceback.format_exc()}"
|
442
|
+
)
|
440
443
|
if self._is_breaking_exception(exception):
|
441
|
-
LOGGER.warning(
|
442
|
-
f"Caught exception that stops the processing of the jobs: {exception}"
|
443
|
-
)
|
444
444
|
self._abort_all_running_jobs()
|
445
445
|
raise exception
|
446
446
|
|
@@ -482,16 +482,16 @@ class AsyncJobOrchestrator:
|
|
482
482
|
and exception.failure_type == FailureType.config_error
|
483
483
|
)
|
484
484
|
|
485
|
-
def fetch_records(self,
|
485
|
+
def fetch_records(self, async_jobs: Iterable[AsyncJob]) -> Iterable[Mapping[str, Any]]:
|
486
486
|
"""
|
487
|
-
Fetches records from the given
|
487
|
+
Fetches records from the given jobs.
|
488
488
|
|
489
489
|
Args:
|
490
|
-
|
490
|
+
async_jobs Iterable[AsyncJob]: The list of AsyncJobs.
|
491
491
|
|
492
492
|
Yields:
|
493
493
|
Iterable[Mapping[str, Any]]: The fetched records from the jobs.
|
494
494
|
"""
|
495
|
-
for job in
|
495
|
+
for job in async_jobs:
|
496
496
|
yield from self._job_repository.fetch_records(job)
|
497
497
|
self._job_repository.delete(job)
|
@@ -3,6 +3,7 @@
|
|
3
3
|
#
|
4
4
|
|
5
5
|
import base64
|
6
|
+
import json
|
6
7
|
from dataclasses import InitVar, dataclass
|
7
8
|
from datetime import datetime
|
8
9
|
from typing import Any, Mapping, Optional, Union
|
@@ -104,21 +105,21 @@ class JwtAuthenticator(DeclarativeAuthenticator):
|
|
104
105
|
)
|
105
106
|
|
106
107
|
def _get_jwt_headers(self) -> dict[str, Any]:
|
107
|
-
"""
|
108
|
+
"""
|
108
109
|
Builds and returns the headers used when signing the JWT.
|
109
110
|
"""
|
110
|
-
headers = self._additional_jwt_headers.eval(self.config)
|
111
|
+
headers = self._additional_jwt_headers.eval(self.config, json_loads=json.loads)
|
111
112
|
if any(prop in headers for prop in ["kid", "alg", "typ", "cty"]):
|
112
113
|
raise ValueError(
|
113
114
|
"'kid', 'alg', 'typ', 'cty' are reserved headers and should not be set as part of 'additional_jwt_headers'"
|
114
115
|
)
|
115
116
|
|
116
117
|
if self._kid:
|
117
|
-
headers["kid"] = self._kid.eval(self.config)
|
118
|
+
headers["kid"] = self._kid.eval(self.config, json_loads=json.loads)
|
118
119
|
if self._typ:
|
119
|
-
headers["typ"] = self._typ.eval(self.config)
|
120
|
+
headers["typ"] = self._typ.eval(self.config, json_loads=json.loads)
|
120
121
|
if self._cty:
|
121
|
-
headers["cty"] = self._cty.eval(self.config)
|
122
|
+
headers["cty"] = self._cty.eval(self.config, json_loads=json.loads)
|
122
123
|
headers["alg"] = self._algorithm
|
123
124
|
return headers
|
124
125
|
|
@@ -130,18 +131,19 @@ class JwtAuthenticator(DeclarativeAuthenticator):
|
|
130
131
|
exp = now + self._token_duration if isinstance(self._token_duration, int) else now
|
131
132
|
nbf = now
|
132
133
|
|
133
|
-
payload = self._additional_jwt_payload.eval(self.config)
|
134
|
+
payload = self._additional_jwt_payload.eval(self.config, json_loads=json.loads)
|
134
135
|
if any(prop in payload for prop in ["iss", "sub", "aud", "iat", "exp", "nbf"]):
|
135
136
|
raise ValueError(
|
136
137
|
"'iss', 'sub', 'aud', 'iat', 'exp', 'nbf' are reserved properties and should not be set as part of 'additional_jwt_payload'"
|
137
138
|
)
|
138
139
|
|
139
140
|
if self._iss:
|
140
|
-
payload["iss"] = self._iss.eval(self.config)
|
141
|
+
payload["iss"] = self._iss.eval(self.config, json_loads=json.loads)
|
141
142
|
if self._sub:
|
142
|
-
payload["sub"] = self._sub.eval(self.config)
|
143
|
+
payload["sub"] = self._sub.eval(self.config, json_loads=json.loads)
|
143
144
|
if self._aud:
|
144
|
-
payload["aud"] = self._aud.eval(self.config)
|
145
|
+
payload["aud"] = self._aud.eval(self.config, json_loads=json.loads)
|
146
|
+
|
145
147
|
payload["iat"] = now
|
146
148
|
payload["exp"] = exp
|
147
149
|
payload["nbf"] = nbf
|
@@ -151,7 +153,7 @@ class JwtAuthenticator(DeclarativeAuthenticator):
|
|
151
153
|
"""
|
152
154
|
Returns the secret key used to sign the JWT.
|
153
155
|
"""
|
154
|
-
secret_key: str = self._secret_key.eval(self.config)
|
156
|
+
secret_key: str = self._secret_key.eval(self.config, json_loads=json.loads)
|
155
157
|
return (
|
156
158
|
base64.b64encode(secret_key.encode()).decode()
|
157
159
|
if self._base64_encode_secret_key
|
@@ -176,7 +178,11 @@ class JwtAuthenticator(DeclarativeAuthenticator):
|
|
176
178
|
"""
|
177
179
|
Returns the header prefix to be used when attaching the token to the request.
|
178
180
|
"""
|
179
|
-
return
|
181
|
+
return (
|
182
|
+
self._header_prefix.eval(self.config, json_loads=json.loads)
|
183
|
+
if self._header_prefix
|
184
|
+
else None
|
185
|
+
)
|
180
186
|
|
181
187
|
@property
|
182
188
|
def auth_header(self) -> str:
|
@@ -3,10 +3,9 @@
|
|
3
3
|
#
|
4
4
|
|
5
5
|
from dataclasses import InitVar, dataclass, field
|
6
|
+
from datetime import timedelta
|
6
7
|
from typing import Any, List, Mapping, MutableMapping, Optional, Union
|
7
8
|
|
8
|
-
import pendulum
|
9
|
-
|
10
9
|
from airbyte_cdk.sources.declarative.auth.declarative_authenticator import DeclarativeAuthenticator
|
11
10
|
from airbyte_cdk.sources.declarative.interpolation.interpolated_boolean import InterpolatedBoolean
|
12
11
|
from airbyte_cdk.sources.declarative.interpolation.interpolated_mapping import InterpolatedMapping
|
@@ -18,6 +17,7 @@ from airbyte_cdk.sources.streams.http.requests_native_auth.abstract_oauth import
|
|
18
17
|
from airbyte_cdk.sources.streams.http.requests_native_auth.oauth import (
|
19
18
|
SingleUseRefreshTokenOauth2Authenticator,
|
20
19
|
)
|
20
|
+
from airbyte_cdk.utils.datetime_helpers import AirbyteDateTime, ab_datetime_now, ab_datetime_parse
|
21
21
|
|
22
22
|
|
23
23
|
@dataclass
|
@@ -53,7 +53,7 @@ class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, DeclarativeAut
|
|
53
53
|
refresh_token: Optional[Union[InterpolatedString, str]] = None
|
54
54
|
scopes: Optional[List[str]] = None
|
55
55
|
token_expiry_date: Optional[Union[InterpolatedString, str]] = None
|
56
|
-
_token_expiry_date: Optional[
|
56
|
+
_token_expiry_date: Optional[AirbyteDateTime] = field(init=False, repr=False, default=None)
|
57
57
|
token_expiry_date_format: Optional[str] = None
|
58
58
|
token_expiry_is_time_of_expiration: bool = False
|
59
59
|
access_token_name: Union[InterpolatedString, str] = "access_token"
|
@@ -122,15 +122,24 @@ class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, DeclarativeAut
|
|
122
122
|
self._refresh_request_headers = InterpolatedMapping(
|
123
123
|
self.refresh_request_headers or {}, parameters=parameters
|
124
124
|
)
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
125
|
+
try:
|
126
|
+
if (
|
127
|
+
isinstance(self.token_expiry_date, (int, str))
|
128
|
+
and str(self.token_expiry_date).isdigit()
|
129
|
+
):
|
130
|
+
self._token_expiry_date = ab_datetime_parse(self.token_expiry_date)
|
131
|
+
else:
|
132
|
+
self._token_expiry_date = (
|
133
|
+
ab_datetime_parse(
|
134
|
+
InterpolatedString.create(
|
135
|
+
self.token_expiry_date, parameters=parameters
|
136
|
+
).eval(self.config)
|
137
|
+
)
|
138
|
+
if self.token_expiry_date
|
139
|
+
else ab_datetime_now() - timedelta(days=1)
|
129
140
|
)
|
130
|
-
|
131
|
-
|
132
|
-
else pendulum.now().subtract(days=1) # type: ignore # substract does not have type hints
|
133
|
-
)
|
141
|
+
except ValueError as e:
|
142
|
+
raise ValueError(f"Invalid token expiry date format: {e}")
|
134
143
|
self.use_profile_assertion = (
|
135
144
|
InterpolatedBoolean(self.use_profile_assertion, parameters=parameters)
|
136
145
|
if isinstance(self.use_profile_assertion, str)
|
@@ -222,8 +231,8 @@ class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, DeclarativeAut
|
|
222
231
|
def get_refresh_request_headers(self) -> Mapping[str, Any]:
|
223
232
|
return self._refresh_request_headers.eval(self.config)
|
224
233
|
|
225
|
-
def get_token_expiry_date(self) ->
|
226
|
-
return self._token_expiry_date # type: ignore # _token_expiry_date is
|
234
|
+
def get_token_expiry_date(self) -> AirbyteDateTime:
|
235
|
+
return self._token_expiry_date # type: ignore # _token_expiry_date is an AirbyteDateTime. It is never None despite what mypy thinks
|
227
236
|
|
228
237
|
def set_token_expiry_date(self, value: Union[str, int]) -> None:
|
229
238
|
self._token_expiry_date = self._parse_token_expiration_date(value)
|
@@ -5,7 +5,7 @@
|
|
5
5
|
import base64
|
6
6
|
import logging
|
7
7
|
from dataclasses import InitVar, dataclass
|
8
|
-
from typing import Any, Mapping, Union
|
8
|
+
from typing import Any, Mapping, MutableMapping, Union
|
9
9
|
|
10
10
|
import requests
|
11
11
|
from cachetools import TTLCache, cached
|
@@ -45,11 +45,6 @@ class ApiKeyAuthenticator(DeclarativeAuthenticator):
|
|
45
45
|
config: Config
|
46
46
|
parameters: InitVar[Mapping[str, Any]]
|
47
47
|
|
48
|
-
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
49
|
-
self._field_name = InterpolatedString.create(
|
50
|
-
self.request_option.field_name, parameters=parameters
|
51
|
-
)
|
52
|
-
|
53
48
|
@property
|
54
49
|
def auth_header(self) -> str:
|
55
50
|
options = self._get_request_options(RequestOptionType.header)
|
@@ -60,9 +55,9 @@ class ApiKeyAuthenticator(DeclarativeAuthenticator):
|
|
60
55
|
return self.token_provider.get_token()
|
61
56
|
|
62
57
|
def _get_request_options(self, option_type: RequestOptionType) -> Mapping[str, Any]:
|
63
|
-
options = {}
|
58
|
+
options: MutableMapping[str, Any] = {}
|
64
59
|
if self.request_option.inject_into == option_type:
|
65
|
-
|
60
|
+
self.request_option.inject_into_request(options, self.token, self.config)
|
66
61
|
return options
|
67
62
|
|
68
63
|
def get_request_params(self) -> Mapping[str, Any]:
|
@@ -9,9 +9,7 @@ from dataclasses import InitVar, dataclass, field
|
|
9
9
|
from typing import Any, List, Mapping, Optional, Union
|
10
10
|
|
11
11
|
import dpath
|
12
|
-
import pendulum
|
13
12
|
from isodate import Duration
|
14
|
-
from pendulum import DateTime
|
15
13
|
|
16
14
|
from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
|
17
15
|
from airbyte_cdk.sources.declarative.decoders.json_decoder import JsonDecoder
|
@@ -21,6 +19,7 @@ from airbyte_cdk.sources.declarative.requesters.requester import Requester
|
|
21
19
|
from airbyte_cdk.sources.http_logger import format_http_message
|
22
20
|
from airbyte_cdk.sources.message import MessageRepository, NoopMessageRepository
|
23
21
|
from airbyte_cdk.sources.types import Config
|
22
|
+
from airbyte_cdk.utils.datetime_helpers import AirbyteDateTime, ab_datetime_now
|
24
23
|
|
25
24
|
|
26
25
|
class TokenProvider:
|
@@ -38,7 +37,7 @@ class SessionTokenProvider(TokenProvider):
|
|
38
37
|
message_repository: MessageRepository = NoopMessageRepository()
|
39
38
|
decoder: Decoder = field(default_factory=lambda: JsonDecoder(parameters={}))
|
40
39
|
|
41
|
-
_next_expiration_time: Optional[
|
40
|
+
_next_expiration_time: Optional[AirbyteDateTime] = None
|
42
41
|
_token: Optional[str] = None
|
43
42
|
|
44
43
|
def get_token(self) -> str:
|
@@ -48,7 +47,7 @@ class SessionTokenProvider(TokenProvider):
|
|
48
47
|
return self._token
|
49
48
|
|
50
49
|
def _refresh_if_necessary(self) -> None:
|
51
|
-
if self._next_expiration_time is None or self._next_expiration_time <
|
50
|
+
if self._next_expiration_time is None or self._next_expiration_time < ab_datetime_now():
|
52
51
|
self._refresh()
|
53
52
|
|
54
53
|
def _refresh(self) -> None:
|
@@ -65,7 +64,7 @@ class SessionTokenProvider(TokenProvider):
|
|
65
64
|
raise ReadException("Failed to get session token, response got ignored by requester")
|
66
65
|
session_token = dpath.get(next(self.decoder.decode(response)), self.session_token_path)
|
67
66
|
if self.expiration_duration is not None:
|
68
|
-
self._next_expiration_time =
|
67
|
+
self._next_expiration_time = ab_datetime_now() + self.expiration_duration
|
69
68
|
self._token = session_token # type: ignore # Returned decoded response will be Mapping and therefore session_token will be str or None
|
70
69
|
|
71
70
|
|
@@ -21,8 +21,12 @@ class CheckDynamicStream(ConnectionChecker):
|
|
21
21
|
stream_count (int): numbers of streams to check
|
22
22
|
"""
|
23
23
|
|
24
|
+
# TODO: Add field stream_names to check_connection for static streams
|
25
|
+
# https://github.com/airbytehq/airbyte-python-cdk/pull/293#discussion_r1934933483
|
26
|
+
|
24
27
|
stream_count: int
|
25
28
|
parameters: InitVar[Mapping[str, Any]]
|
29
|
+
use_check_availability: bool = True
|
26
30
|
|
27
31
|
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
28
32
|
self._parameters = parameters
|
@@ -31,21 +35,27 @@ class CheckDynamicStream(ConnectionChecker):
|
|
31
35
|
self, source: AbstractSource, logger: logging.Logger, config: Mapping[str, Any]
|
32
36
|
) -> Tuple[bool, Any]:
|
33
37
|
streams = source.streams(config=config)
|
38
|
+
|
34
39
|
if len(streams) == 0:
|
35
40
|
return False, f"No streams to connect to from source {source}"
|
41
|
+
if not self.use_check_availability:
|
42
|
+
return True, None
|
43
|
+
|
44
|
+
availability_strategy = HttpAvailabilityStrategy()
|
36
45
|
|
37
|
-
|
38
|
-
stream
|
39
|
-
availability_strategy = HttpAvailabilityStrategy()
|
40
|
-
try:
|
46
|
+
try:
|
47
|
+
for stream in streams[: min(self.stream_count, len(streams))]:
|
41
48
|
stream_is_available, reason = availability_strategy.check_availability(
|
42
49
|
stream, logger
|
43
50
|
)
|
44
51
|
if not stream_is_available:
|
52
|
+
logger.warning(f"Stream {stream.name} is not available: {reason}")
|
45
53
|
return False, reason
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
54
|
+
except Exception as error:
|
55
|
+
error_message = (
|
56
|
+
f"Encountered an error trying to connect to stream {stream.name}. Error: {error}"
|
57
|
+
)
|
58
|
+
logger.error(error_message, exc_info=True)
|
59
|
+
return False, error_message
|
60
|
+
|
51
61
|
return True, None
|
@@ -19,6 +19,7 @@ from airbyte_cdk.sources.declarative.extractors import RecordSelector
|
|
19
19
|
from airbyte_cdk.sources.declarative.extractors.record_filter import (
|
20
20
|
ClientSideIncrementalRecordFilterDecorator,
|
21
21
|
)
|
22
|
+
from airbyte_cdk.sources.declarative.incremental import ConcurrentPerPartitionCursor
|
22
23
|
from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
|
23
24
|
from airbyte_cdk.sources.declarative.incremental.per_partition_with_global import (
|
24
25
|
PerPartitionWithGlobalCursor,
|
@@ -34,8 +35,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
34
35
|
from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
|
35
36
|
ModelToComponentFactory,
|
36
37
|
)
|
38
|
+
from airbyte_cdk.sources.declarative.partition_routers import AsyncJobPartitionRouter
|
37
39
|
from airbyte_cdk.sources.declarative.requesters import HttpRequester
|
38
|
-
from airbyte_cdk.sources.declarative.retrievers import Retriever, SimpleRetriever
|
40
|
+
from airbyte_cdk.sources.declarative.retrievers import AsyncRetriever, Retriever, SimpleRetriever
|
39
41
|
from airbyte_cdk.sources.declarative.stream_slicers.declarative_partition_generator import (
|
40
42
|
DeclarativePartitionFactory,
|
41
43
|
StreamSlicerPartitionGenerator,
|
@@ -48,7 +50,7 @@ from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStrea
|
|
48
50
|
from airbyte_cdk.sources.streams.concurrent.availability_strategy import (
|
49
51
|
AlwaysAvailableAvailabilityStrategy,
|
50
52
|
)
|
51
|
-
from airbyte_cdk.sources.streams.concurrent.cursor import FinalStateCursor
|
53
|
+
from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, FinalStateCursor
|
52
54
|
from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
|
53
55
|
from airbyte_cdk.sources.streams.concurrent.helpers import get_primary_key_from_stream
|
54
56
|
|
@@ -69,6 +71,10 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
69
71
|
component_factory: Optional[ModelToComponentFactory] = None,
|
70
72
|
**kwargs: Any,
|
71
73
|
) -> None:
|
74
|
+
# todo: We could remove state from initialization. Now that streams are grouped during the read(), a source
|
75
|
+
# no longer needs to store the original incoming state. But maybe there's an edge case?
|
76
|
+
self._connector_state_manager = ConnectorStateManager(state=state) # type: ignore # state is always in the form of List[AirbyteStateMessage]. The ConnectorStateManager should use generics, but this can be done later
|
77
|
+
|
72
78
|
# To reduce the complexity of the concurrent framework, we are not enabling RFR with synthetic
|
73
79
|
# cursors. We do this by no longer automatically instantiating RFR cursors when converting
|
74
80
|
# the declarative models into runtime components. Concurrent sources will continue to checkpoint
|
@@ -76,6 +82,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
76
82
|
component_factory = component_factory or ModelToComponentFactory(
|
77
83
|
emit_connector_builder_messages=emit_connector_builder_messages,
|
78
84
|
disable_resumable_full_refresh=True,
|
85
|
+
connector_state_manager=self._connector_state_manager,
|
79
86
|
)
|
80
87
|
|
81
88
|
super().__init__(
|
@@ -86,10 +93,6 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
86
93
|
component_factory=component_factory,
|
87
94
|
)
|
88
95
|
|
89
|
-
# todo: We could remove state from initialization. Now that streams are grouped during the read(), a source
|
90
|
-
# no longer needs to store the original incoming state. But maybe there's an edge case?
|
91
|
-
self._state = state
|
92
|
-
|
93
96
|
concurrency_level_from_manifest = self._source_config.get("concurrency_level")
|
94
97
|
if concurrency_level_from_manifest:
|
95
98
|
concurrency_level_component = self._constructor.create_component(
|
@@ -179,8 +182,6 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
179
182
|
concurrent_streams: List[AbstractStream] = []
|
180
183
|
synchronous_streams: List[Stream] = []
|
181
184
|
|
182
|
-
state_manager = ConnectorStateManager(state=self._state) # type: ignore # state is always in the form of List[AirbyteStateMessage]. The ConnectorStateManager should use generics, but this can be done later
|
183
|
-
|
184
185
|
# Combine streams and dynamic_streams. Note: both cannot be empty at the same time,
|
185
186
|
# and this is validated during the initialization of the source.
|
186
187
|
streams = self._stream_configs(self._source_config) + self._dynamic_stream_configs(
|
@@ -220,31 +221,52 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
220
221
|
if self._is_datetime_incremental_without_partition_routing(
|
221
222
|
declarative_stream, incremental_sync_component_definition
|
222
223
|
):
|
223
|
-
stream_state =
|
224
|
+
stream_state = self._connector_state_manager.get_stream_state(
|
224
225
|
stream_name=declarative_stream.name, namespace=declarative_stream.namespace
|
225
226
|
)
|
226
227
|
|
227
|
-
cursor = self._constructor.create_concurrent_cursor_from_datetime_based_cursor(
|
228
|
-
state_manager=state_manager,
|
229
|
-
model_type=DatetimeBasedCursorModel,
|
230
|
-
component_definition=incremental_sync_component_definition, # type: ignore # Not None because of the if condition above
|
231
|
-
stream_name=declarative_stream.name,
|
232
|
-
stream_namespace=declarative_stream.namespace,
|
233
|
-
config=config or {},
|
234
|
-
stream_state=stream_state,
|
235
|
-
)
|
236
|
-
|
237
228
|
retriever = self._get_retriever(declarative_stream, stream_state)
|
238
229
|
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
230
|
+
if isinstance(declarative_stream.retriever, AsyncRetriever) and isinstance(
|
231
|
+
declarative_stream.retriever.stream_slicer, AsyncJobPartitionRouter
|
232
|
+
):
|
233
|
+
cursor = declarative_stream.retriever.stream_slicer.stream_slicer
|
234
|
+
|
235
|
+
if not isinstance(cursor, ConcurrentCursor | ConcurrentPerPartitionCursor):
|
236
|
+
# This should never happen since we instantiate ConcurrentCursor in
|
237
|
+
# model_to_component_factory.py
|
238
|
+
raise ValueError(
|
239
|
+
f"Expected AsyncJobPartitionRouter stream_slicer to be of type ConcurrentCursor, but received{cursor.__class__}"
|
240
|
+
)
|
241
|
+
|
242
|
+
partition_generator = StreamSlicerPartitionGenerator(
|
243
|
+
partition_factory=DeclarativePartitionFactory(
|
244
|
+
declarative_stream.name,
|
245
|
+
declarative_stream.get_json_schema(),
|
246
|
+
retriever,
|
247
|
+
self.message_repository,
|
248
|
+
),
|
249
|
+
stream_slicer=declarative_stream.retriever.stream_slicer,
|
250
|
+
)
|
251
|
+
else:
|
252
|
+
cursor = (
|
253
|
+
self._constructor.create_concurrent_cursor_from_datetime_based_cursor(
|
254
|
+
model_type=DatetimeBasedCursorModel,
|
255
|
+
component_definition=incremental_sync_component_definition, # type: ignore # Not None because of the if condition above
|
256
|
+
stream_name=declarative_stream.name,
|
257
|
+
stream_namespace=declarative_stream.namespace,
|
258
|
+
config=config or {},
|
259
|
+
)
|
260
|
+
)
|
261
|
+
partition_generator = StreamSlicerPartitionGenerator(
|
262
|
+
partition_factory=DeclarativePartitionFactory(
|
263
|
+
declarative_stream.name,
|
264
|
+
declarative_stream.get_json_schema(),
|
265
|
+
retriever,
|
266
|
+
self.message_repository,
|
267
|
+
),
|
268
|
+
stream_slicer=cursor,
|
269
|
+
)
|
248
270
|
|
249
271
|
concurrent_streams.append(
|
250
272
|
DefaultStream(
|
@@ -306,14 +328,14 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
306
328
|
declarative_stream.retriever.stream_slicer, PerPartitionWithGlobalCursor
|
307
329
|
)
|
308
330
|
):
|
309
|
-
stream_state =
|
331
|
+
stream_state = self._connector_state_manager.get_stream_state(
|
310
332
|
stream_name=declarative_stream.name, namespace=declarative_stream.namespace
|
311
333
|
)
|
312
334
|
partition_router = declarative_stream.retriever.stream_slicer._partition_router
|
313
335
|
|
314
336
|
perpartition_cursor = (
|
315
337
|
self._constructor.create_concurrent_cursor_from_perpartition_cursor(
|
316
|
-
state_manager=
|
338
|
+
state_manager=self._connector_state_manager,
|
317
339
|
model_type=DatetimeBasedCursorModel,
|
318
340
|
component_definition=incremental_sync_component_definition,
|
319
341
|
stream_name=declarative_stream.name,
|
@@ -369,7 +391,10 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
369
391
|
declarative_stream=declarative_stream
|
370
392
|
)
|
371
393
|
and hasattr(declarative_stream.retriever, "stream_slicer")
|
372
|
-
and
|
394
|
+
and (
|
395
|
+
isinstance(declarative_stream.retriever.stream_slicer, DatetimeBasedCursor)
|
396
|
+
or isinstance(declarative_stream.retriever.stream_slicer, AsyncJobPartitionRouter)
|
397
|
+
)
|
373
398
|
)
|
374
399
|
|
375
400
|
def _stream_supports_concurrent_partition_processing(
|
@@ -438,8 +463,9 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
438
463
|
return False
|
439
464
|
return True
|
440
465
|
|
466
|
+
@staticmethod
|
441
467
|
def _get_retriever(
|
442
|
-
|
468
|
+
declarative_stream: DeclarativeStream, stream_state: Mapping[str, Any]
|
443
469
|
) -> Retriever:
|
444
470
|
retriever = declarative_stream.retriever
|
445
471
|
|
@@ -449,10 +475,21 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
449
475
|
# Also a temporary hack. In the legacy Stream implementation, as part of the read,
|
450
476
|
# set_initial_state() is called to instantiate incoming state on the cursor. Although we no
|
451
477
|
# longer rely on the legacy low-code cursor for concurrent checkpointing, low-code components
|
452
|
-
# like StopConditionPaginationStrategyDecorator
|
453
|
-
#
|
478
|
+
# like StopConditionPaginationStrategyDecorator still rely on a DatetimeBasedCursor that is
|
479
|
+
# properly initialized with state.
|
454
480
|
if retriever.cursor:
|
455
481
|
retriever.cursor.set_initial_state(stream_state=stream_state)
|
482
|
+
|
483
|
+
# Similar to above, the ClientSideIncrementalRecordFilterDecorator cursor is a separate instance
|
484
|
+
# from the one initialized on the SimpleRetriever, so it also must also have state initialized
|
485
|
+
# for semi-incremental streams using is_client_side_incremental to filter properly
|
486
|
+
if isinstance(retriever.record_selector, RecordSelector) and isinstance(
|
487
|
+
retriever.record_selector.record_filter, ClientSideIncrementalRecordFilterDecorator
|
488
|
+
):
|
489
|
+
retriever.record_selector.record_filter._cursor.set_initial_state(
|
490
|
+
stream_state=stream_state
|
491
|
+
) # type: ignore # After non-concurrent cursors are deprecated we can remove these cursor workarounds
|
492
|
+
|
456
493
|
# We zero it out here, but since this is a cursor reference, the state is still properly
|
457
494
|
# instantiated for the other components that reference it
|
458
495
|
retriever.cursor = None
|
@@ -320,6 +320,11 @@ definitions:
|
|
320
320
|
title: Stream Count
|
321
321
|
description: Numbers of the streams to try reading from when running a check operation.
|
322
322
|
type: integer
|
323
|
+
use_check_availability:
|
324
|
+
title: Use Check Availability
|
325
|
+
description: Enables stream check availability. This field is automatically set by the CDK.
|
326
|
+
type: boolean
|
327
|
+
default: true
|
323
328
|
CompositeErrorHandler:
|
324
329
|
title: Composite Error Handler
|
325
330
|
description: Error handler that sequentially iterates over a list of error handlers.
|
@@ -1800,6 +1805,19 @@ definitions:
|
|
1800
1805
|
$parameters:
|
1801
1806
|
type: object
|
1802
1807
|
additionalProperties: true
|
1808
|
+
ComplexFieldType:
|
1809
|
+
title: Schema Field Type
|
1810
|
+
description: (This component is experimental. Use at your own risk.) Represents a complex field type.
|
1811
|
+
type: object
|
1812
|
+
required:
|
1813
|
+
- field_type
|
1814
|
+
properties:
|
1815
|
+
field_type:
|
1816
|
+
type: string
|
1817
|
+
items:
|
1818
|
+
anyOf:
|
1819
|
+
- type: string
|
1820
|
+
- "$ref": "#/definitions/ComplexFieldType"
|
1803
1821
|
TypesMap:
|
1804
1822
|
title: Types Map
|
1805
1823
|
description: (This component is experimental. Use at your own risk.) Represents a mapping between a current type and its corresponding target type.
|
@@ -1814,6 +1832,7 @@ definitions:
|
|
1814
1832
|
- type: array
|
1815
1833
|
items:
|
1816
1834
|
type: string
|
1835
|
+
- "$ref": "#/definitions/ComplexFieldType"
|
1817
1836
|
current_type:
|
1818
1837
|
anyOf:
|
1819
1838
|
- type: string
|
@@ -2828,25 +2847,35 @@ definitions:
|
|
2828
2847
|
enum: [RequestPath]
|
2829
2848
|
RequestOption:
|
2830
2849
|
title: Request Option
|
2831
|
-
description: Specifies the key field and where in the request a component's value should be injected.
|
2850
|
+
description: Specifies the key field or path and where in the request a component's value should be injected.
|
2832
2851
|
type: object
|
2833
2852
|
required:
|
2834
2853
|
- type
|
2835
|
-
- field_name
|
2836
2854
|
- inject_into
|
2837
2855
|
properties:
|
2838
2856
|
type:
|
2839
2857
|
type: string
|
2840
2858
|
enum: [RequestOption]
|
2841
2859
|
field_name:
|
2842
|
-
title:
|
2843
|
-
description: Configures which key should be used in the location that the descriptor is being injected into
|
2860
|
+
title: Field Name
|
2861
|
+
description: Configures which key should be used in the location that the descriptor is being injected into. We hope to eventually deprecate this field in favor of `field_path` for all request_options, but must currently maintain it for backwards compatibility in the Builder.
|
2844
2862
|
type: string
|
2845
2863
|
examples:
|
2846
2864
|
- segment_id
|
2847
2865
|
interpolation_context:
|
2848
2866
|
- config
|
2849
2867
|
- parameters
|
2868
|
+
field_path:
|
2869
|
+
title: Field Path
|
2870
|
+
description: Configures a path to be used for nested structures in JSON body requests (e.g. GraphQL queries)
|
2871
|
+
type: array
|
2872
|
+
items:
|
2873
|
+
type: string
|
2874
|
+
examples:
|
2875
|
+
- ["data", "viewer", "id"]
|
2876
|
+
interpolation_context:
|
2877
|
+
- config
|
2878
|
+
- parameters
|
2850
2879
|
inject_into:
|
2851
2880
|
title: Inject Into
|
2852
2881
|
description: Configures where the descriptor should be set on the HTTP requests. Note that request parameters that are already encoded in the URL path will not be duplicated.
|