airbyte-cdk 6.26.0.dev4107__py3-none-any.whl → 6.26.0.dev4109__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. airbyte_cdk/cli/source_declarative_manifest/_run.py +3 -3
  2. airbyte_cdk/connector_builder/connector_builder_handler.py +2 -2
  3. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +7 -7
  4. airbyte_cdk/sources/declarative/auth/jwt.py +17 -11
  5. airbyte_cdk/sources/declarative/auth/oauth.py +22 -13
  6. airbyte_cdk/sources/declarative/auth/token.py +3 -8
  7. airbyte_cdk/sources/declarative/auth/token_provider.py +4 -5
  8. airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +19 -9
  9. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +71 -34
  10. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +33 -4
  11. airbyte_cdk/sources/declarative/declarative_stream.py +3 -1
  12. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +93 -27
  13. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +7 -6
  14. airbyte_cdk/sources/declarative/manifest_declarative_source.py +5 -3
  15. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +22 -5
  16. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +138 -38
  17. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +5 -5
  18. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +4 -2
  19. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +49 -25
  20. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +4 -4
  21. airbyte_cdk/sources/declarative/requesters/http_requester.py +5 -1
  22. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +6 -5
  23. airbyte_cdk/sources/declarative/requesters/request_option.py +83 -4
  24. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +7 -6
  25. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +6 -12
  26. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +4 -1
  27. airbyte_cdk/sources/declarative/schema/__init__.py +2 -0
  28. airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +44 -5
  29. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +1 -17
  30. airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +20 -9
  31. airbyte_cdk/sources/file_based/file_based_source.py +9 -6
  32. airbyte_cdk/sources/file_based/file_based_stream_reader.py +28 -2
  33. airbyte_cdk/sources/file_based/schema_helpers.py +0 -25
  34. airbyte_cdk/sources/file_based/stream/__init__.py +2 -2
  35. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +2 -3
  36. airbyte_cdk/sources/file_based/stream/identities_stream.py +8 -57
  37. airbyte_cdk/sources/specs/transfer_modes.py +26 -0
  38. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +22 -13
  39. airbyte_cdk/sources/streams/core.py +6 -6
  40. airbyte_cdk/sources/streams/http/http.py +1 -2
  41. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +231 -62
  42. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +166 -83
  43. airbyte_cdk/sources/streams/permissions/identities.py +77 -0
  44. airbyte_cdk/sources/types.py +4 -2
  45. airbyte_cdk/sources/utils/transform.py +23 -2
  46. airbyte_cdk/utils/datetime_helpers.py +499 -0
  47. airbyte_cdk/utils/mapping_helpers.py +86 -27
  48. airbyte_cdk/utils/slice_hasher.py +8 -1
  49. airbyte_cdk-6.26.0.dev4109.dist-info/LICENSE_SHORT +1 -0
  50. {airbyte_cdk-6.26.0.dev4107.dist-info → airbyte_cdk-6.26.0.dev4109.dist-info}/METADATA +5 -5
  51. {airbyte_cdk-6.26.0.dev4107.dist-info → airbyte_cdk-6.26.0.dev4109.dist-info}/RECORD +54 -50
  52. {airbyte_cdk-6.26.0.dev4107.dist-info → airbyte_cdk-6.26.0.dev4109.dist-info}/WHEEL +1 -1
  53. {airbyte_cdk-6.26.0.dev4107.dist-info → airbyte_cdk-6.26.0.dev4109.dist-info}/LICENSE.txt +0 -0
  54. {airbyte_cdk-6.26.0.dev4107.dist-info → airbyte_cdk-6.26.0.dev4109.dist-info}/entry_points.txt +0 -0
airbyte_cdk/cli/source_declarative_manifest/_run.py

@@ -21,7 +21,6 @@ import pkgutil
 import sys
 import traceback
 from collections.abc import Mapping
-from datetime import datetime
 from pathlib import Path
 from typing import Any, cast
 
@@ -44,6 +43,7 @@ from airbyte_cdk.sources.declarative.concurrent_declarative_source import (
 )
 from airbyte_cdk.sources.declarative.yaml_declarative_source import YamlDeclarativeSource
 from airbyte_cdk.sources.source import TState
+from airbyte_cdk.utils.datetime_helpers import ab_datetime_now
 
 
 class SourceLocalYaml(YamlDeclarativeSource):
@@ -101,7 +101,7 @@ def _get_local_yaml_source(args: list[str]) -> SourceLocalYaml:
             type=Type.TRACE,
             trace=AirbyteTraceMessage(
                 type=TraceType.ERROR,
-                emitted_at=int(datetime.now().timestamp() * 1000),
+                emitted_at=ab_datetime_now().to_epoch_millis(),
                 error=AirbyteErrorTraceMessage(
                     message=f"Error starting the sync. This could be due to an invalid configuration or catalog. Please contact Support for assistance. Error: {error}",
                     stack_trace=traceback.format_exc(),
@@ -191,7 +191,7 @@ def create_declarative_source(
             type=Type.TRACE,
             trace=AirbyteTraceMessage(
                 type=TraceType.ERROR,
-                emitted_at=int(datetime.now().timestamp() * 1000),
+                emitted_at=ab_datetime_now().to_epoch_millis(),
                 error=AirbyteErrorTraceMessage(
                     message=f"Error starting the sync. This could be due to an invalid configuration or catalog. Please contact Support for assistance. Error: {error}",
                     stack_trace=traceback.format_exc(),
airbyte_cdk/connector_builder/connector_builder_handler.py

@@ -3,7 +3,6 @@
 #
 
 import dataclasses
-from datetime import datetime
 from typing import Any, List, Mapping
 
 from airbyte_cdk.connector_builder.message_grouper import MessageGrouper
@@ -21,6 +20,7 @@ from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
     ModelToComponentFactory,
 )
 from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets
+from airbyte_cdk.utils.datetime_helpers import ab_datetime_now
 from airbyte_cdk.utils.traced_exception import AirbyteTracedException
 
 DEFAULT_MAXIMUM_NUMBER_OF_PAGES_PER_SLICE = 5
@@ -114,4 +114,4 @@ def resolve_manifest(source: ManifestDeclarativeSource) -> AirbyteMessage:
 
 
 def _emitted_at() -> int:
-    return int(datetime.now().timestamp()) * 1000
+    return ab_datetime_now().to_epoch_millis()
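Note the subtle bug this replaces: the old `_emitted_at` truncated to whole seconds *before* scaling, so connector-builder timestamps always ended in `000`, while `_run.py` scaled before truncating. `to_epoch_millis()` unifies both call sites on true millisecond precision. A minimal stdlib-only sketch of the difference:

```python
from datetime import datetime, timezone

now = datetime(2025, 1, 2, 3, 4, 5, 678000, tzinfo=timezone.utc)

# Old connector_builder_handler helper: truncate to seconds, then scale.
old = int(now.timestamp()) * 1000   # 1735787045000 -- the .678s is lost

# Behavior equivalent to to_epoch_millis(): scale first, then truncate.
new = int(now.timestamp() * 1000)   # 1735787045678

assert new - old == 678
```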
airbyte_cdk/sources/declarative/async_job/job_orchestrator.py

@@ -437,10 +437,10 @@ class AsyncJobOrchestrator:
                 yield from self._process_running_partitions_and_yield_completed_ones()
                 self._wait_on_status_update()
             except Exception as exception:
+                LOGGER.warning(
+                    f"Caught exception that stops the processing of the jobs: {exception}. Traceback: {traceback.format_exc()}"
+                )
                 if self._is_breaking_exception(exception):
-                    LOGGER.warning(
-                        f"Caught exception that stops the processing of the jobs: {exception}"
-                    )
                     self._abort_all_running_jobs()
                     raise exception
 
@@ -482,16 +482,16 @@ class AsyncJobOrchestrator:
             and exception.failure_type == FailureType.config_error
         )
 
-    def fetch_records(self, partition: AsyncPartition) -> Iterable[Mapping[str, Any]]:
+    def fetch_records(self, async_jobs: Iterable[AsyncJob]) -> Iterable[Mapping[str, Any]]:
         """
-        Fetches records from the given partition's jobs.
+        Fetches records from the given jobs.
 
         Args:
-            partition (AsyncPartition): The partition containing the jobs.
+            async_jobs Iterable[AsyncJob]: The list of AsyncJobs.
 
         Yields:
             Iterable[Mapping[str, Any]]: The fetched records from the jobs.
         """
-        for job in partition.jobs:
+        for job in async_jobs:
             yield from self._job_repository.fetch_records(job)
             self._job_repository.delete(job)
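This is a breaking change for callers of `fetch_records`: the orchestrator now takes the jobs themselves instead of the wrapping `AsyncPartition`. A hedged migration sketch (the `orchestrator` and `partition` variables are illustrative, not from this diff):

```python
# Before: the orchestrator reached into the partition itself.
# records = list(orchestrator.fetch_records(partition))

# After: the caller unwraps the partition and hands over its jobs, so
# fetch_records only depends on Iterable[AsyncJob], not on partitions.
records = list(orchestrator.fetch_records(partition.jobs))
```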
airbyte_cdk/sources/declarative/auth/jwt.py

@@ -3,6 +3,7 @@
 #
 
 import base64
+import json
 from dataclasses import InitVar, dataclass
 from datetime import datetime
 from typing import Any, Mapping, Optional, Union
@@ -104,21 +105,21 @@ class JwtAuthenticator(DeclarativeAuthenticator):
         )
 
     def _get_jwt_headers(self) -> dict[str, Any]:
-        """ "
+        """
         Builds and returns the headers used when signing the JWT.
         """
-        headers = self._additional_jwt_headers.eval(self.config)
+        headers = self._additional_jwt_headers.eval(self.config, json_loads=json.loads)
         if any(prop in headers for prop in ["kid", "alg", "typ", "cty"]):
             raise ValueError(
                 "'kid', 'alg', 'typ', 'cty' are reserved headers and should not be set as part of 'additional_jwt_headers'"
             )
 
         if self._kid:
-            headers["kid"] = self._kid.eval(self.config)
+            headers["kid"] = self._kid.eval(self.config, json_loads=json.loads)
         if self._typ:
-            headers["typ"] = self._typ.eval(self.config)
+            headers["typ"] = self._typ.eval(self.config, json_loads=json.loads)
         if self._cty:
-            headers["cty"] = self._cty.eval(self.config)
+            headers["cty"] = self._cty.eval(self.config, json_loads=json.loads)
         headers["alg"] = self._algorithm
         return headers
 
@@ -130,18 +131,19 @@ class JwtAuthenticator(DeclarativeAuthenticator):
         exp = now + self._token_duration if isinstance(self._token_duration, int) else now
         nbf = now
 
-        payload = self._additional_jwt_payload.eval(self.config)
+        payload = self._additional_jwt_payload.eval(self.config, json_loads=json.loads)
         if any(prop in payload for prop in ["iss", "sub", "aud", "iat", "exp", "nbf"]):
             raise ValueError(
                 "'iss', 'sub', 'aud', 'iat', 'exp', 'nbf' are reserved properties and should not be set as part of 'additional_jwt_payload'"
             )
 
         if self._iss:
-            payload["iss"] = self._iss.eval(self.config)
+            payload["iss"] = self._iss.eval(self.config, json_loads=json.loads)
         if self._sub:
-            payload["sub"] = self._sub.eval(self.config)
+            payload["sub"] = self._sub.eval(self.config, json_loads=json.loads)
         if self._aud:
-            payload["aud"] = self._aud.eval(self.config)
+            payload["aud"] = self._aud.eval(self.config, json_loads=json.loads)
+
         payload["iat"] = now
         payload["exp"] = exp
         payload["nbf"] = nbf
@@ -151,7 +153,7 @@ class JwtAuthenticator(DeclarativeAuthenticator):
         """
         Returns the secret key used to sign the JWT.
         """
-        secret_key: str = self._secret_key.eval(self.config)
+        secret_key: str = self._secret_key.eval(self.config, json_loads=json.loads)
         return (
             base64.b64encode(secret_key.encode()).decode()
             if self._base64_encode_secret_key
@@ -176,7 +178,11 @@ class JwtAuthenticator(DeclarativeAuthenticator):
         """
         Returns the header prefix to be used when attaching the token to the request.
         """
-        return self._header_prefix.eval(self.config) if self._header_prefix else None
+        return (
+            self._header_prefix.eval(self.config, json_loads=json.loads)
+            if self._header_prefix
+            else None
+        )
 
     @property
     def auth_header(self) -> str:
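Threading `json_loads=json.loads` through every `eval()` call presumably registers a `json_loads` helper in the interpolation context (kwargs passed to `eval()` become interpolation variables in the CDK's Jinja evaluation), so manifests can unpack JSON-encoded config values inline. A hypothetical example, under that assumption:

```python
# Hypothetical config: credentials stored as a JSON string in one field.
config = {"credentials": '{"kid": "key-1", "private_key": "..."}'}

# A manifest value such as the following would then resolve to "key-1":
#   kid: "{{ json_loads(config['credentials'])['kid'] }}"
```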
airbyte_cdk/sources/declarative/auth/oauth.py

@@ -3,10 +3,9 @@
 #
 
 from dataclasses import InitVar, dataclass, field
+from datetime import timedelta
 from typing import Any, List, Mapping, MutableMapping, Optional, Union
 
-import pendulum
-
 from airbyte_cdk.sources.declarative.auth.declarative_authenticator import DeclarativeAuthenticator
 from airbyte_cdk.sources.declarative.interpolation.interpolated_boolean import InterpolatedBoolean
 from airbyte_cdk.sources.declarative.interpolation.interpolated_mapping import InterpolatedMapping
@@ -18,6 +17,7 @@ from airbyte_cdk.sources.streams.http.requests_native_auth.abstract_oauth import
 from airbyte_cdk.sources.streams.http.requests_native_auth.oauth import (
     SingleUseRefreshTokenOauth2Authenticator,
 )
+from airbyte_cdk.utils.datetime_helpers import AirbyteDateTime, ab_datetime_now, ab_datetime_parse
 
 
 @dataclass
@@ -53,7 +53,7 @@ class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, DeclarativeAut
     refresh_token: Optional[Union[InterpolatedString, str]] = None
     scopes: Optional[List[str]] = None
     token_expiry_date: Optional[Union[InterpolatedString, str]] = None
-    _token_expiry_date: Optional[pendulum.DateTime] = field(init=False, repr=False, default=None)
+    _token_expiry_date: Optional[AirbyteDateTime] = field(init=False, repr=False, default=None)
     token_expiry_date_format: Optional[str] = None
     token_expiry_is_time_of_expiration: bool = False
     access_token_name: Union[InterpolatedString, str] = "access_token"
@@ -122,15 +122,24 @@ class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, DeclarativeAut
         self._refresh_request_headers = InterpolatedMapping(
             self.refresh_request_headers or {}, parameters=parameters
         )
-        self._token_expiry_date: pendulum.DateTime = (
-            pendulum.parse(
-                InterpolatedString.create(self.token_expiry_date, parameters=parameters).eval(
-                    self.config
+        try:
+            if (
+                isinstance(self.token_expiry_date, (int, str))
+                and str(self.token_expiry_date).isdigit()
+            ):
+                self._token_expiry_date = ab_datetime_parse(self.token_expiry_date)
+            else:
+                self._token_expiry_date = (
+                    ab_datetime_parse(
+                        InterpolatedString.create(
+                            self.token_expiry_date, parameters=parameters
+                        ).eval(self.config)
+                    )
+                    if self.token_expiry_date
+                    else ab_datetime_now() - timedelta(days=1)
                 )
-            )  # type: ignore # pendulum.parse returns a datetime in this context
-            if self.token_expiry_date
-            else pendulum.now().subtract(days=1)  # type: ignore # substract does not have type hints
-        )
+        except ValueError as e:
+            raise ValueError(f"Invalid token expiry date format: {e}")
         self.use_profile_assertion = (
             InterpolatedBoolean(self.use_profile_assertion, parameters=parameters)
             if isinstance(self.use_profile_assertion, str)
@@ -222,8 +231,8 @@ class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, DeclarativeAut
     def get_refresh_request_headers(self) -> Mapping[str, Any]:
         return self._refresh_request_headers.eval(self.config)
 
-    def get_token_expiry_date(self) -> pendulum.DateTime:
-        return self._token_expiry_date  # type: ignore # _token_expiry_date is a pendulum.DateTime. It is never None despite what mypy thinks
+    def get_token_expiry_date(self) -> AirbyteDateTime:
+        return self._token_expiry_date  # type: ignore # _token_expiry_date is an AirbyteDateTime. It is never None despite what mypy thinks
 
     def set_token_expiry_date(self, value: Union[str, int]) -> None:
         self._token_expiry_date = self._parse_token_expiration_date(value)
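The rewritten `__post_init__` now accepts epoch timestamps as well as date strings: all-digit values go straight to `ab_datetime_parse`, other strings are interpolated first, and a missing value falls back to a timestamp one day in the past so the first request forces a token refresh. A simplified sketch of that branching (interpolation omitted):

```python
from datetime import timedelta

from airbyte_cdk.utils.datetime_helpers import ab_datetime_now, ab_datetime_parse


def resolve_expiry(token_expiry_date):
    # All-digit values are treated as epoch seconds, e.g. 1735689600.
    if isinstance(token_expiry_date, (int, str)) and str(token_expiry_date).isdigit():
        return ab_datetime_parse(token_expiry_date)
    # Other strings are parsed as dates, e.g. "2025-01-01T00:00:00Z".
    # (In the real component the string is interpolated against config first.)
    if token_expiry_date:
        return ab_datetime_parse(token_expiry_date)
    # No value: pretend the token expired yesterday to force a refresh.
    return ab_datetime_now() - timedelta(days=1)
```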
airbyte_cdk/sources/declarative/auth/token.py

@@ -5,7 +5,7 @@
 import base64
 import logging
 from dataclasses import InitVar, dataclass
-from typing import Any, Mapping, Union
+from typing import Any, Mapping, MutableMapping, Union
 
 import requests
 from cachetools import TTLCache, cached
@@ -45,11 +45,6 @@ class ApiKeyAuthenticator(DeclarativeAuthenticator):
     config: Config
     parameters: InitVar[Mapping[str, Any]]
 
-    def __post_init__(self, parameters: Mapping[str, Any]) -> None:
-        self._field_name = InterpolatedString.create(
-            self.request_option.field_name, parameters=parameters
-        )
-
     @property
     def auth_header(self) -> str:
         options = self._get_request_options(RequestOptionType.header)
@@ -60,9 +55,9 @@ class ApiKeyAuthenticator(DeclarativeAuthenticator):
         return self.token_provider.get_token()
 
     def _get_request_options(self, option_type: RequestOptionType) -> Mapping[str, Any]:
-        options = {}
+        options: MutableMapping[str, Any] = {}
         if self.request_option.inject_into == option_type:
-            options[self._field_name.eval(self.config)] = self.token
+            self.request_option.inject_into_request(options, self.token, self.config)
        return options
 
     def get_request_params(self) -> Mapping[str, Any]:
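The authenticator no longer resolves `field_name` itself; it delegates to the new `RequestOption.inject_into_request` (added in `request_option.py`, +83 lines in this diff), which can write the value under a flat `field_name` or, per the schema change below, a nested `field_path`. A hedged usage sketch, assuming the `RequestOption` constructor keeps its existing dataclass shape:

```python
from airbyte_cdk.sources.declarative.requesters.request_option import (
    RequestOption,
    RequestOptionType,
)

request_option = RequestOption(
    inject_into=RequestOptionType.header,
    field_name="X-Api-Key",
    parameters={},
)

options: dict = {}
# The option decides where the value lands; the caller just supplies it.
request_option.inject_into_request(options, "my-secret-token", {})
# options == {"X-Api-Key": "my-secret-token"}
```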
airbyte_cdk/sources/declarative/auth/token_provider.py

@@ -9,9 +9,7 @@ from dataclasses import InitVar, dataclass, field
 from typing import Any, List, Mapping, Optional, Union
 
 import dpath
-import pendulum
 from isodate import Duration
-from pendulum import DateTime
 
 from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
 from airbyte_cdk.sources.declarative.decoders.json_decoder import JsonDecoder
@@ -21,6 +19,7 @@ from airbyte_cdk.sources.declarative.requesters.requester import Requester
 from airbyte_cdk.sources.http_logger import format_http_message
 from airbyte_cdk.sources.message import MessageRepository, NoopMessageRepository
 from airbyte_cdk.sources.types import Config
+from airbyte_cdk.utils.datetime_helpers import AirbyteDateTime, ab_datetime_now
 
 
 class TokenProvider:
@@ -38,7 +37,7 @@ class SessionTokenProvider(TokenProvider):
     message_repository: MessageRepository = NoopMessageRepository()
     decoder: Decoder = field(default_factory=lambda: JsonDecoder(parameters={}))
 
-    _next_expiration_time: Optional[DateTime] = None
+    _next_expiration_time: Optional[AirbyteDateTime] = None
     _token: Optional[str] = None
 
     def get_token(self) -> str:
@@ -48,7 +47,7 @@ class SessionTokenProvider(TokenProvider):
         return self._token
 
     def _refresh_if_necessary(self) -> None:
-        if self._next_expiration_time is None or self._next_expiration_time < pendulum.now():
+        if self._next_expiration_time is None or self._next_expiration_time < ab_datetime_now():
             self._refresh()
 
     def _refresh(self) -> None:
@@ -65,7 +64,7 @@ class SessionTokenProvider(TokenProvider):
             raise ReadException("Failed to get session token, response got ignored by requester")
         session_token = dpath.get(next(self.decoder.decode(response)), self.session_token_path)
         if self.expiration_duration is not None:
-            self._next_expiration_time = pendulum.now() + self.expiration_duration
+            self._next_expiration_time = ab_datetime_now() + self.expiration_duration
         self._token = session_token  # type: ignore # Returned decoded response will be Mapping and therefore session_token will be str or None
 
 
airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py

@@ -21,8 +21,12 @@ class CheckDynamicStream(ConnectionChecker):
         stream_count (int): numbers of streams to check
     """
 
+    # TODO: Add field stream_names to check_connection for static streams
+    #  https://github.com/airbytehq/airbyte-python-cdk/pull/293#discussion_r1934933483
+
     stream_count: int
     parameters: InitVar[Mapping[str, Any]]
+    use_check_availability: bool = True
 
     def __post_init__(self, parameters: Mapping[str, Any]) -> None:
         self._parameters = parameters
@@ -31,21 +35,27 @@ class CheckDynamicStream(ConnectionChecker):
         self, source: AbstractSource, logger: logging.Logger, config: Mapping[str, Any]
     ) -> Tuple[bool, Any]:
         streams = source.streams(config=config)
+
         if len(streams) == 0:
             return False, f"No streams to connect to from source {source}"
+        if not self.use_check_availability:
+            return True, None
+
+        availability_strategy = HttpAvailabilityStrategy()
 
-        for stream_index in range(min(self.stream_count, len(streams))):
-            stream = streams[stream_index]
-            availability_strategy = HttpAvailabilityStrategy()
-            try:
+        try:
+            for stream in streams[: min(self.stream_count, len(streams))]:
                 stream_is_available, reason = availability_strategy.check_availability(
                     stream, logger
                 )
                 if not stream_is_available:
+                    logger.warning(f"Stream {stream.name} is not available: {reason}")
                     return False, reason
-            except Exception as error:
-                logger.error(
-                    f"Encountered an error trying to connect to stream {stream.name}. Error: \n {traceback.format_exc()}"
-                )
-                return False, f"Unable to connect to stream {stream.name} - {error}"
+        except Exception as error:
+            error_message = (
+                f"Encountered an error trying to connect to stream {stream.name}. Error: {error}"
+            )
+            logger.error(error_message, exc_info=True)
+            return False, error_message
+
         return True, None
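With `use_check_availability: false`, the checker now succeeds as soon as streams can be built from the config, skipping the per-stream HTTP probe entirely. A hypothetical component definition, shown as the equivalent Python mapping of the manifest entry:

```python
# Hypothetical checker definition: validate that dynamic streams can be
# generated from the config, but skip the HttpAvailabilityStrategy probe.
check_component = {
    "type": "CheckDynamicStream",
    "stream_count": 1,
    "use_check_availability": False,
}
```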
airbyte_cdk/sources/declarative/concurrent_declarative_source.py

@@ -19,6 +19,7 @@ from airbyte_cdk.sources.declarative.extractors import RecordSelector
 from airbyte_cdk.sources.declarative.extractors.record_filter import (
     ClientSideIncrementalRecordFilterDecorator,
 )
+from airbyte_cdk.sources.declarative.incremental import ConcurrentPerPartitionCursor
 from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
 from airbyte_cdk.sources.declarative.incremental.per_partition_with_global import (
     PerPartitionWithGlobalCursor,
@@ -34,8 +35,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
 from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
     ModelToComponentFactory,
 )
+from airbyte_cdk.sources.declarative.partition_routers import AsyncJobPartitionRouter
 from airbyte_cdk.sources.declarative.requesters import HttpRequester
-from airbyte_cdk.sources.declarative.retrievers import Retriever, SimpleRetriever
+from airbyte_cdk.sources.declarative.retrievers import AsyncRetriever, Retriever, SimpleRetriever
 from airbyte_cdk.sources.declarative.stream_slicers.declarative_partition_generator import (
     DeclarativePartitionFactory,
     StreamSlicerPartitionGenerator,
@@ -48,7 +50,7 @@ from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStrea
 from airbyte_cdk.sources.streams.concurrent.availability_strategy import (
     AlwaysAvailableAvailabilityStrategy,
 )
-from airbyte_cdk.sources.streams.concurrent.cursor import FinalStateCursor
+from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, FinalStateCursor
 from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
 from airbyte_cdk.sources.streams.concurrent.helpers import get_primary_key_from_stream
 
@@ -69,6 +71,10 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
         component_factory: Optional[ModelToComponentFactory] = None,
         **kwargs: Any,
     ) -> None:
+        # todo: We could remove state from initialization. Now that streams are grouped during the read(), a source
+        #  no longer needs to store the original incoming state. But maybe there's an edge case?
+        self._connector_state_manager = ConnectorStateManager(state=state)  # type: ignore # state is always in the form of List[AirbyteStateMessage]. The ConnectorStateManager should use generics, but this can be done later
+
         # To reduce the complexity of the concurrent framework, we are not enabling RFR with synthetic
         # cursors. We do this by no longer automatically instantiating RFR cursors when converting
         # the declarative models into runtime components. Concurrent sources will continue to checkpoint
@@ -76,6 +82,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
         component_factory = component_factory or ModelToComponentFactory(
             emit_connector_builder_messages=emit_connector_builder_messages,
             disable_resumable_full_refresh=True,
+            connector_state_manager=self._connector_state_manager,
         )
 
         super().__init__(
@@ -86,10 +93,6 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
             component_factory=component_factory,
         )
 
-        # todo: We could remove state from initialization. Now that streams are grouped during the read(), a source
-        #  no longer needs to store the original incoming state. But maybe there's an edge case?
-        self._state = state
-
         concurrency_level_from_manifest = self._source_config.get("concurrency_level")
         if concurrency_level_from_manifest:
             concurrency_level_component = self._constructor.create_component(
@@ -179,8 +182,6 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
         concurrent_streams: List[AbstractStream] = []
         synchronous_streams: List[Stream] = []
 
-        state_manager = ConnectorStateManager(state=self._state)  # type: ignore # state is always in the form of List[AirbyteStateMessage]. The ConnectorStateManager should use generics, but this can be done later
-
         # Combine streams and dynamic_streams. Note: both cannot be empty at the same time,
         # and this is validated during the initialization of the source.
         streams = self._stream_configs(self._source_config) + self._dynamic_stream_configs(
@@ -220,31 +221,52 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                 if self._is_datetime_incremental_without_partition_routing(
                     declarative_stream, incremental_sync_component_definition
                 ):
-                    stream_state = state_manager.get_stream_state(
+                    stream_state = self._connector_state_manager.get_stream_state(
                         stream_name=declarative_stream.name, namespace=declarative_stream.namespace
                     )
 
-                    cursor = self._constructor.create_concurrent_cursor_from_datetime_based_cursor(
-                        state_manager=state_manager,
-                        model_type=DatetimeBasedCursorModel,
-                        component_definition=incremental_sync_component_definition,  # type: ignore # Not None because of the if condition above
-                        stream_name=declarative_stream.name,
-                        stream_namespace=declarative_stream.namespace,
-                        config=config or {},
-                        stream_state=stream_state,
-                    )
-
                     retriever = self._get_retriever(declarative_stream, stream_state)
 
-                    partition_generator = StreamSlicerPartitionGenerator(
-                        DeclarativePartitionFactory(
-                            declarative_stream.name,
-                            declarative_stream.get_json_schema(),
-                            retriever,
-                            self.message_repository,
-                        ),
-                        cursor,
-                    )
+                    if isinstance(declarative_stream.retriever, AsyncRetriever) and isinstance(
+                        declarative_stream.retriever.stream_slicer, AsyncJobPartitionRouter
+                    ):
+                        cursor = declarative_stream.retriever.stream_slicer.stream_slicer
+
+                        if not isinstance(cursor, ConcurrentCursor | ConcurrentPerPartitionCursor):
+                            # This should never happen since we instantiate ConcurrentCursor in
+                            # model_to_component_factory.py
+                            raise ValueError(
+                                f"Expected AsyncJobPartitionRouter stream_slicer to be of type ConcurrentCursor, but received{cursor.__class__}"
+                            )
+
+                        partition_generator = StreamSlicerPartitionGenerator(
+                            partition_factory=DeclarativePartitionFactory(
+                                declarative_stream.name,
+                                declarative_stream.get_json_schema(),
+                                retriever,
+                                self.message_repository,
+                            ),
+                            stream_slicer=declarative_stream.retriever.stream_slicer,
+                        )
+                    else:
+                        cursor = (
+                            self._constructor.create_concurrent_cursor_from_datetime_based_cursor(
+                                model_type=DatetimeBasedCursorModel,
+                                component_definition=incremental_sync_component_definition,  # type: ignore # Not None because of the if condition above
+                                stream_name=declarative_stream.name,
+                                stream_namespace=declarative_stream.namespace,
+                                config=config or {},
+                            )
+                        )
+                        partition_generator = StreamSlicerPartitionGenerator(
+                            partition_factory=DeclarativePartitionFactory(
+                                declarative_stream.name,
+                                declarative_stream.get_json_schema(),
+                                retriever,
+                                self.message_repository,
+                            ),
+                            stream_slicer=cursor,
+                        )
 
                     concurrent_streams.append(
                         DefaultStream(
@@ -306,14 +328,14 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                         declarative_stream.retriever.stream_slicer, PerPartitionWithGlobalCursor
                     )
                 ):
-                    stream_state = state_manager.get_stream_state(
+                    stream_state = self._connector_state_manager.get_stream_state(
                         stream_name=declarative_stream.name, namespace=declarative_stream.namespace
                     )
                     partition_router = declarative_stream.retriever.stream_slicer._partition_router
 
                     perpartition_cursor = (
                         self._constructor.create_concurrent_cursor_from_perpartition_cursor(
-                            state_manager=state_manager,
+                            state_manager=self._connector_state_manager,
                             model_type=DatetimeBasedCursorModel,
                             component_definition=incremental_sync_component_definition,
                             stream_name=declarative_stream.name,
@@ -369,7 +391,10 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                 declarative_stream=declarative_stream
             )
             and hasattr(declarative_stream.retriever, "stream_slicer")
-            and isinstance(declarative_stream.retriever.stream_slicer, DatetimeBasedCursor)
+            and (
+                isinstance(declarative_stream.retriever.stream_slicer, DatetimeBasedCursor)
+                or isinstance(declarative_stream.retriever.stream_slicer, AsyncJobPartitionRouter)
+            )
         )
 
     def _stream_supports_concurrent_partition_processing(
@@ -438,8 +463,9 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                 return False
         return True
 
+    @staticmethod
     def _get_retriever(
-        self, declarative_stream: DeclarativeStream, stream_state: Mapping[str, Any]
+        declarative_stream: DeclarativeStream, stream_state: Mapping[str, Any]
     ) -> Retriever:
         retriever = declarative_stream.retriever
 
@@ -449,10 +475,21 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
         # Also a temporary hack. In the legacy Stream implementation, as part of the read,
         # set_initial_state() is called to instantiate incoming state on the cursor. Although we no
         # longer rely on the legacy low-code cursor for concurrent checkpointing, low-code components
-        # like StopConditionPaginationStrategyDecorator and ClientSideIncrementalRecordFilterDecorator
-        # still rely on a DatetimeBasedCursor that is properly initialized with state.
+        # like StopConditionPaginationStrategyDecorator still rely on a DatetimeBasedCursor that is
+        # properly initialized with state.
         if retriever.cursor:
             retriever.cursor.set_initial_state(stream_state=stream_state)
+
+        # Similar to above, the ClientSideIncrementalRecordFilterDecorator cursor is a separate instance
+        # from the one initialized on the SimpleRetriever, so it also must also have state initialized
+        # for semi-incremental streams using is_client_side_incremental to filter properly
+        if isinstance(retriever.record_selector, RecordSelector) and isinstance(
+            retriever.record_selector.record_filter, ClientSideIncrementalRecordFilterDecorator
+        ):
+            retriever.record_selector.record_filter._cursor.set_initial_state(
+                stream_state=stream_state
+            )  # type: ignore # After non-concurrent cursors are deprecated we can remove these cursor workarounds
+
         # We zero it out here, but since this is a cursor reference, the state is still properly
         # instantiated for the other components that reference it
         retriever.cursor = None
airbyte_cdk/sources/declarative/declarative_component_schema.yaml

@@ -320,6 +320,11 @@ definitions:
         title: Stream Count
         description: Numbers of the streams to try reading from when running a check operation.
         type: integer
+      use_check_availability:
+        title: Use Check Availability
+        description: Enables stream check availability. This field is automatically set by the CDK.
+        type: boolean
+        default: true
   CompositeErrorHandler:
     title: Composite Error Handler
     description: Error handler that sequentially iterates over a list of error handlers.
@@ -1800,6 +1805,19 @@ definitions:
       $parameters:
         type: object
         additionalProperties: true
+  ComplexFieldType:
+    title: Schema Field Type
+    description: (This component is experimental. Use at your own risk.) Represents a complex field type.
+    type: object
+    required:
+      - field_type
+    properties:
+      field_type:
+        type: string
+      items:
+        anyOf:
+          - type: string
+          - "$ref": "#/definitions/ComplexFieldType"
   TypesMap:
     title: Types Map
     description: (This component is experimental. Use at your own risk.) Represents a mapping between a current type and its corresponding target type.
@@ -1814,6 +1832,7 @@ definitions:
           - type: array
             items:
              type: string
+          - "$ref": "#/definitions/ComplexFieldType"
       current_type:
         anyOf:
           - type: string
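`ComplexFieldType` lets a `TypesMap` target a structured schema type rather than a bare type name, and `items` can recurse for nested arrays. A hypothetical mapping that converts a source-reported type to an array of strings (the `multi_select` name is illustrative, shown as the equivalent Python mapping of the manifest entry):

```python
# Hypothetical TypesMap entry using the new ComplexFieldType target type.
types_map = {
    "current_type": "multi_select",
    "target_type": {
        "field_type": "array",
        "items": "string",
    },
}
```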
@@ -2828,25 +2847,35 @@ definitions:
       enum: [RequestPath]
   RequestOption:
     title: Request Option
-    description: Specifies the key field and where in the request a component's value should be injected.
+    description: Specifies the key field or path and where in the request a component's value should be injected.
     type: object
     required:
       - type
-      - field_name
       - inject_into
     properties:
       type:
         type: string
         enum: [RequestOption]
       field_name:
-        title: Request Option
-        description: Configures which key should be used in the location that the descriptor is being injected into
+        title: Field Name
+        description: Configures which key should be used in the location that the descriptor is being injected into. We hope to eventually deprecate this field in favor of `field_path` for all request_options, but must currently maintain it for backwards compatibility in the Builder.
         type: string
         examples:
           - segment_id
         interpolation_context:
           - config
           - parameters
+      field_path:
+        title: Field Path
+        description: Configures a path to be used for nested structures in JSON body requests (e.g. GraphQL queries)
+        type: array
+        items:
+          type: string
+        examples:
+          - ["data", "viewer", "id"]
+        interpolation_context:
+          - config
+          - parameters
       inject_into:
         title: Inject Into
         description: Configures where the descriptor should be set on the HTTP requests. Note that request parameters that are already encoded in the URL path will not be duplicated.
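Since `field_name` is no longer required, a `RequestOption` can instead declare a `field_path` to inject its value into a nested JSON body, the GraphQL-style use case called out in the description. A hypothetical definition, shown as the equivalent Python mapping (`body_json` is the existing injection target for JSON request bodies):

```python
# Hypothetical: write the component's value at body["data"]["viewer"]["id"]
# rather than under a single top-level key.
request_option = {
    "type": "RequestOption",
    "field_path": ["data", "viewer", "id"],
    "inject_into": "body_json",
}
```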