airbyte-cdk 6.8.1rc9__py3-none-any.whl → 6.8.2.dev1__py3-none-any.whl
This diff shows the contents of publicly released package versions as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
- airbyte_cdk/cli/source_declarative_manifest/_run.py +11 -5
- airbyte_cdk/config_observation.py +1 -1
- airbyte_cdk/connector_builder/main.py +1 -1
- airbyte_cdk/connector_builder/message_grouper.py +10 -10
- airbyte_cdk/destinations/destination.py +1 -1
- airbyte_cdk/destinations/vector_db_based/embedder.py +2 -2
- airbyte_cdk/destinations/vector_db_based/writer.py +12 -4
- airbyte_cdk/entrypoint.py +7 -6
- airbyte_cdk/logger.py +2 -2
- airbyte_cdk/sources/abstract_source.py +1 -1
- airbyte_cdk/sources/config.py +1 -1
- airbyte_cdk/sources/connector_state_manager.py +9 -4
- airbyte_cdk/sources/declarative/auth/oauth.py +1 -1
- airbyte_cdk/sources/declarative/auth/selective_authenticator.py +6 -1
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +76 -28
- airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +10 -4
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +16 -17
- airbyte_cdk/sources/declarative/decoders/noop_decoder.py +4 -1
- airbyte_cdk/sources/declarative/extractors/record_filter.py +3 -5
- airbyte_cdk/sources/declarative/incremental/__init__.py +3 -0
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +270 -0
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +8 -6
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +9 -0
- airbyte_cdk/sources/declarative/interpolation/jinja.py +35 -36
- airbyte_cdk/sources/declarative/interpolation/macros.py +1 -1
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +71 -17
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +13 -7
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +1 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +8 -6
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +1 -1
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +2 -2
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +1 -1
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +5 -2
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +1 -1
- airbyte_cdk/sources/declarative/spec/spec.py +1 -1
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +0 -1
- airbyte_cdk/sources/embedded/base_integration.py +3 -2
- airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +12 -4
- airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +18 -7
- airbyte_cdk/sources/file_based/file_types/avro_parser.py +14 -11
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +3 -3
- airbyte_cdk/sources/file_based/file_types/excel_parser.py +11 -5
- airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +1 -1
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +2 -2
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +6 -3
- airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +1 -1
- airbyte_cdk/sources/http_logger.py +3 -3
- airbyte_cdk/sources/streams/concurrent/abstract_stream.py +5 -2
- airbyte_cdk/sources/streams/concurrent/adapters.py +6 -3
- airbyte_cdk/sources/streams/concurrent/availability_strategy.py +9 -3
- airbyte_cdk/sources/streams/concurrent/cursor.py +10 -1
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +2 -2
- airbyte_cdk/sources/streams/core.py +17 -14
- airbyte_cdk/sources/streams/http/http.py +19 -19
- airbyte_cdk/sources/streams/http/http_client.py +4 -48
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +2 -1
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +62 -33
- airbyte_cdk/sources/utils/record_helper.py +1 -1
- airbyte_cdk/sources/utils/schema_helpers.py +1 -1
- airbyte_cdk/sources/utils/transform.py +34 -15
- airbyte_cdk/test/entrypoint_wrapper.py +11 -6
- airbyte_cdk/test/mock_http/response_builder.py +1 -1
- airbyte_cdk/utils/airbyte_secrets_utils.py +1 -1
- airbyte_cdk/utils/event_timing.py +10 -10
- airbyte_cdk/utils/message_utils.py +4 -3
- airbyte_cdk/utils/spec_schema_transformations.py +3 -2
- airbyte_cdk/utils/traced_exception.py +14 -12
- airbyte_cdk-6.8.2.dev1.dist-info/METADATA +111 -0
- {airbyte_cdk-6.8.1rc9.dist-info → airbyte_cdk-6.8.2.dev1.dist-info}/RECORD +72 -71
- airbyte_cdk-6.8.1rc9.dist-info/METADATA +0 -307
- {airbyte_cdk-6.8.1rc9.dist-info → airbyte_cdk-6.8.2.dev1.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.8.1rc9.dist-info → airbyte_cdk-6.8.2.dev1.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.8.1rc9.dist-info → airbyte_cdk-6.8.2.dev1.dist-info}/entry_points.txt +0 -0
```diff
--- a/airbyte_cdk/sources/declarative/extractors/record_filter.py
+++ b/airbyte_cdk/sources/declarative/extractors/record_filter.py
@@ -59,13 +59,11 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
 
     def __init__(
         self,
-        date_time_based_cursor: DatetimeBasedCursor,
-        substream_cursor: Optional[Union[PerPartitionWithGlobalCursor, GlobalSubstreamCursor]],
+        cursor: Union[DatetimeBasedCursor, PerPartitionWithGlobalCursor, GlobalSubstreamCursor],
         **kwargs: Any,
     ):
         super().__init__(**kwargs)
-        self._date_time_based_cursor = date_time_based_cursor
-        self._substream_cursor = substream_cursor
+        self._cursor = cursor
 
     def filter_records(
         self,
@@ -77,7 +75,7 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
         records = (
             record
             for record in records
-            if (self._substream_cursor or self._date_time_based_cursor).should_be_synced(
+            if self._cursor.should_be_synced(
                 # Record is created on the fly to align with cursors interface; stream name is ignored as we don't need it here
                 # Record stream name is empty cause it is not used durig the filtering
                 Record(data=record, associated_slice=stream_slice, stream_name="")
```
```diff
--- a/airbyte_cdk/sources/declarative/incremental/__init__.py
+++ b/airbyte_cdk/sources/declarative/incremental/__init__.py
@@ -2,6 +2,7 @@
 # Copyright (c) 2022 Airbyte, Inc., all rights reserved.
 #
 
+from airbyte_cdk.sources.declarative.incremental.concurrent_partition_cursor import ConcurrentCursorFactory, ConcurrentPerPartitionCursor
 from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
 from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor
 from airbyte_cdk.sources.declarative.incremental.global_substream_cursor import GlobalSubstreamCursor
@@ -14,6 +15,8 @@ from airbyte_cdk.sources.declarative.incremental.resumable_full_refresh_cursor import (
 
 __all__ = [
     "CursorFactory",
+    "ConcurrentCursorFactory"
+    "ConcurrentPerPartitionCursor",
     "DatetimeBasedCursor",
     "DeclarativeCursor",
     "GlobalSubstreamCursor",
```
```diff
--- /dev/null
+++ b/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py
@@ -0,0 +1,270 @@
+import copy
+
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+import logging
+from collections import OrderedDict
+from typing import Any, Callable, Iterable, Mapping, MutableMapping, Optional
+
+from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
+from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor
+from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
+from airbyte_cdk.sources.message import MessageRepository
+from airbyte_cdk.sources.streams.checkpoint.per_partition_key_serializer import (
+    PerPartitionKeySerializer,
+)
+from airbyte_cdk.sources.streams.concurrent.cursor import Cursor, CursorField
+from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
+from airbyte_cdk.sources.types import Record, StreamSlice, StreamState
+
+logger = logging.getLogger("airbyte")
+
+
+class ConcurrentCursorFactory:
+    def __init__(self, create_function: Callable[..., Cursor]):
+        self._create_function = create_function
+
+    def create(self, stream_state: Mapping[str, Any]) -> Cursor:
+        return self._create_function(stream_state=stream_state)
+
+
+class ConcurrentPerPartitionCursor(Cursor):
+    """
+    Manages state per partition when a stream has many partitions, to prevent data loss or duplication.
+
+    **Partition Limitation and Limit Reached Logic**
+
+    - **DEFAULT_MAX_PARTITIONS_NUMBER**: The maximum number of partitions to keep in memory (default is 10,000).
+    - **_cursor_per_partition**: An ordered dictionary that stores cursors for each partition.
+    - **_over_limit**: A counter that increments each time an oldest partition is removed when the limit is exceeded.
+
+    The class ensures that the number of partitions tracked does not exceed the `DEFAULT_MAX_PARTITIONS_NUMBER` to prevent excessive memory usage.
+
+    - When the number of partitions exceeds the limit, the oldest partitions are removed from `_cursor_per_partition`, and `_over_limit` is incremented accordingly.
+    - The `limit_reached` method returns `True` when `_over_limit` exceeds `DEFAULT_MAX_PARTITIONS_NUMBER`, indicating that the global cursor should be used instead of per-partition cursors.
+
+    This approach avoids unnecessary switching to a global cursor due to temporary spikes in partition counts, ensuring that switching is only done when a sustained high number of partitions is observed.
+    """
+
+    DEFAULT_MAX_PARTITIONS_NUMBER = 10000
+    _NO_STATE: Mapping[str, Any] = {}
+    _NO_CURSOR_STATE: Mapping[str, Any] = {}
+    _KEY = 0
+    _VALUE = 1
+    _state_to_migrate_from: Mapping[str, Any] = {}
+
+    def __init__(
+        self,
+        cursor_factory: ConcurrentCursorFactory,
+        partition_router: PartitionRouter,
+        stream_name: str,
+        stream_namespace: Optional[str],
+        stream_state: Any,
+        message_repository: MessageRepository,
+        connector_state_manager: ConnectorStateManager,
+        cursor_field: CursorField,
+    ) -> None:
+        self._stream_name = stream_name
+        self._stream_namespace = stream_namespace
+        self._message_repository = message_repository
+        self._connector_state_manager = connector_state_manager
+        self._cursor_field = cursor_field
+
+        self._cursor_factory = cursor_factory
+        self._partition_router = partition_router
+
+        # The dict is ordered to ensure that once the maximum number of partitions is reached,
+        # the oldest partitions can be efficiently removed, maintaining the most recent partitions.
+        self._cursor_per_partition: OrderedDict[str, Cursor] = OrderedDict()
+        self._over_limit = 0
+        self._partition_serializer = PerPartitionKeySerializer()
+
+        self._set_initial_state(stream_state)
+
+    @property
+    def cursor_field(self) -> CursorField:
+        return self._cursor_field
+
+    @property
+    def state(self) -> MutableMapping[str, Any]:
+        states = []
+        for partition_tuple, cursor in self._cursor_per_partition.items():
+            cursor_state = cursor._connector_state_converter.convert_to_state_message(
+                cursor._cursor_field, cursor.state
+            )
+            if cursor_state:
+                states.append(
+                    {
+                        "partition": self._to_dict(partition_tuple),
+                        "cursor": copy.deepcopy(cursor_state),
+                    }
+                )
+        state: dict[str, Any] = {"states": states}
+        return state
+
+    def close_partition(self, partition: Partition) -> None:
+        self._cursor_per_partition[self._to_partition_key(partition._stream_slice.partition)].close_partition_without_emit(partition=partition)
+
+    def ensure_at_least_one_state_emitted(self) -> None:
+        """
+        The platform expect to have at least one state message on successful syncs. Hence, whatever happens, we expect this method to be
+        called.
+        """
+        self._emit_state_message()
+
+    def _emit_state_message(self) -> None:
+        self._connector_state_manager.update_state_for_stream(
+            self._stream_name,
+            self._stream_namespace,
+            self.state,
+        )
+        state_message = self._connector_state_manager.create_state_message(
+            self._stream_name, self._stream_namespace
+        )
+        self._message_repository.emit_message(state_message)
+
+
+    def stream_slices(self) -> Iterable[StreamSlice]:
+        slices = self._partition_router.stream_slices()
+        for partition in slices:
+            yield from self.generate_slices_from_partition(partition)
+
+    def generate_slices_from_partition(self, partition: StreamSlice) -> Iterable[StreamSlice]:
+        # Ensure the maximum number of partitions is not exceeded
+        self._ensure_partition_limit()
+
+        cursor = self._cursor_per_partition.get(self._to_partition_key(partition.partition))
+        if not cursor:
+            partition_state = (
+                self._state_to_migrate_from
+                if self._state_to_migrate_from
+                else self._NO_CURSOR_STATE
+            )
+            cursor = self._create_cursor(partition_state)
+            self._cursor_per_partition[self._to_partition_key(partition.partition)] = cursor
+
+        for cursor_slice in cursor.stream_slices():
+            yield StreamSlice(
+                partition=partition, cursor_slice=cursor_slice, extra_fields=partition.extra_fields
+            )
+
+    def _ensure_partition_limit(self) -> None:
+        """
+        Ensure the maximum number of partitions is not exceeded. If so, the oldest added partition will be dropped.
+        """
+        while len(self._cursor_per_partition) > self.DEFAULT_MAX_PARTITIONS_NUMBER - 1:
+            self._over_limit += 1
+            oldest_partition = self._cursor_per_partition.popitem(last=False)[
+                0
+            ]  # Remove the oldest partition
+            logger.warning(
+                f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._over_limit}."
+            )
+
+    def limit_reached(self) -> bool:
+        return self._over_limit > self.DEFAULT_MAX_PARTITIONS_NUMBER
+
+    def _set_initial_state(self, stream_state: StreamState) -> None:
+        """
+        Set the initial state for the cursors.
+
+        This method initializes the state for each partition cursor using the provided stream state.
+        If a partition state is provided in the stream state, it will update the corresponding partition cursor with this state.
+
+        Additionally, it sets the parent state for partition routers that are based on parent streams. If a partition router
+        does not have parent streams, this step will be skipped due to the default PartitionRouter implementation.
+
+        Args:
+            stream_state (StreamState): The state of the streams to be set. The format of the stream state should be:
+                {
+                    "states": [
+                        {
+                            "partition": {
+                                "partition_key": "value"
+                            },
+                            "cursor": {
+                                "last_updated": "2023-05-27T00:00:00Z"
+                            }
+                        }
+                    ],
+                    "parent_state": {
+                        "parent_stream_name": {
+                            "last_updated": "2023-05-27T00:00:00Z"
+                        }
+                    }
+                }
+        """
+        if not stream_state:
+            return
+
+        if "states" not in stream_state:
+            # We assume that `stream_state` is in a global format that can be applied to all partitions.
+            # Example: {"global_state_format_key": "global_state_format_value"}
+            self._state_to_migrate_from = stream_state
+
+        else:
+            for state in stream_state["states"]:
+                self._cursor_per_partition[self._to_partition_key(state["partition"])] = (
+                    self._create_cursor(state["cursor"])
+                )
+
+            # set default state for missing partitions if it is per partition with fallback to global
+            if "state" in stream_state:
+                self._state_to_migrate_from = stream_state["state"]
+
+        # Set parent state for partition routers based on parent streams
+        self._partition_router.set_initial_state(stream_state)
+
+    def observe(self, record: Record) -> None:
+        self._cursor_per_partition[self._to_partition_key(record.associated_slice.partition)].observe(record)
+
+    def _to_partition_key(self, partition: Mapping[str, Any]) -> str:
+        return self._partition_serializer.to_partition_key(partition)
+
+    def _to_dict(self, partition_key: str) -> Mapping[str, Any]:
+        return self._partition_serializer.to_partition(partition_key)
+
+    def _create_cursor(self, cursor_state: Any) -> DeclarativeCursor:
+        cursor = self._cursor_factory.create(stream_state=cursor_state)
+        return cursor
+
+    def should_be_synced(self, record: Record) -> bool:
+        return self._get_cursor(record).should_be_synced(record)
+
+    def is_greater_than_or_equal(self, first: Record, second: Record) -> bool:
+        if not first.associated_slice or not second.associated_slice:
+            raise ValueError(
+                f"Both records should have an associated slice but got {first.associated_slice} and {second.associated_slice}"
+            )
+        if first.associated_slice.partition != second.associated_slice.partition:
+            raise ValueError(
+                f"To compare records, partition should be the same but got {first.associated_slice.partition} and {second.associated_slice.partition}"
+            )
+
+        return self._get_cursor(first).is_greater_than_or_equal(
+            self._convert_record_to_cursor_record(first),
+            self._convert_record_to_cursor_record(second),
+        )
+
+    @staticmethod
+    def _convert_record_to_cursor_record(record: Record) -> Record:
+        return Record(
+            record.data,
+            StreamSlice(partition={}, cursor_slice=record.associated_slice.cursor_slice)
+            if record.associated_slice
+            else None,
+        )
+
+    def _get_cursor(self, record: Record) -> Cursor:
+        if not record.associated_slice:
+            raise ValueError(
+                "Invalid state as stream slices that are emitted should refer to an existing cursor"
+            )
+        partition_key = self._to_partition_key(record.associated_slice.partition)
+        if partition_key not in self._cursor_per_partition:
+            raise ValueError(
+                "Invalid state as stream slices that are emitted should refer to an existing cursor"
+            )
+        cursor = self._cursor_per_partition[partition_key]
+        return cursor
```
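
The partition-limit behaviour described in the class docstring is easy to verify in isolation. A self-contained sketch of the same `OrderedDict` eviction, with a tiny cap standing in for `DEFAULT_MAX_PARTITIONS_NUMBER` (10,000); everything else here is simplified stand-in code, not the CDK's classes:

```python
# Standalone sketch of the eviction documented above: an OrderedDict keeps
# insertion order, so popitem(last=False) drops the oldest partition once
# the cap is reached. The cap is tiny here purely for demonstration.
from collections import OrderedDict

MAX_PARTITIONS = 3  # stands in for DEFAULT_MAX_PARTITIONS_NUMBER (10,000)

cursors: OrderedDict[str, dict] = OrderedDict()
over_limit = 0

for key in ["p1", "p2", "p3", "p4", "p5"]:
    # Mirrors _ensure_partition_limit(): evict before inserting a new key.
    while len(cursors) > MAX_PARTITIONS - 1:
        over_limit += 1
        oldest, _ = cursors.popitem(last=False)
        print(f"dropping oldest partition {oldest} (over limit: {over_limit})")
    cursors[key] = {"state": None}

print(list(cursors))  # ['p3', 'p4', 'p5'] -- only the most recent survive
# limit_reached() would report True only once over_limit exceeds the cap,
# i.e. after sustained (not momentary) overflow.
```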
```diff
--- a/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py
+++ b/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py
@@ -133,8 +133,8 @@ class DatetimeBasedCursor(DeclarativeCursor):
         :param stream_state: The state of the stream as returned by get_stream_state
         """
         self._cursor = (
-            stream_state.get(self.cursor_field.eval(self.config)) if stream_state else None
-        )
+            stream_state.get(self.cursor_field.eval(self.config)) if stream_state else None  # type: ignore [union-attr]
+        )
 
     def observe(self, stream_slice: StreamSlice, record: Record) -> None:
         """
@@ -158,8 +158,10 @@ class DatetimeBasedCursor(DeclarativeCursor):
         )
         if (
             self._is_within_daterange_boundaries(
-                record,
-                stream_slice.get(start_field), stream_slice.get(end_field))
+                record,
+                stream_slice.get(start_field),  # type: ignore [arg-type]
+                stream_slice.get(end_field),  # type: ignore [arg-type]
+            )
             and is_highest_observed_cursor_value
         ):
             self._highest_observed_cursor_field_value = record_cursor_value
@@ -368,9 +370,9 @@ class DatetimeBasedCursor(DeclarativeCursor):
             self._partition_field_start.eval(self.config)
         )
         if self.end_time_option and self.end_time_option.inject_into == option_type:
-            options[self.end_time_option.field_name.eval(config=self.config)] = stream_slice.get(
+            options[self.end_time_option.field_name.eval(config=self.config)] = stream_slice.get(  # type: ignore [union-attr]
                 self._partition_field_end.eval(self.config)
-            )
+            )
         return options
 
     def should_be_synced(self, record: Record) -> bool:
```
```diff
--- a/airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py
+++ b/airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py
@@ -303,6 +303,15 @@ class PerPartitionCursor(DeclarativeCursor):
         raise ValueError("A partition needs to be provided in order to get request body json")
 
     def should_be_synced(self, record: Record) -> bool:
+        if self._to_partition_key(record.associated_slice.partition) not in self._cursor_per_partition:
+            partition_state = (
+                self._state_to_migrate_from
+                if self._state_to_migrate_from
+                else self._NO_CURSOR_STATE
+            )
+            cursor = self._create_cursor(partition_state)
+
+            self._cursor_per_partition[self._to_partition_key(record.associated_slice.partition)] = cursor
         return self._get_cursor(record).should_be_synced(
             self._convert_record_to_cursor_record(record)
         )
```
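
This hunk makes `should_be_synced` tolerant of records from partitions that have not been observed yet: instead of failing in `_get_cursor`, it lazily creates a cursor seeded from the migrated global state (or an empty one). A condensed, hypothetical sketch of that lazy-creation pattern, with `create_cursor` standing in for `PerPartitionCursor._create_cursor`:

```python
from typing import Any, Callable, Dict, Mapping, Optional

_NO_CURSOR_STATE: Mapping[str, Any] = {}


class LazyCursorRegistry:
    """Stand-in for the per-partition bookkeeping inside PerPartitionCursor."""

    def __init__(
        self,
        create_cursor: Callable[[Mapping[str, Any]], Any],
        state_to_migrate_from: Optional[Mapping[str, Any]] = None,
    ) -> None:
        self._create_cursor = create_cursor
        self._state_to_migrate_from = state_to_migrate_from
        self._cursor_per_partition: Dict[str, Any] = {}

    def cursor_for(self, partition_key: str) -> Any:
        if partition_key not in self._cursor_per_partition:
            # Seed the new cursor from migrated global state when available,
            # otherwise start it empty -- mirroring the added branch above.
            partition_state = self._state_to_migrate_from or _NO_CURSOR_STATE
            self._cursor_per_partition[partition_key] = self._create_cursor(partition_state)
        return self._cursor_per_partition[partition_key]


registry = LazyCursorRegistry(create_cursor=lambda state: {"state": dict(state)})
print(registry.cursor_for("account-42"))  # {'state': {}} -- created on first sight
```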
```diff
--- a/airbyte_cdk/sources/declarative/interpolation/jinja.py
+++ b/airbyte_cdk/sources/declarative/interpolation/jinja.py
@@ -4,7 +4,7 @@
 
 import ast
 from functools import cache
-from typing import Any, Mapping, Optional, Set, Tuple, Type
+from typing import Any, Mapping, Optional, Tuple, Type
 
 from jinja2 import meta
 from jinja2.environment import Template
@@ -27,35 +27,7 @@ class StreamPartitionAccessEnvironment(SandboxedEnvironment):
     def is_safe_attribute(self, obj: Any, attr: str, value: Any) -> bool:
         if attr in ["_partition"]:
             return True
-        return super().is_safe_attribute(obj, attr, value)
-
-
-# These aliases are used to deprecate existing keywords without breaking all existing connectors.
-_ALIASES = {
-    "stream_interval": "stream_slice",  # Use stream_interval to access incremental_sync values
-    "stream_partition": "stream_slice",  # Use stream_partition to access partition router's values
-}
-
-# These extensions are not installed so they're not currently a problem,
-# but we're still explicitely removing them from the jinja context.
-# At worst, this is documentation that we do NOT want to include these extensions because of the potential security risks
-_RESTRICTED_EXTENSIONS = ["jinja2.ext.loopcontrols"]  # Adds support for break continue in loops
-
-# By default, these Python builtin functions are available in the Jinja context.
-# We explicitely remove them because of the potential security risk.
-# Please add a unit test to test_jinja.py when adding a restriction.
-_RESTRICTED_BUILTIN_FUNCTIONS = [
-    "range"
-]  # The range function can cause very expensive computations
-
-_ENVIRONMENT = StreamPartitionAccessEnvironment()
-_ENVIRONMENT.filters.update(**filters)
-_ENVIRONMENT.globals.update(**macros)
-
-for extension in _RESTRICTED_EXTENSIONS:
-    _ENVIRONMENT.extensions.pop(extension, None)
-for builtin in _RESTRICTED_BUILTIN_FUNCTIONS:
-    _ENVIRONMENT.globals.pop(builtin, None)
+        return super().is_safe_attribute(obj, attr, value)
 
 
 class JinjaInterpolation(Interpolation):
@@ -76,6 +48,34 @@ class JinjaInterpolation(Interpolation):
     Additional information on jinja templating can be found at https://jinja.palletsprojects.com/en/3.1.x/templates/#
     """
 
+    # These aliases are used to deprecate existing keywords without breaking all existing connectors.
+    ALIASES = {
+        "stream_interval": "stream_slice",  # Use stream_interval to access incremental_sync values
+        "stream_partition": "stream_slice",  # Use stream_partition to access partition router's values
+    }
+
+    # These extensions are not installed so they're not currently a problem,
+    # but we're still explicitely removing them from the jinja context.
+    # At worst, this is documentation that we do NOT want to include these extensions because of the potential security risks
+    RESTRICTED_EXTENSIONS = ["jinja2.ext.loopcontrols"]  # Adds support for break continue in loops
+
+    # By default, these Python builtin functions are available in the Jinja context.
+    # We explicitely remove them because of the potential security risk.
+    # Please add a unit test to test_jinja.py when adding a restriction.
+    RESTRICTED_BUILTIN_FUNCTIONS = [
+        "range"
+    ]  # The range function can cause very expensive computations
+
+    def __init__(self) -> None:
+        self._environment = StreamPartitionAccessEnvironment()
+        self._environment.filters.update(**filters)
+        self._environment.globals.update(**macros)
+
+        for extension in self.RESTRICTED_EXTENSIONS:
+            self._environment.extensions.pop(extension, None)
+        for builtin in self.RESTRICTED_BUILTIN_FUNCTIONS:
+            self._environment.globals.pop(builtin, None)
+
     def eval(
         self,
         input_str: str,
@@ -86,7 +86,7 @@ class JinjaInterpolation(Interpolation):
     ) -> Any:
         context = {"config": config, **additional_parameters}
 
-        for alias, equivalent in _ALIASES.items():
+        for alias, equivalent in self.ALIASES.items():
             if alias in context:
                 # This is unexpected. We could ignore or log a warning, but failing loudly should result in fewer surprises
                 raise ValueError(
@@ -105,7 +105,6 @@ class JinjaInterpolation(Interpolation):
             raise Exception(f"Expected a string, got {input_str}")
         except UndefinedError:
             pass
-
         # If result is empty or resulted in an undefined error, evaluate and return the default string
         return self._literal_eval(self._eval(default, context), valid_types)
 
@@ -133,16 +132,16 @@ class JinjaInterpolation(Interpolation):
         return s
 
     @cache
-    def _find_undeclared_variables(self, s: Optional[str]) -> Set[str]:
+    def _find_undeclared_variables(self, s: Optional[str]) -> set[str]:
         """
         Find undeclared variables and cache them
         """
-        ast = _ENVIRONMENT.parse(s)  # type: ignore # parse is able to handle None
+        ast = self._environment.parse(s)  # type: ignore # parse is able to handle None
         return meta.find_undeclared_variables(ast)
 
     @cache
-    def _compile(self, s: str) -> Template:
+    def _compile(self, s: Optional[str]) -> Template:
         """
         We must cache the Jinja Template ourselves because we're using `from_string` instead of a template loader
         """
-        return _ENVIRONMENT.from_string(s)
+        return self._environment.from_string(s)  # type: ignore [arg-type]  # Expected `str | Template` but passed `str | None`
```
```diff
--- a/airbyte_cdk/sources/declarative/interpolation/macros.py
+++ b/airbyte_cdk/sources/declarative/interpolation/macros.py
@@ -116,7 +116,7 @@ def duration(datestring: str) -> Union[datetime.timedelta, isodate.Duration]:
     Usage:
     `"{{ now_utc() - duration('P1D') }}"`
     """
-    return parse_duration(datestring)
+    return parse_duration(datestring)
 
 
 def format_datetime(
```
```diff
--- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py
+++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py
@@ -81,6 +81,8 @@ from airbyte_cdk.sources.declarative.extractors.record_selector import (
 )
 from airbyte_cdk.sources.declarative.incremental import (
     ChildPartitionResumableFullRefreshCursor,
+    ConcurrentCursorFactory,
+    ConcurrentPerPartitionCursor,
     CursorFactory,
     DatetimeBasedCursor,
     DeclarativeCursor,
@@ -396,7 +398,7 @@ class ModelToComponentFactory:
         self._disable_retries = disable_retries
         self._disable_cache = disable_cache
         self._disable_resumable_full_refresh = disable_resumable_full_refresh
-        self._message_repository = message_repository or InMemoryMessageRepository(
+        self._message_repository = message_repository or InMemoryMessageRepository(
             self._evaluate_log_level(emit_connector_builder_messages)
         )
 
@@ -644,7 +646,7 @@ class ModelToComponentFactory:
             declarative_stream.incremental_sync,  # type: ignore # was already checked. Migration can be applied only to incremental streams.
             config,
             declarative_stream.parameters,  # type: ignore # different type is expected here Mapping[str, Any], got Dict[str, Any]
-        )
+        )
 
     def create_session_token_authenticator(
         self, model: SessionTokenAuthenticatorModel, config: Config, name: str, **kwargs: Any
@@ -674,7 +676,7 @@ class ModelToComponentFactory:
             return ModelToComponentFactory.create_bearer_authenticator(
                 BearerAuthenticatorModel(type="BearerAuthenticator", api_token=""),  # type: ignore # $parameters has a default value
                 config,
-                token_provider=token_provider,
+                token_provider=token_provider,
             )
         else:
             return ModelToComponentFactory.create_api_key_authenticator(
@@ -821,7 +823,6 @@ class ModelToComponentFactory:
             input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats,
             is_sequential_state=True,
             cursor_granularity=cursor_granularity,
-            # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
         )
 
         start_date_runtime_value: Union[InterpolatedString, str, MinMaxDatetime]
@@ -894,7 +895,7 @@ class ModelToComponentFactory:
             stream_name=stream_name,
             stream_namespace=stream_namespace,
             stream_state=stream_state,
-            message_repository=self._message_repository,
+            message_repository=self._message_repository,
             connector_state_manager=state_manager,
             connector_state_converter=connector_state_converter,
             cursor_field=cursor_field,
@@ -906,6 +907,62 @@ class ModelToComponentFactory:
             cursor_granularity=cursor_granularity,
         )
 
+    def create_concurrent_cursor_from_perpartition_cursor(
+        self,
+        state_manager: ConnectorStateManager,
+        model_type: Type[BaseModel],
+        component_definition: ComponentDefinition,
+        stream_name: str,
+        stream_namespace: Optional[str],
+        config: Config,
+        stream_state: MutableMapping[str, Any],
+        partition_router,
+        **kwargs: Any,
+    ) -> ConcurrentPerPartitionCursor:
+        component_type = component_definition.get("type")
+        if component_definition.get("type") != model_type.__name__:
+            raise ValueError(
+                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
+            )
+
+        datetime_based_cursor_model = model_type.parse_obj(component_definition)
+
+        if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel):
+            raise ValueError(
+                f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
+            )
+
+        interpolated_cursor_field = InterpolatedString.create(
+            datetime_based_cursor_model.cursor_field,
+            parameters=datetime_based_cursor_model.parameters or {},
+        )
+        cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
+
+        # Create the cursor factory
+        cursor_factory = ConcurrentCursorFactory(
+            partial(
+                self.create_concurrent_cursor_from_datetime_based_cursor,
+                state_manager=state_manager,
+                model_type=model_type,
+                component_definition=component_definition,
+                stream_name=stream_name,
+                stream_namespace=stream_namespace,
+                config=config,
+            )
+        )
+
+        # Return the concurrent cursor and state converter
+        return ConcurrentPerPartitionCursor(
+            cursor_factory=cursor_factory,
+            partition_router=partition_router,
+            stream_name=stream_name,
+            stream_namespace=stream_namespace,
+            stream_state=stream_state,
+            message_repository=self._message_repository,  # type: ignore
+            connector_state_manager=state_manager,
+            cursor_field=cursor_field,
+        )
+
     @staticmethod
     def create_constant_backoff_strategy(
         model: ConstantBackoffStrategyModel, config: Config, **kwargs: Any
```
```diff
--- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py
+++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py
@@ -1188,17 +1245,14 @@
             raise ValueError(
                 "Unsupported Slicer is used. PerPartitionWithGlobalCursor should be used here instead"
             )
+        cursor = combined_slicers if isinstance(
+            combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)
+        ) else self._create_component_from_model(
+            model=model.incremental_sync, config=config
+        )
+
         client_side_incremental_sync = {
-            "date_time_based_cursor": self._create_component_from_model(
-                model=model.incremental_sync, config=config
-            ),
-            "substream_cursor": (
-                combined_slicers
-                if isinstance(
-                    combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)
-                )
-                else None
-            ),
+            "cursor": cursor
         }
 
         if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel):
@@ -1705,7 +1759,7 @@ class ModelToComponentFactory:
             refresh_token=model.refresh_token,
             scopes=model.scopes,
             token_expiry_date=model.token_expiry_date,
-            token_expiry_date_format=model.token_expiry_date_format,
+            token_expiry_date_format=model.token_expiry_date_format,
             token_expiry_is_time_of_expiration=bool(model.token_expiry_date_format),
             token_refresh_endpoint=model.token_refresh_endpoint,
             config=config,
@@ -1912,7 +1966,7 @@ class ModelToComponentFactory:
         if (
             not isinstance(stream_slicer, DatetimeBasedCursor)
             or type(stream_slicer) is not DatetimeBasedCursor
-        ):
+        ) and not isinstance(stream_slicer, PerPartitionWithGlobalCursor):
             # Many of the custom component implementations of DatetimeBasedCursor override get_request_params() (or other methods).
             # Because we're decoupling RequestOptionsProvider from the Cursor, custom components will eventually need to reimplement
             # their own RequestOptionsProvider. However, right now the existing StreamSlicer/Cursor still can act as the SimpleRetriever's
```