airbyte-cdk 6.26.0.dev4105__py3-none-any.whl → 6.27.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +57 -32
  2. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +14 -0
  3. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +39 -13
  4. airbyte_cdk/sources/declarative/manifest_declarative_source.py +0 -3
  5. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +7 -1
  6. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +52 -6
  7. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +46 -16
  8. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +1 -1
  9. airbyte_cdk/sources/declarative/schema/__init__.py +2 -0
  10. airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +43 -5
  11. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +0 -10
  12. airbyte_cdk/sources/file_based/file_based_source.py +1 -44
  13. airbyte_cdk/sources/file_based/file_based_stream_reader.py +0 -33
  14. airbyte_cdk/sources/file_based/schema_helpers.py +0 -25
  15. airbyte_cdk/sources/file_based/stream/__init__.py +1 -2
  16. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +0 -29
  17. airbyte_cdk/sources/types.py +4 -2
  18. airbyte_cdk/utils/slice_hasher.py +8 -1
  19. {airbyte_cdk-6.26.0.dev4105.dist-info → airbyte_cdk-6.27.0.dist-info}/METADATA +1 -1
  20. {airbyte_cdk-6.26.0.dev4105.dist-info → airbyte_cdk-6.27.0.dist-info}/RECORD +23 -25
  21. airbyte_cdk/sources/file_based/config/permissions.py +0 -34
  22. airbyte_cdk/sources/file_based/stream/identities_stream.py +0 -96
  23. {airbyte_cdk-6.26.0.dev4105.dist-info → airbyte_cdk-6.27.0.dist-info}/LICENSE.txt +0 -0
  24. {airbyte_cdk-6.26.0.dev4105.dist-info → airbyte_cdk-6.27.0.dist-info}/WHEEL +0 -0
  25. {airbyte_cdk-6.26.0.dev4105.dist-info → airbyte_cdk-6.27.0.dist-info}/entry_points.txt +0 -0
--- a/airbyte_cdk/sources/declarative/concurrent_declarative_source.py
+++ b/airbyte_cdk/sources/declarative/concurrent_declarative_source.py
@@ -34,8 +34,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
 from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
     ModelToComponentFactory,
 )
+from airbyte_cdk.sources.declarative.partition_routers import AsyncJobPartitionRouter
 from airbyte_cdk.sources.declarative.requesters import HttpRequester
-from airbyte_cdk.sources.declarative.retrievers import Retriever, SimpleRetriever
+from airbyte_cdk.sources.declarative.retrievers import AsyncRetriever, Retriever, SimpleRetriever
 from airbyte_cdk.sources.declarative.stream_slicers.declarative_partition_generator import (
     DeclarativePartitionFactory,
     StreamSlicerPartitionGenerator,
@@ -48,7 +49,7 @@ from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
 from airbyte_cdk.sources.streams.concurrent.availability_strategy import (
     AlwaysAvailableAvailabilityStrategy,
 )
-from airbyte_cdk.sources.streams.concurrent.cursor import FinalStateCursor
+from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, FinalStateCursor
 from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
 from airbyte_cdk.sources.streams.concurrent.helpers import get_primary_key_from_stream
 
@@ -69,6 +70,10 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
         component_factory: Optional[ModelToComponentFactory] = None,
         **kwargs: Any,
     ) -> None:
+        # todo: We could remove state from initialization. Now that streams are grouped during the read(), a source
+        # no longer needs to store the original incoming state. But maybe there's an edge case?
+        self._connector_state_manager = ConnectorStateManager(state=state)  # type: ignore  # state is always in the form of List[AirbyteStateMessage]. The ConnectorStateManager should use generics, but this can be done later
+
         # To reduce the complexity of the concurrent framework, we are not enabling RFR with synthetic
         # cursors. We do this by no longer automatically instantiating RFR cursors when converting
         # the declarative models into runtime components. Concurrent sources will continue to checkpoint
@@ -76,6 +81,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
         component_factory = component_factory or ModelToComponentFactory(
             emit_connector_builder_messages=emit_connector_builder_messages,
             disable_resumable_full_refresh=True,
+            connector_state_manager=self._connector_state_manager,
         )
 
         super().__init__(
@@ -86,10 +92,6 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
             component_factory=component_factory,
         )
 
-        # todo: We could remove state from initialization. Now that streams are grouped during the read(), a source
-        # no longer needs to store the original incoming state. But maybe there's an edge case?
-        self._state = state
-
         concurrency_level_from_manifest = self._source_config.get("concurrency_level")
         if concurrency_level_from_manifest:
             concurrency_level_component = self._constructor.create_component(
@@ -179,8 +181,6 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
         concurrent_streams: List[AbstractStream] = []
         synchronous_streams: List[Stream] = []
 
-        state_manager = ConnectorStateManager(state=self._state)  # type: ignore  # state is always in the form of List[AirbyteStateMessage]. The ConnectorStateManager should use generics, but this can be done later
-
         # Combine streams and dynamic_streams. Note: both cannot be empty at the same time,
         # and this is validated during the initialization of the source.
         streams = self._stream_configs(self._source_config) + self._dynamic_stream_configs(
@@ -220,31 +220,52 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
             if self._is_datetime_incremental_without_partition_routing(
                 declarative_stream, incremental_sync_component_definition
             ):
-                stream_state = state_manager.get_stream_state(
+                stream_state = self._connector_state_manager.get_stream_state(
                     stream_name=declarative_stream.name, namespace=declarative_stream.namespace
                 )
 
-                cursor = self._constructor.create_concurrent_cursor_from_datetime_based_cursor(
-                    state_manager=state_manager,
-                    model_type=DatetimeBasedCursorModel,
-                    component_definition=incremental_sync_component_definition,  # type: ignore  # Not None because of the if condition above
-                    stream_name=declarative_stream.name,
-                    stream_namespace=declarative_stream.namespace,
-                    config=config or {},
-                    stream_state=stream_state,
-                )
-
                 retriever = self._get_retriever(declarative_stream, stream_state)
 
-                partition_generator = StreamSlicerPartitionGenerator(
-                    DeclarativePartitionFactory(
-                        declarative_stream.name,
-                        declarative_stream.get_json_schema(),
-                        retriever,
-                        self.message_repository,
-                    ),
-                    cursor,
-                )
+                if isinstance(declarative_stream.retriever, AsyncRetriever) and isinstance(
+                    declarative_stream.retriever.stream_slicer, AsyncJobPartitionRouter
+                ):
+                    cursor = declarative_stream.retriever.stream_slicer.stream_slicer
+
+                    if not isinstance(cursor, ConcurrentCursor):
+                        # This should never happen since we instantiate ConcurrentCursor in
+                        # model_to_component_factory.py
+                        raise ValueError(
+                            f"Expected AsyncJobPartitionRouter stream_slicer to be of type ConcurrentCursor, but received {cursor.__class__}"
+                        )
+
+                    partition_generator = StreamSlicerPartitionGenerator(
+                        partition_factory=DeclarativePartitionFactory(
+                            declarative_stream.name,
+                            declarative_stream.get_json_schema(),
+                            retriever,
+                            self.message_repository,
+                        ),
+                        stream_slicer=declarative_stream.retriever.stream_slicer,
+                    )
+                else:
+                    cursor = (
+                        self._constructor.create_concurrent_cursor_from_datetime_based_cursor(
+                            model_type=DatetimeBasedCursorModel,
+                            component_definition=incremental_sync_component_definition,  # type: ignore  # Not None because of the if condition above
+                            stream_name=declarative_stream.name,
+                            stream_namespace=declarative_stream.namespace,
+                            config=config or {},
+                        )
+                    )
+                    partition_generator = StreamSlicerPartitionGenerator(
+                        partition_factory=DeclarativePartitionFactory(
+                            declarative_stream.name,
+                            declarative_stream.get_json_schema(),
+                            retriever,
+                            self.message_repository,
+                        ),
+                        stream_slicer=cursor,
+                    )
 
                 concurrent_streams.append(
                     DefaultStream(
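For orientation, here is a minimal, self-contained sketch of the dispatch this hunk introduces; the stub classes stand in for the real airbyte_cdk components and omit their actual constructor arguments:

    # Minimal sketch, not the real implementation.
    class ConcurrentCursor: ...

    class AsyncJobPartitionRouter:
        def __init__(self, stream_slicer: "ConcurrentCursor") -> None:
            self.stream_slicer = stream_slicer  # the factory nests the cursor here

    class AsyncRetriever:
        def __init__(self, stream_slicer: "AsyncJobPartitionRouter") -> None:
            self.stream_slicer = stream_slicer

    def pick_stream_slicer(retriever):
        # Async job streams reuse the ConcurrentCursor already built into their
        # partition router and slice with the router itself; all other datetime
        # incremental streams build a cursor and slice with the cursor directly.
        if isinstance(retriever, AsyncRetriever) and isinstance(
            retriever.stream_slicer, AsyncJobPartitionRouter
        ):
            cursor = retriever.stream_slicer.stream_slicer
            if not isinstance(cursor, ConcurrentCursor):
                raise ValueError(f"Expected ConcurrentCursor, got {cursor.__class__}")
            return retriever.stream_slicer
        return ConcurrentCursor()  # placeholder for the factory-built cursor

    retriever = AsyncRetriever(AsyncJobPartitionRouter(ConcurrentCursor()))
    assert isinstance(pick_stream_slicer(retriever), AsyncJobPartitionRouter)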
@@ -306,14 +327,14 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                     declarative_stream.retriever.stream_slicer, PerPartitionWithGlobalCursor
                 )
             ):
-                stream_state = state_manager.get_stream_state(
+                stream_state = self._connector_state_manager.get_stream_state(
                     stream_name=declarative_stream.name, namespace=declarative_stream.namespace
                 )
                 partition_router = declarative_stream.retriever.stream_slicer._partition_router
 
                 perpartition_cursor = (
                     self._constructor.create_concurrent_cursor_from_perpartition_cursor(
-                        state_manager=state_manager,
+                        state_manager=self._connector_state_manager,
                         model_type=DatetimeBasedCursorModel,
                         component_definition=incremental_sync_component_definition,
                         stream_name=declarative_stream.name,
@@ -369,7 +390,10 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                 declarative_stream=declarative_stream
             )
             and hasattr(declarative_stream.retriever, "stream_slicer")
-            and isinstance(declarative_stream.retriever.stream_slicer, DatetimeBasedCursor)
+            and (
+                isinstance(declarative_stream.retriever.stream_slicer, DatetimeBasedCursor)
+                or isinstance(declarative_stream.retriever.stream_slicer, AsyncJobPartitionRouter)
+            )
         )
 
     def _stream_supports_concurrent_partition_processing(
@@ -438,8 +462,9 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                 return False
         return True
 
+    @staticmethod
     def _get_retriever(
-        self, declarative_stream: DeclarativeStream, stream_state: Mapping[str, Any]
+        declarative_stream: DeclarativeStream, stream_state: Mapping[str, Any]
     ) -> Retriever:
         retriever = declarative_stream.retriever
 
--- a/airbyte_cdk/sources/declarative/declarative_component_schema.yaml
+++ b/airbyte_cdk/sources/declarative/declarative_component_schema.yaml
@@ -1800,6 +1800,19 @@ definitions:
       $parameters:
         type: object
         additionalProperties: true
+  ComplexFieldType:
+    title: Schema Field Type
+    description: (This component is experimental. Use at your own risk.) Represents a complex field type.
+    type: object
+    required:
+      - field_type
+    properties:
+      field_type:
+        type: string
+      items:
+        anyOf:
+          - type: string
+          - "$ref": "#/definitions/ComplexFieldType"
   TypesMap:
     title: Types Map
     description: (This component is experimental. Use at your own risk.) Represents a mapping between a current type and its corresponding target type.
@@ -1814,6 +1827,7 @@ definitions:
           - type: array
             items:
               type: string
+          - "$ref": "#/definitions/ComplexFieldType"
       current_type:
         anyOf:
           - type: string
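The recursive `"$ref"` means `items` can nest: an array-of-arrays-of-strings is expressed by nesting the component inside itself. A sketch using the runtime dataclasses this diff adds further down (the `current_type` value is a made-up source-side type name):

    from airbyte_cdk.sources.declarative.schema import ComplexFieldType, TypesMap

    # target_type can now be a recursive ComplexFieldType instead of a plain
    # string or list of strings: here, array -> array -> string.
    types_map = TypesMap(
        target_type=ComplexFieldType(
            field_type="array",
            items=ComplexFieldType(field_type="array", items="string"),
        ),
        current_type="formula",  # hypothetical type name reported by the API
        condition="True",
    )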
--- a/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py
+++ b/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py
@@ -147,7 +147,7 @@ class ConcurrentPerPartitionCursor(Cursor):
                     < cursor.state[self.cursor_field.cursor_field_key]
                 ):
                     self._new_global_cursor = copy.deepcopy(cursor.state)
-                self._emit_state_message()
+            self._emit_state_message()
 
     def ensure_at_least_one_state_emitted(self) -> None:
         """
@@ -192,7 +192,8 @@ class ConcurrentPerPartitionCursor(Cursor):
                 self._global_cursor,
                 self._lookback_window if self._global_cursor else 0,
             )
-            self._cursor_per_partition[self._to_partition_key(partition.partition)] = cursor
+            with self._lock:
+                self._cursor_per_partition[self._to_partition_key(partition.partition)] = cursor
             self._semaphore_per_partition[self._to_partition_key(partition.partition)] = (
                 threading.Semaphore(0)
            )
@@ -210,16 +211,38 @@ class ConcurrentPerPartitionCursor(Cursor):
 
     def _ensure_partition_limit(self) -> None:
         """
-        Ensure the maximum number of partitions is not exceeded. If so, the oldest added partition will be dropped.
+        Ensure the maximum number of partitions does not exceed the predefined limit.
+
+        Steps:
+        1. Attempt to remove partitions that are marked as finished in `_finished_partitions`.
+           These partitions are considered processed and safe to delete.
+        2. If the limit is still exceeded and no finished partitions are available for removal,
+           remove the oldest partition unconditionally. We expect failed partitions to be removed.
+
+        Logging:
+        - Logs a warning each time a partition is removed, indicating whether it was finished
+          or removed due to being the oldest.
         """
-        while len(self._cursor_per_partition) > self.DEFAULT_MAX_PARTITIONS_NUMBER - 1:
-            self._over_limit += 1
-            oldest_partition = self._cursor_per_partition.popitem(last=False)[
-                0
-            ]  # Remove the oldest partition
-            logger.warning(
-                f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._over_limit}."
-            )
+        with self._lock:
+            while len(self._cursor_per_partition) > self.DEFAULT_MAX_PARTITIONS_NUMBER - 1:
+                # Try removing finished partitions first
+                for partition_key in list(self._cursor_per_partition.keys()):
+                    if partition_key in self._finished_partitions:
+                        oldest_partition = self._cursor_per_partition.pop(
+                            partition_key
+                        )  # Remove the oldest partition
+                        logger.warning(
+                            f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._over_limit}."
+                        )
+                        break
+                else:
+                    # If no finished partitions can be removed, fall back to removing the oldest partition
+                    oldest_partition = self._cursor_per_partition.popitem(last=False)[
+                        1
+                    ]  # Remove the oldest partition
+                    logger.warning(
+                        f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._over_limit}."
+                    )
 
     def _set_initial_state(self, stream_state: StreamState) -> None:
         """
@@ -264,7 +287,10 @@ class ConcurrentPerPartitionCursor(Cursor):
         if not stream_state:
             return
 
-        if self._PERPARTITION_STATE_KEY not in stream_state:
+        if (
+            self._PERPARTITION_STATE_KEY not in stream_state
+            and self._GLOBAL_STATE_KEY not in stream_state
+        ):
             # We assume that `stream_state` is in a global format that can be applied to all partitions.
             # Example: {"global_state_format_key": "global_state_format_value"}
             self._global_cursor = deepcopy(stream_state)
@@ -273,7 +299,7 @@ class ConcurrentPerPartitionCursor(Cursor):
         else:
             self._lookback_window = int(stream_state.get("lookback_window", 0))
 
-            for state in stream_state[self._PERPARTITION_STATE_KEY]:
+            for state in stream_state.get(self._PERPARTITION_STATE_KEY, []):
                 self._cursor_per_partition[self._to_partition_key(state["partition"])] = (
                     self._create_cursor(state["cursor"])
                 )
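Together, these two hunks distinguish three incoming state shapes. A sketch of each (assuming the class constants `_PERPARTITION_STATE_KEY` and `_GLOBAL_STATE_KEY` map to "states" and "state", which the surrounding code suggests; cursor fields are illustrative):

    # 1. Legacy global format: neither key present, so the whole mapping
    #    becomes the global cursor.
    legacy_global = {"updated_at": "2023-05-27T00:00:00Z"}

    # 2. Per-partition format: now iterated with .get(..., []), so a missing
    #    "states" list no longer raises a KeyError.
    per_partition = {
        "lookback_window": 0,
        "states": [
            {
                "partition": {"parent_id": "1"},
                "cursor": {"updated_at": "2023-05-27T00:00:00Z"},
            },
        ],
    }

    # 3. Global-key-only format: previously misclassified as shape 1 because
    #    only the per-partition key was checked; the added condition keeps it
    #    out of that branch.
    global_only = {"state": {"updated_at": "2023-05-27T00:00:00Z"}, "lookback_window": 0}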
--- a/airbyte_cdk/sources/declarative/manifest_declarative_source.py
+++ b/airbyte_cdk/sources/declarative/manifest_declarative_source.py
@@ -26,9 +26,6 @@ from airbyte_cdk.models import (
 from airbyte_cdk.sources.declarative.checks import COMPONENTS_CHECKER_TYPE_MAPPING
 from airbyte_cdk.sources.declarative.checks.connection_checker import ConnectionChecker
 from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource
-from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
-    CheckStream as CheckStreamModel,
-)
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     DeclarativeStream as DeclarativeStreamModel,
 )
--- a/airbyte_cdk/sources/declarative/models/declarative_component_schema.py
+++ b/airbyte_cdk/sources/declarative/models/declarative_component_schema.py
@@ -736,8 +736,13 @@ class HttpResponseFilter(BaseModel):
     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
 
 
+class ComplexFieldType(BaseModel):
+    field_type: str
+    items: Optional[Union[str, ComplexFieldType]] = None
+
+
 class TypesMap(BaseModel):
-    target_type: Union[str, List[str]]
+    target_type: Union[str, List[str], ComplexFieldType]
     current_type: Union[str, List[str]]
     condition: Optional[str] = None
 
@@ -2260,6 +2265,7 @@ class DynamicDeclarativeStream(BaseModel):
     )
 
 
+ComplexFieldType.update_forward_refs()
 CompositeErrorHandler.update_forward_refs()
 DeclarativeSource1.update_forward_refs()
 DeclarativeSource2.update_forward_refs()
--- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py
+++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py
@@ -133,6 +133,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     CheckStream as CheckStreamModel,
 )
+from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
+    ComplexFieldType as ComplexFieldTypeModel,
+)
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     ComponentMappingDefinition as ComponentMappingDefinitionModel,
 )
@@ -429,6 +432,7 @@ from airbyte_cdk.sources.declarative.retrievers import (
     SimpleRetrieverTestReadDecorator,
 )
 from airbyte_cdk.sources.declarative.schema import (
+    ComplexFieldType,
     DefaultSchemaLoader,
     DynamicSchemaLoader,
     InlineSchemaLoader,
@@ -503,6 +507,7 @@ class ModelToComponentFactory:
         disable_cache: bool = False,
         disable_resumable_full_refresh: bool = False,
         message_repository: Optional[MessageRepository] = None,
+        connector_state_manager: Optional[ConnectorStateManager] = None,
     ):
         self._init_mappings()
         self._limit_pages_fetched_per_slice = limit_pages_fetched_per_slice
@@ -514,6 +519,7 @@ class ModelToComponentFactory:
         self._message_repository = message_repository or InMemoryMessageRepository(
             self._evaluate_log_level(emit_connector_builder_messages)
         )
+        self._connector_state_manager = connector_state_manager or ConnectorStateManager()
 
     def _init_mappings(self) -> None:
         self.PYDANTIC_MODEL_TO_CONSTRUCTOR: Mapping[Type[BaseModel], Callable[..., Any]] = {
@@ -572,6 +578,7 @@ class ModelToComponentFactory:
             DynamicSchemaLoaderModel: self.create_dynamic_schema_loader,
             SchemaTypeIdentifierModel: self.create_schema_type_identifier,
             TypesMapModel: self.create_types_map,
+            ComplexFieldTypeModel: self.create_complex_field_type,
             JwtAuthenticatorModel: self.create_jwt_authenticator,
             LegacyToPerPartitionStateMigrationModel: self.create_legacy_to_per_partition_state_migration,
             ListPartitionRouterModel: self.create_list_partition_router,
@@ -922,17 +929,24 @@ class ModelToComponentFactory:
 
     def create_concurrent_cursor_from_datetime_based_cursor(
         self,
-        state_manager: ConnectorStateManager,
         model_type: Type[BaseModel],
         component_definition: ComponentDefinition,
         stream_name: str,
         stream_namespace: Optional[str],
         config: Config,
-        stream_state: MutableMapping[str, Any],
         message_repository: Optional[MessageRepository] = None,
         runtime_lookback_window: Optional[datetime.timedelta] = None,
         **kwargs: Any,
     ) -> ConcurrentCursor:
+        # Per-partition incremental streams can dynamically create child cursors which will pass their current
+        # state via the stream_state keyword argument. Incremental syncs without parent streams use the
+        # incoming state and connector_state_manager that is initialized when the component factory is created
+        stream_state = (
+            self._connector_state_manager.get_stream_state(stream_name, stream_namespace)
+            if "stream_state" not in kwargs
+            else kwargs["stream_state"]
+        )
+
         component_type = component_definition.get("type")
         if component_definition.get("type") != model_type.__name__:
             raise ValueError(
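A hedged sketch of the two call paths this signature change creates (the cursor definition, stream names, and state values are illustrative placeholders, not a tested invocation):

    # state_manager: a ConnectorStateManager built from the incoming state
    factory = ModelToComponentFactory(connector_state_manager=state_manager)

    definition = {
        "type": "DatetimeBasedCursor",
        "cursor_field": "updated_at",
        "datetime_format": "%Y-%m-%dT%H:%M:%SZ",
        "start_datetime": "2021-01-01T00:00:00Z",
    }

    # Plain incremental stream: no stream_state kwarg, so state comes from the
    # factory's ConnectorStateManager.
    cursor = factory.create_concurrent_cursor_from_datetime_based_cursor(
        model_type=DatetimeBasedCursorModel,
        component_definition=definition,
        stream_name="orders",
        stream_namespace=None,
        config={},
    )

    # Per-partition child cursor: the caller passes the partition's state
    # explicitly, which takes precedence over the state manager.
    child_cursor = factory.create_concurrent_cursor_from_datetime_based_cursor(
        model_type=DatetimeBasedCursorModel,
        component_definition=definition,
        stream_name="orders",
        stream_namespace=None,
        config={},
        stream_state={"updated_at": "2024-01-01T00:00:00Z"},
    )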
@@ -1126,7 +1140,7 @@ class ModelToComponentFactory:
             stream_namespace=stream_namespace,
             stream_state=stream_state,
             message_repository=message_repository or self._message_repository,
-            connector_state_manager=state_manager,
+            connector_state_manager=self._connector_state_manager,
             connector_state_converter=connector_state_converter,
             cursor_field=cursor_field,
             slice_boundary_fields=slice_boundary_fields,
@@ -1676,6 +1690,22 @@ class ModelToComponentFactory:
                 stream_cursor=cursor_component,
             )
         elif model.incremental_sync:
+            if model.retriever.type == "AsyncRetriever":
+                if model.incremental_sync.type != "DatetimeBasedCursor":
+                    # We are currently in a transition to the Concurrent CDK and AsyncRetriever can only work with the support of unordered slices (for example, when we trigger reports for January and February, the report in February can be completed first). Once we have support for custom concurrent cursor or have a new implementation available in the CDK, we can enable more cursors here.
+                    raise ValueError(
+                        "AsyncRetriever with cursor other than DatetimeBasedCursor is not supported yet"
+                    )
+                if model.retriever.partition_router:
+                    # Note that this development is also done in parallel to the per partition development which once merged we could support here by calling `create_concurrent_cursor_from_perpartition_cursor`
+                    raise ValueError("Per partition state is not supported yet for AsyncRetriever")
+                return self.create_concurrent_cursor_from_datetime_based_cursor(  # type: ignore  # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
+                    model_type=DatetimeBasedCursorModel,
+                    component_definition=model.incremental_sync.__dict__,
+                    stream_name=model.name or "",
+                    stream_namespace=None,
+                    config=config or {},
+                )
             return (
                 self._create_component_from_model(model=model.incremental_sync, config=config)
                 if model.incremental_sync
@@ -1894,10 +1924,26 @@ class ModelToComponentFactory:
     ) -> InlineSchemaLoader:
         return InlineSchemaLoader(schema=model.schema_ or {}, parameters={})
 
-    @staticmethod
-    def create_types_map(model: TypesMapModel, **kwargs: Any) -> TypesMap:
+    def create_complex_field_type(
+        self, model: ComplexFieldTypeModel, config: Config, **kwargs: Any
+    ) -> ComplexFieldType:
+        items = (
+            self._create_component_from_model(model=model.items, config=config)
+            if isinstance(model.items, ComplexFieldTypeModel)
+            else model.items
+        )
+
+        return ComplexFieldType(field_type=model.field_type, items=items)
+
+    def create_types_map(self, model: TypesMapModel, config: Config, **kwargs: Any) -> TypesMap:
+        target_type = (
+            self._create_component_from_model(model=model.target_type, config=config)
+            if isinstance(model.target_type, ComplexFieldTypeModel)
+            else model.target_type
+        )
+
         return TypesMap(
-            target_type=model.target_type,
+            target_type=target_type,
             current_type=model.current_type,
             condition=model.condition if model.condition is not None else "True",
         )
--- a/airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py
+++ b/airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py
@@ -295,28 +295,58 @@ class SubstreamPartitionRouter(PartitionRouter):
             return
 
         if not parent_state and incremental_dependency:
-            # Attempt to retrieve child state
-            substream_state_values = list(stream_state.values())
-            substream_state = substream_state_values[0] if substream_state_values else {}
-            # Filter out per partition state. Because we pass the state to the parent stream in the format {cursor_field: substream_state}
-            if isinstance(substream_state, (list, dict)):
-                substream_state = {}
-
-            parent_state = {}
-
-            # Copy child state to parent streams with incremental dependencies
-            if substream_state:
-                for parent_config in self.parent_stream_configs:
-                    if parent_config.incremental_dependency:
-                        parent_state[parent_config.stream.name] = {
-                            parent_config.stream.cursor_field: substream_state
-                        }
+            # Migrate child state to parent state format
+            parent_state = self._migrate_child_state_to_parent_state(stream_state)
 
         # Set state for each parent stream with an incremental dependency
         for parent_config in self.parent_stream_configs:
             if parent_config.incremental_dependency:
                 parent_config.stream.state = parent_state.get(parent_config.stream.name, {})
 
+    def _migrate_child_state_to_parent_state(self, stream_state: StreamState) -> StreamState:
+        """
+        Migrate the child stream state to the parent stream's state format.
+
+        This method converts the global or child state into a format compatible with parent
+        streams. The migration occurs only for parent streams with incremental dependencies.
+        The method filters out per-partition states and retains only the global state in the
+        format `{cursor_field: cursor_value}`.
+
+        Args:
+            stream_state (StreamState): The state to migrate. Expected formats include:
+                - {"updated_at": "2023-05-27T00:00:00Z"}
+                - {"states": [...]} (ignored during migration)
+
+        Returns:
+            StreamState: A migrated state for parent streams in the format:
+                {
+                    "parent_stream_name": {"parent_stream_cursor": "2023-05-27T00:00:00Z"}
+                }
+
+        Example:
+            Input: {"updated_at": "2023-05-27T00:00:00Z"}
+            Output: {
+                "parent_stream_name": {"parent_stream_cursor": "2023-05-27T00:00:00Z"}
+            }
+        """
+        substream_state_values = list(stream_state.values())
+        substream_state = substream_state_values[0] if substream_state_values else {}
+
+        # Ignore per-partition states or invalid formats
+        if isinstance(substream_state, (list, dict)) or len(substream_state_values) != 1:
+            return {}
+
+        # Copy child state to parent streams with incremental dependencies
+        parent_state = {}
+        if substream_state:
+            for parent_config in self.parent_stream_configs:
+                if parent_config.incremental_dependency:
+                    parent_state[parent_config.stream.name] = {
+                        parent_config.stream.cursor_field: substream_state
+                    }
+
+        return parent_state
+
     def get_stream_state(self) -> Optional[Mapping[str, StreamState]]:
         """
         Get the state of the parent streams.
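The filter rule is subtle: exactly one value must be present in the child state, and it must be scalar. A standalone mirror of that rule with its expected outputs:

    # Exactly one scalar cursor value may be promoted to the parents;
    # lists/dicts (per-partition state) are ignored.
    def migrate(stream_state, parent_cursor_fields):
        values = list(stream_state.values())
        value = values[0] if values else {}
        if isinstance(value, (list, dict)) or len(values) != 1:
            return {}
        return {name: {cursor: value} for name, cursor in parent_cursor_fields.items()}

    print(migrate({"updated_at": "2023-05-27T00:00:00Z"}, {"projects": "updated_at"}))
    # {'projects': {'updated_at': '2023-05-27T00:00:00Z'}}
    print(migrate({"states": [{"partition": {}, "cursor": {}}]}, {"projects": "updated_at"}))
    # {} -- per-partition state is not migrated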
--- a/airbyte_cdk/sources/declarative/retrievers/async_retriever.py
+++ b/airbyte_cdk/sources/declarative/retrievers/async_retriever.py
@@ -75,7 +75,7 @@ class AsyncRetriever(Retriever):
         """
         if not isinstance(stream_slice, StreamSlice) or "partition" not in stream_slice.partition:
             raise AirbyteTracedException(
-                message="Invalid arguments to AsyncJobRetriever.read_records: stream_slice is no optional. Please contact Airbyte Support",
+                message="Invalid arguments to AsyncRetriever.read_records: stream_slice is not optional. Please contact Airbyte Support",
                 failure_type=FailureType.system_error,
             )
         return stream_slice["partition"]  # type: ignore  # stream_slice["partition"] has been added as an AsyncPartition as part of stream_slices
--- a/airbyte_cdk/sources/declarative/schema/__init__.py
+++ b/airbyte_cdk/sources/declarative/schema/__init__.py
@@ -4,6 +4,7 @@
 
 from airbyte_cdk.sources.declarative.schema.default_schema_loader import DefaultSchemaLoader
 from airbyte_cdk.sources.declarative.schema.dynamic_schema_loader import (
+    ComplexFieldType,
     DynamicSchemaLoader,
     SchemaTypeIdentifier,
     TypesMap,
@@ -18,6 +19,7 @@ __all__ = [
     "SchemaLoader",
     "InlineSchemaLoader",
     "DynamicSchemaLoader",
+    "ComplexFieldType",
     "TypesMap",
     "SchemaTypeIdentifier",
 ]
--- a/airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py
+++ b/airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py
@@ -18,7 +18,7 @@ from airbyte_cdk.sources.declarative.transformations import RecordTransformation
 from airbyte_cdk.sources.source import ExperimentalClassWarning
 from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
 
-AIRBYTE_DATA_TYPES: Mapping[str, Mapping[str, Any]] = {
+AIRBYTE_DATA_TYPES: Mapping[str, MutableMapping[str, Any]] = {
     "string": {"type": ["null", "string"]},
     "boolean": {"type": ["null", "boolean"]},
     "date": {"type": ["null", "string"], "format": "date"},
@@ -45,6 +45,25 @@ AIRBYTE_DATA_TYPES: Mapping[str, Mapping[str, Any]] = {
 }
 
 
+@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
+@dataclass(frozen=True)
+class ComplexFieldType:
+    """
+    Identifies complex field type
+    """
+
+    field_type: str
+    items: Optional[Union[str, "ComplexFieldType"]] = None
+
+    def __post_init__(self) -> None:
+        """
+        Enforces that `items` is only used when `field_type` is an array
+        """
+        # `items` is valid only for array target types
+        if self.items and self.field_type != "array":
+            raise ValueError("'items' can only be used when 'field_type' is an array.")
+
+
 @deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
 @dataclass(frozen=True)
 class TypesMap:
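A short usage sketch of the `__post_init__` validation:

    # Valid: items only combines with field_type="array", and may nest.
    ComplexFieldType(field_type="array", items="string")
    ComplexFieldType(
        field_type="array",
        items=ComplexFieldType(field_type="array", items="string"),
    )

    # Invalid: raises
    # ValueError("'items' can only be used when 'field_type' is an array.")
    ComplexFieldType(field_type="string", items="string")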
@@ -52,7 +71,7 @@ class TypesMap:
     Represents a mapping between a current type and its corresponding target type.
     """
 
-    target_type: Union[List[str], str]
+    target_type: Union[List[str], str, ComplexFieldType]
     current_type: Union[List[str], str]
     condition: Optional[str]
@@ -135,8 +154,9 @@ class DynamicSchemaLoader(SchemaLoader):
         transformed_properties = self._transform(properties, {})
 
         return {
-            "$schema": "http://json-schema.org/draft-07/schema#",
+            "$schema": "https://json-schema.org/draft-07/schema#",
             "type": "object",
+            "additionalProperties": True,
             "properties": transformed_properties,
         }
 
@@ -188,18 +208,36 @@ class DynamicSchemaLoader(SchemaLoader):
             first_type = self._get_airbyte_type(mapped_field_type[0])
             second_type = self._get_airbyte_type(mapped_field_type[1])
             return {"oneOf": [first_type, second_type]}
+
         elif isinstance(mapped_field_type, str):
             return self._get_airbyte_type(mapped_field_type)
+
+        elif isinstance(mapped_field_type, ComplexFieldType):
+            return self._resolve_complex_type(mapped_field_type)
+
         else:
             raise ValueError(
                 f"Invalid data type. Available string or two items list of string. Got {mapped_field_type}."
             )
 
+    def _resolve_complex_type(self, complex_type: ComplexFieldType) -> Mapping[str, Any]:
+        if not complex_type.items:
+            return self._get_airbyte_type(complex_type.field_type)
+
+        field_type = self._get_airbyte_type(complex_type.field_type)
+        field_type["items"] = (
+            self._get_airbyte_type(complex_type.items)
+            if isinstance(complex_type.items, str)
+            else self._resolve_complex_type(complex_type.items)
+        )
+
+        return field_type
+
     def _replace_type_if_not_valid(
         self,
         field_type: Union[List[str], str],
         raw_schema: MutableMapping[str, Any],
-    ) -> Union[List[str], str]:
+    ) -> Union[List[str], str, ComplexFieldType]:
         """
         Replaces a field type if it matches a type mapping in `types_map`.
         """
@@ -216,7 +254,7 @@ class DynamicSchemaLoader(SchemaLoader):
         return field_type
 
     @staticmethod
-    def _get_airbyte_type(field_type: str) -> Mapping[str, Any]:
+    def _get_airbyte_type(field_type: str) -> MutableMapping[str, Any]:
         """
         Maps a field type to its corresponding Airbyte type definition.
         """