airbyte-cdk 6.26.0.dev4105__py3-none-any.whl → 6.27.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +57 -32
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +14 -0
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +39 -13
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +0 -3
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +7 -1
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +52 -6
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +46 -16
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +1 -1
- airbyte_cdk/sources/declarative/schema/__init__.py +2 -0
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +43 -5
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +0 -10
- airbyte_cdk/sources/file_based/file_based_source.py +1 -44
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +0 -33
- airbyte_cdk/sources/file_based/schema_helpers.py +0 -25
- airbyte_cdk/sources/file_based/stream/__init__.py +1 -2
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +0 -29
- airbyte_cdk/sources/types.py +4 -2
- airbyte_cdk/utils/slice_hasher.py +8 -1
- {airbyte_cdk-6.26.0.dev4105.dist-info → airbyte_cdk-6.27.0.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.26.0.dev4105.dist-info → airbyte_cdk-6.27.0.dist-info}/RECORD +23 -25
- airbyte_cdk/sources/file_based/config/permissions.py +0 -34
- airbyte_cdk/sources/file_based/stream/identities_stream.py +0 -96
- {airbyte_cdk-6.26.0.dev4105.dist-info → airbyte_cdk-6.27.0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.26.0.dev4105.dist-info → airbyte_cdk-6.27.0.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.26.0.dev4105.dist-info → airbyte_cdk-6.27.0.dist-info}/entry_points.txt +0 -0
| @@ -34,8 +34,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import | |
| 34 34 | 
             
            from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
         | 
| 35 35 | 
             
                ModelToComponentFactory,
         | 
| 36 36 | 
             
            )
         | 
| 37 | 
            +
            from airbyte_cdk.sources.declarative.partition_routers import AsyncJobPartitionRouter
         | 
| 37 38 | 
             
            from airbyte_cdk.sources.declarative.requesters import HttpRequester
         | 
| 38 | 
            -
            from airbyte_cdk.sources.declarative.retrievers import Retriever, SimpleRetriever
         | 
| 39 | 
            +
            from airbyte_cdk.sources.declarative.retrievers import AsyncRetriever, Retriever, SimpleRetriever
         | 
| 39 40 | 
             
            from airbyte_cdk.sources.declarative.stream_slicers.declarative_partition_generator import (
         | 
| 40 41 | 
             
                DeclarativePartitionFactory,
         | 
| 41 42 | 
             
                StreamSlicerPartitionGenerator,
         | 
| @@ -48,7 +49,7 @@ from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStrea | |
| 48 49 | 
             
            from airbyte_cdk.sources.streams.concurrent.availability_strategy import (
         | 
| 49 50 | 
             
                AlwaysAvailableAvailabilityStrategy,
         | 
| 50 51 | 
             
            )
         | 
| 51 | 
            -
            from airbyte_cdk.sources.streams.concurrent.cursor import FinalStateCursor
         | 
| 52 | 
            +
            from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, FinalStateCursor
         | 
| 52 53 | 
             
            from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
         | 
| 53 54 | 
             
            from airbyte_cdk.sources.streams.concurrent.helpers import get_primary_key_from_stream
         | 
| 54 55 |  | 
| @@ -69,6 +70,10 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]): | |
| 69 70 | 
             
                    component_factory: Optional[ModelToComponentFactory] = None,
         | 
| 70 71 | 
             
                    **kwargs: Any,
         | 
| 71 72 | 
             
                ) -> None:
         | 
| 73 | 
            +
                    # todo: We could remove state from initialization. Now that streams are grouped during the read(), a source
         | 
| 74 | 
            +
                    #  no longer needs to store the original incoming state. But maybe there's an edge case?
         | 
| 75 | 
            +
                    self._connector_state_manager = ConnectorStateManager(state=state)  # type: ignore  # state is always in the form of List[AirbyteStateMessage]. The ConnectorStateManager should use generics, but this can be done later
         | 
| 76 | 
            +
             | 
| 72 77 | 
             
                    # To reduce the complexity of the concurrent framework, we are not enabling RFR with synthetic
         | 
| 73 78 | 
             
                    # cursors. We do this by no longer automatically instantiating RFR cursors when converting
         | 
| 74 79 | 
             
                    # the declarative models into runtime components. Concurrent sources will continue to checkpoint
         | 
| @@ -76,6 +81,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]): | |
| 76 81 | 
             
                    component_factory = component_factory or ModelToComponentFactory(
         | 
| 77 82 | 
             
                        emit_connector_builder_messages=emit_connector_builder_messages,
         | 
| 78 83 | 
             
                        disable_resumable_full_refresh=True,
         | 
| 84 | 
            +
                        connector_state_manager=self._connector_state_manager,
         | 
| 79 85 | 
             
                    )
         | 
| 80 86 |  | 
| 81 87 | 
             
                    super().__init__(
         | 
| @@ -86,10 +92,6 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]): | |
| 86 92 | 
             
                        component_factory=component_factory,
         | 
| 87 93 | 
             
                    )
         | 
| 88 94 |  | 
| 89 | 
            -
                    # todo: We could remove state from initialization. Now that streams are grouped during the read(), a source
         | 
| 90 | 
            -
                    #  no longer needs to store the original incoming state. But maybe there's an edge case?
         | 
| 91 | 
            -
                    self._state = state
         | 
| 92 | 
            -
             | 
| 93 95 | 
             
                    concurrency_level_from_manifest = self._source_config.get("concurrency_level")
         | 
| 94 96 | 
             
                    if concurrency_level_from_manifest:
         | 
| 95 97 | 
             
                        concurrency_level_component = self._constructor.create_component(
         | 
| @@ -179,8 +181,6 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]): | |
| 179 181 | 
             
                    concurrent_streams: List[AbstractStream] = []
         | 
| 180 182 | 
             
                    synchronous_streams: List[Stream] = []
         | 
| 181 183 |  | 
| 182 | 
            -
                    state_manager = ConnectorStateManager(state=self._state)  # type: ignore  # state is always in the form of List[AirbyteStateMessage]. The ConnectorStateManager should use generics, but this can be done later
         | 
| 183 | 
            -
             | 
| 184 184 | 
             
                    # Combine streams and dynamic_streams. Note: both cannot be empty at the same time,
         | 
| 185 185 | 
             
                    # and this is validated during the initialization of the source.
         | 
| 186 186 | 
             
                    streams = self._stream_configs(self._source_config) + self._dynamic_stream_configs(
         | 
| @@ -220,31 +220,52 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]): | |
| 220 220 | 
             
                            if self._is_datetime_incremental_without_partition_routing(
         | 
| 221 221 | 
             
                                declarative_stream, incremental_sync_component_definition
         | 
| 222 222 | 
             
                            ):
         | 
| 223 | 
            -
                                stream_state =  | 
| 223 | 
            +
                                stream_state = self._connector_state_manager.get_stream_state(
         | 
| 224 224 | 
             
                                    stream_name=declarative_stream.name, namespace=declarative_stream.namespace
         | 
| 225 225 | 
             
                                )
         | 
| 226 226 |  | 
| 227 | 
            -
                                cursor = self._constructor.create_concurrent_cursor_from_datetime_based_cursor(
         | 
| 228 | 
            -
                                    state_manager=state_manager,
         | 
| 229 | 
            -
                                    model_type=DatetimeBasedCursorModel,
         | 
| 230 | 
            -
                                    component_definition=incremental_sync_component_definition,  # type: ignore  # Not None because of the if condition above
         | 
| 231 | 
            -
                                    stream_name=declarative_stream.name,
         | 
| 232 | 
            -
                                    stream_namespace=declarative_stream.namespace,
         | 
| 233 | 
            -
                                    config=config or {},
         | 
| 234 | 
            -
                                    stream_state=stream_state,
         | 
| 235 | 
            -
                                )
         | 
| 236 | 
            -
             | 
| 237 227 | 
             
                                retriever = self._get_retriever(declarative_stream, stream_state)
         | 
| 238 228 |  | 
| 239 | 
            -
                                 | 
| 240 | 
            -
                                     | 
| 241 | 
            -
             | 
| 242 | 
            -
             | 
| 243 | 
            -
             | 
| 244 | 
            -
             | 
| 245 | 
            -
             | 
| 246 | 
            -
             | 
| 247 | 
            -
             | 
| 229 | 
            +
                                if isinstance(declarative_stream.retriever, AsyncRetriever) and isinstance(
         | 
| 230 | 
            +
                                    declarative_stream.retriever.stream_slicer, AsyncJobPartitionRouter
         | 
| 231 | 
            +
                                ):
         | 
| 232 | 
            +
                                    cursor = declarative_stream.retriever.stream_slicer.stream_slicer
         | 
| 233 | 
            +
             | 
| 234 | 
            +
                                    if not isinstance(cursor, ConcurrentCursor):
         | 
| 235 | 
            +
                                        # This should never happen since we instantiate ConcurrentCursor in
         | 
| 236 | 
            +
                                        # model_to_component_factory.py
         | 
| 237 | 
            +
                                        raise ValueError(
         | 
| 238 | 
            +
                                            f"Expected AsyncJobPartitionRouter stream_slicer to be of type ConcurrentCursor, but received{cursor.__class__}"
         | 
| 239 | 
            +
                                        )
         | 
| 240 | 
            +
             | 
| 241 | 
            +
                                    partition_generator = StreamSlicerPartitionGenerator(
         | 
| 242 | 
            +
                                        partition_factory=DeclarativePartitionFactory(
         | 
| 243 | 
            +
                                            declarative_stream.name,
         | 
| 244 | 
            +
                                            declarative_stream.get_json_schema(),
         | 
| 245 | 
            +
                                            retriever,
         | 
| 246 | 
            +
                                            self.message_repository,
         | 
| 247 | 
            +
                                        ),
         | 
| 248 | 
            +
                                        stream_slicer=declarative_stream.retriever.stream_slicer,
         | 
| 249 | 
            +
                                    )
         | 
| 250 | 
            +
                                else:
         | 
| 251 | 
            +
                                    cursor = (
         | 
| 252 | 
            +
                                        self._constructor.create_concurrent_cursor_from_datetime_based_cursor(
         | 
| 253 | 
            +
                                            model_type=DatetimeBasedCursorModel,
         | 
| 254 | 
            +
                                            component_definition=incremental_sync_component_definition,  # type: ignore  # Not None because of the if condition above
         | 
| 255 | 
            +
                                            stream_name=declarative_stream.name,
         | 
| 256 | 
            +
                                            stream_namespace=declarative_stream.namespace,
         | 
| 257 | 
            +
                                            config=config or {},
         | 
| 258 | 
            +
                                        )
         | 
| 259 | 
            +
                                    )
         | 
| 260 | 
            +
                                    partition_generator = StreamSlicerPartitionGenerator(
         | 
| 261 | 
            +
                                        partition_factory=DeclarativePartitionFactory(
         | 
| 262 | 
            +
                                            declarative_stream.name,
         | 
| 263 | 
            +
                                            declarative_stream.get_json_schema(),
         | 
| 264 | 
            +
                                            retriever,
         | 
| 265 | 
            +
                                            self.message_repository,
         | 
| 266 | 
            +
                                        ),
         | 
| 267 | 
            +
                                        stream_slicer=cursor,
         | 
| 268 | 
            +
                                    )
         | 
| 248 269 |  | 
| 249 270 | 
             
                                concurrent_streams.append(
         | 
| 250 271 | 
             
                                    DefaultStream(
         | 
| @@ -306,14 +327,14 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]): | |
| 306 327 | 
             
                                    declarative_stream.retriever.stream_slicer, PerPartitionWithGlobalCursor
         | 
| 307 328 | 
             
                                )
         | 
| 308 329 | 
             
                            ):
         | 
| 309 | 
            -
                                stream_state =  | 
| 330 | 
            +
                                stream_state = self._connector_state_manager.get_stream_state(
         | 
| 310 331 | 
             
                                    stream_name=declarative_stream.name, namespace=declarative_stream.namespace
         | 
| 311 332 | 
             
                                )
         | 
| 312 333 | 
             
                                partition_router = declarative_stream.retriever.stream_slicer._partition_router
         | 
| 313 334 |  | 
| 314 335 | 
             
                                perpartition_cursor = (
         | 
| 315 336 | 
             
                                    self._constructor.create_concurrent_cursor_from_perpartition_cursor(
         | 
| 316 | 
            -
                                        state_manager= | 
| 337 | 
            +
                                        state_manager=self._connector_state_manager,
         | 
| 317 338 | 
             
                                        model_type=DatetimeBasedCursorModel,
         | 
| 318 339 | 
             
                                        component_definition=incremental_sync_component_definition,
         | 
| 319 340 | 
             
                                        stream_name=declarative_stream.name,
         | 
| @@ -369,7 +390,10 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]): | |
| 369 390 | 
             
                            declarative_stream=declarative_stream
         | 
| 370 391 | 
             
                        )
         | 
| 371 392 | 
             
                        and hasattr(declarative_stream.retriever, "stream_slicer")
         | 
| 372 | 
            -
                        and  | 
| 393 | 
            +
                        and (
         | 
| 394 | 
            +
                            isinstance(declarative_stream.retriever.stream_slicer, DatetimeBasedCursor)
         | 
| 395 | 
            +
                            or isinstance(declarative_stream.retriever.stream_slicer, AsyncJobPartitionRouter)
         | 
| 396 | 
            +
                        )
         | 
| 373 397 | 
             
                    )
         | 
| 374 398 |  | 
| 375 399 | 
             
                def _stream_supports_concurrent_partition_processing(
         | 
| @@ -438,8 +462,9 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]): | |
| 438 462 | 
             
                                        return False
         | 
| 439 463 | 
             
                    return True
         | 
| 440 464 |  | 
| 465 | 
            +
                @staticmethod
         | 
| 441 466 | 
             
                def _get_retriever(
         | 
| 442 | 
            -
                     | 
| 467 | 
            +
                    declarative_stream: DeclarativeStream, stream_state: Mapping[str, Any]
         | 
| 443 468 | 
             
                ) -> Retriever:
         | 
| 444 469 | 
             
                    retriever = declarative_stream.retriever
         | 
| 445 470 |  | 
| @@ -1800,6 +1800,19 @@ definitions: | |
| 1800 1800 | 
             
                  $parameters:
         | 
| 1801 1801 | 
             
                    type: object
         | 
| 1802 1802 | 
             
                    additionalProperties: true
         | 
| 1803 | 
            +
              ComplexFieldType:
         | 
| 1804 | 
            +
                title: Schema Field Type
         | 
| 1805 | 
            +
                description: (This component is experimental. Use at your own risk.) Represents a complex field type.
         | 
| 1806 | 
            +
                type: object
         | 
| 1807 | 
            +
                required:
         | 
| 1808 | 
            +
                  - field_type
         | 
| 1809 | 
            +
                properties:
         | 
| 1810 | 
            +
                  field_type:
         | 
| 1811 | 
            +
                    type: string
         | 
| 1812 | 
            +
                  items:
         | 
| 1813 | 
            +
                    anyOf:
         | 
| 1814 | 
            +
                      - type: string
         | 
| 1815 | 
            +
                      - "$ref": "#/definitions/ComplexFieldType"
         | 
| 1803 1816 | 
             
              TypesMap:
         | 
| 1804 1817 | 
             
                title: Types Map
         | 
| 1805 1818 | 
             
                description: (This component is experimental. Use at your own risk.) Represents a mapping between a current type and its corresponding target type.
         | 
| @@ -1814,6 +1827,7 @@ definitions: | |
| 1814 1827 | 
             
                      - type: array
         | 
| 1815 1828 | 
             
                        items:
         | 
| 1816 1829 | 
             
                          type: string
         | 
| 1830 | 
            +
                      - "$ref": "#/definitions/ComplexFieldType"
         | 
| 1817 1831 | 
             
                  current_type:
         | 
| 1818 1832 | 
             
                    anyOf:
         | 
| 1819 1833 | 
             
                      - type: string
         | 
| @@ -147,7 +147,7 @@ class ConcurrentPerPartitionCursor(Cursor): | |
| 147 147 | 
             
                                < cursor.state[self.cursor_field.cursor_field_key]
         | 
| 148 148 | 
             
                            ):
         | 
| 149 149 | 
             
                                self._new_global_cursor = copy.deepcopy(cursor.state)
         | 
| 150 | 
            -
             | 
| 150 | 
            +
                        self._emit_state_message()
         | 
| 151 151 |  | 
| 152 152 | 
             
                def ensure_at_least_one_state_emitted(self) -> None:
         | 
| 153 153 | 
             
                    """
         | 
| @@ -192,7 +192,8 @@ class ConcurrentPerPartitionCursor(Cursor): | |
| 192 192 | 
             
                            self._global_cursor,
         | 
| 193 193 | 
             
                            self._lookback_window if self._global_cursor else 0,
         | 
| 194 194 | 
             
                        )
         | 
| 195 | 
            -
                        self. | 
| 195 | 
            +
                        with self._lock:
         | 
| 196 | 
            +
                            self._cursor_per_partition[self._to_partition_key(partition.partition)] = cursor
         | 
| 196 197 | 
             
                        self._semaphore_per_partition[self._to_partition_key(partition.partition)] = (
         | 
| 197 198 | 
             
                            threading.Semaphore(0)
         | 
| 198 199 | 
             
                        )
         | 
| @@ -210,16 +211,38 @@ class ConcurrentPerPartitionCursor(Cursor): | |
| 210 211 |  | 
| 211 212 | 
             
                def _ensure_partition_limit(self) -> None:
         | 
| 212 213 | 
             
                    """
         | 
| 213 | 
            -
                    Ensure the maximum number of partitions  | 
| 214 | 
            +
                    Ensure the maximum number of partitions does not exceed the predefined limit.
         | 
| 215 | 
            +
             | 
| 216 | 
            +
                    Steps:
         | 
| 217 | 
            +
                    1. Attempt to remove partitions that are marked as finished in `_finished_partitions`.
         | 
| 218 | 
            +
                       These partitions are considered processed and safe to delete.
         | 
| 219 | 
            +
                    2. If the limit is still exceeded and no finished partitions are available for removal,
         | 
| 220 | 
            +
                       remove the oldest partition unconditionally. We expect failed partitions to be removed.
         | 
| 221 | 
            +
             | 
| 222 | 
            +
                    Logging:
         | 
| 223 | 
            +
                    - Logs a warning each time a partition is removed, indicating whether it was finished
         | 
| 224 | 
            +
                      or removed due to being the oldest.
         | 
| 214 225 | 
             
                    """
         | 
| 215 | 
            -
                     | 
| 216 | 
            -
                        self. | 
| 217 | 
            -
             | 
| 218 | 
            -
                             | 
| 219 | 
            -
             | 
| 220 | 
            -
             | 
| 221 | 
            -
             | 
| 222 | 
            -
             | 
| 226 | 
            +
                    with self._lock:
         | 
| 227 | 
            +
                        while len(self._cursor_per_partition) > self.DEFAULT_MAX_PARTITIONS_NUMBER - 1:
         | 
| 228 | 
            +
                            # Try removing finished partitions first
         | 
| 229 | 
            +
                            for partition_key in list(self._cursor_per_partition.keys()):
         | 
| 230 | 
            +
                                if partition_key in self._finished_partitions:
         | 
| 231 | 
            +
                                    oldest_partition = self._cursor_per_partition.pop(
         | 
| 232 | 
            +
                                        partition_key
         | 
| 233 | 
            +
                                    )  # Remove the oldest partition
         | 
| 234 | 
            +
                                    logger.warning(
         | 
| 235 | 
            +
                                        f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._over_limit}."
         | 
| 236 | 
            +
                                    )
         | 
| 237 | 
            +
                                    break
         | 
| 238 | 
            +
                            else:
         | 
| 239 | 
            +
                                # If no finished partitions can be removed, fall back to removing the oldest partition
         | 
| 240 | 
            +
                                oldest_partition = self._cursor_per_partition.popitem(last=False)[
         | 
| 241 | 
            +
                                    1
         | 
| 242 | 
            +
                                ]  # Remove the oldest partition
         | 
| 243 | 
            +
                                logger.warning(
         | 
| 244 | 
            +
                                    f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._over_limit}."
         | 
| 245 | 
            +
                                )
         | 
| 223 246 |  | 
| 224 247 | 
             
                def _set_initial_state(self, stream_state: StreamState) -> None:
         | 
| 225 248 | 
             
                    """
         | 
| @@ -264,7 +287,10 @@ class ConcurrentPerPartitionCursor(Cursor): | |
| 264 287 | 
             
                    if not stream_state:
         | 
| 265 288 | 
             
                        return
         | 
| 266 289 |  | 
| 267 | 
            -
                    if  | 
| 290 | 
            +
                    if (
         | 
| 291 | 
            +
                        self._PERPARTITION_STATE_KEY not in stream_state
         | 
| 292 | 
            +
                        and self._GLOBAL_STATE_KEY not in stream_state
         | 
| 293 | 
            +
                    ):
         | 
| 268 294 | 
             
                        # We assume that `stream_state` is in a global format that can be applied to all partitions.
         | 
| 269 295 | 
             
                        # Example: {"global_state_format_key": "global_state_format_value"}
         | 
| 270 296 | 
             
                        self._global_cursor = deepcopy(stream_state)
         | 
| @@ -273,7 +299,7 @@ class ConcurrentPerPartitionCursor(Cursor): | |
| 273 299 | 
             
                    else:
         | 
| 274 300 | 
             
                        self._lookback_window = int(stream_state.get("lookback_window", 0))
         | 
| 275 301 |  | 
| 276 | 
            -
                        for state in stream_state | 
| 302 | 
            +
                        for state in stream_state.get(self._PERPARTITION_STATE_KEY, []):
         | 
| 277 303 | 
             
                            self._cursor_per_partition[self._to_partition_key(state["partition"])] = (
         | 
| 278 304 | 
             
                                self._create_cursor(state["cursor"])
         | 
| 279 305 | 
             
                            )
         | 
| @@ -26,9 +26,6 @@ from airbyte_cdk.models import ( | |
| 26 26 | 
             
            from airbyte_cdk.sources.declarative.checks import COMPONENTS_CHECKER_TYPE_MAPPING
         | 
| 27 27 | 
             
            from airbyte_cdk.sources.declarative.checks.connection_checker import ConnectionChecker
         | 
| 28 28 | 
             
            from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource
         | 
| 29 | 
            -
            from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
         | 
| 30 | 
            -
                CheckStream as CheckStreamModel,
         | 
| 31 | 
            -
            )
         | 
| 32 29 | 
             
            from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
         | 
| 33 30 | 
             
                DeclarativeStream as DeclarativeStreamModel,
         | 
| 34 31 | 
             
            )
         | 
| @@ -736,8 +736,13 @@ class HttpResponseFilter(BaseModel): | |
| 736 736 | 
             
                parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
         | 
| 737 737 |  | 
| 738 738 |  | 
| 739 | 
            +
            class ComplexFieldType(BaseModel):
         | 
| 740 | 
            +
                field_type: str
         | 
| 741 | 
            +
                items: Optional[Union[str, ComplexFieldType]] = None
         | 
| 742 | 
            +
             | 
| 743 | 
            +
             | 
| 739 744 | 
             
            class TypesMap(BaseModel):
         | 
| 740 | 
            -
                target_type: Union[str, List[str]]
         | 
| 745 | 
            +
                target_type: Union[str, List[str], ComplexFieldType]
         | 
| 741 746 | 
             
                current_type: Union[str, List[str]]
         | 
| 742 747 | 
             
                condition: Optional[str] = None
         | 
| 743 748 |  | 
| @@ -2260,6 +2265,7 @@ class DynamicDeclarativeStream(BaseModel): | |
| 2260 2265 | 
             
                )
         | 
| 2261 2266 |  | 
| 2262 2267 |  | 
| 2268 | 
            +
            ComplexFieldType.update_forward_refs()
         | 
| 2263 2269 | 
             
            CompositeErrorHandler.update_forward_refs()
         | 
| 2264 2270 | 
             
            DeclarativeSource1.update_forward_refs()
         | 
| 2265 2271 | 
             
            DeclarativeSource2.update_forward_refs()
         | 
| @@ -133,6 +133,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import | |
| 133 133 | 
             
            from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
         | 
| 134 134 | 
             
                CheckStream as CheckStreamModel,
         | 
| 135 135 | 
             
            )
         | 
| 136 | 
            +
            from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
         | 
| 137 | 
            +
                ComplexFieldType as ComplexFieldTypeModel,
         | 
| 138 | 
            +
            )
         | 
| 136 139 | 
             
            from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
         | 
| 137 140 | 
             
                ComponentMappingDefinition as ComponentMappingDefinitionModel,
         | 
| 138 141 | 
             
            )
         | 
| @@ -429,6 +432,7 @@ from airbyte_cdk.sources.declarative.retrievers import ( | |
| 429 432 | 
             
                SimpleRetrieverTestReadDecorator,
         | 
| 430 433 | 
             
            )
         | 
| 431 434 | 
             
            from airbyte_cdk.sources.declarative.schema import (
         | 
| 435 | 
            +
                ComplexFieldType,
         | 
| 432 436 | 
             
                DefaultSchemaLoader,
         | 
| 433 437 | 
             
                DynamicSchemaLoader,
         | 
| 434 438 | 
             
                InlineSchemaLoader,
         | 
| @@ -503,6 +507,7 @@ class ModelToComponentFactory: | |
| 503 507 | 
             
                    disable_cache: bool = False,
         | 
| 504 508 | 
             
                    disable_resumable_full_refresh: bool = False,
         | 
| 505 509 | 
             
                    message_repository: Optional[MessageRepository] = None,
         | 
| 510 | 
            +
                    connector_state_manager: Optional[ConnectorStateManager] = None,
         | 
| 506 511 | 
             
                ):
         | 
| 507 512 | 
             
                    self._init_mappings()
         | 
| 508 513 | 
             
                    self._limit_pages_fetched_per_slice = limit_pages_fetched_per_slice
         | 
| @@ -514,6 +519,7 @@ class ModelToComponentFactory: | |
| 514 519 | 
             
                    self._message_repository = message_repository or InMemoryMessageRepository(
         | 
| 515 520 | 
             
                        self._evaluate_log_level(emit_connector_builder_messages)
         | 
| 516 521 | 
             
                    )
         | 
| 522 | 
            +
                    self._connector_state_manager = connector_state_manager or ConnectorStateManager()
         | 
| 517 523 |  | 
| 518 524 | 
             
                def _init_mappings(self) -> None:
         | 
| 519 525 | 
             
                    self.PYDANTIC_MODEL_TO_CONSTRUCTOR: Mapping[Type[BaseModel], Callable[..., Any]] = {
         | 
| @@ -572,6 +578,7 @@ class ModelToComponentFactory: | |
| 572 578 | 
             
                        DynamicSchemaLoaderModel: self.create_dynamic_schema_loader,
         | 
| 573 579 | 
             
                        SchemaTypeIdentifierModel: self.create_schema_type_identifier,
         | 
| 574 580 | 
             
                        TypesMapModel: self.create_types_map,
         | 
| 581 | 
            +
                        ComplexFieldTypeModel: self.create_complex_field_type,
         | 
| 575 582 | 
             
                        JwtAuthenticatorModel: self.create_jwt_authenticator,
         | 
| 576 583 | 
             
                        LegacyToPerPartitionStateMigrationModel: self.create_legacy_to_per_partition_state_migration,
         | 
| 577 584 | 
             
                        ListPartitionRouterModel: self.create_list_partition_router,
         | 
| @@ -922,17 +929,24 @@ class ModelToComponentFactory: | |
| 922 929 |  | 
| 923 930 | 
             
                def create_concurrent_cursor_from_datetime_based_cursor(
         | 
| 924 931 | 
             
                    self,
         | 
| 925 | 
            -
                    state_manager: ConnectorStateManager,
         | 
| 926 932 | 
             
                    model_type: Type[BaseModel],
         | 
| 927 933 | 
             
                    component_definition: ComponentDefinition,
         | 
| 928 934 | 
             
                    stream_name: str,
         | 
| 929 935 | 
             
                    stream_namespace: Optional[str],
         | 
| 930 936 | 
             
                    config: Config,
         | 
| 931 | 
            -
                    stream_state: MutableMapping[str, Any],
         | 
| 932 937 | 
             
                    message_repository: Optional[MessageRepository] = None,
         | 
| 933 938 | 
             
                    runtime_lookback_window: Optional[datetime.timedelta] = None,
         | 
| 934 939 | 
             
                    **kwargs: Any,
         | 
| 935 940 | 
             
                ) -> ConcurrentCursor:
         | 
| 941 | 
            +
                    # Per-partition incremental streams can dynamically create child cursors which will pass their current
         | 
| 942 | 
            +
                    # state via the stream_state keyword argument. Incremental syncs without parent streams use the
         | 
| 943 | 
            +
                    # incoming state and connector_state_manager that is initialized when the component factory is created
         | 
| 944 | 
            +
                    stream_state = (
         | 
| 945 | 
            +
                        self._connector_state_manager.get_stream_state(stream_name, stream_namespace)
         | 
| 946 | 
            +
                        if "stream_state" not in kwargs
         | 
| 947 | 
            +
                        else kwargs["stream_state"]
         | 
| 948 | 
            +
                    )
         | 
| 949 | 
            +
             | 
| 936 950 | 
             
                    component_type = component_definition.get("type")
         | 
| 937 951 | 
             
                    if component_definition.get("type") != model_type.__name__:
         | 
| 938 952 | 
             
                        raise ValueError(
         | 
| @@ -1126,7 +1140,7 @@ class ModelToComponentFactory: | |
| 1126 1140 | 
             
                        stream_namespace=stream_namespace,
         | 
| 1127 1141 | 
             
                        stream_state=stream_state,
         | 
| 1128 1142 | 
             
                        message_repository=message_repository or self._message_repository,
         | 
| 1129 | 
            -
                        connector_state_manager= | 
| 1143 | 
            +
                        connector_state_manager=self._connector_state_manager,
         | 
| 1130 1144 | 
             
                        connector_state_converter=connector_state_converter,
         | 
| 1131 1145 | 
             
                        cursor_field=cursor_field,
         | 
| 1132 1146 | 
             
                        slice_boundary_fields=slice_boundary_fields,
         | 
| @@ -1676,6 +1690,22 @@ class ModelToComponentFactory: | |
| 1676 1690 | 
             
                                stream_cursor=cursor_component,
         | 
| 1677 1691 | 
             
                            )
         | 
| 1678 1692 | 
             
                    elif model.incremental_sync:
         | 
| 1693 | 
            +
                        if model.retriever.type == "AsyncRetriever":
         | 
| 1694 | 
            +
                            if model.incremental_sync.type != "DatetimeBasedCursor":
         | 
| 1695 | 
            +
                                # We are currently in a transition to the Concurrent CDK and AsyncRetriever can only work with the support of unordered slices (for example, when we trigger reports for January and February, the report in February can be completed first). Once we have support for custom concurrent cursors or have a new implementation available in the CDK, we can enable more cursors here.
         | 
| 1696 | 
            +
                                raise ValueError(
         | 
| 1697 | 
            +
                                    "AsyncRetriever with cursor other than DatetimeBasedCursor is not supported yet"
         | 
| 1698 | 
            +
                                )
         | 
| 1699 | 
            +
                            if model.retriever.partition_router:
         | 
| 1700 | 
            +
                                # Note that this work is being done in parallel with the per-partition development; once that is merged, we could support this case here by calling `create_concurrent_cursor_from_perpartition_cursor`
         | 
| 1701 | 
            +
                                raise ValueError("Per partition state is not supported yet for AsyncRetriever")
         | 
| 1702 | 
            +
                            return self.create_concurrent_cursor_from_datetime_based_cursor(  # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
         | 
| 1703 | 
            +
                                model_type=DatetimeBasedCursorModel,
         | 
| 1704 | 
            +
                                component_definition=model.incremental_sync.__dict__,
         | 
| 1705 | 
            +
                                stream_name=model.name or "",
         | 
| 1706 | 
            +
                                stream_namespace=None,
         | 
| 1707 | 
            +
                                config=config or {},
         | 
| 1708 | 
            +
                            )
         | 
| 1679 1709 | 
             
                        return (
         | 
| 1680 1710 | 
             
                            self._create_component_from_model(model=model.incremental_sync, config=config)
         | 
| 1681 1711 | 
             
                            if model.incremental_sync
         | 
| @@ -1894,10 +1924,26 @@ class ModelToComponentFactory: | |
| 1894 1924 | 
             
                ) -> InlineSchemaLoader:
         | 
| 1895 1925 | 
             
                    return InlineSchemaLoader(schema=model.schema_ or {}, parameters={})
         | 
| 1896 1926 |  | 
| 1897 | 
            -
                 | 
| 1898 | 
            -
             | 
| 1927 | 
            +
                def create_complex_field_type(
         | 
| 1928 | 
            +
                    self, model: ComplexFieldTypeModel, config: Config, **kwargs: Any
         | 
| 1929 | 
            +
                ) -> ComplexFieldType:
         | 
| 1930 | 
            +
                    items = (
         | 
| 1931 | 
            +
                        self._create_component_from_model(model=model.items, config=config)
         | 
| 1932 | 
            +
                        if isinstance(model.items, ComplexFieldTypeModel)
         | 
| 1933 | 
            +
                        else model.items
         | 
| 1934 | 
            +
                    )
         | 
| 1935 | 
            +
             | 
| 1936 | 
            +
                    return ComplexFieldType(field_type=model.field_type, items=items)
         | 
| 1937 | 
            +
             | 
| 1938 | 
            +
                def create_types_map(self, model: TypesMapModel, config: Config, **kwargs: Any) -> TypesMap:
         | 
| 1939 | 
            +
                    target_type = (
         | 
| 1940 | 
            +
                        self._create_component_from_model(model=model.target_type, config=config)
         | 
| 1941 | 
            +
                        if isinstance(model.target_type, ComplexFieldTypeModel)
         | 
| 1942 | 
            +
                        else model.target_type
         | 
| 1943 | 
            +
                    )
         | 
| 1944 | 
            +
             | 
| 1899 1945 | 
             
                    return TypesMap(
         | 
| 1900 | 
            -
                        target_type= | 
| 1946 | 
            +
                        target_type=target_type,
         | 
| 1901 1947 | 
             
                        current_type=model.current_type,
         | 
| 1902 1948 | 
             
                        condition=model.condition if model.condition is not None else "True",
         | 
| 1903 1949 | 
             
                    )
         | 
| @@ -295,28 +295,58 @@ class SubstreamPartitionRouter(PartitionRouter): | |
| 295 295 | 
             
                        return
         | 
| 296 296 |  | 
| 297 297 | 
             
                    if not parent_state and incremental_dependency:
         | 
| 298 | 
            -
                        #  | 
| 299 | 
            -
                         | 
| 300 | 
            -
                        substream_state = substream_state_values[0] if substream_state_values else {}
         | 
| 301 | 
            -
                        # Filter out per partition state. Because we pass the state to the parent stream in the format {cursor_field: substream_state}
         | 
| 302 | 
            -
                        if isinstance(substream_state, (list, dict)):
         | 
| 303 | 
            -
                            substream_state = {}
         | 
| 304 | 
            -
             | 
| 305 | 
            -
                        parent_state = {}
         | 
| 306 | 
            -
             | 
| 307 | 
            -
                        # Copy child state to parent streams with incremental dependencies
         | 
| 308 | 
            -
                        if substream_state:
         | 
| 309 | 
            -
                            for parent_config in self.parent_stream_configs:
         | 
| 310 | 
            -
                                if parent_config.incremental_dependency:
         | 
| 311 | 
            -
                                    parent_state[parent_config.stream.name] = {
         | 
| 312 | 
            -
                                        parent_config.stream.cursor_field: substream_state
         | 
| 313 | 
            -
                                    }
         | 
| 298 | 
            +
                        # Migrate child state to parent state format
         | 
| 299 | 
            +
                        parent_state = self._migrate_child_state_to_parent_state(stream_state)
         | 
| 314 300 |  | 
| 315 301 | 
             
                    # Set state for each parent stream with an incremental dependency
         | 
| 316 302 | 
             
                    for parent_config in self.parent_stream_configs:
         | 
| 317 303 | 
             
                        if parent_config.incremental_dependency:
         | 
| 318 304 | 
             
                            parent_config.stream.state = parent_state.get(parent_config.stream.name, {})
         | 
| 319 305 |  | 
| 306 | 
            +
                def _migrate_child_state_to_parent_state(self, stream_state: StreamState) -> StreamState:
         | 
| 307 | 
            +
                    """
         | 
| 308 | 
            +
                    Migrate the child stream state to the parent stream's state format.
         | 
| 309 | 
            +
             | 
| 310 | 
            +
                    This method converts the global or child state into a format compatible with parent
         | 
| 311 | 
            +
                    streams. The migration occurs only for parent streams with incremental dependencies.
         | 
| 312 | 
            +
                    The method filters out per-partition states and retains only the global state in the
         | 
| 313 | 
            +
                    format `{cursor_field: cursor_value}`.
         | 
| 314 | 
            +
             | 
| 315 | 
            +
                    Args:
         | 
| 316 | 
            +
                        stream_state (StreamState): The state to migrate. Expected formats include:
         | 
| 317 | 
            +
                            - {"updated_at": "2023-05-27T00:00:00Z"}
         | 
| 318 | 
            +
                            - {"states": [...] } (ignored during migration)
         | 
| 319 | 
            +
             | 
| 320 | 
            +
                    Returns:
         | 
| 321 | 
            +
                        StreamState: A migrated state for parent streams in the format:
         | 
| 322 | 
            +
                            {
         | 
| 323 | 
            +
                                "parent_stream_name": {"parent_stream_cursor": "2023-05-27T00:00:00Z"}
         | 
| 324 | 
            +
                            }
         | 
| 325 | 
            +
             | 
| 326 | 
            +
                    Example:
         | 
| 327 | 
            +
                        Input: {"updated_at": "2023-05-27T00:00:00Z"}
         | 
| 328 | 
            +
                        Output: {
         | 
| 329 | 
            +
                            "parent_stream_name": {"parent_stream_cursor": "2023-05-27T00:00:00Z"}
         | 
| 330 | 
            +
                        }
         | 
| 331 | 
            +
                    """
         | 
| 332 | 
            +
                    substream_state_values = list(stream_state.values())
         | 
| 333 | 
            +
                    substream_state = substream_state_values[0] if substream_state_values else {}
         | 
| 334 | 
            +
             | 
| 335 | 
            +
                    # Ignore per-partition states or invalid formats
         | 
| 336 | 
            +
                    if isinstance(substream_state, (list, dict)) or len(substream_state_values) != 1:
         | 
| 337 | 
            +
                        return {}
         | 
| 338 | 
            +
             | 
| 339 | 
            +
                    # Copy child state to parent streams with incremental dependencies
         | 
| 340 | 
            +
                    parent_state = {}
         | 
| 341 | 
            +
                    if substream_state:
         | 
| 342 | 
            +
                        for parent_config in self.parent_stream_configs:
         | 
| 343 | 
            +
                            if parent_config.incremental_dependency:
         | 
| 344 | 
            +
                                parent_state[parent_config.stream.name] = {
         | 
| 345 | 
            +
                                    parent_config.stream.cursor_field: substream_state
         | 
| 346 | 
            +
                                }
         | 
| 347 | 
            +
             | 
| 348 | 
            +
                    return parent_state
         | 
| 349 | 
            +
             | 
| 320 350 | 
             
                def get_stream_state(self) -> Optional[Mapping[str, StreamState]]:
         | 
| 321 351 | 
             
                    """
         | 
| 322 352 | 
             
                    Get the state of the parent streams.
         | 
| @@ -75,7 +75,7 @@ class AsyncRetriever(Retriever): | |
| 75 75 | 
             
                    """
         | 
| 76 76 | 
             
                    if not isinstance(stream_slice, StreamSlice) or "partition" not in stream_slice.partition:
         | 
| 77 77 | 
             
                        raise AirbyteTracedException(
         | 
| 78 | 
            -
                            message="Invalid arguments to  | 
| 78 | 
            +
                            message="Invalid arguments to AsyncRetriever.read_records: stream_slice is not optional. Please contact Airbyte Support",
         | 
| 79 79 | 
             
                            failure_type=FailureType.system_error,
         | 
| 80 80 | 
             
                        )
         | 
| 81 81 | 
             
                    return stream_slice["partition"]  # type: ignore  # stream_slice["partition"] has been added as an AsyncPartition as part of stream_slices
         | 
| @@ -4,6 +4,7 @@ | |
| 4 4 |  | 
| 5 5 | 
             
            from airbyte_cdk.sources.declarative.schema.default_schema_loader import DefaultSchemaLoader
         | 
| 6 6 | 
             
            from airbyte_cdk.sources.declarative.schema.dynamic_schema_loader import (
         | 
| 7 | 
            +
                ComplexFieldType,
         | 
| 7 8 | 
             
                DynamicSchemaLoader,
         | 
| 8 9 | 
             
                SchemaTypeIdentifier,
         | 
| 9 10 | 
             
                TypesMap,
         | 
| @@ -18,6 +19,7 @@ __all__ = [ | |
| 18 19 | 
             
                "SchemaLoader",
         | 
| 19 20 | 
             
                "InlineSchemaLoader",
         | 
| 20 21 | 
             
                "DynamicSchemaLoader",
         | 
| 22 | 
            +
                "ComplexFieldType",
         | 
| 21 23 | 
             
                "TypesMap",
         | 
| 22 24 | 
             
                "SchemaTypeIdentifier",
         | 
| 23 25 | 
             
            ]
         | 
| @@ -18,7 +18,7 @@ from airbyte_cdk.sources.declarative.transformations import RecordTransformation | |
| 18 18 | 
             
            from airbyte_cdk.sources.source import ExperimentalClassWarning
         | 
| 19 19 | 
             
            from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
         | 
| 20 20 |  | 
| 21 | 
            -
            AIRBYTE_DATA_TYPES: Mapping[str,  | 
| 21 | 
            +
            AIRBYTE_DATA_TYPES: Mapping[str, MutableMapping[str, Any]] = {
         | 
| 22 22 | 
             
                "string": {"type": ["null", "string"]},
         | 
| 23 23 | 
             
                "boolean": {"type": ["null", "boolean"]},
         | 
| 24 24 | 
             
                "date": {"type": ["null", "string"], "format": "date"},
         | 
| @@ -45,6 +45,25 @@ AIRBYTE_DATA_TYPES: Mapping[str, Mapping[str, Any]] = { | |
| 45 45 | 
             
            }
         | 
| 46 46 |  | 
| 47 47 |  | 
| 48 | 
            +
            @deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
         | 
| 49 | 
            +
            @dataclass(frozen=True)
         | 
| 50 | 
            +
            class ComplexFieldType:
         | 
| 51 | 
            +
                """
         | 
| 52 | 
            +
                Identifies complex field type
         | 
| 53 | 
            +
                """
         | 
| 54 | 
            +
             | 
| 55 | 
            +
                field_type: str
         | 
| 56 | 
            +
                items: Optional[Union[str, "ComplexFieldType"]] = None
         | 
| 57 | 
            +
             | 
| 58 | 
            +
                def __post_init__(self) -> None:
         | 
| 59 | 
            +
                    """
         | 
| 60 | 
            +
                    Enforces that `items` is only used when `field_type` is an array
         | 
| 61 | 
            +
                    """
         | 
| 62 | 
            +
                    # `items` is valid only when `field_type` is "array"
         | 
| 63 | 
            +
                    if self.items and self.field_type != "array":
         | 
| 64 | 
            +
                        raise ValueError("'items' can only be used when 'field_type' is an array.")
         | 
| 65 | 
            +
             | 
| 66 | 
            +
             | 
| 48 67 | 
             
            @deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
         | 
| 49 68 | 
             
            @dataclass(frozen=True)
         | 
| 50 69 | 
             
            class TypesMap:
         | 
| @@ -52,7 +71,7 @@ class TypesMap: | |
| 52 71 | 
             
                Represents a mapping between a current type and its corresponding target type.
         | 
| 53 72 | 
             
                """
         | 
| 54 73 |  | 
| 55 | 
            -
                target_type: Union[List[str], str]
         | 
| 74 | 
            +
                target_type: Union[List[str], str, ComplexFieldType]
         | 
| 56 75 | 
             
                current_type: Union[List[str], str]
         | 
| 57 76 | 
             
                condition: Optional[str]
         | 
| 58 77 |  | 
| @@ -135,8 +154,9 @@ class DynamicSchemaLoader(SchemaLoader): | |
| 135 154 | 
             
                    transformed_properties = self._transform(properties, {})
         | 
| 136 155 |  | 
| 137 156 | 
             
                    return {
         | 
| 138 | 
            -
                        "$schema": " | 
| 157 | 
            +
                        "$schema": "https://json-schema.org/draft-07/schema#",
         | 
| 139 158 | 
             
                        "type": "object",
         | 
| 159 | 
            +
                        "additionalProperties": True,
         | 
| 140 160 | 
             
                        "properties": transformed_properties,
         | 
| 141 161 | 
             
                    }
         | 
| 142 162 |  | 
| @@ -188,18 +208,36 @@ class DynamicSchemaLoader(SchemaLoader): | |
| 188 208 | 
             
                        first_type = self._get_airbyte_type(mapped_field_type[0])
         | 
| 189 209 | 
             
                        second_type = self._get_airbyte_type(mapped_field_type[1])
         | 
| 190 210 | 
             
                        return {"oneOf": [first_type, second_type]}
         | 
| 211 | 
            +
             | 
| 191 212 | 
             
                    elif isinstance(mapped_field_type, str):
         | 
| 192 213 | 
             
                        return self._get_airbyte_type(mapped_field_type)
         | 
| 214 | 
            +
             | 
| 215 | 
            +
                    elif isinstance(mapped_field_type, ComplexFieldType):
         | 
| 216 | 
            +
                        return self._resolve_complex_type(mapped_field_type)
         | 
| 217 | 
            +
             | 
| 193 218 | 
             
                    else:
         | 
| 194 219 | 
             
                        raise ValueError(
         | 
| 195 220 | 
             
                            f"Invalid data type. Available string or two items list of string. Got {mapped_field_type}."
         | 
| 196 221 | 
             
                        )
         | 
| 197 222 |  | 
| 223 | 
            +
                def _resolve_complex_type(self, complex_type: ComplexFieldType) -> Mapping[str, Any]:
         | 
| 224 | 
            +
                    if not complex_type.items:
         | 
| 225 | 
            +
                        return self._get_airbyte_type(complex_type.field_type)
         | 
| 226 | 
            +
             | 
| 227 | 
            +
                    field_type = self._get_airbyte_type(complex_type.field_type)
         | 
| 228 | 
            +
                    field_type["items"] = (
         | 
| 229 | 
            +
                        self._get_airbyte_type(complex_type.items)
         | 
| 230 | 
            +
                        if isinstance(complex_type.items, str)
         | 
| 231 | 
            +
                        else self._resolve_complex_type(complex_type.items)
         | 
| 232 | 
            +
                    )
         | 
| 233 | 
            +
             | 
| 234 | 
            +
                    return field_type
         | 
| 235 | 
            +
             | 
| 198 236 | 
             
                def _replace_type_if_not_valid(
         | 
| 199 237 | 
             
                    self,
         | 
| 200 238 | 
             
                    field_type: Union[List[str], str],
         | 
| 201 239 | 
             
                    raw_schema: MutableMapping[str, Any],
         | 
| 202 | 
            -
                ) -> Union[List[str], str]:
         | 
| 240 | 
            +
                ) -> Union[List[str], str, ComplexFieldType]:
         | 
| 203 241 | 
             
                    """
         | 
| 204 242 | 
             
                    Replaces a field type if it matches a type mapping in `types_map`.
         | 
| 205 243 | 
             
                    """
         | 
| @@ -216,7 +254,7 @@ class DynamicSchemaLoader(SchemaLoader): | |
| 216 254 | 
             
                    return field_type
         | 
| 217 255 |  | 
| 218 256 | 
             
                @staticmethod
         | 
| 219 | 
            -
                def _get_airbyte_type(field_type: str) ->  | 
| 257 | 
            +
                def _get_airbyte_type(field_type: str) -> MutableMapping[str, Any]:
         | 
| 220 258 | 
             
                    """
         | 
| 221 259 | 
             
                    Maps a field type to its corresponding Airbyte type definition.
         | 
| 222 260 | 
             
                    """
         |