airbyte-cdk 0.61.2__py3-none-any.whl → 0.62.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/abstract_source.py +14 -33
- airbyte_cdk/sources/connector_state_manager.py +16 -4
- airbyte_cdk/sources/file_based/file_based_source.py +87 -35
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +3 -0
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +15 -13
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +5 -0
- airbyte_cdk/sources/file_based/stream/concurrent/{cursor.py → cursor/abstract_concurrent_file_based_cursor.py} +22 -44
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +279 -0
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_noop_cursor.py +56 -0
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +11 -2
- airbyte_cdk/test/mock_http/mocker.py +3 -1
- airbyte_cdk/test/mock_http/response.py +9 -1
- airbyte_cdk/utils/traced_exception.py +1 -16
- {airbyte_cdk-0.61.2.dist-info → airbyte_cdk-0.62.1.dist-info}/METADATA +1 -1
- {airbyte_cdk-0.61.2.dist-info → airbyte_cdk-0.62.1.dist-info}/RECORD +33 -26
- unit_tests/sources/file_based/helpers.py +5 -0
- unit_tests/sources/file_based/scenarios/concurrent_incremental_scenarios.py +2860 -0
- unit_tests/sources/file_based/scenarios/incremental_scenarios.py +11 -0
- unit_tests/sources/file_based/scenarios/scenario_builder.py +6 -2
- unit_tests/sources/file_based/stream/concurrent/__init__.py +0 -0
- unit_tests/sources/file_based/stream/concurrent/test_adapters.py +365 -0
- unit_tests/sources/file_based/stream/concurrent/test_file_based_concurrent_cursor.py +462 -0
- unit_tests/sources/file_based/test_file_based_scenarios.py +45 -0
- unit_tests/sources/file_based/test_scenarios.py +16 -8
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +13 -2
- unit_tests/sources/test_abstract_source.py +36 -170
- unit_tests/sources/test_connector_state_manager.py +20 -13
- unit_tests/sources/test_integration_source.py +8 -25
- unit_tests/sources/test_source_read.py +1 -1
- unit_tests/test/mock_http/test_mocker.py +3 -1
- {airbyte_cdk-0.61.2.dist-info → airbyte_cdk-0.62.1.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-0.61.2.dist-info → airbyte_cdk-0.62.1.dist-info}/WHEEL +0 -0
- {airbyte_cdk-0.61.2.dist-info → airbyte_cdk-0.62.1.dist-info}/top_level.txt +0 -0
| @@ -15,7 +15,6 @@ from airbyte_cdk.models import ( | |
| 15 15 | 
             
                ConfiguredAirbyteCatalog,
         | 
| 16 16 | 
             
                ConfiguredAirbyteStream,
         | 
| 17 17 | 
             
                Status,
         | 
| 18 | 
            -
                StreamDescriptor,
         | 
| 19 18 | 
             
                SyncMode,
         | 
| 20 19 | 
             
            )
         | 
| 21 20 | 
             
            from airbyte_cdk.models import Type as MessageType
         | 
| @@ -28,7 +27,6 @@ from airbyte_cdk.sources.streams.http.http import HttpStream | |
| 28 27 | 
             
            from airbyte_cdk.sources.utils.record_helper import stream_data_to_airbyte_message
         | 
| 29 28 | 
             
            from airbyte_cdk.sources.utils.schema_helpers import InternalConfig, split_config
         | 
| 30 29 | 
             
            from airbyte_cdk.sources.utils.slice_logger import DebugSliceLogger, SliceLogger
         | 
| 31 | 
            -
            from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets
         | 
| 32 30 | 
             
            from airbyte_cdk.utils.event_timing import create_timer
         | 
| 33 31 | 
             
            from airbyte_cdk.utils.stream_status_utils import as_airbyte_message as stream_status_as_airbyte_message
         | 
| 34 32 | 
             
            from airbyte_cdk.utils.traced_exception import AirbyteTracedException
         | 
| @@ -101,7 +99,7 @@ class AbstractSource(Source, ABC): | |
| 101 99 | 
             
                    # TODO assert all streams exist in the connector
         | 
| 102 100 | 
             
                    # get the streams once in case the connector needs to make any queries to generate them
         | 
| 103 101 | 
             
                    stream_instances = {s.name: s for s in self.streams(config)}
         | 
| 104 | 
            -
                    state_manager = ConnectorStateManager(stream_instance_map= | 
| 102 | 
            +
                    state_manager = ConnectorStateManager(stream_instance_map={s.stream.name: s.stream for s in catalog.streams}, state=state)
         | 
| 105 103 | 
             
                    self._stream_to_instance_map = stream_instances
         | 
| 106 104 |  | 
| 107 105 | 
             
                    stream_name_to_exception: MutableMapping[str, AirbyteTracedException] = {}
         | 
| @@ -135,16 +133,11 @@ class AbstractSource(Source, ABC): | |
| 135 133 | 
             
                                logger.info(f"Marking stream {configured_stream.stream.name} as STOPPED")
         | 
| 136 134 | 
             
                                yield stream_status_as_airbyte_message(configured_stream.stream, AirbyteStreamStatus.COMPLETE)
         | 
| 137 135 | 
             
                            except AirbyteTracedException as e:
         | 
| 138 | 
            -
                                logger.exception(f"Encountered an exception while reading stream {configured_stream.stream.name}")
         | 
| 139 | 
            -
                                logger.info(f"Marking stream {configured_stream.stream.name} as STOPPED")
         | 
| 140 136 | 
             
                                yield stream_status_as_airbyte_message(configured_stream.stream, AirbyteStreamStatus.INCOMPLETE)
         | 
| 141 | 
            -
                                 | 
| 142 | 
            -
             | 
| 143 | 
            -
                                 | 
| 144 | 
            -
                                     | 
| 145 | 
            -
                                        f"Stopping sync on error from stream {configured_stream.stream.name} because {self.name} does not support continuing syncs on error."
         | 
| 146 | 
            -
                                    )
         | 
| 147 | 
            -
                                    break
         | 
| 137 | 
            +
                                if self.continue_sync_on_stream_failure:
         | 
| 138 | 
            +
                                    stream_name_to_exception[stream_instance.name] = e
         | 
| 139 | 
            +
                                else:
         | 
| 140 | 
            +
                                    raise e
         | 
| 148 141 | 
             
                            except Exception as e:
         | 
| 149 142 | 
             
                                yield from self._emit_queued_messages()
         | 
| 150 143 | 
             
                                logger.exception(f"Encountered an exception while reading stream {configured_stream.stream.name}")
         | 
| @@ -152,27 +145,15 @@ class AbstractSource(Source, ABC): | |
| 152 145 | 
             
                                yield stream_status_as_airbyte_message(configured_stream.stream, AirbyteStreamStatus.INCOMPLETE)
         | 
| 153 146 | 
             
                                display_message = stream_instance.get_error_display_message(e)
         | 
| 154 147 | 
             
                                if display_message:
         | 
| 155 | 
            -
                                     | 
| 156 | 
            -
                                 | 
| 157 | 
            -
                                    traced_exception = AirbyteTracedException.from_exception(e)
         | 
| 158 | 
            -
                                yield traced_exception.as_sanitized_airbyte_message(
         | 
| 159 | 
            -
                                    stream_descriptor=StreamDescriptor(name=configured_stream.stream.name)
         | 
| 160 | 
            -
                                )
         | 
| 161 | 
            -
                                stream_name_to_exception[stream_instance.name] = traced_exception
         | 
| 162 | 
            -
                                if self.stop_sync_on_stream_failure:
         | 
| 163 | 
            -
                                    logger.info(f"{self.name} does not support continuing syncs on error from stream {configured_stream.stream.name}")
         | 
| 164 | 
            -
                                    break
         | 
| 148 | 
            +
                                    raise AirbyteTracedException.from_exception(e, message=display_message) from e
         | 
| 149 | 
            +
                                raise e
         | 
| 165 150 | 
             
                            finally:
         | 
| 166 151 | 
             
                                timer.finish_event()
         | 
| 167 152 | 
             
                                logger.info(f"Finished syncing {configured_stream.stream.name}")
         | 
| 168 153 | 
             
                                logger.info(timer.report())
         | 
| 169 154 |  | 
| 170 | 
            -
                    if len(stream_name_to_exception) > 0:
         | 
| 171 | 
            -
                         | 
| 172 | 
            -
                        logger.info(error_message)
         | 
| 173 | 
            -
                        # We still raise at least one exception when a stream raises an exception because the platform
         | 
| 174 | 
            -
                        # currently relies on a non-zero exit code to determine if a sync attempt has failed
         | 
| 175 | 
            -
                        raise AirbyteTracedException(message=error_message)
         | 
| 155 | 
            +
                    if self.continue_sync_on_stream_failure and len(stream_name_to_exception) > 0:
         | 
| 156 | 
            +
                        raise AirbyteTracedException(message=self._generate_failed_streams_error_message(stream_name_to_exception))
         | 
| 176 157 | 
             
                    logger.info(f"Finished syncing {self.name}")
         | 
| 177 158 |  | 
| 178 159 | 
             
                @property
         | 
| @@ -301,17 +282,17 @@ class AbstractSource(Source, ABC): | |
| 301 282 | 
             
                    return _default_message_repository
         | 
| 302 283 |  | 
| 303 284 | 
             
                @property
         | 
| 304 | 
            -
                def  | 
| 285 | 
            +
                def continue_sync_on_stream_failure(self) -> bool:
         | 
| 305 286 | 
             
                    """
         | 
| 306 287 | 
             
                    WARNING: This function is in-development which means it is subject to change. Use at your own risk.
         | 
| 307 288 |  | 
| 308 | 
            -
                    By default,  | 
| 309 | 
            -
                     | 
| 310 | 
            -
                     | 
| 289 | 
            +
                    By default, a source should raise an exception and stop the sync when it encounters an error while syncing a stream. This
         | 
| 290 | 
            +
                    method can be overridden on a per-source basis so that a source will continue syncing other streams even if an
         | 
| 291 | 
            +
                    exception is raised for a stream.
         | 
| 311 292 | 
             
                    """
         | 
| 312 293 | 
             
                    return False
         | 
| 313 294 |  | 
| 314 295 | 
             
                @staticmethod
         | 
| 315 296 | 
             
                def _generate_failed_streams_error_message(stream_failures: Mapping[str, AirbyteTracedException]) -> str:
         | 
| 316 | 
            -
                    failures = ", ".join([f"{stream}: { | 
| 297 | 
            +
                    failures = ", ".join([f"{stream}: {exception.__repr__()}" for stream, exception in stream_failures.items()])
         | 
| 317 298 | 
             
                    return f"During the sync, the following streams did not sync successfully: {failures}"
         | 
| @@ -5,7 +5,15 @@ | |
| 5 5 | 
             
            import copy
         | 
| 6 6 | 
             
            from typing import Any, List, Mapping, MutableMapping, Optional, Tuple, Union
         | 
| 7 7 |  | 
| 8 | 
            -
            from airbyte_cdk.models import  | 
| 8 | 
            +
            from airbyte_cdk.models import (
         | 
| 9 | 
            +
                AirbyteMessage,
         | 
| 10 | 
            +
                AirbyteStateBlob,
         | 
| 11 | 
            +
                AirbyteStateMessage,
         | 
| 12 | 
            +
                AirbyteStateType,
         | 
| 13 | 
            +
                AirbyteStream,
         | 
| 14 | 
            +
                AirbyteStreamState,
         | 
| 15 | 
            +
                StreamDescriptor,
         | 
| 16 | 
            +
            )
         | 
| 9 17 | 
             
            from airbyte_cdk.models import Type as MessageType
         | 
| 10 18 | 
             
            from airbyte_cdk.sources.streams import Stream
         | 
| 11 19 | 
             
            from pydantic import Extra
         | 
| @@ -29,7 +37,9 @@ class ConnectorStateManager: | |
| 29 37 | 
             
                """
         | 
| 30 38 |  | 
| 31 39 | 
             
                def __init__(
         | 
| 32 | 
            -
                    self, | 
| 40 | 
            +
                    self,
         | 
| 41 | 
            +
                    stream_instance_map: Mapping[str, Union[Stream, AirbyteStream]],
         | 
| 42 | 
            +
                    state: Optional[Union[List[AirbyteStateMessage], MutableMapping[str, Any]]] = None,
         | 
| 33 43 | 
             
                ):
         | 
| 34 44 | 
             
                    shared_state, per_stream_states = self._extract_from_state_message(state, stream_instance_map)
         | 
| 35 45 |  | 
| @@ -97,7 +107,9 @@ class ConnectorStateManager: | |
| 97 107 |  | 
| 98 108 | 
             
                @classmethod
         | 
| 99 109 | 
             
                def _extract_from_state_message(
         | 
| 100 | 
            -
                    cls, | 
| 110 | 
            +
                    cls,
         | 
| 111 | 
            +
                    state: Optional[Union[List[AirbyteStateMessage], MutableMapping[str, Any]]],
         | 
| 112 | 
            +
                    stream_instance_map: Mapping[str, Union[Stream, AirbyteStream]],
         | 
| 101 113 | 
             
                ) -> Tuple[Optional[AirbyteStateBlob], MutableMapping[HashableStreamDescriptor, Optional[AirbyteStateBlob]]]:
         | 
| 102 114 | 
             
                    """
         | 
| 103 115 | 
             
                    Takes an incoming list of state messages or the legacy state format and extracts state attributes according to type
         | 
| @@ -149,7 +161,7 @@ class ConnectorStateManager: | |
| 149 161 |  | 
| 150 162 | 
             
                @staticmethod
         | 
| 151 163 | 
             
                def _create_descriptor_to_stream_state_mapping(
         | 
| 152 | 
            -
                    state: MutableMapping[str, Any], stream_to_instance_map: Mapping[str, Stream]
         | 
| 164 | 
            +
                    state: MutableMapping[str, Any], stream_to_instance_map: Mapping[str, Union[Stream, AirbyteStream]]
         | 
| 153 165 | 
             
                ) -> MutableMapping[HashableStreamDescriptor, Optional[AirbyteStateBlob]]:
         | 
| 154 166 | 
             
                    """
         | 
| 155 167 | 
             
                    Takes incoming state received in the legacy format and transforms it into a mapping of StreamDescriptor to AirbyteStreamState
         | 
| @@ -12,6 +12,7 @@ from airbyte_cdk.logger import AirbyteLogFormatter, init_logger | |
| 12 12 | 
             
            from airbyte_cdk.models import (
         | 
| 13 13 | 
             
                AirbyteMessage,
         | 
| 14 14 | 
             
                AirbyteStateMessage,
         | 
| 15 | 
            +
                AirbyteStream,
         | 
| 15 16 | 
             
                ConfiguredAirbyteCatalog,
         | 
| 16 17 | 
             
                ConnectorSpecification,
         | 
| 17 18 | 
             
                FailureType,
         | 
| @@ -20,6 +21,7 @@ from airbyte_cdk.models import ( | |
| 20 21 | 
             
            )
         | 
| 21 22 | 
             
            from airbyte_cdk.sources.concurrent_source.concurrent_source import ConcurrentSource
         | 
| 22 23 | 
             
            from airbyte_cdk.sources.concurrent_source.concurrent_source_adapter import ConcurrentSourceAdapter
         | 
| 24 | 
            +
            from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
         | 
| 23 25 | 
             
            from airbyte_cdk.sources.file_based.availability_strategy import AbstractFileBasedAvailabilityStrategy, DefaultFileBasedAvailabilityStrategy
         | 
| 24 26 | 
             
            from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import AbstractFileBasedSpec
         | 
| 25 27 | 
             
            from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig, ValidationPolicy
         | 
| @@ -31,12 +33,15 @@ from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeP | |
| 31 33 | 
             
            from airbyte_cdk.sources.file_based.schema_validation_policies import DEFAULT_SCHEMA_VALIDATION_POLICIES, AbstractSchemaValidationPolicy
         | 
| 32 34 | 
             
            from airbyte_cdk.sources.file_based.stream import AbstractFileBasedStream, DefaultFileBasedStream
         | 
| 33 35 | 
             
            from airbyte_cdk.sources.file_based.stream.concurrent.adapters import FileBasedStreamFacade
         | 
| 34 | 
            -
            from airbyte_cdk.sources.file_based.stream.concurrent.cursor import  | 
| 36 | 
            +
            from airbyte_cdk.sources.file_based.stream.concurrent.cursor import (
         | 
| 37 | 
            +
                AbstractConcurrentFileBasedCursor,
         | 
| 38 | 
            +
                FileBasedConcurrentCursor,
         | 
| 39 | 
            +
                FileBasedNoopCursor,
         | 
| 40 | 
            +
            )
         | 
| 35 41 | 
             
            from airbyte_cdk.sources.file_based.stream.cursor import AbstractFileBasedCursor
         | 
| 36 | 
            -
            from airbyte_cdk.sources.file_based.stream.cursor.default_file_based_cursor import DefaultFileBasedCursor
         | 
| 37 42 | 
             
            from airbyte_cdk.sources.message.repository import InMemoryMessageRepository, MessageRepository
         | 
| 38 | 
            -
            from airbyte_cdk.sources.source import TState
         | 
| 39 43 | 
             
            from airbyte_cdk.sources.streams import Stream
         | 
| 44 | 
            +
            from airbyte_cdk.sources.streams.concurrent.cursor import CursorField
         | 
| 40 45 | 
             
            from airbyte_cdk.utils.analytics_message import create_analytics_message
         | 
| 41 46 | 
             
            from airbyte_cdk.utils.traced_exception import AirbyteTracedException
         | 
| 42 47 | 
             
            from pydantic.error_wrappers import ValidationError
         | 
| @@ -56,12 +61,12 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC): | |
| 56 61 | 
             
                    spec_class: Type[AbstractFileBasedSpec],
         | 
| 57 62 | 
             
                    catalog: Optional[ConfiguredAirbyteCatalog],
         | 
| 58 63 | 
             
                    config: Optional[Mapping[str, Any]],
         | 
| 59 | 
            -
                    state: Optional[ | 
| 64 | 
            +
                    state: Optional[MutableMapping[str, Any]],
         | 
| 60 65 | 
             
                    availability_strategy: Optional[AbstractFileBasedAvailabilityStrategy] = None,
         | 
| 61 66 | 
             
                    discovery_policy: AbstractDiscoveryPolicy = DefaultDiscoveryPolicy(),
         | 
| 62 67 | 
             
                    parsers: Mapping[Type[Any], FileTypeParser] = default_parsers,
         | 
| 63 68 | 
             
                    validation_policies: Mapping[ValidationPolicy, AbstractSchemaValidationPolicy] = DEFAULT_SCHEMA_VALIDATION_POLICIES,
         | 
| 64 | 
            -
                    cursor_cls: Type[AbstractFileBasedCursor] =  | 
| 69 | 
            +
                    cursor_cls: Type[Union[AbstractConcurrentFileBasedCursor, AbstractFileBasedCursor]] = FileBasedConcurrentCursor,
         | 
| 65 70 | 
             
                ):
         | 
| 66 71 | 
             
                    self.stream_reader = stream_reader
         | 
| 67 72 | 
             
                    self.spec_class = spec_class
         | 
| @@ -137,52 +142,99 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC): | |
| 137 142 | 
             
                    """
         | 
| 138 143 | 
             
                    Return a list of this source's streams.
         | 
| 139 144 | 
             
                    """
         | 
| 140 | 
            -
                    file_based_streams = self._get_file_based_streams(config)
         | 
| 141 | 
            -
             | 
| 142 | 
            -
                    configured_streams: List[Stream] = []
         | 
| 143 | 
            -
             | 
| 144 | 
            -
                    for stream in file_based_streams:
         | 
| 145 | 
            -
                        sync_mode = self._get_sync_mode_from_catalog(stream)
         | 
| 146 | 
            -
                        if sync_mode == SyncMode.full_refresh and hasattr(self, "_concurrency_level") and self._concurrency_level is not None:
         | 
| 147 | 
            -
                            configured_streams.append(
         | 
| 148 | 
            -
                                FileBasedStreamFacade.create_from_stream(stream, self, self.logger, None, FileBasedNoopCursor(stream.config))
         | 
| 149 | 
            -
                            )
         | 
| 150 | 
            -
                        else:
         | 
| 151 | 
            -
                            configured_streams.append(stream)
         | 
| 152 145 |  | 
| 153 | 
            -
                     | 
| 146 | 
            +
                    if self.catalog:
         | 
| 147 | 
            +
                        state_manager = ConnectorStateManager(
         | 
| 148 | 
            +
                            stream_instance_map={s.stream.name: s.stream for s in self.catalog.streams},
         | 
| 149 | 
            +
                            state=self.state,
         | 
| 150 | 
            +
                        )
         | 
| 151 | 
            +
                    else:
         | 
| 152 | 
            +
                        # During `check` operations we don't have a catalog so cannot create a state manager.
         | 
| 153 | 
            +
                        # Since the state manager is only required for incremental syncs, this is fine.
         | 
| 154 | 
            +
                        state_manager = None
         | 
| 154 155 |  | 
| 155 | 
            -
                def _get_file_based_streams(self, config: Mapping[str, Any]) -> List[AbstractFileBasedStream]:
         | 
| 156 156 | 
             
                    try:
         | 
| 157 157 | 
             
                        parsed_config = self._get_parsed_config(config)
         | 
| 158 158 | 
             
                        self.stream_reader.config = parsed_config
         | 
| 159 | 
            -
                        streams: List[ | 
| 159 | 
            +
                        streams: List[Stream] = []
         | 
| 160 160 | 
             
                        for stream_config in parsed_config.streams:
         | 
| 161 | 
            +
                            # Like state_manager, `catalog_stream` may be None during `check`
         | 
| 162 | 
            +
                            catalog_stream = self._get_stream_from_catalog(stream_config)
         | 
| 163 | 
            +
                            stream_state = (
         | 
| 164 | 
            +
                                state_manager.get_stream_state(catalog_stream.name, catalog_stream.namespace)
         | 
| 165 | 
            +
                                if (state_manager and catalog_stream)
         | 
| 166 | 
            +
                                else None
         | 
| 167 | 
            +
                            )
         | 
| 161 168 | 
             
                            self._validate_input_schema(stream_config)
         | 
| 162 | 
            -
             | 
| 163 | 
            -
             | 
| 164 | 
            -
             | 
| 165 | 
            -
             | 
| 166 | 
            -
             | 
| 167 | 
            -
             | 
| 168 | 
            -
                                     | 
| 169 | 
            -
                                    parsers=self.parsers,
         | 
| 170 | 
            -
                                    validation_policy=self._validate_and_get_validation_policy(stream_config),
         | 
| 171 | 
            -
                                    cursor=self.cursor_cls(stream_config),
         | 
| 172 | 
            -
                                    errors_collector=self.errors_collector,
         | 
| 169 | 
            +
             | 
| 170 | 
            +
                            sync_mode = self._get_sync_mode_from_catalog(stream_config.name)
         | 
| 171 | 
            +
             | 
| 172 | 
            +
                            if sync_mode == SyncMode.full_refresh and hasattr(self, "_concurrency_level") and self._concurrency_level is not None:
         | 
| 173 | 
            +
                                cursor = FileBasedNoopCursor(stream_config)
         | 
| 174 | 
            +
                                stream = FileBasedStreamFacade.create_from_stream(
         | 
| 175 | 
            +
                                    self._make_default_stream(stream_config, cursor), self, self.logger, stream_state, cursor
         | 
| 173 176 | 
             
                                )
         | 
| 174 | 
            -
             | 
| 177 | 
            +
             | 
| 178 | 
            +
                            elif (
         | 
| 179 | 
            +
                                sync_mode == SyncMode.incremental
         | 
| 180 | 
            +
                                and issubclass(self.cursor_cls, AbstractConcurrentFileBasedCursor)
         | 
| 181 | 
            +
                                and hasattr(self, "_concurrency_level")
         | 
| 182 | 
            +
                                and self._concurrency_level is not None
         | 
| 183 | 
            +
                            ):
         | 
| 184 | 
            +
                                assert (
         | 
| 185 | 
            +
                                    state_manager is not None
         | 
| 186 | 
            +
                                ), "No ConnectorStateManager was created, but it is required for incremental syncs. This is unexpected. Please contact Support."
         | 
| 187 | 
            +
             | 
| 188 | 
            +
                                cursor = self.cursor_cls(
         | 
| 189 | 
            +
                                    stream_config,
         | 
| 190 | 
            +
                                    stream_config.name,
         | 
| 191 | 
            +
                                    None,
         | 
| 192 | 
            +
                                    stream_state,
         | 
| 193 | 
            +
                                    self.message_repository,
         | 
| 194 | 
            +
                                    state_manager,
         | 
| 195 | 
            +
                                    CursorField(DefaultFileBasedStream.ab_last_mod_col),
         | 
| 196 | 
            +
                                )
         | 
| 197 | 
            +
                                stream = FileBasedStreamFacade.create_from_stream(
         | 
| 198 | 
            +
                                    self._make_default_stream(stream_config, cursor), self, self.logger, stream_state, cursor
         | 
| 199 | 
            +
                                )
         | 
| 200 | 
            +
                            else:
         | 
| 201 | 
            +
                                cursor = self.cursor_cls(stream_config)
         | 
| 202 | 
            +
                                stream = self._make_default_stream(stream_config, cursor)
         | 
| 203 | 
            +
             | 
| 204 | 
            +
                            streams.append(stream)
         | 
| 175 205 | 
             
                        return streams
         | 
| 176 206 |  | 
| 177 207 | 
             
                    except ValidationError as exc:
         | 
| 178 208 | 
             
                        raise ConfigValidationError(FileBasedSourceError.CONFIG_VALIDATION_ERROR) from exc
         | 
| 179 209 |  | 
| 180 | 
            -
                def  | 
| 210 | 
            +
                def _make_default_stream(
         | 
| 211 | 
            +
                    self, stream_config: FileBasedStreamConfig, cursor: Optional[AbstractFileBasedCursor]
         | 
| 212 | 
            +
                ) -> AbstractFileBasedStream:
         | 
| 213 | 
            +
                    return DefaultFileBasedStream(
         | 
| 214 | 
            +
                        config=stream_config,
         | 
| 215 | 
            +
                        catalog_schema=self.stream_schemas.get(stream_config.name),
         | 
| 216 | 
            +
                        stream_reader=self.stream_reader,
         | 
| 217 | 
            +
                        availability_strategy=self.availability_strategy,
         | 
| 218 | 
            +
                        discovery_policy=self.discovery_policy,
         | 
| 219 | 
            +
                        parsers=self.parsers,
         | 
| 220 | 
            +
                        validation_policy=self._validate_and_get_validation_policy(stream_config),
         | 
| 221 | 
            +
                        errors_collector=self.errors_collector,
         | 
| 222 | 
            +
                        cursor=cursor,
         | 
| 223 | 
            +
                    )
         | 
| 224 | 
            +
             | 
| 225 | 
            +
                def _get_stream_from_catalog(self, stream_config: FileBasedStreamConfig) -> Optional[AirbyteStream]:
         | 
| 226 | 
            +
                    if self.catalog:
         | 
| 227 | 
            +
                        for stream in self.catalog.streams or []:
         | 
| 228 | 
            +
                            if stream.stream.name == stream_config.name:
         | 
| 229 | 
            +
                                return stream.stream
         | 
| 230 | 
            +
                    return None
         | 
| 231 | 
            +
             | 
| 232 | 
            +
                def _get_sync_mode_from_catalog(self, stream_name: str) -> Optional[SyncMode]:
         | 
| 181 233 | 
             
                    if self.catalog:
         | 
| 182 234 | 
             
                        for catalog_stream in self.catalog.streams:
         | 
| 183 | 
            -
                            if  | 
| 235 | 
            +
                            if stream_name == catalog_stream.stream.name:
         | 
| 184 236 | 
             
                                return catalog_stream.sync_mode
         | 
| 185 | 
            -
                        self.logger.warning(f"No sync mode was found for { | 
| 237 | 
            +
                        self.logger.warning(f"No sync mode was found for {stream_name}.")
         | 
| 186 238 | 
             
                    return None
         | 
| 187 239 |  | 
| 188 240 | 
             
                def read(
         | 
| @@ -15,6 +15,7 @@ from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFile | |
| 15 15 | 
             
            from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser
         | 
| 16 16 | 
             
            from airbyte_cdk.sources.file_based.remote_file import RemoteFile
         | 
| 17 17 | 
             
            from airbyte_cdk.sources.file_based.schema_validation_policies import AbstractSchemaValidationPolicy
         | 
| 18 | 
            +
            from airbyte_cdk.sources.file_based.stream.cursor import AbstractFileBasedCursor
         | 
| 18 19 | 
             
            from airbyte_cdk.sources.file_based.types import StreamSlice
         | 
| 19 20 | 
             
            from airbyte_cdk.sources.streams import Stream
         | 
| 20 21 |  | 
| @@ -45,6 +46,7 @@ class AbstractFileBasedStream(Stream): | |
| 45 46 | 
             
                    parsers: Dict[Type[Any], FileTypeParser],
         | 
| 46 47 | 
             
                    validation_policy: AbstractSchemaValidationPolicy,
         | 
| 47 48 | 
             
                    errors_collector: FileBasedErrorsCollector,
         | 
| 49 | 
            +
                    cursor: AbstractFileBasedCursor,
         | 
| 48 50 | 
             
                ):
         | 
| 49 51 | 
             
                    super().__init__()
         | 
| 50 52 | 
             
                    self.config = config
         | 
| @@ -55,6 +57,7 @@ class AbstractFileBasedStream(Stream): | |
| 55 57 | 
             
                    self._availability_strategy = availability_strategy
         | 
| 56 58 | 
             
                    self._parsers = parsers
         | 
| 57 59 | 
             
                    self.errors_collector = errors_collector
         | 
| 60 | 
            +
                    self._cursor = cursor
         | 
| 58 61 |  | 
| 59 62 | 
             
                @property
         | 
| 60 63 | 
             
                @abstractmethod
         | 
| @@ -5,7 +5,7 @@ | |
| 5 5 | 
             
            import copy
         | 
| 6 6 | 
             
            import logging
         | 
| 7 7 | 
             
            from functools import lru_cache
         | 
| 8 | 
            -
            from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Union
         | 
| 8 | 
            +
            from typing import TYPE_CHECKING, Any, Iterable, List, Mapping, MutableMapping, Optional, Union
         | 
| 9 9 |  | 
| 10 10 | 
             
            from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Level, SyncMode, Type
         | 
| 11 11 | 
             
            from airbyte_cdk.sources import AbstractSource
         | 
| @@ -19,6 +19,7 @@ from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeP | |
| 19 19 | 
             
            from airbyte_cdk.sources.file_based.remote_file import RemoteFile
         | 
| 20 20 | 
             
            from airbyte_cdk.sources.file_based.stream import AbstractFileBasedStream
         | 
| 21 21 | 
             
            from airbyte_cdk.sources.file_based.stream.concurrent.cursor import FileBasedNoopCursor
         | 
| 22 | 
            +
            from airbyte_cdk.sources.file_based.stream.cursor import AbstractFileBasedCursor
         | 
| 22 23 | 
             
            from airbyte_cdk.sources.file_based.types import StreamSlice
         | 
| 23 24 | 
             
            from airbyte_cdk.sources.message import MessageRepository
         | 
| 24 25 | 
             
            from airbyte_cdk.sources.streams.concurrent.abstract_stream_facade import AbstractStreamFacade
         | 
| @@ -33,6 +34,9 @@ from airbyte_cdk.sources.utils.schema_helpers import InternalConfig | |
| 33 34 | 
             
            from airbyte_cdk.sources.utils.slice_logger import SliceLogger
         | 
| 34 35 | 
             
            from deprecated.classic import deprecated
         | 
| 35 36 |  | 
| 37 | 
            +
            if TYPE_CHECKING:
         | 
| 38 | 
            +
                from airbyte_cdk.sources.file_based.stream.concurrent.cursor import AbstractConcurrentFileBasedCursor
         | 
| 39 | 
            +
             | 
| 36 40 | 
             
            """
         | 
| 37 41 | 
             
            This module contains adapters to help enabling concurrency on File-based Stream objects without needing to migrate to AbstractStream
         | 
| 38 42 | 
             
            """
         | 
| @@ -47,13 +51,14 @@ class FileBasedStreamFacade(AbstractStreamFacade[DefaultStream], AbstractFileBas | |
| 47 51 | 
             
                    source: AbstractSource,
         | 
| 48 52 | 
             
                    logger: logging.Logger,
         | 
| 49 53 | 
             
                    state: Optional[MutableMapping[str, Any]],
         | 
| 50 | 
            -
                    cursor:  | 
| 54 | 
            +
                    cursor: "AbstractConcurrentFileBasedCursor",
         | 
| 51 55 | 
             
                ) -> "FileBasedStreamFacade":
         | 
| 52 56 | 
             
                    """
         | 
| 53 57 | 
             
                    Create a ConcurrentStream from a FileBasedStream object.
         | 
| 54 58 | 
             
                    """
         | 
| 55 59 | 
             
                    pk = get_primary_key_from_stream(stream.primary_key)
         | 
| 56 60 | 
             
                    cursor_field = get_cursor_field_from_stream(stream)
         | 
| 61 | 
            +
                    stream._cursor = cursor
         | 
| 57 62 |  | 
| 58 63 | 
             
                    if not source.message_repository:
         | 
| 59 64 | 
             
                        raise ValueError(
         | 
| @@ -62,7 +67,7 @@ class FileBasedStreamFacade(AbstractStreamFacade[DefaultStream], AbstractFileBas | |
| 62 67 |  | 
| 63 68 | 
             
                    message_repository = source.message_repository
         | 
| 64 69 | 
             
                    return FileBasedStreamFacade(
         | 
| 65 | 
            -
                        DefaultStream( | 
| 70 | 
            +
                        DefaultStream(
         | 
| 66 71 | 
             
                            partition_generator=FileBasedStreamPartitionGenerator(
         | 
| 67 72 | 
             
                                stream,
         | 
| 68 73 | 
             
                                message_repository,
         | 
| @@ -90,14 +95,13 @@ class FileBasedStreamFacade(AbstractStreamFacade[DefaultStream], AbstractFileBas | |
| 90 95 | 
             
                    self,
         | 
| 91 96 | 
             
                    stream: DefaultStream,
         | 
| 92 97 | 
             
                    legacy_stream: AbstractFileBasedStream,
         | 
| 93 | 
            -
                    cursor:  | 
| 98 | 
            +
                    cursor: AbstractFileBasedCursor,
         | 
| 94 99 | 
             
                    slice_logger: SliceLogger,
         | 
| 95 100 | 
             
                    logger: logging.Logger,
         | 
| 96 101 | 
             
                ):
         | 
| 97 102 | 
             
                    """
         | 
| 98 103 | 
             
                    :param stream: The underlying AbstractStream
         | 
| 99 104 | 
             
                    """
         | 
| 100 | 
            -
                    # super().__init__(stream, legacy_stream, cursor, slice_logger, logger)
         | 
| 101 105 | 
             
                    self._abstract_stream = stream
         | 
| 102 106 | 
             
                    self._legacy_stream = legacy_stream
         | 
| 103 107 | 
             
                    self._cursor = cursor
         | 
| @@ -216,7 +220,7 @@ class FileBasedStreamPartition(Partition): | |
| 216 220 | 
             
                    sync_mode: SyncMode,
         | 
| 217 221 | 
             
                    cursor_field: Optional[List[str]],
         | 
| 218 222 | 
             
                    state: Optional[MutableMapping[str, Any]],
         | 
| 219 | 
            -
                    cursor:  | 
| 223 | 
            +
                    cursor: "AbstractConcurrentFileBasedCursor",
         | 
| 220 224 | 
             
                ):
         | 
| 221 225 | 
             
                    self._stream = stream
         | 
| 222 226 | 
             
                    self._slice = _slice
         | 
| @@ -292,7 +296,7 @@ class FileBasedStreamPartitionGenerator(PartitionGenerator): | |
| 292 296 | 
             
                    sync_mode: SyncMode,
         | 
| 293 297 | 
             
                    cursor_field: Optional[List[str]],
         | 
| 294 298 | 
             
                    state: Optional[MutableMapping[str, Any]],
         | 
| 295 | 
            -
                    cursor:  | 
| 299 | 
            +
                    cursor: "AbstractConcurrentFileBasedCursor",
         | 
| 296 300 | 
             
                ):
         | 
| 297 301 | 
             
                    self._stream = stream
         | 
| 298 302 | 
             
                    self._message_repository = message_repository
         | 
| @@ -305,19 +309,17 @@ class FileBasedStreamPartitionGenerator(PartitionGenerator): | |
| 305 309 | 
             
                    pending_partitions = []
         | 
| 306 310 | 
             
                    for _slice in self._stream.stream_slices(sync_mode=self._sync_mode, cursor_field=self._cursor_field, stream_state=self._state):
         | 
| 307 311 | 
             
                        if _slice is not None:
         | 
| 308 | 
            -
                             | 
| 309 | 
            -
                                 | 
| 312 | 
            +
                            for file in _slice.get("files", []):
         | 
| 313 | 
            +
                                pending_partitions.append(
         | 
| 310 314 | 
             
                                    FileBasedStreamPartition(
         | 
| 311 315 | 
             
                                        self._stream,
         | 
| 312 | 
            -
                                        {"files": [copy.deepcopy( | 
| 316 | 
            +
                                        {"files": [copy.deepcopy(file)]},
         | 
| 313 317 | 
             
                                        self._message_repository,
         | 
| 314 318 | 
             
                                        self._sync_mode,
         | 
| 315 319 | 
             
                                        self._cursor_field,
         | 
| 316 320 | 
             
                                        self._state,
         | 
| 317 321 | 
             
                                        self._cursor,
         | 
| 318 322 | 
             
                                    )
         | 
| 319 | 
            -
             | 
| 320 | 
            -
                                ]
         | 
| 321 | 
            -
                            )
         | 
| 323 | 
            +
                                )
         | 
| 322 324 | 
             
                    self._cursor.set_pending_partitions(pending_partitions)
         | 
| 323 325 | 
             
                    yield from pending_partitions
         | 
| @@ -0,0 +1,5 @@ | |
| 1 | 
            +
            from .abstract_concurrent_file_based_cursor import AbstractConcurrentFileBasedCursor
         | 
| 2 | 
            +
            from .file_based_noop_cursor import FileBasedNoopCursor
         | 
| 3 | 
            +
            from .file_based_concurrent_cursor import FileBasedConcurrentCursor
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            __all__ = ["AbstractConcurrentFileBasedCursor", "FileBasedConcurrentCursor", "FileBasedNoopCursor"]
         | 
| @@ -1,12 +1,12 @@ | |
| 1 1 | 
             
            #
         | 
| 2 2 | 
             
            # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
         | 
| 3 3 | 
             
            #
         | 
| 4 | 
            +
             | 
| 4 5 | 
             
            import logging
         | 
| 5 | 
            -
            from abc import abstractmethod
         | 
| 6 | 
            +
            from abc import ABC, abstractmethod
         | 
| 6 7 | 
             
            from datetime import datetime
         | 
| 7 | 
            -
            from typing import Any, Iterable, MutableMapping
         | 
| 8 | 
            +
            from typing import TYPE_CHECKING, Any, Iterable, List, MutableMapping
         | 
| 8 9 |  | 
| 9 | 
            -
            from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
         | 
| 10 10 | 
             
            from airbyte_cdk.sources.file_based.remote_file import RemoteFile
         | 
| 11 11 | 
             
            from airbyte_cdk.sources.file_based.stream.cursor import AbstractFileBasedCursor
         | 
| 12 12 | 
             
            from airbyte_cdk.sources.file_based.types import StreamState
         | 
| @@ -14,27 +14,33 @@ from airbyte_cdk.sources.streams.concurrent.cursor import Cursor | |
| 14 14 | 
             
            from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
         | 
| 15 15 | 
             
            from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
         | 
| 16 16 |  | 
| 17 | 
            +
            if TYPE_CHECKING:
         | 
| 18 | 
            +
                from airbyte_cdk.sources.file_based.stream.concurrent.adapters import FileBasedStreamPartition
         | 
| 19 | 
            +
             | 
| 20 | 
            +
             | 
| 21 | 
            +
            class AbstractConcurrentFileBasedCursor(Cursor, AbstractFileBasedCursor, ABC):
         | 
| 22 | 
            +
                def __init__(self, *args: Any, **kwargs: Any) -> None:
         | 
| 23 | 
            +
                    pass
         | 
| 17 24 |  | 
| 18 | 
            -
            class AbstractFileBasedConcurrentCursor(Cursor, AbstractFileBasedCursor):
         | 
| 19 25 | 
             
                @property
         | 
| 20 26 | 
             
                @abstractmethod
         | 
| 21 27 | 
             
                def state(self) -> MutableMapping[str, Any]:
         | 
| 22 28 | 
             
                    ...
         | 
| 23 29 |  | 
| 24 30 | 
             
                @abstractmethod
         | 
| 25 | 
            -
                def  | 
| 31 | 
            +
                def observe(self, record: Record) -> None:
         | 
| 26 32 | 
             
                    ...
         | 
| 27 33 |  | 
| 28 34 | 
             
                @abstractmethod
         | 
| 29 | 
            -
                def  | 
| 35 | 
            +
                def close_partition(self, partition: Partition) -> None:
         | 
| 30 36 | 
             
                    ...
         | 
| 31 37 |  | 
| 32 38 | 
             
                @abstractmethod
         | 
| 33 | 
            -
                def  | 
| 39 | 
            +
                def set_pending_partitions(self, partitions: List["FileBasedStreamPartition"]) -> None:
         | 
| 34 40 | 
             
                    ...
         | 
| 35 41 |  | 
| 36 42 | 
             
                @abstractmethod
         | 
| 37 | 
            -
                def  | 
| 43 | 
            +
                def add_file(self, file: RemoteFile) -> None:
         | 
| 38 44 | 
             
                    ...
         | 
| 39 45 |  | 
| 40 46 | 
             
                @abstractmethod
         | 
| @@ -42,49 +48,21 @@ class AbstractFileBasedConcurrentCursor(Cursor, AbstractFileBasedCursor): | |
| 42 48 | 
             
                    ...
         | 
| 43 49 |  | 
| 44 50 | 
             
                @abstractmethod
         | 
| 45 | 
            -
                def  | 
| 51 | 
            +
                def get_state(self) -> MutableMapping[str, Any]:
         | 
| 46 52 | 
             
                    ...
         | 
| 47 53 |  | 
| 48 54 | 
             
                @abstractmethod
         | 
| 49 | 
            -
                def  | 
| 55 | 
            +
                def set_initial_state(self, value: StreamState) -> None:
         | 
| 50 56 | 
             
                    ...
         | 
| 51 57 |  | 
| 52 58 | 
             
                @abstractmethod
         | 
| 53 | 
            -
                def set_pending_partitions(self, partitions: Iterable[Partition]) -> None:
         | 
| 54 | 
            -
                    ...
         | 
| 55 | 
            -
             | 
| 56 | 
            -
             | 
| 57 | 
            -
            class FileBasedNoopCursor(AbstractFileBasedConcurrentCursor):
         | 
| 58 | 
            -
                def __init__(self, stream_config: FileBasedStreamConfig, **kwargs: Any):
         | 
| 59 | 
            -
                    pass
         | 
| 60 | 
            -
             | 
| 61 | 
            -
                @property
         | 
| 62 | 
            -
                def state(self) -> MutableMapping[str, Any]:
         | 
| 63 | 
            -
                    return {}
         | 
| 64 | 
            -
             | 
| 65 | 
            -
                def add_file(self, file: RemoteFile) -> None:
         | 
| 66 | 
            -
                    return None
         | 
| 67 | 
            -
             | 
| 68 | 
            -
                def set_initial_state(self, value: StreamState) -> None:
         | 
| 69 | 
            -
                    return None
         | 
| 70 | 
            -
             | 
| 71 | 
            -
                def get_state(self) -> MutableMapping[str, Any]:
         | 
| 72 | 
            -
                    return {}
         | 
| 73 | 
            -
             | 
| 74 59 | 
             
                def get_start_time(self) -> datetime:
         | 
| 75 | 
            -
                     | 
| 76 | 
            -
             | 
| 77 | 
            -
                def get_files_to_sync(self, all_files: Iterable[RemoteFile], logger: logging.Logger) -> Iterable[RemoteFile]:
         | 
| 78 | 
            -
                    return []
         | 
| 79 | 
            -
             | 
| 80 | 
            -
                def observe(self, record: Record) -> None:
         | 
| 81 | 
            -
                    return None
         | 
| 60 | 
            +
                    ...
         | 
| 82 61 |  | 
| 83 | 
            -
                 | 
| 84 | 
            -
             | 
| 62 | 
            +
                @abstractmethod
         | 
| 63 | 
            +
                def emit_state_message(self) -> None:
         | 
| 64 | 
            +
                    ...
         | 
| 85 65 |  | 
| 66 | 
            +
                @abstractmethod
         | 
| 86 67 | 
             
                def ensure_at_least_one_state_emitted(self) -> None:
         | 
| 87 | 
            -
                     | 
| 88 | 
            -
             | 
| 89 | 
            -
                def set_pending_partitions(self, partitions: Iterable[Partition]) -> None:
         | 
| 90 | 
            -
                    return None
         | 
| 68 | 
            +
                    ...
         |