airbyte-cdk 6.17.1.dev1__py3-none-any.whl → 6.18.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -20,9 +20,6 @@ from airbyte_cdk.sources.declarative.extractors.record_filter import (
20
20
  ClientSideIncrementalRecordFilterDecorator,
21
21
  )
22
22
  from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
23
- from airbyte_cdk.sources.declarative.incremental.per_partition_with_global import (
24
- PerPartitionWithGlobalCursor,
25
- )
26
23
  from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
27
24
  from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
28
25
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
@@ -307,72 +304,6 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
307
304
  cursor=final_state_cursor,
308
305
  )
309
306
  )
310
- elif (
311
- incremental_sync_component_definition
312
- and incremental_sync_component_definition.get("type", "")
313
- == DatetimeBasedCursorModel.__name__
314
- and self._stream_supports_concurrent_partition_processing(
315
- declarative_stream=declarative_stream
316
- )
317
- and hasattr(declarative_stream.retriever, "stream_slicer")
318
- and isinstance(
319
- declarative_stream.retriever.stream_slicer, PerPartitionWithGlobalCursor
320
- )
321
- ):
322
- stream_state = state_manager.get_stream_state(
323
- stream_name=declarative_stream.name, namespace=declarative_stream.namespace
324
- )
325
- partition_router = declarative_stream.retriever.stream_slicer._partition_router
326
-
327
- cursor = self._constructor.create_concurrent_cursor_from_perpartition_cursor(
328
- state_manager=state_manager,
329
- model_type=DatetimeBasedCursorModel,
330
- component_definition=incremental_sync_component_definition,
331
- stream_name=declarative_stream.name,
332
- stream_namespace=declarative_stream.namespace,
333
- config=config or {},
334
- stream_state=stream_state,
335
- partition_router=partition_router,
336
- )
337
-
338
- retriever = declarative_stream.retriever
339
-
340
- # This is an optimization so that we don't invoke any cursor or state management flows within the
341
- # low-code framework because state management is handled through the ConcurrentCursor.
342
- if declarative_stream and isinstance(retriever, SimpleRetriever):
343
- # Also a temporary hack. In the legacy Stream implementation, as part of the read,
344
- # set_initial_state() is called to instantiate incoming state on the cursor. Although we no
345
- # longer rely on the legacy low-code cursor for concurrent checkpointing, low-code components
346
- # like StopConditionPaginationStrategyDecorator and ClientSideIncrementalRecordFilterDecorator
347
- # still rely on a DatetimeBasedCursor that is properly initialized with state.
348
- if retriever.cursor:
349
- retriever.cursor.set_initial_state(stream_state=stream_state)
350
- # We zero it out here, but since this is a cursor reference, the state is still properly
351
- # instantiated for the other components that reference it
352
- retriever.cursor = None
353
-
354
- partition_generator = StreamSlicerPartitionGenerator(
355
- DeclarativePartitionFactory(
356
- declarative_stream.name,
357
- declarative_stream.get_json_schema(),
358
- retriever,
359
- self.message_repository,
360
- ),
361
- cursor,
362
- )
363
-
364
- concurrent_streams.append(
365
- DefaultStream(
366
- partition_generator=partition_generator,
367
- name=declarative_stream.name,
368
- json_schema=declarative_stream.get_json_schema(),
369
- availability_strategy=AlwaysAvailableAvailabilityStrategy(),
370
- primary_key=get_primary_key_from_stream(declarative_stream.primary_key),
371
- cursor_field=cursor.cursor_field.cursor_field_key,
372
- logger=self.logger,
373
- cursor=cursor,
374
- )
375
- )
376
307
  else:
377
308
  synchronous_streams.append(declarative_stream)
378
309
  else:
@@ -2977,6 +2977,11 @@ definitions:
2977
2977
  anyOf:
2978
2978
  - "$ref": "#/definitions/CustomRequester"
2979
2979
  - "$ref": "#/definitions/HttpRequester"
2980
+ url_requester:
2981
+ description: Requester component that describes how to prepare HTTP requests to send to the source API to extract the URL from the polling response of the completed async job.
2982
+ anyOf:
2983
+ - "$ref": "#/definitions/CustomRequester"
2984
+ - "$ref": "#/definitions/HttpRequester"
2980
2985
  download_requester:
2981
2986
  description: Requester component that describes how to prepare HTTP requests to send to the source API to download the data provided by the completed async job.
2982
2987
  anyOf:
@@ -59,11 +59,13 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
59
59
 
60
60
  def __init__(
61
61
  self,
62
- cursor: Union[DatetimeBasedCursor, PerPartitionWithGlobalCursor, GlobalSubstreamCursor],
62
+ date_time_based_cursor: DatetimeBasedCursor,
63
+ substream_cursor: Optional[Union[PerPartitionWithGlobalCursor, GlobalSubstreamCursor]],
63
64
  **kwargs: Any,
64
65
  ):
65
66
  super().__init__(**kwargs)
66
- self._cursor = cursor
67
+ self._date_time_based_cursor = date_time_based_cursor
68
+ self._substream_cursor = substream_cursor
67
69
 
68
70
  def filter_records(
69
71
  self,
@@ -75,7 +77,7 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
75
77
  records = (
76
78
  record
77
79
  for record in records
78
- if self._cursor.should_be_synced(
80
+ if (self._substream_cursor or self._date_time_based_cursor).should_be_synced(
79
81
  # Record is created on the fly to align with the cursor's interface; stream name is ignored as we don't need it here
80
82
  # Record stream name is empty because it is not used during the filtering
81
83
  Record(data=record, associated_slice=stream_slice, stream_name="")
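
The change above splits the single `cursor` argument of `ClientSideIncrementalRecordFilterDecorator` into a `date_time_based_cursor` and an optional `substream_cursor`, and filtering falls back to the datetime-based cursor only when no substream cursor is configured. A minimal sketch of that fallback, using hypothetical stub classes rather than the real CDK cursor types:

```python
# Sketch only: StubCursor stands in for DatetimeBasedCursor /
# PerPartitionWithGlobalCursor; the real classes carry richer state handling.
from dataclasses import dataclass
from typing import Any, Iterable, List, Mapping, Optional


@dataclass
class StubCursor:
    cutoff: int

    def should_be_synced(self, record: Mapping[str, Any]) -> bool:
        # A real cursor compares the record's cursor field against its state.
        return record.get("updated_at", 0) >= self.cutoff


def filter_records(
    records: Iterable[Mapping[str, Any]],
    date_time_based_cursor: StubCursor,
    substream_cursor: Optional[StubCursor],
) -> List[Mapping[str, Any]]:
    # Prefer the substream cursor when present, otherwise fall back to the
    # datetime-based cursor, mirroring the expression in the hunk above.
    active_cursor = substream_cursor or date_time_based_cursor
    return [record for record in records if active_cursor.should_be_synced(record)]


print(filter_records([{"updated_at": 5}, {"updated_at": 15}], StubCursor(cutoff=10), None))
```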
@@ -2,10 +2,6 @@
2
2
  # Copyright (c) 2022 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
- from airbyte_cdk.sources.declarative.incremental.concurrent_partition_cursor import (
6
- ConcurrentCursorFactory,
7
- ConcurrentPerPartitionCursor,
8
- )
9
5
  from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
10
6
  from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor
11
7
  from airbyte_cdk.sources.declarative.incremental.global_substream_cursor import (
@@ -25,8 +21,6 @@ from airbyte_cdk.sources.declarative.incremental.resumable_full_refresh_cursor i
25
21
 
26
22
  __all__ = [
27
23
  "CursorFactory",
28
- "ConcurrentCursorFactory",
29
- "ConcurrentPerPartitionCursor",
30
24
  "DatetimeBasedCursor",
31
25
  "DeclarativeCursor",
32
26
  "GlobalSubstreamCursor",
@@ -303,21 +303,6 @@ class PerPartitionCursor(DeclarativeCursor):
303
303
  raise ValueError("A partition needs to be provided in order to get request body json")
304
304
 
305
305
  def should_be_synced(self, record: Record) -> bool:
306
- if (
307
- record.associated_slice
308
- and self._to_partition_key(record.associated_slice.partition)
309
- not in self._cursor_per_partition
310
- ):
311
- partition_state = (
312
- self._state_to_migrate_from
313
- if self._state_to_migrate_from
314
- else self._NO_CURSOR_STATE
315
- )
316
- cursor = self._create_cursor(partition_state)
317
-
318
- self._cursor_per_partition[
319
- self._to_partition_key(record.associated_slice.partition)
320
- ] = cursor
321
306
  return self._get_cursor(record).should_be_synced(
322
307
  self._convert_record_to_cursor_record(record)
323
308
  )
@@ -737,33 +737,43 @@ class KeysToSnakeCase(BaseModel):
737
737
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
738
738
 
739
739
 
740
+ class FlattenFields(BaseModel):
741
+ type: Literal["FlattenFields"]
742
+ flatten_lists: Optional[bool] = Field(
743
+ True,
744
+ description="Whether to flatten lists or leave it as is. Default is True.",
745
+ title="Flatten Lists",
746
+ )
747
+ parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
748
+
749
+
740
750
  class KeysReplace(BaseModel):
741
751
  type: Literal["KeysReplace"]
742
752
  old: str = Field(
743
753
  ...,
744
754
  description="Old value to replace.",
745
- examples=[" ", "{{ record.id }}", "{{ config['id'] }}", "{{ stream_slice['id'] }}"],
755
+ examples=[
756
+ " ",
757
+ "{{ record.id }}",
758
+ "{{ config['id'] }}",
759
+ "{{ stream_slice['id'] }}",
760
+ ],
746
761
  title="Old value",
747
762
  )
748
763
  new: str = Field(
749
764
  ...,
750
765
  description="New value to set.",
751
- examples=["_", "{{ record.id }}", "{{ config['id'] }}", "{{ stream_slice['id'] }}"],
766
+ examples=[
767
+ "_",
768
+ "{{ record.id }}",
769
+ "{{ config['id'] }}",
770
+ "{{ stream_slice['id'] }}",
771
+ ],
752
772
  title="New value",
753
773
  )
754
774
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
755
775
 
756
776
 
757
- class FlattenFields(BaseModel):
758
- type: Literal["FlattenFields"]
759
- flatten_lists: Optional[bool] = Field(
760
- True,
761
- description="Whether to flatten lists or leave it as is. Default is True.",
762
- title="Flatten Lists",
763
- )
764
- parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
765
-
766
-
767
777
  class IterableDecoder(BaseModel):
768
778
  type: Literal["IterableDecoder"]
769
779
 
@@ -2040,6 +2050,10 @@ class AsyncRetriever(BaseModel):
2040
2050
  ...,
2041
2051
  description="Requester component that describes how to prepare HTTP requests to send to the source API to fetch the status of the running async job.",
2042
2052
  )
2053
+ url_requester: Optional[Union[CustomRequester, HttpRequester]] = Field(
2054
+ None,
2055
+ description="Requester component that describes how to prepare HTTP requests to send to the source API to extract the url from polling response by the completed async job.",
2056
+ )
2043
2057
  download_requester: Union[CustomRequester, HttpRequester] = Field(
2044
2058
  ...,
2045
2059
  description="Requester component that describes how to prepare HTTP requests to send to the source API to download the data provided by the completed async job.",
@@ -84,8 +84,6 @@ from airbyte_cdk.sources.declarative.extractors.record_filter import (
84
84
  )
85
85
  from airbyte_cdk.sources.declarative.incremental import (
86
86
  ChildPartitionResumableFullRefreshCursor,
87
- ConcurrentCursorFactory,
88
- ConcurrentPerPartitionCursor,
89
87
  CursorFactory,
90
88
  DatetimeBasedCursor,
91
89
  DeclarativeCursor,
@@ -440,7 +438,6 @@ from airbyte_cdk.sources.message import (
440
438
  InMemoryMessageRepository,
441
439
  LogAppenderMessageRepositoryDecorator,
442
440
  MessageRepository,
443
- NoopMessageRepository,
444
441
  )
445
442
  from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, CursorField
446
443
  from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import (
@@ -874,8 +871,6 @@ class ModelToComponentFactory:
874
871
  stream_namespace: Optional[str],
875
872
  config: Config,
876
873
  stream_state: MutableMapping[str, Any],
877
- message_repository: Optional[MessageRepository] = None,
878
- runtime_lookback_window: Optional[int] = None,
879
874
  **kwargs: Any,
880
875
  ) -> ConcurrentCursor:
881
876
  component_type = component_definition.get("type")
@@ -933,11 +928,6 @@ class ModelToComponentFactory:
933
928
  if evaluated_lookback_window:
934
929
  lookback_window = parse_duration(evaluated_lookback_window)
935
930
 
936
- if runtime_lookback_window and lookback_window:
937
- lookback_window = max(lookback_window, runtime_lookback_window)
938
- elif runtime_lookback_window:
939
- lookback_window = runtime_lookback_window
940
-
941
931
  connector_state_converter: DateTimeStreamStateConverter
942
932
  connector_state_converter = CustomFormatConcurrentStreamStateConverter(
943
933
  datetime_format=datetime_format,
@@ -1016,7 +1006,7 @@ class ModelToComponentFactory:
1016
1006
  stream_name=stream_name,
1017
1007
  stream_namespace=stream_namespace,
1018
1008
  stream_state=stream_state,
1019
- message_repository=message_repository or self._message_repository,
1009
+ message_repository=self._message_repository,
1020
1010
  connector_state_manager=state_manager,
1021
1011
  connector_state_converter=connector_state_converter,
1022
1012
  cursor_field=cursor_field,
@@ -1028,63 +1018,6 @@ class ModelToComponentFactory:
1028
1018
  cursor_granularity=cursor_granularity,
1029
1019
  )
1030
1020
 
1031
- def create_concurrent_cursor_from_perpartition_cursor(
1032
- self,
1033
- state_manager: ConnectorStateManager,
1034
- model_type: Type[BaseModel],
1035
- component_definition: ComponentDefinition,
1036
- stream_name: str,
1037
- stream_namespace: Optional[str],
1038
- config: Config,
1039
- stream_state: MutableMapping[str, Any],
1040
- partition_router,
1041
- **kwargs: Any,
1042
- ) -> ConcurrentPerPartitionCursor:
1043
- component_type = component_definition.get("type")
1044
- if component_definition.get("type") != model_type.__name__:
1045
- raise ValueError(
1046
- f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
1047
- )
1048
-
1049
- datetime_based_cursor_model = model_type.parse_obj(component_definition)
1050
-
1051
- if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel):
1052
- raise ValueError(
1053
- f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
1054
- )
1055
-
1056
- interpolated_cursor_field = InterpolatedString.create(
1057
- datetime_based_cursor_model.cursor_field,
1058
- parameters=datetime_based_cursor_model.parameters or {},
1059
- )
1060
- cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
1061
-
1062
- # Create the cursor factory
1063
- cursor_factory = ConcurrentCursorFactory(
1064
- partial(
1065
- self.create_concurrent_cursor_from_datetime_based_cursor,
1066
- state_manager=state_manager,
1067
- model_type=model_type,
1068
- component_definition=component_definition,
1069
- stream_name=stream_name,
1070
- stream_namespace=stream_namespace,
1071
- config=config,
1072
- message_repository=NoopMessageRepository(),
1073
- )
1074
- )
1075
-
1076
- # Return the concurrent cursor and state converter
1077
- return ConcurrentPerPartitionCursor(
1078
- cursor_factory=cursor_factory,
1079
- partition_router=partition_router,
1080
- stream_name=stream_name,
1081
- stream_namespace=stream_namespace,
1082
- stream_state=stream_state,
1083
- message_repository=self._message_repository, # type: ignore
1084
- connector_state_manager=state_manager,
1085
- cursor_field=cursor_field,
1086
- )
1087
-
1088
1021
  @staticmethod
1089
1022
  def create_constant_backoff_strategy(
1090
1023
  model: ConstantBackoffStrategyModel, config: Config, **kwargs: Any
@@ -1367,15 +1300,18 @@ class ModelToComponentFactory:
1367
1300
  raise ValueError(
1368
1301
  "Unsupported Slicer is used. PerPartitionWithGlobalCursor should be used here instead"
1369
1302
  )
1370
- cursor = (
1371
- combined_slicers
1372
- if isinstance(
1373
- combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)
1374
- )
1375
- else self._create_component_from_model(model=model.incremental_sync, config=config)
1376
- )
1377
-
1378
- client_side_incremental_sync = {"cursor": cursor}
1303
+ client_side_incremental_sync = {
1304
+ "date_time_based_cursor": self._create_component_from_model(
1305
+ model=model.incremental_sync, config=config
1306
+ ),
1307
+ "substream_cursor": (
1308
+ combined_slicers
1309
+ if isinstance(
1310
+ combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)
1311
+ )
1312
+ else None
1313
+ ),
1314
+ }
1379
1315
 
1380
1316
  if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel):
1381
1317
  cursor_model = model.incremental_sync
@@ -2191,7 +2127,7 @@ class ModelToComponentFactory:
2191
2127
  if (
2192
2128
  not isinstance(stream_slicer, DatetimeBasedCursor)
2193
2129
  or type(stream_slicer) is not DatetimeBasedCursor
2194
- ) and not isinstance(stream_slicer, PerPartitionWithGlobalCursor):
2130
+ ):
2195
2131
  # Many of the custom component implementations of DatetimeBasedCursor override get_request_params() (or other methods).
2196
2132
  # Because we're decoupling RequestOptionsProvider from the Cursor, custom components will eventually need to reimplement
2197
2133
  # their own RequestOptionsProvider. However, right now the existing StreamSlicer/Cursor still can act as the SimpleRetriever's
@@ -2351,7 +2287,7 @@ class ModelToComponentFactory:
2351
2287
  extractor=download_extractor,
2352
2288
  name=name,
2353
2289
  record_filter=None,
2354
- transformations=[],
2290
+ transformations=transformations,
2355
2291
  schema_normalization=TypeTransformer(TransformConfig.NoTransform),
2356
2292
  config=config,
2357
2293
  parameters={},
@@ -2388,6 +2324,16 @@ class ModelToComponentFactory:
2388
2324
  if model.delete_requester
2389
2325
  else None
2390
2326
  )
2327
+ url_requester = (
2328
+ self._create_component_from_model(
2329
+ model=model.url_requester,
2330
+ decoder=decoder,
2331
+ config=config,
2332
+ name=f"job extract_url - {name}",
2333
+ )
2334
+ if model.url_requester
2335
+ else None
2336
+ )
2391
2337
  status_extractor = self._create_component_from_model(
2392
2338
  model=model.status_extractor, decoder=decoder, config=config, name=name
2393
2339
  )
@@ -2398,6 +2344,7 @@ class ModelToComponentFactory:
2398
2344
  creation_requester=creation_requester,
2399
2345
  polling_requester=polling_requester,
2400
2346
  download_retriever=download_retriever,
2347
+ url_requester=url_requester,
2401
2348
  abort_requester=abort_requester,
2402
2349
  delete_requester=delete_requester,
2403
2350
  status_extractor=status_extractor,
@@ -0,0 +1,57 @@
1
+ # AsyncHttpJobRepository sequence diagram
2
+
3
+ - Components marked as optional can be omitted.
4
+ - If `url_requester` is not provided, `urls_extractor` will extract the URLs from the `polling_job_response`.
5
+ - The interpolation context, e.g. `create_job_response` or `polling_job_response`, can be obtained from the `stream_slice`.
6
+
7
+
8
+ ```mermaid
9
+ ---
10
+ title: AsyncHttpJobRepository Sequence Diagram
11
+ ---
12
+ sequenceDiagram
13
+ participant AsyncHttpJobRepository as AsyncOrchestrator
14
+ participant CreationRequester as creation_requester
15
+ participant PollingRequester as polling_requester
16
+ participant UrlRequester as url_requester (Optional)
17
+ participant DownloadRetriever as download_retriever
18
+ participant AbortRequester as abort_requester (Optional)
19
+ participant DeleteRequester as delete_requester (Optional)
20
+ participant Reporting Server as Async Reporting Server
21
+
22
+ AsyncHttpJobRepository ->> CreationRequester: Initiate job creation
23
+ CreationRequester ->> Reporting Server: Create job request
24
+ Reporting Server -->> CreationRequester: Job ID response
25
+ CreationRequester -->> AsyncHttpJobRepository: Job ID
26
+
27
+ loop Poll for job status
28
+ AsyncHttpJobRepository ->> PollingRequester: Check job status
29
+ PollingRequester ->> Reporting Server: Status request (interpolation_context: `create_job_response`)
30
+ Reporting Server -->> PollingRequester: Status response
31
+ PollingRequester -->> AsyncHttpJobRepository: Job status
32
+ end
33
+
34
+ alt Status: Ready
35
+ AsyncHttpJobRepository ->> UrlRequester: Request download URLs (if applicable)
36
+ UrlRequester ->> Reporting Server: URL request (interpolation_context: `polling_job_response`)
37
+ Reporting Server -->> UrlRequester: Download URLs
38
+ UrlRequester -->> AsyncHttpJobRepository: Download URLs
39
+
40
+ AsyncHttpJobRepository ->> DownloadRetriever: Download reports
41
+ DownloadRetriever ->> Reporting Server: Retrieve report data (interpolation_context: `url`)
42
+ Reporting Server -->> DownloadRetriever: Report data
43
+ DownloadRetriever -->> AsyncHttpJobRepository: Report data
44
+ else Status: Failed
45
+ AsyncHttpJobRepository ->> AbortRequester: Send abort request
46
+ AbortRequester ->> Reporting Server: Abort job
47
+ Reporting Server -->> AbortRequester: Abort confirmation
48
+ AbortRequester -->> AsyncHttpJobRepository: Confirmation
49
+ end
50
+
51
+ AsyncHttpJobRepository ->> DeleteRequester: Send delete job request
52
+ DeleteRequester ->> Reporting Server: Delete job
53
+ Reporting Server -->> DeleteRequester: Deletion confirmation
54
+ DeleteRequester -->> AsyncHttpJobRepository: Confirmation
55
+
56
+
57
+ ```
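
A minimal, self-contained sketch of the decision described in the notes above, with plain callables standing in for the real Requester and RecordExtractor components (the response shape and field names are illustrative only): when no `url_requester` is configured, the URLs are extracted straight from the polling response; otherwise an extra request is sent with the polling response exposed as interpolation context.

```python
# Hypothetical stand-ins; not the real CDK components.
from typing import Any, Callable, Dict, Iterable, List, Optional


def get_download_urls(
    polling_job_response: Dict[str, Any],
    urls_extractor: Callable[[Dict[str, Any]], List[str]],
    url_requester: Optional[Callable[[Dict[str, Any]], Dict[str, Any]]] = None,
) -> Iterable[str]:
    if url_requester is None:
        # No url_requester: urls_extractor reads the polling response directly.
        url_source = polling_job_response
    else:
        # The polling response is handed to the requester as interpolation context.
        url_source = url_requester({"polling_job_response": polling_job_response})
    yield from urls_extractor(url_source)


polling = {"job": {"status": "ready", "urls": ["https://example.com/report-1.csv"]}}
print(list(get_download_urls(polling, lambda response: response["job"]["urls"])))
```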
@@ -31,6 +31,10 @@ LOGGER = logging.getLogger("airbyte")
31
31
 
32
32
  @dataclass
33
33
  class AsyncHttpJobRepository(AsyncJobRepository):
34
+ """
35
+ See the README file for more details about the flow.
36
+ """
37
+
34
38
  creation_requester: Requester
35
39
  polling_requester: Requester
36
40
  download_retriever: SimpleRetriever
@@ -44,6 +48,9 @@ class AsyncHttpJobRepository(AsyncJobRepository):
44
48
  record_extractor: RecordExtractor = field(
45
49
  init=False, repr=False, default_factory=lambda: ResponseToFileExtractor({})
46
50
  )
51
+ url_requester: Optional[Requester] = (
52
+ None # use this in case the polling_requester provides some <id> and an extra request is needed to obtain the list of URLs to download from
53
+ )
47
54
 
48
55
  def __post_init__(self) -> None:
49
56
  self._create_job_response_by_id: Dict[str, Response] = {}
@@ -186,10 +193,13 @@ class AsyncHttpJobRepository(AsyncJobRepository):
186
193
 
187
194
  """
188
195
 
189
- for url in self.urls_extractor.extract_records(
190
- self._polling_job_response_by_id[job.api_job_id()]
191
- ):
192
- stream_slice: StreamSlice = StreamSlice(partition={"url": url}, cursor_slice={})
196
+ for url in self._get_download_url(job):
197
+ job_slice = job.job_parameters()
198
+ stream_slice = StreamSlice(
199
+ partition=job_slice.partition,
200
+ cursor_slice=job_slice.cursor_slice,
201
+ extra_fields={**job_slice.extra_fields, "url": url},
202
+ )
193
203
  for message in self.download_retriever.read_records({}, stream_slice):
194
204
  if isinstance(message, Record):
195
205
  yield message.data
@@ -226,3 +236,22 @@ class AsyncHttpJobRepository(AsyncJobRepository):
226
236
  cursor_slice={},
227
237
  )
228
238
  return stream_slice
239
+
240
+ def _get_download_url(self, job: AsyncJob) -> Iterable[str]:
241
+ if not self.url_requester:
242
+ url_response = self._polling_job_response_by_id[job.api_job_id()]
243
+ else:
244
+ stream_slice: StreamSlice = StreamSlice(
245
+ partition={
246
+ "polling_job_response": self._polling_job_response_by_id[job.api_job_id()]
247
+ },
248
+ cursor_slice={},
249
+ )
250
+ url_response = self.url_requester.send_request(stream_slice=stream_slice) # type: ignore # we expect url_requester to always be present; otherwise raise an exception as we cannot proceed with the report
251
+ if not url_response:
252
+ raise AirbyteTracedException(
253
+ internal_message="Always expect a response or an exception from url_requester",
254
+ failure_type=FailureType.system_error,
255
+ )
256
+
257
+ yield from self.urls_extractor.extract_records(url_response) # type: ignore # we expect urls_extractor to always return a list of strings
@@ -160,7 +160,7 @@ class SimpleRetriever(Retriever):
160
160
  stream_slice,
161
161
  next_page_token,
162
162
  self._paginator.get_request_headers,
163
- self.request_option_provider.get_request_headers,
163
+ self.stream_slicer.get_request_headers,
164
164
  )
165
165
  if isinstance(headers, str):
166
166
  raise ValueError("Request headers cannot be a string")
@@ -196,9 +196,7 @@ class ConcurrentCursor(Cursor):
196
196
 
197
197
  @property
198
198
  def state(self) -> MutableMapping[str, Any]:
199
- return self._connector_state_converter.convert_to_state_message(
200
- self.cursor_field, self._concurrent_state
201
- )
199
+ return self._concurrent_state
202
200
 
203
201
  @property
204
202
  def cursor_field(self) -> CursorField:
@@ -243,10 +241,10 @@ class ConcurrentCursor(Cursor):
243
241
  return self._connector_state_converter.parse_value(self._cursor_field.extract_value(record))
244
242
 
245
243
  def close_partition(self, partition: Partition) -> None:
246
- slice_count_before = len(self._concurrent_state.get("slices", []))
244
+ slice_count_before = len(self.state.get("slices", []))
247
245
  self._add_slice_to_state(partition)
248
246
  if slice_count_before < len(
249
- self._concurrent_state["slices"]
247
+ self.state["slices"]
250
248
  ): # only emit if at least one slice has been processed
251
249
  self._merge_partitions()
252
250
  self._emit_state_message()
@@ -258,11 +256,11 @@ class ConcurrentCursor(Cursor):
258
256
  )
259
257
 
260
258
  if self._slice_boundary_fields:
261
- if "slices" not in self._concurrent_state:
259
+ if "slices" not in self.state:
262
260
  raise RuntimeError(
263
261
  f"The state for stream {self._stream_name} should have at least one slice to delineate the sync start time, but no slices are present. This is unexpected. Please contact Support."
264
262
  )
265
- self._concurrent_state["slices"].append(
263
+ self.state["slices"].append(
266
264
  {
267
265
  self._connector_state_converter.START_KEY: self._extract_from_slice(
268
266
  partition, self._slice_boundary_fields[self._START_BOUNDARY]
@@ -290,7 +288,7 @@ class ConcurrentCursor(Cursor):
290
288
  "expected. Please contact the Airbyte team."
291
289
  )
292
290
 
293
- self._concurrent_state["slices"].append(
291
+ self.state["slices"].append(
294
292
  {
295
293
  self._connector_state_converter.START_KEY: self.start,
296
294
  self._connector_state_converter.END_KEY: most_recent_cursor_value,
@@ -302,7 +300,9 @@ class ConcurrentCursor(Cursor):
302
300
  self._connector_state_manager.update_state_for_stream(
303
301
  self._stream_name,
304
302
  self._stream_namespace,
305
- self.state,
303
+ self._connector_state_converter.convert_to_state_message(
304
+ self._cursor_field, self.state
305
+ ),
306
306
  )
307
307
  state_message = self._connector_state_manager.create_state_message(
308
308
  self._stream_name, self._stream_namespace
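
The hunks above move the state-format conversion out of the `state` property: the property now returns the raw concurrent-format state, and `convert_to_state_message` is applied only when the state message is emitted. A simplified sketch of that split, with stub stand-ins for the converter and cursor (not the real `ConcurrentCursor` API):

```python
# Simplified stand-ins for illustration only.
from typing import Any, Dict


class StubStateConverter:
    def convert_to_state_message(self, cursor_field: str, concurrent_state: Dict[str, Any]) -> Dict[str, Any]:
        # A real converter serializes the internal slice format into the
        # connector state message format; here we only tag the conversion.
        return {"cursor_field": cursor_field, "converted": True, **concurrent_state}


class StubCursor:
    def __init__(self) -> None:
        self._cursor_field = "updated_at"
        self._concurrent_state: Dict[str, Any] = {"slices": []}
        self._converter = StubStateConverter()

    @property
    def state(self) -> Dict[str, Any]:
        # Raw internal representation; no conversion on access anymore.
        return self._concurrent_state

    def emit_state_message(self) -> Dict[str, Any]:
        # Conversion happens only when a state message is produced.
        return self._converter.convert_to_state_message(self._cursor_field, self.state)


cursor = StubCursor()
print(cursor.state)                 # {'slices': []}
print(cursor.emit_state_message())  # {'cursor_field': 'updated_at', 'converted': True, 'slices': []}
```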
@@ -310,9 +310,7 @@ class ConcurrentCursor(Cursor):
310
310
  self._message_repository.emit_message(state_message)
311
311
 
312
312
  def _merge_partitions(self) -> None:
313
- self._concurrent_state["slices"] = self._connector_state_converter.merge_intervals(
314
- self._concurrent_state["slices"]
315
- )
313
+ self.state["slices"] = self._connector_state_converter.merge_intervals(self.state["slices"])
316
314
 
317
315
  def _extract_from_slice(self, partition: Partition, key: str) -> CursorValueType:
318
316
  try:
@@ -349,42 +347,36 @@ class ConcurrentCursor(Cursor):
349
347
  if self._start is not None and self._is_start_before_first_slice():
350
348
  yield from self._split_per_slice_range(
351
349
  self._start,
352
- self._concurrent_state["slices"][0][self._connector_state_converter.START_KEY],
350
+ self.state["slices"][0][self._connector_state_converter.START_KEY],
353
351
  False,
354
352
  )
355
353
 
356
- if len(self._concurrent_state["slices"]) == 1:
354
+ if len(self.state["slices"]) == 1:
357
355
  yield from self._split_per_slice_range(
358
356
  self._calculate_lower_boundary_of_last_slice(
359
- self._concurrent_state["slices"][0][self._connector_state_converter.END_KEY]
357
+ self.state["slices"][0][self._connector_state_converter.END_KEY]
360
358
  ),
361
359
  self._end_provider(),
362
360
  True,
363
361
  )
364
- elif len(self._concurrent_state["slices"]) > 1:
365
- for i in range(len(self._concurrent_state["slices"]) - 1):
362
+ elif len(self.state["slices"]) > 1:
363
+ for i in range(len(self.state["slices"]) - 1):
366
364
  if self._cursor_granularity:
367
365
  yield from self._split_per_slice_range(
368
- self._concurrent_state["slices"][i][self._connector_state_converter.END_KEY]
366
+ self.state["slices"][i][self._connector_state_converter.END_KEY]
369
367
  + self._cursor_granularity,
370
- self._concurrent_state["slices"][i + 1][
371
- self._connector_state_converter.START_KEY
372
- ],
368
+ self.state["slices"][i + 1][self._connector_state_converter.START_KEY],
373
369
  False,
374
370
  )
375
371
  else:
376
372
  yield from self._split_per_slice_range(
377
- self._concurrent_state["slices"][i][
378
- self._connector_state_converter.END_KEY
379
- ],
380
- self._concurrent_state["slices"][i + 1][
381
- self._connector_state_converter.START_KEY
382
- ],
373
+ self.state["slices"][i][self._connector_state_converter.END_KEY],
374
+ self.state["slices"][i + 1][self._connector_state_converter.START_KEY],
383
375
  False,
384
376
  )
385
377
  yield from self._split_per_slice_range(
386
378
  self._calculate_lower_boundary_of_last_slice(
387
- self._concurrent_state["slices"][-1][self._connector_state_converter.END_KEY]
379
+ self.state["slices"][-1][self._connector_state_converter.END_KEY]
388
380
  ),
389
381
  self._end_provider(),
390
382
  True,
@@ -395,8 +387,7 @@ class ConcurrentCursor(Cursor):
395
387
  def _is_start_before_first_slice(self) -> bool:
396
388
  return (
397
389
  self._start is not None
398
- and self._start
399
- < self._concurrent_state["slices"][0][self._connector_state_converter.START_KEY]
390
+ and self._start < self.state["slices"][0][self._connector_state_converter.START_KEY]
400
391
  )
401
392
 
402
393
  def _calculate_lower_boundary_of_last_slice(
@@ -152,3 +152,6 @@ class StreamSlice(Mapping[str, Any]):
152
152
 
153
153
  def __hash__(self) -> int:
154
154
  return hash(orjson.dumps(self._stream_slice, option=orjson.OPT_SORT_KEYS))
155
+
156
+ def __bool__(self) -> bool:
157
+ return bool(self._stream_slice) or bool(self._extra_fields)
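
With the `__bool__` override added above, a `StreamSlice` whose partition and cursor slice are both empty still evaluates as truthy when it carries `extra_fields` (such as the download `url` set in `http_job_repository.py`). A small stand-in class illustrating the semantics (not the real `StreamSlice`):

```python
# Stand-in for illustration; the real StreamSlice is a Mapping with more behavior.
from typing import Any, Dict


class SliceLike:
    def __init__(self, stream_slice: Dict[str, Any], extra_fields: Dict[str, Any]) -> None:
        self._stream_slice = stream_slice
        self._extra_fields = extra_fields

    def __bool__(self) -> bool:
        return bool(self._stream_slice) or bool(self._extra_fields)


print(bool(SliceLike({}, {})))                              # False
print(bool(SliceLike({}, {"url": "https://example.com"})))  # True
```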
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: airbyte-cdk
3
- Version: 6.17.1.dev1
3
+ Version: 6.18.0
4
4
  Summary: A framework for writing Airbyte Connectors.
5
5
  License: MIT
6
6
  Keywords: airbyte,connector-development-kit,cdk
@@ -62,11 +62,11 @@ airbyte_cdk/sources/declarative/checks/check_stream.py,sha256=dAA-UhmMj0WLXCkRQr
62
62
  airbyte_cdk/sources/declarative/checks/connection_checker.py,sha256=MBRJo6WJlZQHpIfOGaNOkkHUmgUl_4wDM6VPo41z5Ss,1383
63
63
  airbyte_cdk/sources/declarative/concurrency_level/__init__.py,sha256=5XUqrmlstYlMM0j6crktlKQwALek0uiz2D3WdM46MyA,191
64
64
  airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py,sha256=YIwCTCpOr_QSNW4ltQK0yUGWInI8PKNY216HOOegYLk,2101
65
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=VfDvff6ionjGScMbEpMGlZ0TfOyIQpMUZiuV6pkI9Os,26557
65
+ airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=tSTCSmyMCu1qoGsne1Ooz3c1da-8EDZk6Suiy2gIq9Q,22475
66
66
  airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=l9LG7Qm6e5r_qgqfVKnx3mXYtg1I9MmMjomVIPfU4XA,177
67
67
  airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=SX9JjdesN1edN2WVUVMzU_ptqp2QB1OnsnjZ4mwcX7w,2579
68
68
  airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=0BHBtDNQZfvwM45-tY5pNlTcKAFSGGNxemoi0Jic-0E,5785
69
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=GfZlk9EvYQiWDx3AipNLf1us1e986q2mgqcbHbeZU0k,133172
69
+ airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=2t3_QVXWOImPcH-apR_Xd8qNl6K_URFwBbQ47YHcjXg,133490
70
70
  airbyte_cdk/sources/declarative/declarative_source.py,sha256=nF7wBqFd3AQmEKAm4CnIo29CJoQL562cJGSCeL8U8bA,1531
71
71
  airbyte_cdk/sources/declarative/declarative_stream.py,sha256=JRyNeOIpsFu4ztVZsN6sncqUEIqIE-bUkD2TPgbMgk0,10375
72
72
  airbyte_cdk/sources/declarative/decoders/__init__.py,sha256=edGj4fGxznBk4xzRQyCA1rGfbpqe7z-RE0K3kQQWbgA,858
@@ -81,16 +81,15 @@ airbyte_cdk/sources/declarative/extractors/__init__.py,sha256=RmV-IkO1YLj0PSOrrq
81
81
  airbyte_cdk/sources/declarative/extractors/dpath_extractor.py,sha256=wR4Ol4MG2lt5UlqXF5EU_k7qa5cN4_-luu3PJ1PlO3A,3131
82
82
  airbyte_cdk/sources/declarative/extractors/http_selector.py,sha256=2zWZ4ewTqQC8VwkjS0xD_u350Km3SiYP7hpOOgiLg5o,1169
83
83
  airbyte_cdk/sources/declarative/extractors/record_extractor.py,sha256=XJELMjahAsaomlvQgN2zrNO0DJX0G0fr9r682gUz7Pg,691
84
- airbyte_cdk/sources/declarative/extractors/record_filter.py,sha256=yTdEkyDUSW2KbFkEwJJMlS963C955LgCCOVfTmmScpQ,3367
84
+ airbyte_cdk/sources/declarative/extractors/record_filter.py,sha256=OJ9xmhNWNwwzxYOeIrDy1GINb1zH9MBy6suC5tm2LSk,3545
85
85
  airbyte_cdk/sources/declarative/extractors/record_selector.py,sha256=tjNwcURmlyD-TGCScXvW95ThNKyPGcx2SiWbG1-H-sc,6552
86
86
  airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py,sha256=LhqGDfX06_dDYLKsIVnwQ_nAWCln-v8PV7Wgt_QVeTI,6533
87
87
  airbyte_cdk/sources/declarative/extractors/type_transformer.py,sha256=d6Y2Rfg8pMVEEnHllfVksWZdNVOU55yk34O03dP9muY,1626
88
- airbyte_cdk/sources/declarative/incremental/__init__.py,sha256=U1oZKtBaEC6IACmvziY9Wzg7Z8EgF4ZuR7NwvjlB_Sk,1255
89
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py,sha256=vU6bcVgjDFou7szl5UKxv2-theKSsV78oSME84-C78A,15043
88
+ airbyte_cdk/sources/declarative/incremental/__init__.py,sha256=huRz3KQJSUFmJCg5GPE9TckEBsB5TMsCa_THhJAhPVI,1037
90
89
  airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py,sha256=_UzUnSIUsDbRgbFTXgSyZEFb4ws-KdhdQPWO8mFbV7U,22028
91
90
  airbyte_cdk/sources/declarative/incremental/declarative_cursor.py,sha256=5Bhw9VRPyIuCaD0wmmq_L3DZsa-rJgtKSEUzSd8YYD0,536
92
91
  airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py,sha256=3_EEZop94bMitZaJd2PF5Q2Xt9v94tYg7p7YJz8tAFc,15869
93
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py,sha256=_FSJjAwL4Zu-i2CngnhTtx8j-NPVSBKj5LwDSPta3Cg,16305
92
+ airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py,sha256=hElcYijbOHjdLKOMA7W7aizEbf22r7OSApXALP875uI,15749
94
93
  airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py,sha256=2YBOA2NnwAeIKlIhSwUB_W-FaGnPcmrG_liY7b4mV2Y,8365
95
94
  airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py,sha256=10LFv1QPM-agVKl6eaANmEBOfd7gZgBrkoTcMggsieQ,4809
96
95
  airbyte_cdk/sources/declarative/interpolation/__init__.py,sha256=tjUJkn3B-iZ-p7RP2c3dVZejrGiQeooGmS5ibWTuUL4,437
@@ -107,12 +106,12 @@ airbyte_cdk/sources/declarative/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW
107
106
  airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py,sha256=iemy3fKLczcU0-Aor7tx5jcT6DRedKMqyK7kCOp01hg,3924
108
107
  airbyte_cdk/sources/declarative/migrations/state_migration.py,sha256=KWPjealMLKSMtajXgkdGgKg7EmTLR-CqqD7UIh0-eDU,794
109
108
  airbyte_cdk/sources/declarative/models/__init__.py,sha256=nUFxNCiKeYRVXuZEKA7GD-lTHxsiKcQ8FitZjKhPIvE,100
110
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=IZFT1m4d-zp5hQ0ayU06Vdxm6r3MEq-X2sOCo9SuG-k,93270
109
+ airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=3xWpeDNDGOw_I2pQ1LDiUhNBEWEvNAtd-HCi_1aklSQ,93666
111
110
  airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
112
111
  airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=Rir9_z3Kcd5Es0-LChrzk-0qubAsiK_RSEnLmK2OXm8,553
113
112
  airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=CXwTfD3wSQq3okcqwigpprbHhSURUokh4GK2OmOyKC8,9132
114
113
  airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=IWUOdF03o-aQn0Occo1BJCxU0Pz-QILk5L67nzw2thw,6803
115
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=lgFqJ8DP-cRizmvFKRd4Oy_ebgoT_AceMKIpuqoFm3c,112097
114
+ airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=dpRWxZaPghPcE5vGkI4swKDaXyLWLMAbvDoazuNSobU,109709
116
115
  airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=HJ-Syp3p7RpyR_OK0X_a2kSyISfu3W-PKrRI16iY0a8,957
117
116
  airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py,sha256=n82J15S8bjeMZ5uROu--P3hnbQoxkY5v7RPHYx7g7ro,2929
118
117
  airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=c5cuVFM6NFkuQqG8Z5IwkBuwDrvXZN1CunUOM_L0ezg,6892
@@ -120,6 +119,7 @@ airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py,sha25
120
119
  airbyte_cdk/sources/declarative/partition_routers/partition_router.py,sha256=YyEIzdmLd1FjbVP3QbQ2VFCLW_P-OGbVh6VpZShp54k,2218
121
120
  airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py,sha256=SKzKjSyfccq4dxGIh-J6ejrgkCHzaiTIazmbmeQiRD4,1942
122
121
  airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py,sha256=5bgXoJfBg_6i53krQMptAGb50XB5XoVfqQxKQhlLtBA,15383
122
+ airbyte_cdk/sources/declarative/requesters/README.md,sha256=WabtHlwHg_J34aL1Kwm8vboYqBaSgsFjq10qR-P2sx8,2658
123
123
  airbyte_cdk/sources/declarative/requesters/__init__.py,sha256=d7a3OoHbqaJDyyPli3nqqJ2yAW_SLX6XDaBAKOwvpxw,364
124
124
  airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py,sha256=SkEDcJxlT1683rNx93K9whoS0OyUukkuOfToGtgpF58,776
125
125
  airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py,sha256=1WZdpFmWL6W_Dko0qjflTaKIWeqt8jHT-D6HcujIp3s,884
@@ -134,7 +134,7 @@ airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.
134
134
  airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py,sha256=q0YkeYUUWO6iErUy0vjqiOkhg8_9d5YcCmtlpXAJJ9E,1314
135
135
  airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py,sha256=Tan66odx8VHzfdyyXMQkXz2pJYksllGqvxmpoajgcK4,669
136
136
  airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py,sha256=vhWsEKNTYEzZ4gerhHqnDNKu4wGIP485NAzpSQ5DRZg,7941
137
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py,sha256=o0520AmHMb7SAoeokVNwoOzuZzIAT6ryx9uFYGSOrs0,8664
137
+ airbyte_cdk/sources/declarative/requesters/http_job_repository.py,sha256=3GtOefPH08evlSUxaILkiKLTHbIspFY4qd5B3ZqNE60,10063
138
138
  airbyte_cdk/sources/declarative/requesters/http_requester.py,sha256=RqYPkgJFAWfcZBTc-JBcGHPm4JL1ZQOhs9GKU4MP2eE,14723
139
139
  airbyte_cdk/sources/declarative/requesters/paginators/__init__.py,sha256=uArbKs9JKNCt7t9tZoeWwjDpyI1HoPp29FNW0JzvaEM,644
140
140
  airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py,sha256=FnSl3qPvv5wD6ieAI2Ic5c4dqBk-3fRe4tCaWzq3YwM,11840
@@ -163,7 +163,7 @@ airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py,sha256=Aio
163
163
  airbyte_cdk/sources/declarative/retrievers/__init__.py,sha256=ix9m1dkR69DcXCXUKC5RK_ZZM7ojTLBQ4IkWQTfmfCk,456
164
164
  airbyte_cdk/sources/declarative/retrievers/async_retriever.py,sha256=kX9ltelK2xLIBWDJBK2ucrvVe5tc5xmhdbVbgsjvlxY,3696
165
165
  airbyte_cdk/sources/declarative/retrievers/retriever.py,sha256=XPLs593Xv8c5cKMc37XzUAYmzlXd1a7eSsspM-CMuWA,1696
166
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=kgnhVQxRlFqJs2-rDu2-QH-p-GzQU3nKmSp6_aq8u0s,24550
166
+ airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=jxQ_9xcVD07r9PKhofitAqMkdX1k8ZNyy50qz5NwkFs,24540
167
167
  airbyte_cdk/sources/declarative/schema/__init__.py,sha256=HztgVVaZdil5UfgUZcv_Hyy84r89_EKRwyO2hoewNVg,749
168
168
  airbyte_cdk/sources/declarative/schema/default_schema_loader.py,sha256=KTACrIE23a83wsm3Rd9Eb4K6-20lrGqYxTHNp9yxsso,1820
169
169
  airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py,sha256=H6A3NQ6kPPM-cUNPmdvDPc9xNzR1rQNrK95GbgCW334,8822
@@ -257,7 +257,7 @@ airbyte_cdk/sources/streams/concurrent/abstract_stream.py,sha256=3OB5VsvOkJmCxIM
257
257
  airbyte_cdk/sources/streams/concurrent/abstract_stream_facade.py,sha256=QTry1QCBUwJDw1QSCEvz23s7zIEx_7QMxkPq9j-oPIQ,1358
258
258
  airbyte_cdk/sources/streams/concurrent/adapters.py,sha256=QP_64kQo-b3sRNHZA5aqrgCJqAhIVegRM3vJ8jGyuSY,15213
259
259
  airbyte_cdk/sources/streams/concurrent/availability_strategy.py,sha256=4La5v2UffSjGnhmF4kwNIKt_g3RXk2ux1mSHA1ejgYM,2898
260
- airbyte_cdk/sources/streams/concurrent/cursor.py,sha256=suObbNi24so8Wcj0Wm32OkJAcuvODAOwp373YBmUPp0,21213
260
+ airbyte_cdk/sources/streams/concurrent/cursor.py,sha256=Hke6CpD8Sq1FS4g1Xuht39UN7hKkGy1mvOxvQrm1lLM,20810
261
261
  airbyte_cdk/sources/streams/concurrent/default_stream.py,sha256=K3rLMpYhS7nnmvwQ52lqBy7DQdFMJpvvT7sgBg_ckA8,3207
262
262
  airbyte_cdk/sources/streams/concurrent/exceptions.py,sha256=JOZ446MCLpmF26r9KfS6OO_6rGjcjgJNZdcw6jccjEI,468
263
263
  airbyte_cdk/sources/streams/concurrent/helpers.py,sha256=S6AW8TgIASCZ2UuUcQLE8OzgYUHWt2-KPOvNPwnQf-Q,1596
@@ -293,7 +293,7 @@ airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py,sha256=Y
293
293
  airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py,sha256=ka-bBRWvIv09LmZNYl49p2lK9nd_Tvi2g0lIp3OkU40,14872
294
294
  airbyte_cdk/sources/streams/http/requests_native_auth/token.py,sha256=h5PTzcdH-RQLeCg7xZ45w_484OPUDSwNWl_iMJQmZoI,2526
295
295
  airbyte_cdk/sources/streams/utils/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
296
- airbyte_cdk/sources/types.py,sha256=WWVigI7ZSoQU2TBCzDsHJtoX4Ima9v--lcLyYwUG_cE,4904
296
+ airbyte_cdk/sources/types.py,sha256=nLPkTpyfGV4E6e99qcBWX4r8C3fE4I8Fvgx2EjvT9ic,5005
297
297
  airbyte_cdk/sources/utils/__init__.py,sha256=TTN6VUxVy6Is8BhYQZR5pxJGQh8yH4duXh4O1TiMiEY,118
298
298
  airbyte_cdk/sources/utils/casing.py,sha256=QC-gV1O4e8DR4-bhdXieUPKm_JamzslVyfABLYYRSXA,256
299
299
  airbyte_cdk/sources/utils/record_helper.py,sha256=jeB0mucudzna7Zvj-pCBbwFrbLJ36SlAWZTh5O4Fb9Y,2168
@@ -343,8 +343,8 @@ airbyte_cdk/utils/slice_hasher.py,sha256=-pHexlNYoWYPnXNH-M7HEbjmeJe9Zk7SJijdQ7d
343
343
  airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
344
344
  airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
345
345
  airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
346
- airbyte_cdk-6.17.1.dev1.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
347
- airbyte_cdk-6.17.1.dev1.dist-info/METADATA,sha256=8TVLQbLq6-v0qkRHb8X4P9x2sYTe9EUjwdvMb2NVOpA,6005
348
- airbyte_cdk-6.17.1.dev1.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
349
- airbyte_cdk-6.17.1.dev1.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
350
- airbyte_cdk-6.17.1.dev1.dist-info/RECORD,,
346
+ airbyte_cdk-6.18.0.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
347
+ airbyte_cdk-6.18.0.dist-info/METADATA,sha256=RvVkgbg-LBbS5eGTntO-mp34yRIDMuPYZ26VRmSkhCA,6000
348
+ airbyte_cdk-6.18.0.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
349
+ airbyte_cdk-6.18.0.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
350
+ airbyte_cdk-6.18.0.dist-info/RECORD,,
@@ -1,340 +0,0 @@
1
- import copy
2
- import logging
3
-
4
- #
5
- # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
6
- #
7
- import threading
8
- from collections import OrderedDict
9
- from copy import deepcopy
10
- from datetime import timedelta
11
- from typing import Any, Callable, Iterable, Mapping, MutableMapping, Optional
12
-
13
- from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
14
- from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor
15
- from airbyte_cdk.sources.declarative.incremental.global_substream_cursor import (
16
- Timer,
17
- iterate_with_last_flag_and_state,
18
- )
19
- from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
20
- from airbyte_cdk.sources.message import MessageRepository
21
- from airbyte_cdk.sources.streams.checkpoint.per_partition_key_serializer import (
22
- PerPartitionKeySerializer,
23
- )
24
- from airbyte_cdk.sources.streams.concurrent.cursor import Cursor, CursorField
25
- from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
26
- from airbyte_cdk.sources.types import Record, StreamSlice, StreamState
27
-
28
- logger = logging.getLogger("airbyte")
29
-
30
-
31
- class ConcurrentCursorFactory:
32
- def __init__(self, create_function: Callable[..., Cursor]):
33
- self._create_function = create_function
34
-
35
- def create(self, stream_state: Mapping[str, Any], runtime_lookback_window: Any) -> Cursor:
36
- return self._create_function(
37
- stream_state=stream_state, runtime_lookback_window=runtime_lookback_window
38
- )
39
-
40
-
41
- class ConcurrentPerPartitionCursor(Cursor):
42
- """
43
- Manages state per partition when a stream has many partitions, to prevent data loss or duplication.
44
-
45
- **Partition Limitation and Limit Reached Logic**
46
-
47
- - **DEFAULT_MAX_PARTITIONS_NUMBER**: The maximum number of partitions to keep in memory (default is 10,000).
48
- - **_cursor_per_partition**: An ordered dictionary that stores cursors for each partition.
49
- - **_over_limit**: A counter that increments each time an oldest partition is removed when the limit is exceeded.
50
-
51
- The class ensures that the number of partitions tracked does not exceed the `DEFAULT_MAX_PARTITIONS_NUMBER` to prevent excessive memory usage.
52
-
53
- - When the number of partitions exceeds the limit, the oldest partitions are removed from `_cursor_per_partition`, and `_over_limit` is incremented accordingly.
54
- - The `limit_reached` method returns `True` when `_over_limit` exceeds `DEFAULT_MAX_PARTITIONS_NUMBER`, indicating that the global cursor should be used instead of per-partition cursors.
55
-
56
- This approach avoids unnecessary switching to a global cursor due to temporary spikes in partition counts, ensuring that switching is only done when a sustained high number of partitions is observed.
57
- """
58
-
59
- DEFAULT_MAX_PARTITIONS_NUMBER = 10000
60
- _NO_STATE: Mapping[str, Any] = {}
61
- _NO_CURSOR_STATE: Mapping[str, Any] = {}
62
- _KEY = 0
63
- _VALUE = 1
64
-
65
- def __init__(
66
- self,
67
- cursor_factory: ConcurrentCursorFactory,
68
- partition_router: PartitionRouter,
69
- stream_name: str,
70
- stream_namespace: Optional[str],
71
- stream_state: Any,
72
- message_repository: MessageRepository,
73
- connector_state_manager: ConnectorStateManager,
74
- cursor_field: CursorField,
75
- ) -> None:
76
- self._global_cursor: Mapping[str, Any] = {}
77
- self._stream_name = stream_name
78
- self._stream_namespace = stream_namespace
79
- self._message_repository = message_repository
80
- self._connector_state_manager = connector_state_manager
81
- self._cursor_field = cursor_field
82
-
83
- self._cursor_factory = cursor_factory
84
- self._partition_router = partition_router
85
-
86
- # The dict is ordered to ensure that once the maximum number of partitions is reached,
87
- # the oldest partitions can be efficiently removed, maintaining the most recent partitions.
88
- self._cursor_per_partition: OrderedDict[str, Cursor] = OrderedDict()
89
- self._state = {"states": []}
90
- self._semaphore_per_partition = OrderedDict()
91
- self._finished_partitions = set()
92
- self._lock = threading.Lock()
93
- self._timer = Timer()
94
- self._new_global_cursor = None
95
- self._lookback_window = 0
96
- self._parent_state = None
97
- self._over_limit = 0
98
- self._partition_serializer = PerPartitionKeySerializer()
99
-
100
- self._set_initial_state(stream_state)
101
-
102
- @property
103
- def cursor_field(self) -> CursorField:
104
- return self._cursor_field
105
-
106
- @property
107
- def state(self) -> MutableMapping[str, Any]:
108
- states = []
109
- for partition_tuple, cursor in self._cursor_per_partition.items():
110
- if cursor.state:
111
- states.append(
112
- {
113
- "partition": self._to_dict(partition_tuple),
114
- "cursor": copy.deepcopy(cursor.state),
115
- }
116
- )
117
- state: dict[str, Any] = {"states": states}
118
-
119
- if self._global_cursor:
120
- state["state"] = self._global_cursor
121
- if self._lookback_window is not None:
122
- state["lookback_window"] = self._lookback_window
123
- if self._parent_state is not None:
124
- state["parent_state"] = self._parent_state
125
- return state
126
-
127
- def close_partition(self, partition: Partition) -> None:
128
- self._cursor_per_partition[
129
- self._to_partition_key(partition._stream_slice.partition)
130
- ].close_partition(partition=partition)
131
- with self._lock:
132
- self._semaphore_per_partition[
133
- self._to_partition_key(partition._stream_slice.partition)
134
- ].acquire()
135
- cursor = self._cursor_per_partition[
136
- self._to_partition_key(partition._stream_slice.partition)
137
- ]
138
- if (
139
- self._to_partition_key(partition._stream_slice.partition)
140
- in self._finished_partitions
141
- and self._semaphore_per_partition[
142
- self._to_partition_key(partition._stream_slice.partition)
143
- ]._value
144
- == 0
145
- ):
146
- if (
147
- self._new_global_cursor is None
148
- or self._new_global_cursor[self.cursor_field.cursor_field_key]
149
- < cursor.state[self.cursor_field.cursor_field_key]
150
- ):
151
- self._new_global_cursor = copy.deepcopy(cursor.state)
152
-
153
- def ensure_at_least_one_state_emitted(self) -> None:
154
- """
155
- The platform expect to have at least one state message on successful syncs. Hence, whatever happens, we expect this method to be
156
- called.
157
- """
158
- if not any(
159
- semaphore_item[1]._value for semaphore_item in self._semaphore_per_partition.items()
160
- ):
161
- self._global_cursor = self._new_global_cursor
162
- self._lookback_window = self._timer.finish()
163
- self._parent_state = self._partition_router.get_stream_state()
164
- self._emit_state_message()
165
-
166
- def _emit_state_message(self) -> None:
167
- self._connector_state_manager.update_state_for_stream(
168
- self._stream_name,
169
- self._stream_namespace,
170
- self.state,
171
- )
172
- state_message = self._connector_state_manager.create_state_message(
173
- self._stream_name, self._stream_namespace
174
- )
175
- self._message_repository.emit_message(state_message)
176
-
177
- def stream_slices(self) -> Iterable[StreamSlice]:
178
- slices = self._partition_router.stream_slices()
179
- self._timer.start()
180
- for partition in slices:
181
- yield from self.generate_slices_from_partition(partition)
182
-
183
- def generate_slices_from_partition(self, partition: StreamSlice) -> Iterable[StreamSlice]:
184
- # Ensure the maximum number of partitions is not exceeded
185
- self._ensure_partition_limit()
186
-
187
- cursor = self._cursor_per_partition.get(self._to_partition_key(partition.partition))
188
- if not cursor:
189
- partition_state = self._global_cursor if self._global_cursor else self._NO_CURSOR_STATE
190
- cursor = self._create_cursor(partition_state)
191
- self._cursor_per_partition[self._to_partition_key(partition.partition)] = cursor
192
- self._semaphore_per_partition[self._to_partition_key(partition.partition)] = (
193
- threading.Semaphore(0)
194
- )
195
-
196
- for cursor_slice, is_last_slice, _ in iterate_with_last_flag_and_state(
197
- cursor.stream_slices(),
198
- lambda: None,
199
- ):
200
- self._semaphore_per_partition[self._to_partition_key(partition.partition)].release()
201
- if is_last_slice:
202
- self._finished_partitions.add(self._to_partition_key(partition.partition))
203
- yield StreamSlice(
204
- partition=partition, cursor_slice=cursor_slice, extra_fields=partition.extra_fields
205
- )
206
-
207
- def _ensure_partition_limit(self) -> None:
208
- """
209
- Ensure the maximum number of partitions is not exceeded. If so, the oldest added partition will be dropped.
210
- """
211
- while len(self._cursor_per_partition) > self.DEFAULT_MAX_PARTITIONS_NUMBER - 1:
212
- self._over_limit += 1
213
- oldest_partition = self._cursor_per_partition.popitem(last=False)[
214
- 0
215
- ] # Remove the oldest partition
216
- logger.warning(
217
- f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._over_limit}."
218
- )
219
-
220
- def limit_reached(self) -> bool:
221
- return self._over_limit > self.DEFAULT_MAX_PARTITIONS_NUMBER
222
-
223
- def _set_initial_state(self, stream_state: StreamState) -> None:
224
- """
225
- Set the initial state for the cursors.
226
-
227
- This method initializes the state for each partition cursor using the provided stream state.
228
- If a partition state is provided in the stream state, it will update the corresponding partition cursor with this state.
229
-
230
- Additionally, it sets the parent state for partition routers that are based on parent streams. If a partition router
231
- does not have parent streams, this step will be skipped due to the default PartitionRouter implementation.
232
-
233
- Args:
234
- stream_state (StreamState): The state of the streams to be set. The format of the stream state should be:
235
- {
236
- "states": [
237
- {
238
- "partition": {
239
- "partition_key": "value"
240
- },
241
- "cursor": {
242
- "last_updated": "2023-05-27T00:00:00Z"
243
- }
244
- }
245
- ],
246
- "parent_state": {
247
- "parent_stream_name": {
248
- "last_updated": "2023-05-27T00:00:00Z"
249
- }
250
- }
251
- }
252
- """
253
- if not stream_state:
254
- return
255
-
256
- if "states" not in stream_state:
257
- # We assume that `stream_state` is in a global format that can be applied to all partitions.
258
- # Example: {"global_state_format_key": "global_state_format_value"}
259
- self._global_cursor = deepcopy(stream_state)
260
- self._new_global_cursor = deepcopy(stream_state)
261
-
262
- else:
263
- self._lookback_window = stream_state.get("lookback_window")
264
-
265
- for state in stream_state["states"]:
266
- self._cursor_per_partition[self._to_partition_key(state["partition"])] = (
267
- self._create_cursor(
268
- state["cursor"], runtime_lookback_window=self._lookback_window
269
- )
270
- )
271
- self._semaphore_per_partition[self._to_partition_key(state["partition"])] = (
272
- threading.Semaphore(0)
273
- )
274
-
275
- # set default state for missing partitions if it is per partition with fallback to global
276
- if "state" in stream_state:
277
- self._global_cursor = deepcopy(stream_state["state"])
278
- self._new_global_cursor = deepcopy(stream_state["state"])
279
-
280
- # Set parent state for partition routers based on parent streams
281
- self._partition_router.set_initial_state(stream_state)
282
-
283
- def observe(self, record: Record) -> None:
284
- self._cursor_per_partition[
285
- self._to_partition_key(record.associated_slice.partition)
286
- ].observe(record)
287
-
288
- def _to_partition_key(self, partition: Mapping[str, Any]) -> str:
289
- return self._partition_serializer.to_partition_key(partition)
290
-
291
- def _to_dict(self, partition_key: str) -> Mapping[str, Any]:
292
- return self._partition_serializer.to_partition(partition_key)
293
-
294
- def _create_cursor(self, cursor_state: Any, runtime_lookback_window: Any = None) -> Cursor:
295
- if runtime_lookback_window:
296
- runtime_lookback_window = timedelta(seconds=runtime_lookback_window)
297
- cursor = self._cursor_factory.create(
298
- stream_state=deepcopy(cursor_state), runtime_lookback_window=runtime_lookback_window
299
- )
300
- return cursor
301
-
302
- def should_be_synced(self, record: Record) -> bool:
303
- return self._get_cursor(record).should_be_synced(record)
304
-
305
- def is_greater_than_or_equal(self, first: Record, second: Record) -> bool:
306
- if not first.associated_slice or not second.associated_slice:
307
- raise ValueError(
308
- f"Both records should have an associated slice but got {first.associated_slice} and {second.associated_slice}"
309
- )
310
- if first.associated_slice.partition != second.associated_slice.partition:
311
- raise ValueError(
312
- f"To compare records, partition should be the same but got {first.associated_slice.partition} and {second.associated_slice.partition}"
313
- )
314
-
315
- return self._get_cursor(first).is_greater_than_or_equal(
316
- self._convert_record_to_cursor_record(first),
317
- self._convert_record_to_cursor_record(second),
318
- )
319
-
320
- @staticmethod
321
- def _convert_record_to_cursor_record(record: Record) -> Record:
322
- return Record(
323
- record.data,
324
- StreamSlice(partition={}, cursor_slice=record.associated_slice.cursor_slice)
325
- if record.associated_slice
326
- else None,
327
- )
328
-
329
- def _get_cursor(self, record: Record) -> Cursor:
330
- if not record.associated_slice:
331
- raise ValueError(
332
- "Invalid state as stream slices that are emitted should refer to an existing cursor"
333
- )
334
- partition_key = self._to_partition_key(record.associated_slice.partition)
335
- if partition_key not in self._cursor_per_partition:
336
- raise ValueError(
337
- "Invalid state as stream slices that are emitted should refer to an existing cursor"
338
- )
339
- cursor = self._cursor_per_partition[partition_key]
340
- return cursor