airbyte-cdk 6.41.9.dev4101__py3-none-any.whl → 6.42.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/connector_builder/connector_builder_handler.py +25 -0
- airbyte_cdk/connector_builder/main.py +3 -0
- airbyte_cdk/models/__init__.py +0 -1
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +0 -1
- airbyte_cdk/sources/declarative/async_job/job.py +6 -0
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +18 -18
- airbyte_cdk/sources/declarative/async_job/job_tracker.py +22 -6
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +0 -22
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +71 -39
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +2 -1
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +17 -2
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +48 -25
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +45 -24
- airbyte_cdk/sources/declarative/partition_routers/__init__.py +4 -0
- airbyte_cdk/sources/declarative/partition_routers/grouping_partition_router.py +150 -0
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +5 -1
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +4 -17
- airbyte_cdk/sources/file_based/file_types/file_transfer.py +8 -2
- airbyte_cdk/sources/streams/concurrent/default_stream.py +0 -3
- airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +4 -0
- airbyte_cdk/sources/types.py +0 -11
- airbyte_cdk/sources/utils/record_helper.py +1 -8
- {airbyte_cdk-6.41.9.dev4101.dist-info → airbyte_cdk-6.42.0.dist-info}/METADATA +2 -2
- {airbyte_cdk-6.41.9.dev4101.dist-info → airbyte_cdk-6.42.0.dist-info}/RECORD +28 -29
- airbyte_cdk/sources/declarative/retrievers/file_uploader.py +0 -61
- airbyte_cdk/sources/utils/files_directory.py +0 -15
- {airbyte_cdk-6.41.9.dev4101.dist-info → airbyte_cdk-6.42.0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.41.9.dev4101.dist-info → airbyte_cdk-6.42.0.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.41.9.dev4101.dist-info → airbyte_cdk-6.42.0.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.41.9.dev4101.dist-info → airbyte_cdk-6.42.0.dist-info}/entry_points.txt +0 -0
@@ -1890,9 +1890,10 @@ class DeclarativeSource1(BaseModel):
     spec: Optional[Spec] = None
     concurrency_level: Optional[ConcurrencyLevel] = None
     api_budget: Optional[HTTPAPIBudget] = None
-    max_concurrent_async_job_count: Optional[int] = Field(
+    max_concurrent_async_job_count: Optional[Union[int, str]] = Field(
         None,
         description="Maximum number of concurrent asynchronous jobs to run. This property is only relevant for sources/streams that support asynchronous job execution through the AsyncRetriever (e.g. a report-based stream that initiates a job, polls the job status, and then fetches the job results). This is often set by the API's maximum number of concurrent jobs on the account level. Refer to the API's documentation for this information.",
+        examples=[3, "{{ config['max_concurrent_async_job_count'] }}"],
         title="Maximum Concurrent Asynchronous Jobs",
     )
     metadata: Optional[Dict[str, Any]] = Field(
@@ -1922,9 +1923,10 @@ class DeclarativeSource2(BaseModel):
     spec: Optional[Spec] = None
     concurrency_level: Optional[ConcurrencyLevel] = None
     api_budget: Optional[HTTPAPIBudget] = None
-    max_concurrent_async_job_count: Optional[int] = Field(
+    max_concurrent_async_job_count: Optional[Union[int, str]] = Field(
         None,
         description="Maximum number of concurrent asynchronous jobs to run. This property is only relevant for sources/streams that support asynchronous job execution through the AsyncRetriever (e.g. a report-based stream that initiates a job, polls the job status, and then fetches the job results). This is often set by the API's maximum number of concurrent jobs on the account level. Refer to the API's documentation for this information.",
+        examples=[3, "{{ config['max_concurrent_async_job_count'] }}"],
         title="Maximum Concurrent Asynchronous Jobs",
     )
     metadata: Optional[Dict[str, Any]] = Field(
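Since `max_concurrent_async_job_count` now accepts either an integer or an interpolated string, the cap on concurrent async jobs can be sourced from the connector configuration at runtime. A minimal sketch of such a manifest fragment, written as the Python dict the declarative parser consumes (the config key name is illustrative only):

    # Hypothetical fragment: the async job cap may be a literal int or an
    # interpolated string resolved against the user's config at runtime.
    source_fragment = {
        "type": "DeclarativeSource",
        "max_concurrent_async_job_count": "{{ config['max_concurrent_async_job_count'] }}",
        # streams, spec, check, etc. omitted
    }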
@@ -2278,22 +2280,6 @@ class StateDelegatingStream(BaseModel):
     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
 
 
-class FileUploader(BaseModel):
-    type: Literal["FileUploader"]
-    requester: Union[CustomRequester, HttpRequester] = Field(
-        ...,
-        description="Requester component that describes how to prepare HTTP requests to send to the source API.",
-    )
-    download_target_extractor: Union[CustomRecordExtractor, DpathExtractor] = Field(
-        ...,
-        description="Responsible for fetching the url where the file is located. This is applied on each records and not on the HTTP response",
-    )
-    file_extractor: Optional[Union[CustomRecordExtractor, DpathExtractor]] = Field(
-        None,
-        description="Responsible for fetching the content of the file. If not defined, the assumption is that the whole response body is the file content",
-    )
-
-
 class SimpleRetriever(BaseModel):
     type: Literal["SimpleRetriever"]
     record_selector: RecordSelector = Field(
@@ -2317,18 +2303,21 @@ class SimpleRetriever(BaseModel):
             CustomPartitionRouter,
             ListPartitionRouter,
             SubstreamPartitionRouter,
-            List[Union[CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter]],
+            GroupingPartitionRouter,
+            List[
+                Union[
+                    CustomPartitionRouter,
+                    ListPartitionRouter,
+                    SubstreamPartitionRouter,
+                    GroupingPartitionRouter,
+                ]
+            ],
         ]
     ] = Field(
         [],
         description="PartitionRouter component that describes how to partition the stream, enabling incremental syncs and checkpointing.",
         title="Partition Router",
     )
-    file_uploader: Optional[FileUploader] = Field(
-        None,
-        description="(experimental) Describes how to fetch a file",
-        title="File Uploader",
-    )
     decoder: Optional[
         Union[
             CustomDecoder,
@@ -2404,7 +2393,15 @@ class AsyncRetriever(BaseModel):
             CustomPartitionRouter,
             ListPartitionRouter,
             SubstreamPartitionRouter,
-            List[Union[CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter]],
+            GroupingPartitionRouter,
+            List[
+                Union[
+                    CustomPartitionRouter,
+                    ListPartitionRouter,
+                    SubstreamPartitionRouter,
+                    GroupingPartitionRouter,
+                ]
+            ],
         ]
     ] = Field(
         [],
@@ -2456,6 +2453,29 @@ class SubstreamPartitionRouter(BaseModel):
     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
 
 
+class GroupingPartitionRouter(BaseModel):
+    type: Literal["GroupingPartitionRouter"]
+    group_size: int = Field(
+        ...,
+        description="The number of partitions to include in each group. This determines how many partition values are batched together in a single slice.",
+        examples=[10, 50],
+        title="Group Size",
+    )
+    underlying_partition_router: Union[
+        CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter
+    ] = Field(
+        ...,
+        description="The partition router whose output will be grouped. This can be any valid partition router component.",
+        title="Underlying Partition Router",
+    )
+    deduplicate: Optional[bool] = Field(
+        True,
+        description="If true, ensures that partitions are unique within each group by removing duplicates based on the partition key.",
+        title="Deduplicate Partitions",
+    )
+    parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
+
+
 class HttpComponentsResolver(BaseModel):
     type: Literal["HttpComponentsResolver"]
     retriever: Union[AsyncRetriever, CustomRetriever, SimpleRetriever] = Field(
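Based on the fields above, a GroupingPartitionRouter wraps one of the existing routers and batches its output. A minimal sketch of a valid component definition, expressed as a Python dict (the list values and cursor_field are illustrative only):

    # Hypothetical component definition: batch the partitions produced by a
    # ListPartitionRouter into groups of 10, de-duplicating within each group.
    grouping_router_definition = {
        "type": "GroupingPartitionRouter",
        "group_size": 10,
        "deduplicate": True,
        "underlying_partition_router": {
            "type": "ListPartitionRouter",
            "values": ["1", "2", "3"],
            "cursor_field": "board_ids",
        },
    }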
@@ -2469,6 +2489,9 @@ class HttpComponentsResolver(BaseModel):
 
 class DynamicDeclarativeStream(BaseModel):
     type: Literal["DynamicDeclarativeStream"]
+    name: Optional[str] = Field(
+        "", description="The dynamic stream name.", example=["Tables"], title="Name"
+    )
     stream_template: DeclarativeStream = Field(
         ..., description="Reference to the stream template.", title="Stream Template"
     )
@@ -102,6 +102,7 @@ from airbyte_cdk.sources.declarative.migrations.legacy_to_per_partition_state_mi
 )
 from airbyte_cdk.sources.declarative.models import (
     CustomStateMigration,
+    GzipDecoder,
 )
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     AddedFieldDefinition as AddedFieldDefinitionModel,
@@ -220,15 +221,15 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     ExponentialBackoffStrategy as ExponentialBackoffStrategyModel,
 )
-from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
-    FileUploader as FileUploaderModel,
-)
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     FixedWindowCallRatePolicy as FixedWindowCallRatePolicyModel,
 )
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     FlattenFields as FlattenFieldsModel,
 )
+from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
+    GroupingPartitionRouter as GroupingPartitionRouterModel,
+)
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     GzipDecoder as GzipDecoderModel,
 )
@@ -387,6 +388,7 @@ from airbyte_cdk.sources.declarative.parsers.custom_code_compiler import (
 )
 from airbyte_cdk.sources.declarative.partition_routers import (
     CartesianProductStreamSlicer,
+    GroupingPartitionRouter,
     ListPartitionRouter,
     PartitionRouter,
     SinglePartitionRouter,
@@ -444,7 +446,6 @@ from airbyte_cdk.sources.declarative.retrievers import (
     SimpleRetriever,
     SimpleRetrieverTestReadDecorator,
 )
-from airbyte_cdk.sources.declarative.retrievers.file_uploader import FileUploader
 from airbyte_cdk.sources.declarative.schema import (
     ComplexFieldType,
     DefaultSchemaLoader,
@@ -636,12 +637,12 @@ class ModelToComponentFactory:
             ComponentMappingDefinitionModel: self.create_components_mapping_definition,
             ZipfileDecoderModel: self.create_zipfile_decoder,
             HTTPAPIBudgetModel: self.create_http_api_budget,
-            FileUploaderModel: self.create_file_uploader,
             FixedWindowCallRatePolicyModel: self.create_fixed_window_call_rate_policy,
             MovingWindowCallRatePolicyModel: self.create_moving_window_call_rate_policy,
             UnlimitedCallRatePolicyModel: self.create_unlimited_call_rate_policy,
             RateModel: self.create_rate,
             HttpRequestRegexMatcherModel: self.create_http_request_matcher,
+            GroupingPartitionRouterModel: self.create_grouping_partition_router,
         }
 
         # Needed for the case where we need to perform a second parse on the fields of a custom component
@@ -1359,6 +1360,9 @@ class ModelToComponentFactory:
         )
         stream_state = self.apply_stream_state_migrations(stream_state_migrations, stream_state)
 
+        # Per-partition state doesn't make sense for GroupingPartitionRouter, so force the global state
+        use_global_cursor = isinstance(partition_router, GroupingPartitionRouter)
+
         # Return the concurrent cursor and state converter
         return ConcurrentPerPartitionCursor(
             cursor_factory=cursor_factory,
@@ -1370,6 +1374,7 @@ class ModelToComponentFactory:
             connector_state_manager=state_manager,
             connector_state_converter=connector_state_converter,
             cursor_field=cursor_field,
+            use_global_cursor=use_global_cursor,
         )
 
     @staticmethod
@@ -3077,8 +3082,11 @@ class ModelToComponentFactory:
                 stream_slices,
                 self._job_tracker,
                 self._message_repository,
-                has_bulk_parent=False,
                 # FIXME work would need to be done here in order to detect if a stream as a parent stream that is bulk
+                has_bulk_parent=False,
+                # set the `job_max_retry` to 1 for the `Connector Builder`` use-case.
+                # `None` == default retry is set to 3 attempts, under the hood.
+                job_max_retry=1 if self._emit_connector_builder_messages else None,
             ),
             stream_slicer=stream_slicer,
             config=config,
@@ -3322,24 +3330,6 @@ class ModelToComponentFactory:
             matchers=matchers,
         )
 
-    def create_file_uploader(
-        self, model: FileUploaderModel, config: Config, **kwargs: Any
-    ) -> FileUploader:
-        name = "File Uploader"
-        requester = self._create_component_from_model(
-            model=model.requester,
-            config=config,
-            name=name,
-            **kwargs,
-        )
-        download_target_extractor = self._create_component_from_model(
-            model=model.download_target_extractor,
-            config=config,
-            name=name,
-            **kwargs,
-        )
-        return FileUploader(requester, download_target_extractor)
-
     def create_moving_window_call_rate_policy(
         self, model: MovingWindowCallRatePolicyModel, config: Config, **kwargs: Any
     ) -> MovingWindowCallRatePolicy:
@@ -3389,3 +3379,34 @@ class ModelToComponentFactory:
         self._api_budget = self.create_component(
             model_type=HTTPAPIBudgetModel, component_definition=component_definition, config=config
         )
+
+    def create_grouping_partition_router(
+        self, model: GroupingPartitionRouterModel, config: Config, **kwargs: Any
+    ) -> GroupingPartitionRouter:
+        underlying_router = self._create_component_from_model(
+            model=model.underlying_partition_router, config=config
+        )
+        if model.group_size < 1:
+            raise ValueError(f"Group size must be greater than 0, got {model.group_size}")
+
+        # Request options in underlying partition routers are not supported for GroupingPartitionRouter
+        # because they are specific to individual partitions and cannot be aggregated or handled
+        # when grouping, potentially leading to incorrect API calls. Any request customization
+        # should be managed at the stream level through the requester's configuration.
+        if isinstance(underlying_router, SubstreamPartitionRouter):
+            if any(
+                parent_config.request_option
+                for parent_config in underlying_router.parent_stream_configs
+            ):
+                raise ValueError("Request options are not supported for GroupingPartitionRouter.")
+
+        if isinstance(underlying_router, ListPartitionRouter):
+            if underlying_router.request_option:
+                raise ValueError("Request options are not supported for GroupingPartitionRouter.")
+
+        return GroupingPartitionRouter(
+            group_size=model.group_size,
+            underlying_partition_router=underlying_router,
+            deduplicate=model.deduplicate if model.deduplicate is not None else True,
+            config=config,
+        )
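A consequence of the validation above, sketched with a hypothetical (invalid) definition: attaching a request_option to the wrapped router is rejected at build time, since per-partition request injection cannot be aggregated across a group.

    # Hypothetical invalid definition: the wrapped ListPartitionRouter injects each
    # partition value into requests, which cannot be aggregated across a grouped slice.
    invalid_definition = {
        "type": "GroupingPartitionRouter",
        "group_size": 10,
        "underlying_partition_router": {
            "type": "ListPartitionRouter",
            "values": ["1", "2", "3"],
            "cursor_field": "board_ids",
            "request_option": {
                "type": "RequestOption",
                "inject_into": "request_parameter",
                "field_name": "board_ids",
            },
        },
    }
    # Building this with the factory raises:
    # ValueError("Request options are not supported for GroupingPartitionRouter.")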
@@ -8,6 +8,9 @@ from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_route
 from airbyte_cdk.sources.declarative.partition_routers.cartesian_product_stream_slicer import (
     CartesianProductStreamSlicer,
 )
+from airbyte_cdk.sources.declarative.partition_routers.grouping_partition_router import (
+    GroupingPartitionRouter,
+)
 from airbyte_cdk.sources.declarative.partition_routers.list_partition_router import (
     ListPartitionRouter,
 )
@@ -22,6 +25,7 @@ from airbyte_cdk.sources.declarative.partition_routers.substream_partition_route
 __all__ = [
     "AsyncJobPartitionRouter",
     "CartesianProductStreamSlicer",
+    "GroupingPartitionRouter",
     "ListPartitionRouter",
     "SinglePartitionRouter",
     "SubstreamPartitionRouter",
@@ -0,0 +1,150 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+from dataclasses import dataclass
+from typing import Any, Iterable, Mapping, Optional
+
+from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
+from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
+
+
+@dataclass
+class GroupingPartitionRouter(PartitionRouter):
+    """
+    A partition router that groups partitions from an underlying partition router into batches of a specified size.
+    This is useful for APIs that support filtering by multiple partition keys in a single request.
+
+    Attributes:
+        group_size (int): The number of partitions to include in each group.
+        underlying_partition_router (PartitionRouter): The partition router whose output will be grouped.
+        deduplicate (bool): If True, ensures unique partitions within each group by removing duplicates based on the partition key.
+        config (Config): The connector configuration.
+        parameters (Mapping[str, Any]): Additional parameters for interpolation and configuration.
+    """
+
+    group_size: int
+    underlying_partition_router: PartitionRouter
+    config: Config
+    deduplicate: bool = True
+
+    def __post_init__(self) -> None:
+        self._state: Optional[Mapping[str, StreamState]] = {}
+
+    def stream_slices(self) -> Iterable[StreamSlice]:
+        """
+        Lazily groups partitions from the underlying partition router into batches of size `group_size`.
+
+        This method processes partitions one at a time from the underlying router, maintaining a batch buffer.
+        When the buffer reaches `group_size` or the underlying router is exhausted, it yields a grouped slice.
+        If deduplication is enabled, it tracks seen partition keys to ensure uniqueness within the current batch.
+
+        Yields:
+            Iterable[StreamSlice]: An iterable of StreamSlice objects, where each slice contains a batch of partition values.
+        """
+        batch = []
+        seen_keys = set()
+
+        # Iterate over partitions lazily from the underlying router
+        for partition in self.underlying_partition_router.stream_slices():
+            # Extract the partition key (assuming single key-value pair, e.g., {"board_ids": value})
+            partition_keys = list(partition.partition.keys())
+            # skip parent_slice as it is part of SubstreamPartitionRouter partition
+            if "parent_slice" in partition_keys:
+                partition_keys.remove("parent_slice")
+            if len(partition_keys) != 1:
+                raise ValueError(
+                    f"GroupingPartitionRouter expects a single partition key-value pair. Got {partition.partition}"
+                )
+            key = partition.partition[partition_keys[0]]
+
+            # Skip duplicates if deduplication is enabled
+            if self.deduplicate and key in seen_keys:
+                continue
+
+            # Add partition to the batch
+            batch.append(partition)
+            if self.deduplicate:
+                seen_keys.add(key)
+
+            # Yield the batch when it reaches the group_size
+            if len(batch) == self.group_size:
+                self._state = self.underlying_partition_router.get_stream_state()
+                yield self._create_grouped_slice(batch)
+                batch = []  # Reset the batch
+
+        self._state = self.underlying_partition_router.get_stream_state()
+        # Yield any remaining partitions if the batch isn't empty
+        if batch:
+            yield self._create_grouped_slice(batch)
+
+    def _create_grouped_slice(self, batch: list[StreamSlice]) -> StreamSlice:
+        """
+        Creates a grouped StreamSlice from a batch of partitions, aggregating extra fields into a dictionary with list values.
+
+        Args:
+            batch (list[StreamSlice]): A list of StreamSlice objects to group.
+
+        Returns:
+            StreamSlice: A single StreamSlice with combined partition and extra field values.
+        """
+        # Combine partition values into a single dict with lists
+        grouped_partition = {
+            key: [p.partition.get(key) for p in batch] for key in batch[0].partition.keys()
+        }
+
+        # Aggregate extra fields into a dict with list values
+        extra_fields_dict = (
+            {
+                key: [p.extra_fields.get(key) for p in batch]
+                for key in set().union(*(p.extra_fields.keys() for p in batch if p.extra_fields))
+            }
+            if any(p.extra_fields for p in batch)
+            else {}
+        )
+        return StreamSlice(
+            partition=grouped_partition,
+            cursor_slice={},  # Cursor is managed by the underlying router or incremental sync
+            extra_fields=extra_fields_dict,
+        )
+
+    def get_request_params(
+        self,
+        stream_state: Optional[StreamState] = None,
+        stream_slice: Optional[StreamSlice] = None,
+        next_page_token: Optional[Mapping[str, Any]] = None,
+    ) -> Mapping[str, Any]:
+        return {}
+
+    def get_request_headers(
+        self,
+        stream_state: Optional[StreamState] = None,
+        stream_slice: Optional[StreamSlice] = None,
+        next_page_token: Optional[Mapping[str, Any]] = None,
+    ) -> Mapping[str, Any]:
+        return {}
+
+    def get_request_body_data(
+        self,
+        stream_state: Optional[StreamState] = None,
+        stream_slice: Optional[StreamSlice] = None,
+        next_page_token: Optional[Mapping[str, Any]] = None,
+    ) -> Mapping[str, Any]:
+        return {}
+
+    def get_request_body_json(
+        self,
+        stream_state: Optional[StreamState] = None,
+        stream_slice: Optional[StreamSlice] = None,
+        next_page_token: Optional[Mapping[str, Any]] = None,
+    ) -> Mapping[str, Any]:
+        return {}
+
+    def set_initial_state(self, stream_state: StreamState) -> None:
+        """Delegate state initialization to the underlying partition router."""
+        self.underlying_partition_router.set_initial_state(stream_state)
+        self._state = self.underlying_partition_router.get_stream_state()
+
+    def get_stream_state(self) -> Optional[Mapping[str, StreamState]]:
+        """Delegate state retrieval to the underlying partition router."""
+        return self._state
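A minimal sketch of how the new router batches slices at runtime, assuming ListPartitionRouter's usual dataclass constructor; the values and partition key below are illustrative only:

    from airbyte_cdk.sources.declarative.partition_routers import (
        GroupingPartitionRouter,
        ListPartitionRouter,
    )

    # Three single-value partitions grouped into batches of two.
    underlying = ListPartitionRouter(
        values=["1", "2", "3"], cursor_field="board_ids", config={}, parameters={}
    )
    router = GroupingPartitionRouter(
        group_size=2, underlying_partition_router=underlying, config={}
    )

    for grouped_slice in router.stream_slices():
        # Expected partitions: {"board_ids": ["1", "2"]}, then {"board_ids": ["3"]}
        print(grouped_slice.partition)

Deduplication is on by default, so a repeated partition value would contribute only one entry to a group.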
@@ -374,7 +374,11 @@ class SubstreamPartitionRouter(PartitionRouter):
         # Ignore per-partition states or invalid formats.
         if isinstance(substream_state, (list, dict)) or len(substream_state_values) != 1:
             # If a global state is present under the key "state", use its first value.
-            if "state" in stream_state and isinstance(stream_state["state"], dict):
+            if (
+                "state" in stream_state
+                and isinstance(stream_state["state"], dict)
+                and stream_state["state"] != {}
+            ):
                 substream_state = list(stream_state["state"].values())[0]
             else:
                 return {}
@@ -3,7 +3,6 @@
 from typing import Any, Iterable, Mapping, Optional
 
 from airbyte_cdk.sources.declarative.retrievers import Retriever
-from airbyte_cdk.sources.declarative.retrievers.file_uploader import FileUploader
 from airbyte_cdk.sources.message import MessageRepository
 from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
 from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
@@ -19,7 +18,6 @@ class DeclarativePartitionFactory:
         json_schema: Mapping[str, Any],
         retriever: Retriever,
         message_repository: MessageRepository,
-        file_uploader: Optional[FileUploader] = None,
     ) -> None:
         """
         The DeclarativePartitionFactory takes a retriever_factory and not a retriever directly. The reason is that our components are not
@@ -30,7 +28,6 @@ class DeclarativePartitionFactory:
         self._json_schema = json_schema
         self._retriever = retriever
         self._message_repository = message_repository
-        self._file_uploader = file_uploader
 
     def create(self, stream_slice: StreamSlice) -> Partition:
         return DeclarativePartition(
@@ -38,7 +35,6 @@ class DeclarativePartitionFactory:
             self._json_schema,
             self._retriever,
             self._message_repository,
-            self._file_uploader,
             stream_slice,
         )
 
@@ -50,32 +46,23 @@ class DeclarativePartition(Partition):
         json_schema: Mapping[str, Any],
         retriever: Retriever,
         message_repository: MessageRepository,
-        file_uploader: Optional[FileUploader],
         stream_slice: StreamSlice,
     ):
         self._stream_name = stream_name
         self._json_schema = json_schema
         self._retriever = retriever
         self._message_repository = message_repository
-        self._file_uploader = file_uploader
         self._stream_slice = stream_slice
         self._hash = SliceHasher.hash(self._stream_name, self._stream_slice)
 
     def read(self) -> Iterable[Record]:
         for stream_data in self._retriever.read_records(self._json_schema, self._stream_slice):
             if isinstance(stream_data, Mapping):
-                record = (
-                    stream_data
-                    if isinstance(stream_data, Record)
-                    else Record(
-                        data=stream_data,
-                        stream_name=self.stream_name(),
-                        associated_slice=self._stream_slice,
-                    )
+                yield Record(
+                    data=stream_data,
+                    stream_name=self.stream_name(),
+                    associated_slice=self._stream_slice,
                 )
-                if self._file_uploader:
-                    self._file_uploader.upload(record)
-                yield record
             else:
                 self._message_repository.emit_message(stream_data)
 
@@ -8,12 +8,18 @@ from typing import Any, Dict, Iterable
 from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
 from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader
 from airbyte_cdk.sources.file_based.remote_file import RemoteFile
-
+
+AIRBYTE_STAGING_DIRECTORY = os.getenv("AIRBYTE_STAGING_DIRECTORY", "/staging/files")
+DEFAULT_LOCAL_DIRECTORY = "/tmp/airbyte-file-transfer"
 
 
 class FileTransfer:
     def __init__(self) -> None:
-        self._local_directory =
+        self._local_directory = (
+            AIRBYTE_STAGING_DIRECTORY
+            if os.path.exists(AIRBYTE_STAGING_DIRECTORY)
+            else DEFAULT_LOCAL_DIRECTORY
+        )
 
     def get_file(
         self,
@@ -29,7 +29,6 @@ class DefaultStream(AbstractStream):
         logger: Logger,
         cursor: Cursor,
         namespace: Optional[str] = None,
-        supports_file_transfer: bool = False,
     ) -> None:
         self._stream_partition_generator = partition_generator
         self._name = name
@@ -40,7 +39,6 @@ class DefaultStream(AbstractStream):
         self._logger = logger
         self._cursor = cursor
         self._namespace = namespace
-        self._supports_file_transfer = supports_file_transfer
 
     def generate_partitions(self) -> Iterable[Partition]:
         yield from self._stream_partition_generator.generate()
@@ -70,7 +68,6 @@ class DefaultStream(AbstractStream):
             json_schema=dict(self._json_schema),
             supported_sync_modes=[SyncMode.full_refresh],
             is_resumable=False,
-            is_file_based=self._supports_file_transfer,
         )
 
         if self._namespace:
@@ -71,6 +71,10 @@ class AbstractStreamStateConverter(ABC):
         for stream_slice in state.get("slices", []):
             stream_slice[self.START_KEY] = self._from_state_message(stream_slice[self.START_KEY])
             stream_slice[self.END_KEY] = self._from_state_message(stream_slice[self.END_KEY])
+            if self.MOST_RECENT_RECORD_KEY in stream_slice:
+                stream_slice[self.MOST_RECENT_RECORD_KEY] = self._from_state_message(
+                    stream_slice[self.MOST_RECENT_RECORD_KEY]
+                )
         return state
 
     def serialize(
airbyte_cdk/sources/types.py CHANGED
@@ -6,7 +6,6 @@ from __future__ import annotations
 
 from typing import Any, ItemsView, Iterator, KeysView, List, Mapping, Optional, ValuesView
 
-from airbyte_cdk.models import AirbyteRecordMessageFileReference
 from airbyte_cdk.utils.slice_hasher import SliceHasher
 
 # A FieldPointer designates a path to a field inside a mapping. For example, retrieving ["k1", "k1.2"] in the object {"k1" :{"k1.2":
@@ -25,13 +24,11 @@ class Record(Mapping[str, Any]):
         stream_name: str,
         associated_slice: Optional[StreamSlice] = None,
         is_file_transfer_message: bool = False,
-        file_reference: Optional[AirbyteRecordMessageFileReference] = None,
     ):
         self._data = data
         self._associated_slice = associated_slice
         self.stream_name = stream_name
         self.is_file_transfer_message = is_file_transfer_message
-        self._file_reference = file_reference
 
     @property
     def data(self) -> Mapping[str, Any]:
@@ -41,14 +38,6 @@ class Record(Mapping[str, Any]):
     def associated_slice(self) -> Optional[StreamSlice]:
         return self._associated_slice
 
-    @property
-    def file_reference(self) -> AirbyteRecordMessageFileReference:
-        return self._file_reference
-
-    @file_reference.setter
-    def file_reference(self, value: AirbyteRecordMessageFileReference):
-        self._file_reference = value
-
     def __repr__(self) -> str:
         return repr(self._data)
 
@@ -9,7 +9,6 @@ from airbyte_cdk.models import (
     AirbyteLogMessage,
     AirbyteMessage,
     AirbyteRecordMessage,
-    AirbyteRecordMessageFileReference,
     AirbyteTraceMessage,
 )
 from airbyte_cdk.models import Type as MessageType
@@ -24,7 +23,6 @@ def stream_data_to_airbyte_message(
     transformer: TypeTransformer = TypeTransformer(TransformConfig.NoTransform),
     schema: Optional[Mapping[str, Any]] = None,
     is_file_transfer_message: bool = False,
-    file_reference: Optional[AirbyteRecordMessageFileReference] = None,
 ) -> AirbyteMessage:
     if schema is None:
         schema = {}
|
                     stream=stream_name, file=data, emitted_at=now_millis, data={}
                 )
             else:
-                message = AirbyteRecordMessage(
-                    stream=stream_name,
-                    data=data,
-                    emitted_at=now_millis,
-                    file_reference=file_reference,
-                )
+                message = AirbyteRecordMessage(stream=stream_name, data=data, emitted_at=now_millis)
             return AirbyteMessage(type=MessageType.RECORD, record=message)
         case AirbyteTraceMessage():
             return AirbyteMessage(type=MessageType.TRACE, trace=data_or_message)
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: airbyte-cdk
-Version: 6.41.9.dev4101
+Version: 6.42.0
 Summary: A framework for writing Airbyte Connectors.
 Home-page: https://airbyte.com
 License: MIT
@@ -22,7 +22,7 @@ Provides-Extra: sql
 Provides-Extra: vector-db-based
 Requires-Dist: Jinja2 (>=3.1.2,<3.2.0)
 Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
-Requires-Dist: airbyte-protocol-models-dataclasses (
+Requires-Dist: airbyte-protocol-models-dataclasses (>=0.14,<0.15)
 Requires-Dist: anyascii (>=0.3.2,<0.4.0)
 Requires-Dist: avro (>=1.11.2,<1.13.0) ; extra == "file-based"
 Requires-Dist: backoff