airbyte-cdk 6.41.9__py3-none-any.whl → 6.41.9.dev4101__py3-none-any.whl

This diff compares the contents of two publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
Files changed (27)
  1. airbyte_cdk/models/__init__.py +1 -0
  2. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +1 -0
  3. airbyte_cdk/sources/declarative/async_job/job.py +0 -6
  4. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +18 -18
  5. airbyte_cdk/sources/declarative/async_job/job_tracker.py +6 -22
  6. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +22 -0
  7. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +39 -64
  8. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +1 -2
  9. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +25 -45
  10. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +24 -45
  11. airbyte_cdk/sources/declarative/partition_routers/__init__.py +0 -4
  12. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +1 -5
  13. airbyte_cdk/sources/declarative/retrievers/file_uploader.py +61 -0
  14. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +17 -4
  15. airbyte_cdk/sources/file_based/file_types/file_transfer.py +2 -8
  16. airbyte_cdk/sources/streams/concurrent/default_stream.py +3 -0
  17. airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +0 -4
  18. airbyte_cdk/sources/types.py +11 -0
  19. airbyte_cdk/sources/utils/files_directory.py +15 -0
  20. airbyte_cdk/sources/utils/record_helper.py +8 -1
  21. {airbyte_cdk-6.41.9.dist-info → airbyte_cdk-6.41.9.dev4101.dist-info}/METADATA +2 -2
  22. {airbyte_cdk-6.41.9.dist-info → airbyte_cdk-6.41.9.dev4101.dist-info}/RECORD +26 -25
  23. airbyte_cdk/sources/declarative/partition_routers/grouping_partition_router.py +0 -150
  24. {airbyte_cdk-6.41.9.dist-info → airbyte_cdk-6.41.9.dev4101.dist-info}/LICENSE.txt +0 -0
  25. {airbyte_cdk-6.41.9.dist-info → airbyte_cdk-6.41.9.dev4101.dist-info}/LICENSE_SHORT +0 -0
  26. {airbyte_cdk-6.41.9.dist-info → airbyte_cdk-6.41.9.dev4101.dist-info}/WHEEL +0 -0
  27. {airbyte_cdk-6.41.9.dist-info → airbyte_cdk-6.41.9.dev4101.dist-info}/entry_points.txt +0 -0
airbyte_cdk/models/__init__.py
@@ -19,6 +19,7 @@ from .airbyte_protocol import (
  AirbyteMessage,
  AirbyteProtocol,
  AirbyteRecordMessage,
+ AirbyteRecordMessageFileReference,
  AirbyteStateBlob,
  AirbyteStateMessage,
  AirbyteStateStats,
airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py
@@ -150,6 +150,7 @@ class ConcurrentReadProcessor:
  stream_name=record.stream_name,
  data_or_message=record.data,
  is_file_transfer_message=record.is_file_transfer_message,
+ file_reference=record.file_reference,
  )
  stream = self._stream_name_to_instance[record.stream_name]
airbyte_cdk/sources/declarative/async_job/job.py
@@ -34,12 +34,6 @@ class AsyncJob:

  def status(self) -> AsyncJobStatus:
  if self._timer.has_timed_out():
- # TODO: we should account the fact that,
- # certain APIs could send the `Timeout` status,
- # thus we should not return `Timeout` in that case,
- # but act based on the scenario.
-
- # the default behavior is to return `Timeout` status and retry.
  return AsyncJobStatus.TIMED_OUT
  return self._status
airbyte_cdk/sources/declarative/async_job/job_orchestrator.py
@@ -44,21 +44,16 @@ class AsyncPartition:
  This bucket of api_jobs is a bit useless for this iteration but should become interesting when we will be able to split jobs
  """

- _DEFAULT_MAX_JOB_RETRY = 3
+ _MAX_NUMBER_OF_ATTEMPTS = 3

- def __init__(
- self, jobs: List[AsyncJob], stream_slice: StreamSlice, job_max_retry: Optional[int] = None
- ) -> None:
+ def __init__(self, jobs: List[AsyncJob], stream_slice: StreamSlice) -> None:
  self._attempts_per_job = {job: 1 for job in jobs}
  self._stream_slice = stream_slice
- self._job_max_retry = (
- job_max_retry if job_max_retry is not None else self._DEFAULT_MAX_JOB_RETRY
- )

  def has_reached_max_attempt(self) -> bool:
  return any(
  map(
- lambda attempt_count: attempt_count >= self._job_max_retry,
+ lambda attempt_count: attempt_count >= self._MAX_NUMBER_OF_ATTEMPTS,
  self._attempts_per_job.values(),
  )
  )
@@ -67,7 +62,7 @@ class AsyncPartition:
  current_attempt_count = self._attempts_per_job.pop(job_to_replace, None)
  if current_attempt_count is None:
  raise ValueError("Could not find job to replace")
- elif current_attempt_count >= self._job_max_retry:
+ elif current_attempt_count >= self._MAX_NUMBER_OF_ATTEMPTS:
  raise ValueError(f"Max attempt reached for job in partition {self._stream_slice}")

  new_attempt_count = current_attempt_count + 1
@@ -160,7 +155,6 @@ class AsyncJobOrchestrator:
  message_repository: MessageRepository,
  exceptions_to_break_on: Iterable[Type[Exception]] = tuple(),
  has_bulk_parent: bool = False,
- job_max_retry: Optional[int] = None,
  ) -> None:
  """
  If the stream slices provided as a parameters relies on a async job streams that relies on the same JobTracker, `has_bulk_parent`
@@ -181,12 +175,11 @@ class AsyncJobOrchestrator:
  self._message_repository = message_repository
  self._exceptions_to_break_on: Tuple[Type[Exception], ...] = tuple(exceptions_to_break_on)
  self._has_bulk_parent = has_bulk_parent
- self._job_max_retry = job_max_retry

  self._non_breaking_exceptions: List[Exception] = []

  def _replace_failed_jobs(self, partition: AsyncPartition) -> None:
- failed_status_jobs = (AsyncJobStatus.FAILED, AsyncJobStatus.TIMED_OUT)
+ failed_status_jobs = (AsyncJobStatus.FAILED,)
  jobs_to_replace = [job for job in partition.jobs if job.status() in failed_status_jobs]
  for job in jobs_to_replace:
  new_job = self._start_job(job.job_parameters(), job.api_job_id())
@@ -221,7 +214,7 @@ class AsyncJobOrchestrator:
  for _slice in self._slice_iterator:
  at_least_one_slice_consumed_from_slice_iterator_during_current_iteration = True
  job = self._start_job(_slice)
- self._running_partitions.append(AsyncPartition([job], _slice, self._job_max_retry))
+ self._running_partitions.append(AsyncPartition([job], _slice))
  if self._has_bulk_parent and self._slice_iterator.has_next():
  break
  except ConcurrentJobLimitReached:
@@ -370,7 +363,7 @@ class AsyncJobOrchestrator:
  self._reallocate_partition(current_running_partitions, partition)

  # We only remove completed / timeout jobs jobs as we want failed jobs to be re-allocated in priority
- self._remove_completed_jobs(partition)
+ self._remove_completed_or_timed_out_jobs(partition)

  # update the referenced list with running partitions
  self._running_partitions = current_running_partitions
@@ -385,7 +378,11 @@ class AsyncJobOrchestrator:
  def _stop_timed_out_jobs(self, partition: AsyncPartition) -> None:
  for job in partition.jobs:
  if job.status() == AsyncJobStatus.TIMED_OUT:
- self._abort_job(job, free_job_allocation=False)
+ self._abort_job(job, free_job_allocation=True)
+ raise AirbyteTracedException(
+ internal_message=f"Job {job.api_job_id()} has timed out. Try increasing the `polling job timeout`.",
+ failure_type=FailureType.config_error,
+ )

  def _abort_job(self, job: AsyncJob, free_job_allocation: bool = True) -> None:
  try:
@@ -395,7 +392,7 @@ class AsyncJobOrchestrator:
  except Exception as exception:
  LOGGER.warning(f"Could not free budget for job {job.api_job_id()}: {exception}")

- def _remove_completed_jobs(self, partition: AsyncPartition) -> None:
+ def _remove_completed_or_timed_out_jobs(self, partition: AsyncPartition) -> None:
  """
  Remove completed or timed out jobs from the partition.

@@ -403,7 +400,7 @@ class AsyncJobOrchestrator:
  partition (AsyncPartition): The partition to process.
  """
  for job in partition.jobs:
- if job.status() == AsyncJobStatus.COMPLETED:
+ if job.status() in [AsyncJobStatus.COMPLETED, AsyncJobStatus.TIMED_OUT]:
  self._job_tracker.remove_job(job.api_job_id())

  def _reallocate_partition(
@@ -418,7 +415,10 @@ class AsyncJobOrchestrator:
  current_running_partitions (list): The list of currently running partitions.
  partition (AsyncPartition): The partition to reallocate.
  """
- current_running_partitions.insert(0, partition)
+ for job in partition.jobs:
+ if job.status() != AsyncJobStatus.TIMED_OUT:
+ # allow the FAILED jobs to be re-allocated for partition
+ current_running_partitions.insert(0, partition)

  def _process_partitions_with_errors(self, partition: AsyncPartition) -> None:
  """
airbyte_cdk/sources/declarative/async_job/job_tracker.py
@@ -3,11 +3,9 @@
  import logging
  import threading
  import uuid
- from dataclasses import dataclass, field
- from typing import Any, Mapping, Set, Union
+ from typing import Set

  from airbyte_cdk.logger import lazy_log
- from airbyte_cdk.sources.declarative.interpolation import InterpolatedString

  LOGGER = logging.getLogger("airbyte")

@@ -16,29 +14,15 @@ class ConcurrentJobLimitReached(Exception):
  pass


- @dataclass
  class JobTracker:
- limit: Union[int, str]
- config: Mapping[str, Any] = field(default_factory=dict)
-
- def __post_init__(self) -> None:
+ def __init__(self, limit: int):
  self._jobs: Set[str] = set()
- self._lock = threading.Lock()
- if isinstance(self.limit, str):
- try:
- self.limit = int(
- InterpolatedString(self.limit, parameters={}).eval(config=self.config)
- )
- except Exception as e:
- LOGGER.warning(
- f"Error interpolating max job count: {self.limit}. Setting to 1. {e}"
- )
- self.limit = 1
- if self.limit < 1:
+ if limit < 1:
  LOGGER.warning(
- f"The `max_concurrent_async_job_count` property is less than 1: {self.limit}. Setting to 1. Please update the source manifest to set a valid value."
+ f"The `max_concurrent_async_job_count` property is less than 1: {limit}. Setting to 1. Please update the source manifest to set a valid value."
  )
- self._limit = self.limit if self.limit >= 1 else 1
+ self._limit = 1 if limit < 1 else limit
+ self._lock = threading.Lock()

  def try_to_get_intent(self) -> str:
  lazy_log(
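A minimal usage sketch (not part of the diff) of the simplified JobTracker constructor above: it now takes a plain integer instead of an interpolable string, and values below 1 are clamped to 1 with a warning.

from airbyte_cdk.sources.declarative.async_job.job_tracker import JobTracker

tracker = JobTracker(limit=3)   # allows at most 3 concurrent async jobs
fallback = JobTracker(limit=0)  # logs a warning and falls back to a limit of 1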
airbyte_cdk/sources/declarative/concurrent_declarative_source.py
@@ -25,6 +25,7 @@ from airbyte_cdk.sources.declarative.incremental.per_partition_with_global impor
  PerPartitionWithGlobalCursor,
  )
  from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
+ from airbyte_cdk.sources.declarative.models import FileUploader
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
  ConcurrencyLevel as ConcurrencyLevelModel,
  )
@@ -206,6 +207,20 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
  # these legacy Python streams the way we do low-code streams to determine if they are concurrent compatible,
  # so we need to treat them as synchronous

+ file_uploader = None
+ if isinstance(declarative_stream, DeclarativeStream):
+ file_uploader = (
+ self._constructor.create_component(
+ model_type=FileUploader,
+ component_definition=name_to_stream_mapping[declarative_stream.name][
+ "file_uploader"
+ ],
+ config=config,
+ )
+ if "file_uploader" in name_to_stream_mapping[declarative_stream.name]
+ else None
+ )
+
  if (
  isinstance(declarative_stream, DeclarativeStream)
  and name_to_stream_mapping[declarative_stream.name]["type"]
@@ -273,6 +288,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
  declarative_stream.get_json_schema(),
  retriever,
  self.message_repository,
+ file_uploader,
  ),
  stream_slicer=declarative_stream.retriever.stream_slicer,
  )
@@ -303,6 +319,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
  declarative_stream.get_json_schema(),
  retriever,
  self.message_repository,
+ file_uploader,
  ),
  stream_slicer=cursor,
  )
@@ -322,6 +339,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
  else None,
  logger=self.logger,
  cursor=cursor,
+ supports_file_transfer=bool(file_uploader),
  )
  )
  elif (
@@ -333,6 +351,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
  declarative_stream.get_json_schema(),
  declarative_stream.retriever,
  self.message_repository,
+ file_uploader,
  ),
  declarative_stream.retriever.stream_slicer,
  )
@@ -353,6 +372,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
  cursor_field=None,
  logger=self.logger,
  cursor=final_state_cursor,
+ supports_file_transfer=bool(file_uploader),
  )
  )
  elif (
@@ -392,6 +412,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
  declarative_stream.get_json_schema(),
  retriever,
  self.message_repository,
+ file_uploader,
  ),
  perpartition_cursor,
  )
@@ -406,6 +427,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
  cursor_field=perpartition_cursor.cursor_field.cursor_field_key,
  logger=self.logger,
  cursor=perpartition_cursor,
+ supports_file_transfer=bool(file_uploader),
  )
  )
  else:
airbyte_cdk/sources/declarative/declarative_component_schema.yaml
@@ -47,12 +47,7 @@ properties:
  max_concurrent_async_job_count:
  title: Maximum Concurrent Asynchronous Jobs
  description: Maximum number of concurrent asynchronous jobs to run. This property is only relevant for sources/streams that support asynchronous job execution through the AsyncRetriever (e.g. a report-based stream that initiates a job, polls the job status, and then fetches the job results). This is often set by the API's maximum number of concurrent jobs on the account level. Refer to the API's documentation for this information.
- type:
- - integer
- - string
- examples:
- - 3
- - "{{ config['max_concurrent_async_job_count'] }}"
+ type: integer
  metadata:
  type: object
  description: For internal Airbyte use only - DO NOT modify manually. Used by consumers of declarative manifests for storing related metadata.
@@ -1427,6 +1422,33 @@ definitions:
  - "$ref": "#/definitions/LegacyToPerPartitionStateMigration"
  - "$ref": "#/definitions/CustomStateMigration"
  default: []
+ file_uploader:
+ title: File Uploader
+ description: (experimental) Describes how to fetch a file
+ type: object
+ required:
+ - type
+ - requester
+ - download_target_extractor
+ properties:
+ type:
+ type: string
+ enum: [ FileUploader ]
+ requester:
+ description: Requester component that describes how to prepare HTTP requests to send to the source API.
+ anyOf:
+ - "$ref": "#/definitions/CustomRequester"
+ - "$ref": "#/definitions/HttpRequester"
+ download_target_extractor:
+ description: Responsible for fetching the url where the file is located. This is applied on each records and not on the HTTP response
+ anyOf:
+ - "$ref": "#/definitions/CustomRecordExtractor"
+ - "$ref": "#/definitions/DpathExtractor"
+ file_extractor:
+ description: Responsible for fetching the content of the file. If not defined, the assumption is that the whole response body is the file content
+ anyOf:
+ - "$ref": "#/definitions/CustomRecordExtractor"
+ - "$ref": "#/definitions/DpathExtractor"
  $parameters:
  type: object
  additional_properties: true
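To make the new `file_uploader` definition concrete, here is a hypothetical component definition expressed as a Python dict (all values are illustrative, not taken from this diff): per the schema above, `requester` and `download_target_extractor` are required, while `file_extractor` is optional.

file_uploader_definition = {
    "type": "FileUploader",
    "requester": {
        "type": "HttpRequester",
        "url_base": "https://api.example.com",  # illustrative base URL
        "path": "/attachments/{{ stream_slice['id'] }}",  # illustrative path
    },
    "download_target_extractor": {
        "type": "DpathExtractor",
        # illustrative record field that holds the file's download URL
        "field_path": ["download_url"],
    },
}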
@@ -2197,8 +2219,7 @@ definitions:
  type: object
  additionalProperties: true
  JsonDecoder:
- title: JSON
- description: Select 'JSON' if the response is formatted as a JSON object.
+ title: Json Decoder
  type: object
  required:
  - type
@@ -2207,8 +2228,8 @@ definitions:
  type: string
  enum: [JsonDecoder]
  JsonlDecoder:
- title: JSON Lines
- description: Select 'JSON Lines' if the response consists of JSON objects separated by new lines ('\n') in JSONL format.
+ title: JSONL Decoder
+ description: Use this if the response consists of JSON objects separated by new lines (`\n`) in JSONL format.
  type: object
  required:
  - type
@@ -2333,8 +2354,8 @@ definitions:
  type: object
  additionalProperties: true
  IterableDecoder:
- title: Iterable
- description: Select 'Iterable' if the response consists of strings separated by new lines (`\n`). The string will then be wrapped into a JSON object with the `record` key.
+ title: Iterable Decoder
+ description: Use this if the response consists of strings separated by new lines (`\n`). The Decoder will wrap each row into a JSON object with the `record` key.
  type: object
  required:
  - type
@@ -2343,8 +2364,8 @@ definitions:
  type: string
  enum: [IterableDecoder]
  XmlDecoder:
- title: XML
- description: Select 'XML' if the response consists of XML-formatted data.
+ title: XML Decoder
+ description: Use this if the response is XML.
  type: object
  required:
  - type
@@ -2375,8 +2396,8 @@ definitions:
  type: object
  additionalProperties: true
  ZipfileDecoder:
- title: ZIP File
- description: Select 'ZIP file' for response data that is returned as a zipfile. Requires specifying an inner data type/decoder to parse the unzipped data.
+ title: Zipfile Decoder
+ description: Decoder for response data that is returned as zipfile(s).
  type: object
  additionalProperties: true
  required:
@@ -2900,7 +2921,7 @@ definitions:
  title: Lazy Read Pointer
  description: If set, this will enable lazy reading, using the initial read of parent records to extract child records.
  type: array
- default: []
+ default: [ ]
  items:
  - type: string
  interpolation_context:
@@ -3205,7 +3226,7 @@ definitions:
  properties:
  type:
  type: string
- enum: [StateDelegatingStream]
+ enum: [ StateDelegatingStream ]
  name:
  title: Name
  description: The stream name.
@@ -3260,14 +3281,12 @@ definitions:
  - "$ref": "#/definitions/CustomPartitionRouter"
  - "$ref": "#/definitions/ListPartitionRouter"
  - "$ref": "#/definitions/SubstreamPartitionRouter"
- - "$ref": "#/definitions/GroupingPartitionRouter"
  - type: array
  items:
  anyOf:
  - "$ref": "#/definitions/CustomPartitionRouter"
  - "$ref": "#/definitions/ListPartitionRouter"
  - "$ref": "#/definitions/SubstreamPartitionRouter"
- - "$ref": "#/definitions/GroupingPartitionRouter"
  decoder:
  title: Decoder
  description: Component decoding the response so records can be extracted.
@@ -3284,8 +3303,6 @@ definitions:
  type: object
  additionalProperties: true
  GzipDecoder:
- title: gzip
- description: Select 'gzip' for response data that is compressed with gzip. Requires specifying an inner data type/decoder to parse the decompressed data.
  type: object
  required:
  - type
@@ -3301,8 +3318,6 @@ definitions:
  - "$ref": "#/definitions/JsonDecoder"
  - "$ref": "#/definitions/JsonlDecoder"
  CsvDecoder:
- title: CSV
- description: "Select 'CSV' for response data that is formatted as CSV (comma-separated values). Can specify an encoding (default: 'utf-8') and a delimiter (default: ',')."
  type: object
  required:
  - type
@@ -3433,14 +3448,12 @@ definitions:
  - "$ref": "#/definitions/CustomPartitionRouter"
  - "$ref": "#/definitions/ListPartitionRouter"
  - "$ref": "#/definitions/SubstreamPartitionRouter"
- - "$ref": "#/definitions/GroupingPartitionRouter"
  - type: array
  items:
  anyOf:
  - "$ref": "#/definitions/CustomPartitionRouter"
  - "$ref": "#/definitions/ListPartitionRouter"
  - "$ref": "#/definitions/SubstreamPartitionRouter"
- - "$ref": "#/definitions/GroupingPartitionRouter"
  decoder:
  title: Decoder
  description: Component decoding the response so records can be extracted.
@@ -3557,44 +3570,6 @@ definitions:
  $parameters:
  type: object
  additionalProperties: true
- GroupingPartitionRouter:
- title: Grouping Partition Router
- description: >
- A decorator on top of a partition router that groups partitions into batches of a specified size.
- This is useful for APIs that support filtering by multiple partition keys in a single request.
- Note that per-partition incremental syncs may not work as expected because the grouping
- of partitions might change between syncs, potentially leading to inconsistent state tracking.
- type: object
- required:
- - type
- - group_size
- - underlying_partition_router
- properties:
- type:
- type: string
- enum: [GroupingPartitionRouter]
- group_size:
- title: Group Size
- description: The number of partitions to include in each group. This determines how many partition values are batched together in a single slice.
- type: integer
- examples:
- - 10
- - 50
- underlying_partition_router:
- title: Underlying Partition Router
- description: The partition router whose output will be grouped. This can be any valid partition router component.
- anyOf:
- - "$ref": "#/definitions/CustomPartitionRouter"
- - "$ref": "#/definitions/ListPartitionRouter"
- - "$ref": "#/definitions/SubstreamPartitionRouter"
- deduplicate:
- title: Deduplicate Partitions
- description: If true, ensures that partitions are unique within each group by removing duplicates based on the partition key.
- type: boolean
- default: true
- $parameters:
- type: object
- additionalProperties: true
  WaitUntilTimeFromHeader:
  title: Wait Until Time Defined In Response Header
  description: Extract time at which we can retry the request from response header and wait for the difference between now and that time.
airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py
@@ -79,7 +79,6 @@ class ConcurrentPerPartitionCursor(Cursor):
  connector_state_manager: ConnectorStateManager,
  connector_state_converter: AbstractStreamStateConverter,
  cursor_field: CursorField,
- use_global_cursor: bool = False,
  ) -> None:
  self._global_cursor: Optional[StreamState] = {}
  self._stream_name = stream_name
@@ -107,7 +106,7 @@ class ConcurrentPerPartitionCursor(Cursor):
  self._lookback_window: int = 0
  self._parent_state: Optional[StreamState] = None
  self._number_of_partitions: int = 0
- self._use_global_cursor: bool = use_global_cursor
+ self._use_global_cursor: bool = False
  self._partition_serializer = PerPartitionKeySerializer()
  # Track the last time a state message was emitted
  self._last_emission_time: float = 0.0
airbyte_cdk/sources/declarative/models/declarative_component_schema.py
@@ -1890,10 +1890,9 @@ class DeclarativeSource1(BaseModel):
  spec: Optional[Spec] = None
  concurrency_level: Optional[ConcurrencyLevel] = None
  api_budget: Optional[HTTPAPIBudget] = None
- max_concurrent_async_job_count: Optional[Union[int, str]] = Field(
+ max_concurrent_async_job_count: Optional[int] = Field(
  None,
  description="Maximum number of concurrent asynchronous jobs to run. This property is only relevant for sources/streams that support asynchronous job execution through the AsyncRetriever (e.g. a report-based stream that initiates a job, polls the job status, and then fetches the job results). This is often set by the API's maximum number of concurrent jobs on the account level. Refer to the API's documentation for this information.",
- examples=[3, "{{ config['max_concurrent_async_job_count'] }}"],
  title="Maximum Concurrent Asynchronous Jobs",
  )
  metadata: Optional[Dict[str, Any]] = Field(
@@ -1923,10 +1922,9 @@ class DeclarativeSource2(BaseModel):
  spec: Optional[Spec] = None
  concurrency_level: Optional[ConcurrencyLevel] = None
  api_budget: Optional[HTTPAPIBudget] = None
- max_concurrent_async_job_count: Optional[Union[int, str]] = Field(
+ max_concurrent_async_job_count: Optional[int] = Field(
  None,
  description="Maximum number of concurrent asynchronous jobs to run. This property is only relevant for sources/streams that support asynchronous job execution through the AsyncRetriever (e.g. a report-based stream that initiates a job, polls the job status, and then fetches the job results). This is often set by the API's maximum number of concurrent jobs on the account level. Refer to the API's documentation for this information.",
- examples=[3, "{{ config['max_concurrent_async_job_count'] }}"],
  title="Maximum Concurrent Asynchronous Jobs",
  )
  metadata: Optional[Dict[str, Any]] = Field(
@@ -2280,6 +2278,22 @@ class StateDelegatingStream(BaseModel):
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")


+ class FileUploader(BaseModel):
+ type: Literal["FileUploader"]
+ requester: Union[CustomRequester, HttpRequester] = Field(
+ ...,
+ description="Requester component that describes how to prepare HTTP requests to send to the source API.",
+ )
+ download_target_extractor: Union[CustomRecordExtractor, DpathExtractor] = Field(
+ ...,
+ description="Responsible for fetching the url where the file is located. This is applied on each records and not on the HTTP response",
+ )
+ file_extractor: Optional[Union[CustomRecordExtractor, DpathExtractor]] = Field(
+ None,
+ description="Responsible for fetching the content of the file. If not defined, the assumption is that the whole response body is the file content",
+ )
+
+
  class SimpleRetriever(BaseModel):
  type: Literal["SimpleRetriever"]
  record_selector: RecordSelector = Field(
@@ -2303,21 +2317,18 @@ class SimpleRetriever(BaseModel):
  CustomPartitionRouter,
  ListPartitionRouter,
  SubstreamPartitionRouter,
- GroupingPartitionRouter,
- List[
- Union[
- CustomPartitionRouter,
- ListPartitionRouter,
- SubstreamPartitionRouter,
- GroupingPartitionRouter,
- ]
- ],
+ List[Union[CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter]],
  ]
  ] = Field(
  [],
  description="PartitionRouter component that describes how to partition the stream, enabling incremental syncs and checkpointing.",
  title="Partition Router",
  )
+ file_uploader: Optional[FileUploader] = Field(
+ None,
+ description="(experimental) Describes how to fetch a file",
+ title="File Uploader",
+ )
  decoder: Optional[
  Union[
  CustomDecoder,
@@ -2393,15 +2404,7 @@ class AsyncRetriever(BaseModel):
  CustomPartitionRouter,
  ListPartitionRouter,
  SubstreamPartitionRouter,
- GroupingPartitionRouter,
- List[
- Union[
- CustomPartitionRouter,
- ListPartitionRouter,
- SubstreamPartitionRouter,
- GroupingPartitionRouter,
- ]
- ],
+ List[Union[CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter]],
  ]
  ] = Field(
  [],
@@ -2453,29 +2456,6 @@ class SubstreamPartitionRouter(BaseModel):
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")


- class GroupingPartitionRouter(BaseModel):
- type: Literal["GroupingPartitionRouter"]
- group_size: int = Field(
- ...,
- description="The number of partitions to include in each group. This determines how many partition values are batched together in a single slice.",
- examples=[10, 50],
- title="Group Size",
- )
- underlying_partition_router: Union[
- CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter
- ] = Field(
- ...,
- description="The partition router whose output will be grouped. This can be any valid partition router component.",
- title="Underlying Partition Router",
- )
- deduplicate: Optional[bool] = Field(
- True,
- description="If true, ensures that partitions are unique within each group by removing duplicates based on the partition key.",
- title="Deduplicate Partitions",
- )
- parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
-
-
  class HttpComponentsResolver(BaseModel):
  type: Literal["HttpComponentsResolver"]
  retriever: Union[AsyncRetriever, CustomRetriever, SimpleRetriever] = Field(