airbyte-cdk 6.60.15__py3-none-any.whl → 6.61.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. airbyte_cdk/connector_builder/connector_builder_handler.py +32 -36
  2. airbyte_cdk/connector_builder/main.py +3 -3
  3. airbyte_cdk/connector_builder/test_reader/helpers.py +24 -2
  4. airbyte_cdk/connector_builder/test_reader/message_grouper.py +1 -1
  5. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +15 -22
  6. airbyte_cdk/sources/concurrent_source/concurrent_source.py +30 -18
  7. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +73 -3
  8. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +9 -5
  9. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +3 -5
  10. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +72 -39
  11. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +42 -4
  12. airbyte_cdk/sources/declarative/stream_slicers/stream_slicer_test_read_decorator.py +2 -2
  13. airbyte_cdk/sources/message/concurrent_repository.py +47 -0
  14. airbyte_cdk/sources/streams/concurrent/cursor.py +23 -7
  15. airbyte_cdk/sources/streams/concurrent/partition_reader.py +46 -5
  16. airbyte_cdk/sources/streams/concurrent/partitions/types.py +7 -1
  17. airbyte_cdk/sources/streams/http/http_client.py +4 -1
  18. airbyte_cdk/sources/utils/slice_logger.py +4 -0
  19. {airbyte_cdk-6.60.15.dist-info → airbyte_cdk-6.61.0.dist-info}/METADATA +1 -1
  20. {airbyte_cdk-6.60.15.dist-info → airbyte_cdk-6.61.0.dist-info}/RECORD +24 -23
  21. {airbyte_cdk-6.60.15.dist-info → airbyte_cdk-6.61.0.dist-info}/LICENSE.txt +0 -0
  22. {airbyte_cdk-6.60.15.dist-info → airbyte_cdk-6.61.0.dist-info}/LICENSE_SHORT +0 -0
  23. {airbyte_cdk-6.60.15.dist-info → airbyte_cdk-6.61.0.dist-info}/WHEEL +0 -0
  24. {airbyte_cdk-6.60.15.dist-info → airbyte_cdk-6.61.0.dist-info}/entry_points.txt +0 -0
airbyte_cdk/connector_builder/connector_builder_handler.py

@@ -3,8 +3,8 @@
 #


-from dataclasses import asdict, dataclass, field
-from typing import Any, ClassVar, Dict, List, Mapping
+from dataclasses import asdict
+from typing import Any, Dict, List, Mapping, Optional

 from airbyte_cdk.connector_builder.test_reader import TestReader
 from airbyte_cdk.models import (
@@ -15,45 +15,32 @@ from airbyte_cdk.models import (
     Type,
 )
 from airbyte_cdk.models import Type as MessageType
+from airbyte_cdk.sources.declarative.concurrent_declarative_source import (
+    ConcurrentDeclarativeSource,
+    TestLimits,
+)
 from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource
 from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
-from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
-    ModelToComponentFactory,
-)
 from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets
 from airbyte_cdk.utils.datetime_helpers import ab_datetime_now
 from airbyte_cdk.utils.traced_exception import AirbyteTracedException

-DEFAULT_MAXIMUM_NUMBER_OF_PAGES_PER_SLICE = 5
-DEFAULT_MAXIMUM_NUMBER_OF_SLICES = 5
-DEFAULT_MAXIMUM_RECORDS = 100
-DEFAULT_MAXIMUM_STREAMS = 100
-
 MAX_PAGES_PER_SLICE_KEY = "max_pages_per_slice"
 MAX_SLICES_KEY = "max_slices"
 MAX_RECORDS_KEY = "max_records"
 MAX_STREAMS_KEY = "max_streams"


-@dataclass
-class TestLimits:
-    __test__: ClassVar[bool] = False  # Tell Pytest this is not a Pytest class, despite its name
-
-    max_records: int = field(default=DEFAULT_MAXIMUM_RECORDS)
-    max_pages_per_slice: int = field(default=DEFAULT_MAXIMUM_NUMBER_OF_PAGES_PER_SLICE)
-    max_slices: int = field(default=DEFAULT_MAXIMUM_NUMBER_OF_SLICES)
-    max_streams: int = field(default=DEFAULT_MAXIMUM_STREAMS)
-
-
 def get_limits(config: Mapping[str, Any]) -> TestLimits:
     command_config = config.get("__test_read_config", {})
-    max_pages_per_slice = (
-        command_config.get(MAX_PAGES_PER_SLICE_KEY) or DEFAULT_MAXIMUM_NUMBER_OF_PAGES_PER_SLICE
+    return TestLimits(
+        max_records=command_config.get(MAX_RECORDS_KEY, TestLimits.DEFAULT_MAX_RECORDS),
+        max_pages_per_slice=command_config.get(
+            MAX_PAGES_PER_SLICE_KEY, TestLimits.DEFAULT_MAX_PAGES_PER_SLICE
+        ),
+        max_slices=command_config.get(MAX_SLICES_KEY, TestLimits.DEFAULT_MAX_SLICES),
+        max_streams=command_config.get(MAX_STREAMS_KEY, TestLimits.DEFAULT_MAX_STREAMS),
     )
-    max_slices = command_config.get(MAX_SLICES_KEY) or DEFAULT_MAXIMUM_NUMBER_OF_SLICES
-    max_records = command_config.get(MAX_RECORDS_KEY) or DEFAULT_MAXIMUM_RECORDS
-    max_streams = command_config.get(MAX_STREAMS_KEY) or DEFAULT_MAXIMUM_STREAMS
-    return TestLimits(max_records, max_pages_per_slice, max_slices, max_streams)


 def should_migrate_manifest(config: Mapping[str, Any]) -> bool:
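Note on the new defaults: get_limits() now reads its fallbacks from TestLimits class attributes instead of module-level constants, and it uses dict.get(key, default) rather than `or`, so an explicitly provided falsy override is no longer silently replaced by the default. A minimal sketch of the resulting behavior, using an illustrative test-read config (the manifest value is a placeholder):

from airbyte_cdk.connector_builder.connector_builder_handler import get_limits

config = {
    "__injected_declarative_manifest": {},  # placeholder; get_limits() only reads __test_read_config
    "__test_read_config": {"max_records": 50, "max_slices": 2},
}

limits = get_limits(config)
assert limits.max_records == 50
assert limits.max_slices == 2
# Omitted keys fall back to the ClassVar defaults defined on TestLimits:
assert limits.max_pages_per_slice == limits.DEFAULT_MAX_PAGES_PER_SLICE  # 5
assert limits.max_streams == limits.DEFAULT_MAX_STREAMS  # 100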
@@ -75,21 +62,30 @@ def should_normalize_manifest(config: Mapping[str, Any]) -> bool:
     return config.get("__should_normalize", False)


-def create_source(config: Mapping[str, Any], limits: TestLimits) -> ManifestDeclarativeSource:
+def create_source(
+    config: Mapping[str, Any],
+    limits: TestLimits,
+    catalog: Optional[ConfiguredAirbyteCatalog],
+    state: Optional[List[AirbyteStateMessage]],
+) -> ConcurrentDeclarativeSource[Optional[List[AirbyteStateMessage]]]:
     manifest = config["__injected_declarative_manifest"]
-    return ManifestDeclarativeSource(
+
+    # We enforce a concurrency level of 1 so that the stream is processed on a single thread
+    # to retain ordering for the grouping of the builder message responses.
+    if "concurrency_level" in manifest:
+        manifest["concurrency_level"]["default_concurrency"] = 1
+    else:
+        manifest["concurrency_level"] = {"type": "ConcurrencyLevel", "default_concurrency": 1}
+
+    return ConcurrentDeclarativeSource(
+        catalog=catalog,
         config=config,
-        emit_connector_builder_messages=True,
+        state=state,
         source_config=manifest,
+        emit_connector_builder_messages=True,
         migrate_manifest=should_migrate_manifest(config),
         normalize_manifest=should_normalize_manifest(config),
-        component_factory=ModelToComponentFactory(
-            emit_connector_builder_messages=True,
-            limit_pages_fetched_per_slice=limits.max_pages_per_slice,
-            limit_slices_fetched=limits.max_slices,
-            disable_retries=True,
-            disable_cache=True,
-        ),
+        limits=limits,
     )

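The concurrency clamp added to create_source() can be reproduced in isolation: the builder forces default_concurrency to 1 so responses arrive in a deterministic order for message grouping. A toy manifest exercising both branches of that logic:

def clamp_concurrency(manifest: dict) -> dict:
    # Mirrors the logic added to create_source() above.
    if "concurrency_level" in manifest:
        manifest["concurrency_level"]["default_concurrency"] = 1
    else:
        manifest["concurrency_level"] = {"type": "ConcurrencyLevel", "default_concurrency": 1}
    return manifest

existing = clamp_concurrency({"concurrency_level": {"type": "ConcurrencyLevel", "default_concurrency": 10}})
missing = clamp_concurrency({})
assert existing["concurrency_level"]["default_concurrency"] == 1
assert missing["concurrency_level"]["default_concurrency"] == 1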
airbyte_cdk/connector_builder/main.py

@@ -91,12 +91,12 @@ def handle_connector_builder_request(
 def handle_request(args: List[str]) -> str:
     command, config, catalog, state = get_config_and_catalog_from_args(args)
     limits = get_limits(config)
-    source = create_source(config, limits)
-    return orjson.dumps(
+    source = create_source(config=config, limits=limits, catalog=catalog, state=state)
+    return orjson.dumps(  # type: ignore[no-any-return] # Serializer.dump() always returns AirbyteMessage
         AirbyteMessageSerializer.dump(
             handle_connector_builder_request(source, command, config, catalog, state, limits)
         )
-    ).decode()  # type: ignore[no-any-return] # Serializer.dump() always returns AirbyteMessage
+    ).decode()


 if __name__ == "__main__":
airbyte_cdk/connector_builder/test_reader/helpers.py

@@ -5,7 +5,7 @@
 import json
 from copy import deepcopy
 from json import JSONDecodeError
-from typing import Any, Dict, List, Mapping, Optional
+from typing import Any, Dict, List, Mapping, Optional, Union

 from airbyte_cdk.connector_builder.models import (
     AuxiliaryRequest,
@@ -17,6 +17,8 @@ from airbyte_cdk.connector_builder.models import (
 from airbyte_cdk.models import (
     AirbyteLogMessage,
     AirbyteMessage,
+    AirbyteStateBlob,
+    AirbyteStateMessage,
     OrchestratorType,
     TraceType,
 )
@@ -466,7 +468,7 @@ def handle_current_slice(
     return StreamReadSlices(
         pages=current_slice_pages,
         slice_descriptor=current_slice_descriptor,
-        state=[latest_state_message] if latest_state_message else [],
+        state=[convert_state_blob_to_mapping(latest_state_message)] if latest_state_message else [],
         auxiliary_requests=auxiliary_requests if auxiliary_requests else [],
     )

@@ -718,3 +720,23 @@ def get_auxiliary_request_type(stream: dict, http: dict) -> str:  # type: ignore
     Determines the type of the auxiliary request based on the stream and HTTP properties.
     """
     return "PARENT_STREAM" if stream.get("is_substream", False) else str(http.get("type", None))
+
+
+def convert_state_blob_to_mapping(
+    state_message: Union[AirbyteStateMessage, Dict[str, Any]],
+) -> Dict[str, Any]:
+    """
+    The AirbyteStreamState stores state as an AirbyteStateBlob which deceivingly is not
+    a dictionary, but rather a list of kwargs fields. This in turn causes it to not be
+    properly turned into a dictionary when translating this back into response output
+    by the connector_builder_handler using asdict()
+    """
+
+    if isinstance(state_message, AirbyteStateMessage) and state_message.stream:
+        state_value = state_message.stream.stream_state
+        if isinstance(state_value, AirbyteStateBlob):
+            state_value_mapping = {k: v for k, v in state_value.__dict__.items()}
+            state_message.stream.stream_state = state_value_mapping  # type: ignore # we intentionally set this as a Dict so that StreamReadSlices is translated properly in the resulting HTTP response
+        return state_message  # type: ignore # See above, but when this is an AirbyteStateMessage we must convert AirbyteStateBlob to a Dict
+    else:
+        return state_message  # type: ignore # This is guaranteed to be a Dict since we check isinstance AirbyteStateMessage above
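A hedged sketch of what convert_state_blob_to_mapping() does at the boundary: AirbyteStateBlob stores state as attributes rather than as a mapping, so asdict() on StreamReadSlices would not serialize it cleanly without this conversion. The stream name and state value below are illustrative, not taken from the diff:

from airbyte_cdk.connector_builder.test_reader.helpers import convert_state_blob_to_mapping
from airbyte_cdk.models import (
    AirbyteStateBlob,
    AirbyteStateMessage,
    AirbyteStateType,
    AirbyteStreamState,
    StreamDescriptor,
)

state_message = AirbyteStateMessage(
    type=AirbyteStateType.STREAM,
    stream=AirbyteStreamState(
        stream_descriptor=StreamDescriptor(name="orders"),  # illustrative stream name
        stream_state=AirbyteStateBlob(created_at="2024-01-01T00:00:00Z"),
    ),
)

converted = convert_state_blob_to_mapping(state_message)
# The blob's attributes are now a plain dict, ready for asdict()/JSON serialization.
assert converted.stream.stream_state == {"created_at": "2024-01-01T00:00:00Z"}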
airbyte_cdk/connector_builder/test_reader/message_grouper.py

@@ -95,7 +95,7 @@ def get_message_groups(
     latest_state_message: Optional[Dict[str, Any]] = None
     slice_auxiliary_requests: List[AuxiliaryRequest] = []

-    while records_count < limit and (message := next(messages, None)):
+    while message := next(messages, None):
         json_message = airbyte_message_to_json(message)

         if is_page_http_request_for_different_stream(json_message, stream_name):
airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py

@@ -2,6 +2,7 @@
 # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 #
 import logging
+import os
 from typing import Dict, Iterable, List, Optional, Set

 from airbyte_cdk.exception_handler import generate_failed_streams_error_message
@@ -95,11 +96,14 @@ class ConcurrentReadProcessor:
         """
         stream_name = partition.stream_name()
         self._streams_to_running_partitions[stream_name].add(partition)
+        cursor = self._stream_name_to_instance[stream_name].cursor
         if self._slice_logger.should_log_slice_message(self._logger):
             self._message_repository.emit_message(
                 self._slice_logger.create_slice_log_message(partition.to_slice())
             )
-        self._thread_pool_manager.submit(self._partition_reader.process_partition, partition)
+        self._thread_pool_manager.submit(
+            self._partition_reader.process_partition, partition, cursor
+        )

     def on_partition_complete_sentinel(
         self, sentinel: PartitionCompleteSentinel
@@ -112,26 +116,16 @@ class ConcurrentReadProcessor:
         """
         partition = sentinel.partition

-        try:
-            if sentinel.is_successful:
-                stream = self._stream_name_to_instance[partition.stream_name()]
-                stream.cursor.close_partition(partition)
-        except Exception as exception:
-            self._flag_exception(partition.stream_name(), exception)
-            yield AirbyteTracedException.from_exception(
-                exception, stream_descriptor=StreamDescriptor(name=partition.stream_name())
-            ).as_sanitized_airbyte_message()
-        finally:
-            partitions_running = self._streams_to_running_partitions[partition.stream_name()]
-            if partition in partitions_running:
-                partitions_running.remove(partition)
-                # If all partitions were generated and this was the last one, the stream is done
-                if (
-                    partition.stream_name() not in self._streams_currently_generating_partitions
-                    and len(partitions_running) == 0
-                ):
-                    yield from self._on_stream_is_done(partition.stream_name())
-            yield from self._message_repository.consume_queue()
+        partitions_running = self._streams_to_running_partitions[partition.stream_name()]
+        if partition in partitions_running:
+            partitions_running.remove(partition)
+            # If all partitions were generated and this was the last one, the stream is done
+            if (
+                partition.stream_name() not in self._streams_currently_generating_partitions
+                and len(partitions_running) == 0
+            ):
+                yield from self._on_stream_is_done(partition.stream_name())
+        yield from self._message_repository.consume_queue()

     def on_record(self, record: Record) -> Iterable[AirbyteMessage]:
         """
@@ -160,7 +154,6 @@ class ConcurrentReadProcessor:
                 stream.as_airbyte_stream(), AirbyteStreamStatus.RUNNING
             )
         self._record_counter[stream.name] += 1
-        stream.cursor.observe(record)
         yield message
         yield from self._message_repository.consume_queue()

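The net effect of the two removals above is that cursor bookkeeping (observe and close_partition) no longer happens on the main thread in ConcurrentReadProcessor; the cursor now travels with the partition to the worker that reads it. A self-contained sketch of that delegation pattern using stand-in stubs (none of these classes are the CDK's):

from concurrent.futures import ThreadPoolExecutor

class StubCursor:
    def observe(self, record):
        print("observe", record)

    def close_partition(self, partition):
        print("close", type(partition).__name__)

class StubPartition:
    def read(self):
        return iter([{"id": 1}, {"id": 2}])

def process_partition(partition, cursor):
    # Worker-side loop: observe every record, then close the partition on the
    # same thread that read it, mirroring the new process_partition(partition, cursor) call.
    for record in partition.read():
        cursor.observe(record)
    cursor.close_partition(partition)

with ThreadPoolExecutor(max_workers=1) as pool:
    pool.submit(process_partition, StubPartition(), StubCursor()).result()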
airbyte_cdk/sources/concurrent_source/concurrent_source.py

@@ -1,10 +1,11 @@
 #
 # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 #
+
 import concurrent
 import logging
 from queue import Queue
-from typing import Iterable, Iterator, List
+from typing import Iterable, Iterator, List, Optional

 from airbyte_cdk.models import AirbyteMessage
 from airbyte_cdk.sources.concurrent_source.concurrent_read_processor import ConcurrentReadProcessor
@@ -16,7 +17,7 @@ from airbyte_cdk.sources.concurrent_source.thread_pool_manager import ThreadPool
 from airbyte_cdk.sources.message import InMemoryMessageRepository, MessageRepository
 from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
 from airbyte_cdk.sources.streams.concurrent.partition_enqueuer import PartitionEnqueuer
-from airbyte_cdk.sources.streams.concurrent.partition_reader import PartitionReader
+from airbyte_cdk.sources.streams.concurrent.partition_reader import PartitionLogger, PartitionReader
 from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
 from airbyte_cdk.sources.streams.concurrent.partitions.types import (
     PartitionCompleteSentinel,
@@ -43,6 +44,7 @@ class ConcurrentSource:
         logger: logging.Logger,
         slice_logger: SliceLogger,
         message_repository: MessageRepository,
+        queue: Optional[Queue[QueueItem]] = None,
         timeout_seconds: int = DEFAULT_TIMEOUT_SECONDS,
     ) -> "ConcurrentSource":
         is_single_threaded = initial_number_of_partitions_to_generate == 1 and num_workers == 1
@@ -59,12 +61,13 @@ class ConcurrentSource:
             logger,
         )
         return ConcurrentSource(
-            threadpool,
-            logger,
-            slice_logger,
-            message_repository,
-            initial_number_of_partitions_to_generate,
-            timeout_seconds,
+            threadpool=threadpool,
+            logger=logger,
+            slice_logger=slice_logger,
+            queue=queue,
+            message_repository=message_repository,
+            initial_number_partitions_to_generate=initial_number_of_partitions_to_generate,
+            timeout_seconds=timeout_seconds,
         )

     def __init__(
@@ -72,6 +75,7 @@ class ConcurrentSource:
         threadpool: ThreadPoolManager,
         logger: logging.Logger,
         slice_logger: SliceLogger = DebugSliceLogger(),
+        queue: Optional[Queue[QueueItem]] = None,
         message_repository: MessageRepository = InMemoryMessageRepository(),
         initial_number_partitions_to_generate: int = 1,
         timeout_seconds: int = DEFAULT_TIMEOUT_SECONDS,
@@ -91,25 +95,28 @@ class ConcurrentSource:
         self._initial_number_partitions_to_generate = initial_number_partitions_to_generate
         self._timeout_seconds = timeout_seconds

+        # We set a maxsize to for the main thread to process record items when the queue size grows. This assumes that there are less
+        # threads generating partitions that than are max number of workers. If it weren't the case, we could have threads only generating
+        # partitions which would fill the queue. This number is arbitrarily set to 10_000 but will probably need to be changed given more
+        # information and might even need to be configurable depending on the source
+        self._queue = queue or Queue(maxsize=10_000)
+
     def read(
         self,
         streams: List[AbstractStream],
     ) -> Iterator[AirbyteMessage]:
         self._logger.info("Starting syncing")
-
-        # We set a maxsize to for the main thread to process record items when the queue size grows. This assumes that there are less
-        # threads generating partitions that than are max number of workers. If it weren't the case, we could have threads only generating
-        # partitions which would fill the queue. This number is arbitrarily set to 10_000 but will probably need to be changed given more
-        # information and might even need to be configurable depending on the source
-        queue: Queue[QueueItem] = Queue(maxsize=10_000)
         concurrent_stream_processor = ConcurrentReadProcessor(
             streams,
-            PartitionEnqueuer(queue, self._threadpool),
+            PartitionEnqueuer(self._queue, self._threadpool),
             self._threadpool,
             self._logger,
             self._slice_logger,
             self._message_repository,
-            PartitionReader(queue),
+            PartitionReader(
+                self._queue,
+                PartitionLogger(self._slice_logger, self._logger, self._message_repository),
+            ),
         )

         # Enqueue initial partition generation tasks
@@ -117,7 +124,7 @@ class ConcurrentSource:

         # Read from the queue until all partitions were generated and read
         yield from self._consume_from_queue(
-            queue,
+            self._queue,
             concurrent_stream_processor,
         )
         self._threadpool.check_for_errors_and_shutdown()
@@ -141,7 +148,10 @@ class ConcurrentSource:
                 airbyte_message_or_record_or_exception,
                 concurrent_stream_processor,
             )
-            if concurrent_stream_processor.is_done() and queue.empty():
+            # In the event that a partition raises an exception, anything remaining in
+            # the queue will be missed because is_done() can raise an exception and exit
+            # out of this loop before remaining items are consumed
+            if queue.empty() and concurrent_stream_processor.is_done():
                 # all partitions were generated and processed. we're done here
                 break

@@ -161,5 +171,7 @@ class ConcurrentSource:
             yield from concurrent_stream_processor.on_partition_complete_sentinel(queue_item)
         elif isinstance(queue_item, Record):
            yield from concurrent_stream_processor.on_record(queue_item)
+        elif isinstance(queue_item, AirbyteMessage):
+            yield queue_item
         else:
             raise ValueError(f"Unknown queue item type: {type(queue_item)}")
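With the new queue parameter, a caller can build the bounded queue up front and share it between a producer (such as the new ConcurrentMessageRepository) and the read loop that drains it; previously the queue was created inside read(). A hedged sketch, assuming the rest of ConcurrentSource.create()'s signature (num_workers, initial_number_of_partitions_to_generate) is unchanged:

import logging
from queue import Queue

from airbyte_cdk.sources.concurrent_source.concurrent_source import ConcurrentSource
from airbyte_cdk.sources.message import InMemoryMessageRepository
from airbyte_cdk.sources.utils.slice_logger import DebugSliceLogger

# Bounded so producers block instead of growing memory without limit; 10_000
# matches the default maxsize used above.
queue = Queue(maxsize=10_000)

source = ConcurrentSource.create(
    num_workers=2,
    initial_number_of_partitions_to_generate=1,
    logger=logging.getLogger("airbyte"),
    slice_logger=DebugSliceLogger(),
    message_repository=InMemoryMessageRepository(),
    queue=queue,
)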
airbyte_cdk/sources/declarative/concurrent_declarative_source.py

@@ -3,7 +3,22 @@
 #

 import logging
-from typing import Any, Generic, Iterator, List, Mapping, MutableMapping, Optional, Tuple, Union
+from dataclasses import dataclass, field
+from queue import Queue
+from typing import (
+    Any,
+    ClassVar,
+    Generic,
+    Iterator,
+    List,
+    Mapping,
+    MutableMapping,
+    Optional,
+    Tuple,
+    Union,
+)
+
+from airbyte_protocol_dataclasses.models import Level

 from airbyte_cdk.models import (
     AirbyteCatalog,
@@ -43,6 +58,8 @@ from airbyte_cdk.sources.declarative.stream_slicers.declarative_partition_genera
     StreamSlicerPartitionGenerator,
 )
 from airbyte_cdk.sources.declarative.types import ConnectionDefinition
+from airbyte_cdk.sources.message.concurrent_repository import ConcurrentMessageRepository
+from airbyte_cdk.sources.message.repository import InMemoryMessageRepository, MessageRepository
 from airbyte_cdk.sources.source import TState
 from airbyte_cdk.sources.streams import Stream
 from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
@@ -50,6 +67,22 @@ from airbyte_cdk.sources.streams.concurrent.abstract_stream_facade import Abstra
 from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, FinalStateCursor
 from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
 from airbyte_cdk.sources.streams.concurrent.helpers import get_primary_key_from_stream
+from airbyte_cdk.sources.streams.concurrent.partitions.types import QueueItem
+
+
+@dataclass
+class TestLimits:
+    __test__: ClassVar[bool] = False  # Tell Pytest this is not a Pytest class, despite its name
+
+    DEFAULT_MAX_PAGES_PER_SLICE: ClassVar[int] = 5
+    DEFAULT_MAX_SLICES: ClassVar[int] = 5
+    DEFAULT_MAX_RECORDS: ClassVar[int] = 100
+    DEFAULT_MAX_STREAMS: ClassVar[int] = 100
+
+    max_records: int = field(default=DEFAULT_MAX_RECORDS)
+    max_pages_per_slice: int = field(default=DEFAULT_MAX_PAGES_PER_SLICE)
+    max_slices: int = field(default=DEFAULT_MAX_SLICES)
+    max_streams: int = field(default=DEFAULT_MAX_STREAMS)


 class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
@@ -65,7 +98,9 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
         source_config: ConnectionDefinition,
         debug: bool = False,
         emit_connector_builder_messages: bool = False,
-        component_factory: Optional[ModelToComponentFactory] = None,
+        migrate_manifest: bool = False,
+        normalize_manifest: bool = False,
+        limits: Optional[TestLimits] = None,
         config_path: Optional[str] = None,
         **kwargs: Any,
     ) -> None:
@@ -73,21 +108,39 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
         # no longer needs to store the original incoming state. But maybe there's an edge case?
         self._connector_state_manager = ConnectorStateManager(state=state)  # type: ignore # state is always in the form of List[AirbyteStateMessage]. The ConnectorStateManager should use generics, but this can be done later

+        # We set a maxsize to for the main thread to process record items when the queue size grows. This assumes that there are less
+        # threads generating partitions that than are max number of workers. If it weren't the case, we could have threads only generating
+        # partitions which would fill the queue. This number is arbitrarily set to 10_000 but will probably need to be changed given more
+        # information and might even need to be configurable depending on the source
+        queue: Queue[QueueItem] = Queue(maxsize=10_000)
+        message_repository = InMemoryMessageRepository(
+            Level.DEBUG if emit_connector_builder_messages else Level.INFO
+        )
+
         # To reduce the complexity of the concurrent framework, we are not enabling RFR with synthetic
         # cursors. We do this by no longer automatically instantiating RFR cursors when converting
         # the declarative models into runtime components. Concurrent sources will continue to checkpoint
         # incremental streams running in full refresh.
-        component_factory = component_factory or ModelToComponentFactory(
+        component_factory = ModelToComponentFactory(
             emit_connector_builder_messages=emit_connector_builder_messages,
+            message_repository=ConcurrentMessageRepository(queue, message_repository),
             connector_state_manager=self._connector_state_manager,
             max_concurrent_async_job_count=source_config.get("max_concurrent_async_job_count"),
+            limit_pages_fetched_per_slice=limits.max_pages_per_slice if limits else None,
+            limit_slices_fetched=limits.max_slices if limits else None,
+            disable_retries=True if limits else False,
+            disable_cache=True if limits else False,
         )

+        self._limits = limits
+
         super().__init__(
             source_config=source_config,
             config=config,
             debug=debug,
             emit_connector_builder_messages=emit_connector_builder_messages,
+            migrate_manifest=migrate_manifest,
+            normalize_manifest=normalize_manifest,
             component_factory=component_factory,
             config_path=config_path,
         )
@@ -117,6 +170,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
             initial_number_of_partitions_to_generate=initial_number_of_partitions_to_generate,
             logger=self.logger,
             slice_logger=self._slice_logger,
+            queue=queue,
             message_repository=self.message_repository,
         )

@@ -280,8 +334,14 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                        schema_loader=declarative_stream._schema_loader,  # type: ignore # We are accessing the private property but the public one is optional and we will remove this code soonish
                        retriever=retriever,
                        message_repository=self.message_repository,
+                        max_records_limit=self._limits.max_records
+                        if self._limits
+                        else None,
                    ),
                    stream_slicer=declarative_stream.retriever.stream_slicer,
+                    slice_limit=self._limits.max_slices
+                    if self._limits
+                    else None,  # technically not needed because create_declarative_stream() -> create_simple_retriever() will apply the decorator. But for consistency and depending how we build create_default_stream, this may be needed later
                )
            else:
                if (
@@ -311,8 +371,12 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                        schema_loader=declarative_stream._schema_loader,  # type: ignore # We are accessing the private property but the public one is optional and we will remove this code soonish
                        retriever=retriever,
                        message_repository=self.message_repository,
+                        max_records_limit=self._limits.max_records
+                        if self._limits
+                        else None,
                    ),
                    stream_slicer=cursor,
+                    slice_limit=self._limits.max_slices if self._limits else None,
                )

                concurrent_streams.append(
@@ -341,8 +405,12 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                    schema_loader=declarative_stream._schema_loader,  # type: ignore # We are accessing the private property but the public one is optional and we will remove this code soonish
                    retriever=declarative_stream.retriever,
                    message_repository=self.message_repository,
+                    max_records_limit=self._limits.max_records if self._limits else None,
                ),
                declarative_stream.retriever.stream_slicer,
+                slice_limit=self._limits.max_slices
+                if self._limits
+                else None,  # technically not needed because create_declarative_stream() -> create_simple_retriever() will apply the decorator. But for consistency and depending how we build create_default_stream, this may be needed later
            )

            final_state_cursor = FinalStateCursor(
@@ -401,8 +469,10 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                        schema_loader=declarative_stream._schema_loader,  # type: ignore # We are accessing the private property but the public one is optional and we will remove this code soonish
                        retriever=retriever,
                        message_repository=self.message_repository,
+                        max_records_limit=self._limits.max_records if self._limits else None,
                    ),
                    perpartition_cursor,
+                    slice_limit=self._limits.max_slices if self._limits else None,
                )

                concurrent_streams.append(
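TestLimits now lives in this module (it was previously defined in connector_builder_handler.py) and carries its defaults as ClassVars, so get_limits() and direct callers share one source of truth. Passing an instance through the new limits parameter is what switches the factory into builder mode (capped pages, slices, and records, with retries and caching disabled). A small sketch of the defaults:

from airbyte_cdk.sources.declarative.concurrent_declarative_source import TestLimits

defaults = TestLimits()
assert defaults.max_records == TestLimits.DEFAULT_MAX_RECORDS == 100
assert defaults.max_pages_per_slice == TestLimits.DEFAULT_MAX_PAGES_PER_SLICE == 5
assert defaults.max_slices == TestLimits.DEFAULT_MAX_SLICES == 5
assert defaults.max_streams == TestLimits.DEFAULT_MAX_STREAMS == 100

# A builder test read would pass something like this via ConcurrentDeclarativeSource(limits=...):
builder_limits = TestLimits(max_records=10, max_slices=1)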
airbyte_cdk/sources/declarative/declarative_component_schema.yaml

@@ -4142,11 +4142,9 @@ definitions:
             - stream_slice
             - stream_template_config
           examples:
-            - ["data"]
-            - ["data", "records"]
-            - ["data", 1, "name"]
-            - ["data", "{{ components_values.name }}"]
-            - ["data", "*", "record"]
+            - ["name"]
+            - ["retriever", "requester", "url"]
+            - ["retriever", "requester", "{{ components_values.field }}"]
             - ["*", "**", "name"]
         value:
           title: Value
@@ -4777,6 +4775,12 @@ interpolation:
     - title: stream_slice
       description: This variable is deprecated. Use stream_interval or stream_partition instead.
      type: object
+    - title: components_values
+      description: The record object produced by the components resolver for which a stream will be generated.
+      type: object
+      examples:
+        - name: "accounts"
+          id: 1234
   macros:
     - title: now_utc
       description: Returns the current date and time in the UTC timezone.
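For context, the updated field_path examples target fields of the stream template itself (dynamic stream generation) rather than paths inside record payloads. A hedged illustration written as the Python-dict equivalent of a manifest component mapping; the surrounding dynamic-stream and components resolver configuration is assumed and not shown, and the "url" field of components_values is illustrative:

component_mapping = {
    "type": "ComponentMappingDefinition",
    # Replace the stream template's request URL with a value taken from the
    # record produced by the components resolver (components_values).
    "field_path": ["retriever", "requester", "url"],
    "value": "{{ components_values.url }}",
}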
airbyte_cdk/sources/declarative/models/declarative_component_schema.py

@@ -1463,11 +1463,9 @@ class ComponentMappingDefinition(BaseModel):
         ...,
         description="A list of potentially nested fields indicating the full path where value will be added or updated.",
         examples=[
-            ["data"],
-            ["data", "records"],
-            ["data", 1, "name"],
-            ["data", "{{ components_values.name }}"],
-            ["data", "*", "record"],
+            ["name"],
+            ["retriever", "requester", "url"],
+            ["retriever", "requester", "{{ components_values.field }}"],
             ["*", "**", "name"],
         ],
         title="Field Path",