airbyte-cdk 6.60.16__py3-none-any.whl → 6.60.16.post40.dev17219503797__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/connector_builder/connector_builder_handler.py +32 -36
- airbyte_cdk/connector_builder/main.py +3 -3
- airbyte_cdk/connector_builder/test_reader/helpers.py +24 -2
- airbyte_cdk/connector_builder/test_reader/message_grouper.py +1 -1
- airbyte_cdk/manifest_server/Dockerfile +45 -0
- airbyte_cdk/manifest_server/README.md +142 -0
- airbyte_cdk/manifest_server/__init__.py +3 -0
- airbyte_cdk/manifest_server/api_models/__init__.py +41 -0
- airbyte_cdk/manifest_server/api_models/capabilities.py +7 -0
- airbyte_cdk/manifest_server/api_models/dicts.py +17 -0
- airbyte_cdk/manifest_server/api_models/manifest.py +73 -0
- airbyte_cdk/manifest_server/api_models/stream.py +76 -0
- airbyte_cdk/manifest_server/app.py +17 -0
- airbyte_cdk/manifest_server/auth.py +43 -0
- airbyte_cdk/manifest_server/cli/__init__.py +5 -0
- airbyte_cdk/manifest_server/cli/_common.py +28 -0
- airbyte_cdk/manifest_server/cli/_info.py +30 -0
- airbyte_cdk/manifest_server/cli/_openapi.py +43 -0
- airbyte_cdk/manifest_server/cli/_start.py +38 -0
- airbyte_cdk/manifest_server/cli/run.py +59 -0
- airbyte_cdk/manifest_server/command_processor/__init__.py +0 -0
- airbyte_cdk/manifest_server/command_processor/processor.py +151 -0
- airbyte_cdk/manifest_server/command_processor/utils.py +76 -0
- airbyte_cdk/manifest_server/main.py +24 -0
- airbyte_cdk/manifest_server/openapi.yaml +641 -0
- airbyte_cdk/manifest_server/routers/__init__.py +0 -0
- airbyte_cdk/manifest_server/routers/capabilities.py +25 -0
- airbyte_cdk/manifest_server/routers/health.py +13 -0
- airbyte_cdk/manifest_server/routers/manifest.py +137 -0
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +15 -22
- airbyte_cdk/sources/concurrent_source/concurrent_source.py +30 -18
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +73 -3
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +4 -0
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +42 -4
- airbyte_cdk/sources/declarative/stream_slicers/stream_slicer_test_read_decorator.py +2 -2
- airbyte_cdk/sources/message/concurrent_repository.py +47 -0
- airbyte_cdk/sources/streams/concurrent/cursor.py +23 -7
- airbyte_cdk/sources/streams/concurrent/partition_reader.py +46 -5
- airbyte_cdk/sources/streams/concurrent/partitions/types.py +7 -1
- airbyte_cdk/sources/streams/http/http_client.py +4 -1
- airbyte_cdk/sources/utils/slice_logger.py +4 -0
- {airbyte_cdk-6.60.16.dist-info → airbyte_cdk-6.60.16.post40.dev17219503797.dist-info}/METADATA +4 -1
- {airbyte_cdk-6.60.16.dist-info → airbyte_cdk-6.60.16.post40.dev17219503797.dist-info}/RECORD +47 -21
- {airbyte_cdk-6.60.16.dist-info → airbyte_cdk-6.60.16.post40.dev17219503797.dist-info}/entry_points.txt +1 -0
- {airbyte_cdk-6.60.16.dist-info → airbyte_cdk-6.60.16.post40.dev17219503797.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.60.16.dist-info → airbyte_cdk-6.60.16.post40.dev17219503797.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.60.16.dist-info → airbyte_cdk-6.60.16.post40.dev17219503797.dist-info}/WHEEL +0 -0
airbyte_cdk/manifest_server/routers/manifest.py

@@ -0,0 +1,137 @@
+import hashlib
+from dataclasses import asdict
+from typing import Any, Dict, List, Mapping, Optional
+
+import jsonschema
+from fastapi import APIRouter, Depends, HTTPException
+
+from airbyte_cdk.manifest_server.api_models.manifest import (
+    CheckRequest,
+    CheckResponse,
+    DiscoverRequest,
+    DiscoverResponse,
+)
+from airbyte_cdk.models import AirbyteStateMessageSerializer
+from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
+from airbyte_cdk.sources.declarative.parsers.custom_code_compiler import (
+    INJECTED_COMPONENTS_PY,
+    INJECTED_COMPONENTS_PY_CHECKSUMS,
+)
+
+from ..api_models import (
+    FullResolveRequest,
+    Manifest,
+    ManifestResponse,
+    ResolveRequest,
+    StreamRead,
+    StreamTestReadRequest,
+)
+from ..auth import verify_jwt_token
+from ..command_processor.processor import ManifestCommandProcessor
+from ..command_processor.utils import build_catalog, build_source
+
+
+def safe_build_source(
+    manifest_dict: Mapping[str, Any],
+    config_dict: Mapping[str, Any],
+    page_limit: Optional[int] = None,
+    slice_limit: Optional[int] = None,
+) -> ManifestDeclarativeSource:
+    """Wrapper around build_source that converts ValidationError to HTTPException."""
+    try:
+        return build_source(manifest_dict, config_dict, page_limit, slice_limit)
+    except jsonschema.exceptions.ValidationError as e:
+        raise HTTPException(status_code=400, detail=f"Invalid manifest: {e.message}")
+
+
+router = APIRouter(
+    prefix="/manifest",
+    tags=["manifest"],
+    dependencies=[Depends(verify_jwt_token)],
+)
+
+
+@router.post("/test_read", operation_id="testRead")
+def test_read(request: StreamTestReadRequest) -> StreamRead:
+    """
+    Test reading from a specific stream in the manifest.
+    """
+    config_dict = request.config.model_dump()
+
+    source = safe_build_source(
+        request.manifest.model_dump(), config_dict, request.page_limit, request.slice_limit
+    )
+    catalog = build_catalog(request.stream_name)
+    state = [AirbyteStateMessageSerializer.load(state) for state in request.state]
+
+    if request.custom_components_code:
+        config_dict[INJECTED_COMPONENTS_PY] = request.custom_components_code
+        config_dict[INJECTED_COMPONENTS_PY_CHECKSUMS] = {
+            "md5": hashlib.md5(request.custom_components_code.encode()).hexdigest()
+        }
+
+    runner = ManifestCommandProcessor(source)
+    cdk_result = runner.test_read(
+        config_dict,
+        catalog,
+        state,
+        request.record_limit,
+        request.page_limit,
+        request.slice_limit,
+    )
+    return StreamRead.model_validate(asdict(cdk_result))
+
+
+@router.post("/check", operation_id="check")
+def check(request: CheckRequest) -> CheckResponse:
+    """Check configuration against a manifest"""
+    source = safe_build_source(request.manifest.model_dump(), request.config.model_dump())
+    runner = ManifestCommandProcessor(source)
+    success, message = runner.check_connection(request.config.model_dump())
+    return CheckResponse(success=success, message=message)
+
+
+@router.post("/discover", operation_id="discover")
+def discover(request: DiscoverRequest) -> DiscoverResponse:
+    """Discover streams from a manifest"""
+    source = safe_build_source(request.manifest.model_dump(), request.config.model_dump())
+    runner = ManifestCommandProcessor(source)
+    catalog = runner.discover(request.config.model_dump())
+    if catalog is None:
+        raise HTTPException(status_code=422, detail="Connector did not return a discovered catalog")
+    return DiscoverResponse(catalog=catalog)
+
+
+@router.post("/resolve", operation_id="resolve")
+def resolve(request: ResolveRequest) -> ManifestResponse:
+    """Resolve a manifest to its final configuration."""
+    source = safe_build_source(request.manifest.model_dump(), {})
+    return ManifestResponse(manifest=Manifest(**source.resolved_manifest))
+
+
+@router.post("/full_resolve", operation_id="fullResolve")
+def full_resolve(request: FullResolveRequest) -> ManifestResponse:
+    """
+    Fully resolve a manifest including dynamic streams.
+
+    Generates dynamic streams up to the specified limit and includes
+    them in the resolved manifest.
+    """
+    source = safe_build_source(request.manifest.model_dump(), request.config.model_dump())
+    manifest = {**source.resolved_manifest}
+    streams = manifest.get("streams", [])
+    for stream in streams:
+        stream["dynamic_stream_name"] = None
+
+    mapped_streams: Dict[str, List[Dict[str, Any]]] = {}
+    for stream in source.dynamic_streams:
+        generated_streams = mapped_streams.setdefault(stream["dynamic_stream_name"], [])
+
+        if len(generated_streams) < request.stream_limit:
+            generated_streams += [stream]
+
+    for generated_streams_list in mapped_streams.values():
+        streams.extend(generated_streams_list)
+
+    manifest["streams"] = streams
+    return ManifestResponse(manifest=Manifest(**manifest))
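The new router can be exercised end to end with FastAPI's test client. A minimal sketch, using only the module paths added in this diff: the JWT guard is overridden for local use, and the empty manifest/config payloads are placeholders that a real call would replace with a valid declarative manifest and connector config (an empty manifest simply comes back as a 400 from safe_build_source).

```python
# A minimal sketch of calling the new /manifest router locally. Module paths
# are the ones added in this diff; the payloads are placeholders only.
from fastapi import FastAPI
from fastapi.testclient import TestClient

from airbyte_cdk.manifest_server.auth import verify_jwt_token
from airbyte_cdk.manifest_server.routers.manifest import router

app = FastAPI()
app.include_router(router)
# Every /manifest route is guarded by Depends(verify_jwt_token); bypass it here.
app.dependency_overrides[verify_jwt_token] = lambda: None

client = TestClient(app)
manifest: dict = {}  # placeholder: a declarative source manifest goes here
config: dict = {}    # placeholder: the connector config to validate
response = client.post("/manifest/check", json={"manifest": manifest, "config": config})
print(response.status_code, response.json())
```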
airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py

@@ -2,6 +2,7 @@
 # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 #
 import logging
+import os
 from typing import Dict, Iterable, List, Optional, Set
 
 from airbyte_cdk.exception_handler import generate_failed_streams_error_message
@@ -95,11 +96,14 @@ class ConcurrentReadProcessor:
         """
         stream_name = partition.stream_name()
         self._streams_to_running_partitions[stream_name].add(partition)
+        cursor = self._stream_name_to_instance[stream_name].cursor
         if self._slice_logger.should_log_slice_message(self._logger):
             self._message_repository.emit_message(
                 self._slice_logger.create_slice_log_message(partition.to_slice())
             )
-        self._thread_pool_manager.submit(self._partition_reader.process_partition, partition)
+        self._thread_pool_manager.submit(
+            self._partition_reader.process_partition, partition, cursor
+        )
 
     def on_partition_complete_sentinel(
         self, sentinel: PartitionCompleteSentinel
@@ -112,26 +116,16 @@
         """
         partition = sentinel.partition
 
-        try:
-            if sentinel.is_successful:
-                stream = self._stream_name_to_instance[partition.stream_name()]
-                stream.cursor.close_partition(partition)
-        except Exception as exception:
-            self._flag_exception(partition.stream_name(), exception)
-            yield AirbyteTracedException.from_exception(
-                exception, stream_descriptor=StreamDescriptor(name=partition.stream_name())
-            ).as_sanitized_airbyte_message()
-        finally:
-            partitions_running = self._streams_to_running_partitions[partition.stream_name()]
-            if partition in partitions_running:
-                partitions_running.remove(partition)
-                # If all partitions were generated and this was the last one, the stream is done
-                if (
-                    partition.stream_name() not in self._streams_currently_generating_partitions
-                    and len(partitions_running) == 0
-                ):
-                    yield from self._on_stream_is_done(partition.stream_name())
-            yield from self._message_repository.consume_queue()
+        partitions_running = self._streams_to_running_partitions[partition.stream_name()]
+        if partition in partitions_running:
+            partitions_running.remove(partition)
+            # If all partitions were generated and this was the last one, the stream is done
+            if (
+                partition.stream_name() not in self._streams_currently_generating_partitions
+                and len(partitions_running) == 0
+            ):
+                yield from self._on_stream_is_done(partition.stream_name())
+        yield from self._message_repository.consume_queue()
 
     def on_record(self, record: Record) -> Iterable[AirbyteMessage]:
         """
@@ -160,7 +154,6 @@
                 stream.as_airbyte_stream(), AirbyteStreamStatus.RUNNING
             )
         self._record_counter[stream.name] += 1
-        stream.cursor.observe(record)
         yield message
         yield from self._message_repository.consume_queue()
 
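The net effect of these hunks is that cursor bookkeeping moves off the main thread: `on_record` no longer calls `stream.cursor.observe(record)`, the close logic leaves `on_partition_complete_sentinel`, and the stream's cursor is instead handed to `PartitionReader.process_partition` alongside the partition. A schematic sketch of that division of labor, with illustrative names rather than the CDK's exact signatures:

```python
# Illustrative only: a simplified worker-side loop showing where cursor calls
# now happen. The real implementation is PartitionReader.process_partition in
# airbyte_cdk/sources/streams/concurrent/partition_reader.py; these names and
# signatures are schematic, not the CDK API.
from queue import Queue


def process_partition(queue: Queue, partition, cursor) -> None:
    """Read one partition on a worker thread; the cursor observes each record
    and closes the partition as part of the same unit of work."""
    try:
        for record in partition.read():
            cursor.observe(record)            # previously done in on_record()
            queue.put(record)
        cursor.close_partition(partition)     # previously done on the sentinel
        queue.put(("partition-complete", partition))
    except Exception as exception:
        queue.put(exception)                  # surfaced to the main thread
```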
airbyte_cdk/sources/concurrent_source/concurrent_source.py

@@ -1,10 +1,11 @@
 #
 # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 #
+
 import concurrent
 import logging
 from queue import Queue
-from typing import Iterable, Iterator, List
+from typing import Iterable, Iterator, List, Optional
 
 from airbyte_cdk.models import AirbyteMessage
 from airbyte_cdk.sources.concurrent_source.concurrent_read_processor import ConcurrentReadProcessor
@@ -16,7 +17,7 @@ from airbyte_cdk.sources.concurrent_source.thread_pool_manager import ThreadPoolManager
 from airbyte_cdk.sources.message import InMemoryMessageRepository, MessageRepository
 from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
 from airbyte_cdk.sources.streams.concurrent.partition_enqueuer import PartitionEnqueuer
-from airbyte_cdk.sources.streams.concurrent.partition_reader import PartitionReader
+from airbyte_cdk.sources.streams.concurrent.partition_reader import PartitionLogger, PartitionReader
 from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
 from airbyte_cdk.sources.streams.concurrent.partitions.types import (
     PartitionCompleteSentinel,
@@ -43,6 +44,7 @@ class ConcurrentSource:
         logger: logging.Logger,
         slice_logger: SliceLogger,
         message_repository: MessageRepository,
+        queue: Optional[Queue[QueueItem]] = None,
         timeout_seconds: int = DEFAULT_TIMEOUT_SECONDS,
     ) -> "ConcurrentSource":
         is_single_threaded = initial_number_of_partitions_to_generate == 1 and num_workers == 1
@@ -59,12 +61,13 @@
             logger,
         )
         return ConcurrentSource(
-            threadpool,
-            logger,
-            slice_logger,
-            message_repository,
-            initial_number_of_partitions_to_generate,
-            timeout_seconds,
+            threadpool=threadpool,
+            logger=logger,
+            slice_logger=slice_logger,
+            queue=queue,
+            message_repository=message_repository,
+            initial_number_partitions_to_generate=initial_number_of_partitions_to_generate,
+            timeout_seconds=timeout_seconds,
         )
 
     def __init__(
@@ -72,6 +75,7 @@
         threadpool: ThreadPoolManager,
         logger: logging.Logger,
         slice_logger: SliceLogger = DebugSliceLogger(),
+        queue: Optional[Queue[QueueItem]] = None,
        message_repository: MessageRepository = InMemoryMessageRepository(),
         initial_number_partitions_to_generate: int = 1,
         timeout_seconds: int = DEFAULT_TIMEOUT_SECONDS,
@@ -91,25 +95,28 @@
         self._initial_number_partitions_to_generate = initial_number_partitions_to_generate
         self._timeout_seconds = timeout_seconds
 
+        # We set a maxsize to for the main thread to process record items when the queue size grows. This assumes that there are less
+        # threads generating partitions that than are max number of workers. If it weren't the case, we could have threads only generating
+        # partitions which would fill the queue. This number is arbitrarily set to 10_000 but will probably need to be changed given more
+        # information and might even need to be configurable depending on the source
+        self._queue = queue or Queue(maxsize=10_000)
+
     def read(
         self,
         streams: List[AbstractStream],
     ) -> Iterator[AirbyteMessage]:
         self._logger.info("Starting syncing")
-
-        # We set a maxsize to for the main thread to process record items when the queue size grows. This assumes that there are less
-        # threads generating partitions that than are max number of workers. If it weren't the case, we could have threads only generating
-        # partitions which would fill the queue. This number is arbitrarily set to 10_000 but will probably need to be changed given more
-        # information and might even need to be configurable depending on the source
-        queue: Queue[QueueItem] = Queue(maxsize=10_000)
         concurrent_stream_processor = ConcurrentReadProcessor(
             streams,
-            PartitionEnqueuer(queue, self._threadpool),
+            PartitionEnqueuer(self._queue, self._threadpool),
             self._threadpool,
             self._logger,
             self._slice_logger,
             self._message_repository,
-            PartitionReader(queue),
+            PartitionReader(
+                self._queue,
+                PartitionLogger(self._slice_logger, self._logger, self._message_repository),
+            ),
         )
 
         # Enqueue initial partition generation tasks
@@ -117,7 +124,7 @@
 
         # Read from the queue until all partitions were generated and read
         yield from self._consume_from_queue(
-            queue,
+            self._queue,
             concurrent_stream_processor,
         )
         self._threadpool.check_for_errors_and_shutdown()
@@ -141,7 +148,10 @@
                 airbyte_message_or_record_or_exception,
                 concurrent_stream_processor,
             )
 
+            # In the event that a partition raises an exception, anything remaining in
+            # the queue will be missed because is_done() can raise an exception and exit
+            # out of this loop before remaining items are consumed
             if queue.empty() and concurrent_stream_processor.is_done():
                 # all partitions were generated and processed. we're done here
                 break
@@ -161,5 +171,7 @@
             yield from concurrent_stream_processor.on_partition_complete_sentinel(queue_item)
         elif isinstance(queue_item, Record):
             yield from concurrent_stream_processor.on_record(queue_item)
+        elif isinstance(queue_item, AirbyteMessage):
+            yield queue_item
         else:
             raise ValueError(f"Unknown queue item type: {type(queue_item)}")
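The queue is now created once (or injected) and kept on `self`, so other components, notably the new `ConcurrentMessageRepository`, can publish `AirbyteMessage` items into the same bounded queue the read loop drains, which is why the item handler gains an `AirbyteMessage` branch. A generic sketch of the shared bounded-queue pattern, not the CDK API:

```python
# A generic sketch of the pattern this change enables: several producers share
# one bounded queue with the consuming main thread, so messages emitted from
# worker threads are yielded in arrival order instead of being buffered.
import threading
from queue import Queue

SENTINEL = object()


def producer(queue: Queue, items: range) -> None:
    for item in items:
        queue.put(item)      # blocks when the queue is full (maxsize)
    queue.put(SENTINEL)      # signals this producer is done


queue: Queue = Queue(maxsize=10_000)
workers = [threading.Thread(target=producer, args=(queue, range(3))) for _ in range(2)]
for worker in workers:
    worker.start()

finished = 0
while finished < len(workers):
    item = queue.get()
    if item is SENTINEL:
        finished += 1
    else:
        print("consumed", item)
```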
airbyte_cdk/sources/declarative/concurrent_declarative_source.py

@@ -3,7 +3,22 @@
 #
 
 import logging
-from typing import Any, Generic, Iterator, List, Mapping, MutableMapping, Optional, Tuple, Union
+from dataclasses import dataclass, field
+from queue import Queue
+from typing import (
+    Any,
+    ClassVar,
+    Generic,
+    Iterator,
+    List,
+    Mapping,
+    MutableMapping,
+    Optional,
+    Tuple,
+    Union,
+)
+
+from airbyte_protocol_dataclasses.models import Level
 
 from airbyte_cdk.models import (
     AirbyteCatalog,
@@ -43,6 +58,8 @@ from airbyte_cdk.sources.declarative.stream_slicers.declarative_partition_generator import (
     StreamSlicerPartitionGenerator,
 )
 from airbyte_cdk.sources.declarative.types import ConnectionDefinition
+from airbyte_cdk.sources.message.concurrent_repository import ConcurrentMessageRepository
+from airbyte_cdk.sources.message.repository import InMemoryMessageRepository, MessageRepository
 from airbyte_cdk.sources.source import TState
 from airbyte_cdk.sources.streams import Stream
 from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
@@ -50,6 +67,22 @@ from airbyte_cdk.sources.streams.concurrent.abstract_stream_facade import AbstractStreamFacade
 from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, FinalStateCursor
 from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
 from airbyte_cdk.sources.streams.concurrent.helpers import get_primary_key_from_stream
+from airbyte_cdk.sources.streams.concurrent.partitions.types import QueueItem
+
+
+@dataclass
+class TestLimits:
+    __test__: ClassVar[bool] = False  # Tell Pytest this is not a Pytest class, despite its name
+
+    DEFAULT_MAX_PAGES_PER_SLICE: ClassVar[int] = 5
+    DEFAULT_MAX_SLICES: ClassVar[int] = 5
+    DEFAULT_MAX_RECORDS: ClassVar[int] = 100
+    DEFAULT_MAX_STREAMS: ClassVar[int] = 100
+
+    max_records: int = field(default=DEFAULT_MAX_RECORDS)
+    max_pages_per_slice: int = field(default=DEFAULT_MAX_PAGES_PER_SLICE)
+    max_slices: int = field(default=DEFAULT_MAX_SLICES)
+    max_streams: int = field(default=DEFAULT_MAX_STREAMS)
 
 
 class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
@@ -65,7 +98,9 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
         source_config: ConnectionDefinition,
         debug: bool = False,
         emit_connector_builder_messages: bool = False,
-        component_factory: Optional[ModelToComponentFactory] = None,
+        migrate_manifest: bool = False,
+        normalize_manifest: bool = False,
+        limits: Optional[TestLimits] = None,
         config_path: Optional[str] = None,
         **kwargs: Any,
     ) -> None:
@@ -73,21 +108,39 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
         # no longer needs to store the original incoming state. But maybe there's an edge case?
         self._connector_state_manager = ConnectorStateManager(state=state)  # type: ignore # state is always in the form of List[AirbyteStateMessage]. The ConnectorStateManager should use generics, but this can be done later
 
+        # We set a maxsize to for the main thread to process record items when the queue size grows. This assumes that there are less
+        # threads generating partitions that than are max number of workers. If it weren't the case, we could have threads only generating
+        # partitions which would fill the queue. This number is arbitrarily set to 10_000 but will probably need to be changed given more
+        # information and might even need to be configurable depending on the source
+        queue: Queue[QueueItem] = Queue(maxsize=10_000)
+        message_repository = InMemoryMessageRepository(
+            Level.DEBUG if emit_connector_builder_messages else Level.INFO
+        )
+
         # To reduce the complexity of the concurrent framework, we are not enabling RFR with synthetic
         # cursors. We do this by no longer automatically instantiating RFR cursors when converting
         # the declarative models into runtime components. Concurrent sources will continue to checkpoint
         # incremental streams running in full refresh.
-        component_factory = component_factory or ModelToComponentFactory(
+        component_factory = ModelToComponentFactory(
             emit_connector_builder_messages=emit_connector_builder_messages,
+            message_repository=ConcurrentMessageRepository(queue, message_repository),
             connector_state_manager=self._connector_state_manager,
             max_concurrent_async_job_count=source_config.get("max_concurrent_async_job_count"),
+            limit_pages_fetched_per_slice=limits.max_pages_per_slice if limits else None,
+            limit_slices_fetched=limits.max_slices if limits else None,
+            disable_retries=True if limits else False,
+            disable_cache=True if limits else False,
         )
 
+        self._limits = limits
+
         super().__init__(
             source_config=source_config,
             config=config,
             debug=debug,
             emit_connector_builder_messages=emit_connector_builder_messages,
+            migrate_manifest=migrate_manifest,
+            normalize_manifest=normalize_manifest,
             component_factory=component_factory,
             config_path=config_path,
         )
@@ -117,6 +170,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
             initial_number_of_partitions_to_generate=initial_number_of_partitions_to_generate,
             logger=self.logger,
             slice_logger=self._slice_logger,
+            queue=queue,
             message_repository=self.message_repository,
         )
 
@@ -280,8 +334,14 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                         schema_loader=declarative_stream._schema_loader,  # type: ignore # We are accessing the private property but the public one is optional and we will remove this code soonish
                         retriever=retriever,
                         message_repository=self.message_repository,
+                        max_records_limit=self._limits.max_records
+                        if self._limits
+                        else None,
                     ),
                     stream_slicer=declarative_stream.retriever.stream_slicer,
+                    slice_limit=self._limits.max_slices
+                    if self._limits
+                    else None,  # technically not needed because create_declarative_stream() -> create_simple_retriever() will apply the decorator. But for consistency and depending how we build create_default_stream, this may be needed later
                 )
             else:
                 if (
@@ -311,8 +371,12 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                         schema_loader=declarative_stream._schema_loader,  # type: ignore # We are accessing the private property but the public one is optional and we will remove this code soonish
                         retriever=retriever,
                         message_repository=self.message_repository,
+                        max_records_limit=self._limits.max_records
+                        if self._limits
+                        else None,
                     ),
                     stream_slicer=cursor,
+                    slice_limit=self._limits.max_slices if self._limits else None,
                 )
 
                 concurrent_streams.append(
@@ -341,8 +405,12 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                         schema_loader=declarative_stream._schema_loader,  # type: ignore # We are accessing the private property but the public one is optional and we will remove this code soonish
                         retriever=declarative_stream.retriever,
                         message_repository=self.message_repository,
+                        max_records_limit=self._limits.max_records if self._limits else None,
                     ),
                     declarative_stream.retriever.stream_slicer,
+                    slice_limit=self._limits.max_slices
+                    if self._limits
+                    else None,  # technically not needed because create_declarative_stream() -> create_simple_retriever() will apply the decorator. But for consistency and depending how we build create_default_stream, this may be needed later
                 )
 
                 final_state_cursor = FinalStateCursor(
@@ -401,8 +469,10 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                         schema_loader=declarative_stream._schema_loader,  # type: ignore # We are accessing the private property but the public one is optional and we will remove this code soonish
                         retriever=retriever,
                         message_repository=self.message_repository,
+                        max_records_limit=self._limits.max_records if self._limits else None,
                     ),
                     perpartition_cursor,
+                    slice_limit=self._limits.max_slices if self._limits else None,
                 )
 
                 concurrent_streams.append(
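`TestLimits` is the single switch that puts the source into test-read mode: it caps pages, slices, and records, and the factory above disables retries and caching whenever it is set. A sketch of constructing it; the `catalog`, `config`, and `state` arguments are assumed from the pre-existing constructor, which these hunks do not show, and all payload values are placeholders.

```python
# A sketch only: TestLimits fields are exactly those added in this diff; the
# catalog/config/state parameters are assumed from the prior release's
# constructor and the payloads are placeholders, not a working manifest.
from airbyte_cdk.sources.declarative.concurrent_declarative_source import (
    ConcurrentDeclarativeSource,
    TestLimits,
)

limits = TestLimits(max_records=50, max_slices=2)  # other fields keep their defaults

source = ConcurrentDeclarativeSource(
    catalog=None,        # assumed parameter, as in the prior release
    config={},           # placeholder connector config
    state=None,          # placeholder state
    source_config={},    # placeholder declarative manifest
    limits=limits,       # caps the read and disables retries/caching
)
```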
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py

@@ -631,6 +631,10 @@ SCHEMA_TRANSFORMER_TYPE_MAPPING = {
     SchemaNormalizationModel.Default: TransformConfig.DefaultSchemaNormalization,
 }
 
+# Ideally this should use the value defined in ConcurrentDeclarativeSource, but
+# this would be a circular import
+MAX_SLICES = 5
+
 
 class ModelToComponentFactory:
     EPOCH_DATETIME_FORMAT = "%s"
airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py

@@ -1,9 +1,12 @@
-# Copyright (c)
+# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
 
-from typing import Any, Iterable, Mapping, Optional
+from typing import Any, Iterable, Mapping, Optional, cast
 
 from airbyte_cdk.sources.declarative.retrievers import Retriever
 from airbyte_cdk.sources.declarative.schema import SchemaLoader
+from airbyte_cdk.sources.declarative.stream_slicers.stream_slicer_test_read_decorator import (
+    StreamSlicerTestReadDecorator,
+)
 from airbyte_cdk.sources.message import MessageRepository
 from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
 from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
@@ -11,6 +14,11 @@ from airbyte_cdk.sources.streams.concurrent.partitions.stream_slicer import StreamSlicer
 from airbyte_cdk.sources.types import Record, StreamSlice
 from airbyte_cdk.utils.slice_hasher import SliceHasher
 
+# For Connector Builder test read operations, we track the total number of records
+# read for the stream at the global level so that we can stop reading early if we
+# exceed the record limit
+total_record_counter = 0
+
 
 class SchemaLoaderCachingDecorator(SchemaLoader):
     def __init__(self, schema_loader: SchemaLoader):
@@ -31,6 +39,7 @@ class DeclarativePartitionFactory:
         schema_loader: SchemaLoader,
         retriever: Retriever,
         message_repository: MessageRepository,
+        max_records_limit: Optional[int] = None,
     ) -> None:
         """
         The DeclarativePartitionFactory takes a retriever_factory and not a retriever directly. The reason is that our components are not
@@ -41,6 +50,7 @@ class DeclarativePartitionFactory:
         self._schema_loader = SchemaLoaderCachingDecorator(schema_loader)
         self._retriever = retriever
         self._message_repository = message_repository
+        self._max_records_limit = max_records_limit
 
     def create(self, stream_slice: StreamSlice) -> Partition:
         return DeclarativePartition(
@@ -48,6 +58,7 @@ class DeclarativePartitionFactory:
             schema_loader=self._schema_loader,
             retriever=self._retriever,
             message_repository=self._message_repository,
+            max_records_limit=self._max_records_limit,
             stream_slice=stream_slice,
         )
 
@@ -59,19 +70,29 @@ class DeclarativePartition(Partition):
         schema_loader: SchemaLoader,
         retriever: Retriever,
         message_repository: MessageRepository,
+        max_records_limit: Optional[int],
         stream_slice: StreamSlice,
     ):
         self._stream_name = stream_name
         self._schema_loader = schema_loader
         self._retriever = retriever
         self._message_repository = message_repository
+        self._max_records_limit = max_records_limit
         self._stream_slice = stream_slice
         self._hash = SliceHasher.hash(self._stream_name, self._stream_slice)
 
     def read(self) -> Iterable[Record]:
+        if self._max_records_limit is not None:
+            global total_record_counter
+            if total_record_counter >= self._max_records_limit:
+                return
         for stream_data in self._retriever.read_records(
             self._schema_loader.get_json_schema(), self._stream_slice
         ):
+            if self._max_records_limit is not None:
+                if total_record_counter >= self._max_records_limit:
+                    break
+
             if isinstance(stream_data, Mapping):
                 record = (
                     stream_data
@@ -86,6 +107,9 @@ class DeclarativePartition(Partition):
             else:
                 self._message_repository.emit_message(stream_data)
 
+            if self._max_records_limit is not None:
+                total_record_counter += 1
+
     def to_slice(self) -> Optional[Mapping[str, Any]]:
         return self._stream_slice
 
@@ -98,10 +122,24 @@
 
 class StreamSlicerPartitionGenerator(PartitionGenerator):
     def __init__(
-        self, partition_factory: DeclarativePartitionFactory, stream_slicer: StreamSlicer
+        self,
+        partition_factory: DeclarativePartitionFactory,
+        stream_slicer: StreamSlicer,
+        slice_limit: Optional[int] = None,
+        max_records_limit: Optional[int] = None,
     ) -> None:
         self._partition_factory = partition_factory
-        self._stream_slicer = stream_slicer
+
+        if slice_limit:
+            self._stream_slicer = cast(
+                StreamSlicer,
+                StreamSlicerTestReadDecorator(
+                    wrapped_slicer=stream_slicer,
+                    maximum_number_of_slices=slice_limit,
+                ),
+            )
+        else:
+            self._stream_slicer = stream_slicer
 
     def generate(self) -> Iterable[Partition]:
         for stream_slice in self._stream_slicer.stream_slices():
airbyte_cdk/sources/declarative/stream_slicers/stream_slicer_test_read_decorator.py

@@ -4,10 +4,10 @@
 
 from dataclasses import dataclass
 from itertools import islice
-from typing import Any, Iterable
+from typing import Any, Iterable
 
 from airbyte_cdk.sources.streams.concurrent.partitions.stream_slicer import StreamSlicer
-from airbyte_cdk.sources.types import StreamSlice
+from airbyte_cdk.sources.types import StreamSlice
 
 
 @dataclass
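For reference, the decorator defined in this file caps slices by running the wrapped slicer's iterator through `itertools.islice`. A self-contained illustration of the same pattern, not the CDK class itself:

```python
# A minimal illustration of enforcing a slice limit by wrapping a slicer, in
# the spirit of StreamSlicerTestReadDecorator: islice stops the underlying
# generator after N slices without materializing the rest.
from dataclasses import dataclass
from itertools import islice
from typing import Any, Iterable


@dataclass
class CappedSlicer:
    wrapped_slicer: Any           # anything exposing a stream_slices() iterator
    maximum_number_of_slices: int

    def stream_slices(self) -> Iterable[Any]:
        return islice(self.wrapped_slicer.stream_slices(), self.maximum_number_of_slices)


class RangeSlicer:
    def stream_slices(self) -> Iterable[Any]:
        yield from ({"page": i} for i in range(100))


capped = CappedSlicer(wrapped_slicer=RangeSlicer(), maximum_number_of_slices=5)
print(list(capped.stream_slices()))  # only the first 5 slices are produced
```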