PyPI - airbyte-cdk - Versions diffs - 6.6.0rc1__py3-none-any.whl → 6.6.1__py3-none-any.whl - Mend

airbyte-cdk 6.6.0rc1__py3-none-any.whl → 6.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

airbyte_cdk/__init__.py CHANGED Viewed

@@ -282,12 +282,18 @@ __all__ = [
     "StreamSlice",
 ]
-__version__ = _dunamai.get_version(
-    "airbyte-cdk",
-    third_choice=_dunamai.Version.from_any_vcs,
-).serialize()
+__version__: str
 """Version generated by poetry dynamic versioning during publish.
 When running in development, dunamai will calculate a new prerelease version
 from existing git release tag info.
 """
+try:
+    __version__ = _dunamai.get_version(
+        "airbyte-cdk",
+        third_choice=_dunamai.Version.from_any_vcs,
+        fallback=_dunamai.Version("0.0.0+dev"),
+    ).serialize()
+except:
+    __version__ = "0.0.0+dev"

airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py CHANGED Viewed

@@ -114,7 +114,8 @@ class ConcurrentReadProcessor:
         try:
             if sentinel.is_successful:
-                partition.close()
+                stream = self._stream_name_to_instance[partition.stream_name()]
+                stream.cursor.close_partition(partition)
         except Exception as exception:
             self._flag_exception(partition.stream_name(), exception)
             yield AirbyteTracedException.from_exception(

airbyte_cdk/sources/declarative/concurrent_declarative_source.py CHANGED Viewed

@@ -3,7 +3,7 @@
 #
 import logging
-from typing import Any, Generic, Iterator, List, Mapping, Optional, Tuple, Union
+from typing import Any, Generic, Iterator, List, Mapping, Optional, Tuple, Union, Callable
 from airbyte_cdk.models import (
     AirbyteCatalog,
@@ -16,6 +16,9 @@ from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
 from airbyte_cdk.sources.declarative.concurrency_level import ConcurrencyLevel
 from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
 from airbyte_cdk.sources.declarative.extractors import RecordSelector
+from airbyte_cdk.sources.declarative.extractors.record_filter import (
+    ClientSideIncrementalRecordFilterDecorator,
+)
 from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
 from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
 from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
@@ -24,18 +27,24 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
 )
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     DatetimeBasedCursor as DatetimeBasedCursorModel,
+    DeclarativeStream as DeclarativeStreamModel,
 )
 from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
     ModelToComponentFactory,
+    ComponentDefinition,
 )
 from airbyte_cdk.sources.declarative.requesters import HttpRequester
-from airbyte_cdk.sources.declarative.retrievers import SimpleRetriever
+from airbyte_cdk.sources.declarative.retrievers import SimpleRetriever, Retriever
+from airbyte_cdk.sources.declarative.stream_slicers.declarative_partition_generator import (
+    DeclarativePartitionFactory,
+    StreamSlicerPartitionGenerator,
+)
 from airbyte_cdk.sources.declarative.transformations.add_fields import AddFields
 from airbyte_cdk.sources.declarative.types import ConnectionDefinition
 from airbyte_cdk.sources.source import TState
+from airbyte_cdk.sources.types import Config, StreamState
 from airbyte_cdk.sources.streams import Stream
 from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
-from airbyte_cdk.sources.streams.concurrent.adapters import CursorPartitionGenerator
 from airbyte_cdk.sources.streams.concurrent.availability_strategy import (
     AlwaysAvailableAvailabilityStrategy,
 )
@@ -210,31 +219,18 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                         )
                     )
-                    # This is an optimization so that we don't invoke any cursor or state management flows within the
-                    # low-code framework because state management is handled through the ConcurrentCursor.
-                    if (
-                        declarative_stream
-                        and declarative_stream.retriever
-                        and isinstance(declarative_stream.retriever, SimpleRetriever)
-                    ):
-                        # Also a temporary hack. In the legacy Stream implementation, as part of the read, set_initial_state() is
-                        # called to instantiate incoming state on the cursor. Although we no longer rely on the legacy low-code cursor
-                        # for concurrent checkpointing, low-code components like StopConditionPaginationStrategyDecorator and
-                        # ClientSideIncrementalRecordFilterDecorator still rely on a DatetimeBasedCursor that is properly initialized
-                        # with state.
-                        if declarative_stream.retriever.cursor:
-                            declarative_stream.retriever.cursor.set_initial_state(
-                                stream_state=stream_state
-                            )
-                        declarative_stream.retriever.cursor = None
-                    partition_generator = CursorPartitionGenerator(
-                        stream=declarative_stream,
-                        message_repository=self.message_repository,  # type: ignore  # message_repository is always instantiated with a value by factory
-                        cursor=cursor,
-                        connector_state_converter=connector_state_converter,
-                        cursor_field=[cursor.cursor_field.cursor_field_key],
-                        slice_boundary_fields=cursor.slice_boundary_fields,
+                    partition_generator = StreamSlicerPartitionGenerator(
+                        DeclarativePartitionFactory(
+                            declarative_stream.name,
+                            declarative_stream.get_json_schema(),
+                            self._retriever_factory(
+                                name_to_stream_mapping[declarative_stream.name],
+                                config,
+                                stream_state,
+                            ),
+                            self.message_repository,
+                        ),
+                        cursor,
                     )
                     concurrent_streams.append(
@@ -291,6 +287,9 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
             if isinstance(record_selector, RecordSelector):
                 if (
                     record_selector.record_filter
+                    and not isinstance(
+                        record_selector.record_filter, ClientSideIncrementalRecordFilterDecorator
+                    )
                     and "stream_state" in record_selector.record_filter.condition
                 ):
                     self.logger.warning(
@@ -344,3 +343,34 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                 if stream.stream.name not in concurrent_stream_names
             ]
         )
+    def _retriever_factory(
+        self, stream_config: ComponentDefinition, source_config: Config, stream_state: StreamState
+    ) -> Callable[[], Retriever]:
+        def _factory_method() -> Retriever:
+            declarative_stream: DeclarativeStream = self._constructor.create_component(
+                DeclarativeStreamModel,
+                stream_config,
+                source_config,
+                emit_connector_builder_messages=self._emit_connector_builder_messages,
+            )
+            # This is an optimization so that we don't invoke any cursor or state management flows within the
+            # low-code framework because state management is handled through the ConcurrentCursor.
+            if (
+                declarative_stream
+                and declarative_stream.retriever
+                and isinstance(declarative_stream.retriever, SimpleRetriever)
+            ):
+                # Also a temporary hack. In the legacy Stream implementation, as part of the read, set_initial_state() is
+                # called to instantiate incoming state on the cursor. Although we no longer rely on the legacy low-code cursor
+                # for concurrent checkpointing, low-code components like StopConditionPaginationStrategyDecorator and
+                # ClientSideIncrementalRecordFilterDecorator still rely on a DatetimeBasedCursor that is properly initialized
+                # with state.
+                if declarative_stream.retriever.cursor:
+                    declarative_stream.retriever.cursor.set_initial_state(stream_state=stream_state)
+                declarative_stream.retriever.cursor = None
+            return declarative_stream.retriever
+        return _factory_method

airbyte_cdk/sources/declarative/declarative_component_schema.yaml CHANGED Viewed

@@ -1750,6 +1750,45 @@ definitions:
       type:
         type: string
         enum: [XmlDecoder]
+  CustomDecoder:
+    title: Custom Decoder
+    description: Use this to implement custom decoder logic.
+    type: object
+    additionalProperties: true
+    required:
+      - type
+      - class_name
+    properties:
+      type:
+        type: string
+        enum: [CustomDecoder]
+      class_name:
+        title: Class Name
+        description: Fully-qualified name of the class that will be implementing the custom decoding. Has to be a sub class of Decoder. The format is `source_<name>.<package>.<class_name>`.
+        type: string
+        additionalProperties: true
+        examples:
+          - "source_amazon_ads.components.GzipJsonlDecoder"
+      $parameters:
+        type: object
+        additionalProperties: true
+  GzipJsonDecoder:
+    title: GzipJson Decoder
+    description: Use this if the response is Gzip compressed Json.
+    type: object
+    additionalProperties: true
+    required:
+      - type
+    properties:
+      type:
+        type: string
+        enum: [GzipJsonDecoder]
+      encoding:
+        type: string
+        default: utf-8
+      $parameters:
+        type: object
+        additionalProperties: true
   ListPartitionRouter:
     title: List Partition Router
     description: A Partition router that specifies a list of attributes where each attribute describes a portion of the complete data set for a stream. During a sync, each value is iterated over and can be used as input to outbound API requests.
@@ -2404,10 +2443,12 @@ definitions:
         title: Decoder
         description: Component decoding the response so records can be extracted.
         anyOf:
+          - "$ref": "#/definitions/CustomDecoder"
           - "$ref": "#/definitions/JsonDecoder"
           - "$ref": "#/definitions/JsonlDecoder"
           - "$ref": "#/definitions/IterableDecoder"
           - "$ref": "#/definitions/XmlDecoder"
+          - "$ref": "#/definitions/GzipJsonDecoder"
       $parameters:
         type: object
         additionalProperties: true
@@ -2520,10 +2561,12 @@ definitions:
         title: Decoder
         description: Component decoding the response so records can be extracted.
         anyOf:
+          - "$ref": "#/definitions/CustomDecoder"
           - "$ref": "#/definitions/JsonDecoder"
           - "$ref": "#/definitions/JsonlDecoder"
           - "$ref": "#/definitions/IterableDecoder"
           - "$ref": "#/definitions/XmlDecoder"
+          - "$ref": "#/definitions/GzipJsonDecoder"
       $parameters:
         type: object
         additionalProperties: true

airbyte_cdk/sources/declarative/decoders/__init__.py CHANGED Viewed

@@ -3,9 +3,9 @@
 #
 from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
-from airbyte_cdk.sources.declarative.decoders.json_decoder import JsonDecoder, JsonlDecoder, IterableDecoder
+from airbyte_cdk.sources.declarative.decoders.json_decoder import JsonDecoder, JsonlDecoder, IterableDecoder, GzipJsonDecoder
 from airbyte_cdk.sources.declarative.decoders.noop_decoder import NoopDecoder
 from airbyte_cdk.sources.declarative.decoders.pagination_decoder_decorator import PaginationDecoderDecorator
 from airbyte_cdk.sources.declarative.decoders.xml_decoder import XmlDecoder
-__all__ = ["Decoder", "JsonDecoder", "JsonlDecoder", "IterableDecoder", "NoopDecoder", "PaginationDecoderDecorator", "XmlDecoder"]
+__all__ = ["Decoder", "JsonDecoder", "JsonlDecoder", "IterableDecoder", "GzipJsonDecoder", "NoopDecoder", "PaginationDecoderDecorator", "XmlDecoder"]

airbyte_cdk/sources/declarative/decoders/json_decoder.py CHANGED Viewed

@@ -1,14 +1,15 @@
 #
 # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 #
+import codecs
 import logging
 from dataclasses import InitVar, dataclass
-from typing import Any, Generator, Mapping
+from gzip import decompress
+from typing import Any, Generator, Mapping, MutableMapping, List, Optional
 import requests
 from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
-from orjson import orjson
+import orjson
 logger = logging.getLogger("airbyte")
@@ -24,24 +25,32 @@ class JsonDecoder(Decoder):
     def is_stream_response(self) -> bool:
         return False
-    def decode(self, response: requests.Response) -> Generator[Mapping[str, Any], None, None]:
+    def decode(
+        self, response: requests.Response
+    ) -> Generator[MutableMapping[str, Any], None, None]:
         """
         Given the response is an empty string or an emtpy list, the function will return a generator with an empty mapping.
         """
         try:
             body_json = response.json()
-            if not isinstance(body_json, list):
-                body_json = [body_json]
-            if len(body_json) == 0:
-                yield {}
-            else:
-                yield from body_json
+            yield from self.parse_body_json(body_json)
         except requests.exceptions.JSONDecodeError:
             logger.warning(
                 f"Response cannot be parsed into json: {response.status_code=}, {response.text=}"
             )
             yield {}
+    @staticmethod
+    def parse_body_json(
+        body_json: MutableMapping[str, Any] | List[MutableMapping[str, Any]],
+    ) -> Generator[MutableMapping[str, Any], None, None]:
+        if not isinstance(body_json, list):
+            body_json = [body_json]
+        if len(body_json) == 0:
+            yield {}
+        else:
+            yield from body_json
 @dataclass
 class IterableDecoder(Decoder):
@@ -54,7 +63,9 @@ class IterableDecoder(Decoder):
     def is_stream_response(self) -> bool:
         return True
-    def decode(self, response: requests.Response) -> Generator[Mapping[str, Any], None, None]:
+    def decode(
+        self, response: requests.Response
+    ) -> Generator[MutableMapping[str, Any], None, None]:
         for line in response.iter_lines():
             yield {"record": line.decode()}
@@ -70,8 +81,30 @@ class JsonlDecoder(Decoder):
     def is_stream_response(self) -> bool:
         return True
-    def decode(self, response: requests.Response) -> Generator[Mapping[str, Any], None, None]:
+    def decode(
+        self, response: requests.Response
+    ) -> Generator[MutableMapping[str, Any], None, None]:
         # TODO???: set delimiter? usually it is `\n` but maybe it would be useful to set optional?
         #  https://github.com/airbytehq/airbyte-internal-issues/issues/8436
         for record in response.iter_lines():
             yield orjson.loads(record)
+@dataclass
+class GzipJsonDecoder(JsonDecoder):
+    encoding: Optional[str]
+    def __post_init__(self, parameters: Mapping[str, Any]) -> None:
+        if self.encoding:
+            try:
+                codecs.lookup(self.encoding)
+            except LookupError:
+                raise ValueError(
+                    f"Invalid encoding '{self.encoding}'. Please check provided encoding"
+                )
+    def decode(
+        self, response: requests.Response
+    ) -> Generator[MutableMapping[str, Any], None, None]:
+        raw_string = decompress(response.content).decode(encoding=self.encoding or "utf-8")
+        yield from self.parse_body_json(orjson.loads(raw_string))

airbyte_cdk/sources/declarative/manifest_declarative_source.py CHANGED Viewed

@@ -8,7 +8,7 @@ import pkgutil
 import re
 from copy import deepcopy
 from importlib import metadata
-from typing import Any, Dict, Iterator, List, Mapping, Optional, Tuple, Union
+from typing import Any, Dict, Iterator, List, Mapping, Optional, Tuple
 import yaml
 from airbyte_cdk.models import (
@@ -94,7 +94,7 @@ class ManifestDeclarativeSource(DeclarativeSource):
         return self._source_config
     @property
-    def message_repository(self) -> Union[None, MessageRepository]:
+    def message_repository(self) -> MessageRepository:
         return self._message_repository
     @property
@@ -256,7 +256,10 @@ class ManifestDeclarativeSource(DeclarativeSource):
             manifest_version, "manifest"
         )
-        if cdk_major < manifest_major or (
+        if cdk_version.startswith("0.0.0"):
+            # Skipping version compatibility check on unreleased dev branch
+            pass
+        elif cdk_major < manifest_major or (
             cdk_major == manifest_major and cdk_minor < manifest_minor
         ):
             raise ValidationError(

airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py CHANGED Viewed

@@ -4,7 +4,11 @@ from typing import Any, Mapping
 from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
 from airbyte_cdk.sources.declarative.migrations.state_migration import StateMigration
-from airbyte_cdk.sources.declarative.models import DatetimeBasedCursor, SubstreamPartitionRouter
+from airbyte_cdk.sources.declarative.models import (
+    DatetimeBasedCursor,
+    SubstreamPartitionRouter,
+    CustomIncrementalSync,
+)
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import ParentStreamConfig
@@ -32,7 +36,7 @@ class LegacyToPerPartitionStateMigration(StateMigration):
     def __init__(
         self,
         partition_router: SubstreamPartitionRouter,
-        cursor: DatetimeBasedCursor,
+        cursor: CustomIncrementalSync | DatetimeBasedCursor,
         config: Mapping[str, Any],
         parameters: Mapping[str, Any],
     ):
@@ -64,7 +68,7 @@ class LegacyToPerPartitionStateMigration(StateMigration):
             return False
         # There is exactly one parent stream
-        number_of_parent_streams = len(self._partition_router.parent_stream_configs)
+        number_of_parent_streams = len(self._partition_router.parent_stream_configs)  # type: ignore # custom partition will introduce this attribute if needed
         if number_of_parent_streams != 1:
             # There should be exactly one parent stream
             return False

airbyte_cdk/sources/declarative/models/declarative_component_schema.py CHANGED Viewed

@@ -4,10 +4,9 @@
 from __future__ import annotations
 from enum import Enum
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Literal, Optional, Union
 from pydantic.v1 import BaseModel, Extra, Field
-from typing_extensions import Literal
 class AuthFlowType(Enum):
@@ -632,6 +631,7 @@ class HttpResponseFilter(BaseModel):
         description="Match the response if its HTTP code is included in this list.",
         examples=[[420, 429], [500]],
         title="HTTP Codes",
+        unique_items=True,
     )
     predicate: Optional[str] = Field(
         None,
@@ -687,6 +687,29 @@ class XmlDecoder(BaseModel):
     type: Literal["XmlDecoder"]
+class CustomDecoder(BaseModel):
+    class Config:
+        extra = Extra.allow
+    type: Literal["CustomDecoder"]
+    class_name: str = Field(
+        ...,
+        description="Fully-qualified name of the class that will be implementing the custom decoding. Has to be a sub class of Decoder. The format is `source_<name>.<package>.<class_name>`.",
+        examples=["source_amazon_ads.components.GzipJsonlDecoder"],
+        title="Class Name",
+    )
+    parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
+class GzipJsonDecoder(BaseModel):
+    class Config:
+        extra = Extra.allow
+    type: Literal["GzipJsonDecoder"]
+    encoding: Optional[str] = "utf-8"
+    parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
 class MinMaxDatetime(BaseModel):
     type: Literal["MinMaxDatetime"]
     datetime: str = Field(
@@ -1620,7 +1643,16 @@ class SimpleRetriever(BaseModel):
         description="PartitionRouter component that describes how to partition the stream, enabling incremental syncs and checkpointing.",
         title="Partition Router",
     )
-    decoder: Optional[Union[JsonDecoder, JsonlDecoder, IterableDecoder, XmlDecoder]] = Field(
+    decoder: Optional[
+        Union[
+            CustomDecoder,
+            JsonDecoder,
+            JsonlDecoder,
+            IterableDecoder,
+            XmlDecoder,
+            GzipJsonDecoder,
+        ]
+    ] = Field(
         None,
         description="Component decoding the response so records can be extracted.",
         title="Decoder",
@@ -1680,7 +1712,16 @@ class AsyncRetriever(BaseModel):
         description="PartitionRouter component that describes how to partition the stream, enabling incremental syncs and checkpointing.",
         title="Partition Router",
     )
-    decoder: Optional[Union[JsonDecoder, JsonlDecoder, IterableDecoder, XmlDecoder]] = Field(
+    decoder: Optional[
+        Union[
+            CustomDecoder,
+            JsonDecoder,
+            JsonlDecoder,
+            IterableDecoder,
+            XmlDecoder,
+            GzipJsonDecoder,
+        ]
+    ] = Field(
         None,
         description="Component decoding the response so records can be extracted.",
         title="Decoder",

airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py CHANGED Viewed

@@ -58,6 +58,7 @@ from airbyte_cdk.sources.declarative.datetime import MinMaxDatetime
 from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
 from airbyte_cdk.sources.declarative.decoders import (
     Decoder,
+    GzipJsonDecoder,
     IterableDecoder,
     JsonDecoder,
     JsonlDecoder,
@@ -134,6 +135,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     CustomBackoffStrategy as CustomBackoffStrategyModel,
 )
+from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
+    CustomDecoder as CustomDecoderModel,
+)
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     CustomErrorHandler as CustomErrorHandlerModel,
 )
@@ -182,6 +186,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     ExponentialBackoffStrategy as ExponentialBackoffStrategyModel,
 )
+from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
+    GzipJsonDecoder as GzipJsonDecoderModel,
+)
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     HttpRequester as HttpRequesterModel,
 )
@@ -402,6 +409,7 @@ class ModelToComponentFactory:
             CursorPaginationModel: self.create_cursor_pagination,
             CustomAuthenticatorModel: self.create_custom_component,
             CustomBackoffStrategyModel: self.create_custom_component,
+            CustomDecoderModel: self.create_custom_component,
             CustomErrorHandlerModel: self.create_custom_component,
             CustomIncrementalSyncModel: self.create_custom_component,
             CustomRecordExtractorModel: self.create_custom_component,
@@ -425,6 +433,7 @@ class ModelToComponentFactory:
             InlineSchemaLoaderModel: self.create_inline_schema_loader,
             JsonDecoderModel: self.create_json_decoder,
             JsonlDecoderModel: self.create_jsonl_decoder,
+            GzipJsonDecoderModel: self.create_gzipjson_decoder,
             KeysToLowerModel: self.create_keys_to_lower_transformation,
             IterableDecoderModel: self.create_iterable_decoder,
             XmlDecoderModel: self.create_xml_decoder,
@@ -619,11 +628,16 @@ class ModelToComponentFactory:
                 "LegacyToPerPartitionStateMigrations can only be applied with a parent stream configuration."
             )
+        if not hasattr(declarative_stream, "incremental_sync"):
+            raise ValueError(
+                "LegacyToPerPartitionStateMigrations can only be applied with an incremental_sync configuration."
+            )
         return LegacyToPerPartitionStateMigration(
-            declarative_stream.retriever.partition_router,
-            declarative_stream.incremental_sync,
+            partition_router,  # type: ignore # was already checked above
+            declarative_stream.incremental_sync,  # type: ignore # was already checked. Migration can be applied only to incremental streams.
             config,
-            declarative_stream.parameters,
+            declarative_stream.parameters,  # type: ignore # different type is expected here Mapping[str, Any], got Dict[str, Any]
         )  # type: ignore # The retriever type was already checked
     def create_session_token_authenticator(
@@ -1548,6 +1562,12 @@ class ModelToComponentFactory:
     def create_xml_decoder(model: XmlDecoderModel, config: Config, **kwargs: Any) -> XmlDecoder:
         return XmlDecoder(parameters={})
+    @staticmethod
+    def create_gzipjson_decoder(
+        model: GzipJsonDecoderModel, config: Config, **kwargs: Any
+    ) -> GzipJsonDecoder:
+        return GzipJsonDecoder(parameters={}, encoding=model.encoding)
     @staticmethod
     def create_json_file_schema_loader(
         model: JsonFileSchemaLoaderModel, config: Config, **kwargs: Any

airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py ADDED Viewed

@@ -0,0 +1,85 @@
+# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
+from typing import Iterable, Optional, Mapping, Any, Callable
+from airbyte_cdk.sources.declarative.retrievers import Retriever
+from airbyte_cdk.sources.message import MessageRepository
+from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
+from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
+from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
+from airbyte_cdk.sources.streams.concurrent.partitions.stream_slicer import StreamSlicer
+from airbyte_cdk.sources.types import StreamSlice
+from airbyte_cdk.utils.slice_hasher import SliceHasher
+class DeclarativePartitionFactory:
+    def __init__(
+        self,
+        stream_name: str,
+        json_schema: Mapping[str, Any],
+        retriever_factory: Callable[[], Retriever],
+        message_repository: MessageRepository,
+    ) -> None:
+        """
+        The DeclarativePartitionFactory takes a retriever_factory and not a retriever directly. The reason is that our components are not
+        thread safe and classes like `DefaultPaginator` may not work because multiple threads can access and modify a shared field across each other.
+        In order to avoid these problems, we will create one retriever per thread which should make the processing thread-safe.
+        """
+        self._stream_name = stream_name
+        self._json_schema = json_schema
+        self._retriever_factory = retriever_factory
+        self._message_repository = message_repository
+    def create(self, stream_slice: StreamSlice) -> Partition:
+        return DeclarativePartition(
+            self._stream_name,
+            self._json_schema,
+            self._retriever_factory(),
+            self._message_repository,
+            stream_slice,
+        )
+class DeclarativePartition(Partition):
+    def __init__(
+        self,
+        stream_name: str,
+        json_schema: Mapping[str, Any],
+        retriever: Retriever,
+        message_repository: MessageRepository,
+        stream_slice: StreamSlice,
+    ):
+        self._stream_name = stream_name
+        self._json_schema = json_schema
+        self._retriever = retriever
+        self._message_repository = message_repository
+        self._stream_slice = stream_slice
+        self._hash = SliceHasher.hash(self._stream_name, self._stream_slice)
+    def read(self) -> Iterable[Record]:
+        for stream_data in self._retriever.read_records(self._json_schema, self._stream_slice):
+            if isinstance(stream_data, Mapping):
+                yield Record(stream_data, self)
+            else:
+                self._message_repository.emit_message(stream_data)
+    def to_slice(self) -> Optional[Mapping[str, Any]]:
+        return self._stream_slice
+    def stream_name(self) -> str:
+        return self._stream_name
+    def __hash__(self) -> int:
+        return self._hash
+class StreamSlicerPartitionGenerator(PartitionGenerator):
+    def __init__(
+        self, partition_factory: DeclarativePartitionFactory, stream_slicer: StreamSlicer
+    ) -> None:
+        self._partition_factory = partition_factory
+        self._stream_slicer = stream_slicer
+    def generate(self) -> Iterable[Partition]:
+        for stream_slice in self._stream_slicer.stream_slices():
+            yield self._partition_factory.create(stream_slice)

airbyte-cdk 6.6.0rc1__py3-none-any.whl → 6.6.1__py3-none-any.whl

airbyte-cdk 6.6.0rc1__py3-none-any.whl → 6.6.1__py3-none-any.whl