PyPI - airbyte-cdk - Versions diffs - 6.9.0.dev0__py3-none-any.whl → 6.9.1__py3-none-any.whl - Mend

airbyte-cdk 6.9.0.dev0__py3-none-any.whl → 6.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

airbyte_cdk/sources/declarative/concurrent_declarative_source.py CHANGED Viewed

@@ -56,8 +56,9 @@ from airbyte_cdk.sources.types import Config, StreamState
 class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
-    # By default, we defer to a value of 1 which represents running a connector using the Concurrent CDK engine on only one thread.
-    SINGLE_THREADED_CONCURRENCY_LEVEL = 1
+    # By default, we defer to a value of 2. A value lower than that could cause a PartitionEnqueuer to be stuck in a state of deadlock
+    # because it has hit the limit of futures but no partition reader is consuming them.
+    _LOWEST_SAFE_CONCURRENCY_LEVEL = 2
     def __init__(
         self,
@@ -86,23 +87,10 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
             component_factory=component_factory,
         )
+        # todo: We could remove state from initialization. Now that streams are grouped during the read(), a source
+        #  no longer needs to store the original incoming state. But maybe there's an edge case?
         self._state = state
-        self._concurrent_streams: Optional[List[AbstractStream]]
-        self._synchronous_streams: Optional[List[Stream]]
-        # If the connector command was SPEC, there is no incoming config, and we cannot instantiate streams because
-        # they might depend on it. Ideally we want to have a static method on this class to get the spec without
-        # any other arguments, but the existing entrypoint.py isn't designed to support this. Just noting this
-        # for our future improvements to the CDK.
-        if config:
-            self._concurrent_streams, self._synchronous_streams = self._group_streams(
-                config=config or {}
-            )
-        else:
-            self._concurrent_streams = None
-            self._synchronous_streams = None
         concurrency_level_from_manifest = self._source_config.get("concurrency_level")
         if concurrency_level_from_manifest:
             concurrency_level_component = self._constructor.create_component(
@@ -120,8 +108,8 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                 concurrency_level // 2, 1
             )  # Partition_generation iterates using range based on this value. If this is floored to zero we end up in a dead lock during start up
         else:
-            concurrency_level = self.SINGLE_THREADED_CONCURRENCY_LEVEL
-            initial_number_of_partitions_to_generate = self.SINGLE_THREADED_CONCURRENCY_LEVEL
+            concurrency_level = self._LOWEST_SAFE_CONCURRENCY_LEVEL
+            initial_number_of_partitions_to_generate = self._LOWEST_SAFE_CONCURRENCY_LEVEL // 2
         self._concurrent_source = ConcurrentSource.create(
             num_workers=concurrency_level,
@@ -136,17 +124,20 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
         logger: logging.Logger,
         config: Mapping[str, Any],
         catalog: ConfiguredAirbyteCatalog,
-        state: Optional[Union[List[AirbyteStateMessage]]] = None,
+        state: Optional[List[AirbyteStateMessage]] = None,
     ) -> Iterator[AirbyteMessage]:
-        # ConcurrentReadProcessor pops streams that are finished being read so before syncing, the names of the concurrent
-        # streams must be saved so that they can be removed from the catalog before starting synchronous streams
-        if self._concurrent_streams:
+        concurrent_streams, _ = self._group_streams(config=config)
+        # ConcurrentReadProcessor pops streams that are finished being read so before syncing, the names of
+        # the concurrent streams must be saved so that they can be removed from the catalog before starting
+        # synchronous streams
+        if len(concurrent_streams) > 0:
             concurrent_stream_names = set(
-                [concurrent_stream.name for concurrent_stream in self._concurrent_streams]
+                [concurrent_stream.name for concurrent_stream in concurrent_streams]
             )
             selected_concurrent_streams = self._select_streams(
-                streams=self._concurrent_streams, configured_catalog=catalog
+                streams=concurrent_streams, configured_catalog=catalog
             )
             # It would appear that passing in an empty set of streams causes an infinite loop in ConcurrentReadProcessor.
             # This is also evident in concurrent_source_adapter.py so I'll leave this out of scope to fix for now
@@ -165,8 +156,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
         yield from super().read(logger, config, filtered_catalog, state)
     def discover(self, logger: logging.Logger, config: Mapping[str, Any]) -> AirbyteCatalog:
-        concurrent_streams = self._concurrent_streams or []
-        synchronous_streams = self._synchronous_streams or []
+        concurrent_streams, synchronous_streams = self._group_streams(config=config)
         return AirbyteCatalog(
             streams=[
                 stream.as_airbyte_stream() for stream in concurrent_streams + synchronous_streams
@@ -206,7 +196,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
             # so we need to treat them as synchronous
             if (
                 isinstance(declarative_stream, DeclarativeStream)
-                and name_to_stream_mapping[declarative_stream.name].get("retriever")["type"]
+                and name_to_stream_mapping[declarative_stream.name]["retriever"]["type"]
                 == "SimpleRetriever"
             ):
                 incremental_sync_component_definition = name_to_stream_mapping[
@@ -215,7 +205,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                 partition_router_component_definition = (
                     name_to_stream_mapping[declarative_stream.name]
-                    .get("retriever")
+                    .get("retriever", {})
                     .get("partition_router")
                 )
                 is_without_partition_router_or_cursor = not bool(
@@ -237,7 +227,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                     cursor = self._constructor.create_concurrent_cursor_from_datetime_based_cursor(
                         state_manager=state_manager,
                         model_type=DatetimeBasedCursorModel,
-                        component_definition=incremental_sync_component_definition,
+                        component_definition=incremental_sync_component_definition,  # type: ignore  # Not None because of the if condition above
                         stream_name=declarative_stream.name,
                         stream_namespace=declarative_stream.namespace,
                         config=config or {},
@@ -320,10 +310,11 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
     def _is_datetime_incremental_without_partition_routing(
         self,
         declarative_stream: DeclarativeStream,
-        incremental_sync_component_definition: Mapping[str, Any],
+        incremental_sync_component_definition: Mapping[str, Any] | None,
     ) -> bool:
         return (
-            bool(incremental_sync_component_definition)
+            incremental_sync_component_definition is not None
+            and bool(incremental_sync_component_definition)
             and incremental_sync_component_definition.get("type", "")
             == DatetimeBasedCursorModel.__name__
             and self._stream_supports_concurrent_partition_processing(

airbyte_cdk/sources/declarative/declarative_component_schema.yaml CHANGED Viewed

@@ -327,7 +327,7 @@ definitions:
         additionalProperties: true
   ConcurrencyLevel:
     title: Concurrency Level
-    description: Defines the amount of parallelization for the streams that are being synced. The factor of parallelization is how many partitions or streams are synced at the same time. For example, with a concurrency_level of 10, ten streams or partitions of data will processed at the same time.
+    description: Defines the amount of parallelization for the streams that are being synced. The factor of parallelization is how many partitions or streams are synced at the same time. For example, with a concurrency_level of 10, ten streams or partitions of data will be processed at the same time. Note that a value of 1 could create deadlock if a stream has a very high number of partitions.
     type: object
     required:
       - default_concurrency
@@ -1684,92 +1684,6 @@ definitions:
       $parameters:
         type: object
         additionalProperties: true
-  TypesMap:
-    title: Types Map
-    description: (This component is experimental. Use at your own risk.) Represents a mapping between a current type and its corresponding target type.
-    type: object
-    required:
-      - target_type
-      - current_type
-    properties:
-      target_type:
-        anyOf:
-          - type: string
-          - type: array
-            items:
-              type: string
-      current_type:
-        anyOf:
-          - type: string
-          - type: array
-            items:
-              type: string
-  SchemaTypeIdentifier:
-    title: Schema Type Identifier
-    description: (This component is experimental. Use at your own risk.) Identifies schema details for dynamic schema extraction and processing.
-    type: object
-    required:
-      - key_pointer
-    properties:
-      type:
-        type: string
-        enum: [SchemaTypeIdentifier]
-      schema_pointer:
-        title: Schema Path
-        description: List of nested fields defining the schema field path to extract. Defaults to [].
-        type: array
-        default: []
-        items:
-          - type: string
-        interpolation_content:
-          - config
-      key_pointer:
-        title: Key Path
-        description: List of potentially nested fields describing the full path of the field key to extract.
-        type: array
-        items:
-          - type: string
-        interpolation_content:
-          - config
-      type_pointer:
-        title: Type Path
-        description: List of potentially nested fields describing the full path of the field type to extract.
-        type: array
-        items:
-          - type: string
-        interpolation_content:
-          - config
-      types_mapping:
-        type: array
-        items:
-          - "$ref": "#/definitions/TypesMap"
-      $parameters:
-        type: object
-        additionalProperties: true
-  DynamicSchemaLoader:
-    title: Dynamic Schema Loader
-    description: (This component is experimental. Use at your own risk.) Loads a schema by extracting data from retrieved records.
-    type: object
-    required:
-      - type
-      - retriever
-      - schema_type_identifier
-    properties:
-      type:
-        type: string
-        enum: [DynamicSchemaLoader]
-      retriever:
-        title: Retriever
-        description: Component used to coordinate how records are extracted across stream slices and request pages.
-        anyOf:
-          - "$ref": "#/definitions/AsyncRetriever"
-          - "$ref": "#/definitions/CustomRetriever"
-          - "$ref": "#/definitions/SimpleRetriever"
-      schema_type_identifier:
-        "$ref": "#/definitions/SchemaTypeIdentifier"
-      $parameters:
-        type: object
-        additionalProperties: true
   InlineSchemaLoader:
     title: Inline Schema Loader
     description: Loads a schema that is defined directly in the manifest file.

airbyte_cdk/sources/declarative/interpolation/jinja.py CHANGED Viewed

@@ -4,7 +4,7 @@
 import ast
 from functools import cache
-from typing import Any, Mapping, Optional, Tuple, Type
+from typing import Any, Mapping, Optional, Set, Tuple, Type
 from jinja2 import meta
 from jinja2.environment import Template
@@ -27,7 +27,35 @@ class StreamPartitionAccessEnvironment(SandboxedEnvironment):
     def is_safe_attribute(self, obj: Any, attr: str, value: Any) -> bool:
         if attr in ["_partition"]:
             return True
-        return super().is_safe_attribute(obj, attr, value)
+        return super().is_safe_attribute(obj, attr, value)  # type: ignore  # for some reason, mypy says 'Returning Any from function declared to return "bool"'
+# These aliases are used to deprecate existing keywords without breaking all existing connectors.
+_ALIASES = {
+    "stream_interval": "stream_slice",  # Use stream_interval to access incremental_sync values
+    "stream_partition": "stream_slice",  # Use stream_partition to access partition router's values
+}
+# These extensions are not installed so they're not currently a problem,
+# but we're still explicitly removing them from the jinja context.
+# At worst, this is documentation that we do NOT want to include these extensions because of the potential security risks
+_RESTRICTED_EXTENSIONS = ["jinja2.ext.loopcontrols"]  # Adds support for break continue in loops
+# By default, these Python builtin functions are available in the Jinja context.
+# We explicitly remove them because of the potential security risk.
+# Please add a unit test to test_jinja.py when adding a restriction.
+_RESTRICTED_BUILTIN_FUNCTIONS = [
+    "range"
+]  # The range function can cause very expensive computations
+_ENVIRONMENT = StreamPartitionAccessEnvironment()
+_ENVIRONMENT.filters.update(**filters)
+_ENVIRONMENT.globals.update(**macros)
+for extension in _RESTRICTED_EXTENSIONS:
+    _ENVIRONMENT.extensions.pop(extension, None)
+for builtin in _RESTRICTED_BUILTIN_FUNCTIONS:
+    _ENVIRONMENT.globals.pop(builtin, None)
 class JinjaInterpolation(Interpolation):
@@ -48,34 +76,6 @@ class JinjaInterpolation(Interpolation):
     Additional information on jinja templating can be found at https://jinja.palletsprojects.com/en/3.1.x/templates/#
     """
-    # These aliases are used to deprecate existing keywords without breaking all existing connectors.
-    ALIASES = {
-        "stream_interval": "stream_slice",  # Use stream_interval to access incremental_sync values
-        "stream_partition": "stream_slice",  # Use stream_partition to access partition router's values
-    }
-    # These extensions are not installed so they're not currently a problem,
-    # but we're still explicitely removing them from the jinja context.
-    # At worst, this is documentation that we do NOT want to include these extensions because of the potential security risks
-    RESTRICTED_EXTENSIONS = ["jinja2.ext.loopcontrols"]  # Adds support for break continue in loops
-    # By default, these Python builtin functions are available in the Jinja context.
-    # We explicitely remove them because of the potential security risk.
-    # Please add a unit test to test_jinja.py when adding a restriction.
-    RESTRICTED_BUILTIN_FUNCTIONS = [
-        "range"
-    ]  # The range function can cause very expensive computations
-    def __init__(self) -> None:
-        self._environment = StreamPartitionAccessEnvironment()
-        self._environment.filters.update(**filters)
-        self._environment.globals.update(**macros)
-        for extension in self.RESTRICTED_EXTENSIONS:
-            self._environment.extensions.pop(extension, None)
-        for builtin in self.RESTRICTED_BUILTIN_FUNCTIONS:
-            self._environment.globals.pop(builtin, None)
     def eval(
         self,
         input_str: str,
@@ -86,7 +86,7 @@ class JinjaInterpolation(Interpolation):
     ) -> Any:
         context = {"config": config, **additional_parameters}
-        for alias, equivalent in self.ALIASES.items():
+        for alias, equivalent in _ALIASES.items():
             if alias in context:
                 # This is unexpected. We could ignore or log a warning, but failing loudly should result in fewer surprises
                 raise ValueError(
@@ -105,6 +105,7 @@ class JinjaInterpolation(Interpolation):
                 raise Exception(f"Expected a string, got {input_str}")
         except UndefinedError:
             pass
         # If result is empty or resulted in an undefined error, evaluate and return the default string
         return self._literal_eval(self._eval(default, context), valid_types)
@@ -132,16 +133,16 @@ class JinjaInterpolation(Interpolation):
             return s
     @cache
-    def _find_undeclared_variables(self, s: Optional[str]) -> set[str]:
+    def _find_undeclared_variables(self, s: Optional[str]) -> Set[str]:
         """
         Find undeclared variables and cache them
         """
-        ast = self._environment.parse(s)  # type: ignore # parse is able to handle None
+        ast = _ENVIRONMENT.parse(s)  # type: ignore # parse is able to handle None
         return meta.find_undeclared_variables(ast)
     @cache
-    def _compile(self, s: Optional[str]) -> Template:
+    def _compile(self, s: str) -> Template:
         """
         We must cache the Jinja Template ourselves because we're using `from_string` instead of a template loader
         """
-        return self._environment.from_string(s)  # type: ignore [arg-type]  # Expected `str | Template` but passed `str | None`
+        return _ENVIRONMENT.from_string(s)

airbyte_cdk/sources/declarative/models/declarative_component_schema.py CHANGED Viewed

@@ -650,32 +650,6 @@ class HttpResponseFilter(BaseModel):
     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
-class TypesMap(BaseModel):
-    target_type: Union[str, List[str]]
-    current_type: Union[str, List[str]]
-class SchemaTypeIdentifier(BaseModel):
-    type: Optional[Literal["SchemaTypeIdentifier"]] = None
-    schema_pointer: Optional[List[str]] = Field(
-        [],
-        description="List of nested fields defining the schema field path to extract. Defaults to [].",
-        title="Schema Path",
-    )
-    key_pointer: List[str] = Field(
-        ...,
-        description="List of potentially nested fields describing the full path of the field key to extract.",
-        title="Key Path",
-    )
-    type_pointer: Optional[List[str]] = Field(
-        None,
-        description="List of potentially nested fields describing the full path of the field type to extract.",
-        title="Type Path",
-    )
-    types_mapping: Optional[List[TypesMap]] = None
-    parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
 class InlineSchemaLoader(BaseModel):
     type: Literal["InlineSchemaLoader"]
     schema_: Optional[Dict[str, Any]] = Field(
@@ -848,13 +822,13 @@ class OauthConnectorInputSpecification(BaseModel):
     )
     extract_output: List[str] = Field(
         ...,
-        description="The DeclarativeOAuth Specific list of strings to indicate which keys should be extracted and returned back to the input config.",
+        description="The DeclarativeOAuth Specific list of strings to indicate which keys should be extracted and returned back to the input config.                ",
         examples=[{"extract_output": ["access_token", "refresh_token", "other_field"]}],
         title="DeclarativeOAuth Extract Output",
     )
     state: Optional[State] = Field(
         None,
-        description="The DeclarativeOAuth Specific object to provide the criteria of how the `state` query param should be constructed,\nincluding length and complexity.",
+        description="The DeclarativeOAuth Specific object to provide the criteria of how the `state` query param should be constructed,\nincluding length and complexity.                ",
         examples=[{"state": {"min": 7, "max": 128}}],
         title="(Optional) DeclarativeOAuth Configurable State Query Param",
     )
@@ -878,13 +852,13 @@ class OauthConnectorInputSpecification(BaseModel):
     )
     state_key: Optional[str] = Field(
         None,
-        description="The DeclarativeOAuth Specific optional override to provide the custom `state` key name, if required by data-provider.",
+        description="The DeclarativeOAuth Specific optional override to provide the custom `state` key name, if required by data-provider.                ",
         examples=[{"state_key": "my_custom_state_key_key_name"}],
         title="(Optional) DeclarativeOAuth State Key Override",
     )
     auth_code_key: Optional[str] = Field(
         None,
-        description="The DeclarativeOAuth Specific optional override to provide the custom `code` key name to something like `auth_code` or `custom_auth_code`, if required by data-provider.",
+        description="The DeclarativeOAuth Specific optional override to provide the custom `code` key name to something like `auth_code` or `custom_auth_code`, if required by data-provider.                ",
         examples=[{"auth_code_key": "my_custom_auth_code_key_name"}],
         title="(Optional) DeclarativeOAuth Auth Code Key Override",
     )
@@ -1800,17 +1774,6 @@ class HttpRequester(BaseModel):
     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
-class DynamicSchemaLoader(BaseModel):
-    type: Literal["DynamicSchemaLoader"]
-    retriever: Union[AsyncRetriever, CustomRetriever, SimpleRetriever] = Field(
-        ...,
-        description="Component used to coordinate how records are extracted across stream slices and request pages.",
-        title="Retriever",
-    )
-    schema_type_identifier: SchemaTypeIdentifier
-    parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
 class ParentStreamConfig(BaseModel):
     type: Literal["ParentStreamConfig"]
     parent_key: str = Field(
@@ -2018,6 +1981,5 @@ DeclarativeSource2.update_forward_refs()
 SelectiveAuthenticator.update_forward_refs()
 DeclarativeStream.update_forward_refs()
 SessionTokenAuthenticator.update_forward_refs()
-DynamicSchemaLoader.update_forward_refs()
 SimpleRetriever.update_forward_refs()
 AsyncRetriever.update_forward_refs()

airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py CHANGED Viewed

@@ -64,10 +64,6 @@ DEFAULT_MODEL_TYPES: Mapping[str, str] = {
     "AddFields.fields": "AddedFieldDefinition",
     # CustomPartitionRouter
     "CustomPartitionRouter.parent_stream_configs": "ParentStreamConfig",
-    # DynamicSchemaLoader
-    "DynamicSchemaLoader.retriever": "SimpleRetriever",
-    # SchemaTypeIdentifier
-    "SchemaTypeIdentifier.types_map": "TypesMap",
 }
 # We retain a separate registry for custom components to automatically insert the type if it is missing. This is intended to

airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py CHANGED Viewed

@@ -188,9 +188,6 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     DpathExtractor as DpathExtractorModel,
 )
-from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
-    DynamicSchemaLoader as DynamicSchemaLoaderModel,
-)
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     ExponentialBackoffStrategy as ExponentialBackoffStrategyModel,
 )
@@ -281,9 +278,6 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     ResponseToFileExtractor as ResponseToFileExtractorModel,
 )
-from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
-    SchemaTypeIdentifier as SchemaTypeIdentifierModel,
-)
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     SelectiveAuthenticator as SelectiveAuthenticatorModel,
 )
@@ -297,9 +291,6 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     SubstreamPartitionRouter as SubstreamPartitionRouterModel,
 )
-from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
-    TypesMap as TypesMapModel,
-)
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import ValueType
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     WaitTimeFromHeader as WaitTimeFromHeaderModel,
@@ -365,11 +356,8 @@ from airbyte_cdk.sources.declarative.retrievers import (
 )
 from airbyte_cdk.sources.declarative.schema import (
     DefaultSchemaLoader,
-    DynamicSchemaLoader,
     InlineSchemaLoader,
     JsonFileSchemaLoader,
-    SchemaTypeIdentifier,
-    TypesMap,
 )
 from airbyte_cdk.sources.declarative.spec import Spec
 from airbyte_cdk.sources.declarative.stream_slicers import StreamSlicer
@@ -467,9 +455,6 @@ class ModelToComponentFactory:
             IterableDecoderModel: self.create_iterable_decoder,
             XmlDecoderModel: self.create_xml_decoder,
             JsonFileSchemaLoaderModel: self.create_json_file_schema_loader,
-            DynamicSchemaLoaderModel: self.create_dynamic_schema_loader,
-            SchemaTypeIdentifierModel: self.create_schema_type_identifier,
-            TypesMapModel: self.create_types_map,
             JwtAuthenticatorModel: self.create_jwt_authenticator,
             LegacyToPerPartitionStateMigrationModel: self.create_legacy_to_per_partition_state_migration,
             ListPartitionRouterModel: self.create_list_partition_router,
@@ -1589,63 +1574,6 @@ class ModelToComponentFactory:
     ) -> InlineSchemaLoader:
         return InlineSchemaLoader(schema=model.schema_ or {}, parameters={})
-    @staticmethod
-    def create_types_map(model: TypesMapModel, **kwargs: Any) -> TypesMap:
-        return TypesMap(target_type=model.target_type, current_type=model.current_type)
-    def create_schema_type_identifier(
-        self, model: SchemaTypeIdentifierModel, config: Config, **kwargs: Any
-    ) -> SchemaTypeIdentifier:
-        types_mapping = []
-        if model.types_mapping:
-            types_mapping.extend(
-                [
-                    self._create_component_from_model(types_map, config=config)
-                    for types_map in model.types_mapping
-                ]
-            )
-        model_schema_pointer: List[Union[InterpolatedString, str]] = (
-            [x for x in model.schema_pointer] if model.schema_pointer else []
-        )
-        model_key_pointer: List[Union[InterpolatedString, str]] = [x for x in model.key_pointer]
-        model_type_pointer: Optional[List[Union[InterpolatedString, str]]] = (
-            [x for x in model.type_pointer] if model.type_pointer else None
-        )
-        return SchemaTypeIdentifier(
-            schema_pointer=model_schema_pointer,
-            key_pointer=model_key_pointer,
-            type_pointer=model_type_pointer,
-            types_mapping=types_mapping,
-            parameters=model.parameters or {},
-        )
-    def create_dynamic_schema_loader(
-        self, model: DynamicSchemaLoaderModel, config: Config, **kwargs: Any
-    ) -> DynamicSchemaLoader:
-        stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
-        combined_slicers = self._build_resumable_cursor_from_paginator(
-            model.retriever, stream_slicer
-        )
-        retriever = self._create_component_from_model(
-            model=model.retriever,
-            config=config,
-            name="",
-            primary_key=None,
-            stream_slicer=combined_slicers,
-            transformations=[],
-        )
-        schema_type_identifier = self._create_component_from_model(
-            model.schema_type_identifier, config=config, parameters=model.parameters or {}
-        )
-        return DynamicSchemaLoader(
-            retriever=retriever,
-            config=config,
-            schema_type_identifier=schema_type_identifier,
-            parameters=model.parameters or {},
-        )
     @staticmethod
     def create_json_decoder(model: JsonDecoderModel, config: Config, **kwargs: Any) -> JsonDecoder:
         return JsonDecoder(parameters={})

airbyte_cdk/sources/declarative/schema/__init__.py CHANGED Viewed

@@ -6,6 +6,5 @@ from airbyte_cdk.sources.declarative.schema.default_schema_loader import Default
 from airbyte_cdk.sources.declarative.schema.inline_schema_loader import InlineSchemaLoader
 from airbyte_cdk.sources.declarative.schema.json_file_schema_loader import JsonFileSchemaLoader
 from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader
-from airbyte_cdk.sources.declarative.schema.dynamic_schema_loader import DynamicSchemaLoader, TypesMap, SchemaTypeIdentifier
-__all__ = ["JsonFileSchemaLoader", "DefaultSchemaLoader", "SchemaLoader", "InlineSchemaLoader", "DynamicSchemaLoader", "TypesMap", "SchemaTypeIdentifier"]
+__all__ = ["JsonFileSchemaLoader", "DefaultSchemaLoader", "SchemaLoader", "InlineSchemaLoader"]

airbyte_cdk/sources/streams/http/http_client.py CHANGED Viewed

@@ -138,12 +138,22 @@ class HttpClient:
             cache_dir = os.getenv(ENV_REQUEST_CACHE_PATH)
             # Use in-memory cache if cache_dir is not set
             # This is a non-obvious interface, but it ensures we don't write sql files when running unit tests
-            if cache_dir:
-                sqlite_path = str(Path(cache_dir) / self.cache_filename)
-            else:
-                sqlite_path = "file::memory:?cache=shared"
+            # (Same fallback as noted above: without cache_dir, the path is the shared in-memory SQLite URI.)
+            sqlite_path = (
+                str(Path(cache_dir) / self.cache_filename)
+                if cache_dir
+                else "file::memory:?cache=shared"
+            )
+            # By using `PRAGMA synchronous=OFF` and `PRAGMA journal_mode=WAL`, we reduce the possible occurrences of `database table is locked` errors.
+            # Note that those were blindly added at the same time and one or the other might be sufficient to prevent the issues but we have seen good results with both. Feel free to revisit given more information.
+            # There are strong signals that `fast_save` might create problems but if the sync crashes, we start back from the beginning in terms of sqlite anyway so the impact should be minimal. Signals are:
+            # * https://github.com/requests-cache/requests-cache/commit/7fa89ffda300331c37d8fad7f773348a3b5b0236#diff-f43db4a5edf931647c32dec28ea7557aae4cae8444af4b26c8ecbe88d8c925aaR238
+            # * https://github.com/requests-cache/requests-cache/commit/7fa89ffda300331c37d8fad7f773348a3b5b0236#diff-2e7f95b7d7be270ff1a8118f817ea3e6663cdad273592e536a116c24e6d23c18R164-R168
+            # * `If the application running SQLite crashes, the data will be safe, but the database [might become corrupted](https://www.sqlite.org/howtocorrupt.html#cfgerr) if the operating system crashes or the computer loses power before that data has been written to the disk surface.` in [this description](https://www.sqlite.org/pragma.html#pragma_synchronous).
+            backend = requests_cache.SQLiteCache(sqlite_path, fast_save=True, wal=True)
             return CachedLimiterSession(
-                sqlite_path, backend="sqlite", api_budget=self._api_budget, match_headers=True
+                sqlite_path, backend=backend, api_budget=self._api_budget, match_headers=True
             )
         else:
             return LimiterSession(api_budget=self._api_budget)

{airbyte_cdk-6.9.0.dev0.dist-info → airbyte_cdk-6.9.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: airbyte-cdk
-Version: 6.9.0.dev0
+Version: 6.9.1
 Summary: A framework for writing Airbyte Connectors.
 Home-page: https://airbyte.com
 License: MIT
@@ -18,12 +18,10 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Topic :: Scientific/Engineering
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Provides-Extra: file-based
-Provides-Extra: sphinx-docs
 Provides-Extra: sql
 Provides-Extra: vector-db-based
 Requires-Dist: Jinja2 (>=3.1.2,<3.2.0)
 Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
-Requires-Dist: Sphinx (>=4.2,<4.3) ; extra == "sphinx-docs"
 Requires-Dist: airbyte-protocol-models-dataclasses (>=0.14,<0.15)
 Requires-Dist: avro (>=1.11.2,<1.12.0) ; extra == "file-based"
 Requires-Dist: backoff
@@ -63,7 +61,6 @@ Requires-Dist: rapidfuzz (>=3.10.1,<4.0.0)
 Requires-Dist: requests
 Requires-Dist: requests_cache
 Requires-Dist: serpyco-rs (>=1.10.2,<2.0.0)
-Requires-Dist: sphinx-rtd-theme (>=1.0,<1.1) ; extra == "sphinx-docs"
 Requires-Dist: sqlalchemy (>=2.0,<3.0,!=2.0.36) ; extra == "sql"
 Requires-Dist: tiktoken (==0.8.0) ; extra == "vector-db-based"
 Requires-Dist: unstructured.pytesseract (>=0.3.12) ; extra == "file-based"

{airbyte_cdk-6.9.0.dev0.dist-info → airbyte_cdk-6.9.1.dist-info}/RECORD RENAMED Viewed

@@ -62,11 +62,11 @@ airbyte_cdk/sources/declarative/checks/check_stream.py,sha256=dAA-UhmMj0WLXCkRQr
 airbyte_cdk/sources/declarative/checks/connection_checker.py,sha256=MBRJo6WJlZQHpIfOGaNOkkHUmgUl_4wDM6VPo41z5Ss,1383
 airbyte_cdk/sources/declarative/concurrency_level/__init__.py,sha256=5XUqrmlstYlMM0j6crktlKQwALek0uiz2D3WdM46MyA,191
 airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py,sha256=YIwCTCpOr_QSNW4ltQK0yUGWInI8PKNY216HOOegYLk,2101
-airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=hgKamhOh1B8RA4Fx8FmCl4ORc7eO2h_RhxbkQovh3FM,23724
+airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=PxP4p2686wsf1gjsumGKnh2o2Jjnrqg8QLGijEIrp-A,23412
 airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=l9LG7Qm6e5r_qgqfVKnx3mXYtg1I9MmMjomVIPfU4XA,177
 airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=SX9JjdesN1edN2WVUVMzU_ptqp2QB1OnsnjZ4mwcX7w,2579
 airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=0BHBtDNQZfvwM45-tY5pNlTcKAFSGGNxemoi0Jic-0E,5785
-airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=ZCYl6v0miacvpIt6M8FakkGZpEsY8SmB4_436sHEw9Y,126841
+airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=Z1v19wOXYpuffvcmZ5TZyU4kSCFyt3Hba7qfY-2o46U,124229
 airbyte_cdk/sources/declarative/declarative_source.py,sha256=nF7wBqFd3AQmEKAm4CnIo29CJoQL562cJGSCeL8U8bA,1531
 airbyte_cdk/sources/declarative/declarative_stream.py,sha256=JRyNeOIpsFu4ztVZsN6sncqUEIqIE-bUkD2TPgbMgk0,10375
 airbyte_cdk/sources/declarative/decoders/__init__.py,sha256=hNlhaB5FjNC6IfJyglj5ZJWkYD2nEAukMDmzRz5PC6o,671
@@ -97,19 +97,19 @@ airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py,sha256=UrF
 airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py,sha256=i2L0gREX8nHA-pKokdVqwBf4aJgWP71KOxIABj_DHcY,1857
 airbyte_cdk/sources/declarative/interpolation/interpolated_string.py,sha256=LYEZnZ_hB7rvBSZxG9s0RSrzsOkDWbBY0_P6qu5lEfc,3212
 airbyte_cdk/sources/declarative/interpolation/interpolation.py,sha256=-V5UddGm69UKEB6o_O1EIES9kfY8FV_X4Ji8w1yOuSA,981
-airbyte_cdk/sources/declarative/interpolation/jinja.py,sha256=gcihTEnfD_6sUivxOomoY5r7VMAGqVVnK_HEsid9Y5k,6605
+airbyte_cdk/sources/declarative/interpolation/jinja.py,sha256=BtsY_jtT4MihFqeQgc05HXj3Ndt-e2ESQgGwbg3Sdxc,6430
 airbyte_cdk/sources/declarative/interpolation/macros.py,sha256=jf24RK-1fBhTYDpcGEakZtGNNJfG5NS8CCF5bEgNmRo,3977
 airbyte_cdk/sources/declarative/manifest_declarative_source.py,sha256=-v3GvuTVHwUonrfUwDj3wYKaZjX6hTyKmMBRgEzj-j0,15201
 airbyte_cdk/sources/declarative/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py,sha256=iemy3fKLczcU0-Aor7tx5jcT6DRedKMqyK7kCOp01hg,3924
 airbyte_cdk/sources/declarative/migrations/state_migration.py,sha256=KWPjealMLKSMtajXgkdGgKg7EmTLR-CqqD7UIh0-eDU,794
 airbyte_cdk/sources/declarative/models/__init__.py,sha256=nUFxNCiKeYRVXuZEKA7GD-lTHxsiKcQ8FitZjKhPIvE,100
-airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=qjzXe162aUcaq1n6B8KhA6Z9B8boM9yY8dAsLXll5-g,89872
+airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=6iAzpGmUrhwEUQcCL5bW-FXuLXPMeFqs_GR4B1rS3ZE,88511
 airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
 airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=Rir9_z3Kcd5Es0-LChrzk-0qubAsiK_RSEnLmK2OXm8,553
-airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=KflzFl_ZKRSW9XkH16sfr5-9HnAAI0T5s8CVBrJK2Ao,8958
+airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=0jfi-ogL-rOVORTIYnu64wNfh1L8fYaLVDWzJ2zGdi8,8799
 airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=IWUOdF03o-aQn0Occo1BJCxU0Pz-QILk5L67nzw2thw,6803
-airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=kGU0re3R-Ujtn6Gp96KpEVRHJB5P-B8sRa0aMR_jDdk,102536
+airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=lS2oKfkND54u66hocc2BycS-AIYIbkn4npq6CFRNokc,99573
 airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=1NjaZoGAIefvWwj6wx-LOKIXXWS-UnBlZFnuR7y6uYA,745
 airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=c5cuVFM6NFkuQqG8Z5IwkBuwDrvXZN1CunUOM_L0ezg,6892
 airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py,sha256=t7pRdFWfFWJtQQG19c9PVeMODyO2BknRTakpM5U9N-8,4844
@@ -159,9 +159,8 @@ airbyte_cdk/sources/declarative/retrievers/__init__.py,sha256=FVQpUGVwp2Gibk4gp0
 airbyte_cdk/sources/declarative/retrievers/async_retriever.py,sha256=WDFnjrXLz3-YEjFhmlMkWAn9AJvnZ0mk9FyC8DAhEYk,4976
 airbyte_cdk/sources/declarative/retrievers/retriever.py,sha256=XPLs593Xv8c5cKMc37XzUAYmzlXd1a7eSsspM-CMuWA,1696
 airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=N4swGw5mfuTXJ2R7AKX18CHzizsr69pXwt5uSHLPi48,24172
-airbyte_cdk/sources/declarative/schema/__init__.py,sha256=Io9vninzlEjQ2uFmWklxfwNM0cXfljtzOz5zL1OVyT4,701
+airbyte_cdk/sources/declarative/schema/__init__.py,sha256=ul8L9S0-__AMEdbCLHBq-PMEeA928NVp8BB83BMotfU,517
 airbyte_cdk/sources/declarative/schema/default_schema_loader.py,sha256=KTACrIE23a83wsm3Rd9Eb4K6-20lrGqYxTHNp9yxsso,1820
-airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py,sha256=tP5DIEMn-k2JshWeXmo53ZEudDAVb4AJ50Z5tfme_ZU,8063
 airbyte_cdk/sources/declarative/schema/inline_schema_loader.py,sha256=bVETE10hRsatRJq3R3BeyRR0wIoK3gcP1gcpVRQ_P5U,464
 airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py,sha256=5Wl-fqW-pVf_dxJ4yGHMAFfC4JjKHYJhqFJT1xA57F4,4177
 airbyte_cdk/sources/declarative/schema/schema_loader.py,sha256=kjt8v0N5wWKA5zyLnrDLxf1PJKdUqvQq2RVnAOAzNSY,379
@@ -277,7 +276,7 @@ airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py,sha
 airbyte_cdk/sources/streams/http/error_handlers/response_models.py,sha256=xGIVELBFY0TmH9aUq1ikoqJz8oHLr6di2JLvKWVEO-s,2236
 airbyte_cdk/sources/streams/http/exceptions.py,sha256=njC7MlMJoFYcSGz4mIp6-bqLFTr6vC8ej25X0oSeyjE,1824
 airbyte_cdk/sources/streams/http/http.py,sha256=JAMpiTdS9HFNOlwayWNvQdxoqs2rpW9wdYlhFHv_1Q4,28496
-airbyte_cdk/sources/streams/http/http_client.py,sha256=Jqmbd3jL8jjnOfA1325-cpG3nE80YDMDwyxPZ08D7wo,21341
+airbyte_cdk/sources/streams/http/http_client.py,sha256=dyNrbcahEnDfGTrhqkr1XdfPiuVVRlKOdj-TJ5WRVrk,22923
 airbyte_cdk/sources/streams/http/rate_limiting.py,sha256=IwdjrHKUnU97XO4qONgYRv4YYW51xQ8SJm4WLafXDB8,6351
 airbyte_cdk/sources/streams/http/requests_native_auth/__init__.py,sha256=RN0D3nOX1xLgwEwKWu6pkGy3XqBFzKSNZ8Lf6umU2eY,413
 airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py,sha256=nxI94yJ3bGfpDO8RR3QvOJ-PSW0n9CElSAkgl5ae80Y,10321
@@ -334,8 +333,8 @@ airbyte_cdk/utils/slice_hasher.py,sha256=-pHexlNYoWYPnXNH-M7HEbjmeJe9Zk7SJijdQ7d
 airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
 airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
 airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
-airbyte_cdk-6.9.0.dev0.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
-airbyte_cdk-6.9.0.dev0.dist-info/METADATA,sha256=VS_vbZgaobGxk6eCGkFZuQgCR-xzMrHzJWL2FpCnyM8,6112
-airbyte_cdk-6.9.0.dev0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
-airbyte_cdk-6.9.0.dev0.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
-airbyte_cdk-6.9.0.dev0.dist-info/RECORD,,
+airbyte_cdk-6.9.1.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
+airbyte_cdk-6.9.1.dist-info/METADATA,sha256=vmQrD-o8vQwVRNF1PSFviNR1x8VcdqsvNr42p8_8u18,5949
+airbyte_cdk-6.9.1.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+airbyte_cdk-6.9.1.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
+airbyte_cdk-6.9.1.dist-info/RECORD,,

airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py DELETED Viewed

@@ -1,219 +0,0 @@
-#
-# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
-#
-from copy import deepcopy
-from dataclasses import InitVar, dataclass
-from typing import Any, List, Mapping, MutableMapping, Optional, Union
-import dpath
-from typing_extensions import deprecated
-from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
-from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
-from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader
-from airbyte_cdk.sources.source import ExperimentalClassWarning
-from airbyte_cdk.sources.types import Config
-AIRBYTE_DATA_TYPES: Mapping[str, Mapping[str, Any]] = {
-    "string": {"type": ["null", "string"]},
-    "boolean": {"type": ["null", "boolean"]},
-    "date": {"type": ["null", "string"], "format": "date"},
-    "timestamp_without_timezone": {
-        "type": ["null", "string"],
-        "format": "date-time",
-        "airbyte_type": "timestamp_without_timezone",
-    },
-    "timestamp_with_timezone": {"type": ["null", "string"], "format": "date-time"},
-    "time_without_timezone": {
-        "type": ["null", "string"],
-        "format": "time",
-        "airbyte_type": "time_without_timezone",
-    },
-    "time_with_timezone": {
-        "type": ["null", "string"],
-        "format": "time",
-        "airbyte_type": "time_with_timezone",
-    },
-    "integer": {"type": ["null", "integer"]},
-    "number": {"type": ["null", "number"]},
-    "array": {"type": ["null", "array"]},
-    "object": {"type": ["null", "object"]},
-}
-@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
-@dataclass(frozen=True)
-class TypesMap:
-    """
-    Represents a mapping between a current type and its corresponding target type.
-    """
-    target_type: Union[List[str], str]
-    current_type: Union[List[str], str]
-@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
-@dataclass
-class SchemaTypeIdentifier:
-    """
-    Identifies schema details for dynamic schema extraction and processing.
-    """
-    key_pointer: List[Union[InterpolatedString, str]]
-    parameters: InitVar[Mapping[str, Any]]
-    type_pointer: Optional[List[Union[InterpolatedString, str]]] = None
-    types_mapping: Optional[List[TypesMap]] = None
-    schema_pointer: Optional[List[Union[InterpolatedString, str]]] = None
-    def __post_init__(self, parameters: Mapping[str, Any]) -> None:
-        self.schema_pointer = (
-            self._update_pointer(self.schema_pointer, parameters) if self.schema_pointer else []
-        )  # type: ignore[assignment]  # This is a required field in the model
-        self.key_pointer = self._update_pointer(self.key_pointer, parameters)  # type: ignore[assignment]  # This is a required field in the model
-        self.type_pointer = (
-            self._update_pointer(self.type_pointer, parameters) if self.type_pointer else None
-        )
-    @staticmethod
-    def _update_pointer(
-        pointer: Optional[List[Union[InterpolatedString, str]]], parameters: Mapping[str, Any]
-    ) -> Optional[List[Union[InterpolatedString, str]]]:
-        return (
-            [
-                InterpolatedString.create(path, parameters=parameters)
-                if isinstance(path, str)
-                else path
-                for path in pointer
-            ]
-            if pointer
-            else None
-        )
-@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
-@dataclass
-class DynamicSchemaLoader(SchemaLoader):
-    """
-    Dynamically loads a JSON Schema by extracting data from retrieved records.
-    """
-    retriever: Retriever
-    config: Config
-    parameters: InitVar[Mapping[str, Any]]
-    schema_type_identifier: SchemaTypeIdentifier
-    def get_json_schema(self) -> Mapping[str, Any]:
-        """
-        Constructs a JSON Schema based on retrieved data.
-        """
-        properties = {}
-        retrieved_record = next(self.retriever.read_records({}), None)  # type: ignore[call-overload] # read_records returns an Iterable data type
-        raw_schema = (
-            self._extract_data(
-                retrieved_record,  # type: ignore[arg-type] # Expected that retrieved_record will be only Mapping[str, Any]
-                self.schema_type_identifier.schema_pointer,
-            )
-            if retrieved_record
-            else []
-        )
-        for property_definition in raw_schema:
-            key = self._get_key(property_definition, self.schema_type_identifier.key_pointer)
-            value = self._get_type(
-                property_definition,
-                self.schema_type_identifier.type_pointer,
-            )
-            properties[key] = value
-        return {
-            "$schema": "http://json-schema.org/draft-07/schema#",
-            "type": "object",
-            "properties": properties,
-        }
-    def _get_key(
-        self,
-        raw_schema: MutableMapping[str, Any],
-        field_key_path: List[Union[InterpolatedString, str]],
-    ) -> str:
-        """
-        Extracts the key field from the schema using the specified path.
-        """
-        field_key = self._extract_data(raw_schema, field_key_path)
-        if not isinstance(field_key, str):
-            raise ValueError(f"Expected key to be a string. Got {field_key}")
-        return field_key
-    def _get_type(
-        self,
-        raw_schema: MutableMapping[str, Any],
-        field_type_path: Optional[List[Union[InterpolatedString, str]]],
-    ) -> Union[Mapping[str, Any], List[Mapping[str, Any]]]:
-        """
-        Determines the JSON Schema type for a field, supporting nullable and combined types.
-        """
-        raw_field_type = (
-            self._extract_data(raw_schema, field_type_path, default="string")
-            if field_type_path
-            else "string"
-        )
-        mapped_field_type = self._replace_type_if_not_valid(raw_field_type)
-        if (
-            isinstance(mapped_field_type, list)
-            and len(mapped_field_type) == 2
-            and all(isinstance(item, str) for item in mapped_field_type)
-        ):
-            first_type = self._get_airbyte_type(mapped_field_type[0])
-            second_type = self._get_airbyte_type(mapped_field_type[1])
-            return {"oneOf": [first_type, second_type]}
-        elif isinstance(mapped_field_type, str):
-            return self._get_airbyte_type(mapped_field_type)
-        else:
-            raise ValueError(
-                f"Invalid data type. Available string or two items list of string. Got {mapped_field_type}."
-            )
-    def _replace_type_if_not_valid(
-        self, field_type: Union[List[str], str]
-    ) -> Union[List[str], str]:
-        """
-        Replaces a field type with its `target_type` if it equals the `current_type` of an entry in `types_mapping`.
-        """
-        if self.schema_type_identifier.types_mapping:
-            for types_map in self.schema_type_identifier.types_mapping:
-                if field_type == types_map.current_type:
-                    return types_map.target_type
-        return field_type
-    @staticmethod
-    def _get_airbyte_type(field_type: str) -> Mapping[str, Any]:
-        """
-        Maps a field type to its corresponding Airbyte type definition.
-        """
-        if field_type not in AIRBYTE_DATA_TYPES:
-            raise ValueError(f"Invalid Airbyte data type: {field_type}")
-        return deepcopy(AIRBYTE_DATA_TYPES[field_type])
-    def _extract_data(
-        self,
-        body: Mapping[str, Any],
-        extraction_path: Optional[List[Union[InterpolatedString, str]]] = None,
-        default: Any = None,
-    ) -> Any:
-        """
-        Extracts data from the body based on the provided extraction path.
-        """
-        if not extraction_path:
-            return body
-        path = [
-            node.eval(self.config) if not isinstance(node, str) else node
-            for node in extraction_path
-        ]
-        return dpath.get(body, path, default=default)  # type: ignore # extracted will be a MutableMapping, given input data structure

{airbyte_cdk-6.9.0.dev0.dist-info → airbyte_cdk-6.9.1.dist-info}/LICENSE.txt RENAMED Viewed

File without changes

{airbyte_cdk-6.9.0.dev0.dist-info → airbyte_cdk-6.9.1.dist-info}/WHEEL RENAMED Viewed

File without changes

{airbyte_cdk-6.9.0.dev0.dist-info → airbyte_cdk-6.9.1.dist-info}/entry_points.txt RENAMED Viewed

File without changes

airbyte-cdk 6.9.0.dev0__py3-none-any.whl → 6.9.1__py3-none-any.whl

airbyte-cdk 6.9.0.dev0__py3-none-any.whl → 6.9.1__py3-none-any.whl