PyPI - airbyte-cdk - Versions diffs - 6.5.3rc2__py3-none-any.whl → 6.6.0__py3-none-any.whl - Mend

airbyte-cdk 6.5.3rc2__py3-none-any.whl → 6.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (200) hide show

airbyte_cdk/sources/embedded/catalog.py CHANGED Viewed

@@ -31,15 +31,27 @@ def to_configured_stream(
     primary_key: Optional[List[List[str]]] = None,
 ) -> ConfiguredAirbyteStream:
     return ConfiguredAirbyteStream(
-        stream=stream, sync_mode=sync_mode, destination_sync_mode=destination_sync_mode, cursor_field=cursor_field, primary_key=primary_key
+        stream=stream,
+        sync_mode=sync_mode,
+        destination_sync_mode=destination_sync_mode,
+        cursor_field=cursor_field,
+        primary_key=primary_key,
     )
-def to_configured_catalog(configured_streams: List[ConfiguredAirbyteStream]) -> ConfiguredAirbyteCatalog:
+def to_configured_catalog(
+    configured_streams: List[ConfiguredAirbyteStream],
+) -> ConfiguredAirbyteCatalog:
     return ConfiguredAirbyteCatalog(streams=configured_streams)
-def create_configured_catalog(stream: AirbyteStream, sync_mode: SyncMode = SyncMode.full_refresh) -> ConfiguredAirbyteCatalog:
-    configured_streams = [to_configured_stream(stream, sync_mode=sync_mode, primary_key=stream.source_defined_primary_key)]
+def create_configured_catalog(
+    stream: AirbyteStream, sync_mode: SyncMode = SyncMode.full_refresh
+) -> ConfiguredAirbyteCatalog:
+    configured_streams = [
+        to_configured_stream(
+            stream, sync_mode=sync_mode, primary_key=stream.source_defined_primary_key
+        )
+    ]
     return to_configured_catalog(configured_streams)

airbyte_cdk/sources/embedded/runner.py CHANGED Viewed

@@ -8,7 +8,13 @@ from abc import ABC, abstractmethod
 from typing import Generic, Iterable, Optional
 from airbyte_cdk.connector import TConfig
-from airbyte_cdk.models import AirbyteCatalog, AirbyteMessage, AirbyteStateMessage, ConfiguredAirbyteCatalog, ConnectorSpecification
+from airbyte_cdk.models import (
+    AirbyteCatalog,
+    AirbyteMessage,
+    AirbyteStateMessage,
+    ConfiguredAirbyteCatalog,
+    ConnectorSpecification,
+)
 from airbyte_cdk.sources.source import Source
@@ -22,7 +28,12 @@ class SourceRunner(ABC, Generic[TConfig]):
         pass
     @abstractmethod
-    def read(self, config: TConfig, catalog: ConfiguredAirbyteCatalog, state: Optional[AirbyteStateMessage]) -> Iterable[AirbyteMessage]:
+    def read(
+        self,
+        config: TConfig,
+        catalog: ConfiguredAirbyteCatalog,
+        state: Optional[AirbyteStateMessage],
+    ) -> Iterable[AirbyteMessage]:
         pass
@@ -37,5 +48,10 @@ class CDKRunner(SourceRunner[TConfig]):
     def discover(self, config: TConfig) -> AirbyteCatalog:
         return self._source.discover(self._logger, config)
-    def read(self, config: TConfig, catalog: ConfiguredAirbyteCatalog, state: Optional[AirbyteStateMessage]) -> Iterable[AirbyteMessage]:
+    def read(
+        self,
+        config: TConfig,
+        catalog: ConfiguredAirbyteCatalog,
+        state: Optional[AirbyteStateMessage],
+    ) -> Iterable[AirbyteMessage]:
         return self._source.read(self._logger, config, catalog, state=[state] if state else [])

airbyte_cdk/sources/embedded/tools.py CHANGED Viewed

@@ -8,7 +8,9 @@ import dpath
 from airbyte_cdk.models import AirbyteStream
-def get_first(iterable: Iterable[Any], predicate: Callable[[Any], bool] = lambda m: True) -> Optional[Any]:
+def get_first(
+    iterable: Iterable[Any], predicate: Callable[[Any], bool] = lambda m: True
+) -> Optional[Any]:
     return next(filter(predicate, iterable), None)

airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py CHANGED Viewed

@@ -22,7 +22,9 @@ if TYPE_CHECKING:
 class AbstractFileBasedAvailabilityStrategy(AvailabilityStrategy):
     @abstractmethod
-    def check_availability(self, stream: Stream, logger: logging.Logger, _: Optional[Source]) -> Tuple[bool, Optional[str]]:
+    def check_availability(
+        self, stream: Stream, logger: logging.Logger, _: Optional[Source]
+    ) -> Tuple[bool, Optional[str]]:
         """
         Perform a connection check for the stream.
@@ -48,10 +50,16 @@ class AbstractFileBasedAvailabilityStrategyWrapper(AbstractAvailabilityStrategy)
         self.stream = stream
     def check_availability(self, logger: logging.Logger) -> StreamAvailability:
-        is_available, reason = self.stream.availability_strategy.check_availability(self.stream, logger, None)
+        is_available, reason = self.stream.availability_strategy.check_availability(
+            self.stream, logger, None
+        )
         if is_available:
             return StreamAvailable()
         return StreamUnavailable(reason or "")
-    def check_availability_and_parsability(self, logger: logging.Logger) -> Tuple[bool, Optional[str]]:
-        return self.stream.availability_strategy.check_availability_and_parsability(self.stream, logger, None)
+    def check_availability_and_parsability(
+        self, logger: logging.Logger
+    ) -> Tuple[bool, Optional[str]]:
+        return self.stream.availability_strategy.check_availability_and_parsability(
+            self.stream, logger, None
+        )

airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py CHANGED Viewed

@@ -8,8 +8,14 @@ from typing import TYPE_CHECKING, Optional, Tuple
 from airbyte_cdk import AirbyteTracedException
 from airbyte_cdk.sources import Source
-from airbyte_cdk.sources.file_based.availability_strategy import AbstractFileBasedAvailabilityStrategy
-from airbyte_cdk.sources.file_based.exceptions import CheckAvailabilityError, CustomFileBasedException, FileBasedSourceError
+from airbyte_cdk.sources.file_based.availability_strategy import (
+    AbstractFileBasedAvailabilityStrategy,
+)
+from airbyte_cdk.sources.file_based.exceptions import (
+    CheckAvailabilityError,
+    CustomFileBasedException,
+    FileBasedSourceError,
+)
 from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader
 from airbyte_cdk.sources.file_based.remote_file import RemoteFile
 from airbyte_cdk.sources.file_based.schema_helpers import conforms_to_schema
@@ -22,7 +28,9 @@ class DefaultFileBasedAvailabilityStrategy(AbstractFileBasedAvailabilityStrategy
     def __init__(self, stream_reader: AbstractFileBasedStreamReader):
         self.stream_reader = stream_reader
-    def check_availability(self, stream: "AbstractFileBasedStream", logger: logging.Logger, _: Optional[Source]) -> Tuple[bool, Optional[str]]:  # type: ignore[override]
+    def check_availability(
+        self, stream: "AbstractFileBasedStream", logger: logging.Logger, _: Optional[Source]
+    ) -> Tuple[bool, Optional[str]]:  # type: ignore[override]
         """
         Perform a connection check for the stream (verify that we can list files from the stream).
@@ -87,15 +95,25 @@ class DefaultFileBasedAvailabilityStrategy(AbstractFileBasedAvailabilityStrategy
         except CustomFileBasedException as exc:
             raise CheckAvailabilityError(str(exc), stream=stream.name) from exc
         except Exception as exc:
-            raise CheckAvailabilityError(FileBasedSourceError.ERROR_LISTING_FILES, stream=stream.name) from exc
+            raise CheckAvailabilityError(
+                FileBasedSourceError.ERROR_LISTING_FILES, stream=stream.name
+            ) from exc
         return file
-    def _check_parse_record(self, stream: "AbstractFileBasedStream", file: RemoteFile, logger: logging.Logger) -> None:
+    def _check_parse_record(
+        self, stream: "AbstractFileBasedStream", file: RemoteFile, logger: logging.Logger
+    ) -> None:
         parser = stream.get_parser()
         try:
-            record = next(iter(parser.parse_records(stream.config, file, self.stream_reader, logger, discovered_schema=None)))
+            record = next(
+                iter(
+                    parser.parse_records(
+                        stream.config, file, self.stream_reader, logger, discovered_schema=None
+                    )
+                )
+            )
         except StopIteration:
             # The file is empty. We've verified that we can open it, so will
             # consider the connection check successful even though it means
@@ -104,7 +122,9 @@ class DefaultFileBasedAvailabilityStrategy(AbstractFileBasedAvailabilityStrategy
         except AirbyteTracedException as ate:
             raise ate
         except Exception as exc:
-            raise CheckAvailabilityError(FileBasedSourceError.ERROR_READING_FILE, stream=stream.name, file=file.uri) from exc
+            raise CheckAvailabilityError(
+                FileBasedSourceError.ERROR_READING_FILE, stream=stream.name, file=file.uri
+            ) from exc
         schema = stream.catalog_schema or stream.config.input_schema
         if schema and stream.validation_policy.validate_schema_before_sync:

airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py CHANGED Viewed

@@ -107,10 +107,16 @@ class AbstractFileBasedSpec(BaseModel):
         properties_to_change = ["validation_policy"]
         for property_to_change in properties_to_change:
-            property_object = schema["properties"]["streams"]["items"]["properties"][property_to_change]
+            property_object = schema["properties"]["streams"]["items"]["properties"][
+                property_to_change
+            ]
             if "anyOf" in property_object:
-                schema["properties"]["streams"]["items"]["properties"][property_to_change]["type"] = "object"
-                schema["properties"]["streams"]["items"]["properties"][property_to_change]["oneOf"] = property_object.pop("anyOf")
+                schema["properties"]["streams"]["items"]["properties"][property_to_change][
+                    "type"
+                ] = "object"
+                schema["properties"]["streams"]["items"]["properties"][property_to_change][
+                    "oneOf"
+                ] = property_object.pop("anyOf")
             AbstractFileBasedSpec.move_enum_to_root(property_object)
         csv_format_schemas = list(
@@ -121,9 +127,9 @@ class AbstractFileBasedSpec(BaseModel):
         )
         if len(csv_format_schemas) != 1:
             raise ValueError(f"Expecting only one CSV format but got {csv_format_schemas}")
-        csv_format_schemas[0]["properties"]["header_definition"]["oneOf"] = csv_format_schemas[0]["properties"]["header_definition"].pop(
-            "anyOf", []
-        )
+        csv_format_schemas[0]["properties"]["header_definition"]["oneOf"] = csv_format_schemas[0][
+            "properties"
+        ]["header_definition"].pop("anyOf", [])
         csv_format_schemas[0]["properties"]["header_definition"]["type"] = "object"
         return schema

airbyte_cdk/sources/file_based/config/csv_format.py CHANGED Viewed

@@ -70,7 +70,9 @@ class CsvHeaderUserProvided(BaseModel):
     @validator("column_names")
     def validate_column_names(cls, v: List[str]) -> List[str]:
         if not v:
-            raise ValueError("At least one column name needs to be provided when using user provided headers")
+            raise ValueError(
+                "At least one column name needs to be provided when using user provided headers"
+            )
         return v
@@ -107,7 +109,9 @@ class CsvFormat(BaseModel):
         description='The character encoding of the CSV data. Leave blank to default to <strong>UTF8</strong>. See <a href="https://docs.python.org/3/library/codecs.html#standard-encodings" target="_blank">list of python encodings</a> for allowable options.',
     )
     double_quote: bool = Field(
-        title="Double Quote", default=True, description="Whether two quotes in a quoted CSV value denote a single quote in the data."
+        title="Double Quote",
+        default=True,
+        description="Whether two quotes in a quoted CSV value denote a single quote in the data.",
     )
     null_values: Set[str] = Field(
         title="Null Values",
@@ -125,12 +129,16 @@ class CsvFormat(BaseModel):
         description="The number of rows to skip before the header row. For example, if the header row is on the 3rd row, enter 2 in this field.",
     )
     skip_rows_after_header: int = Field(
-        title="Skip Rows After Header", default=0, description="The number of rows to skip after the header row."
+        title="Skip Rows After Header",
+        default=0,
+        description="The number of rows to skip after the header row.",
     )
-    header_definition: Union[CsvHeaderFromCsv, CsvHeaderAutogenerated, CsvHeaderUserProvided] = Field(
-        title="CSV Header Definition",
-        default=CsvHeaderFromCsv(header_definition_type=CsvHeaderDefinitionType.FROM_CSV.value),
-        description="How headers will be defined. `User Provided` assumes the CSV does not have a header row and uses the headers provided and `Autogenerated` assumes the CSV does not have a header row and the CDK will generate headers using for `f{i}` where `i` is the index starting from 0. Else, the default behavior is to use the header from the CSV file. If a user wants to autogenerate or provide column names for a CSV having headers, they can skip rows.",
+    header_definition: Union[CsvHeaderFromCsv, CsvHeaderAutogenerated, CsvHeaderUserProvided] = (
+        Field(
+            title="CSV Header Definition",
+            default=CsvHeaderFromCsv(header_definition_type=CsvHeaderDefinitionType.FROM_CSV.value),
+            description="How headers will be defined. `User Provided` assumes the CSV does not have a header row and uses the headers provided and `Autogenerated` assumes the CSV does not have a header row and the CDK will generate headers using for `f{i}` where `i` is the index starting from 0. Else, the default behavior is to use the header from the CSV file. If a user wants to autogenerate or provide column names for a CSV having headers, they can skip rows.",
+        )
     )
     true_values: Set[str] = Field(
         title="True Values",
@@ -189,9 +197,13 @@ class CsvFormat(BaseModel):
         definition_type = values.get("header_definition_type")
         column_names = values.get("user_provided_column_names")
         if definition_type == CsvHeaderDefinitionType.USER_PROVIDED and not column_names:
-            raise ValidationError("`user_provided_column_names` should be defined if the definition 'User Provided'.", model=CsvFormat)
+            raise ValidationError(
+                "`user_provided_column_names` should be defined if the definition 'User Provided'.",
+                model=CsvFormat,
+            )
         if definition_type != CsvHeaderDefinitionType.USER_PROVIDED and column_names:
             raise ValidationError(
-                "`user_provided_column_names` should not be defined if the definition is not 'User Provided'.", model=CsvFormat
+                "`user_provided_column_names` should not be defined if the definition is not 'User Provided'.",
+                model=CsvFormat,
             )
         return values

airbyte_cdk/sources/file_based/config/file_based_stream_config.py CHANGED Viewed

@@ -56,7 +56,9 @@ class FileBasedStreamConfig(BaseModel):
         description="When the state history of the file store is full, syncs will only read files that were last modified in the provided day range.",
         default=3,
     )
-    format: Union[AvroFormat, CsvFormat, JsonlFormat, ParquetFormat, UnstructuredFormat, ExcelFormat] = Field(
+    format: Union[
+        AvroFormat, CsvFormat, JsonlFormat, ParquetFormat, UnstructuredFormat, ExcelFormat
+    ] = Field(
         title="Format",
         description="The configuration options that are used to alter how to read incoming files that deviate from the standard formatting.",
     )
@@ -89,6 +91,8 @@ class FileBasedStreamConfig(BaseModel):
         if self.input_schema:
             schema = type_mapping_to_jsonschema(self.input_schema)
             if not schema:
-                raise ValueError(f"Unable to create JSON schema from input schema {self.input_schema}")
+                raise ValueError(
+                    f"Unable to create JSON schema from input schema {self.input_schema}"
+                )
             return schema
         return None

airbyte_cdk/sources/file_based/config/unstructured_format.py CHANGED Viewed

@@ -13,7 +13,9 @@ class LocalProcessingConfigModel(BaseModel):
     class Config(OneOfOptionConfig):
         title = "Local"
-        description = "Process files locally, supporting `fast` and `ocr` modes. This is the default option."
+        description = (
+            "Process files locally, supporting `fast` and `ocr` modes. This is the default option."
+        )
         discriminator = "mode"
@@ -23,7 +25,9 @@ class APIParameterConfigModel(BaseModel):
         description="The name of the unstructured API parameter to use",
         examples=["combine_under_n_chars", "languages"],
     )
-    value: str = Field(title="Value", description="The value of the parameter", examples=["true", "hi_res"])
+    value: str = Field(
+        title="Value", description="The value of the parameter", examples=["true", "hi_res"]
+    )
 class APIProcessingConfigModel(BaseModel):
@@ -85,7 +89,10 @@ class UnstructuredFormat(BaseModel):
         description="The strategy used to parse documents. `fast` extracts text directly from the document which doesn't work for all files. `ocr_only` is more reliable, but slower. `hi_res` is the most reliable, but requires an API key and a hosted instance of unstructured and can't be used with local mode. See the unstructured.io documentation for more details: https://unstructured-io.github.io/unstructured/core/partition.html#partition-pdf",
     )
-    processing: Union[LocalProcessingConfigModel, APIProcessingConfigModel,] = Field(
+    processing: Union[
+        LocalProcessingConfigModel,
+        APIProcessingConfigModel,
+    ] = Field(
         default=LocalProcessingConfigModel(mode="local"),
         title="Processing",
         description="Processing configuration",

airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py CHANGED Viewed

@@ -15,9 +15,7 @@ class AbstractDiscoveryPolicy(ABC):
     @property
     @abstractmethod
-    def n_concurrent_requests(self) -> int:
-        ...
+    def n_concurrent_requests(self) -> int: ...
     @abstractmethod
-    def get_max_n_files_for_schema_inference(self, parser: FileTypeParser) -> int:
-        ...
+    def get_max_n_files_for_schema_inference(self, parser: FileTypeParser) -> int: ...

airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py CHANGED Viewed

@@ -2,7 +2,9 @@
 # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 #
-from airbyte_cdk.sources.file_based.discovery_policy.abstract_discovery_policy import AbstractDiscoveryPolicy
+from airbyte_cdk.sources.file_based.discovery_policy.abstract_discovery_policy import (
+    AbstractDiscoveryPolicy,
+)
 from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser
 DEFAULT_N_CONCURRENT_REQUESTS = 10
@@ -23,6 +25,9 @@ class DefaultDiscoveryPolicy(AbstractDiscoveryPolicy):
         return min(
             filter(
                 None,
-                (DEFAULT_MAX_N_FILES_FOR_STREAM_SCHEMA_INFERENCE, parser.parser_max_n_files_for_schema_inference),
+                (
+                    DEFAULT_MAX_N_FILES_FOR_STREAM_SCHEMA_INFERENCE,
+                    parser.parser_max_n_files_for_schema_inference,
+                ),
             )
         )

airbyte_cdk/sources/file_based/exceptions.py CHANGED Viewed

@@ -11,27 +11,21 @@ from airbyte_cdk.utils import AirbyteTracedException
 class FileBasedSourceError(Enum):
     EMPTY_STREAM = "No files were identified in the stream. This may be because there are no files in the specified container, or because your glob patterns did not match any files. Please verify that your source contains files last modified after the start_date and that your glob patterns are not overly strict."
-    GLOB_PARSE_ERROR = (
-        "Error parsing glob pattern. Please refer to the glob pattern rules at https://facelessuser.github.io/wcmatch/glob/#split."
-    )
+    GLOB_PARSE_ERROR = "Error parsing glob pattern. Please refer to the glob pattern rules at https://facelessuser.github.io/wcmatch/glob/#split."
     ENCODING_ERROR = "File encoding error. The configured encoding must match file encoding."
     ERROR_CASTING_VALUE = "Could not cast the value to the expected type."
     ERROR_CASTING_VALUE_UNRECOGNIZED_TYPE = "Could not cast the value to the expected type because the type is not recognized. Valid types are null, array, boolean, integer, number, object, and string."
     ERROR_DECODING_VALUE = "Expected a JSON-decodeable value but could not decode record."
-    ERROR_LISTING_FILES = (
-        "Error listing files. Please check the credentials provided in the config and verify that they provide permission to list files."
-    )
-    ERROR_READING_FILE = (
-        "Error opening file. Please check the credentials provided in the config and verify that they provide permission to read files."
-    )
+    ERROR_LISTING_FILES = "Error listing files. Please check the credentials provided in the config and verify that they provide permission to list files."
+    ERROR_READING_FILE = "Error opening file. Please check the credentials provided in the config and verify that they provide permission to read files."
     ERROR_PARSING_RECORD = "Error parsing record. This could be due to a mismatch between the config's file type and the actual file type, or because the file or record is not parseable."
-    ERROR_PARSING_USER_PROVIDED_SCHEMA = "The provided schema could not be transformed into valid JSON Schema."
+    ERROR_PARSING_USER_PROVIDED_SCHEMA = (
+        "The provided schema could not be transformed into valid JSON Schema."
+    )
     ERROR_VALIDATING_RECORD = "One or more records do not pass the schema validation policy. Please modify your input schema, or select a more lenient validation policy."
     ERROR_PARSING_RECORD_MISMATCHED_COLUMNS = "A header field has resolved to `None`. This indicates that the CSV has more rows than the number of header fields. If you input your schema or headers, please verify that the number of columns corresponds to the number of columns in your CSV's rows."
     ERROR_PARSING_RECORD_MISMATCHED_ROWS = "A row's value has resolved to `None`. This indicates that the CSV has more columns in the header field than the number of columns in the row(s). If you input your schema or headers, please verify that the number of columns corresponds to the number of columns in your CSV's rows."
-    STOP_SYNC_PER_SCHEMA_VALIDATION_POLICY = (
-        "Stopping sync in accordance with the configured validation policy. Records in file did not conform to the schema."
-    )
+    STOP_SYNC_PER_SCHEMA_VALIDATION_POLICY = "Stopping sync in accordance with the configured validation policy. Records in file did not conform to the schema."
     NULL_VALUE_IN_SCHEMA = "Error during schema inference: no type was detected for key."
     UNRECOGNIZED_TYPE = "Error during schema inference: unrecognized type."
     SCHEMA_INFERENCE_ERROR = "Error inferring schema from files. Are the files valid?"
@@ -39,7 +33,9 @@ class FileBasedSourceError(Enum):
     CONFIG_VALIDATION_ERROR = "Error creating stream config object."
     MISSING_SCHEMA = "Expected `json_schema` in the configured catalog but it is missing."
     UNDEFINED_PARSER = "No parser is defined for this file type."
-    UNDEFINED_VALIDATION_POLICY = "The validation policy defined in the config does not exist for the source."
+    UNDEFINED_VALIDATION_POLICY = (
+        "The validation policy defined in the config does not exist for the source."
+    )
 class FileBasedErrorsCollector:
@@ -70,7 +66,9 @@ class BaseFileBasedSourceError(Exception):
     def __init__(self, error: Union[FileBasedSourceError, str], **kwargs):  # type: ignore # noqa
         if isinstance(error, FileBasedSourceError):
             error = FileBasedSourceError(error).value
-        super().__init__(f"{error} Contact Support if you need assistance.\n{' '.join([f'{k}={v}' for k, v in kwargs.items()])}")
+        super().__init__(
+            f"{error} Contact Support if you need assistance.\n{' '.join([f'{k}={v}' for k, v in kwargs.items()])}"
+        )
 class ConfigValidationError(BaseFileBasedSourceError):

airbyte_cdk/sources/file_based/file_based_source.py CHANGED Viewed

@@ -22,15 +22,31 @@ from airbyte_cdk.models import (
 from airbyte_cdk.sources.concurrent_source.concurrent_source import ConcurrentSource
 from airbyte_cdk.sources.concurrent_source.concurrent_source_adapter import ConcurrentSourceAdapter
 from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
-from airbyte_cdk.sources.file_based.availability_strategy import AbstractFileBasedAvailabilityStrategy, DefaultFileBasedAvailabilityStrategy
+from airbyte_cdk.sources.file_based.availability_strategy import (
+    AbstractFileBasedAvailabilityStrategy,
+    DefaultFileBasedAvailabilityStrategy,
+)
 from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import AbstractFileBasedSpec
-from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig, ValidationPolicy
-from airbyte_cdk.sources.file_based.discovery_policy import AbstractDiscoveryPolicy, DefaultDiscoveryPolicy
-from airbyte_cdk.sources.file_based.exceptions import ConfigValidationError, FileBasedErrorsCollector, FileBasedSourceError
+from airbyte_cdk.sources.file_based.config.file_based_stream_config import (
+    FileBasedStreamConfig,
+    ValidationPolicy,
+)
+from airbyte_cdk.sources.file_based.discovery_policy import (
+    AbstractDiscoveryPolicy,
+    DefaultDiscoveryPolicy,
+)
+from airbyte_cdk.sources.file_based.exceptions import (
+    ConfigValidationError,
+    FileBasedErrorsCollector,
+    FileBasedSourceError,
+)
 from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader
 from airbyte_cdk.sources.file_based.file_types import default_parsers
 from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser
-from airbyte_cdk.sources.file_based.schema_validation_policies import DEFAULT_SCHEMA_VALIDATION_POLICIES, AbstractSchemaValidationPolicy
+from airbyte_cdk.sources.file_based.schema_validation_policies import (
+    DEFAULT_SCHEMA_VALIDATION_POLICIES,
+    AbstractSchemaValidationPolicy,
+)
 from airbyte_cdk.sources.file_based.stream import AbstractFileBasedStream, DefaultFileBasedStream
 from airbyte_cdk.sources.file_based.stream.concurrent.adapters import FileBasedStreamFacade
 from airbyte_cdk.sources.file_based.stream.concurrent.cursor import (
@@ -65,25 +81,37 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
         availability_strategy: Optional[AbstractFileBasedAvailabilityStrategy] = None,
         discovery_policy: AbstractDiscoveryPolicy = DefaultDiscoveryPolicy(),
         parsers: Mapping[Type[Any], FileTypeParser] = default_parsers,
-        validation_policies: Mapping[ValidationPolicy, AbstractSchemaValidationPolicy] = DEFAULT_SCHEMA_VALIDATION_POLICIES,
-        cursor_cls: Type[Union[AbstractConcurrentFileBasedCursor, AbstractFileBasedCursor]] = FileBasedConcurrentCursor,
+        validation_policies: Mapping[
+            ValidationPolicy, AbstractSchemaValidationPolicy
+        ] = DEFAULT_SCHEMA_VALIDATION_POLICIES,
+        cursor_cls: Type[
+            Union[AbstractConcurrentFileBasedCursor, AbstractFileBasedCursor]
+        ] = FileBasedConcurrentCursor,
     ):
         self.stream_reader = stream_reader
         self.spec_class = spec_class
         self.config = config
         self.catalog = catalog
         self.state = state
-        self.availability_strategy = availability_strategy or DefaultFileBasedAvailabilityStrategy(stream_reader)
+        self.availability_strategy = availability_strategy or DefaultFileBasedAvailabilityStrategy(
+            stream_reader
+        )
         self.discovery_policy = discovery_policy
         self.parsers = parsers
         self.validation_policies = validation_policies
-        self.stream_schemas = {s.stream.name: s.stream.json_schema for s in catalog.streams} if catalog else {}
+        self.stream_schemas = (
+            {s.stream.name: s.stream.json_schema for s in catalog.streams} if catalog else {}
+        )
         self.cursor_cls = cursor_cls
         self.logger = init_logger(f"airbyte.{self.name}")
         self.errors_collector: FileBasedErrorsCollector = FileBasedErrorsCollector()
         self._message_repository: Optional[MessageRepository] = None
         concurrent_source = ConcurrentSource.create(
-            MAX_CONCURRENCY, INITIAL_N_PARTITIONS, self.logger, self._slice_logger, self.message_repository
+            MAX_CONCURRENCY,
+            INITIAL_N_PARTITIONS,
+            self.logger,
+            self._slice_logger,
+            self.message_repository,
         )
         self._state = None
         super().__init__(concurrent_source)
@@ -91,10 +119,14 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
     @property
     def message_repository(self) -> MessageRepository:
         if self._message_repository is None:
-            self._message_repository = InMemoryMessageRepository(Level(AirbyteLogFormatter.level_mapping[self.logger.level]))
+            self._message_repository = InMemoryMessageRepository(
+                Level(AirbyteLogFormatter.level_mapping[self.logger.level])
+            )
         return self._message_repository
-    def check_connection(self, logger: logging.Logger, config: Mapping[str, Any]) -> Tuple[bool, Optional[Any]]:
+    def check_connection(
+        self, logger: logging.Logger, config: Mapping[str, Any]
+    ) -> Tuple[bool, Optional[Any]]:
         """
         Check that the source can be accessed using the user-provided configuration.
@@ -195,13 +227,21 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
                 sync_mode = self._get_sync_mode_from_catalog(stream_config.name)
-                if sync_mode == SyncMode.full_refresh and hasattr(self, "_concurrency_level") and self._concurrency_level is not None:
+                if (
+                    sync_mode == SyncMode.full_refresh
+                    and hasattr(self, "_concurrency_level")
+                    and self._concurrency_level is not None
+                ):
                     cursor = FileBasedFinalStateCursor(
-                        stream_config=stream_config, stream_namespace=None, message_repository=self.message_repository
+                        stream_config=stream_config,
+                        stream_namespace=None,
+                        message_repository=self.message_repository,
                     )
                     stream = FileBasedStreamFacade.create_from_stream(
                         stream=self._make_default_stream(
-                            stream_config=stream_config, cursor=cursor, use_file_transfer=self._use_file_transfer(parsed_config)
+                            stream_config=stream_config,
+                            cursor=cursor,
+                            use_file_transfer=self._use_file_transfer(parsed_config),
                         ),
                         source=self,
                         logger=self.logger,
@@ -230,7 +270,9 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
                     )
                     stream = FileBasedStreamFacade.create_from_stream(
                         stream=self._make_default_stream(
-                            stream_config=stream_config, cursor=cursor, use_file_transfer=self._use_file_transfer(parsed_config)
+                            stream_config=stream_config,
+                            cursor=cursor,
+                            use_file_transfer=self._use_file_transfer(parsed_config),
                         ),
                         source=self,
                         logger=self.logger,
@@ -240,7 +282,9 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
                 else:
                     cursor = self.cursor_cls(stream_config)
                     stream = self._make_default_stream(
-                        stream_config=stream_config, cursor=cursor, use_file_transfer=self._use_file_transfer(parsed_config)
+                        stream_config=stream_config,
+                        cursor=cursor,
+                        use_file_transfer=self._use_file_transfer(parsed_config),
                     )
                 streams.append(stream)
@@ -250,7 +294,10 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
             raise ConfigValidationError(FileBasedSourceError.CONFIG_VALIDATION_ERROR) from exc
     def _make_default_stream(
-        self, stream_config: FileBasedStreamConfig, cursor: Optional[AbstractFileBasedCursor], use_file_transfer: bool = False
+        self,
+        stream_config: FileBasedStreamConfig,
+        cursor: Optional[AbstractFileBasedCursor],
+        use_file_transfer: bool = False,
     ) -> AbstractFileBasedStream:
         return DefaultFileBasedStream(
             config=stream_config,
@@ -265,7 +312,9 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
             use_file_transfer=use_file_transfer,
         )
-    def _get_stream_from_catalog(self, stream_config: FileBasedStreamConfig) -> Optional[AirbyteStream]:
+    def _get_stream_from_catalog(
+        self, stream_config: FileBasedStreamConfig
+    ) -> Optional[AirbyteStream]:
         if self.catalog:
             for stream in self.catalog.streams or []:
                 if stream.stream.name == stream_config.name:
@@ -292,7 +341,9 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
         yield from self.errors_collector.yield_and_raise_collected()
         # count streams using a certain parser
         parsed_config = self._get_parsed_config(config)
-        for parser, count in Counter(stream.format.filetype for stream in parsed_config.streams).items():
+        for parser, count in Counter(
+            stream.format.filetype for stream in parsed_config.streams
+        ).items():
             yield create_analytics_message(f"file-cdk-{parser}-stream-count", count)
     def spec(self, *args: Any, **kwargs: Any) -> ConnectorSpecification:
@@ -308,21 +359,28 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
     def _get_parsed_config(self, config: Mapping[str, Any]) -> AbstractFileBasedSpec:
         return self.spec_class(**config)
-    def _validate_and_get_validation_policy(self, stream_config: FileBasedStreamConfig) -> AbstractSchemaValidationPolicy:
+    def _validate_and_get_validation_policy(
+        self, stream_config: FileBasedStreamConfig
+    ) -> AbstractSchemaValidationPolicy:
         if stream_config.validation_policy not in self.validation_policies:
             # This should never happen because we validate the config against the schema's validation_policy enum
             raise ValidationError(
-                f"`validation_policy` must be one of {list(self.validation_policies.keys())}", model=FileBasedStreamConfig
+                f"`validation_policy` must be one of {list(self.validation_policies.keys())}",
+                model=FileBasedStreamConfig,
             )
         return self.validation_policies[stream_config.validation_policy]
     def _validate_input_schema(self, stream_config: FileBasedStreamConfig) -> None:
         if stream_config.schemaless and stream_config.input_schema:
-            raise ValidationError("`input_schema` and `schemaless` options cannot both be set", model=FileBasedStreamConfig)
+            raise ValidationError(
+                "`input_schema` and `schemaless` options cannot both be set",
+                model=FileBasedStreamConfig,
+            )
     @staticmethod
     def _use_file_transfer(parsed_config: AbstractFileBasedSpec) -> bool:
         use_file_transfer = (
-            hasattr(parsed_config.delivery_method, "delivery_type") and parsed_config.delivery_method.delivery_type == "use_file_transfer"
+            hasattr(parsed_config.delivery_method, "delivery_type")
+            and parsed_config.delivery_method.delivery_type == "use_file_transfer"
         )
         return use_file_transfer

airbyte-cdk 6.5.3rc2__py3-none-any.whl → 6.6.0__py3-none-any.whl

airbyte-cdk 6.5.3rc2py3-none-any.whl → 6.6.0py3-none-any.whl