airbyte-cdk 6.20.2.dev0__py3-none-any.whl → 6.21.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/declarative/auth/oauth.py +34 -0
- airbyte_cdk/sources/declarative/checks/__init__.py +18 -2
- airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +51 -0
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +16 -80
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +123 -21
- airbyte_cdk/sources/declarative/decoders/__init__.py +9 -1
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +43 -0
- airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py +59 -0
- airbyte_cdk/sources/declarative/extractors/record_filter.py +5 -3
- airbyte_cdk/sources/declarative/incremental/__init__.py +0 -6
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +0 -3
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +0 -15
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +2 -1
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +112 -27
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +127 -106
- airbyte_cdk/sources/declarative/requesters/README.md +56 -0
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +33 -4
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +1 -1
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +13 -3
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +11 -0
- airbyte_cdk/sources/file_based/exceptions.py +34 -0
- airbyte_cdk/sources/file_based/file_based_source.py +28 -5
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +18 -4
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +25 -2
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +30 -2
- airbyte_cdk/sources/streams/concurrent/cursor.py +21 -30
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +33 -4
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +42 -4
- airbyte_cdk/sources/types.py +3 -0
- airbyte_cdk/sources/utils/transform.py +29 -3
- {airbyte_cdk-6.20.2.dev0.dist-info → airbyte_cdk-6.21.0.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.20.2.dev0.dist-info → airbyte_cdk-6.21.0.dist-info}/RECORD +35 -33
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +0 -331
- {airbyte_cdk-6.20.2.dev0.dist-info → airbyte_cdk-6.21.0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.20.2.dev0.dist-info → airbyte_cdk-6.21.0.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.20.2.dev0.dist-info → airbyte_cdk-6.21.0.dist-info}/entry_points.txt +0 -0
| @@ -7,9 +7,12 @@ from dataclasses import dataclass | |
| 7 7 | 
             
            from io import BufferedIOBase, TextIOWrapper
         | 
| 8 8 | 
             
            from typing import Any, Generator, MutableMapping, Optional
         | 
| 9 9 |  | 
| 10 | 
            +
            import orjson
         | 
| 10 11 | 
             
            import requests
         | 
| 11 12 |  | 
| 13 | 
            +
            from airbyte_cdk.models import FailureType
         | 
| 12 14 | 
             
            from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
         | 
| 15 | 
            +
            from airbyte_cdk.utils import AirbyteTracedException
         | 
| 13 16 |  | 
| 14 17 | 
             
            logger = logging.getLogger("airbyte")
         | 
| 15 18 |  | 
| @@ -42,6 +45,46 @@ class GzipParser(Parser): | |
| 42 45 | 
             
                        yield from self.inner_parser.parse(gzipobj)
         | 
| 43 46 |  | 
| 44 47 |  | 
| 48 | 
            +
            @dataclass
         | 
| 49 | 
            +
            class JsonParser(Parser):
         | 
| 50 | 
            +
                encoding: str = "utf-8"
         | 
| 51 | 
            +
             | 
| 52 | 
            +
                def parse(self, data: BufferedIOBase) -> Generator[MutableMapping[str, Any], None, None]:
         | 
| 53 | 
            +
                    """
         | 
| 54 | 
            +
                    Attempts to deserialize data using the orjson library. As an extra layer of safety, we fall back on the json library to deserialize the data.
         | 
| 55 | 
            +
                    """
         | 
| 56 | 
            +
                    raw_data = data.read()
         | 
| 57 | 
            +
                    body_json = self._parse_orjson(raw_data) or self._parse_json(raw_data)
         | 
| 58 | 
            +
             | 
| 59 | 
            +
                    if body_json is None:
         | 
| 60 | 
            +
                        raise AirbyteTracedException(
         | 
| 61 | 
            +
                            message="Response JSON data failed to be parsed. See logs for more information.",
         | 
| 62 | 
            +
                            internal_message=f"Response JSON data failed to be parsed.",
         | 
| 63 | 
            +
                            failure_type=FailureType.system_error,
         | 
| 64 | 
            +
                        )
         | 
| 65 | 
            +
             | 
| 66 | 
            +
                    if isinstance(body_json, list):
         | 
| 67 | 
            +
                        yield from body_json
         | 
| 68 | 
            +
                    else:
         | 
| 69 | 
            +
                        yield from [body_json]
         | 
| 70 | 
            +
             | 
| 71 | 
            +
                def _parse_orjson(self, raw_data: bytes) -> Optional[Any]:
         | 
| 72 | 
            +
                    try:
         | 
| 73 | 
            +
                        return orjson.loads(raw_data.decode(self.encoding))
         | 
| 74 | 
            +
                    except Exception as exc:
         | 
| 75 | 
            +
                        logger.debug(
         | 
| 76 | 
            +
                            f"Failed to parse JSON data using orjson library. Falling back to json library. {exc}"
         | 
| 77 | 
            +
                        )
         | 
| 78 | 
            +
                        return None
         | 
| 79 | 
            +
             | 
| 80 | 
            +
                def _parse_json(self, raw_data: bytes) -> Optional[Any]:
         | 
| 81 | 
            +
                    try:
         | 
| 82 | 
            +
                        return json.loads(raw_data.decode(self.encoding))
         | 
| 83 | 
            +
                    except Exception as exc:
         | 
| 84 | 
            +
                        logger.error(f"Failed to parse JSON data using json library. {exc}")
         | 
| 85 | 
            +
                        return None
         | 
| 86 | 
            +
             | 
| 87 | 
            +
             | 
| 45 88 | 
             
            @dataclass
         | 
| 46 89 | 
             
            class JsonLineParser(Parser):
         | 
| 47 90 | 
             
                encoding: Optional[str] = "utf-8"
         | 
| @@ -0,0 +1,59 @@ | |
| 1 | 
            +
            #
         | 
| 2 | 
            +
            # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
         | 
| 3 | 
            +
            #
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            import logging
         | 
| 6 | 
            +
            import zipfile
         | 
| 7 | 
            +
            from dataclasses import dataclass
         | 
| 8 | 
            +
            from io import BytesIO
         | 
| 9 | 
            +
            from typing import Any, Generator, MutableMapping
         | 
| 10 | 
            +
             | 
| 11 | 
            +
            import orjson
         | 
| 12 | 
            +
            import requests
         | 
| 13 | 
            +
             | 
| 14 | 
            +
            from airbyte_cdk.models import FailureType
         | 
| 15 | 
            +
            from airbyte_cdk.sources.declarative.decoders import Decoder
         | 
| 16 | 
            +
            from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import (
         | 
| 17 | 
            +
                Parser,
         | 
| 18 | 
            +
            )
         | 
| 19 | 
            +
            from airbyte_cdk.utils import AirbyteTracedException
         | 
| 20 | 
            +
             | 
| 21 | 
            +
            logger = logging.getLogger("airbyte")
         | 
| 22 | 
            +
             | 
| 23 | 
            +
             | 
| 24 | 
            +
            @dataclass
         | 
| 25 | 
            +
            class ZipfileDecoder(Decoder):
         | 
| 26 | 
            +
                parser: Parser
         | 
| 27 | 
            +
             | 
| 28 | 
            +
                def is_stream_response(self) -> bool:
         | 
| 29 | 
            +
                    return False
         | 
| 30 | 
            +
             | 
| 31 | 
            +
                def decode(
         | 
| 32 | 
            +
                    self, response: requests.Response
         | 
| 33 | 
            +
                ) -> Generator[MutableMapping[str, Any], None, None]:
         | 
| 34 | 
            +
                    try:
         | 
| 35 | 
            +
                        with zipfile.ZipFile(BytesIO(response.content)) as zip_file:
         | 
| 36 | 
            +
                            for file_name in zip_file.namelist():
         | 
| 37 | 
            +
                                unzipped_content = zip_file.read(file_name)
         | 
| 38 | 
            +
                                buffered_content = BytesIO(unzipped_content)
         | 
| 39 | 
            +
                                try:
         | 
| 40 | 
            +
                                    yield from self.parser.parse(buffered_content)
         | 
| 41 | 
            +
                                except Exception as e:
         | 
| 42 | 
            +
                                    logger.error(
         | 
| 43 | 
            +
                                        f"Failed to parse file: {file_name} from zip file: {response.request.url} with exception {e}."
         | 
| 44 | 
            +
                                    )
         | 
| 45 | 
            +
                                    raise AirbyteTracedException(
         | 
| 46 | 
            +
                                        message=f"Failed to parse file: {file_name} from zip file.",
         | 
| 47 | 
            +
                                        internal_message=f"Failed to parse file: {file_name} from zip file: {response.request.url}.",
         | 
| 48 | 
            +
                                        failure_type=FailureType.system_error,
         | 
| 49 | 
            +
                                    ) from e
         | 
| 50 | 
            +
                    except zipfile.BadZipFile as e:
         | 
| 51 | 
            +
                        logger.error(
         | 
| 52 | 
            +
                            f"Received an invalid zip file in response to URL: {response.request.url}. "
         | 
| 53 | 
            +
                            f"The size of the response body is: {len(response.content)}"
         | 
| 54 | 
            +
                        )
         | 
| 55 | 
            +
                        raise AirbyteTracedException(
         | 
| 56 | 
            +
                            message="Received an invalid zip file in response.",
         | 
| 57 | 
            +
                            internal_message=f"Received an invalid zip file in response to URL: {response.request.url}.",
         | 
| 58 | 
            +
                            failure_type=FailureType.system_error,
         | 
| 59 | 
            +
                        ) from e
         | 
| @@ -59,11 +59,13 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter): | |
| 59 59 |  | 
| 60 60 | 
             
                def __init__(
         | 
| 61 61 | 
             
                    self,
         | 
| 62 | 
            -
                     | 
| 62 | 
            +
                    date_time_based_cursor: DatetimeBasedCursor,
         | 
| 63 | 
            +
                    substream_cursor: Optional[Union[PerPartitionWithGlobalCursor, GlobalSubstreamCursor]],
         | 
| 63 64 | 
             
                    **kwargs: Any,
         | 
| 64 65 | 
             
                ):
         | 
| 65 66 | 
             
                    super().__init__(**kwargs)
         | 
| 66 | 
            -
                    self. | 
| 67 | 
            +
                    self._date_time_based_cursor = date_time_based_cursor
         | 
| 68 | 
            +
                    self._substream_cursor = substream_cursor
         | 
| 67 69 |  | 
| 68 70 | 
             
                def filter_records(
         | 
| 69 71 | 
             
                    self,
         | 
| @@ -75,7 +77,7 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter): | |
| 75 77 | 
             
                    records = (
         | 
| 76 78 | 
             
                        record
         | 
| 77 79 | 
             
                        for record in records
         | 
| 78 | 
            -
                        if self. | 
| 80 | 
            +
                        if (self._substream_cursor or self._date_time_based_cursor).should_be_synced(
         | 
| 79 81 | 
             
                            # Record is created on the fly to align with cursors interface; stream name is ignored as we don't need it here
         | 
| 80 82 | 
             
                            # Record stream name is empty because it is not used during the filtering
         | 
| 81 83 | 
             
                            Record(data=record, associated_slice=stream_slice, stream_name="")
         | 
| @@ -2,10 +2,6 @@ | |
| 2 2 | 
             
            # Copyright (c) 2022 Airbyte, Inc., all rights reserved.
         | 
| 3 3 | 
             
            #
         | 
| 4 4 |  | 
| 5 | 
            -
            from airbyte_cdk.sources.declarative.incremental.concurrent_partition_cursor import (
         | 
| 6 | 
            -
                ConcurrentCursorFactory,
         | 
| 7 | 
            -
                ConcurrentPerPartitionCursor,
         | 
| 8 | 
            -
            )
         | 
| 9 5 | 
             
            from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
         | 
| 10 6 | 
             
            from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor
         | 
| 11 7 | 
             
            from airbyte_cdk.sources.declarative.incremental.global_substream_cursor import (
         | 
| @@ -25,8 +21,6 @@ from airbyte_cdk.sources.declarative.incremental.resumable_full_refresh_cursor i | |
| 25 21 |  | 
| 26 22 | 
             
            __all__ = [
         | 
| 27 23 | 
             
                "CursorFactory",
         | 
| 28 | 
            -
                "ConcurrentCursorFactory",
         | 
| 29 | 
            -
                "ConcurrentPerPartitionCursor",
         | 
| 30 24 | 
             
                "DatetimeBasedCursor",
         | 
| 31 25 | 
             
                "DeclarativeCursor",
         | 
| 32 26 | 
             
                "GlobalSubstreamCursor",
         | 
| @@ -303,21 +303,6 @@ class PerPartitionCursor(DeclarativeCursor): | |
| 303 303 | 
             
                        raise ValueError("A partition needs to be provided in order to get request body json")
         | 
| 304 304 |  | 
| 305 305 | 
             
                def should_be_synced(self, record: Record) -> bool:
         | 
| 306 | 
            -
                    if (
         | 
| 307 | 
            -
                        record.associated_slice
         | 
| 308 | 
            -
                        and self._to_partition_key(record.associated_slice.partition)
         | 
| 309 | 
            -
                        not in self._cursor_per_partition
         | 
| 310 | 
            -
                    ):
         | 
| 311 | 
            -
                        partition_state = (
         | 
| 312 | 
            -
                            self._state_to_migrate_from
         | 
| 313 | 
            -
                            if self._state_to_migrate_from
         | 
| 314 | 
            -
                            else self._NO_CURSOR_STATE
         | 
| 315 | 
            -
                        )
         | 
| 316 | 
            -
                        cursor = self._create_cursor(partition_state)
         | 
| 317 | 
            -
             | 
| 318 | 
            -
                        self._cursor_per_partition[
         | 
| 319 | 
            -
                            self._to_partition_key(record.associated_slice.partition)
         | 
| 320 | 
            -
                        ] = cursor
         | 
| 321 306 | 
             
                    return self._get_cursor(record).should_be_synced(
         | 
| 322 307 | 
             
                        self._convert_record_to_cursor_record(record)
         | 
| 323 308 | 
             
                    )
         | 
| @@ -22,6 +22,7 @@ from airbyte_cdk.models import ( | |
| 22 22 | 
             
                ConnectorSpecification,
         | 
| 23 23 | 
             
                FailureType,
         | 
| 24 24 | 
             
            )
         | 
| 25 | 
            +
            from airbyte_cdk.sources.declarative.checks import COMPONENTS_CHECKER_TYPE_MAPPING
         | 
| 25 26 | 
             
            from airbyte_cdk.sources.declarative.checks.connection_checker import ConnectionChecker
         | 
| 26 27 | 
             
            from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource
         | 
| 27 28 | 
             
            from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
         | 
| @@ -107,7 +108,7 @@ class ManifestDeclarativeSource(DeclarativeSource): | |
| 107 108 | 
             
                    if "type" not in check:
         | 
| 108 109 | 
             
                        check["type"] = "CheckStream"
         | 
| 109 110 | 
             
                    check_stream = self._constructor.create_component(
         | 
| 110 | 
            -
                         | 
| 111 | 
            +
                        COMPONENTS_CHECKER_TYPE_MAPPING[check["type"]],
         | 
| 111 112 | 
             
                        check,
         | 
| 112 113 | 
             
                        dict(),
         | 
| 113 114 | 
             
                        emit_connector_builder_messages=self._emit_connector_builder_messages,
         | 
| @@ -52,6 +52,15 @@ class CheckStream(BaseModel): | |
| 52 52 | 
             
                )
         | 
| 53 53 |  | 
| 54 54 |  | 
| 55 | 
            +
            class CheckDynamicStream(BaseModel):
         | 
| 56 | 
            +
                type: Literal["CheckDynamicStream"]
         | 
| 57 | 
            +
                stream_count: int = Field(
         | 
| 58 | 
            +
                    ...,
         | 
| 59 | 
            +
                    description="Numbers of the streams to try reading from when running a check operation.",
         | 
| 60 | 
            +
                    title="Stream Count",
         | 
| 61 | 
            +
                )
         | 
| 62 | 
            +
             | 
| 63 | 
            +
             | 
| 55 64 | 
             
            class ConcurrencyLevel(BaseModel):
         | 
| 56 65 | 
             
                type: Optional[Literal["ConcurrencyLevel"]] = None
         | 
| 57 66 | 
             
                default_concurrency: Union[int, str] = Field(
         | 
| @@ -481,12 +490,24 @@ class RefreshTokenUpdater(BaseModel): | |
| 481 490 |  | 
| 482 491 | 
             
            class OAuthAuthenticator(BaseModel):
         | 
| 483 492 | 
             
                type: Literal["OAuthAuthenticator"]
         | 
| 493 | 
            +
                client_id_name: Optional[str] = Field(
         | 
| 494 | 
            +
                    "client_id",
         | 
| 495 | 
            +
                    description="The name of the property to use to refresh the `access_token`.",
         | 
| 496 | 
            +
                    examples=["custom_app_id"],
         | 
| 497 | 
            +
                    title="Client ID Property Name",
         | 
| 498 | 
            +
                )
         | 
| 484 499 | 
             
                client_id: str = Field(
         | 
| 485 500 | 
             
                    ...,
         | 
| 486 501 | 
             
                    description="The OAuth client ID. Fill it in the user inputs.",
         | 
| 487 502 | 
             
                    examples=["{{ config['client_id }}", "{{ config['credentials']['client_id }}"],
         | 
| 488 503 | 
             
                    title="Client ID",
         | 
| 489 504 | 
             
                )
         | 
| 505 | 
            +
                client_secret_name: Optional[str] = Field(
         | 
| 506 | 
            +
                    "client_secret",
         | 
| 507 | 
            +
                    description="The name of the property to use to refresh the `access_token`.",
         | 
| 508 | 
            +
                    examples=["custom_app_secret"],
         | 
| 509 | 
            +
                    title="Client Secret Property Name",
         | 
| 510 | 
            +
                )
         | 
| 490 511 | 
             
                client_secret: str = Field(
         | 
| 491 512 | 
             
                    ...,
         | 
| 492 513 | 
             
                    description="The OAuth client secret. Fill it in the user inputs.",
         | 
| @@ -496,6 +517,12 @@ class OAuthAuthenticator(BaseModel): | |
| 496 517 | 
             
                    ],
         | 
| 497 518 | 
             
                    title="Client Secret",
         | 
| 498 519 | 
             
                )
         | 
| 520 | 
            +
                refresh_token_name: Optional[str] = Field(
         | 
| 521 | 
            +
                    "refresh_token",
         | 
| 522 | 
            +
                    description="The name of the property to use to refresh the `access_token`.",
         | 
| 523 | 
            +
                    examples=["custom_app_refresh_value"],
         | 
| 524 | 
            +
                    title="Refresh Token Property Name",
         | 
| 525 | 
            +
                )
         | 
| 499 526 | 
             
                refresh_token: Optional[str] = Field(
         | 
| 500 527 | 
             
                    None,
         | 
| 501 528 | 
             
                    description="Credential artifact used to get a new access token.",
         | 
| @@ -529,6 +556,12 @@ class OAuthAuthenticator(BaseModel): | |
| 529 556 | 
             
                    examples=["expires_in"],
         | 
| 530 557 | 
             
                    title="Token Expiry Property Name",
         | 
| 531 558 | 
             
                )
         | 
| 559 | 
            +
                grant_type_name: Optional[str] = Field(
         | 
| 560 | 
            +
                    "grant_type",
         | 
| 561 | 
            +
                    description="The name of the property to use to refresh the `access_token`.",
         | 
| 562 | 
            +
                    examples=["custom_grant_type"],
         | 
| 563 | 
            +
                    title="Grant Type Property Name",
         | 
| 564 | 
            +
                )
         | 
| 532 565 | 
             
                grant_type: Optional[str] = Field(
         | 
| 533 566 | 
             
                    "refresh_token",
         | 
| 534 567 | 
             
                    description="Specifies the OAuth2 grant type. If set to refresh_token, the refresh_token needs to be provided as well. For client_credentials, only client id and secret are required. Other grant types are not officially supported.",
         | 
| @@ -547,6 +580,17 @@ class OAuthAuthenticator(BaseModel): | |
| 547 580 | 
             
                    ],
         | 
| 548 581 | 
             
                    title="Refresh Request Body",
         | 
| 549 582 | 
             
                )
         | 
| 583 | 
            +
                refresh_request_headers: Optional[Dict[str, Any]] = Field(
         | 
| 584 | 
            +
                    None,
         | 
| 585 | 
            +
                    description="Headers of the request sent to get a new access token.",
         | 
| 586 | 
            +
                    examples=[
         | 
| 587 | 
            +
                        {
         | 
| 588 | 
            +
                            "Authorization": "<AUTH_TOKEN>",
         | 
| 589 | 
            +
                            "Content-Type": "application/x-www-form-urlencoded",
         | 
| 590 | 
            +
                        }
         | 
| 591 | 
            +
                    ],
         | 
| 592 | 
            +
                    title="Refresh Request Headers",
         | 
| 593 | 
            +
                )
         | 
| 550 594 | 
             
                scopes: Optional[List[str]] = Field(
         | 
| 551 595 | 
             
                    None,
         | 
| 552 596 | 
             
                    description="List of scopes that should be granted to the access token.",
         | 
| @@ -675,6 +719,7 @@ class HttpResponseFilter(BaseModel): | |
| 675 719 | 
             
            class TypesMap(BaseModel):
         | 
| 676 720 | 
             
                target_type: Union[str, List[str]]
         | 
| 677 721 | 
             
                current_type: Union[str, List[str]]
         | 
| 722 | 
            +
                condition: Optional[str]
         | 
| 678 723 |  | 
| 679 724 |  | 
| 680 725 | 
             
            class SchemaTypeIdentifier(BaseModel):
         | 
| @@ -737,33 +782,43 @@ class KeysToSnakeCase(BaseModel): | |
| 737 782 | 
             
                parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
         | 
| 738 783 |  | 
| 739 784 |  | 
| 785 | 
            +
            class FlattenFields(BaseModel):
         | 
| 786 | 
            +
                type: Literal["FlattenFields"]
         | 
| 787 | 
            +
                flatten_lists: Optional[bool] = Field(
         | 
| 788 | 
            +
                    True,
         | 
| 789 | 
            +
                    description="Whether to flatten lists or leave it as is. Default is True.",
         | 
| 790 | 
            +
                    title="Flatten Lists",
         | 
| 791 | 
            +
                )
         | 
| 792 | 
            +
                parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
         | 
| 793 | 
            +
             | 
| 794 | 
            +
             | 
| 740 795 | 
             
            class KeysReplace(BaseModel):
         | 
| 741 796 | 
             
                type: Literal["KeysReplace"]
         | 
| 742 797 | 
             
                old: str = Field(
         | 
| 743 798 | 
             
                    ...,
         | 
| 744 799 | 
             
                    description="Old value to replace.",
         | 
| 745 | 
            -
                    examples=[ | 
| 800 | 
            +
                    examples=[
         | 
| 801 | 
            +
                        " ",
         | 
| 802 | 
            +
                        "{{ record.id }}",
         | 
| 803 | 
            +
                        "{{ config['id'] }}",
         | 
| 804 | 
            +
                        "{{ stream_slice['id'] }}",
         | 
| 805 | 
            +
                    ],
         | 
| 746 806 | 
             
                    title="Old value",
         | 
| 747 807 | 
             
                )
         | 
| 748 808 | 
             
                new: str = Field(
         | 
| 749 809 | 
             
                    ...,
         | 
| 750 810 | 
             
                    description="New value to set.",
         | 
| 751 | 
            -
                    examples=[ | 
| 811 | 
            +
                    examples=[
         | 
| 812 | 
            +
                        "_",
         | 
| 813 | 
            +
                        "{{ record.id }}",
         | 
| 814 | 
            +
                        "{{ config['id'] }}",
         | 
| 815 | 
            +
                        "{{ stream_slice['id'] }}",
         | 
| 816 | 
            +
                    ],
         | 
| 752 817 | 
             
                    title="New value",
         | 
| 753 818 | 
             
                )
         | 
| 754 819 | 
             
                parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
         | 
| 755 820 |  | 
| 756 821 |  | 
| 757 | 
            -
            class FlattenFields(BaseModel):
         | 
| 758 | 
            -
                type: Literal["FlattenFields"]
         | 
| 759 | 
            -
                flatten_lists: Optional[bool] = Field(
         | 
| 760 | 
            -
                    True,
         | 
| 761 | 
            -
                    description="Whether to flatten lists or leave it as is. Default is True.",
         | 
| 762 | 
            -
                    title="Flatten Lists",
         | 
| 763 | 
            -
                )
         | 
| 764 | 
            -
                parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
         | 
| 765 | 
            -
             | 
| 766 | 
            -
             | 
| 767 822 | 
             
            class IterableDecoder(BaseModel):
         | 
| 768 823 | 
             
                type: Literal["IterableDecoder"]
         | 
| 769 824 |  | 
| @@ -849,8 +904,8 @@ class OauthConnectorInputSpecification(BaseModel): | |
| 849 904 | 
             
                    ...,
         | 
| 850 905 | 
             
                    description="The DeclarativeOAuth Specific string URL string template to initiate the authentication.\nThe placeholders are replaced during the processing to provide neccessary values.",
         | 
| 851 906 | 
             
                    examples=[
         | 
| 852 | 
            -
                        "https://domain.host.com/marketing_api/auth?{client_id_key}={{ | 
| 853 | 
            -
                        "https://endpoint.host.com/oauth2/authorize?{client_id_key}={{ | 
| 907 | 
            +
                        "https://domain.host.com/marketing_api/auth?{{client_id_key}}={{client_id_value}}&{{redirect_uri_key}}={{{{redirect_uri_value}} | urlEncoder}}&{{state_key}}={{state_value}}",
         | 
| 908 | 
            +
                        "https://endpoint.host.com/oauth2/authorize?{{client_id_key}}={{client_id_value}}&{{redirect_uri_key}}={{{{redirect_uri_value}} | urlEncoder}}&{{scope_key}}={{{{scope_value}} | urlEncoder}}&{{state_key}}={{state_value}}&subdomain={{subdomain}}",
         | 
| 854 909 | 
             
                    ],
         | 
| 855 910 | 
             
                    title="Consent URL",
         | 
| 856 911 | 
             
                )
         | 
| @@ -864,14 +919,18 @@ class OauthConnectorInputSpecification(BaseModel): | |
| 864 919 | 
             
                    ...,
         | 
| 865 920 | 
             
                    description="The DeclarativeOAuth Specific URL templated string to obtain the `access_token`, `refresh_token` etc.\nThe placeholders are replaced during the processing to provide neccessary values.",
         | 
| 866 921 | 
             
                    examples=[
         | 
| 867 | 
            -
                        "https://auth.host.com/oauth2/token?{client_id_key}={{ | 
| 922 | 
            +
                        "https://auth.host.com/oauth2/token?{{client_id_key}}={{client_id_value}}&{{client_secret_key}}={{client_secret_value}}&{{auth_code_key}}={{auth_code_value}}&{{redirect_uri_key}}={{{{redirect_uri_value}} | urlEncoder}}"
         | 
| 868 923 | 
             
                    ],
         | 
| 869 924 | 
             
                    title="Access Token URL",
         | 
| 870 925 | 
             
                )
         | 
| 871 926 | 
             
                access_token_headers: Optional[Dict[str, Any]] = Field(
         | 
| 872 927 | 
             
                    None,
         | 
| 873 928 | 
             
                    description="The DeclarativeOAuth Specific optional headers to inject while exchanging the `auth_code` to `access_token` during `completeOAuthFlow` step.",
         | 
| 874 | 
            -
                    examples=[ | 
| 929 | 
            +
                    examples=[
         | 
| 930 | 
            +
                        {
         | 
| 931 | 
            +
                            "Authorization": "Basic {{ {{ client_id_value }}:{{ client_secret_value }} | base64Encoder }}"
         | 
| 932 | 
            +
                        }
         | 
| 933 | 
            +
                    ],
         | 
| 875 934 | 
             
                    title="Access Token Headers",
         | 
| 876 935 | 
             
                )
         | 
| 877 936 | 
             
                access_token_params: Optional[Dict[str, Any]] = Field(
         | 
| @@ -879,15 +938,15 @@ class OauthConnectorInputSpecification(BaseModel): | |
| 879 938 | 
             
                    description="The DeclarativeOAuth Specific optional query parameters to inject while exchanging the `auth_code` to `access_token` during `completeOAuthFlow` step.\nWhen this property is provided, the query params will be encoded as `Json` and included in the outgoing API request.",
         | 
| 880 939 | 
             
                    examples=[
         | 
| 881 940 | 
             
                        {
         | 
| 882 | 
            -
                            "{auth_code_key}": "{{ | 
| 883 | 
            -
                            "{client_id_key}": "{{ | 
| 884 | 
            -
                            "{client_secret_key}": "{{ | 
| 941 | 
            +
                            "{{ auth_code_key }}": "{{ auth_code_value }}",
         | 
| 942 | 
            +
                            "{{ client_id_key }}": "{{ client_id_value }}",
         | 
| 943 | 
            +
                            "{{ client_secret_key }}": "{{ client_secret_value }}",
         | 
| 885 944 | 
             
                        }
         | 
| 886 945 | 
             
                    ],
         | 
| 887 946 | 
             
                    title="Access Token Query Params (Json Encoded)",
         | 
| 888 947 | 
             
                )
         | 
| 889 | 
            -
                extract_output: List[str] = Field(
         | 
| 890 | 
            -
                     | 
| 948 | 
            +
                extract_output: Optional[List[str]] = Field(
         | 
| 949 | 
            +
                    None,
         | 
| 891 950 | 
             
                    description="The DeclarativeOAuth Specific list of strings to indicate which keys should be extracted and returned back to the input config.",
         | 
| 892 951 | 
             
                    examples=[["access_token", "refresh_token", "other_field"]],
         | 
| 893 952 | 
             
                    title="Extract Output",
         | 
| @@ -956,7 +1015,7 @@ class OAuthConfigSpecification(BaseModel): | |
| 956 1015 | 
             
                )
         | 
| 957 1016 | 
             
                oauth_connector_input_specification: Optional[OauthConnectorInputSpecification] = Field(
         | 
| 958 1017 | 
             
                    None,
         | 
| 959 | 
            -
                    description='The DeclarativeOAuth specific blob.\nPertains to the fields defined by the connector relating to the OAuth flow.\n\nInterpolation capabilities:\n- The variables placeholders are declared as `{my_var}`.\n- The nested resolution variables like `{{my_nested_var}}` is allowed as well.\n\n- The allowed interpolation context is:\n  + base64Encoder - encode to `base64`, { | 
| 1018 | 
            +
                    description='The DeclarativeOAuth specific blob.\nPertains to the fields defined by the connector relating to the OAuth flow.\n\nInterpolation capabilities:\n- The variables placeholders are declared as `{{my_var}}`.\n- The nested resolution variables like `{{ {{my_nested_var}} }}` is allowed as well.\n\n- The allowed interpolation context is:\n  + base64Encoder - encode to `base64`, {{ {{my_var_a}}:{{my_var_b}} | base64Encoder }}\n  + base64Decorer - decode from `base64` encoded string, {{ {{my_string_variable_or_string_value}} | base64Decoder }}\n  + urlEncoder - encode the input string to URL-like format, {{ https://test.host.com/endpoint | urlEncoder}}\n  + urlDecorer - decode the input url-encoded string into text format, {{ urlDecoder:https%3A%2F%2Fairbyte.io | urlDecoder}}\n  + codeChallengeS256 - get the `codeChallenge` encoded value to provide additional data-provider specific authorisation values, {{ {{state_value}} | codeChallengeS256 }}\n\nExamples:\n  - The TikTok Marketing DeclarativeOAuth spec:\n  {\n    "oauth_connector_input_specification": {\n      "type": "object",\n      "additionalProperties": false,\n      "properties": {\n          "consent_url": "https://ads.tiktok.com/marketing_api/auth?{{client_id_key}}={{client_id_value}}&{{redirect_uri_key}}={{ {{redirect_uri_value}} | urlEncoder}}&{{state_key}}={{state_value}}",\n          "access_token_url": "https://business-api.tiktok.com/open_api/v1.3/oauth2/access_token/",\n          "access_token_params": {\n              "{{ auth_code_key }}": "{{ auth_code_value }}",\n              "{{ client_id_key }}": "{{ client_id_value }}",\n              "{{ client_secret_key }}": "{{ client_secret_value }}"\n          },\n          "access_token_headers": {\n              "Content-Type": "application/json",\n              "Accept": "application/json"\n          },\n          "extract_output": ["data.access_token"],\n          "client_id_key": "app_id",\n          "client_secret_key": 
"secret",\n          "auth_code_key": "auth_code"\n      }\n    }\n  }',
         | 
| 960 1019 | 
             
                    title="DeclarativeOAuth Connector Specification",
         | 
| 961 1020 | 
             
                )
         | 
| 962 1021 | 
             
                complete_oauth_output_specification: Optional[Dict[str, Any]] = Field(
         | 
| @@ -1163,6 +1222,11 @@ class LegacySessionTokenAuthenticator(BaseModel): | |
| 1163 1222 | 
             
                parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
         | 
| 1164 1223 |  | 
| 1165 1224 |  | 
| 1225 | 
            +
            class JsonParser(BaseModel):
         | 
| 1226 | 
            +
                type: Literal["JsonParser"]
         | 
| 1227 | 
            +
                encoding: Optional[str] = "utf-8"
         | 
| 1228 | 
            +
             | 
| 1229 | 
            +
             | 
| 1166 1230 | 
             
            class JsonLineParser(BaseModel):
         | 
| 1167 1231 | 
             
                type: Literal["JsonLineParser"]
         | 
| 1168 1232 | 
             
                encoding: Optional[str] = "utf-8"
         | 
| @@ -1561,7 +1625,7 @@ class RecordSelector(BaseModel): | |
| 1561 1625 |  | 
| 1562 1626 | 
             
            class GzipParser(BaseModel):
         | 
| 1563 1627 | 
             
                type: Literal["GzipParser"]
         | 
| 1564 | 
            -
                inner_parser: Union[JsonLineParser, CsvParser]
         | 
| 1628 | 
            +
                inner_parser: Union[JsonLineParser, CsvParser, JsonParser]
         | 
| 1565 1629 |  | 
| 1566 1630 |  | 
| 1567 1631 | 
             
            class Spec(BaseModel):
         | 
| @@ -1594,9 +1658,21 @@ class CompositeErrorHandler(BaseModel): | |
| 1594 1658 | 
             
                parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
         | 
| 1595 1659 |  | 
| 1596 1660 |  | 
| 1661 | 
            +
            class ZipfileDecoder(BaseModel):
         | 
| 1662 | 
            +
                class Config:
         | 
| 1663 | 
            +
                    extra = Extra.allow
         | 
| 1664 | 
            +
             | 
| 1665 | 
            +
                type: Literal["ZipfileDecoder"]
         | 
| 1666 | 
            +
                parser: Union[GzipParser, JsonParser, JsonLineParser, CsvParser] = Field(
         | 
| 1667 | 
            +
                    ...,
         | 
| 1668 | 
            +
                    description="Parser to parse the decompressed data from the zipfile(s).",
         | 
| 1669 | 
            +
                    title="Parser",
         | 
| 1670 | 
            +
                )
         | 
| 1671 | 
            +
             | 
| 1672 | 
            +
             | 
| 1597 1673 | 
             
            class CompositeRawDecoder(BaseModel):
         | 
| 1598 1674 | 
             
                type: Literal["CompositeRawDecoder"]
         | 
| 1599 | 
            -
                parser: Union[GzipParser, JsonLineParser, CsvParser]
         | 
| 1675 | 
            +
                parser: Union[GzipParser, JsonParser, JsonLineParser, CsvParser]
         | 
| 1600 1676 |  | 
| 1601 1677 |  | 
| 1602 1678 | 
             
            class DeclarativeSource1(BaseModel):
         | 
| @@ -1604,7 +1680,7 @@ class DeclarativeSource1(BaseModel): | |
| 1604 1680 | 
             
                    extra = Extra.forbid
         | 
| 1605 1681 |  | 
| 1606 1682 | 
             
                type: Literal["DeclarativeSource"]
         | 
| 1607 | 
            -
                check: CheckStream
         | 
| 1683 | 
            +
                check: Union[CheckStream, CheckDynamicStream]
         | 
| 1608 1684 | 
             
                streams: List[DeclarativeStream]
         | 
| 1609 1685 | 
             
                dynamic_streams: Optional[List[DynamicDeclarativeStream]] = None
         | 
| 1610 1686 | 
             
                version: str = Field(
         | 
| @@ -1630,7 +1706,7 @@ class DeclarativeSource2(BaseModel): | |
| 1630 1706 | 
             
                    extra = Extra.forbid
         | 
| 1631 1707 |  | 
| 1632 1708 | 
             
                type: Literal["DeclarativeSource"]
         | 
| 1633 | 
            -
                check: CheckStream
         | 
| 1709 | 
            +
                check: Union[CheckStream, CheckDynamicStream]
         | 
| 1634 1710 | 
             
                streams: Optional[List[DeclarativeStream]] = None
         | 
| 1635 1711 | 
             
                dynamic_streams: List[DynamicDeclarativeStream]
         | 
| 1636 1712 | 
             
                version: str = Field(
         | 
| @@ -1799,7 +1875,7 @@ class SessionTokenAuthenticator(BaseModel): | |
| 1799 1875 | 
             
                    description="Authentication method to use for requests sent to the API, specifying how to inject the session token.",
         | 
| 1800 1876 | 
             
                    title="Data Request Authentication",
         | 
| 1801 1877 | 
             
                )
         | 
| 1802 | 
            -
                decoder: Optional[Union[JsonDecoder, XmlDecoder]] = Field(
         | 
| 1878 | 
            +
                decoder: Optional[Union[JsonDecoder, XmlDecoder, CompositeRawDecoder]] = Field(
         | 
| 1803 1879 | 
             
                    None, description="Component used to decode the response.", title="Decoder"
         | 
| 1804 1880 | 
             
                )
         | 
| 1805 1881 | 
             
                parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
         | 
| @@ -2004,6 +2080,7 @@ class SimpleRetriever(BaseModel): | |
| 2004 2080 | 
             
                        XmlDecoder,
         | 
| 2005 2081 | 
             
                        GzipJsonDecoder,
         | 
| 2006 2082 | 
             
                        CompositeRawDecoder,
         | 
| 2083 | 
            +
                        ZipfileDecoder,
         | 
| 2007 2084 | 
             
                    ]
         | 
| 2008 2085 | 
             
                ] = Field(
         | 
| 2009 2086 | 
             
                    None,
         | 
| @@ -2040,6 +2117,10 @@ class AsyncRetriever(BaseModel): | |
| 2040 2117 | 
             
                    ...,
         | 
| 2041 2118 | 
             
                    description="Requester component that describes how to prepare HTTP requests to send to the source API to fetch the status of the running async job.",
         | 
| 2042 2119 | 
             
                )
         | 
| 2120 | 
            +
                url_requester: Optional[Union[CustomRequester, HttpRequester]] = Field(
         | 
| 2121 | 
            +
                    None,
         | 
| 2122 | 
            +
                    description="Requester component that describes how to prepare HTTP requests to send to the source API to extract the url from polling response by the completed async job.",
         | 
| 2123 | 
            +
                )
         | 
| 2043 2124 | 
             
                download_requester: Union[CustomRequester, HttpRequester] = Field(
         | 
| 2044 2125 | 
             
                    ...,
         | 
| 2045 2126 | 
             
                    description="Requester component that describes how to prepare HTTP requests to send to the source API to download the data provided by the completed async job.",
         | 
| @@ -2076,6 +2157,8 @@ class AsyncRetriever(BaseModel): | |
| 2076 2157 | 
             
                        IterableDecoder,
         | 
| 2077 2158 | 
             
                        XmlDecoder,
         | 
| 2078 2159 | 
             
                        GzipJsonDecoder,
         | 
| 2160 | 
            +
                        CompositeRawDecoder,
         | 
| 2161 | 
            +
                        ZipfileDecoder,
         | 
| 2079 2162 | 
             
                    ]
         | 
| 2080 2163 | 
             
                ] = Field(
         | 
| 2081 2164 | 
             
                    None,
         | 
| @@ -2090,6 +2173,8 @@ class AsyncRetriever(BaseModel): | |
| 2090 2173 | 
             
                        IterableDecoder,
         | 
| 2091 2174 | 
             
                        XmlDecoder,
         | 
| 2092 2175 | 
             
                        GzipJsonDecoder,
         | 
| 2176 | 
            +
                        CompositeRawDecoder,
         | 
| 2177 | 
            +
                        ZipfileDecoder,
         | 
| 2093 2178 | 
             
                    ]
         | 
| 2094 2179 | 
             
                ] = Field(
         | 
| 2095 2180 | 
             
                    None,
         |