airbyte-cdk 6.37.0.dev1__py3-none-any.whl → 6.37.2.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/connector_builder/models.py +16 -14
- airbyte_cdk/connector_builder/test_reader/helpers.py +120 -22
- airbyte_cdk/connector_builder/test_reader/message_grouper.py +16 -3
- airbyte_cdk/connector_builder/test_reader/types.py +9 -1
- airbyte_cdk/sources/declarative/auth/token_provider.py +1 -0
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +15 -0
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +5 -43
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +16 -4
- airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +1 -0
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +83 -17
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +3 -42
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +52 -63
- airbyte_cdk/sources/declarative/partition_routers/__init__.py +0 -4
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +42 -4
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +10 -3
- airbyte_cdk/sources/http_logger.py +3 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +1 -0
- {airbyte_cdk-6.37.0.dev1.dist-info → airbyte_cdk-6.37.2.dev1.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.37.0.dev1.dist-info → airbyte_cdk-6.37.2.dev1.dist-info}/RECORD +23 -24
- airbyte_cdk/sources/declarative/partition_routers/grouping_partition_router.py +0 -136
- {airbyte_cdk-6.37.0.dev1.dist-info → airbyte_cdk-6.37.2.dev1.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.37.0.dev1.dist-info → airbyte_cdk-6.37.2.dev1.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.37.0.dev1.dist-info → airbyte_cdk-6.37.2.dev1.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.37.0.dev1.dist-info → airbyte_cdk-6.37.2.dev1.dist-info}/entry_points.txt +0 -0
| @@ -21,20 +21,6 @@ class HttpRequest: | |
| 21 21 | 
             
                body: Optional[str] = None
         | 
| 22 22 |  | 
| 23 23 |  | 
| 24 | 
            -
            @dataclass
         | 
| 25 | 
            -
            class StreamReadPages:
         | 
| 26 | 
            -
                records: List[object]
         | 
| 27 | 
            -
                request: Optional[HttpRequest] = None
         | 
| 28 | 
            -
                response: Optional[HttpResponse] = None
         | 
| 29 | 
            -
             | 
| 30 | 
            -
             | 
| 31 | 
            -
            @dataclass
         | 
| 32 | 
            -
            class StreamReadSlices:
         | 
| 33 | 
            -
                pages: List[StreamReadPages]
         | 
| 34 | 
            -
                slice_descriptor: Optional[Dict[str, Any]]
         | 
| 35 | 
            -
                state: Optional[List[Dict[str, Any]]] = None
         | 
| 36 | 
            -
             | 
| 37 | 
            -
             | 
| 38 24 | 
             
            @dataclass
         | 
| 39 25 | 
             
            class LogMessage:
         | 
| 40 26 | 
             
                message: str
         | 
| @@ -46,11 +32,27 @@ class LogMessage: | |
| 46 32 | 
             
            @dataclass
         | 
| 47 33 | 
             
            class AuxiliaryRequest:
         | 
| 48 34 | 
             
                title: str
         | 
| 35 | 
            +
                type: str
         | 
| 49 36 | 
             
                description: str
         | 
| 50 37 | 
             
                request: HttpRequest
         | 
| 51 38 | 
             
                response: HttpResponse
         | 
| 52 39 |  | 
| 53 40 |  | 
| 41 | 
            +
            @dataclass
         | 
| 42 | 
            +
            class StreamReadPages:
         | 
| 43 | 
            +
                records: List[object]
         | 
| 44 | 
            +
                request: Optional[HttpRequest] = None
         | 
| 45 | 
            +
                response: Optional[HttpResponse] = None
         | 
| 46 | 
            +
             | 
| 47 | 
            +
             | 
| 48 | 
            +
            @dataclass
         | 
| 49 | 
            +
            class StreamReadSlices:
         | 
| 50 | 
            +
                pages: List[StreamReadPages]
         | 
| 51 | 
            +
                slice_descriptor: Optional[Dict[str, Any]]
         | 
| 52 | 
            +
                state: Optional[List[Dict[str, Any]]] = None
         | 
| 53 | 
            +
                auxiliary_requests: Optional[List[AuxiliaryRequest]] = None
         | 
| 54 | 
            +
             | 
| 55 | 
            +
             | 
| 54 56 | 
             
            @dataclass
         | 
| 55 57 | 
             
            class StreamRead(object):
         | 
| 56 58 | 
             
                logs: List[LogMessage]
         | 
| @@ -28,7 +28,7 @@ from airbyte_cdk.utils.schema_inferrer import ( | |
| 28 28 | 
             
                SchemaInferrer,
         | 
| 29 29 | 
             
            )
         | 
| 30 30 |  | 
| 31 | 
            -
            from .types import LOG_MESSAGES_OUTPUT_TYPE
         | 
| 31 | 
            +
            from .types import ASYNC_AUXILIARY_REQUEST_TYPES, LOG_MESSAGES_OUTPUT_TYPE
         | 
| 32 32 |  | 
| 33 33 | 
             
            # -------
         | 
| 34 34 | 
             
            # Parsers
         | 
| @@ -226,7 +226,8 @@ def should_close_page( | |
| 226 226 | 
             
                    at_least_one_page_in_group
         | 
| 227 227 | 
             
                    and is_log_message(message)
         | 
| 228 228 | 
             
                    and (
         | 
| 229 | 
            -
                        is_page_http_request(json_message) | 
| 229 | 
            +
                        is_page_http_request(json_message)
         | 
| 230 | 
            +
                        or message.log.message.startswith(SliceLogger.SLICE_LOG_PREFIX)  # type: ignore[union-attr] # AirbyteMessage with MessageType.LOG has log.message
         | 
| 230 231 | 
             
                    )
         | 
| 231 232 | 
             
                )
         | 
| 232 233 |  | 
| @@ -330,6 +331,10 @@ def is_auxiliary_http_request(message: Optional[Dict[str, Any]]) -> bool: | |
| 330 331 | 
             
                return is_http_log(message) and message.get("http", {}).get("is_auxiliary", False)
         | 
| 331 332 |  | 
| 332 333 |  | 
| 334 | 
            +
            def is_async_auxiliary_request(message: AuxiliaryRequest) -> bool:
         | 
| 335 | 
            +
                return message.type in ASYNC_AUXILIARY_REQUEST_TYPES
         | 
| 336 | 
            +
             | 
| 337 | 
            +
             | 
| 333 338 | 
             
            def is_log_message(message: AirbyteMessage) -> bool:
         | 
| 334 339 | 
             
                """
         | 
| 335 340 | 
             
                Determines whether the provided message is of type LOG.
         | 
| @@ -413,6 +418,7 @@ def handle_current_slice( | |
| 413 418 | 
             
                current_slice_pages: List[StreamReadPages],
         | 
| 414 419 | 
             
                current_slice_descriptor: Optional[Dict[str, Any]] = None,
         | 
| 415 420 | 
             
                latest_state_message: Optional[Dict[str, Any]] = None,
         | 
| 421 | 
            +
                auxiliary_requests: Optional[List[AuxiliaryRequest]] = None,
         | 
| 416 422 | 
             
            ) -> StreamReadSlices:
         | 
| 417 423 | 
             
                """
         | 
| 418 424 | 
             
                Handles the current slice by packaging its pages, descriptor, state, and auxiliary requests into a StreamReadSlices instance.
         | 
| @@ -421,6 +427,7 @@ def handle_current_slice( | |
| 421 427 | 
             
                    current_slice_pages (List[StreamReadPages]): The pages to be included in the slice.
         | 
| 422 428 | 
             
                    current_slice_descriptor (Optional[Dict[str, Any]]): Descriptor for the current slice, optional.
         | 
| 423 429 | 
             
                    latest_state_message (Optional[Dict[str, Any]]): The latest state message, optional.
         | 
| 430 | 
            +
                    auxiliary_requests (Optional[List[AuxiliaryRequest]]): The auxiliary requests to include, optional.
         | 
| 424 431 |  | 
| 425 432 | 
             
                Returns:
         | 
| 426 433 | 
             
                    StreamReadSlices: An object containing the current slice's pages, descriptor, state, and auxiliary requests.
         | 
| @@ -429,6 +436,7 @@ def handle_current_slice( | |
| 429 436 | 
             
                    pages=current_slice_pages,
         | 
| 430 437 | 
             
                    slice_descriptor=current_slice_descriptor,
         | 
| 431 438 | 
             
                    state=[latest_state_message] if latest_state_message else [],
         | 
| 439 | 
            +
                    auxiliary_requests=auxiliary_requests if auxiliary_requests else [],
         | 
| 432 440 | 
             
                )
         | 
| 433 441 |  | 
| 434 442 |  | 
| @@ -486,29 +494,24 @@ def handle_auxiliary_request(json_message: Dict[str, JsonType]) -> AuxiliaryRequ | |
| 486 494 | 
             
                Raises:
         | 
| 487 495 | 
             
                    ValueError: If any of the "airbyte_cdk", "stream", or "http" fields is not a dictionary.
         | 
| 488 496 | 
             
                """
         | 
| 489 | 
            -
                airbyte_cdk = json_message.get("airbyte_cdk", {})
         | 
| 490 | 
            -
             | 
| 491 | 
            -
                if not isinstance(airbyte_cdk, dict):
         | 
| 492 | 
            -
                    raise ValueError(
         | 
| 493 | 
            -
                        f"Expected airbyte_cdk to be a dict, got {airbyte_cdk} of type {type(airbyte_cdk)}"
         | 
| 494 | 
            -
                    )
         | 
| 495 | 
            -
             | 
| 496 | 
            -
                stream = airbyte_cdk.get("stream", {})
         | 
| 497 497 |  | 
| 498 | 
            -
                 | 
| 499 | 
            -
             | 
| 498 | 
            +
                airbyte_cdk = get_airbyte_cdk_from_message(json_message)
         | 
| 499 | 
            +
                stream = get_stream_from_airbyte_cdk(airbyte_cdk)
         | 
| 500 | 
            +
                title_prefix = get_auxiliary_request_title_prefix(stream)
         | 
| 501 | 
            +
                http = get_http_property_from_message(json_message)
         | 
| 502 | 
            +
                request_type = get_auxiliary_request_type(stream, http)
         | 
| 500 503 |  | 
| 501 | 
            -
                 | 
| 502 | 
            -
                 | 
| 503 | 
            -
             | 
| 504 | 
            -
                 | 
| 505 | 
            -
                    raise ValueError(f"Expected http to be a dict, got {http} of type {type(http)}")
         | 
| 504 | 
            +
                title = title_prefix + str(http.get("title", None))
         | 
| 505 | 
            +
                description = str(http.get("description", None))
         | 
| 506 | 
            +
                request = create_request_from_log_message(json_message)
         | 
| 507 | 
            +
                response = create_response_from_log_message(json_message)
         | 
| 506 508 |  | 
| 507 509 | 
             
                return AuxiliaryRequest(
         | 
| 508 | 
            -
                    title= | 
| 509 | 
            -
                     | 
| 510 | 
            -
                     | 
| 511 | 
            -
                     | 
| 510 | 
            +
                    title=title,
         | 
| 511 | 
            +
                    type=request_type,
         | 
| 512 | 
            +
                    description=description,
         | 
| 513 | 
            +
                    request=request,
         | 
| 514 | 
            +
                    response=response,
         | 
| 512 515 | 
             
                )
         | 
| 513 516 |  | 
| 514 517 |  | 
| @@ -558,7 +561,8 @@ def handle_log_message( | |
| 558 561 | 
             
                    at_least_one_page_in_group,
         | 
| 559 562 | 
             
                    current_page_request,
         | 
| 560 563 | 
             
                    current_page_response,
         | 
| 561 | 
            -
                    auxiliary_request | 
| 564 | 
            +
                    auxiliary_request,
         | 
| 565 | 
            +
                    log_message,
         | 
| 562 566 | 
             
                )
         | 
| 563 567 |  | 
| 564 568 |  | 
| @@ -589,3 +593,97 @@ def handle_record_message( | |
| 589 593 | 
             
                    datetime_format_inferrer.accumulate(message.record)  # type: ignore
         | 
| 590 594 |  | 
| 591 595 | 
             
                return records_count
         | 
| 596 | 
            +
             | 
| 597 | 
            +
             | 
| 598 | 
            +
            # -------
         | 
| 599 | 
            +
            # Reusable Getters
         | 
| 600 | 
            +
            # -------
         | 
| 601 | 
            +
             | 
| 602 | 
            +
             | 
| 603 | 
            +
            def get_airbyte_cdk_from_message(json_message: Dict[str, JsonType]) -> dict:  # type: ignore
         | 
| 604 | 
            +
                """
         | 
| 605 | 
            +
                Retrieves the "airbyte_cdk" dictionary from the provided JSON message.
         | 
| 606 | 
            +
             | 
| 607 | 
            +
                This function validates that the extracted "airbyte_cdk" is of type dict,
         | 
| 608 | 
            +
                raising a ValueError if the validation fails.
         | 
| 609 | 
            +
             | 
| 610 | 
            +
                Parameters:
         | 
| 611 | 
            +
                    json_message (Dict[str, JsonType]): A dictionary representing the JSON message.
         | 
| 612 | 
            +
             | 
| 613 | 
            +
                Returns:
         | 
| 614 | 
            +
                    dict: The "airbyte_cdk" dictionary extracted from the JSON message.
         | 
| 615 | 
            +
             | 
| 616 | 
            +
                Raises:
         | 
| 617 | 
            +
                    ValueError: If the "airbyte_cdk" field is not a dictionary.
         | 
| 618 | 
            +
                """
         | 
| 619 | 
            +
                airbyte_cdk = json_message.get("airbyte_cdk", {})
         | 
| 620 | 
            +
             | 
| 621 | 
            +
                if not isinstance(airbyte_cdk, dict):
         | 
| 622 | 
            +
                    raise ValueError(
         | 
| 623 | 
            +
                        f"Expected airbyte_cdk to be a dict, got {airbyte_cdk} of type {type(airbyte_cdk)}"
         | 
| 624 | 
            +
                    )
         | 
| 625 | 
            +
             | 
| 626 | 
            +
                return airbyte_cdk
         | 
| 627 | 
            +
             | 
| 628 | 
            +
             | 
| 629 | 
            +
            def get_stream_from_airbyte_cdk(airbyte_cdk: dict) -> dict:  # type: ignore
         | 
| 630 | 
            +
                """
         | 
| 631 | 
            +
                Retrieves the "stream" dictionary from the provided "airbyte_cdk" dictionary.
         | 
| 632 | 
            +
             | 
| 633 | 
            +
                This function ensures that the extracted "stream" is of type dict,
         | 
| 634 | 
            +
                raising a ValueError if the validation fails.
         | 
| 635 | 
            +
             | 
| 636 | 
            +
                Parameters:
         | 
| 637 | 
            +
                    airbyte_cdk (dict): The dictionary representing the Airbyte CDK data.
         | 
| 638 | 
            +
             | 
| 639 | 
            +
                Returns:
         | 
| 640 | 
            +
                    dict: The "stream" dictionary extracted from the Airbyte CDK data.
         | 
| 641 | 
            +
             | 
| 642 | 
            +
                Raises:
         | 
| 643 | 
            +
                    ValueError: If the "stream" field is not a dictionary.
         | 
| 644 | 
            +
                """
         | 
| 645 | 
            +
             | 
| 646 | 
            +
                stream = airbyte_cdk.get("stream", {})
         | 
| 647 | 
            +
             | 
| 648 | 
            +
                if not isinstance(stream, dict):
         | 
| 649 | 
            +
                    raise ValueError(f"Expected stream to be a dict, got {stream} of type {type(stream)}")
         | 
| 650 | 
            +
             | 
| 651 | 
            +
                return stream
         | 
| 652 | 
            +
             | 
| 653 | 
            +
             | 
| 654 | 
            +
            def get_auxiliary_request_title_prefix(stream: dict) -> str:  # type: ignore
         | 
| 655 | 
            +
                """
         | 
| 656 | 
            +
                Returns the title prefix "Parent stream: " when the stream is flagged as a substream ("is_substream"), otherwise an empty string.
         | 
| 657 | 
            +
                """
         | 
| 658 | 
            +
                return "Parent stream: " if stream.get("is_substream", False) else ""
         | 
| 659 | 
            +
             | 
| 660 | 
            +
             | 
| 661 | 
            +
            def get_http_property_from_message(json_message: Dict[str, JsonType]) -> dict:  # type: ignore
         | 
| 662 | 
            +
                """
         | 
| 663 | 
            +
                Retrieves the "http" dictionary from the provided JSON message.
         | 
| 664 | 
            +
             | 
| 665 | 
            +
                This function validates that the extracted "http" is of type dict,
         | 
| 666 | 
            +
                raising a ValueError if the validation fails.
         | 
| 667 | 
            +
             | 
| 668 | 
            +
                Parameters:
         | 
| 669 | 
            +
                    json_message (Dict[str, JsonType]): A dictionary representing the JSON message.
         | 
| 670 | 
            +
             | 
| 671 | 
            +
                Returns:
         | 
| 672 | 
            +
                    dict: The "http" dictionary extracted from the JSON message.
         | 
| 673 | 
            +
             | 
| 674 | 
            +
                Raises:
         | 
| 675 | 
            +
                    ValueError: If the "http" field is not a dictionary.
         | 
| 676 | 
            +
                """
         | 
| 677 | 
            +
                http = json_message.get("http", {})
         | 
| 678 | 
            +
             | 
| 679 | 
            +
                if not isinstance(http, dict):
         | 
| 680 | 
            +
                    raise ValueError(f"Expected http to be a dict, got {http} of type {type(http)}")
         | 
| 681 | 
            +
             | 
| 682 | 
            +
                return http
         | 
| 683 | 
            +
             | 
| 684 | 
            +
             | 
| 685 | 
            +
            def get_auxiliary_request_type(stream: dict, http: dict) -> str:  # type: ignore
         | 
| 686 | 
            +
                """
         | 
| 687 | 
            +
                Returns "PARENT_STREAM" when the stream is flagged as a substream ("is_substream"); otherwise the string form of the HTTP "type" property ("None" when it is absent).
         | 
| 688 | 
            +
                """
         | 
| 689 | 
            +
                return "PARENT_STREAM" if stream.get("is_substream", False) else str(http.get("type", None))
         | 
| @@ -6,6 +6,7 @@ | |
| 6 6 | 
             
            from typing import Any, Dict, Iterator, List, Mapping, Optional
         | 
| 7 7 |  | 
| 8 8 | 
             
            from airbyte_cdk.connector_builder.models import (
         | 
| 9 | 
            +
                AuxiliaryRequest,
         | 
| 9 10 | 
             
                HttpRequest,
         | 
| 10 11 | 
             
                HttpResponse,
         | 
| 11 12 | 
             
                StreamReadPages,
         | 
| @@ -24,6 +25,7 @@ from .helpers import ( | |
| 24 25 | 
             
                handle_current_slice,
         | 
| 25 26 | 
             
                handle_log_message,
         | 
| 26 27 | 
             
                handle_record_message,
         | 
| 28 | 
            +
                is_async_auxiliary_request,
         | 
| 27 29 | 
             
                is_config_update_message,
         | 
| 28 30 | 
             
                is_log_message,
         | 
| 29 31 | 
             
                is_record_message,
         | 
| @@ -89,6 +91,7 @@ def get_message_groups( | |
| 89 91 | 
             
                current_page_request: Optional[HttpRequest] = None
         | 
| 90 92 | 
             
                current_page_response: Optional[HttpResponse] = None
         | 
| 91 93 | 
             
                latest_state_message: Optional[Dict[str, Any]] = None
         | 
| 94 | 
            +
                slice_auxiliary_requests: List[AuxiliaryRequest] = []
         | 
| 92 95 |  | 
| 93 96 | 
             
                while records_count < limit and (message := next(messages, None)):
         | 
| 94 97 | 
             
                    json_message = airbyte_message_to_json(message)
         | 
| @@ -106,6 +109,7 @@ def get_message_groups( | |
| 106 109 | 
             
                            current_slice_pages,
         | 
| 107 110 | 
             
                            current_slice_descriptor,
         | 
| 108 111 | 
             
                            latest_state_message,
         | 
| 112 | 
            +
                            slice_auxiliary_requests,
         | 
| 109 113 | 
             
                        )
         | 
| 110 114 | 
             
                        current_slice_descriptor = parse_slice_description(message.log.message)  # type: ignore
         | 
| 111 115 | 
             
                        current_slice_pages = []
         | 
| @@ -118,7 +122,8 @@ def get_message_groups( | |
| 118 122 | 
             
                            at_least_one_page_in_group,
         | 
| 119 123 | 
             
                            current_page_request,
         | 
| 120 124 | 
             
                            current_page_response,
         | 
| 121 | 
            -
                             | 
| 125 | 
            +
                            auxiliary_request,
         | 
| 126 | 
            +
                            log_message,
         | 
| 122 127 | 
             
                        ) = handle_log_message(
         | 
| 123 128 | 
             
                            message,
         | 
| 124 129 | 
             
                            json_message,
         | 
| @@ -126,8 +131,15 @@ def get_message_groups( | |
| 126 131 | 
             
                            current_page_request,
         | 
| 127 132 | 
             
                            current_page_response,
         | 
| 128 133 | 
             
                        )
         | 
| 129 | 
            -
             | 
| 130 | 
            -
             | 
| 134 | 
            +
             | 
| 135 | 
            +
                        if auxiliary_request:
         | 
| 136 | 
            +
                            if is_async_auxiliary_request(auxiliary_request):
         | 
| 137 | 
            +
                                slice_auxiliary_requests.append(auxiliary_request)
         | 
| 138 | 
            +
                            else:
         | 
| 139 | 
            +
                                yield auxiliary_request
         | 
| 140 | 
            +
             | 
| 141 | 
            +
                        if log_message:
         | 
| 142 | 
            +
                            yield log_message
         | 
| 131 143 | 
             
                    elif is_trace_with_error(message):
         | 
| 132 144 | 
             
                        if message.trace is not None:
         | 
| 133 145 | 
             
                            yield message.trace
         | 
| @@ -157,4 +169,5 @@ def get_message_groups( | |
| 157 169 | 
             
                            current_slice_pages,
         | 
| 158 170 | 
             
                            current_slice_descriptor,
         | 
| 159 171 | 
             
                            latest_state_message,
         | 
| 172 | 
            +
                            slice_auxiliary_requests,
         | 
| 160 173 | 
             
                        )
         | 
| @@ -71,5 +71,13 @@ LOG_MESSAGES_OUTPUT_TYPE = tuple[ | |
| 71 71 | 
             
                bool,
         | 
| 72 72 | 
             
                HttpRequest | None,
         | 
| 73 73 | 
             
                HttpResponse | None,
         | 
| 74 | 
            -
                AuxiliaryRequest |  | 
| 74 | 
            +
                AuxiliaryRequest | None,
         | 
| 75 | 
            +
                AirbyteLogMessage | None,
         | 
| 76 | 
            +
            ]
         | 
| 77 | 
            +
             | 
| 78 | 
            +
            ASYNC_AUXILIARY_REQUEST_TYPES = [
         | 
| 79 | 
            +
                "ASYNC_CREATE",
         | 
| 80 | 
            +
                "ASYNC_POLL",
         | 
| 81 | 
            +
                "ASYNC_ABORT",
         | 
| 82 | 
            +
                "ASYNC_DELETE",
         | 
| 75 83 | 
             
            ]
         | 
| @@ -44,6 +44,7 @@ from airbyte_cdk.sources.declarative.types import ConnectionDefinition | |
| 44 44 | 
             
            from airbyte_cdk.sources.source import TState
         | 
| 45 45 | 
             
            from airbyte_cdk.sources.streams import Stream
         | 
| 46 46 | 
             
            from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
         | 
| 47 | 
            +
            from airbyte_cdk.sources.streams.concurrent.abstract_stream_facade import AbstractStreamFacade
         | 
| 47 48 | 
             
            from airbyte_cdk.sources.streams.concurrent.availability_strategy import (
         | 
| 48 49 | 
             
                AlwaysAvailableAvailabilityStrategy,
         | 
| 49 50 | 
             
            )
         | 
| @@ -118,6 +119,12 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]): | |
| 118 119 | 
             
                        message_repository=self.message_repository,
         | 
| 119 120 | 
             
                    )
         | 
| 120 121 |  | 
| 122 | 
            +
                # TODO: Remove this. This property is necessary to safely migrate Stripe during the transition state.
         | 
| 123 | 
            +
                @property
         | 
| 124 | 
            +
                def is_partially_declarative(self) -> bool:
         | 
| 125 | 
            +
                    """This flag is used to avoid unexpected AbstractStreamFacade processing as concurrent streams."""
         | 
| 126 | 
            +
                    return False
         | 
| 127 | 
            +
             | 
| 121 128 | 
             
                def read(
         | 
| 122 129 | 
             
                    self,
         | 
| 123 130 | 
             
                    logger: logging.Logger,
         | 
| @@ -369,6 +376,14 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]): | |
| 369 376 | 
             
                                )
         | 
| 370 377 | 
             
                            else:
         | 
| 371 378 | 
             
                                synchronous_streams.append(declarative_stream)
         | 
| 379 | 
            +
                        # TODO: Remove this. This check is necessary to safely migrate Stripe during the transition state.
         | 
| 380 | 
            +
                        # The condition below ensures that concurrent support is not lost for sources that already supported
         | 
| 381 | 
            +
                        # it before migration but are now only partially migrated to a declarative implementation (e.g., Stripe).
         | 
| 382 | 
            +
                        elif (
         | 
| 383 | 
            +
                            isinstance(declarative_stream, AbstractStreamFacade)
         | 
| 384 | 
            +
                            and self.is_partially_declarative
         | 
| 385 | 
            +
                        ):
         | 
| 386 | 
            +
                            concurrent_streams.append(declarative_stream.get_underlying_stream())
         | 
| 372 387 | 
             
                        else:
         | 
| 373 388 | 
             
                            synchronous_streams.append(declarative_stream)
         | 
| 374 389 |  | 
| @@ -1490,7 +1490,11 @@ definitions: | |
| 1490 1490 | 
             
                  limit:
         | 
| 1491 1491 | 
             
                    title: Limit
         | 
| 1492 1492 | 
             
                    description: The maximum number of calls allowed within the interval.
         | 
| 1493 | 
            -
                     | 
| 1493 | 
            +
                    anyOf:
         | 
| 1494 | 
            +
                      - type: integer
         | 
| 1495 | 
            +
                      - type: string
         | 
| 1496 | 
            +
                    interpolation_context:
         | 
| 1497 | 
            +
                      - config
         | 
| 1494 1498 | 
             
                  interval:
         | 
| 1495 1499 | 
             
                    title: Interval
         | 
| 1496 1500 | 
             
                    description: The time interval for the rate limit.
         | 
| @@ -3130,14 +3134,12 @@ definitions: | |
| 3130 3134 | 
             
                      - "$ref": "#/definitions/CustomPartitionRouter"
         | 
| 3131 3135 | 
             
                      - "$ref": "#/definitions/ListPartitionRouter"
         | 
| 3132 3136 | 
             
                      - "$ref": "#/definitions/SubstreamPartitionRouter"
         | 
| 3133 | 
            -
                      - "$ref": "#/definitions/GroupingPartitionRouter"
         | 
| 3134 3137 | 
             
                      - type: array
         | 
| 3135 3138 | 
             
                        items:
         | 
| 3136 3139 | 
             
                          anyOf:
         | 
| 3137 3140 | 
             
                            - "$ref": "#/definitions/CustomPartitionRouter"
         | 
| 3138 3141 | 
             
                            - "$ref": "#/definitions/ListPartitionRouter"
         | 
| 3139 3142 | 
             
                            - "$ref": "#/definitions/SubstreamPartitionRouter"
         | 
| 3140 | 
            -
                            - "$ref": "#/definitions/GroupingPartitionRouter"
         | 
| 3141 3143 | 
             
                  decoder:
         | 
| 3142 3144 | 
             
                    title: Decoder
         | 
| 3143 3145 | 
             
                    description: Component decoding the response so records can be extracted.
         | 
| @@ -3292,14 +3294,12 @@ definitions: | |
| 3292 3294 | 
             
                      - "$ref": "#/definitions/CustomPartitionRouter"
         | 
| 3293 3295 | 
             
                      - "$ref": "#/definitions/ListPartitionRouter"
         | 
| 3294 3296 | 
             
                      - "$ref": "#/definitions/SubstreamPartitionRouter"
         | 
| 3295 | 
            -
                      - "$ref": "#/definitions/GroupingPartitionRouter"
         | 
| 3296 3297 | 
             
                      - type: array
         | 
| 3297 3298 | 
             
                        items:
         | 
| 3298 3299 | 
             
                          anyOf:
         | 
| 3299 3300 | 
             
                            - "$ref": "#/definitions/CustomPartitionRouter"
         | 
| 3300 3301 | 
             
                            - "$ref": "#/definitions/ListPartitionRouter"
         | 
| 3301 3302 | 
             
                            - "$ref": "#/definitions/SubstreamPartitionRouter"
         | 
| 3302 | 
            -
                            - "$ref": "#/definitions/GroupingPartitionRouter"
         | 
| 3303 3303 | 
             
                  decoder:
         | 
| 3304 3304 | 
             
                    title: Decoder
         | 
| 3305 3305 | 
             
                    description: Component decoding the response so records can be extracted.
         | 
| @@ -3416,44 +3416,6 @@ definitions: | |
| 3416 3416 | 
             
                  $parameters:
         | 
| 3417 3417 | 
             
                    type: object
         | 
| 3418 3418 | 
             
                    additionalProperties: true
         | 
| 3419 | 
            -
              GroupingPartitionRouter:
         | 
| 3420 | 
            -
                title: Grouping Partition Router
         | 
| 3421 | 
            -
                description: >
         | 
| 3422 | 
            -
                  A decorator on top of a partition router that groups partitions into batches of a specified size.
         | 
| 3423 | 
            -
                  This is useful for APIs that support filtering by multiple partition keys in a single request.
         | 
| 3424 | 
            -
                  Note that per-partition incremental syncs may not work as expected because the grouping
         | 
| 3425 | 
            -
                  of partitions might change between syncs, potentially leading to inconsistent state tracking.
         | 
| 3426 | 
            -
                type: object
         | 
| 3427 | 
            -
                required:
         | 
| 3428 | 
            -
                  - type
         | 
| 3429 | 
            -
                  - group_size
         | 
| 3430 | 
            -
                  - underlying_partition_router
         | 
| 3431 | 
            -
                properties:
         | 
| 3432 | 
            -
                  type:
         | 
| 3433 | 
            -
                    type: string
         | 
| 3434 | 
            -
                    enum: [GroupingPartitionRouter]
         | 
| 3435 | 
            -
                  group_size:
         | 
| 3436 | 
            -
                    title: Group Size
         | 
| 3437 | 
            -
                    description: The number of partitions to include in each group. This determines how many partition values are batched together in a single slice.
         | 
| 3438 | 
            -
                    type: integer
         | 
| 3439 | 
            -
                    examples:
         | 
| 3440 | 
            -
                      - 10
         | 
| 3441 | 
            -
                      - 50
         | 
| 3442 | 
            -
                  underlying_partition_router:
         | 
| 3443 | 
            -
                    title: Underlying Partition Router
         | 
| 3444 | 
            -
                    description: The partition router whose output will be grouped. This can be any valid partition router component.
         | 
| 3445 | 
            -
                    anyOf:
         | 
| 3446 | 
            -
                      - "$ref": "#/definitions/CustomPartitionRouter"
         | 
| 3447 | 
            -
                      - "$ref": "#/definitions/ListPartitionRouter"
         | 
| 3448 | 
            -
                      - "$ref": "#/definitions/SubstreamPartitionRouter"
         | 
| 3449 | 
            -
                  deduplicate:
         | 
| 3450 | 
            -
                    title: Deduplicate Partitions
         | 
| 3451 | 
            -
                    description: If true, ensures that partitions are unique within each group by removing duplicates based on the partition key.
         | 
| 3452 | 
            -
                    type: boolean
         | 
| 3453 | 
            -
                    default: true
         | 
| 3454 | 
            -
                  $parameters:
         | 
| 3455 | 
            -
                    type: object
         | 
| 3456 | 
            -
                    additionalProperties: true
         | 
| 3457 3419 | 
             
              WaitUntilTimeFromHeader:
         | 
| 3458 3420 | 
             
                title: Wait Until Time Defined In Response Header
         | 
| 3459 3421 | 
             
                description: Extract time at which we can retry the request from response header and wait for the difference between now and that time.
         | 
| @@ -5,7 +5,7 @@ import json | |
| 5 5 | 
             
            import logging
         | 
| 6 6 | 
             
            from abc import ABC, abstractmethod
         | 
| 7 7 | 
             
            from dataclasses import dataclass
         | 
| 8 | 
            -
            from io import BufferedIOBase, TextIOWrapper
         | 
| 8 | 
            +
            from io import BufferedIOBase, BytesIO, TextIOWrapper
         | 
| 9 9 | 
             
            from typing import Any, Generator, MutableMapping, Optional
         | 
| 10 10 |  | 
| 11 11 | 
             
            import orjson
         | 
| @@ -107,6 +107,16 @@ class CsvParser(Parser): | |
| 107 107 | 
             
                encoding: Optional[str] = "utf-8"
         | 
| 108 108 | 
             
                delimiter: Optional[str] = ","
         | 
| 109 109 |  | 
| 110 | 
            +
                def _get_delimiter(self) -> Optional[str]:
         | 
| 111 | 
            +
                    """
         | 
| 112 | 
            +
                    Get delimiter from the configuration. Check for the escape character and decode it.
         | 
| 113 | 
            +
                    """
         | 
| 114 | 
            +
                    if self.delimiter is not None:
         | 
| 115 | 
            +
                        if self.delimiter.startswith("\\"):
         | 
| 116 | 
            +
                            self.delimiter = self.delimiter.encode("utf-8").decode("unicode_escape")
         | 
| 117 | 
            +
             | 
| 118 | 
            +
                    return self.delimiter
         | 
| 119 | 
            +
             | 
| 110 120 | 
             
                def parse(
         | 
| 111 121 | 
             
                    self,
         | 
| 112 122 | 
             
                    data: BufferedIOBase,
         | 
| @@ -114,9 +124,11 @@ class CsvParser(Parser): | |
| 114 124 | 
             
                    """
         | 
| 115 125 | 
             
                    Parse CSV data from decompressed bytes.
         | 
| 116 126 | 
             
                    """
         | 
| 117 | 
            -
                     | 
| 118 | 
            -
                     | 
| 119 | 
            -
                     | 
| 127 | 
            +
                    bytes_data = BytesIO(data.read())
         | 
| 128 | 
            +
                    text_data = TextIOWrapper(bytes_data, encoding=self.encoding)  # type: ignore
         | 
| 129 | 
            +
                    reader = csv.DictReader(text_data, delimiter=self._get_delimiter() or ",")
         | 
| 130 | 
            +
                    for row in reader:
         | 
| 131 | 
            +
                        yield row
         | 
| 120 132 |  | 
| 121 133 |  | 
| 122 134 | 
             
            @dataclass
         | 
| @@ -136,6 +136,7 @@ class ResponseToFileExtractor(RecordExtractor): | |
| 136 136 | 
             
                    """
         | 
| 137 137 |  | 
| 138 138 | 
             
                    try:
         | 
| 139 | 
            +
                        # TODO: Add support for other file types, like `json`, with `pd.read_json()`
         | 
| 139 140 | 
             
                        with open(path, "r", encoding=file_encoding) as data:
         | 
| 140 141 | 
             
                            chunks = pd.read_csv(
         | 
| 141 142 | 
             
                                data, chunksize=chunk_size, iterator=True, dialect="unix", dtype=object
         | 
| @@ -95,6 +95,10 @@ class ConcurrentPerPartitionCursor(Cursor): | |
| 95 95 | 
             
                    # the oldest partitions can be efficiently removed, maintaining the most recent partitions.
         | 
| 96 96 | 
             
                    self._cursor_per_partition: OrderedDict[str, ConcurrentCursor] = OrderedDict()
         | 
| 97 97 | 
             
                    self._semaphore_per_partition: OrderedDict[str, threading.Semaphore] = OrderedDict()
         | 
| 98 | 
            +
             | 
| 99 | 
            +
                    # Parent-state tracking: store each partition’s parent state in creation order
         | 
| 100 | 
            +
                    self._partition_parent_state_map: OrderedDict[str, Mapping[str, Any]] = OrderedDict()
         | 
| 101 | 
            +
             | 
| 98 102 | 
             
                    self._finished_partitions: set[str] = set()
         | 
| 99 103 | 
             
                    self._lock = threading.Lock()
         | 
| 100 104 | 
             
                    self._timer = Timer()
         | 
| @@ -155,11 +159,62 @@ class ConcurrentPerPartitionCursor(Cursor): | |
| 155 159 | 
             
                                and self._semaphore_per_partition[partition_key]._value == 0
         | 
| 156 160 | 
             
                            ):
         | 
| 157 161 | 
             
                                self._update_global_cursor(cursor.state[self.cursor_field.cursor_field_key])
         | 
| 158 | 
            -
             | 
| 162 | 
            +
             | 
| 163 | 
            +
                        self._check_and_update_parent_state()
         | 
| 164 | 
            +
             | 
| 165 | 
            +
                        self._emit_state_message()
         | 
| 166 | 
            +
             | 
| 167 | 
            +
                def _check_and_update_parent_state(self) -> None:
         | 
| 168 | 
            +
                    """
         | 
| 169 | 
            +
                    Pop the leftmost partition state from _partition_parent_state_map only if
         | 
| 170 | 
            +
                    *all partitions* up to (and including) that partition key in _semaphore_per_partition
         | 
| 171 | 
            +
                    are fully finished (i.e. in _finished_partitions and semaphore._value == 0).
         | 
| 172 | 
            +
                    Additionally, delete finished semaphores with a value of 0 to free up memory,
         | 
| 173 | 
            +
                    as they are only needed to track errors and completion status.
         | 
| 174 | 
            +
                    """
         | 
| 175 | 
            +
                    last_closed_state = None
         | 
| 176 | 
            +
             | 
| 177 | 
            +
                    while self._partition_parent_state_map:
         | 
| 178 | 
            +
                        # Look at the earliest partition key in creation order
         | 
| 179 | 
            +
                        earliest_key = next(iter(self._partition_parent_state_map))
         | 
| 180 | 
            +
             | 
| 181 | 
            +
                        # Verify ALL partitions from the left up to earliest_key are finished
         | 
| 182 | 
            +
                        all_left_finished = True
         | 
| 183 | 
            +
                        for p_key, sem in list(
         | 
| 184 | 
            +
                            self._semaphore_per_partition.items()
         | 
| 185 | 
            +
                        ):  # Use list to allow modification during iteration
         | 
| 186 | 
            +
                            # If any earlier partition is still not finished, we must stop
         | 
| 187 | 
            +
                            if p_key not in self._finished_partitions or sem._value != 0:
         | 
| 188 | 
            +
                                all_left_finished = False
         | 
| 189 | 
            +
                                break
         | 
| 190 | 
            +
                            # Once we've reached earliest_key in the semaphore order, we can stop checking
         | 
| 191 | 
            +
                            if p_key == earliest_key:
         | 
| 192 | 
            +
                                break
         | 
| 193 | 
            +
             | 
| 194 | 
            +
                        # If the partitions up to earliest_key are not all finished, break the while-loop
         | 
| 195 | 
            +
                        if not all_left_finished:
         | 
| 196 | 
            +
                            break
         | 
| 197 | 
            +
             | 
| 198 | 
            +
                        # Pop the leftmost entry from parent-state map
         | 
| 199 | 
            +
                        _, closed_parent_state = self._partition_parent_state_map.popitem(last=False)
         | 
| 200 | 
            +
                        last_closed_state = closed_parent_state
         | 
| 201 | 
            +
             | 
| 202 | 
            +
                        # Clean up finished semaphores with value 0 up to and including earliest_key
         | 
| 203 | 
            +
                        for p_key in list(self._semaphore_per_partition.keys()):
         | 
| 204 | 
            +
                            sem = self._semaphore_per_partition[p_key]
         | 
| 205 | 
            +
                            if p_key in self._finished_partitions and sem._value == 0:
         | 
| 206 | 
            +
                                del self._semaphore_per_partition[p_key]
         | 
| 207 | 
            +
                                logger.debug(f"Deleted finished semaphore for partition {p_key} with value 0")
         | 
| 208 | 
            +
                            if p_key == earliest_key:
         | 
| 209 | 
            +
                                break
         | 
| 210 | 
            +
             | 
| 211 | 
            +
                    # Update _parent_state if we popped at least one partition
         | 
| 212 | 
            +
                    if last_closed_state is not None:
         | 
| 213 | 
            +
                        self._parent_state = last_closed_state
         | 
| 159 214 |  | 
| 160 215 | 
             
                def ensure_at_least_one_state_emitted(self) -> None:
         | 
| 161 216 | 
             
                    """
         | 
| 162 | 
            -
                    The platform  | 
| 217 | 
            +
                    The platform expects at least one state message on successful syncs. Hence, whatever happens, we expect this method to be
         | 
| 163 218 | 
             
                    called.
         | 
| 164 219 | 
             
                    """
         | 
| 165 220 | 
             
                    if not any(
         | 
| @@ -201,13 +256,19 @@ class ConcurrentPerPartitionCursor(Cursor): | |
| 201 256 |  | 
| 202 257 | 
             
                    slices = self._partition_router.stream_slices()
         | 
| 203 258 | 
             
                    self._timer.start()
         | 
| 204 | 
            -
                    for partition in  | 
| 205 | 
            -
                         | 
| 259 | 
            +
                    for partition, last, parent_state in iterate_with_last_flag_and_state(
         | 
| 260 | 
            +
                        slices, self._partition_router.get_stream_state
         | 
| 261 | 
            +
                    ):
         | 
| 262 | 
            +
                        yield from self._generate_slices_from_partition(partition, parent_state)
         | 
| 206 263 |  | 
| 207 | 
            -
                def _generate_slices_from_partition( | 
| 264 | 
            +
                def _generate_slices_from_partition(
         | 
| 265 | 
            +
                    self, partition: StreamSlice, parent_state: Mapping[str, Any]
         | 
| 266 | 
            +
                ) -> Iterable[StreamSlice]:
         | 
| 208 267 | 
             
                    # Ensure the maximum number of partitions is not exceeded
         | 
| 209 268 | 
             
                    self._ensure_partition_limit()
         | 
| 210 269 |  | 
| 270 | 
            +
                    partition_key = self._to_partition_key(partition.partition)
         | 
| 271 | 
            +
             | 
| 211 272 | 
             
                    cursor = self._cursor_per_partition.get(self._to_partition_key(partition.partition))
         | 
| 212 273 | 
             
                    if not cursor:
         | 
| 213 274 | 
             
                        cursor = self._create_cursor(
         | 
| @@ -216,18 +277,26 @@ class ConcurrentPerPartitionCursor(Cursor): | |
| 216 277 | 
             
                        )
         | 
| 217 278 | 
             
                        with self._lock:
         | 
| 218 279 | 
             
                            self._number_of_partitions += 1
         | 
| 219 | 
            -
                            self._cursor_per_partition[ | 
| 220 | 
            -
             | 
| 221 | 
            -
             | 
| 222 | 
            -
             | 
| 280 | 
            +
                            self._cursor_per_partition[partition_key] = cursor
         | 
| 281 | 
            +
                    self._semaphore_per_partition[partition_key] = threading.Semaphore(0)
         | 
| 282 | 
            +
             | 
| 283 | 
            +
                    with self._lock:
         | 
| 284 | 
            +
                        if (
         | 
| 285 | 
            +
                            len(self._partition_parent_state_map) == 0
         | 
| 286 | 
            +
                            or self._partition_parent_state_map[
         | 
| 287 | 
            +
                                next(reversed(self._partition_parent_state_map))
         | 
| 288 | 
            +
                            ]
         | 
| 289 | 
            +
                            != parent_state
         | 
| 290 | 
            +
                        ):
         | 
| 291 | 
            +
                            self._partition_parent_state_map[partition_key] = deepcopy(parent_state)
         | 
| 223 292 |  | 
| 224 293 | 
             
                    for cursor_slice, is_last_slice, _ in iterate_with_last_flag_and_state(
         | 
| 225 294 | 
             
                        cursor.stream_slices(),
         | 
| 226 295 | 
             
                        lambda: None,
         | 
| 227 296 | 
             
                    ):
         | 
| 228 | 
            -
                        self._semaphore_per_partition[ | 
| 297 | 
            +
                        self._semaphore_per_partition[partition_key].release()
         | 
| 229 298 | 
             
                        if is_last_slice:
         | 
| 230 | 
            -
                            self._finished_partitions.add( | 
| 299 | 
            +
                            self._finished_partitions.add(partition_key)
         | 
| 231 300 | 
             
                        yield StreamSlice(
         | 
| 232 301 | 
             
                            partition=partition, cursor_slice=cursor_slice, extra_fields=partition.extra_fields
         | 
| 233 302 | 
             
                        )
         | 
| @@ -257,9 +326,9 @@ class ConcurrentPerPartitionCursor(Cursor): | |
| 257 326 | 
             
                        while len(self._cursor_per_partition) > self.DEFAULT_MAX_PARTITIONS_NUMBER - 1:
         | 
| 258 327 | 
             
                            # Try removing finished partitions first
         | 
| 259 328 | 
             
                            for partition_key in list(self._cursor_per_partition.keys()):
         | 
| 260 | 
            -
                                if (
         | 
| 261 | 
            -
                                    partition_key in self. | 
| 262 | 
            -
                                     | 
| 329 | 
            +
                                if partition_key in self._finished_partitions and (
         | 
| 330 | 
            +
                                    partition_key not in self._semaphore_per_partition
         | 
| 331 | 
            +
                                    or self._semaphore_per_partition[partition_key]._value == 0
         | 
| 263 332 | 
             
                                ):
         | 
| 264 333 | 
             
                                    oldest_partition = self._cursor_per_partition.pop(
         | 
| 265 334 | 
             
                                        partition_key
         | 
| @@ -338,9 +407,6 @@ class ConcurrentPerPartitionCursor(Cursor): | |
| 338 407 | 
             
                            self._cursor_per_partition[self._to_partition_key(state["partition"])] = (
         | 
| 339 408 | 
             
                                self._create_cursor(state["cursor"])
         | 
| 340 409 | 
             
                            )
         | 
| 341 | 
            -
                            self._semaphore_per_partition[self._to_partition_key(state["partition"])] = (
         | 
| 342 | 
            -
                                threading.Semaphore(0)
         | 
| 343 | 
            -
                            )
         | 
| 344 410 |  | 
| 345 411 | 
             
                        # set default state for missing partitions if it is per partition with fallback to global
         | 
| 346 412 | 
             
                        if self._GLOBAL_STATE_KEY in stream_state:
         |