airbyte-cdk 6.37.0.dev1 → 6.37.1 (both py3-none-any wheels)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of airbyte-cdk might be problematic. Click here for more details.
- airbyte_cdk/connector_builder/models.py +16 -14
- airbyte_cdk/connector_builder/test_reader/helpers.py +120 -22
- airbyte_cdk/connector_builder/test_reader/message_grouper.py +16 -3
- airbyte_cdk/connector_builder/test_reader/types.py +9 -1
- airbyte_cdk/sources/declarative/auth/token_provider.py +1 -0
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +43 -7
- airbyte_cdk/sources/declarative/datetime/datetime_parser.py +7 -1
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +67 -46
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +13 -2
- airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +1 -0
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +83 -17
- airbyte_cdk/sources/declarative/interpolation/macros.py +2 -0
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +30 -45
- airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +18 -4
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +171 -70
- airbyte_cdk/sources/declarative/partition_routers/__init__.py +0 -4
- airbyte_cdk/sources/declarative/requesters/README.md +5 -5
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +60 -17
- airbyte_cdk/sources/declarative/requesters/http_requester.py +7 -1
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +10 -3
- airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +2 -2
- airbyte_cdk/sources/http_logger.py +3 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +2 -1
- airbyte_cdk/sources/streams/concurrent/state_converters/incrementing_count_stream_state_converter.py +92 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +1 -0
- {airbyte_cdk-6.37.0.dev1.dist-info → airbyte_cdk-6.37.1.dist-info}/METADATA +2 -2
- {airbyte_cdk-6.37.0.dev1.dist-info → airbyte_cdk-6.37.1.dist-info}/RECORD +31 -31
- airbyte_cdk/sources/declarative/partition_routers/grouping_partition_router.py +0 -136
- {airbyte_cdk-6.37.0.dev1.dist-info → airbyte_cdk-6.37.1.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.37.0.dev1.dist-info → airbyte_cdk-6.37.1.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.37.0.dev1.dist-info → airbyte_cdk-6.37.1.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.37.0.dev1.dist-info → airbyte_cdk-6.37.1.dist-info}/entry_points.txt +0 -0
| @@ -777,6 +777,44 @@ definitions: | |
| 777 777 | 
             
                  type:
         | 
| 778 778 | 
             
                    type: string
         | 
| 779 779 | 
             
                    enum: [LegacyToPerPartitionStateMigration]
         | 
| 780 | 
            +
              IncrementingCountCursor:
         | 
| 781 | 
            +
                title: Incrementing Count Cursor
         | 
| 782 | 
            +
                description: Cursor that allows for incremental sync according to a continuously increasing integer.
         | 
| 783 | 
            +
                type: object
         | 
| 784 | 
            +
                required:
         | 
| 785 | 
            +
                  - type
         | 
| 786 | 
            +
                  - cursor_field
         | 
| 787 | 
            +
                properties:
         | 
| 788 | 
            +
                  type:
         | 
| 789 | 
            +
                    type: string
         | 
| 790 | 
            +
                    enum: [IncrementingCountCursor]
         | 
| 791 | 
            +
                  cursor_field:
         | 
| 792 | 
            +
                    title: Cursor Field
         | 
| 793 | 
            +
                    description: The location of the value on a record that will be used as a bookmark during sync. To ensure no data loss, the API must return records in ascending order based on the cursor field. Nested fields are not supported, so the field must be at the top level of the record. You can use a combination of Add Field and Remove Field transformations to move the nested field to the top.
         | 
| 794 | 
            +
                    type: string
         | 
| 795 | 
            +
                    interpolation_context:
         | 
| 796 | 
            +
                      - config
         | 
| 797 | 
            +
                    examples:
         | 
| 798 | 
            +
                      - "created_at"
         | 
| 799 | 
            +
                      - "{{ config['record_cursor'] }}"
         | 
| 800 | 
            +
                  start_value:
         | 
| 801 | 
            +
                    title: Start Value
         | 
| 802 | 
            +
                    description: The value that determines the earliest record that should be synced.
         | 
| 803 | 
            +
                    anyOf:
         | 
| 804 | 
            +
                      - type: string
         | 
| 805 | 
            +
                      - type: integer
         | 
| 806 | 
            +
                    interpolation_context:
         | 
| 807 | 
            +
                      - config
         | 
| 808 | 
            +
                    examples:
         | 
| 809 | 
            +
                      - 0
         | 
| 810 | 
            +
                      - "{{ config['start_value'] }}"
         | 
| 811 | 
            +
                  start_value_option:
         | 
| 812 | 
            +
                    title: Inject Start Value Into Outgoing HTTP Request
         | 
| 813 | 
            +
                    description: Optionally configures how the start value will be sent in requests to the source API.
         | 
| 814 | 
            +
                    "$ref": "#/definitions/RequestOption"
         | 
| 815 | 
            +
                  $parameters:
         | 
| 816 | 
            +
                    type: object
         | 
| 817 | 
            +
                    additionalProperties: true
         | 
| 780 818 | 
             
              DatetimeBasedCursor:
         | 
| 781 819 | 
             
                title: Datetime Based Cursor
         | 
| 782 820 | 
             
                description: Cursor to provide incremental capabilities over datetime.
         | 
| @@ -844,6 +882,7 @@ definitions: | |
| 844 882 | 
             
                        * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`
         | 
| 845 883 | 
             
                        * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`
         | 
| 846 884 | 
             
                        * **%f**: Microsecond (zero-padded to 6 digits) - `000000`
         | 
| 885 | 
            +
                        * **%_ms**: Millisecond (zero-padded to 3 digits) - `000`
         | 
| 847 886 | 
             
                        * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`
         | 
| 848 887 | 
             
                        * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`
         | 
| 849 888 | 
             
                        * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`
         | 
| @@ -1318,6 +1357,7 @@ definitions: | |
| 1318 1357 | 
             
                    anyOf:
         | 
| 1319 1358 | 
             
                      - "$ref": "#/definitions/CustomIncrementalSync"
         | 
| 1320 1359 | 
             
                      - "$ref": "#/definitions/DatetimeBasedCursor"
         | 
| 1360 | 
            +
                      - "$ref": "#/definitions/IncrementingCountCursor"
         | 
| 1321 1361 | 
             
                  name:
         | 
| 1322 1362 | 
             
                    title: Name
         | 
| 1323 1363 | 
             
                    description: The stream name.
         | 
| @@ -1490,7 +1530,11 @@ definitions: | |
| 1490 1530 | 
             
                  limit:
         | 
| 1491 1531 | 
             
                    title: Limit
         | 
| 1492 1532 | 
             
                    description: The maximum number of calls allowed within the interval.
         | 
| 1493 | 
            -
                     | 
| 1533 | 
            +
                    anyOf:
         | 
| 1534 | 
            +
                      - type: integer
         | 
| 1535 | 
            +
                      - type: string
         | 
| 1536 | 
            +
                    interpolation_context:
         | 
| 1537 | 
            +
                      - config
         | 
| 1494 1538 | 
             
                  interval:
         | 
| 1495 1539 | 
             
                    title: Interval
         | 
| 1496 1540 | 
             
                    description: The time interval for the rate limit.
         | 
| @@ -1775,6 +1819,9 @@ definitions: | |
| 1775 1819 | 
             
                      - stream_interval
         | 
| 1776 1820 | 
             
                      - stream_partition
         | 
| 1777 1821 | 
             
                      - stream_slice
         | 
| 1822 | 
            +
                      - creation_response
         | 
| 1823 | 
            +
                      - polling_response
         | 
| 1824 | 
            +
                      - download_target
         | 
| 1778 1825 | 
             
                    examples:
         | 
| 1779 1826 | 
             
                      - "/products"
         | 
| 1780 1827 | 
             
                      - "/quotes/{{ stream_partition['id'] }}/quote_line_groups"
         | 
| @@ -2394,6 +2441,7 @@ definitions: | |
| 2394 2441 | 
             
                        * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`
         | 
| 2395 2442 | 
             
                        * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`
         | 
| 2396 2443 | 
             
                        * **%f**: Microsecond (zero-padded to 6 digits) - `000000`, `000001`, ..., `999999`
         | 
| 2444 | 
            +
                        * **%_ms**: Millisecond (zero-padded to 3 digits) - `000`, `001`, ..., `999`
         | 
| 2397 2445 | 
             
                        * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`
         | 
| 2398 2446 | 
             
                        * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`
         | 
| 2399 2447 | 
             
                        * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`
         | 
| @@ -3130,14 +3178,12 @@ definitions: | |
| 3130 3178 | 
             
                      - "$ref": "#/definitions/CustomPartitionRouter"
         | 
| 3131 3179 | 
             
                      - "$ref": "#/definitions/ListPartitionRouter"
         | 
| 3132 3180 | 
             
                      - "$ref": "#/definitions/SubstreamPartitionRouter"
         | 
| 3133 | 
            -
                      - "$ref": "#/definitions/GroupingPartitionRouter"
         | 
| 3134 3181 | 
             
                      - type: array
         | 
| 3135 3182 | 
             
                        items:
         | 
| 3136 3183 | 
             
                          anyOf:
         | 
| 3137 3184 | 
             
                            - "$ref": "#/definitions/CustomPartitionRouter"
         | 
| 3138 3185 | 
             
                            - "$ref": "#/definitions/ListPartitionRouter"
         | 
| 3139 3186 | 
             
                            - "$ref": "#/definitions/SubstreamPartitionRouter"
         | 
| 3140 | 
            -
                            - "$ref": "#/definitions/GroupingPartitionRouter"
         | 
| 3141 3187 | 
             
                  decoder:
         | 
| 3142 3188 | 
             
                    title: Decoder
         | 
| 3143 3189 | 
             
                    description: Component decoding the response so records can be extracted.
         | 
| @@ -3221,7 +3267,7 @@ definitions: | |
| 3221 3267 | 
             
                  - polling_requester
         | 
| 3222 3268 | 
             
                  - download_requester
         | 
| 3223 3269 | 
             
                  - status_extractor
         | 
| 3224 | 
            -
                  -  | 
| 3270 | 
            +
                  - download_target_extractor
         | 
| 3225 3271 | 
             
                properties:
         | 
| 3226 3272 | 
             
                  type:
         | 
| 3227 3273 | 
             
                    type: string
         | 
| @@ -3238,7 +3284,7 @@ definitions: | |
| 3238 3284 | 
             
                    anyOf:
         | 
| 3239 3285 | 
             
                      - "$ref": "#/definitions/CustomRecordExtractor"
         | 
| 3240 3286 | 
             
                      - "$ref": "#/definitions/DpathExtractor"
         | 
| 3241 | 
            -
                   | 
| 3287 | 
            +
                  download_target_extractor:
         | 
| 3242 3288 | 
             
                    description: Responsible for fetching the final result `urls` provided by the completed / finished / ready async job.
         | 
| 3243 3289 | 
             
                    anyOf:
         | 
| 3244 3290 | 
             
                      - "$ref": "#/definitions/CustomRecordExtractor"
         | 
| @@ -3259,7 +3305,7 @@ definitions: | |
| 3259 3305 | 
             
                    anyOf:
         | 
| 3260 3306 | 
             
                      - "$ref": "#/definitions/CustomRequester"
         | 
| 3261 3307 | 
             
                      - "$ref": "#/definitions/HttpRequester"
         | 
| 3262 | 
            -
                   | 
| 3308 | 
            +
                  download_target_requester:
         | 
| 3263 3309 | 
             
                    description: Requester component that describes how to prepare HTTP requests to send to the source API to extract the url from polling response by the completed async job.
         | 
| 3264 3310 | 
             
                    anyOf:
         | 
| 3265 3311 | 
             
                      - "$ref": "#/definitions/CustomRequester"
         | 
| @@ -3292,14 +3338,12 @@ definitions: | |
| 3292 3338 | 
             
                      - "$ref": "#/definitions/CustomPartitionRouter"
         | 
| 3293 3339 | 
             
                      - "$ref": "#/definitions/ListPartitionRouter"
         | 
| 3294 3340 | 
             
                      - "$ref": "#/definitions/SubstreamPartitionRouter"
         | 
| 3295 | 
            -
                      - "$ref": "#/definitions/GroupingPartitionRouter"
         | 
| 3296 3341 | 
             
                      - type: array
         | 
| 3297 3342 | 
             
                        items:
         | 
| 3298 3343 | 
             
                          anyOf:
         | 
| 3299 3344 | 
             
                            - "$ref": "#/definitions/CustomPartitionRouter"
         | 
| 3300 3345 | 
             
                            - "$ref": "#/definitions/ListPartitionRouter"
         | 
| 3301 3346 | 
             
                            - "$ref": "#/definitions/SubstreamPartitionRouter"
         | 
| 3302 | 
            -
                            - "$ref": "#/definitions/GroupingPartitionRouter"
         | 
| 3303 3347 | 
             
                  decoder:
         | 
| 3304 3348 | 
             
                    title: Decoder
         | 
| 3305 3349 | 
             
                    description: Component decoding the response so records can be extracted.
         | 
| @@ -3416,44 +3460,6 @@ definitions: | |
| 3416 3460 | 
             
                  $parameters:
         | 
| 3417 3461 | 
             
                    type: object
         | 
| 3418 3462 | 
             
                    additionalProperties: true
         | 
| 3419 | 
            -
              GroupingPartitionRouter:
         | 
| 3420 | 
            -
                title: Grouping Partition Router
         | 
| 3421 | 
            -
                description: >
         | 
| 3422 | 
            -
                  A decorator on top of a partition router that groups partitions into batches of a specified size.
         | 
| 3423 | 
            -
                  This is useful for APIs that support filtering by multiple partition keys in a single request.
         | 
| 3424 | 
            -
                  Note that per-partition incremental syncs may not work as expected because the grouping
         | 
| 3425 | 
            -
                  of partitions might change between syncs, potentially leading to inconsistent state tracking.
         | 
| 3426 | 
            -
                type: object
         | 
| 3427 | 
            -
                required:
         | 
| 3428 | 
            -
                  - type
         | 
| 3429 | 
            -
                  - group_size
         | 
| 3430 | 
            -
                  - underlying_partition_router
         | 
| 3431 | 
            -
                properties:
         | 
| 3432 | 
            -
                  type:
         | 
| 3433 | 
            -
                    type: string
         | 
| 3434 | 
            -
                    enum: [GroupingPartitionRouter]
         | 
| 3435 | 
            -
                  group_size:
         | 
| 3436 | 
            -
                    title: Group Size
         | 
| 3437 | 
            -
                    description: The number of partitions to include in each group. This determines how many partition values are batched together in a single slice.
         | 
| 3438 | 
            -
                    type: integer
         | 
| 3439 | 
            -
                    examples:
         | 
| 3440 | 
            -
                      - 10
         | 
| 3441 | 
            -
                      - 50
         | 
| 3442 | 
            -
                  underlying_partition_router:
         | 
| 3443 | 
            -
                    title: Underlying Partition Router
         | 
| 3444 | 
            -
                    description: The partition router whose output will be grouped. This can be any valid partition router component.
         | 
| 3445 | 
            -
                    anyOf:
         | 
| 3446 | 
            -
                      - "$ref": "#/definitions/CustomPartitionRouter"
         | 
| 3447 | 
            -
                      - "$ref": "#/definitions/ListPartitionRouter"
         | 
| 3448 | 
            -
                      - "$ref": "#/definitions/SubstreamPartitionRouter"
         | 
| 3449 | 
            -
                  deduplicate:
         | 
| 3450 | 
            -
                    title: Deduplicate Partitions
         | 
| 3451 | 
            -
                    description: If true, ensures that partitions are unique within each group by removing duplicates based on the partition key.
         | 
| 3452 | 
            -
                    type: boolean
         | 
| 3453 | 
            -
                    default: true
         | 
| 3454 | 
            -
                  $parameters:
         | 
| 3455 | 
            -
                    type: object
         | 
| 3456 | 
            -
                    additionalProperties: true
         | 
| 3457 3463 | 
             
              WaitUntilTimeFromHeader:
         | 
| 3458 3464 | 
             
                title: Wait Until Time Defined In Response Header
         | 
| 3459 3465 | 
             
                description: Extract time at which we can retry the request from response header and wait for the difference between now and that time.
         | 
| @@ -3705,6 +3711,21 @@ interpolation: | |
| 3705 3711 | 
             
                        self: https://api.sendgrid.com/v3/marketing/lists?page_size=1&page_token=
         | 
| 3706 3712 | 
             
                        next: https://api.sendgrid.com/v3/marketing/lists?page_size=1&page_token=0236d6d2
         | 
| 3707 3713 | 
             
                        count: 82
         | 
| 3714 | 
            +
                - title: creation_response
         | 
| 3715 | 
            +
                  description: The response received from the creation_requester in the AsyncRetriever component.
         | 
| 3716 | 
            +
                  type: object
         | 
| 3717 | 
            +
                  examples:
         | 
| 3718 | 
            +
                    - id: "1234"
         | 
| 3719 | 
            +
                - title: polling_response
         | 
| 3720 | 
            +
                  description: The response received from the polling_requester in the AsyncRetriever component.
         | 
| 3721 | 
            +
                  type: object
         | 
| 3722 | 
            +
                  examples:
         | 
| 3723 | 
            +
                    - id: "1234"
         | 
| 3724 | 
            +
                - title: download_target
         | 
| 3725 | 
            +
                  description: The `URL` received from the polling_requester in the AsyncRetriever with jobStatus as `COMPLETED`.
         | 
| 3726 | 
            +
                  type: string
         | 
| 3727 | 
            +
                  examples:
         | 
| 3728 | 
            +
                    - "https://api.sendgrid.com/v3/marketing/lists?page_size=1&page_token=0236d6d2&filename=xxx_yyy_zzz.csv"
         | 
| 3708 3729 | 
             
                - title: stream_interval
         | 
| 3709 3730 | 
             
                  description: The current stream interval being processed. The keys are defined by the incremental sync component. Default keys are `start_time` and `end_time`.
         | 
| 3710 3731 | 
             
                  type: object
         | 
| @@ -107,6 +107,16 @@ class CsvParser(Parser): | |
| 107 107 | 
             
                encoding: Optional[str] = "utf-8"
         | 
| 108 108 | 
             
                delimiter: Optional[str] = ","
         | 
| 109 109 |  | 
| 110 | 
            +
                def _get_delimiter(self) -> Optional[str]:
         | 
| 111 | 
            +
                    """
         | 
| 112 | 
            +
                    Get delimiter from the configuration. Check for the escape character and decode it.
         | 
| 113 | 
            +
                    """
         | 
| 114 | 
            +
                    if self.delimiter is not None:
         | 
| 115 | 
            +
                        if self.delimiter.startswith("\\"):
         | 
| 116 | 
            +
                            self.delimiter = self.delimiter.encode("utf-8").decode("unicode_escape")
         | 
| 117 | 
            +
             | 
| 118 | 
            +
                    return self.delimiter
         | 
| 119 | 
            +
             | 
| 110 120 | 
             
                def parse(
         | 
| 111 121 | 
             
                    self,
         | 
| 112 122 | 
             
                    data: BufferedIOBase,
         | 
| @@ -115,8 +125,9 @@ class CsvParser(Parser): | |
| 115 125 | 
             
                    Parse CSV data from decompressed bytes.
         | 
| 116 126 | 
             
                    """
         | 
| 117 127 | 
             
                    text_data = TextIOWrapper(data, encoding=self.encoding)  # type: ignore
         | 
| 118 | 
            -
                    reader = csv.DictReader(text_data, delimiter=self. | 
| 119 | 
            -
                     | 
| 128 | 
            +
                    reader = csv.DictReader(text_data, delimiter=self._get_delimiter() or ",")
         | 
| 129 | 
            +
                    for row in reader:
         | 
| 130 | 
            +
                        yield row
         | 
| 120 131 |  | 
| 121 132 |  | 
| 122 133 | 
             
            @dataclass
         | 
| @@ -136,6 +136,7 @@ class ResponseToFileExtractor(RecordExtractor): | |
| 136 136 | 
             
                    """
         | 
| 137 137 |  | 
| 138 138 | 
             
                    try:
         | 
| 139 | 
            +
                        # TODO: Add support for other file types, like `json`, with `pd.read_json()`
         | 
| 139 140 | 
             
                        with open(path, "r", encoding=file_encoding) as data:
         | 
| 140 141 | 
             
                            chunks = pd.read_csv(
         | 
| 141 142 | 
             
                                data, chunksize=chunk_size, iterator=True, dialect="unix", dtype=object
         | 
| @@ -95,6 +95,10 @@ class ConcurrentPerPartitionCursor(Cursor): | |
| 95 95 | 
             
                    # the oldest partitions can be efficiently removed, maintaining the most recent partitions.
         | 
| 96 96 | 
             
                    self._cursor_per_partition: OrderedDict[str, ConcurrentCursor] = OrderedDict()
         | 
| 97 97 | 
             
                    self._semaphore_per_partition: OrderedDict[str, threading.Semaphore] = OrderedDict()
         | 
| 98 | 
            +
             | 
| 99 | 
            +
                    # Parent-state tracking: store each partition’s parent state in creation order
         | 
| 100 | 
            +
                    self._partition_parent_state_map: OrderedDict[str, Mapping[str, Any]] = OrderedDict()
         | 
| 101 | 
            +
             | 
| 98 102 | 
             
                    self._finished_partitions: set[str] = set()
         | 
| 99 103 | 
             
                    self._lock = threading.Lock()
         | 
| 100 104 | 
             
                    self._timer = Timer()
         | 
| @@ -155,11 +159,62 @@ class ConcurrentPerPartitionCursor(Cursor): | |
| 155 159 | 
             
                                and self._semaphore_per_partition[partition_key]._value == 0
         | 
| 156 160 | 
             
                            ):
         | 
| 157 161 | 
             
                                self._update_global_cursor(cursor.state[self.cursor_field.cursor_field_key])
         | 
| 158 | 
            -
             | 
| 162 | 
            +
             | 
| 163 | 
            +
                        self._check_and_update_parent_state()
         | 
| 164 | 
            +
             | 
| 165 | 
            +
                        self._emit_state_message()
         | 
| 166 | 
            +
             | 
| 167 | 
            +
                def _check_and_update_parent_state(self) -> None:
         | 
| 168 | 
            +
                    """
         | 
| 169 | 
            +
                    Pop the leftmost partition state from _partition_parent_state_map only if
         | 
| 170 | 
            +
                    *all partitions* up to (and including) that partition key in _semaphore_per_partition
         | 
| 171 | 
            +
                    are fully finished (i.e. in _finished_partitions and semaphore._value == 0).
         | 
| 172 | 
            +
                    Additionally, delete finished semaphores with a value of 0 to free up memory,
         | 
| 173 | 
            +
                    as they are only needed to track errors and completion status.
         | 
| 174 | 
            +
                    """
         | 
| 175 | 
            +
                    last_closed_state = None
         | 
| 176 | 
            +
             | 
| 177 | 
            +
                    while self._partition_parent_state_map:
         | 
| 178 | 
            +
                        # Look at the earliest partition key in creation order
         | 
| 179 | 
            +
                        earliest_key = next(iter(self._partition_parent_state_map))
         | 
| 180 | 
            +
             | 
| 181 | 
            +
                        # Verify ALL partitions from the left up to earliest_key are finished
         | 
| 182 | 
            +
                        all_left_finished = True
         | 
| 183 | 
            +
                        for p_key, sem in list(
         | 
| 184 | 
            +
                            self._semaphore_per_partition.items()
         | 
| 185 | 
            +
                        ):  # Use list to allow modification during iteration
         | 
| 186 | 
            +
                            # If any earlier partition is still not finished, we must stop
         | 
| 187 | 
            +
                            if p_key not in self._finished_partitions or sem._value != 0:
         | 
| 188 | 
            +
                                all_left_finished = False
         | 
| 189 | 
            +
                                break
         | 
| 190 | 
            +
                            # Once we've reached earliest_key in the semaphore order, we can stop checking
         | 
| 191 | 
            +
                            if p_key == earliest_key:
         | 
| 192 | 
            +
                                break
         | 
| 193 | 
            +
             | 
| 194 | 
            +
                        # If the partitions up to earliest_key are not all finished, break the while-loop
         | 
| 195 | 
            +
                        if not all_left_finished:
         | 
| 196 | 
            +
                            break
         | 
| 197 | 
            +
             | 
| 198 | 
            +
                        # Pop the leftmost entry from parent-state map
         | 
| 199 | 
            +
                        _, closed_parent_state = self._partition_parent_state_map.popitem(last=False)
         | 
| 200 | 
            +
                        last_closed_state = closed_parent_state
         | 
| 201 | 
            +
             | 
| 202 | 
            +
                        # Clean up finished semaphores with value 0 up to and including earliest_key
         | 
| 203 | 
            +
                        for p_key in list(self._semaphore_per_partition.keys()):
         | 
| 204 | 
            +
                            sem = self._semaphore_per_partition[p_key]
         | 
| 205 | 
            +
                            if p_key in self._finished_partitions and sem._value == 0:
         | 
| 206 | 
            +
                                del self._semaphore_per_partition[p_key]
         | 
| 207 | 
            +
                                logger.debug(f"Deleted finished semaphore for partition {p_key} with value 0")
         | 
| 208 | 
            +
                            if p_key == earliest_key:
         | 
| 209 | 
            +
                                break
         | 
| 210 | 
            +
             | 
| 211 | 
            +
                    # Update _parent_state if we popped at least one partition
         | 
| 212 | 
            +
                    if last_closed_state is not None:
         | 
| 213 | 
            +
                        self._parent_state = last_closed_state
         | 
| 159 214 |  | 
| 160 215 | 
             
                def ensure_at_least_one_state_emitted(self) -> None:
         | 
| 161 216 | 
             
                    """
         | 
| 162 | 
            -
                    The platform  | 
| 217 | 
            +
                    The platform expects at least one state message on successful syncs. Hence, whatever happens, we expect this method to be
         | 
| 163 218 | 
             
                    called.
         | 
| 164 219 | 
             
                    """
         | 
| 165 220 | 
             
                    if not any(
         | 
| @@ -201,13 +256,19 @@ class ConcurrentPerPartitionCursor(Cursor): | |
| 201 256 |  | 
| 202 257 | 
             
                    slices = self._partition_router.stream_slices()
         | 
| 203 258 | 
             
                    self._timer.start()
         | 
| 204 | 
            -
                    for partition in  | 
| 205 | 
            -
                         | 
| 259 | 
            +
                    for partition, last, parent_state in iterate_with_last_flag_and_state(
         | 
| 260 | 
            +
                        slices, self._partition_router.get_stream_state
         | 
| 261 | 
            +
                    ):
         | 
| 262 | 
            +
                        yield from self._generate_slices_from_partition(partition, parent_state)
         | 
| 206 263 |  | 
| 207 | 
            -
                def _generate_slices_from_partition( | 
| 264 | 
            +
                def _generate_slices_from_partition(
         | 
| 265 | 
            +
                    self, partition: StreamSlice, parent_state: Mapping[str, Any]
         | 
| 266 | 
            +
                ) -> Iterable[StreamSlice]:
         | 
| 208 267 | 
             
                    # Ensure the maximum number of partitions is not exceeded
         | 
| 209 268 | 
             
                    self._ensure_partition_limit()
         | 
| 210 269 |  | 
| 270 | 
            +
                    partition_key = self._to_partition_key(partition.partition)
         | 
| 271 | 
            +
             | 
| 211 272 | 
             
                    cursor = self._cursor_per_partition.get(self._to_partition_key(partition.partition))
         | 
| 212 273 | 
             
                    if not cursor:
         | 
| 213 274 | 
             
                        cursor = self._create_cursor(
         | 
| @@ -216,18 +277,26 @@ class ConcurrentPerPartitionCursor(Cursor): | |
| 216 277 | 
             
                        )
         | 
| 217 278 | 
             
                        with self._lock:
         | 
| 218 279 | 
             
                            self._number_of_partitions += 1
         | 
| 219 | 
            -
                            self._cursor_per_partition[ | 
| 220 | 
            -
             | 
| 221 | 
            -
             | 
| 222 | 
            -
             | 
| 280 | 
            +
                            self._cursor_per_partition[partition_key] = cursor
         | 
| 281 | 
            +
                    self._semaphore_per_partition[partition_key] = threading.Semaphore(0)
         | 
| 282 | 
            +
             | 
| 283 | 
            +
                    with self._lock:
         | 
| 284 | 
            +
                        if (
         | 
| 285 | 
            +
                            len(self._partition_parent_state_map) == 0
         | 
| 286 | 
            +
                            or self._partition_parent_state_map[
         | 
| 287 | 
            +
                                next(reversed(self._partition_parent_state_map))
         | 
| 288 | 
            +
                            ]
         | 
| 289 | 
            +
                            != parent_state
         | 
| 290 | 
            +
                        ):
         | 
| 291 | 
            +
                            self._partition_parent_state_map[partition_key] = deepcopy(parent_state)
         | 
| 223 292 |  | 
| 224 293 | 
             
                    for cursor_slice, is_last_slice, _ in iterate_with_last_flag_and_state(
         | 
| 225 294 | 
             
                        cursor.stream_slices(),
         | 
| 226 295 | 
             
                        lambda: None,
         | 
| 227 296 | 
             
                    ):
         | 
| 228 | 
            -
                        self._semaphore_per_partition[ | 
| 297 | 
            +
                        self._semaphore_per_partition[partition_key].release()
         | 
| 229 298 | 
             
                        if is_last_slice:
         | 
| 230 | 
            -
                            self._finished_partitions.add( | 
| 299 | 
            +
                            self._finished_partitions.add(partition_key)
         | 
| 231 300 | 
             
                        yield StreamSlice(
         | 
| 232 301 | 
             
                            partition=partition, cursor_slice=cursor_slice, extra_fields=partition.extra_fields
         | 
| 233 302 | 
             
                        )
         | 
| @@ -257,9 +326,9 @@ class ConcurrentPerPartitionCursor(Cursor): | |
| 257 326 | 
             
                        while len(self._cursor_per_partition) > self.DEFAULT_MAX_PARTITIONS_NUMBER - 1:
         | 
| 258 327 | 
             
                            # Try removing finished partitions first
         | 
| 259 328 | 
             
                            for partition_key in list(self._cursor_per_partition.keys()):
         | 
| 260 | 
            -
                                if (
         | 
| 261 | 
            -
                                    partition_key in self. | 
| 262 | 
            -
                                     | 
| 329 | 
            +
                                if partition_key in self._finished_partitions and (
         | 
| 330 | 
            +
                                    partition_key not in self._semaphore_per_partition
         | 
| 331 | 
            +
                                    or self._semaphore_per_partition[partition_key]._value == 0
         | 
| 263 332 | 
             
                                ):
         | 
| 264 333 | 
             
                                    oldest_partition = self._cursor_per_partition.pop(
         | 
| 265 334 | 
             
                                        partition_key
         | 
| @@ -338,9 +407,6 @@ class ConcurrentPerPartitionCursor(Cursor): | |
| 338 407 | 
             
                            self._cursor_per_partition[self._to_partition_key(state["partition"])] = (
         | 
| 339 408 | 
             
                                self._create_cursor(state["cursor"])
         | 
| 340 409 | 
             
                            )
         | 
| 341 | 
            -
                            self._semaphore_per_partition[self._to_partition_key(state["partition"])] = (
         | 
| 342 | 
            -
                                threading.Semaphore(0)
         | 
| 343 | 
            -
                            )
         | 
| 344 410 |  | 
| 345 411 | 
             
                        # set default state for missing partitions if it is per partition with fallback to global
         | 
| 346 412 | 
             
                        if self._GLOBAL_STATE_KEY in stream_state:
         | 
| @@ -646,7 +646,7 @@ class Rate(BaseModel): | |
| 646 646 | 
             
                class Config:
         | 
| 647 647 | 
             
                    extra = Extra.allow
         | 
| 648 648 |  | 
| 649 | 
            -
                limit: int = Field(
         | 
| 649 | 
            +
                limit: Union[int, str] = Field(
         | 
| 650 650 | 
             
                    ...,
         | 
| 651 651 | 
             
                    description="The maximum number of calls allowed within the interval.",
         | 
| 652 652 | 
             
                    title="Limit",
         | 
| @@ -1508,6 +1508,28 @@ class AuthFlow(BaseModel): | |
| 1508 1508 | 
             
                oauth_config_specification: Optional[OAuthConfigSpecification] = None
         | 
| 1509 1509 |  | 
| 1510 1510 |  | 
| 1511 | 
            +
            class IncrementingCountCursor(BaseModel):
         | 
| 1512 | 
            +
                type: Literal["IncrementingCountCursor"]
         | 
| 1513 | 
            +
                cursor_field: str = Field(
         | 
| 1514 | 
            +
                    ...,
         | 
| 1515 | 
            +
                    description="The location of the value on a record that will be used as a bookmark during sync. To ensure no data loss, the API must return records in ascending order based on the cursor field. Nested fields are not supported, so the field must be at the top level of the record. You can use a combination of Add Field and Remove Field transformations to move the nested field to the top.",
         | 
| 1516 | 
            +
                    examples=["created_at", "{{ config['record_cursor'] }}"],
         | 
| 1517 | 
            +
                    title="Cursor Field",
         | 
| 1518 | 
            +
                )
         | 
| 1519 | 
            +
                start_value: Optional[Union[str, int]] = Field(
         | 
| 1520 | 
            +
                    None,
         | 
| 1521 | 
            +
                    description="The value that determines the earliest record that should be synced.",
         | 
| 1522 | 
            +
                    examples=[0, "{{ config['start_value'] }}"],
         | 
| 1523 | 
            +
                    title="Start Value",
         | 
| 1524 | 
            +
                )
         | 
| 1525 | 
            +
                start_value_option: Optional[RequestOption] = Field(
         | 
| 1526 | 
            +
                    None,
         | 
| 1527 | 
            +
                    description="Optionally configures how the start value will be sent in requests to the source API.",
         | 
| 1528 | 
            +
                    title="Inject Start Value Into Outgoing HTTP Request",
         | 
| 1529 | 
            +
                )
         | 
| 1530 | 
            +
                parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
         | 
| 1531 | 
            +
             | 
| 1532 | 
            +
             | 
| 1511 1533 | 
             
            class DatetimeBasedCursor(BaseModel):
         | 
| 1512 1534 | 
             
                type: Literal["DatetimeBasedCursor"]
         | 
| 1513 1535 | 
             
                clamping: Optional[Clamping] = Field(
         | 
| @@ -1948,7 +1970,9 @@ class DeclarativeStream(BaseModel): | |
| 1948 1970 | 
             
                    description="Component used to coordinate how records are extracted across stream slices and request pages.",
         | 
| 1949 1971 | 
             
                    title="Retriever",
         | 
| 1950 1972 | 
             
                )
         | 
| 1951 | 
            -
                incremental_sync: Optional[ | 
| 1973 | 
            +
                incremental_sync: Optional[
         | 
| 1974 | 
            +
                    Union[CustomIncrementalSync, DatetimeBasedCursor, IncrementingCountCursor]
         | 
| 1975 | 
            +
                ] = Field(
         | 
| 1952 1976 | 
             
                    None,
         | 
| 1953 1977 | 
             
                    description="Component used to fetch data incrementally based on a time field in the data.",
         | 
| 1954 1978 | 
             
                    title="Incremental Sync",
         | 
| @@ -2225,15 +2249,7 @@ class SimpleRetriever(BaseModel): | |
| 2225 2249 | 
             
                        CustomPartitionRouter,
         | 
| 2226 2250 | 
             
                        ListPartitionRouter,
         | 
| 2227 2251 | 
             
                        SubstreamPartitionRouter,
         | 
| 2228 | 
            -
                         | 
| 2229 | 
            -
                        List[
         | 
| 2230 | 
            -
                            Union[
         | 
| 2231 | 
            -
                                CustomPartitionRouter,
         | 
| 2232 | 
            -
                                ListPartitionRouter,
         | 
| 2233 | 
            -
                                SubstreamPartitionRouter,
         | 
| 2234 | 
            -
                                GroupingPartitionRouter,
         | 
| 2235 | 
            -
                            ]
         | 
| 2236 | 
            -
                        ],
         | 
| 2252 | 
            +
                        List[Union[CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter]],
         | 
| 2237 2253 | 
             
                    ]
         | 
| 2238 2254 | 
             
                ] = Field(
         | 
| 2239 2255 | 
             
                    [],
         | 
| @@ -2271,7 +2287,7 @@ class AsyncRetriever(BaseModel): | |
| 2271 2287 | 
             
                status_extractor: Union[CustomRecordExtractor, DpathExtractor] = Field(
         | 
| 2272 2288 | 
             
                    ..., description="Responsible for fetching the actual status of the async job."
         | 
| 2273 2289 | 
             
                )
         | 
| 2274 | 
            -
                 | 
| 2290 | 
            +
                download_target_extractor: Union[CustomRecordExtractor, DpathExtractor] = Field(
         | 
| 2275 2291 | 
             
                    ...,
         | 
| 2276 2292 | 
             
                    description="Responsible for fetching the final result `urls` provided by the completed / finished / ready async job.",
         | 
| 2277 2293 | 
             
                )
         | 
| @@ -2286,7 +2302,7 @@ class AsyncRetriever(BaseModel): | |
| 2286 2302 | 
             
                    ...,
         | 
| 2287 2303 | 
             
                    description="Requester component that describes how to prepare HTTP requests to send to the source API to fetch the status of the running async job.",
         | 
| 2288 2304 | 
             
                )
         | 
| 2289 | 
            -
                 | 
| 2305 | 
            +
                download_target_requester: Optional[Union[CustomRequester, HttpRequester]] = Field(
         | 
| 2290 2306 | 
             
                    None,
         | 
| 2291 2307 | 
             
                    description="Requester component that describes how to prepare HTTP requests to send to the source API to extract the url from polling response by the completed async job.",
         | 
| 2292 2308 | 
             
                )
         | 
| @@ -2311,15 +2327,7 @@ class AsyncRetriever(BaseModel): | |
| 2311 2327 | 
             
                        CustomPartitionRouter,
         | 
| 2312 2328 | 
             
                        ListPartitionRouter,
         | 
| 2313 2329 | 
             
                        SubstreamPartitionRouter,
         | 
| 2314 | 
            -
                         | 
| 2315 | 
            -
                        List[
         | 
| 2316 | 
            -
                            Union[
         | 
| 2317 | 
            -
                                CustomPartitionRouter,
         | 
| 2318 | 
            -
                                ListPartitionRouter,
         | 
| 2319 | 
            -
                                SubstreamPartitionRouter,
         | 
| 2320 | 
            -
                                GroupingPartitionRouter,
         | 
| 2321 | 
            -
                            ]
         | 
| 2322 | 
            -
                        ],
         | 
| 2330 | 
            +
                        List[Union[CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter]],
         | 
| 2323 2331 | 
             
                    ]
         | 
| 2324 2332 | 
             
                ] = Field(
         | 
| 2325 2333 | 
             
                    [],
         | 
| @@ -2371,29 +2379,6 @@ class SubstreamPartitionRouter(BaseModel): | |
| 2371 2379 | 
             
                parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
         | 
| 2372 2380 |  | 
| 2373 2381 |  | 
| 2374 | 
            -
            class GroupingPartitionRouter(BaseModel):
         | 
| 2375 | 
            -
                type: Literal["GroupingPartitionRouter"]
         | 
| 2376 | 
            -
                group_size: int = Field(
         | 
| 2377 | 
            -
                    ...,
         | 
| 2378 | 
            -
                    description="The number of partitions to include in each group. This determines how many partition values are batched together in a single slice.",
         | 
| 2379 | 
            -
                    examples=[10, 50],
         | 
| 2380 | 
            -
                    title="Group Size",
         | 
| 2381 | 
            -
                )
         | 
| 2382 | 
            -
                underlying_partition_router: Union[
         | 
| 2383 | 
            -
                    CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter
         | 
| 2384 | 
            -
                ] = Field(
         | 
| 2385 | 
            -
                    ...,
         | 
| 2386 | 
            -
                    description="The partition router whose output will be grouped. This can be any valid partition router component.",
         | 
| 2387 | 
            -
                    title="Underlying Partition Router",
         | 
| 2388 | 
            -
                )
         | 
| 2389 | 
            -
                deduplicate: Optional[bool] = Field(
         | 
| 2390 | 
            -
                    True,
         | 
| 2391 | 
            -
                    description="If true, ensures that partitions are unique within each group by removing duplicates based on the partition key.",
         | 
| 2392 | 
            -
                    title="Deduplicate Partitions",
         | 
| 2393 | 
            -
                )
         | 
| 2394 | 
            -
                parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
         | 
| 2395 | 
            -
             | 
| 2396 | 
            -
             | 
| 2397 2382 | 
             
            class HttpComponentsResolver(BaseModel):
         | 
| 2398 2383 | 
             
                type: Literal["HttpComponentsResolver"]
         | 
| 2399 2384 | 
             
                retriever: Union[AsyncRetriever, CustomRetriever, SimpleRetriever] = Field(
         | 
| @@ -45,7 +45,7 @@ class AirbyteCustomCodeNotPermittedError(Exception): | |
| 45 45 | 
             
            def _hash_text(input_text: str, hash_type: str = "md5") -> str:
         | 
| 46 46 | 
             
                """Return the hash of the input text using the specified hash type."""
         | 
| 47 47 | 
             
                if not input_text:
         | 
| 48 | 
            -
                    raise ValueError(" | 
| 48 | 
            +
                    raise ValueError("Hash input text cannot be empty.")
         | 
| 49 49 |  | 
| 50 50 | 
             
                hash_object = CHECKSUM_FUNCTIONS[hash_type]()
         | 
| 51 51 | 
             
                hash_object.update(input_text.encode())
         | 
| @@ -68,6 +68,10 @@ def validate_python_code( | |
| 68 68 |  | 
| 69 69 | 
             
                Currently we fail if no checksums are provided, although this may change in the future.
         | 
| 70 70 | 
             
                """
         | 
| 71 | 
            +
                if not code_text:
         | 
| 72 | 
            +
                    # No code provided, nothing to validate.
         | 
| 73 | 
            +
                    return
         | 
| 74 | 
            +
             | 
| 71 75 | 
             
                if not checksums:
         | 
| 72 76 | 
             
                    raise ValueError(f"A checksum is required to validate the code. Received: {checksums}")
         | 
| 73 77 |  | 
| @@ -77,8 +81,18 @@ def validate_python_code( | |
| 77 81 | 
             
                            f"Unsupported checksum type: {checksum_type}. Supported checksum types are: {CHECKSUM_FUNCTIONS.keys()}"
         | 
| 78 82 | 
             
                        )
         | 
| 79 83 |  | 
| 80 | 
            -
                     | 
| 81 | 
            -
             | 
| 84 | 
            +
                    calculated_checksum = _hash_text(code_text, checksum_type)
         | 
| 85 | 
            +
                    if calculated_checksum != checksum:
         | 
| 86 | 
            +
                        raise AirbyteCodeTamperedError(
         | 
| 87 | 
            +
                            f"{checksum_type} checksum does not match."
         | 
| 88 | 
            +
                            + str(
         | 
| 89 | 
            +
                                {
         | 
| 90 | 
            +
                                    "expected_checksum": checksum,
         | 
| 91 | 
            +
                                    "actual_checksum": calculated_checksum,
         | 
| 92 | 
            +
                                    "code_text": code_text,
         | 
| 93 | 
            +
                                }
         | 
| 94 | 
            +
                            ),
         | 
| 95 | 
            +
                        )
         | 
| 82 96 |  | 
| 83 97 |  | 
| 84 98 | 
             
            def get_registered_components_module(
         | 
| @@ -94,7 +108,7 @@ def get_registered_components_module( | |
| 94 108 |  | 
| 95 109 | 
             
                Returns `None` if no components is provided and the `components` module is not found.
         | 
| 96 110 | 
             
                """
         | 
| 97 | 
            -
                if config and INJECTED_COMPONENTS_PY  | 
| 111 | 
            +
                if config and config.get(INJECTED_COMPONENTS_PY, None):
         | 
| 98 112 | 
             
                    if not custom_code_execution_permitted():
         | 
| 99 113 | 
             
                        raise AirbyteCustomCodeNotPermittedError
         | 
| 100 114 |  |