airbyte-cdk 6.41.5__py3-none-any.whl → 6.41.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,9 +3,11 @@
3
3
  import logging
4
4
  import threading
5
5
  import uuid
6
- from typing import Set
6
+ from dataclasses import dataclass, field
7
+ from typing import Any, Mapping, Set, Union
7
8
 
8
9
  from airbyte_cdk.logger import lazy_log
10
+ from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
9
11
 
10
12
  LOGGER = logging.getLogger("airbyte")
11
13
 
@@ -14,15 +16,29 @@ class ConcurrentJobLimitReached(Exception):
14
16
  pass
15
17
 
16
18
 
19
+ @dataclass
17
20
  class JobTracker:
18
- def __init__(self, limit: int):
21
+ limit: Union[int, str]
22
+ config: Mapping[str, Any] = field(default_factory=dict)
23
+
24
+ def __post_init__(self) -> None:
19
25
  self._jobs: Set[str] = set()
20
- if limit < 1:
26
+ self._lock = threading.Lock()
27
+ if isinstance(self.limit, str):
28
+ try:
29
+ self.limit = int(
30
+ InterpolatedString(self.limit, parameters={}).eval(config=self.config)
31
+ )
32
+ except Exception as e:
33
+ LOGGER.warning(
34
+ f"Error interpolating max job count: {self.limit}. Setting to 1. {e}"
35
+ )
36
+ self.limit = 1
37
+ if self.limit < 1:
21
38
  LOGGER.warning(
22
- f"The `max_concurrent_async_job_count` property is less than 1: {limit}. Setting to 1. Please update the source manifest to set a valid value."
39
+ f"The `max_concurrent_async_job_count` property is less than 1: {self.limit}. Setting to 1. Please update the source manifest to set a valid value."
23
40
  )
24
- self._limit = 1 if limit < 1 else limit
25
- self._lock = threading.Lock()
41
+ self._limit = self.limit if self.limit >= 1 else 1
26
42
 
27
43
  def try_to_get_intent(self) -> str:
28
44
  lazy_log(
@@ -47,7 +47,12 @@ properties:
47
47
  max_concurrent_async_job_count:
48
48
  title: Maximum Concurrent Asynchronous Jobs
49
49
  description: Maximum number of concurrent asynchronous jobs to run. This property is only relevant for sources/streams that support asynchronous job execution through the AsyncRetriever (e.g. a report-based stream that initiates a job, polls the job status, and then fetches the job results). This is often set by the API's maximum number of concurrent jobs on the account level. Refer to the API's documentation for this information.
50
- type: integer
50
+ type:
51
+ - integer
52
+ - string
53
+ examples:
54
+ - 3
55
+ - "{{ config['max_concurrent_async_job_count'] }}"
51
56
  metadata:
52
57
  type: object
53
58
  description: For internal Airbyte use only - DO NOT modify manually. Used by consumers of declarative manifests for storing related metadata.
@@ -2192,7 +2197,8 @@ definitions:
2192
2197
  type: object
2193
2198
  additionalProperties: true
2194
2199
  JsonDecoder:
2195
- title: Json Decoder
2200
+ title: JSON
2201
+ description: Select 'JSON' if the response is formatted as a JSON object.
2196
2202
  type: object
2197
2203
  required:
2198
2204
  - type
@@ -2201,8 +2207,8 @@ definitions:
2201
2207
  type: string
2202
2208
  enum: [JsonDecoder]
2203
2209
  JsonlDecoder:
2204
- title: JSONL Decoder
2205
- description: Use this if the response consists of JSON objects separated by new lines (`\n`) in JSONL format.
2210
+ title: JSON Lines
2211
+ description: Select 'JSON Lines' if the response consists of JSON objects separated by new lines ('\n') in JSONL format.
2206
2212
  type: object
2207
2213
  required:
2208
2214
  - type
@@ -2327,8 +2333,8 @@ definitions:
2327
2333
  type: object
2328
2334
  additionalProperties: true
2329
2335
  IterableDecoder:
2330
- title: Iterable Decoder
2331
- description: Use this if the response consists of strings separated by new lines (`\n`). The Decoder will wrap each row into a JSON object with the `record` key.
2336
+ title: Iterable
2337
+ description: Select 'Iterable' if the response consists of strings separated by new lines (`\n`). The string will then be wrapped into a JSON object with the `record` key.
2332
2338
  type: object
2333
2339
  required:
2334
2340
  - type
@@ -2337,8 +2343,8 @@ definitions:
2337
2343
  type: string
2338
2344
  enum: [IterableDecoder]
2339
2345
  XmlDecoder:
2340
- title: XML Decoder
2341
- description: Use this if the response is XML.
2346
+ title: XML
2347
+ description: Select 'XML' if the response consists of XML-formatted data.
2342
2348
  type: object
2343
2349
  required:
2344
2350
  - type
@@ -2369,8 +2375,8 @@ definitions:
2369
2375
  type: object
2370
2376
  additionalProperties: true
2371
2377
  ZipfileDecoder:
2372
- title: Zipfile Decoder
2373
- description: Decoder for response data that is returned as zipfile(s).
2378
+ title: ZIP File
2379
+ description: Select 'ZIP file' for response data that is returned as a zipfile. Requires specifying an inner data type/decoder to parse the unzipped data.
2374
2380
  type: object
2375
2381
  additionalProperties: true
2376
2382
  required:
@@ -2894,7 +2900,7 @@ definitions:
2894
2900
  title: Lazy Read Pointer
2895
2901
  description: If set, this will enable lazy reading, using the initial read of parent records to extract child records.
2896
2902
  type: array
2897
- default: [ ]
2903
+ default: []
2898
2904
  items:
2899
2905
  - type: string
2900
2906
  interpolation_context:
@@ -3199,7 +3205,7 @@ definitions:
3199
3205
  properties:
3200
3206
  type:
3201
3207
  type: string
3202
- enum: [ StateDelegatingStream ]
3208
+ enum: [StateDelegatingStream]
3203
3209
  name:
3204
3210
  title: Name
3205
3211
  description: The stream name.
@@ -3254,12 +3260,14 @@ definitions:
3254
3260
  - "$ref": "#/definitions/CustomPartitionRouter"
3255
3261
  - "$ref": "#/definitions/ListPartitionRouter"
3256
3262
  - "$ref": "#/definitions/SubstreamPartitionRouter"
3263
+ - "$ref": "#/definitions/GroupingPartitionRouter"
3257
3264
  - type: array
3258
3265
  items:
3259
3266
  anyOf:
3260
3267
  - "$ref": "#/definitions/CustomPartitionRouter"
3261
3268
  - "$ref": "#/definitions/ListPartitionRouter"
3262
3269
  - "$ref": "#/definitions/SubstreamPartitionRouter"
3270
+ - "$ref": "#/definitions/GroupingPartitionRouter"
3263
3271
  decoder:
3264
3272
  title: Decoder
3265
3273
  description: Component decoding the response so records can be extracted.
@@ -3276,6 +3284,8 @@ definitions:
3276
3284
  type: object
3277
3285
  additionalProperties: true
3278
3286
  GzipDecoder:
3287
+ title: gzip
3288
+ description: Select 'gzip' for response data that is compressed with gzip. Requires specifying an inner data type/decoder to parse the decompressed data.
3279
3289
  type: object
3280
3290
  required:
3281
3291
  - type
@@ -3291,6 +3301,8 @@ definitions:
3291
3301
  - "$ref": "#/definitions/JsonDecoder"
3292
3302
  - "$ref": "#/definitions/JsonlDecoder"
3293
3303
  CsvDecoder:
3304
+ title: CSV
3305
+ description: "Select 'CSV' for response data that is formatted as CSV (comma-separated values). Can specify an encoding (default: 'utf-8') and a delimiter (default: ',')."
3294
3306
  type: object
3295
3307
  required:
3296
3308
  - type
@@ -3421,12 +3433,14 @@ definitions:
3421
3433
  - "$ref": "#/definitions/CustomPartitionRouter"
3422
3434
  - "$ref": "#/definitions/ListPartitionRouter"
3423
3435
  - "$ref": "#/definitions/SubstreamPartitionRouter"
3436
+ - "$ref": "#/definitions/GroupingPartitionRouter"
3424
3437
  - type: array
3425
3438
  items:
3426
3439
  anyOf:
3427
3440
  - "$ref": "#/definitions/CustomPartitionRouter"
3428
3441
  - "$ref": "#/definitions/ListPartitionRouter"
3429
3442
  - "$ref": "#/definitions/SubstreamPartitionRouter"
3443
+ - "$ref": "#/definitions/GroupingPartitionRouter"
3430
3444
  decoder:
3431
3445
  title: Decoder
3432
3446
  description: Component decoding the response so records can be extracted.
@@ -3543,6 +3557,44 @@ definitions:
3543
3557
  $parameters:
3544
3558
  type: object
3545
3559
  additionalProperties: true
3560
# Schema for the partition router wrapper that batches the partitions of another router.
GroupingPartitionRouter:
  title: Grouping Partition Router
  description: >
    A decorator on top of a partition router that groups partitions into batches of a specified size.
    This is useful for APIs that support filtering by multiple partition keys in a single request.
    Note that per-partition incremental syncs may not work as expected because the grouping
    of partitions might change between syncs, potentially leading to inconsistent state tracking.
  type: object
  required:
    - type
    - group_size
    - underlying_partition_router
  properties:
    type:
      type: string
      enum: [GroupingPartitionRouter]
    # No lower bound is declared in this schema; the component factory raises a
    # ValueError for values below 1 when it builds the router.
    group_size:
      title: Group Size
      description: The number of partitions to include in each group. This determines how many partition values are batched together in a single slice.
      type: integer
      examples:
        - 10
        - 50
    # Only the three router types listed under `anyOf` validate here;
    # GroupingPartitionRouter itself is not among them, so groupings cannot be nested.
    underlying_partition_router:
      title: Underlying Partition Router
      description: The partition router whose output will be grouped. This can be any valid partition router component.
      anyOf:
        - "$ref": "#/definitions/CustomPartitionRouter"
        - "$ref": "#/definitions/ListPartitionRouter"
        - "$ref": "#/definitions/SubstreamPartitionRouter"
    # NOTE(review): the router implementation keeps one set of seen values for the whole
    # slicing pass and never clears it between groups, so a repeated value is dropped
    # across groups, not only within a single group.
    deduplicate:
      title: Deduplicate Partitions
      description: If true, ensures that partitions are unique within each group by removing duplicates based on the partition key.
      type: boolean
      default: true
    $parameters:
      type: object
      additionalProperties: true
3546
3598
  WaitUntilTimeFromHeader:
3547
3599
  title: Wait Until Time Defined In Response Header
3548
3600
  description: Extract time at which we can retry the request from response header and wait for the difference between now and that time.
@@ -79,6 +79,7 @@ class ConcurrentPerPartitionCursor(Cursor):
79
79
  connector_state_manager: ConnectorStateManager,
80
80
  connector_state_converter: AbstractStreamStateConverter,
81
81
  cursor_field: CursorField,
82
+ use_global_cursor: bool = False,
82
83
  ) -> None:
83
84
  self._global_cursor: Optional[StreamState] = {}
84
85
  self._stream_name = stream_name
@@ -106,7 +107,7 @@ class ConcurrentPerPartitionCursor(Cursor):
106
107
  self._lookback_window: int = 0
107
108
  self._parent_state: Optional[StreamState] = None
108
109
  self._number_of_partitions: int = 0
109
- self._use_global_cursor: bool = False
110
+ self._use_global_cursor: bool = use_global_cursor
110
111
  self._partition_serializer = PerPartitionKeySerializer()
111
112
  # Track the last time a state message was emitted
112
113
  self._last_emission_time: float = 0.0
@@ -1890,9 +1890,10 @@ class DeclarativeSource1(BaseModel):
1890
1890
  spec: Optional[Spec] = None
1891
1891
  concurrency_level: Optional[ConcurrencyLevel] = None
1892
1892
  api_budget: Optional[HTTPAPIBudget] = None
1893
- max_concurrent_async_job_count: Optional[int] = Field(
1893
+ max_concurrent_async_job_count: Optional[Union[int, str]] = Field(
1894
1894
  None,
1895
1895
  description="Maximum number of concurrent asynchronous jobs to run. This property is only relevant for sources/streams that support asynchronous job execution through the AsyncRetriever (e.g. a report-based stream that initiates a job, polls the job status, and then fetches the job results). This is often set by the API's maximum number of concurrent jobs on the account level. Refer to the API's documentation for this information.",
1896
+ examples=[3, "{{ config['max_concurrent_async_job_count'] }}"],
1896
1897
  title="Maximum Concurrent Asynchronous Jobs",
1897
1898
  )
1898
1899
  metadata: Optional[Dict[str, Any]] = Field(
@@ -1922,9 +1923,10 @@ class DeclarativeSource2(BaseModel):
1922
1923
  spec: Optional[Spec] = None
1923
1924
  concurrency_level: Optional[ConcurrencyLevel] = None
1924
1925
  api_budget: Optional[HTTPAPIBudget] = None
1925
- max_concurrent_async_job_count: Optional[int] = Field(
1926
+ max_concurrent_async_job_count: Optional[Union[int, str]] = Field(
1926
1927
  None,
1927
1928
  description="Maximum number of concurrent asynchronous jobs to run. This property is only relevant for sources/streams that support asynchronous job execution through the AsyncRetriever (e.g. a report-based stream that initiates a job, polls the job status, and then fetches the job results). This is often set by the API's maximum number of concurrent jobs on the account level. Refer to the API's documentation for this information.",
1929
+ examples=[3, "{{ config['max_concurrent_async_job_count'] }}"],
1928
1930
  title="Maximum Concurrent Asynchronous Jobs",
1929
1931
  )
1930
1932
  metadata: Optional[Dict[str, Any]] = Field(
@@ -2301,7 +2303,15 @@ class SimpleRetriever(BaseModel):
2301
2303
  CustomPartitionRouter,
2302
2304
  ListPartitionRouter,
2303
2305
  SubstreamPartitionRouter,
2304
- List[Union[CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter]],
2306
+ GroupingPartitionRouter,
2307
+ List[
2308
+ Union[
2309
+ CustomPartitionRouter,
2310
+ ListPartitionRouter,
2311
+ SubstreamPartitionRouter,
2312
+ GroupingPartitionRouter,
2313
+ ]
2314
+ ],
2305
2315
  ]
2306
2316
  ] = Field(
2307
2317
  [],
@@ -2383,7 +2393,15 @@ class AsyncRetriever(BaseModel):
2383
2393
  CustomPartitionRouter,
2384
2394
  ListPartitionRouter,
2385
2395
  SubstreamPartitionRouter,
2386
- List[Union[CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter]],
2396
+ GroupingPartitionRouter,
2397
+ List[
2398
+ Union[
2399
+ CustomPartitionRouter,
2400
+ ListPartitionRouter,
2401
+ SubstreamPartitionRouter,
2402
+ GroupingPartitionRouter,
2403
+ ]
2404
+ ],
2387
2405
  ]
2388
2406
  ] = Field(
2389
2407
  [],
@@ -2435,6 +2453,29 @@ class SubstreamPartitionRouter(BaseModel):
2435
2453
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
2436
2454
 
2437
2455
 
2456
class GroupingPartitionRouter(BaseModel):
    # Manifest model for the router that batches the partitions of another partition router.
    # Comments are used instead of a class docstring so the emitted schema is not altered.

    # Discriminator: must be exactly the literal component name.
    type: Literal["GroupingPartitionRouter"]
    # Required. No numeric bound is declared on this field; the component factory
    # raises a ValueError for values below 1 when it builds the router.
    group_size: int = Field(
        ...,
        description="The number of partitions to include in each group. This determines how many partition values are batched together in a single slice.",
        examples=[10, 50],
        title="Group Size",
    )
    # Required. Only these three router models are accepted; GroupingPartitionRouter
    # itself is not part of the union, so groupings cannot be nested.
    underlying_partition_router: Union[
        CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter
    ] = Field(
        ...,
        description="The partition router whose output will be grouped. This can be any valid partition router component.",
        title="Underlying Partition Router",
    )
    # Optional, defaults to True. An explicit None is allowed by the type.
    # NOTE(review): the router implementation never clears its set of seen values between
    # groups, so duplicates are dropped across the whole slicing pass.
    deduplicate: Optional[bool] = Field(
        True,
        description="If true, ensures that partitions are unique within each group by removing duplicates based on the partition key.",
        title="Deduplicate Partitions",
    )
    # Populated from the `$parameters` key of the manifest (field alias).
    parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
2477
+
2478
+
2438
2479
  class HttpComponentsResolver(BaseModel):
2439
2480
  type: Literal["HttpComponentsResolver"]
2440
2481
  retriever: Union[AsyncRetriever, CustomRetriever, SimpleRetriever] = Field(
@@ -227,6 +227,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
227
227
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
228
228
  FlattenFields as FlattenFieldsModel,
229
229
  )
230
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
231
+ GroupingPartitionRouter as GroupingPartitionRouterModel,
232
+ )
230
233
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
231
234
  GzipDecoder as GzipDecoderModel,
232
235
  )
@@ -385,6 +388,7 @@ from airbyte_cdk.sources.declarative.parsers.custom_code_compiler import (
385
388
  )
386
389
  from airbyte_cdk.sources.declarative.partition_routers import (
387
390
  CartesianProductStreamSlicer,
391
+ GroupingPartitionRouter,
388
392
  ListPartitionRouter,
389
393
  PartitionRouter,
390
394
  SinglePartitionRouter,
@@ -638,6 +642,7 @@ class ModelToComponentFactory:
638
642
  UnlimitedCallRatePolicyModel: self.create_unlimited_call_rate_policy,
639
643
  RateModel: self.create_rate,
640
644
  HttpRequestRegexMatcherModel: self.create_http_request_matcher,
645
+ GroupingPartitionRouterModel: self.create_grouping_partition_router,
641
646
  }
642
647
 
643
648
  # Needed for the case where we need to perform a second parse on the fields of a custom component
@@ -1355,6 +1360,9 @@ class ModelToComponentFactory:
1355
1360
  )
1356
1361
  stream_state = self.apply_stream_state_migrations(stream_state_migrations, stream_state)
1357
1362
 
1363
+ # Per-partition state doesn't make sense for GroupingPartitionRouter, so force the global state
1364
+ use_global_cursor = isinstance(partition_router, GroupingPartitionRouter)
1365
+
1358
1366
  # Return the concurrent cursor and state converter
1359
1367
  return ConcurrentPerPartitionCursor(
1360
1368
  cursor_factory=cursor_factory,
@@ -1366,6 +1374,7 @@ class ModelToComponentFactory:
1366
1374
  connector_state_manager=state_manager,
1367
1375
  connector_state_converter=connector_state_converter,
1368
1376
  cursor_field=cursor_field,
1377
+ use_global_cursor=use_global_cursor,
1369
1378
  )
1370
1379
 
1371
1380
  @staticmethod
@@ -3370,3 +3379,34 @@ class ModelToComponentFactory:
3370
3379
  self._api_budget = self.create_component(
3371
3380
  model_type=HTTPAPIBudgetModel, component_definition=component_definition, config=config
3372
3381
  )
3382
+
3383
+ def create_grouping_partition_router(
3384
+ self, model: GroupingPartitionRouterModel, config: Config, **kwargs: Any
3385
+ ) -> GroupingPartitionRouter:
3386
+ underlying_router = self._create_component_from_model(
3387
+ model=model.underlying_partition_router, config=config
3388
+ )
3389
+ if model.group_size < 1:
3390
+ raise ValueError(f"Group size must be greater than 0, got {model.group_size}")
3391
+
3392
+ # Request options in underlying partition routers are not supported for GroupingPartitionRouter
3393
+ # because they are specific to individual partitions and cannot be aggregated or handled
3394
+ # when grouping, potentially leading to incorrect API calls. Any request customization
3395
+ # should be managed at the stream level through the requester's configuration.
3396
+ if isinstance(underlying_router, SubstreamPartitionRouter):
3397
+ if any(
3398
+ parent_config.request_option
3399
+ for parent_config in underlying_router.parent_stream_configs
3400
+ ):
3401
+ raise ValueError("Request options are not supported for GroupingPartitionRouter.")
3402
+
3403
+ if isinstance(underlying_router, ListPartitionRouter):
3404
+ if underlying_router.request_option:
3405
+ raise ValueError("Request options are not supported for GroupingPartitionRouter.")
3406
+
3407
+ return GroupingPartitionRouter(
3408
+ group_size=model.group_size,
3409
+ underlying_partition_router=underlying_router,
3410
+ deduplicate=model.deduplicate if model.deduplicate is not None else True,
3411
+ config=config,
3412
+ )
@@ -8,6 +8,9 @@ from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_route
8
8
  from airbyte_cdk.sources.declarative.partition_routers.cartesian_product_stream_slicer import (
9
9
  CartesianProductStreamSlicer,
10
10
  )
11
+ from airbyte_cdk.sources.declarative.partition_routers.grouping_partition_router import (
12
+ GroupingPartitionRouter,
13
+ )
11
14
  from airbyte_cdk.sources.declarative.partition_routers.list_partition_router import (
12
15
  ListPartitionRouter,
13
16
  )
@@ -22,6 +25,7 @@ from airbyte_cdk.sources.declarative.partition_routers.substream_partition_route
22
25
  __all__ = [
23
26
  "AsyncJobPartitionRouter",
24
27
  "CartesianProductStreamSlicer",
28
+ "GroupingPartitionRouter",
25
29
  "ListPartitionRouter",
26
30
  "SinglePartitionRouter",
27
31
  "SubstreamPartitionRouter",
@@ -0,0 +1,150 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from dataclasses import dataclass
6
+ from typing import Any, Iterable, Mapping, Optional
7
+
8
+ from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
9
+ from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
10
+
11
+
12
@dataclass
class GroupingPartitionRouter(PartitionRouter):
    """
    A partition router that groups partitions from an underlying partition router into batches of a specified size.
    This is useful for APIs that support filtering by multiple partition keys in a single request.

    Attributes:
        group_size (int): The number of partitions to include in each group. The value is not
            validated here: with a value below 1 the size check in `stream_slices` never matches
            and every partition ends up in a single trailing group.
        underlying_partition_router (PartitionRouter): The partition router whose output will be grouped.
        config (Config): The connector configuration.
        deduplicate (bool): If True, a partition whose value was already seen earlier in the same
            `stream_slices()` pass is dropped, so each value appears at most once across all groups.
    """

    group_size: int
    underlying_partition_router: PartitionRouter
    config: Config
    deduplicate: bool = True

    def __post_init__(self) -> None:
        # Snapshot of the underlying router's state. It is refreshed whenever a group is
        # completed, once more when the underlying router is exhausted, and in set_initial_state.
        self._state: Optional[Mapping[str, StreamState]] = {}

    def stream_slices(self) -> Iterable[StreamSlice]:
        """
        Lazily groups partitions from the underlying partition router into batches of size `group_size`.

        Partitions are consumed one at a time. Each partition must carry exactly one key besides the
        optional `parent_slice` entry. When the buffer reaches `group_size`, or the underlying router
        is exhausted with a non-empty buffer, one grouped slice is yielded.

        If deduplication is enabled, the seen values are tracked for the whole pass (the set is not
        cleared when a group is emitted), so a repeated value is skipped even if its first occurrence
        went into an earlier group. Partition values therefore have to be hashable in that mode.

        Yields:
            Iterable[StreamSlice]: An iterable of StreamSlice objects, where each slice contains a batch of partition values.

        Raises:
            ValueError: If a partition does not have exactly one key besides `parent_slice`.
        """
        batch: list[StreamSlice] = []
        seen_keys: set[Any] = set()

        for partition in self.underlying_partition_router.stream_slices():
            # `parent_slice` is bookkeeping added by SubstreamPartitionRouter, not a partition key.
            partition_keys = [name for name in partition.partition.keys() if name != "parent_slice"]
            if len(partition_keys) != 1:
                raise ValueError(
                    f"GroupingPartitionRouter expects a single partition key-value pair. Got {partition.partition}"
                )
            key = partition.partition[partition_keys[0]]

            if self.deduplicate:
                if key in seen_keys:
                    continue
                seen_keys.add(key)

            batch.append(partition)

            if len(batch) == self.group_size:
                # Capture the underlying state before handing the group to the consumer.
                self._state = self.underlying_partition_router.get_stream_state()
                yield self._create_grouped_slice(batch)
                batch = []

        self._state = self.underlying_partition_router.get_stream_state()
        # Emit the last, possibly smaller, group.
        if batch:
            yield self._create_grouped_slice(batch)

    def _create_grouped_slice(self, batch: list[StreamSlice]) -> StreamSlice:
        """
        Creates a grouped StreamSlice from a batch of partitions, aggregating extra fields into a dictionary with list values.

        Args:
            batch (list[StreamSlice]): A non-empty list of StreamSlice objects to group.

        Returns:
            StreamSlice: A single StreamSlice whose partition maps every key of the first slice
            (including `parent_slice` when present) to the list of per-slice values, whose
            extra fields map every field name found in the batch to the list of per-slice values
            (None where a slice lacks the field), and whose cursor slice is empty.
        """
        # The key set is taken from the first slice; a slice lacking a key contributes None.
        grouped_partition = {
            key: [p.partition.get(key) for p in batch] for key in batch[0].partition.keys()
        }

        # dict.fromkeys keeps first-seen order, so the resulting key order is reproducible
        # (a set would not guarantee that). `extra_fields` is treated as optional both when
        # collecting names and when reading values.
        extra_field_names = dict.fromkeys(
            name for p in batch for name in (p.extra_fields or {})
        )
        extra_fields_dict = {
            name: [(p.extra_fields or {}).get(name) for p in batch] for name in extra_field_names
        }

        return StreamSlice(
            partition=grouped_partition,
            cursor_slice={},  # Cursor is managed by the underlying router or incremental sync
            extra_fields=extra_fields_dict,
        )

    # The four request-option hooks below always return an empty mapping: this router
    # contributes nothing to the outgoing request.

    def get_request_params(
        self,
        stream_state: Optional[StreamState] = None,
        stream_slice: Optional[StreamSlice] = None,
        next_page_token: Optional[Mapping[str, Any]] = None,
    ) -> Mapping[str, Any]:
        return {}

    def get_request_headers(
        self,
        stream_state: Optional[StreamState] = None,
        stream_slice: Optional[StreamSlice] = None,
        next_page_token: Optional[Mapping[str, Any]] = None,
    ) -> Mapping[str, Any]:
        return {}

    def get_request_body_data(
        self,
        stream_state: Optional[StreamState] = None,
        stream_slice: Optional[StreamSlice] = None,
        next_page_token: Optional[Mapping[str, Any]] = None,
    ) -> Mapping[str, Any]:
        return {}

    def get_request_body_json(
        self,
        stream_state: Optional[StreamState] = None,
        stream_slice: Optional[StreamSlice] = None,
        next_page_token: Optional[Mapping[str, Any]] = None,
    ) -> Mapping[str, Any]:
        return {}

    def set_initial_state(self, stream_state: StreamState) -> None:
        """Forward the initial state to the underlying partition router and snapshot its resulting state."""
        self.underlying_partition_router.set_initial_state(stream_state)
        self._state = self.underlying_partition_router.get_stream_state()

    def get_stream_state(self) -> Optional[Mapping[str, StreamState]]:
        """
        Return the most recent snapshot of the underlying partition router's state.

        The underlying router is not queried here; the value is the one captured by
        `stream_slices` or `set_initial_state`.
        """
        return self._state
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: airbyte-cdk
3
- Version: 6.41.5
3
+ Version: 6.41.7
4
4
  Summary: A framework for writing Airbyte Connectors.
5
5
  Home-page: https://airbyte.com
6
6
  License: MIT
@@ -50,7 +50,7 @@ airbyte_cdk/sources/declarative/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4G
50
50
  airbyte_cdk/sources/declarative/async_job/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
51
51
  airbyte_cdk/sources/declarative/async_job/job.py,sha256=aR5UZAkNUYA1I1zjUMAcvdzCFL3lXXOllkFmlhEKgkc,2001
52
52
  airbyte_cdk/sources/declarative/async_job/job_orchestrator.py,sha256=tcHvB5QdBnx4XQmFvr4Swdq2DLRPst5w5M-OIJHnp5c,22034
53
- airbyte_cdk/sources/declarative/async_job/job_tracker.py,sha256=oAaqKxj5dGKeF5wkqiOZbu5gW6JvtaROxirDU2KqT1o,2565
53
+ airbyte_cdk/sources/declarative/async_job/job_tracker.py,sha256=JowKzdT4E6IeE1cYIf4mOtB6sVEJoCeSsfzaFi9ghQ8,3231
54
54
  airbyte_cdk/sources/declarative/async_job/repository.py,sha256=2OkWiZp5IKTOi_SIpP1U-Rw3gH36LBy_a8CgXoENTtg,1044
55
55
  airbyte_cdk/sources/declarative/async_job/status.py,sha256=mkExR-uOAO1ckUnclaUOa74l2N9CdhLbVFM6KDoBgBM,715
56
56
  airbyte_cdk/sources/declarative/async_job/timer.py,sha256=Fb8P72CQ7jIzJyzMSSNuBf2vt8bmrg9SrfmNxKwph2A,1242
@@ -71,7 +71,7 @@ airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=uhy0dRkA
71
71
  airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
72
72
  airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=_zGNGq31RNy_0QBLt_EcTvgPyhj7urPdx6oA3M5-r3o,3150
73
73
  airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=0BHBtDNQZfvwM45-tY5pNlTcKAFSGGNxemoi0Jic-0E,5785
74
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=dROHv50GZ7rGn7u2KrQSaAg5MjRZj1dNc-7VrZMIUb0,150439
74
+ airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=Nvtm2TXNpzJ3sUEgqvnp7GnGUwM4yWgyh2D1F863cUU,152972
75
75
  airbyte_cdk/sources/declarative/declarative_source.py,sha256=nF7wBqFd3AQmEKAm4CnIo29CJoQL562cJGSCeL8U8bA,1531
76
76
  airbyte_cdk/sources/declarative/declarative_stream.py,sha256=dCRlddBUSaJmBNBz1pSO1r2rTw8AP5d2_vlmIeGs2gg,10767
77
77
  airbyte_cdk/sources/declarative/decoders/__init__.py,sha256=JHb_0d3SE6kNY10mxA5YBEKPeSbsWYjByq1gUQxepoE,953
@@ -93,7 +93,7 @@ airbyte_cdk/sources/declarative/extractors/record_selector.py,sha256=HCqx7IyENM_
93
93
  airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py,sha256=WJyA2OYIEgFpVP5Y3o0tIj69AV6IKkn9B16MeXaEItI,6513
94
94
  airbyte_cdk/sources/declarative/extractors/type_transformer.py,sha256=d6Y2Rfg8pMVEEnHllfVksWZdNVOU55yk34O03dP9muY,1626
95
95
  airbyte_cdk/sources/declarative/incremental/__init__.py,sha256=U1oZKtBaEC6IACmvziY9Wzg7Z8EgF4ZuR7NwvjlB_Sk,1255
96
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py,sha256=MT5JbdEbnPzk3VWZGGvThe4opoX5dHhSXFrnTRYC6dg,22210
96
+ airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py,sha256=Mrx5XY6G8ZT-imsjUggpjzWo-Po_Wvi1WpylEW1ohIQ,22263
97
97
  airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py,sha256=Rbe6lJLTtZ5en33MwZiB9-H9-AwDMNHgwBZs8EqhYqk,22172
98
98
  airbyte_cdk/sources/declarative/incremental/declarative_cursor.py,sha256=5Bhw9VRPyIuCaD0wmmq_L3DZsa-rJgtKSEUzSd8YYD0,536
99
99
  airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py,sha256=2tsE6FgXzemf4fZZ4uGtd8QpRBl9GJ2CRqSNJE5p0EI,16077
@@ -114,16 +114,17 @@ airbyte_cdk/sources/declarative/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW
114
114
  airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py,sha256=iemy3fKLczcU0-Aor7tx5jcT6DRedKMqyK7kCOp01hg,3924
115
115
  airbyte_cdk/sources/declarative/migrations/state_migration.py,sha256=KWPjealMLKSMtajXgkdGgKg7EmTLR-CqqD7UIh0-eDU,794
116
116
  airbyte_cdk/sources/declarative/models/__init__.py,sha256=nUFxNCiKeYRVXuZEKA7GD-lTHxsiKcQ8FitZjKhPIvE,100
117
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=0vCjpAXGkkgocHrgUUVzSCzcCKfXQMr-u0I1U-mVczQ,106717
117
+ airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=8ljLhODbBlMxacuKZzlWwmNeGkb6oqfKBVZA70Cg7gE,108248
118
118
  airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
119
119
  airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py,sha256=nlVvHC511NUyDEEIRBkoeDTAvLqKNp-hRy8D19z8tdk,5941
120
120
  airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=Rir9_z3Kcd5Es0-LChrzk-0qubAsiK_RSEnLmK2OXm8,553
121
121
  airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=CXwTfD3wSQq3okcqwigpprbHhSURUokh4GK2OmOyKC8,9132
122
122
  airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=IWUOdF03o-aQn0Occo1BJCxU0Pz-QILk5L67nzw2thw,6803
123
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=72haNs6JXWSbe9Vwya2mJo3GFBvzYwjLlReWmvO2uPo,147623
124
- airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=HJ-Syp3p7RpyR_OK0X_a2kSyISfu3W-PKrRI16iY0a8,957
123
+ airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=APJkP5dmDU4aIaj7w3quGjrP1cV3MMp2gxbTckhOVRA,149720
124
+ airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=TBC9AkGaUqHm2IKHMPN6punBIcY5tWGULowcLoAVkfw,1109
125
125
  airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py,sha256=VelO7zKqKtzMJ35jyFeg0ypJLQC0plqqIBNXoBW1G2E,3001
126
126
  airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=c5cuVFM6NFkuQqG8Z5IwkBuwDrvXZN1CunUOM_L0ezg,6892
127
+ airbyte_cdk/sources/declarative/partition_routers/grouping_partition_router.py,sha256=-W1CAg2NayCMDNj7QLWn7Nqipaz7av9sLjbMnyMGUek,6271
127
128
  airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py,sha256=tmGGpMoOBmaMfhVZq53AEWxoHm2lmNVi6hA2_IVEnAA,4882
128
129
  airbyte_cdk/sources/declarative/partition_routers/partition_router.py,sha256=YyEIzdmLd1FjbVP3QbQ2VFCLW_P-OGbVh6VpZShp54k,2218
129
130
  airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py,sha256=SKzKjSyfccq4dxGIh-J6ejrgkCHzaiTIazmbmeQiRD4,1942
@@ -358,9 +359,9 @@ airbyte_cdk/utils/slice_hasher.py,sha256=EDxgROHDbfG-QKQb59m7h_7crN1tRiawdf5uU7G
358
359
  airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
359
360
  airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
360
361
  airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
361
- airbyte_cdk-6.41.5.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
362
- airbyte_cdk-6.41.5.dist-info/LICENSE_SHORT,sha256=aqF6D1NcESmpn-cqsxBtszTEnHKnlsp8L4x9wAh3Nxg,55
363
- airbyte_cdk-6.41.5.dist-info/METADATA,sha256=lU55s01kRtTNVELjzwH0ZFZcKc9mDaAoegbKH5sRuAs,6071
364
- airbyte_cdk-6.41.5.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
365
- airbyte_cdk-6.41.5.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
366
- airbyte_cdk-6.41.5.dist-info/RECORD,,
362
+ airbyte_cdk-6.41.7.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
363
+ airbyte_cdk-6.41.7.dist-info/LICENSE_SHORT,sha256=aqF6D1NcESmpn-cqsxBtszTEnHKnlsp8L4x9wAh3Nxg,55
364
+ airbyte_cdk-6.41.7.dist-info/METADATA,sha256=4Td-yOXScntR4BKB6Nw94j_CWYB0BgALwx_qvFfBIHs,6071
365
+ airbyte_cdk-6.41.7.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
366
+ airbyte_cdk-6.41.7.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
367
+ airbyte_cdk-6.41.7.dist-info/RECORD,,