airbyte-cdk 6.41.2.dev0__py3-none-any.whl → 6.41.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +38 -10
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +10 -45
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +1 -2
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +7 -42
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +24 -41
- airbyte_cdk/sources/declarative/partition_routers/__init__.py +0 -4
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +55 -5
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +0 -7
- airbyte_cdk/test/mock_http/mocker.py +13 -8
- {airbyte_cdk-6.41.2.dev0.dist-info → airbyte_cdk-6.41.4.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.41.2.dev0.dist-info → airbyte_cdk-6.41.4.dist-info}/RECORD +15 -16
- airbyte_cdk/sources/declarative/partition_routers/grouping_partition_router.py +0 -150
- {airbyte_cdk-6.41.2.dev0.dist-info → airbyte_cdk-6.41.4.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.41.2.dev0.dist-info → airbyte_cdk-6.41.4.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.41.2.dev0.dist-info → airbyte_cdk-6.41.4.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.41.2.dev0.dist-info → airbyte_cdk-6.41.4.dist-info}/entry_points.txt +0 -0
airbyte_cdk/sources/declarative/async_job/job_orchestrator.py

@@ -179,7 +179,7 @@ class AsyncJobOrchestrator:
         self._non_breaking_exceptions: List[Exception] = []

     def _replace_failed_jobs(self, partition: AsyncPartition) -> None:
-        failed_status_jobs = (AsyncJobStatus.FAILED, AsyncJobStatus.TIMED_OUT)
+        failed_status_jobs = (AsyncJobStatus.FAILED,)
         jobs_to_replace = [job for job in partition.jobs if job.status() in failed_status_jobs]
         for job in jobs_to_replace:
             new_job = self._start_job(job.job_parameters(), job.api_job_id())
@@ -359,14 +359,11 @@ class AsyncJobOrchestrator:
                     self._process_partitions_with_errors(partition)
                 case _:
                     self._stop_timed_out_jobs(partition)
+                    # re-allocate FAILED jobs, but TIMEOUT jobs are not re-allocated
+                    self._reallocate_partition(current_running_partitions, partition)

-
-
-
-            for job in partition.jobs:
-                # We only remove completed jobs as we want failed/timed out jobs to be re-allocated in priority
-                if job.status() == AsyncJobStatus.COMPLETED:
-                    self._job_tracker.remove_job(job.api_job_id())
+            # We only remove completed / timeout jobs jobs as we want failed jobs to be re-allocated in priority
+            self._remove_completed_or_timed_out_jobs(partition)

         # update the referenced list with running partitions
         self._running_partitions = current_running_partitions
@@ -381,8 +378,11 @@ class AsyncJobOrchestrator:
     def _stop_timed_out_jobs(self, partition: AsyncPartition) -> None:
         for job in partition.jobs:
             if job.status() == AsyncJobStatus.TIMED_OUT:
-
-
+                self._abort_job(job, free_job_allocation=True)
+                raise AirbyteTracedException(
+                    internal_message=f"Job {job.api_job_id()} has timed out. Try increasing the `polling job timeout`.",
+                    failure_type=FailureType.config_error,
+                )

     def _abort_job(self, job: AsyncJob, free_job_allocation: bool = True) -> None:
         try:
@@ -392,6 +392,34 @@ class AsyncJobOrchestrator:
         except Exception as exception:
             LOGGER.warning(f"Could not free budget for job {job.api_job_id()}: {exception}")

+    def _remove_completed_or_timed_out_jobs(self, partition: AsyncPartition) -> None:
+        """
+        Remove completed or timed out jobs from the partition.
+
+        Args:
+            partition (AsyncPartition): The partition to process.
+        """
+        for job in partition.jobs:
+            if job.status() in [AsyncJobStatus.COMPLETED, AsyncJobStatus.TIMED_OUT]:
+                self._job_tracker.remove_job(job.api_job_id())
+
+    def _reallocate_partition(
+        self,
+        current_running_partitions: List[AsyncPartition],
+        partition: AsyncPartition,
+    ) -> None:
+        """
+        Reallocate the partition by starting a new job for each job in the
+        partition.
+        Args:
+            current_running_partitions (list): The list of currently running partitions.
+            partition (AsyncPartition): The partition to reallocate.
+        """
+        for job in partition.jobs:
+            if job.status() != AsyncJobStatus.TIMED_OUT:
+                # allow the FAILED jobs to be re-allocated for partition
+                current_running_partitions.insert(0, partition)
+
     def _process_partitions_with_errors(self, partition: AsyncPartition) -> None:
         """
         Process a partition with status errors (FAILED and TIMEOUT).
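Taken together, the orchestrator hunks above change the failure-handling loop: a timed-out job is aborted and surfaces a `config_error` instead of being silently retried, COMPLETED and TIMED_OUT jobs release their slot in the job tracker, and only partitions still containing FAILED jobs are pushed back to the front of the running list so they are retried first. A minimal, self-contained sketch of that bookkeeping with stand-in types (not the real `AsyncJobOrchestrator` API; the real `_reallocate_partition` iterates jobs rather than using a single `any()` check):

```python
from enum import Enum, auto
from typing import List, Set


class AsyncJobStatus(Enum):
    RUNNING = auto()
    COMPLETED = auto()
    FAILED = auto()
    TIMED_OUT = auto()


class Job:
    def __init__(self, job_id: str, status: AsyncJobStatus) -> None:
        self.job_id = job_id
        self._status = status

    def status(self) -> AsyncJobStatus:
        return self._status


def reprioritize(partition: List[Job], running: List[List[Job]], tracker: Set[str]) -> None:
    """Mimic the new flow: free COMPLETED/TIMED_OUT budget, re-queue FAILED partitions first."""
    for job in partition:
        # Finished jobs (completed or timed out) release their tracker slot.
        if job.status() in (AsyncJobStatus.COMPLETED, AsyncJobStatus.TIMED_OUT):
            tracker.discard(job.job_id)
    # Only FAILED jobs cause the partition to be re-allocated, at the head of the queue.
    if any(job.status() == AsyncJobStatus.FAILED for job in partition):
        running.insert(0, partition)


tracker = {"a", "b"}
running: List[List[Job]] = []
partition = [Job("a", AsyncJobStatus.FAILED), Job("b", AsyncJobStatus.COMPLETED)]
reprioritize(partition, running, tracker)
assert tracker == {"a"} and running == [partition]
```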
airbyte_cdk/sources/declarative/declarative_component_schema.yaml

@@ -2894,7 +2894,7 @@ definitions:
         title: Lazy Read Pointer
         description: If set, this will enable lazy reading, using the initial read of parent records to extract child records.
         type: array
-        default: []
+        default: [ ]
         items:
           - type: string
         interpolation_context:
@@ -3199,7 +3199,7 @@ definitions:
     properties:
       type:
         type: string
-        enum: [StateDelegatingStream]
+        enum: [ StateDelegatingStream ]
      name:
        title: Name
        description: The stream name.
@@ -3254,14 +3254,12 @@ definitions:
          - "$ref": "#/definitions/CustomPartitionRouter"
          - "$ref": "#/definitions/ListPartitionRouter"
          - "$ref": "#/definitions/SubstreamPartitionRouter"
-         - "$ref": "#/definitions/GroupingPartitionRouter"
          - type: array
            items:
              anyOf:
                - "$ref": "#/definitions/CustomPartitionRouter"
                - "$ref": "#/definitions/ListPartitionRouter"
                - "$ref": "#/definitions/SubstreamPartitionRouter"
-               - "$ref": "#/definitions/GroupingPartitionRouter"
      decoder:
        title: Decoder
        description: Component decoding the response so records can be extracted.
@@ -3335,7 +3333,7 @@ definitions:
       items:
         type: string
   AsyncRetriever:
-    description: "
+    description: "Retrieves records by Asynchronously sending requests to fetch records. The retriever acts as an orchestrator between the requester, the record selector, the paginator, and the partition router."
     type: object
     required:
       - type
@@ -3383,6 +3381,13 @@ definitions:
        anyOf:
          - "$ref": "#/definitions/CustomRequester"
          - "$ref": "#/definitions/HttpRequester"
+     polling_job_timeout:
+       description: The time in minutes after which the single Async Job should be considered as Timed Out.
+       anyOf:
+         - type: integer
+         - type: string
+           interpolation_context:
+             - config
      download_target_requester:
        description: Requester component that describes how to prepare HTTP requests to send to the source API to extract the url from polling response by the completed async job.
        anyOf:
@@ -3416,14 +3421,12 @@ definitions:
          - "$ref": "#/definitions/CustomPartitionRouter"
          - "$ref": "#/definitions/ListPartitionRouter"
          - "$ref": "#/definitions/SubstreamPartitionRouter"
-         - "$ref": "#/definitions/GroupingPartitionRouter"
          - type: array
            items:
              anyOf:
                - "$ref": "#/definitions/CustomPartitionRouter"
                - "$ref": "#/definitions/ListPartitionRouter"
                - "$ref": "#/definitions/SubstreamPartitionRouter"
-               - "$ref": "#/definitions/GroupingPartitionRouter"
      decoder:
        title: Decoder
        description: Component decoding the response so records can be extracted.
@@ -3540,44 +3543,6 @@ definitions:
      $parameters:
        type: object
        additionalProperties: true
-  GroupingPartitionRouter:
-    title: Grouping Partition Router
-    description: >
-      A decorator on top of a partition router that groups partitions into batches of a specified size.
-      This is useful for APIs that support filtering by multiple partition keys in a single request.
-      Note that per-partition incremental syncs may not work as expected because the grouping
-      of partitions might change between syncs, potentially leading to inconsistent state tracking.
-    type: object
-    required:
-      - type
-      - group_size
-      - underlying_partition_router
-    properties:
-      type:
-        type: string
-        enum: [GroupingPartitionRouter]
-      group_size:
-        title: Group Size
-        description: The number of partitions to include in each group. This determines how many partition values are batched together in a single slice.
-        type: integer
-        examples:
-          - 10
-          - 50
-      underlying_partition_router:
-        title: Underlying Partition Router
-        description: The partition router whose output will be grouped. This can be any valid partition router component.
-        anyOf:
-          - "$ref": "#/definitions/CustomPartitionRouter"
-          - "$ref": "#/definitions/ListPartitionRouter"
-          - "$ref": "#/definitions/SubstreamPartitionRouter"
-      deduplicate:
-        title: Deduplicate Partitions
-        description: If true, ensures that partitions are unique within each group by removing duplicates based on the partition key.
-        type: boolean
-        default: true
-      $parameters:
-        type: object
-        additionalProperties: true
   WaitUntilTimeFromHeader:
     title: Wait Until Time Defined In Response Header
     description: Extract time at which we can retry the request from response header and wait for the difference between now and that time.
airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py

@@ -79,7 +79,6 @@ class ConcurrentPerPartitionCursor(Cursor):
         connector_state_manager: ConnectorStateManager,
         connector_state_converter: AbstractStreamStateConverter,
         cursor_field: CursorField,
-        use_global_cursor: bool = False,
     ) -> None:
         self._global_cursor: Optional[StreamState] = {}
         self._stream_name = stream_name
@@ -107,7 +106,7 @@ class ConcurrentPerPartitionCursor(Cursor):
         self._lookback_window: int = 0
         self._parent_state: Optional[StreamState] = None
         self._number_of_partitions: int = 0
-        self._use_global_cursor: bool = use_global_cursor
+        self._use_global_cursor: bool = False
         self._partition_serializer = PerPartitionKeySerializer()
         # Track the last time a state message was emitted
         self._last_emission_time: float = 0.0
airbyte_cdk/sources/declarative/models/declarative_component_schema.py

@@ -1467,7 +1467,7 @@ class AddFields(BaseModel):
     )
     condition: Optional[str] = Field(
         "",
-        description="Fields will be added if expression is evaluated to True",
+        description="Fields will be added if expression is evaluated to True.",
         examples=[
             "{{ property|string == '' }}",
             "{{ property is integer }}",
@@ -2301,15 +2301,7 @@ class SimpleRetriever(BaseModel):
             CustomPartitionRouter,
             ListPartitionRouter,
             SubstreamPartitionRouter,
-            GroupingPartitionRouter,
-            List[
-                Union[
-                    CustomPartitionRouter,
-                    ListPartitionRouter,
-                    SubstreamPartitionRouter,
-                    GroupingPartitionRouter,
-                ]
-            ],
+            List[Union[CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter]],
         ]
     ] = Field(
         [],
|
|
2362
2354
|
...,
|
2363
2355
|
description="Requester component that describes how to prepare HTTP requests to send to the source API to fetch the status of the running async job.",
|
2364
2356
|
)
|
2357
|
+
polling_job_timeout: Optional[Union[int, str]] = Field(
|
2358
|
+
None,
|
2359
|
+
description="The time in minutes after which the single Async Job should be considered as Timed Out.",
|
2360
|
+
)
|
2365
2361
|
download_target_requester: Optional[Union[CustomRequester, HttpRequester]] = Field(
|
2366
2362
|
None,
|
2367
2363
|
description="Requester component that describes how to prepare HTTP requests to send to the source API to extract the url from polling response by the completed async job.",
|
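The new `polling_job_timeout` field deliberately stays `Optional[Union[int, str]]` at the model layer: validation only checks the shape, and interpolation of a string value happens later in the factory. A hedged sketch of the field's behavior using a standalone pydantic model (not the generated `AsyncRetriever` class itself):

```python
from typing import Optional, Union

from pydantic import BaseModel, Field


class TimeoutModel(BaseModel):
    # Mirrors the generated field: minutes as a literal int, or an interpolable string.
    polling_job_timeout: Optional[Union[int, str]] = Field(
        None,
        description="The time in minutes after which the single Async Job should be considered as Timed Out.",
    )


assert TimeoutModel().polling_job_timeout is None
assert TimeoutModel(polling_job_timeout=30).polling_job_timeout == 30
# String values pass through untouched; they are evaluated against `config` later.
assert TimeoutModel(polling_job_timeout="{{ config['timeout'] }}").polling_job_timeout == "{{ config['timeout'] }}"
```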
@@ -2387,15 +2383,7 @@ class AsyncRetriever(BaseModel):
             CustomPartitionRouter,
             ListPartitionRouter,
             SubstreamPartitionRouter,
-            GroupingPartitionRouter,
-            List[
-                Union[
-                    CustomPartitionRouter,
-                    ListPartitionRouter,
-                    SubstreamPartitionRouter,
-                    GroupingPartitionRouter,
-                ]
-            ],
+            List[Union[CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter]],
         ]
     ] = Field(
         [],
@@ -2447,29 +2435,6 @@ class SubstreamPartitionRouter(BaseModel):
     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")


-class GroupingPartitionRouter(BaseModel):
-    type: Literal["GroupingPartitionRouter"]
-    group_size: int = Field(
-        ...,
-        description="The number of partitions to include in each group. This determines how many partition values are batched together in a single slice.",
-        examples=[10, 50],
-        title="Group Size",
-    )
-    underlying_partition_router: Union[
-        CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter
-    ] = Field(
-        ...,
-        description="The partition router whose output will be grouped. This can be any valid partition router component.",
-        title="Underlying Partition Router",
-    )
-    deduplicate: Optional[bool] = Field(
-        True,
-        description="If true, ensures that partitions are unique within each group by removing duplicates based on the partition key.",
-        title="Deduplicate Partitions",
-    )
-    parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
-
-
 class HttpComponentsResolver(BaseModel):
     type: Literal["HttpComponentsResolver"]
     retriever: Union[AsyncRetriever, CustomRetriever, SimpleRetriever] = Field(
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py

@@ -227,9 +227,6 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     FlattenFields as FlattenFieldsModel,
 )
-from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
-    GroupingPartitionRouter as GroupingPartitionRouterModel,
-)
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     GzipDecoder as GzipDecoderModel,
 )
@@ -388,7 +385,6 @@ from airbyte_cdk.sources.declarative.parsers.custom_code_compiler import (
 )
 from airbyte_cdk.sources.declarative.partition_routers import (
     CartesianProductStreamSlicer,
-    GroupingPartitionRouter,
     ListPartitionRouter,
     PartitionRouter,
     SinglePartitionRouter,
@@ -511,7 +507,7 @@ from airbyte_cdk.sources.streams.concurrent.state_converters.incrementing_count_
     IncrementingCountStreamStateConverter,
 )
 from airbyte_cdk.sources.streams.http.error_handlers.response_models import ResponseAction
-from airbyte_cdk.sources.types import Config
+from airbyte_cdk.sources.types import Config
 from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer

 ComponentDefinition = Mapping[str, Any]
@@ -642,7 +638,6 @@ class ModelToComponentFactory:
             UnlimitedCallRatePolicyModel: self.create_unlimited_call_rate_policy,
             RateModel: self.create_rate,
             HttpRequestRegexMatcherModel: self.create_http_request_matcher,
-            GroupingPartitionRouterModel: self.create_grouping_partition_router,
         }

         # Needed for the case where we need to perform a second parse on the fields of a custom component
@@ -1360,9 +1355,6 @@ class ModelToComponentFactory:
         )
         stream_state = self.apply_stream_state_migrations(stream_state_migrations, stream_state)

-        # Per-partition state doesn't make sense for GroupingPartitionRouter, so force the global state
-        use_global_cursor = isinstance(partition_router, GroupingPartitionRouter)
-
         # Return the concurrent cursor and state converter
         return ConcurrentPerPartitionCursor(
             cursor_factory=cursor_factory,
@@ -1374,7 +1366,6 @@ class ModelToComponentFactory:
             connector_state_manager=state_manager,
             connector_state_converter=connector_state_converter,
             cursor_field=cursor_field,
-            use_global_cursor=use_global_cursor,
         )

     @staticmethod
@@ -2948,6 +2939,27 @@ class ModelToComponentFactory:
             parameters={},
         )

+        def _get_job_timeout() -> datetime.timedelta:
+            user_defined_timeout: Optional[int] = (
+                int(
+                    InterpolatedString.create(
+                        str(model.polling_job_timeout),
+                        parameters={},
+                    ).eval(config)
+                )
+                if model.polling_job_timeout
+                else None
+            )
+
+            # check for user defined timeout during the test read or 15 minutes
+            test_read_timeout = datetime.timedelta(minutes=user_defined_timeout or 15)
+            # default value for non-connector builder is 60 minutes.
+            default_sync_timeout = datetime.timedelta(minutes=user_defined_timeout or 60)
+
+            return (
+                test_read_timeout if self._emit_connector_builder_messages else default_sync_timeout
+            )
+
         decoder = (
             self._create_component_from_model(model=model.decoder, config=config)
             if model.decoder
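The nested `_get_job_timeout` helper resolves the user-supplied value (a literal integer or an interpolated string) and otherwise falls back to 15 minutes for connector-builder test reads and 60 minutes for regular syncs. A self-contained sketch of that selection logic, with a deliberately naive stand-in for `InterpolatedString.create(...).eval(config)` that only understands the `{{ config['key'] }}` form used for illustration:

```python
import datetime
from typing import Mapping, Optional, Union


def resolve_job_timeout(
    polling_job_timeout: Optional[Union[int, str]],
    config: Mapping[str, object],
    emit_connector_builder_messages: bool,
) -> datetime.timedelta:
    def evaluate(value: Union[int, str]) -> int:
        # Naive stand-in for the CDK's interpolation of "{{ config['key'] }}".
        if isinstance(value, str) and value.startswith("{{"):
            key = value.split("'")[1]
            return int(config[key])  # type: ignore[arg-type]
        return int(value)

    user_defined = evaluate(polling_job_timeout) if polling_job_timeout else None
    test_read_timeout = datetime.timedelta(minutes=user_defined or 15)
    default_sync_timeout = datetime.timedelta(minutes=user_defined or 60)
    return test_read_timeout if emit_connector_builder_messages else default_sync_timeout


# A provided value wins everywhere; with no value, the execution context decides the default.
assert resolve_job_timeout(30, {}, False) == datetime.timedelta(minutes=30)
assert resolve_job_timeout(None, {}, True) == datetime.timedelta(minutes=15)
assert resolve_job_timeout("{{ config['timeout'] }}", {"timeout": 45}, False) == datetime.timedelta(minutes=45)
```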
@@ -3041,6 +3053,7 @@ class ModelToComponentFactory:
             config=config,
             name=name,
         )
+
         job_repository: AsyncJobRepository = AsyncHttpJobRepository(
             creation_requester=creation_requester,
             polling_requester=polling_requester,
@@ -3051,6 +3064,7 @@ class ModelToComponentFactory:
             status_extractor=status_extractor,
             status_mapping=self._create_async_job_status_mapping(model.status_mapping, config),
             download_target_extractor=download_target_extractor,
+            job_timeout=_get_job_timeout(),
         )

         async_job_partition_router = AsyncJobPartitionRouter(
@@ -3353,34 +3367,3 @@ class ModelToComponentFactory:
         self._api_budget = self.create_component(
             model_type=HTTPAPIBudgetModel, component_definition=component_definition, config=config
         )
-
-    def create_grouping_partition_router(
-        self, model: GroupingPartitionRouterModel, config: Config, **kwargs: Any
-    ) -> GroupingPartitionRouter:
-        underlying_router = self._create_component_from_model(
-            model=model.underlying_partition_router, config=config
-        )
-        if model.group_size < 1:
-            raise ValueError(f"Group size must be greater than 0, got {model.group_size}")
-
-        # Request options in underlying partition routers are not supported for GroupingPartitionRouter
-        # because they are specific to individual partitions and cannot be aggregated or handled
-        # when grouping, potentially leading to incorrect API calls. Any request customization
-        # should be managed at the stream level through the requester's configuration.
-        if isinstance(underlying_router, SubstreamPartitionRouter):
-            if any(
-                parent_config.request_option
-                for parent_config in underlying_router.parent_stream_configs
-            ):
-                raise ValueError("Request options are not supported for GroupingPartitionRouter.")
-
-        if isinstance(underlying_router, ListPartitionRouter):
-            if underlying_router.request_option:
-                raise ValueError("Request options are not supported for GroupingPartitionRouter.")
-
-        return GroupingPartitionRouter(
-            group_size=model.group_size,
-            underlying_partition_router=underlying_router,
-            deduplicate=model.deduplicate if model.deduplicate is not None else True,
-            config=config,
-        )
airbyte_cdk/sources/declarative/partition_routers/__init__.py

@@ -8,9 +8,6 @@ from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_route
 from airbyte_cdk.sources.declarative.partition_routers.cartesian_product_stream_slicer import (
     CartesianProductStreamSlicer,
 )
-from airbyte_cdk.sources.declarative.partition_routers.grouping_partition_router import (
-    GroupingPartitionRouter,
-)
 from airbyte_cdk.sources.declarative.partition_routers.list_partition_router import (
     ListPartitionRouter,
 )
@@ -25,7 +22,6 @@ from airbyte_cdk.sources.declarative.partition_routers.substream_partition_route
 __all__ = [
     "AsyncJobPartitionRouter",
     "CartesianProductStreamSlicer",
-    "GroupingPartitionRouter",
     "ListPartitionRouter",
     "SinglePartitionRouter",
     "SubstreamPartitionRouter",
airbyte_cdk/sources/declarative/requesters/http_job_repository.py

@@ -45,7 +45,9 @@ class AsyncHttpJobRepository(AsyncJobRepository):
     status_mapping: Mapping[str, AsyncJobStatus]
     download_target_extractor: DpathExtractor

+    # timeout for the job to be completed, passed from `polling_job_timeout`
     job_timeout: Optional[timedelta] = None
+
     record_extractor: RecordExtractor = field(
         init=False, repr=False, default_factory=lambda: ResponseToFileExtractor({})
     )
@@ -131,7 +133,7 @@ class AsyncHttpJobRepository(AsyncJobRepository):
             log_formatter=lambda response: format_http_message(
                 response=response,
                 title="Async Job -- Create",
-                description="Create the server-side async job.",
+                description=f"Create the server-side async job. Timeout after: {self.job_timeout}",
                 stream_name=None,
                 is_auxiliary=True,
                 type="ASYNC_CREATE",
@@ -271,12 +273,59 @@ class AsyncHttpJobRepository(AsyncJobRepository):
         del self._create_job_response_by_id[job_id]
         del self._polling_job_response_by_id[job_id]

+    def _get_creation_response_interpolation_context(self, job: AsyncJob) -> Dict[str, Any]:
+        """
+        Returns the interpolation context for the creation response.
+
+        Args:
+            job (AsyncJob): The job for which to get the creation response interpolation context.
+
+        Returns:
+            Dict[str, Any]: The interpolation context as a dictionary.
+        """
+        # TODO: currently we support only JsonDecoder to decode the response to track the ids or the status
+        # of the Jobs. We should consider to add the support of other decoders like XMLDecoder, in the future
+        creation_response_context = dict(self._create_job_response_by_id[job.api_job_id()].json())
+        if not "headers" in creation_response_context:
+            creation_response_context["headers"] = self._create_job_response_by_id[
+                job.api_job_id()
+            ].headers
+        if not "request" in creation_response_context:
+            creation_response_context["request"] = self._create_job_response_by_id[
+                job.api_job_id()
+            ].request
+        return creation_response_context
+
+    def _get_polling_response_interpolation_context(self, job: AsyncJob) -> Dict[str, Any]:
+        """
+        Returns the interpolation context for the polling response.
+
+        Args:
+            job (AsyncJob): The job for which to get the polling response interpolation context.
+
+        Returns:
+            Dict[str, Any]: The interpolation context as a dictionary.
+        """
+        # TODO: currently we support only JsonDecoder to decode the response to track the ids or the status
+        # of the Jobs. We should consider to add the support of other decoders like XMLDecoder, in the future
+        polling_response_context = dict(self._polling_job_response_by_id[job.api_job_id()].json())
+        if not "headers" in polling_response_context:
+            polling_response_context["headers"] = self._polling_job_response_by_id[
+                job.api_job_id()
+            ].headers
+        if not "request" in polling_response_context:
+            polling_response_context["request"] = self._polling_job_response_by_id[
+                job.api_job_id()
+            ].request
+        return polling_response_context
+
     def _get_create_job_stream_slice(self, job: AsyncJob) -> StreamSlice:
-        creation_response = self._create_job_response_by_id[job.api_job_id()].json()
         stream_slice = StreamSlice(
             partition={},
             cursor_slice={},
-            extra_fields={
+            extra_fields={
+                "creation_response": self._get_creation_response_interpolation_context(job),
+            },
         )
         return stream_slice

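Both new helpers apply the same layering rule: start from the decoded JSON body of the stored creation/polling response, then add the response's `headers` and `request` only when the body does not already define keys with those names. A sketch of that merge with a stand-in response object:

```python
from typing import Any, Dict


class FakeResponse:
    """Stand-in for requests.Response: a JSON body plus headers and the originating request."""

    def __init__(self, body: Dict[str, Any], headers: Dict[str, str], request: object) -> None:
        self._body = body
        self.headers = headers
        self.request = request

    def json(self) -> Dict[str, Any]:
        return self._body


def build_interpolation_context(response: FakeResponse) -> Dict[str, Any]:
    # Body keys win; "headers"/"request" are only added when the body lacks them.
    context = dict(response.json())
    if "headers" not in context:
        context["headers"] = response.headers
    if "request" not in context:
        context["request"] = response.request
    return context


resp = FakeResponse({"id": "job-1", "status": "running"}, {"Retry-After": "30"}, request=None)
ctx = build_interpolation_context(resp)
assert ctx["id"] == "job-1" and ctx["headers"]["Retry-After"] == "30"
```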
@@ -284,11 +333,12 @@ class AsyncHttpJobRepository(AsyncJobRepository):
         if not self.download_target_requester:
             url_response = self._polling_job_response_by_id[job.api_job_id()]
         else:
-            polling_response = self._polling_job_response_by_id[job.api_job_id()].json()
             stream_slice: StreamSlice = StreamSlice(
                 partition={},
                 cursor_slice={},
-                extra_fields={
+                extra_fields={
+                    "polling_response": self._get_polling_response_interpolation_context(job),
+                },
             )
             url_response = self.download_target_requester.send_request(stream_slice=stream_slice)  # type: ignore  # we expect download_target_requester to always be presented, otherwise raise an exception as we cannot proceed with the report
         if not url_response:
airbyte_cdk/sources/declarative/retrievers/async_retriever.py

@@ -4,24 +4,17 @@
 from dataclasses import InitVar, dataclass, field
 from typing import Any, Iterable, Mapping, Optional

-from typing_extensions import deprecated
-
 from airbyte_cdk.sources.declarative.async_job.job import AsyncJob
 from airbyte_cdk.sources.declarative.extractors.record_selector import RecordSelector
 from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_router import (
     AsyncJobPartitionRouter,
 )
 from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
-from airbyte_cdk.sources.source import ExperimentalClassWarning
 from airbyte_cdk.sources.streams.core import StreamData
 from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
 from airbyte_cdk.sources.utils.slice_logger import AlwaysLogSliceLogger


-@deprecated(
-    "This class is experimental. Use at your own risk.",
-    category=ExperimentalClassWarning,
-)
 @dataclass
 class AsyncRetriever(Retriever):
     config: Config
airbyte_cdk/test/mock_http/mocker.py

@@ -2,9 +2,10 @@

 import contextlib
 import functools
+from collections import defaultdict
 from enum import Enum
 from types import TracebackType
-from typing import Callable, List, Optional, Union
+from typing import Callable, Dict, Iterable, List, Optional, Union

 import requests_mock

@@ -40,7 +41,7 @@ class HttpMocker(contextlib.ContextDecorator):

     def __init__(self) -> None:
         self._mocker = requests_mock.Mocker()
-        self._matchers: List[HttpRequestMatcher] = []
+        self._matchers: Dict[SupportedHttpMethods, List[HttpRequestMatcher]] = defaultdict(list)

     def __enter__(self) -> "HttpMocker":
         self._mocker.__enter__()
@@ -55,7 +56,7 @@ class HttpMocker(contextlib.ContextDecorator):
         self._mocker.__exit__(exc_type, exc_val, exc_tb)

     def _validate_all_matchers_called(self) -> None:
-        for matcher in self._matchers:
+        for matcher in self._get_matchers():
             if not matcher.has_expected_match_count():
                 raise ValueError(f"Invalid number of matches for `{matcher}`")

@@ -69,9 +70,9 @@ class HttpMocker(contextlib.ContextDecorator):
             responses = [responses]

         matcher = HttpRequestMatcher(request, len(responses))
-        if matcher in self._matchers:
+        if matcher in self._matchers[method]:
             raise ValueError(f"Request {matcher.request} already mocked")
-        self._matchers.append(matcher)
+        self._matchers[method].append(matcher)

         getattr(self._mocker, method)(
             requests_mock.ANY,
@@ -129,7 +130,7 @@ class HttpMocker(contextlib.ContextDecorator):

     def assert_number_of_calls(self, request: HttpRequest, number_of_calls: int) -> None:
         corresponding_matchers = list(
-            filter(lambda matcher: matcher.request is request, self._matchers)
+            filter(lambda matcher: matcher.request is request, self._get_matchers())
         )
         if len(corresponding_matchers) != 1:
             raise ValueError(
@@ -150,7 +151,7 @@ class HttpMocker(contextlib.ContextDecorator):
             result = f(*args, **kwargs)
         except requests_mock.NoMockAddress as no_mock_exception:
             matchers_as_string = "\n\t".join(
-                map(lambda matcher: str(matcher.request), self._matchers)
+                map(lambda matcher: str(matcher.request), self._get_matchers())
             )
             raise ValueError(
                 f"No matcher matches {no_mock_exception.args[0]} with headers `{no_mock_exception.request.headers}` "
@@ -175,6 +176,10 @@ class HttpMocker(contextlib.ContextDecorator):

         return wrapper

+    def _get_matchers(self) -> Iterable[HttpRequestMatcher]:
+        for matchers in self._matchers.values():
+            yield from matchers
+
     def clear_all_matchers(self) -> None:
         """Clears all stored matchers by resetting the _matchers list to an empty state."""
-        self._matchers = []
+        self._matchers = defaultdict(list)
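Keying `HttpMocker._matchers` by HTTP method (a `defaultdict(list)`) scopes duplicate detection to `self._matchers[method]`, so the same request can now be mocked once per method (for example a GET and a POST to one URL), while `_get_matchers()` flattens the buckets for validation and error messages. A reduced sketch of just that bookkeeping, with plain strings standing in for matchers (the real class also registers each matcher with `requests_mock`):

```python
from collections import defaultdict
from typing import Dict, Iterable, List


def register(matchers: Dict[str, List[str]], method: str, request: str) -> None:
    # Duplicate detection is scoped to the method bucket, not the whole mocker.
    if request in matchers[method]:
        raise ValueError(f"Request {request} already mocked")
    matchers[method].append(request)


def all_matchers(matchers: Dict[str, List[str]]) -> Iterable[str]:
    # Mirrors the new _get_matchers(): flatten every method bucket.
    for bucket in matchers.values():
        yield from bucket


matchers: Dict[str, List[str]] = defaultdict(list)
register(matchers, "get", "https://api.example.com/jobs")   # illustrative URL
register(matchers, "post", "https://api.example.com/jobs")  # allowed: different method
assert list(all_matchers(matchers)) == ["https://api.example.com/jobs"] * 2
```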
{airbyte_cdk-6.41.2.dev0.dist-info → airbyte_cdk-6.41.4.dist-info}/RECORD

@@ -49,7 +49,7 @@ airbyte_cdk/sources/connector_state_manager.py,sha256=hw3TJJWl3UJKSDsH-PypFQU7mD
 airbyte_cdk/sources/declarative/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
 airbyte_cdk/sources/declarative/async_job/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 airbyte_cdk/sources/declarative/async_job/job.py,sha256=V4Z6NohXwTlOavDbD-tUUQxOr7Lzpb_r4tRC64AfvDE,1702
-airbyte_cdk/sources/declarative/async_job/job_orchestrator.py,sha256=
+airbyte_cdk/sources/declarative/async_job/job_orchestrator.py,sha256=4uNdRLAx6PA5ESrvsxg9sYwQBDwRCIBh58E9byqfl3k,22207
 airbyte_cdk/sources/declarative/async_job/job_tracker.py,sha256=oAaqKxj5dGKeF5wkqiOZbu5gW6JvtaROxirDU2KqT1o,2565
 airbyte_cdk/sources/declarative/async_job/repository.py,sha256=2OkWiZp5IKTOi_SIpP1U-Rw3gH36LBy_a8CgXoENTtg,1044
 airbyte_cdk/sources/declarative/async_job/status.py,sha256=mkExR-uOAO1ckUnclaUOa74l2N9CdhLbVFM6KDoBgBM,715
@@ -71,7 +71,7 @@ airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=uhy0dRkA
 airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
 airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=_zGNGq31RNy_0QBLt_EcTvgPyhj7urPdx6oA3M5-r3o,3150
 airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=0BHBtDNQZfvwM45-tY5pNlTcKAFSGGNxemoi0Jic-0E,5785
-airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=
+airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=dROHv50GZ7rGn7u2KrQSaAg5MjRZj1dNc-7VrZMIUb0,150439
 airbyte_cdk/sources/declarative/declarative_source.py,sha256=nF7wBqFd3AQmEKAm4CnIo29CJoQL562cJGSCeL8U8bA,1531
 airbyte_cdk/sources/declarative/declarative_stream.py,sha256=dCRlddBUSaJmBNBz1pSO1r2rTw8AP5d2_vlmIeGs2gg,10767
 airbyte_cdk/sources/declarative/decoders/__init__.py,sha256=JHb_0d3SE6kNY10mxA5YBEKPeSbsWYjByq1gUQxepoE,953
@@ -93,7 +93,7 @@ airbyte_cdk/sources/declarative/extractors/record_selector.py,sha256=HCqx7IyENM_
 airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py,sha256=WJyA2OYIEgFpVP5Y3o0tIj69AV6IKkn9B16MeXaEItI,6513
 airbyte_cdk/sources/declarative/extractors/type_transformer.py,sha256=d6Y2Rfg8pMVEEnHllfVksWZdNVOU55yk34O03dP9muY,1626
 airbyte_cdk/sources/declarative/incremental/__init__.py,sha256=U1oZKtBaEC6IACmvziY9Wzg7Z8EgF4ZuR7NwvjlB_Sk,1255
-airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py,sha256=
+airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py,sha256=MT5JbdEbnPzk3VWZGGvThe4opoX5dHhSXFrnTRYC6dg,22210
 airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py,sha256=Rbe6lJLTtZ5en33MwZiB9-H9-AwDMNHgwBZs8EqhYqk,22172
 airbyte_cdk/sources/declarative/incremental/declarative_cursor.py,sha256=5Bhw9VRPyIuCaD0wmmq_L3DZsa-rJgtKSEUzSd8YYD0,536
 airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py,sha256=2tsE6FgXzemf4fZZ4uGtd8QpRBl9GJ2CRqSNJE5p0EI,16077
@@ -114,17 +114,16 @@ airbyte_cdk/sources/declarative/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW
 airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py,sha256=iemy3fKLczcU0-Aor7tx5jcT6DRedKMqyK7kCOp01hg,3924
 airbyte_cdk/sources/declarative/migrations/state_migration.py,sha256=KWPjealMLKSMtajXgkdGgKg7EmTLR-CqqD7UIh0-eDU,794
 airbyte_cdk/sources/declarative/models/__init__.py,sha256=nUFxNCiKeYRVXuZEKA7GD-lTHxsiKcQ8FitZjKhPIvE,100
-airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=
+airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=0vCjpAXGkkgocHrgUUVzSCzcCKfXQMr-u0I1U-mVczQ,106717
 airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
 airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py,sha256=nlVvHC511NUyDEEIRBkoeDTAvLqKNp-hRy8D19z8tdk,5941
 airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=Rir9_z3Kcd5Es0-LChrzk-0qubAsiK_RSEnLmK2OXm8,553
 airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=CXwTfD3wSQq3okcqwigpprbHhSURUokh4GK2OmOyKC8,9132
 airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=IWUOdF03o-aQn0Occo1BJCxU0Pz-QILk5L67nzw2thw,6803
-airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=
-airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=
+airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=Rkz5ae83PyjioQg6rcSZwLbR1s4rOte_KwXfNTP2BbM,147373
+airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=HJ-Syp3p7RpyR_OK0X_a2kSyISfu3W-PKrRI16iY0a8,957
 airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py,sha256=VelO7zKqKtzMJ35jyFeg0ypJLQC0plqqIBNXoBW1G2E,3001
 airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=c5cuVFM6NFkuQqG8Z5IwkBuwDrvXZN1CunUOM_L0ezg,6892
-airbyte_cdk/sources/declarative/partition_routers/grouping_partition_router.py,sha256=-W1CAg2NayCMDNj7QLWn7Nqipaz7av9sLjbMnyMGUek,6271
 airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py,sha256=tmGGpMoOBmaMfhVZq53AEWxoHm2lmNVi6hA2_IVEnAA,4882
 airbyte_cdk/sources/declarative/partition_routers/partition_router.py,sha256=YyEIzdmLd1FjbVP3QbQ2VFCLW_P-OGbVh6VpZShp54k,2218
 airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py,sha256=SKzKjSyfccq4dxGIh-J6ejrgkCHzaiTIazmbmeQiRD4,1942
@@ -144,7 +143,7 @@ airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.
 airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py,sha256=q0YkeYUUWO6iErUy0vjqiOkhg8_9d5YcCmtlpXAJJ9E,1314
 airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py,sha256=Tan66odx8VHzfdyyXMQkXz2pJYksllGqvxmpoajgcK4,669
 airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py,sha256=E-fQbt4ShfxZVoqfnmOx69C6FUPWZz8BIqI3DN9Kcjs,7935
-airbyte_cdk/sources/declarative/requesters/http_job_repository.py,sha256=
+airbyte_cdk/sources/declarative/requesters/http_job_repository.py,sha256=uDyLvNsJ183oh3TT-O1PDOgpGt7OD1uqpLTDWTyb9PA,14271
 airbyte_cdk/sources/declarative/requesters/http_requester.py,sha256=uEhUmLGVuwfadKz0c1vunrr66ZNYWmotKZWiaPYPDzw,17402
 airbyte_cdk/sources/declarative/requesters/paginators/__init__.py,sha256=uArbKs9JKNCt7t9tZoeWwjDpyI1HoPp29FNW0JzvaEM,644
 airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py,sha256=SB-Af3CRb4mJwhm4EKNxzl_PK2w5QS4tqrSNNMO2IV4,12760
@@ -171,7 +170,7 @@ airbyte_cdk/sources/declarative/resolvers/components_resolver.py,sha256=KPjKc0yb
 airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py,sha256=dz4iJV9liD_LzY_Mn4XmAStoUll60R3MIGWV4aN3pgg,5223
 airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py,sha256=AiojNs8wItJFrENZBFUaDvau3sgwudO6Wkra36upSPo,4639
 airbyte_cdk/sources/declarative/retrievers/__init__.py,sha256=nQepwG_RfW53sgwvK5dLPqfCx0VjsQ83nYoPjBMAaLM,527
-airbyte_cdk/sources/declarative/retrievers/async_retriever.py,sha256=
+airbyte_cdk/sources/declarative/retrievers/async_retriever.py,sha256=6oZtnCHm9NdDvjTSrVwPQOXGSdETSIR7eWH2vFjM7jI,4855
 airbyte_cdk/sources/declarative/retrievers/retriever.py,sha256=XPLs593Xv8c5cKMc37XzUAYmzlXd1a7eSsspM-CMuWA,1696
 airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=p6O4FYS7zzPq6uQT2NVnughUjI66tePaXVlyhCAyyv0,27746
 airbyte_cdk/sources/declarative/schema/__init__.py,sha256=xU45UvM5O4c1PSM13UHpCdh5hpW3HXy9vRRGEiAC1rg,795
@@ -332,7 +331,7 @@ airbyte_cdk/test/catalog_builder.py,sha256=-y05Cz1x0Dlk6oE9LSKhCozssV2gYBNtMdV5Y
 airbyte_cdk/test/entrypoint_wrapper.py,sha256=9XBii_YguQp0d8cykn3hy102FsJcwIBQzSB7co5ho0s,9802
 airbyte_cdk/test/mock_http/__init__.py,sha256=jE5kC6CQ0OXkTqKhciDnNVZHesBFVIA2YvkdFGwva7k,322
 airbyte_cdk/test/mock_http/matcher.py,sha256=4Qj8UnJKZIs-eodshryce3SN1Ayc8GZpBETmP6hTEyc,1446
-airbyte_cdk/test/mock_http/mocker.py,sha256=
+airbyte_cdk/test/mock_http/mocker.py,sha256=XgsjMtVoeMpRELPyALgrkHFauH9H5irxrz1Kcxh2yFY,8013
 airbyte_cdk/test/mock_http/request.py,sha256=tdB8cqk2vLgCDTOKffBKsM06llYs4ZecgtH6DKyx6yY,4112
 airbyte_cdk/test/mock_http/response.py,sha256=s4-cQQqTtmeej0pQDWqmG0vUWpHS-93lIWMpW3zSVyU,662
 airbyte_cdk/test/mock_http/response_builder.py,sha256=debPx_lRYBaQVSwCoKLa0F8KFk3h0qG7bWxFBATa0cc,7958
@@ -359,9 +358,9 @@ airbyte_cdk/utils/slice_hasher.py,sha256=EDxgROHDbfG-QKQb59m7h_7crN1tRiawdf5uU7G
 airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
 airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
 airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
-airbyte_cdk-6.41.
-airbyte_cdk-6.41.
-airbyte_cdk-6.41.
-airbyte_cdk-6.41.
-airbyte_cdk-6.41.
-airbyte_cdk-6.41.
+airbyte_cdk-6.41.4.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
+airbyte_cdk-6.41.4.dist-info/LICENSE_SHORT,sha256=aqF6D1NcESmpn-cqsxBtszTEnHKnlsp8L4x9wAh3Nxg,55
+airbyte_cdk-6.41.4.dist-info/METADATA,sha256=B7iABwyr7lOJAzEREVt05ZO_3CFXhoEQX5zFdatD-As,6071
+airbyte_cdk-6.41.4.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+airbyte_cdk-6.41.4.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
+airbyte_cdk-6.41.4.dist-info/RECORD,,
airbyte_cdk/sources/declarative/partition_routers/grouping_partition_router.py (deleted)

@@ -1,150 +0,0 @@
-#
-# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
-#
-
-from dataclasses import dataclass
-from typing import Any, Iterable, Mapping, Optional
-
-from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
-from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
-
-
-@dataclass
-class GroupingPartitionRouter(PartitionRouter):
-    """
-    A partition router that groups partitions from an underlying partition router into batches of a specified size.
-    This is useful for APIs that support filtering by multiple partition keys in a single request.
-
-    Attributes:
-        group_size (int): The number of partitions to include in each group.
-        underlying_partition_router (PartitionRouter): The partition router whose output will be grouped.
-        deduplicate (bool): If True, ensures unique partitions within each group by removing duplicates based on the partition key.
-        config (Config): The connector configuration.
-        parameters (Mapping[str, Any]): Additional parameters for interpolation and configuration.
-    """
-
-    group_size: int
-    underlying_partition_router: PartitionRouter
-    config: Config
-    deduplicate: bool = True
-
-    def __post_init__(self) -> None:
-        self._state: Optional[Mapping[str, StreamState]] = {}
-
-    def stream_slices(self) -> Iterable[StreamSlice]:
-        """
-        Lazily groups partitions from the underlying partition router into batches of size `group_size`.
-
-        This method processes partitions one at a time from the underlying router, maintaining a batch buffer.
-        When the buffer reaches `group_size` or the underlying router is exhausted, it yields a grouped slice.
-        If deduplication is enabled, it tracks seen partition keys to ensure uniqueness within the current batch.
-
-        Yields:
-            Iterable[StreamSlice]: An iterable of StreamSlice objects, where each slice contains a batch of partition values.
-        """
-        batch = []
-        seen_keys = set()
-
-        # Iterate over partitions lazily from the underlying router
-        for partition in self.underlying_partition_router.stream_slices():
-            # Extract the partition key (assuming single key-value pair, e.g., {"board_ids": value})
-            partition_keys = list(partition.partition.keys())
-            # skip parent_slice as it is part of SubstreamPartitionRouter partition
-            if "parent_slice" in partition_keys:
-                partition_keys.remove("parent_slice")
-            if len(partition_keys) != 1:
-                raise ValueError(
-                    f"GroupingPartitionRouter expects a single partition key-value pair. Got {partition.partition}"
-                )
-            key = partition.partition[partition_keys[0]]
-
-            # Skip duplicates if deduplication is enabled
-            if self.deduplicate and key in seen_keys:
-                continue
-
-            # Add partition to the batch
-            batch.append(partition)
-            if self.deduplicate:
-                seen_keys.add(key)
-
-            # Yield the batch when it reaches the group_size
-            if len(batch) == self.group_size:
-                self._state = self.underlying_partition_router.get_stream_state()
-                yield self._create_grouped_slice(batch)
-                batch = []  # Reset the batch
-
-        self._state = self.underlying_partition_router.get_stream_state()
-        # Yield any remaining partitions if the batch isn't empty
-        if batch:
-            yield self._create_grouped_slice(batch)
-
-    def _create_grouped_slice(self, batch: list[StreamSlice]) -> StreamSlice:
-        """
-        Creates a grouped StreamSlice from a batch of partitions, aggregating extra fields into a dictionary with list values.
-
-        Args:
-            batch (list[StreamSlice]): A list of StreamSlice objects to group.
-
-        Returns:
-            StreamSlice: A single StreamSlice with combined partition and extra field values.
-        """
-        # Combine partition values into a single dict with lists
-        grouped_partition = {
-            key: [p.partition.get(key) for p in batch] for key in batch[0].partition.keys()
-        }
-
-        # Aggregate extra fields into a dict with list values
-        extra_fields_dict = (
-            {
-                key: [p.extra_fields.get(key) for p in batch]
-                for key in set().union(*(p.extra_fields.keys() for p in batch if p.extra_fields))
-            }
-            if any(p.extra_fields for p in batch)
-            else {}
-        )
-        return StreamSlice(
-            partition=grouped_partition,
-            cursor_slice={},  # Cursor is managed by the underlying router or incremental sync
-            extra_fields=extra_fields_dict,
-        )
-
-    def get_request_params(
-        self,
-        stream_state: Optional[StreamState] = None,
-        stream_slice: Optional[StreamSlice] = None,
-        next_page_token: Optional[Mapping[str, Any]] = None,
-    ) -> Mapping[str, Any]:
-        return {}
-
-    def get_request_headers(
-        self,
-        stream_state: Optional[StreamState] = None,
-        stream_slice: Optional[StreamSlice] = None,
-        next_page_token: Optional[Mapping[str, Any]] = None,
-    ) -> Mapping[str, Any]:
-        return {}
-
-    def get_request_body_data(
-        self,
-        stream_state: Optional[StreamState] = None,
-        stream_slice: Optional[StreamSlice] = None,
-        next_page_token: Optional[Mapping[str, Any]] = None,
-    ) -> Mapping[str, Any]:
-        return {}
-
-    def get_request_body_json(
-        self,
-        stream_state: Optional[StreamState] = None,
-        stream_slice: Optional[StreamSlice] = None,
-        next_page_token: Optional[Mapping[str, Any]] = None,
-    ) -> Mapping[str, Any]:
-        return {}
-
-    def set_initial_state(self, stream_state: StreamState) -> None:
-        """Delegate state initialization to the underlying partition router."""
-        self.underlying_partition_router.set_initial_state(stream_state)
-        self._state = self.underlying_partition_router.get_stream_state()
-
-    def get_stream_state(self) -> Optional[Mapping[str, StreamState]]:
-        """Delegate state retrieval to the underlying partition router."""
-        return self._state
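For context on what was dropped: the removed `GroupingPartitionRouter` was essentially a fixed-size batching decorator over another router's partition stream, with stream-wide key deduplication. The core pattern it implemented can be sketched independently of the CDK types:

```python
from typing import Iterable, Iterator, List, Set, TypeVar

T = TypeVar("T")


def group_partitions(
    partitions: Iterable[T], group_size: int, deduplicate: bool = True
) -> Iterator[List[T]]:
    """Yield batches of at most group_size partitions, optionally skipping duplicates."""
    batch: List[T] = []
    seen: Set[T] = set()
    for partition in partitions:
        if deduplicate and partition in seen:
            continue
        batch.append(partition)
        if deduplicate:
            seen.add(partition)
        if len(batch) == group_size:
            yield batch
            batch = []
    if batch:  # flush the final, possibly smaller, batch
        yield batch


assert list(group_partitions(["a", "b", "a", "c"], group_size=2)) == [["a", "b"], ["c"]]
```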