airbyte-cdk 6.36.2__py3-none-any.whl → 6.37.0.dev1__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- airbyte_cdk/connector_builder/models.py +14 -16
- airbyte_cdk/connector_builder/test_reader/helpers.py +22 -120
- airbyte_cdk/connector_builder/test_reader/message_grouper.py +3 -16
- airbyte_cdk/connector_builder/test_reader/types.py +1 -9
- airbyte_cdk/sources/declarative/auth/token_provider.py +0 -1
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +0 -15
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +43 -5
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +2 -13
- airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +0 -1
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +17 -83
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +42 -3
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +63 -52
- airbyte_cdk/sources/declarative/partition_routers/__init__.py +4 -0
- airbyte_cdk/sources/declarative/partition_routers/grouping_partition_router.py +136 -0
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +4 -42
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +3 -10
- airbyte_cdk/sources/http_logger.py +0 -3
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +0 -1
- {airbyte_cdk-6.36.2.dist-info → airbyte_cdk-6.37.0.dev1.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.36.2.dist-info → airbyte_cdk-6.37.0.dev1.dist-info}/RECORD +24 -23
- {airbyte_cdk-6.36.2.dist-info → airbyte_cdk-6.37.0.dev1.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.36.2.dist-info → airbyte_cdk-6.37.0.dev1.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.36.2.dist-info → airbyte_cdk-6.37.0.dev1.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.36.2.dist-info → airbyte_cdk-6.37.0.dev1.dist-info}/entry_points.txt +0 -0
airbyte_cdk/sources/declarative/models/declarative_component_schema.py

```diff
@@ -646,7 +646,7 @@ class Rate(BaseModel):
     class Config:
         extra = Extra.allow

-    limit: Union[int, str] = Field(
+    limit: int = Field(
         ...,
         description="The maximum number of calls allowed within the interval.",
         title="Limit",
```
```diff
@@ -2225,7 +2225,15 @@ class SimpleRetriever(BaseModel):
             CustomPartitionRouter,
             ListPartitionRouter,
             SubstreamPartitionRouter,
-            List[Union[CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter]],
+            GroupingPartitionRouter,
+            List[
+                Union[
+                    CustomPartitionRouter,
+                    ListPartitionRouter,
+                    SubstreamPartitionRouter,
+                    GroupingPartitionRouter,
+                ]
+            ],
         ]
     ] = Field(
         [],
```
```diff
@@ -2303,7 +2311,15 @@ class AsyncRetriever(BaseModel):
             CustomPartitionRouter,
             ListPartitionRouter,
             SubstreamPartitionRouter,
-            List[Union[CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter]],
+            GroupingPartitionRouter,
+            List[
+                Union[
+                    CustomPartitionRouter,
+                    ListPartitionRouter,
+                    SubstreamPartitionRouter,
+                    GroupingPartitionRouter,
+                ]
+            ],
         ]
     ] = Field(
         [],
```
```diff
@@ -2355,6 +2371,29 @@ class SubstreamPartitionRouter(BaseModel):
     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")


+class GroupingPartitionRouter(BaseModel):
+    type: Literal["GroupingPartitionRouter"]
+    group_size: int = Field(
+        ...,
+        description="The number of partitions to include in each group. This determines how many partition values are batched together in a single slice.",
+        examples=[10, 50],
+        title="Group Size",
+    )
+    underlying_partition_router: Union[
+        CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter
+    ] = Field(
+        ...,
+        description="The partition router whose output will be grouped. This can be any valid partition router component.",
+        title="Underlying Partition Router",
+    )
+    deduplicate: Optional[bool] = Field(
+        True,
+        description="If true, ensures that partitions are unique within each group by removing duplicates based on the partition key.",
+        title="Deduplicate Partitions",
+    )
+    parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
+
+
 class HttpComponentsResolver(BaseModel):
     type: Literal["HttpComponentsResolver"]
     retriever: Union[AsyncRetriever, CustomRetriever, SimpleRetriever] = Field(
```
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py

```diff
@@ -227,6 +227,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     FlattenFields as FlattenFieldsModel,
 )
+from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
+    GroupingPartitionRouter as GroupingPartitionRouterModel,
+)
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     GzipDecoder as GzipDecoderModel,
 )
```
```diff
@@ -379,6 +382,7 @@ from airbyte_cdk.sources.declarative.parsers.custom_code_compiler import (
 )
 from airbyte_cdk.sources.declarative.partition_routers import (
     CartesianProductStreamSlicer,
+    GroupingPartitionRouter,
     ListPartitionRouter,
     PartitionRouter,
     SinglePartitionRouter,
```
```diff
@@ -624,6 +628,7 @@ class ModelToComponentFactory:
             UnlimitedCallRatePolicyModel: self.create_unlimited_call_rate_policy,
             RateModel: self.create_rate,
             HttpRequestRegexMatcherModel: self.create_http_request_matcher,
+            GroupingPartitionRouterModel: self.create_grouping_partition_router,
         }

         # Needed for the case where we need to perform a second parse on the fields of a custom component
```
```diff
@@ -2091,10 +2096,10 @@ class ModelToComponentFactory:
     def create_json_decoder(model: JsonDecoderModel, config: Config, **kwargs: Any) -> Decoder:
         return JsonDecoder(parameters={})

-    def create_csv_decoder(self, model: CsvDecoderModel, config: Config, **kwargs: Any) -> Decoder:
+    @staticmethod
+    def create_csv_decoder(model: CsvDecoderModel, config: Config, **kwargs: Any) -> Decoder:
         return CompositeRawDecoder(
-            parser=ModelToComponentFactory._get_parser(model, config),
-            stream_response=False if self._emit_connector_builder_messages else True,
+            parser=ModelToComponentFactory._get_parser(model, config), stream_response=True
         )

     @staticmethod
```
```diff
@@ -2103,12 +2108,10 @@ class ModelToComponentFactory:
             parser=ModelToComponentFactory._get_parser(model, config), stream_response=True
         )

-    def create_gzip_decoder(
-        self, model: GzipDecoderModel, config: Config, **kwargs: Any
-    ) -> Decoder:
+    @staticmethod
+    def create_gzip_decoder(model: GzipDecoderModel, config: Config, **kwargs: Any) -> Decoder:
         return CompositeRawDecoder(
-            parser=ModelToComponentFactory._get_parser(model, config),
-            stream_response=False if self._emit_connector_builder_messages else True,
+            parser=ModelToComponentFactory._get_parser(model, config), stream_response=True
         )

     @staticmethod
```
```diff
@@ -2629,47 +2632,6 @@ class ModelToComponentFactory:
         transformations: List[RecordTransformation],
         **kwargs: Any,
     ) -> AsyncRetriever:
-        def _get_download_retriever() -> SimpleRetrieverTestReadDecorator | SimpleRetriever:
-            record_selector = RecordSelector(
-                extractor=download_extractor,
-                name=name,
-                record_filter=None,
-                transformations=transformations,
-                schema_normalization=TypeTransformer(TransformConfig.NoTransform),
-                config=config,
-                parameters={},
-            )
-            paginator = (
-                self._create_component_from_model(
-                    model=model.download_paginator, decoder=decoder, config=config, url_base=""
-                )
-                if model.download_paginator
-                else NoPagination(parameters={})
-            )
-            maximum_number_of_slices = self._limit_slices_fetched or 5
-
-            if self._limit_slices_fetched or self._emit_connector_builder_messages:
-                return SimpleRetrieverTestReadDecorator(
-                    requester=download_requester,
-                    record_selector=record_selector,
-                    primary_key=None,
-                    name=job_download_components_name,
-                    paginator=paginator,
-                    config=config,
-                    parameters={},
-                    maximum_number_of_slices=maximum_number_of_slices,
-                )
-
-            return SimpleRetriever(
-                requester=download_requester,
-                record_selector=record_selector,
-                primary_key=None,
-                name=job_download_components_name,
-                paginator=paginator,
-                config=config,
-                parameters={},
-            )
-
         decoder = (
             self._create_component_from_model(model=model.decoder, config=config)
             if model.decoder
```
```diff
@@ -2723,7 +2685,29 @@ class ModelToComponentFactory:
             config=config,
             name=job_download_components_name,
         )
-        download_retriever = _get_download_retriever()
+        download_retriever = SimpleRetriever(
+            requester=download_requester,
+            record_selector=RecordSelector(
+                extractor=download_extractor,
+                name=name,
+                record_filter=None,
+                transformations=transformations,
+                schema_normalization=TypeTransformer(TransformConfig.NoTransform),
+                config=config,
+                parameters={},
+            ),
+            primary_key=None,
+            name=job_download_components_name,
+            paginator=(
+                self._create_component_from_model(
+                    model=model.download_paginator, decoder=decoder, config=config, url_base=""
+                )
+                if model.download_paginator
+                else NoPagination(parameters={})
+            ),
+            config=config,
+            parameters={},
+        )
         abort_requester = (
             self._create_component_from_model(
                 model=model.abort_requester,
```
```diff
@@ -3045,9 +3029,8 @@ class ModelToComponentFactory:
         )

     def create_rate(self, model: RateModel, config: Config, **kwargs: Any) -> Rate:
-        interpolated_limit = InterpolatedString.create(str(model.limit), parameters={})
         return Rate(
-            limit=int(interpolated_limit.eval(config)),
+            limit=model.limit,
             interval=parse_duration(model.interval),
         )
```
```diff
@@ -3066,3 +3049,31 @@ class ModelToComponentFactory:
         self._api_budget = self.create_component(
             model_type=HTTPAPIBudgetModel, component_definition=component_definition, config=config
         )
+
+    def create_grouping_partition_router(
+        self, model: GroupingPartitionRouterModel, config: Config, **kwargs: Any
+    ) -> GroupingPartitionRouter:
+        underlying_router = self._create_component_from_model(
+            model=model.underlying_partition_router, config=config
+        )
+        if model.group_size < 1:
+            raise ValueError(f"Group size must be greater than 0, got {model.group_size}")
+
+        if not isinstance(underlying_router, PartitionRouter):
+            raise ValueError(
+                f"Underlying partition router must be a PartitionRouter subclass, got {type(underlying_router)}"
+            )
+
+        if isinstance(underlying_router, SubstreamPartitionRouter):
+            if any(
+                parent_config.request_option
+                for parent_config in underlying_router.parent_stream_configs
+            ):
+                raise ValueError("Request options are not supported for GroupingPartitionRouter.")
+
+        return GroupingPartitionRouter(
+            group_size=model.group_size,
+            underlying_partition_router=underlying_router,
+            deduplicate=model.deduplicate if model.deduplicate is not None else True,
+            config=config,
+        )
```
airbyte_cdk/sources/declarative/partition_routers/__init__.py

```diff
@@ -8,6 +8,9 @@ from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_router
 from airbyte_cdk.sources.declarative.partition_routers.cartesian_product_stream_slicer import (
     CartesianProductStreamSlicer,
 )
+from airbyte_cdk.sources.declarative.partition_routers.grouping_partition_router import (
+    GroupingPartitionRouter,
+)
 from airbyte_cdk.sources.declarative.partition_routers.list_partition_router import (
     ListPartitionRouter,
 )
```
```diff
@@ -22,6 +25,7 @@ from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router
 __all__ = [
     "AsyncJobPartitionRouter",
     "CartesianProductStreamSlicer",
+    "GroupingPartitionRouter",
     "ListPartitionRouter",
     "SinglePartitionRouter",
     "SubstreamPartitionRouter",
```
airbyte_cdk/sources/declarative/partition_routers/grouping_partition_router.py (new file)

```diff
@@ -0,0 +1,136 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+from dataclasses import dataclass
+from typing import Any, Iterable, Mapping, Optional
+
+from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
+from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
+
+
+@dataclass
+class GroupingPartitionRouter(PartitionRouter):
+    """
+    A partition router that groups partitions from an underlying partition router into batches of a specified size.
+    This is useful for APIs that support filtering by multiple partition keys in a single request.
+
+    Attributes:
+        group_size (int): The number of partitions to include in each group.
+        underlying_partition_router (PartitionRouter): The partition router whose output will be grouped.
+        deduplicate (bool): If True, ensures unique partitions within each group by removing duplicates based on the partition key.
+        config (Config): The connector configuration.
+        parameters (Mapping[str, Any]): Additional parameters for interpolation and configuration.
+    """
+
+    group_size: int
+    underlying_partition_router: PartitionRouter
+    config: Config
+    deduplicate: bool = True
+
+    def stream_slices(self) -> Iterable[StreamSlice]:
+        """
+        Lazily groups partitions from the underlying partition router into batches of size `group_size`.
+
+        This method processes partitions one at a time from the underlying router, maintaining a batch buffer.
+        When the buffer reaches `group_size` or the underlying router is exhausted, it yields a grouped slice.
+        If deduplication is enabled, it tracks seen partition keys to ensure uniqueness within the current batch.
+
+        Yields:
+            Iterable[StreamSlice]: An iterable of StreamSlice objects, where each slice contains a batch of partition values.
+        """
+        batch = []
+        seen_keys = set()
+
+        # Iterate over partitions lazily from the underlying router
+        for partition in self.underlying_partition_router.stream_slices():
+            # Extract the partition key (assuming single key-value pair, e.g., {"board_ids": value})
+            key = next(iter(partition.partition.values()), None)
+
+            # Skip duplicates if deduplication is enabled
+            if self.deduplicate and key in seen_keys:
+                continue
+
+            # Add partition to the batch
+            batch.append(partition)
+            if self.deduplicate:
+                seen_keys.add(key)
+
+            # Yield the batch when it reaches the group_size
+            if len(batch) == self.group_size:
+                yield self._create_grouped_slice(batch)
+                batch = []  # Reset the batch
+
+        # Yield any remaining partitions if the batch isn't empty
+        if batch:
+            yield self._create_grouped_slice(batch)
+
+    def _create_grouped_slice(self, batch: list[StreamSlice]) -> StreamSlice:
+        """
+        Creates a grouped StreamSlice from a batch of partitions, aggregating extra fields into a dictionary with list values.
+
+        Args:
+            batch (list[StreamSlice]): A list of StreamSlice objects to group.
+
+        Returns:
+            StreamSlice: A single StreamSlice with combined partition and extra field values.
+        """
+        # Combine partition values into a single dict with lists
+        grouped_partition = {
+            key: [p.partition.get(key) for p in batch] for key in batch[0].partition.keys()
+        }
+
+        # Aggregate extra fields into a dict with list values
+        extra_fields_dict = (
+            {
+                key: [p.extra_fields.get(key) for p in batch]
+                for key in set().union(*(p.extra_fields.keys() for p in batch if p.extra_fields))
+            }
+            if any(p.extra_fields for p in batch)
+            else {}
+        )
+        return StreamSlice(
+            partition=grouped_partition,
+            cursor_slice={},  # Cursor is managed by the underlying router or incremental sync
+            extra_fields=extra_fields_dict,
+        )
+
+    def get_request_params(
+        self,
+        stream_state: Optional[StreamState] = None,
+        stream_slice: Optional[StreamSlice] = None,
+        next_page_token: Optional[Mapping[str, Any]] = None,
+    ) -> Mapping[str, Any]:
+        return {}
+
+    def get_request_headers(
+        self,
+        stream_state: Optional[StreamState] = None,
+        stream_slice: Optional[StreamSlice] = None,
+        next_page_token: Optional[Mapping[str, Any]] = None,
+    ) -> Mapping[str, Any]:
+        return {}
+
+    def get_request_body_data(
+        self,
+        stream_state: Optional[StreamState] = None,
+        stream_slice: Optional[StreamSlice] = None,
+        next_page_token: Optional[Mapping[str, Any]] = None,
+    ) -> Mapping[str, Any]:
+        return {}
+
+    def get_request_body_json(
+        self,
+        stream_state: Optional[StreamState] = None,
+        stream_slice: Optional[StreamSlice] = None,
+        next_page_token: Optional[Mapping[str, Any]] = None,
+    ) -> Mapping[str, Any]:
+        return {}
+
+    def set_initial_state(self, stream_state: StreamState) -> None:
+        """Delegate state initialization to the underlying partition router."""
+        self.underlying_partition_router.set_initial_state(stream_state)
+
+    def get_stream_state(self) -> Optional[Mapping[str, StreamState]]:
+        """Delegate state retrieval to the underlying partition router."""
+        return self.underlying_partition_router.get_stream_state()
```
airbyte_cdk/sources/declarative/requesters/http_job_repository.py

```diff
@@ -23,7 +23,6 @@ from airbyte_cdk.sources.declarative.extractors.response_to_file_extractor import
 )
 from airbyte_cdk.sources.declarative.requesters.requester import Requester
 from airbyte_cdk.sources.declarative.retrievers.simple_retriever import SimpleRetriever
-from airbyte_cdk.sources.http_logger import format_http_message
 from airbyte_cdk.sources.types import Record, StreamSlice
 from airbyte_cdk.utils import AirbyteTracedException
```
```diff
@@ -72,15 +71,7 @@ class AsyncHttpJobRepository(AsyncJobRepository):
         """

         polling_response: Optional[requests.Response] = self.polling_requester.send_request(
-            stream_slice=stream_slice,
-            log_formatter=lambda polling_response: format_http_message(
-                response=polling_response,
-                title="Async Job -- Polling",
-                description="Poll the status of the server-side async job.",
-                stream_name=None,
-                is_auxiliary=True,
-                type="ASYNC_POLL",
-            ),
+            stream_slice=stream_slice
         )
         if polling_response is None:
             raise AirbyteTracedException(
```
```diff
@@ -127,17 +118,8 @@ class AsyncHttpJobRepository(AsyncJobRepository):
         """

         response: Optional[requests.Response] = self.creation_requester.send_request(
-            stream_slice=stream_slice,
-            log_formatter=lambda response: format_http_message(
-                response=response,
-                title="Async Job -- Create",
-                description="Create the server-side async job.",
-                stream_name=None,
-                is_auxiliary=True,
-                type="ASYNC_CREATE",
-            ),
+            stream_slice=stream_slice
         )
-
         if not response:
             raise AirbyteTracedException(
                 internal_message="Always expect a response or an exception from creation_requester",
```
```diff
@@ -235,33 +217,13 @@ class AsyncHttpJobRepository(AsyncJobRepository):
         if not self.abort_requester:
             return

-        self.abort_requester.send_request(
-            stream_slice=self._get_create_job_stream_slice(job),
-            log_formatter=lambda abort_response: format_http_message(
-                response=abort_response,
-                title="Async Job -- Abort",
-                description="Abort the running server-side async job.",
-                stream_name=None,
-                is_auxiliary=True,
-                type="ASYNC_ABORT",
-            ),
-        )
+        self.abort_requester.send_request(stream_slice=self._get_create_job_stream_slice(job))

     def delete(self, job: AsyncJob) -> None:
         if not self.delete_requester:
             return

-        self.delete_requester.send_request(
-            stream_slice=self._get_create_job_stream_slice(job),
-            log_formatter=lambda delete_job_reponse: format_http_message(
-                response=delete_job_reponse,
-                title="Async Job -- Delete",
-                description="Delete the specified job from the list of Jobs.",
-                stream_name=None,
-                is_auxiliary=True,
-                type="ASYNC_DELETE",
-            ),
-        )
+        self.delete_requester.send_request(stream_slice=self._get_create_job_stream_slice(job))
         self._clean_up_job(job.api_job_id())

     def _clean_up_job(self, job_id: str) -> None:
```
airbyte_cdk/sources/declarative/retrievers/async_retriever.py

```diff
@@ -1,12 +1,13 @@
 # Copyright (c) 2024 Airbyte, Inc., all rights reserved.


-from dataclasses import InitVar, dataclass, field
+from dataclasses import InitVar, dataclass
 from typing import Any, Iterable, Mapping, Optional

 from typing_extensions import deprecated

 from airbyte_cdk.sources.declarative.async_job.job import AsyncJob
+from airbyte_cdk.sources.declarative.async_job.job_orchestrator import AsyncPartition
 from airbyte_cdk.sources.declarative.extractors.record_selector import RecordSelector
 from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_router import (
     AsyncJobPartitionRouter,
```
```diff
@@ -15,7 +16,6 @@ from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
 from airbyte_cdk.sources.source import ExperimentalClassWarning
 from airbyte_cdk.sources.streams.core import StreamData
 from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
-from airbyte_cdk.sources.utils.slice_logger import AlwaysLogSliceLogger


 @deprecated(
```
```diff
@@ -28,10 +28,6 @@ class AsyncRetriever(Retriever):
     parameters: InitVar[Mapping[str, Any]]
     record_selector: RecordSelector
     stream_slicer: AsyncJobPartitionRouter
-    slice_logger: AlwaysLogSliceLogger = field(
-        init=False,
-        default_factory=lambda: AlwaysLogSliceLogger(),
-    )

     def __post_init__(self, parameters: Mapping[str, Any]) -> None:
         self._parameters = parameters
```
```diff
@@ -79,16 +75,13 @@ class AsyncRetriever(Retriever):
         return stream_slice.extra_fields.get("jobs", []) if stream_slice else []

     def stream_slices(self) -> Iterable[Optional[StreamSlice]]:
-        yield from self.stream_slicer.stream_slices()
+        return self.stream_slicer.stream_slices()

     def read_records(
         self,
         records_schema: Mapping[str, Any],
         stream_slice: Optional[StreamSlice] = None,
     ) -> Iterable[StreamData]:
-        # emit the slice_descriptor log message, for connector builder TestRead
-        yield self.slice_logger.create_slice_log_message(stream_slice.cursor_slice)  # type: ignore
-
         stream_state: StreamState = self._get_stream_state()
         jobs: Iterable[AsyncJob] = self._validate_and_get_stream_slice_jobs(stream_slice)
         records: Iterable[Mapping[str, Any]] = self.stream_slicer.fetch_records(jobs)
```
airbyte_cdk/sources/http_logger.py

```diff
@@ -15,14 +15,11 @@ def format_http_message(
     description: str,
     stream_name: Optional[str],
     is_auxiliary: bool | None = None,
-    type: Optional[str] = None,
 ) -> LogMessage:
-    request_type: str = type if type else "HTTP"
     request = response.request
     log_message = {
         "http": {
             "title": title,
-            "type": request_type,
             "description": description,
             "request": {
                 "method": request.method,
```