airbyte-cdk 6.37.0.dev1__py3-none-any.whl → 6.37.2__py3-none-any.whl
This diff compares the contents of two publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
- airbyte_cdk/connector_builder/models.py +16 -14
- airbyte_cdk/connector_builder/test_reader/helpers.py +120 -22
- airbyte_cdk/connector_builder/test_reader/message_grouper.py +16 -3
- airbyte_cdk/connector_builder/test_reader/types.py +9 -1
- airbyte_cdk/sources/declarative/auth/token_provider.py +1 -0
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +43 -7
- airbyte_cdk/sources/declarative/datetime/datetime_parser.py +7 -1
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +77 -48
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +13 -2
- airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +1 -0
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +83 -17
- airbyte_cdk/sources/declarative/interpolation/macros.py +2 -0
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +37 -50
- airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +18 -4
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +171 -70
- airbyte_cdk/sources/declarative/partition_routers/__init__.py +0 -4
- airbyte_cdk/sources/declarative/requesters/README.md +5 -5
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +60 -17
- airbyte_cdk/sources/declarative/requesters/http_requester.py +49 -17
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +25 -4
- airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +6 -1
- airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +7 -2
- airbyte_cdk/sources/declarative/requesters/requester.py +7 -1
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +10 -3
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +21 -4
- airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +2 -2
- airbyte_cdk/sources/http_logger.py +3 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +2 -1
- airbyte_cdk/sources/streams/concurrent/state_converters/incrementing_count_stream_state_converter.py +92 -0
- airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +3 -3
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +1 -0
- airbyte_cdk/sources/types.py +1 -0
- airbyte_cdk/utils/mapping_helpers.py +18 -1
- {airbyte_cdk-6.37.0.dev1.dist-info → airbyte_cdk-6.37.2.dist-info}/METADATA +4 -4
- {airbyte_cdk-6.37.0.dev1.dist-info → airbyte_cdk-6.37.2.dist-info}/RECORD +39 -44
- airbyte_cdk/sources/declarative/partition_routers/grouping_partition_router.py +0 -136
- airbyte_cdk/sources/embedded/__init__.py +0 -3
- airbyte_cdk/sources/embedded/base_integration.py +0 -61
- airbyte_cdk/sources/embedded/catalog.py +0 -57
- airbyte_cdk/sources/embedded/runner.py +0 -57
- airbyte_cdk/sources/embedded/tools.py +0 -27
- {airbyte_cdk-6.37.0.dev1.dist-info → airbyte_cdk-6.37.2.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.37.0.dev1.dist-info → airbyte_cdk-6.37.2.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.37.0.dev1.dist-info → airbyte_cdk-6.37.2.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.37.0.dev1.dist-info → airbyte_cdk-6.37.2.dist-info}/entry_points.txt +0 -0
`airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py`

```diff
@@ -45,7 +45,7 @@ class AirbyteCustomCodeNotPermittedError(Exception):
 def _hash_text(input_text: str, hash_type: str = "md5") -> str:
     """Return the hash of the input text using the specified hash type."""
     if not input_text:
-        raise ValueError("
+        raise ValueError("Hash input text cannot be empty.")
 
     hash_object = CHECKSUM_FUNCTIONS[hash_type]()
     hash_object.update(input_text.encode())
@@ -68,6 +68,10 @@ def validate_python_code(
 
     Currently we fail if no checksums are provided, although this may change in the future.
     """
+    if not code_text:
+        # No code provided, nothing to validate.
+        return
+
     if not checksums:
         raise ValueError(f"A checksum is required to validate the code. Received: {checksums}")
 
@@ -77,8 +81,18 @@ def validate_python_code(
                 f"Unsupported checksum type: {checksum_type}. Supported checksum types are: {CHECKSUM_FUNCTIONS.keys()}"
             )
 
-
-
+        calculated_checksum = _hash_text(code_text, checksum_type)
+        if calculated_checksum != checksum:
+            raise AirbyteCodeTamperedError(
+                f"{checksum_type} checksum does not match."
+                + str(
+                    {
+                        "expected_checksum": checksum,
+                        "actual_checksum": calculated_checksum,
+                        "code_text": code_text,
+                    }
+                ),
+            )
 
 
 def get_registered_components_module(
@@ -94,7 +108,7 @@ def get_registered_components_module(
 
     Returns `None` if no components is provided and the `components` module is not found.
     """
-    if config and INJECTED_COMPONENTS_PY
+    if config and config.get(INJECTED_COMPONENTS_PY, None):
        if not custom_code_execution_permitted():
            raise AirbyteCustomCodeNotPermittedError
 
```
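Taken together, these hunks tighten the validation path for user-injected `components.py` code: empty code now short-circuits, and a checksum mismatch raises `AirbyteCodeTamperedError` carrying the expected and actual digests. A minimal sketch of the calling side, assuming the `__injected_components_py` / `__injected_components_py_checksums` config keys that the `INJECTED_COMPONENTS_PY` constant appears to refer to (the harness itself is hypothetical):

```python
import hashlib

from airbyte_cdk.sources.declarative.parsers.custom_code_compiler import (
    validate_python_code,
)

components_py = "def transform(record):\n    return record\n"

# Hypothetical connector config: the code travels alongside a checksum so the
# CDK can detect tampering before executing it. Key names are assumptions
# based on the INJECTED_COMPONENTS_PY constant referenced in the diff.
config = {
    "__injected_components_py": components_py,
    "__injected_components_py_checksums": {
        "md5": hashlib.md5(components_py.encode()).hexdigest(),
    },
}

# Passes silently when the md5 digest matches; raises AirbyteCodeTamperedError
# if the code text was modified after the checksum was computed.
validate_python_code(
    code_text=config["__injected_components_py"],
    checksums=config["__injected_components_py_checksums"],
)
```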
`airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py`

```diff
@@ -227,9 +227,6 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     FlattenFields as FlattenFieldsModel,
 )
-from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
-    GroupingPartitionRouter as GroupingPartitionRouterModel,
-)
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     GzipDecoder as GzipDecoderModel,
 )
```
```diff
@@ -248,6 +245,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     HttpResponseFilter as HttpResponseFilterModel,
 )
+from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
+    IncrementingCountCursor as IncrementingCountCursorModel,
+)
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     InlineSchemaLoader as InlineSchemaLoaderModel,
 )
```
```diff
@@ -382,7 +382,6 @@ from airbyte_cdk.sources.declarative.parsers.custom_code_compiler import (
 )
 from airbyte_cdk.sources.declarative.partition_routers import (
     CartesianProductStreamSlicer,
-    GroupingPartitionRouter,
     ListPartitionRouter,
     PartitionRouter,
     SinglePartitionRouter,
```
```diff
@@ -500,6 +499,9 @@ from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import (
     CustomFormatConcurrentStreamStateConverter,
     DateTimeStreamStateConverter,
 )
+from airbyte_cdk.sources.streams.concurrent.state_converters.incrementing_count_stream_state_converter import (
+    IncrementingCountStreamStateConverter,
+)
 from airbyte_cdk.sources.streams.http.error_handlers.response_models import ResponseAction
 from airbyte_cdk.sources.types import Config
 from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer
```
```diff
@@ -588,6 +590,7 @@ class ModelToComponentFactory:
             FlattenFieldsModel: self.create_flatten_fields,
             DpathFlattenFieldsModel: self.create_dpath_flatten_fields,
             IterableDecoderModel: self.create_iterable_decoder,
+            IncrementingCountCursorModel: self.create_incrementing_count_cursor,
             XmlDecoderModel: self.create_xml_decoder,
             JsonFileSchemaLoaderModel: self.create_json_file_schema_loader,
             DynamicSchemaLoaderModel: self.create_dynamic_schema_loader,
```
```diff
@@ -628,7 +631,6 @@ class ModelToComponentFactory:
             UnlimitedCallRatePolicyModel: self.create_unlimited_call_rate_policy,
             RateModel: self.create_rate,
             HttpRequestRegexMatcherModel: self.create_http_request_matcher,
-            GroupingPartitionRouterModel: self.create_grouping_partition_router,
         }
 
         # Needed for the case where we need to perform a second parse on the fields of a custom component
```
```diff
@@ -1194,6 +1196,70 @@ class ModelToComponentFactory:
             clamping_strategy=clamping_strategy,
         )
 
+    def create_concurrent_cursor_from_incrementing_count_cursor(
+        self,
+        model_type: Type[BaseModel],
+        component_definition: ComponentDefinition,
+        stream_name: str,
+        stream_namespace: Optional[str],
+        config: Config,
+        message_repository: Optional[MessageRepository] = None,
+        **kwargs: Any,
+    ) -> ConcurrentCursor:
+        # Per-partition incremental streams can dynamically create child cursors which will pass their current
+        # state via the stream_state keyword argument. Incremental syncs without parent streams use the
+        # incoming state and connector_state_manager that is initialized when the component factory is created
+        stream_state = (
+            self._connector_state_manager.get_stream_state(stream_name, stream_namespace)
+            if "stream_state" not in kwargs
+            else kwargs["stream_state"]
+        )
+
+        component_type = component_definition.get("type")
+        if component_definition.get("type") != model_type.__name__:
+            raise ValueError(
+                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
+            )
+
+        incrementing_count_cursor_model = model_type.parse_obj(component_definition)
+
+        if not isinstance(incrementing_count_cursor_model, IncrementingCountCursorModel):
+            raise ValueError(
+                f"Expected {model_type.__name__} component, but received {incrementing_count_cursor_model.__class__.__name__}"
+            )
+
+        interpolated_start_value = (
+            InterpolatedString.create(
+                incrementing_count_cursor_model.start_value,  # type: ignore
+                parameters=incrementing_count_cursor_model.parameters or {},
+            )
+            if incrementing_count_cursor_model.start_value
+            else 0
+        )
+
+        interpolated_cursor_field = InterpolatedString.create(
+            incrementing_count_cursor_model.cursor_field,
+            parameters=incrementing_count_cursor_model.parameters or {},
+        )
+        cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
+
+        connector_state_converter = IncrementingCountStreamStateConverter(
+            is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
+        )
+
+        return ConcurrentCursor(
+            stream_name=stream_name,
+            stream_namespace=stream_namespace,
+            stream_state=stream_state,
+            message_repository=message_repository or self._message_repository,
+            connector_state_manager=self._connector_state_manager,
+            connector_state_converter=connector_state_converter,
+            cursor_field=cursor_field,
+            slice_boundary_fields=None,
+            start=interpolated_start_value,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
+            end_provider=connector_state_converter.get_end_provider(),  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
+        )
+
     def _assemble_weekday(self, weekday: str) -> Weekday:
         match weekday:
             case "MONDAY":
```
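For context, the new factory method consumes a declarative `IncrementingCountCursor` definition and yields a `ConcurrentCursor` backed by the new `IncrementingCountStreamStateConverter`. A minimal sketch of how it might be invoked, assuming `factory` is an already-constructed `ModelToComponentFactory` (the driver code and field values are illustrative):

```python
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    IncrementingCountCursor as IncrementingCountCursorModel,
)

# Field names follow the model parsed by the factory; values are illustrative.
component_definition = {
    "type": "IncrementingCountCursor",  # must match model_type.__name__
    "cursor_field": "id",  # a monotonically increasing field on each record
    "start_value": 0,  # optional; the factory falls back to 0 when omitted
}

# `factory` is assumed to be an existing ModelToComponentFactory instance.
cursor = factory.create_concurrent_cursor_from_incrementing_count_cursor(
    model_type=IncrementingCountCursorModel,
    component_definition=component_definition,
    stream_name="users",
    stream_namespace=None,
    config={},
)
```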
```diff
@@ -1627,6 +1693,31 @@
                 config=config,
                 parameters=model.parameters or {},
             )
+        elif model.incremental_sync and isinstance(
+            model.incremental_sync, IncrementingCountCursorModel
+        ):
+            cursor_model: IncrementingCountCursorModel = model.incremental_sync  # type: ignore
+
+            start_time_option = (
+                self._create_component_from_model(
+                    cursor_model.start_value_option,  # type: ignore  # mypy still thinks cursor_model of type DatetimeBasedCursor
+                    config,
+                    parameters=cursor_model.parameters or {},
+                )
+                if cursor_model.start_value_option  # type: ignore  # mypy still thinks cursor_model of type DatetimeBasedCursor
+                else None
+            )
+
+            # The concurrent engine defaults the start/end fields on the slice to "start" and "end", but
+            # the default DatetimeBasedRequestOptionsProvider() sets them to start_time/end_time
+            partition_field_start = "start"
+
+            request_options_provider = DatetimeBasedRequestOptionsProvider(
+                start_time_option=start_time_option,
+                partition_field_start=partition_field_start,
+                config=config,
+                parameters=model.parameters or {},
+            )
         else:
             request_options_provider = None
 
```
```diff
@@ -2096,10 +2187,10 @@
     def create_json_decoder(model: JsonDecoderModel, config: Config, **kwargs: Any) -> Decoder:
         return JsonDecoder(parameters={})
 
-
-    def create_csv_decoder(model: CsvDecoderModel, config: Config, **kwargs: Any) -> Decoder:
+    def create_csv_decoder(self, model: CsvDecoderModel, config: Config, **kwargs: Any) -> Decoder:
         return CompositeRawDecoder(
-            parser=ModelToComponentFactory._get_parser(model, config),
+            parser=ModelToComponentFactory._get_parser(model, config),
+            stream_response=False if self._emit_connector_builder_messages else True,
         )
 
     @staticmethod
```
```diff
@@ -2108,10 +2199,28 @@
             parser=ModelToComponentFactory._get_parser(model, config), stream_response=True
         )
 
-
-
+    def create_gzip_decoder(
+        self, model: GzipDecoderModel, config: Config, **kwargs: Any
+    ) -> Decoder:
         return CompositeRawDecoder(
-            parser=ModelToComponentFactory._get_parser(model, config),
+            parser=ModelToComponentFactory._get_parser(model, config),
+            stream_response=False if self._emit_connector_builder_messages else True,
+        )
+
+    @staticmethod
+    def create_incrementing_count_cursor(
+        model: IncrementingCountCursorModel, config: Config, **kwargs: Any
+    ) -> DatetimeBasedCursor:
+        # This should not actually get used anywhere at runtime, but needed to add this to pass checks since
+        # we still parse models into components. The issue is that there's no runtime implementation of a
+        # IncrementingCountCursor.
+        # A known and expected issue with this stub is running a check with the declared IncrementingCountCursor because it is run without ConcurrentCursor.
+        return DatetimeBasedCursor(
+            cursor_field=model.cursor_field,
+            datetime_format="%Y-%m-%d",
+            start_datetime="2024-12-12",
+            config=config,
+            parameters={},
         )
 
     @staticmethod
```
```diff
@@ -2632,6 +2741,47 @@
         transformations: List[RecordTransformation],
         **kwargs: Any,
     ) -> AsyncRetriever:
+        def _get_download_retriever() -> SimpleRetrieverTestReadDecorator | SimpleRetriever:
+            record_selector = RecordSelector(
+                extractor=download_extractor,
+                name=name,
+                record_filter=None,
+                transformations=transformations,
+                schema_normalization=TypeTransformer(TransformConfig.NoTransform),
+                config=config,
+                parameters={},
+            )
+            paginator = (
+                self._create_component_from_model(
+                    model=model.download_paginator, decoder=decoder, config=config, url_base=""
+                )
+                if model.download_paginator
+                else NoPagination(parameters={})
+            )
+            maximum_number_of_slices = self._limit_slices_fetched or 5
+
+            if self._limit_slices_fetched or self._emit_connector_builder_messages:
+                return SimpleRetrieverTestReadDecorator(
+                    requester=download_requester,
+                    record_selector=record_selector,
+                    primary_key=None,
+                    name=job_download_components_name,
+                    paginator=paginator,
+                    config=config,
+                    parameters={},
+                    maximum_number_of_slices=maximum_number_of_slices,
+                )
+
+            return SimpleRetriever(
+                requester=download_requester,
+                record_selector=record_selector,
+                primary_key=None,
+                name=job_download_components_name,
+                paginator=paginator,
+                config=config,
+                parameters={},
+            )
+
         decoder = (
             self._create_component_from_model(model=model.decoder, config=config)
             if model.decoder
```
```diff
@@ -2685,29 +2835,7 @@
             config=config,
             name=job_download_components_name,
         )
-        download_retriever =
-            requester=download_requester,
-            record_selector=RecordSelector(
-                extractor=download_extractor,
-                name=name,
-                record_filter=None,
-                transformations=transformations,
-                schema_normalization=TypeTransformer(TransformConfig.NoTransform),
-                config=config,
-                parameters={},
-            ),
-            primary_key=None,
-            name=job_download_components_name,
-            paginator=(
-                self._create_component_from_model(
-                    model=model.download_paginator, decoder=decoder, config=config, url_base=""
-                )
-                if model.download_paginator
-                else NoPagination(parameters={})
-            ),
-            config=config,
-            parameters={},
-        )
+        download_retriever = _get_download_retriever()
         abort_requester = (
             self._create_component_from_model(
                 model=model.abort_requester,
```
```diff
@@ -2728,32 +2856,32 @@
             if model.delete_requester
             else None
         )
-
+        download_target_requester = (
             self._create_component_from_model(
-                model=model.
+                model=model.download_target_requester,
                 decoder=decoder,
                 config=config,
                 name=f"job extract_url - {name}",
             )
-            if model.
+            if model.download_target_requester
             else None
         )
         status_extractor = self._create_component_from_model(
             model=model.status_extractor, decoder=decoder, config=config, name=name
         )
-
-        model=model.
+        download_target_extractor = self._create_component_from_model(
+            model=model.download_target_extractor, decoder=decoder, config=config, name=name
         )
         job_repository: AsyncJobRepository = AsyncHttpJobRepository(
             creation_requester=creation_requester,
             polling_requester=polling_requester,
             download_retriever=download_retriever,
-
+            download_target_requester=download_target_requester,
             abort_requester=abort_requester,
             delete_requester=delete_requester,
             status_extractor=status_extractor,
             status_mapping=self._create_async_job_status_mapping(model.status_mapping, config),
-
+            download_target_extractor=download_target_extractor,
         )
 
         async_job_partition_router = AsyncJobPartitionRouter(
```
```diff
@@ -3029,8 +3157,9 @@
         )
 
     def create_rate(self, model: RateModel, config: Config, **kwargs: Any) -> Rate:
+        interpolated_limit = InterpolatedString.create(str(model.limit), parameters={})
         return Rate(
-            limit=
+            limit=int(interpolated_limit.eval(config=config)),
             interval=parse_duration(model.interval),
         )
 
```
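The change above means `Rate.limit` no longer has to be a literal integer in the manifest; it is routed through `InterpolatedString`, so it can be computed from the connector config. A small sketch of the evaluation the factory now performs (the config key is illustrative):

```python
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import (
    InterpolatedString,
)

# A manifest can now express the limit as an interpolated expression
# rather than a hard-coded int; "api_rate_limit" is an illustrative key.
config = {"api_rate_limit": 100}
interpolated_limit = InterpolatedString.create(
    "{{ config['api_rate_limit'] }}", parameters={}
)
assert int(interpolated_limit.eval(config=config)) == 100
```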
```diff
@@ -3049,31 +3178,3 @@
         self._api_budget = self.create_component(
             model_type=HTTPAPIBudgetModel, component_definition=component_definition, config=config
         )
-
-    def create_grouping_partition_router(
-        self, model: GroupingPartitionRouterModel, config: Config, **kwargs: Any
-    ) -> GroupingPartitionRouter:
-        underlying_router = self._create_component_from_model(
-            model=model.underlying_partition_router, config=config
-        )
-        if model.group_size < 1:
-            raise ValueError(f"Group size must be greater than 0, got {model.group_size}")
-
-        if not isinstance(underlying_router, PartitionRouter):
-            raise ValueError(
-                f"Underlying partition router must be a PartitionRouter subclass, got {type(underlying_router)}"
-            )
-
-        if isinstance(underlying_router, SubstreamPartitionRouter):
-            if any(
-                parent_config.request_option
-                for parent_config in underlying_router.parent_stream_configs
-            ):
-                raise ValueError("Request options are not supported for GroupingPartitionRouter.")
-
-        return GroupingPartitionRouter(
-            group_size=model.group_size,
-            underlying_partition_router=underlying_router,
-            deduplicate=model.deduplicate if model.deduplicate is not None else True,
-            config=config,
-        )
```
`airbyte_cdk/sources/declarative/partition_routers/__init__.py`

```diff
@@ -8,9 +8,6 @@ from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_router import (
 from airbyte_cdk.sources.declarative.partition_routers.cartesian_product_stream_slicer import (
     CartesianProductStreamSlicer,
 )
-from airbyte_cdk.sources.declarative.partition_routers.grouping_partition_router import (
-    GroupingPartitionRouter,
-)
 from airbyte_cdk.sources.declarative.partition_routers.list_partition_router import (
     ListPartitionRouter,
 )
@@ -25,7 +22,6 @@ from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import (
 __all__ = [
     "AsyncJobPartitionRouter",
     "CartesianProductStreamSlicer",
-    "GroupingPartitionRouter",
     "ListPartitionRouter",
     "SinglePartitionRouter",
     "SubstreamPartitionRouter",
```
`airbyte_cdk/sources/declarative/requesters/README.md`

```diff
@@ -1,8 +1,8 @@
 # AsyncHttpJobRepository sequence diagram
 
 - Components marked as optional are not required and can be ignored.
-- if `
-- interpolation_context, e.g. `
+- if `download_target_requester` is not provided, `download_target_extractor` will get urls from the `polling_response`
+- interpolation_context, e.g. `creation_response` or `polling_response` can be obtained from stream_slice
 
 ```mermaid
 ---
@@ -12,7 +12,7 @@ sequenceDiagram
     participant AsyncHttpJobRepository as AsyncOrchestrator
     participant CreationRequester as creation_requester
     participant PollingRequester as polling_requester
-    participant UrlRequester as
+    participant UrlRequester as download_target_requester (Optional)
     participant DownloadRetriever as download_retriever
     participant AbortRequester as abort_requester (Optional)
     participant DeleteRequester as delete_requester (Optional)
@@ -25,14 +25,14 @@ sequenceDiagram
 
     loop Poll for job status
         AsyncHttpJobRepository ->> PollingRequester: Check job status
-        PollingRequester ->> Reporting Server: Status request (interpolation_context: `
+        PollingRequester ->> Reporting Server: Status request (interpolation_context: `creation_response`)
         Reporting Server -->> PollingRequester: Status response
         PollingRequester -->> AsyncHttpJobRepository: Job status
     end
 
     alt Status: Ready
         AsyncHttpJobRepository ->> UrlRequester: Request download URLs (if applicable)
-        UrlRequester ->> Reporting Server: URL request (interpolation_context: `
+        UrlRequester ->> Reporting Server: URL request (interpolation_context: `polling_response`)
         Reporting Server -->> UrlRequester: Download URLs
         UrlRequester -->> AsyncHttpJobRepository: Download URLs
 
```
`airbyte_cdk/sources/declarative/requesters/http_job_repository.py`

```diff
@@ -23,6 +23,7 @@ from airbyte_cdk.sources.declarative.extractors.response_to_file_extractor import (
 )
 from airbyte_cdk.sources.declarative.requesters.requester import Requester
 from airbyte_cdk.sources.declarative.retrievers.simple_retriever import SimpleRetriever
+from airbyte_cdk.sources.http_logger import format_http_message
 from airbyte_cdk.sources.types import Record, StreamSlice
 from airbyte_cdk.utils import AirbyteTracedException
 
```
```diff
@@ -42,13 +43,13 @@ class AsyncHttpJobRepository(AsyncJobRepository):
     delete_requester: Optional[Requester]
     status_extractor: DpathExtractor
     status_mapping: Mapping[str, AsyncJobStatus]
-
+    download_target_extractor: DpathExtractor
 
     job_timeout: Optional[timedelta] = None
     record_extractor: RecordExtractor = field(
         init=False, repr=False, default_factory=lambda: ResponseToFileExtractor({})
     )
-
+    download_target_requester: Optional[Requester] = (
         None  # use it in case polling_requester provides some <id> and extra request is needed to obtain list of urls to download from
     )
 
```
```diff
@@ -71,7 +72,15 @@
         """
 
         polling_response: Optional[requests.Response] = self.polling_requester.send_request(
-            stream_slice=stream_slice
+            stream_slice=stream_slice,
+            log_formatter=lambda polling_response: format_http_message(
+                response=polling_response,
+                title="Async Job -- Polling",
+                description="Poll the status of the server-side async job.",
+                stream_name=None,
+                is_auxiliary=True,
+                type="ASYNC_POLL",
+            ),
         )
         if polling_response is None:
             raise AirbyteTracedException(
@@ -118,8 +127,17 @@
         """
 
         response: Optional[requests.Response] = self.creation_requester.send_request(
-            stream_slice=stream_slice
+            stream_slice=stream_slice,
+            log_formatter=lambda response: format_http_message(
+                response=response,
+                title="Async Job -- Create",
+                description="Create the server-side async job.",
+                stream_name=None,
+                is_auxiliary=True,
+                type="ASYNC_CREATE",
+            ),
         )
+
         if not response:
             raise AirbyteTracedException(
                 internal_message="Always expect a response or an exception from creation_requester",
```
```diff
@@ -193,12 +211,15 @@
 
         """
 
-        for
+        for target_url in self._get_download_targets(job):
             job_slice = job.job_parameters()
             stream_slice = StreamSlice(
                 partition=job_slice.partition,
                 cursor_slice=job_slice.cursor_slice,
-                extra_fields={
+                extra_fields={
+                    **job_slice.extra_fields,
+                    "download_target": target_url,
+                },
             )
             for message in self.download_retriever.read_records({}, stream_slice):
                 if isinstance(message, Record):
```
```diff
@@ -217,13 +238,33 @@
         if not self.abort_requester:
             return
 
-        self.abort_requester.send_request(
+        abort_response = self.abort_requester.send_request(
+            stream_slice=self._get_create_job_stream_slice(job),
+            log_formatter=lambda abort_response: format_http_message(
+                response=abort_response,
+                title="Async Job -- Abort",
+                description="Abort the running server-side async job.",
+                stream_name=None,
+                is_auxiliary=True,
+                type="ASYNC_ABORT",
+            ),
+        )
 
     def delete(self, job: AsyncJob) -> None:
         if not self.delete_requester:
             return
 
-        self.delete_requester.send_request(
+        delete_job_reponse = self.delete_requester.send_request(
+            stream_slice=self._get_create_job_stream_slice(job),
+            log_formatter=lambda delete_job_reponse: format_http_message(
+                response=delete_job_reponse,
+                title="Async Job -- Delete",
+                description="Delete the specified job from the list of Jobs.",
+                stream_name=None,
+                is_auxiliary=True,
+                type="ASYNC_DELETE",
+            ),
+        )
         self._clean_up_job(job.api_job_id())
 
     def _clean_up_job(self, job_id: str) -> None:
```
```diff
@@ -231,27 +272,29 @@
         del self._polling_job_response_by_id[job_id]
 
     def _get_create_job_stream_slice(self, job: AsyncJob) -> StreamSlice:
+        creation_response = self._create_job_response_by_id[job.api_job_id()].json()
         stream_slice = StreamSlice(
-            partition={
+            partition={},
             cursor_slice={},
+            extra_fields={"creation_response": creation_response},
         )
         return stream_slice
 
-    def
-        if not self.
+    def _get_download_targets(self, job: AsyncJob) -> Iterable[str]:
+        if not self.download_target_requester:
             url_response = self._polling_job_response_by_id[job.api_job_id()]
         else:
+            polling_response = self._polling_job_response_by_id[job.api_job_id()].json()
             stream_slice: StreamSlice = StreamSlice(
-                partition={
-                    "polling_job_response": self._polling_job_response_by_id[job.api_job_id()]
-                },
+                partition={},
                 cursor_slice={},
+                extra_fields={"polling_response": polling_response},
             )
-            url_response = self.
+            url_response = self.download_target_requester.send_request(stream_slice=stream_slice)  # type: ignore # we expect download_target_requester to always be presented, otherwise raise an exception as we cannot proceed with the report
         if not url_response:
             raise AirbyteTracedException(
-                internal_message="Always expect a response or an exception from
+                internal_message="Always expect a response or an exception from download_target_requester",
                 failure_type=FailureType.system_error,
             )
 
-        yield from self.
+        yield from self.download_target_extractor.extract_records(url_response)  # type: ignore # we expect download_target_extractor to always return list of strings
```
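The renamed `_get_download_targets` makes the contract explicit: each extracted target URL is attached to the download slice under `extra_fields["download_target"]` rather than the partition. A small sketch of the slice shape this produces (the URL is illustrative, and the interpolation reference in the comment is an assumption about how a manifest would consume it):

```python
from airbyte_cdk.sources.types import StreamSlice

# Mirrors the slice built by fetch_records/_get_download_targets: the target
# URL rides on extra_fields, leaving partition and cursor_slice untouched.
stream_slice = StreamSlice(
    partition={},
    cursor_slice={},
    extra_fields={"download_target": "https://example.com/report/123.csv"},
)

# A download requester would then interpolate it, e.g. (assumption):
#   url: "{{ stream_slice.extra_fields['download_target'] }}"
print(stream_slice.extra_fields["download_target"])
```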