airbyte-cdk 6.43.1__py3-none-any.whl → 6.45.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/declarative/checks/__init__.py +5 -2
- airbyte_cdk/sources/declarative/checks/check_stream.py +113 -11
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +139 -2
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +94 -8
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +219 -11
- airbyte_cdk/sources/declarative/requesters/query_properties/__init__.py +13 -0
- airbyte_cdk/sources/declarative/requesters/query_properties/properties_from_endpoint.py +40 -0
- airbyte_cdk/sources/declarative/requesters/query_properties/property_chunking.py +69 -0
- airbyte_cdk/sources/declarative/requesters/query_properties/query_properties.py +58 -0
- airbyte_cdk/sources/declarative/requesters/query_properties/strategies/__init__.py +10 -0
- airbyte_cdk/sources/declarative/requesters/query_properties/strategies/group_by_key.py +33 -0
- airbyte_cdk/sources/declarative/requesters/query_properties/strategies/merge_strategy.py +19 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +25 -2
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +101 -30
- {airbyte_cdk-6.43.1.dist-info → airbyte_cdk-6.45.0.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.43.1.dist-info → airbyte_cdk-6.45.0.dist-info}/RECORD +20 -13
- {airbyte_cdk-6.43.1.dist-info → airbyte_cdk-6.45.0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.43.1.dist-info → airbyte_cdk-6.45.0.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.43.1.dist-info → airbyte_cdk-6.45.0.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.43.1.dist-info → airbyte_cdk-6.45.0.dist-info}/entry_points.txt +0 -0
@@ -1,5 +1,5 @@
|
|
1
1
|
#
|
2
|
-
# Copyright (c)
|
2
|
+
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
5
|
from __future__ import annotations
|
@@ -54,7 +54,11 @@ from airbyte_cdk.sources.declarative.auth.token_provider import (
|
|
54
54
|
SessionTokenProvider,
|
55
55
|
TokenProvider,
|
56
56
|
)
|
57
|
-
from airbyte_cdk.sources.declarative.checks import
|
57
|
+
from airbyte_cdk.sources.declarative.checks import (
|
58
|
+
CheckDynamicStream,
|
59
|
+
CheckStream,
|
60
|
+
DynamicStreamCheckConfig,
|
61
|
+
)
|
58
62
|
from airbyte_cdk.sources.declarative.concurrency_level import ConcurrencyLevel
|
59
63
|
from airbyte_cdk.sources.declarative.datetime.min_max_datetime import MinMaxDatetime
|
60
64
|
from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
|
@@ -218,6 +222,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
218
222
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
219
223
|
DynamicSchemaLoader as DynamicSchemaLoaderModel,
|
220
224
|
)
|
225
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
226
|
+
DynamicStreamCheckConfig as DynamicStreamCheckConfigModel,
|
227
|
+
)
|
221
228
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
222
229
|
ExponentialBackoffStrategy as ExponentialBackoffStrategyModel,
|
223
230
|
)
|
@@ -227,6 +234,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
227
234
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
228
235
|
FlattenFields as FlattenFieldsModel,
|
229
236
|
)
|
237
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
238
|
+
GroupByKeyMergeStrategy as GroupByKeyMergeStrategyModel,
|
239
|
+
)
|
230
240
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
231
241
|
GroupingPartitionRouter as GroupingPartitionRouterModel,
|
232
242
|
)
|
@@ -317,6 +327,18 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
317
327
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
318
328
|
ParentStreamConfig as ParentStreamConfigModel,
|
319
329
|
)
|
330
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
331
|
+
PropertiesFromEndpoint as PropertiesFromEndpointModel,
|
332
|
+
)
|
333
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
334
|
+
PropertyChunking as PropertyChunkingModel,
|
335
|
+
)
|
336
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
337
|
+
PropertyLimitType as PropertyLimitTypeModel,
|
338
|
+
)
|
339
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
340
|
+
QueryProperties as QueryPropertiesModel,
|
341
|
+
)
|
320
342
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
321
343
|
Rate as RateModel,
|
322
344
|
)
|
@@ -425,6 +447,17 @@ from airbyte_cdk.sources.declarative.requesters.paginators.strategies import (
|
|
425
447
|
PageIncrement,
|
426
448
|
StopConditionPaginationStrategyDecorator,
|
427
449
|
)
|
450
|
+
from airbyte_cdk.sources.declarative.requesters.query_properties import (
|
451
|
+
PropertiesFromEndpoint,
|
452
|
+
PropertyChunking,
|
453
|
+
QueryProperties,
|
454
|
+
)
|
455
|
+
from airbyte_cdk.sources.declarative.requesters.query_properties.property_chunking import (
|
456
|
+
PropertyLimitType,
|
457
|
+
)
|
458
|
+
from airbyte_cdk.sources.declarative.requesters.query_properties.strategies import (
|
459
|
+
GroupByKey,
|
460
|
+
)
|
428
461
|
from airbyte_cdk.sources.declarative.requesters.request_option import RequestOptionType
|
429
462
|
from airbyte_cdk.sources.declarative.requesters.request_options import (
|
430
463
|
DatetimeBasedRequestOptionsProvider,
|
@@ -559,6 +592,7 @@ class ModelToComponentFactory:
|
|
559
592
|
BasicHttpAuthenticatorModel: self.create_basic_http_authenticator,
|
560
593
|
BearerAuthenticatorModel: self.create_bearer_authenticator,
|
561
594
|
CheckStreamModel: self.create_check_stream,
|
595
|
+
DynamicStreamCheckConfigModel: self.create_dynamic_stream_check_config,
|
562
596
|
CheckDynamicStreamModel: self.create_check_dynamic_stream,
|
563
597
|
CompositeErrorHandlerModel: self.create_composite_error_handler,
|
564
598
|
ConcurrencyLevelModel: self.create_concurrency_level,
|
@@ -588,6 +622,7 @@ class ModelToComponentFactory:
|
|
588
622
|
ResponseToFileExtractorModel: self.create_response_to_file_extractor,
|
589
623
|
ExponentialBackoffStrategyModel: self.create_exponential_backoff_strategy,
|
590
624
|
SessionTokenAuthenticatorModel: self.create_session_token_authenticator,
|
625
|
+
GroupByKeyMergeStrategyModel: self.create_group_by_key,
|
591
626
|
HttpRequesterModel: self.create_http_requester,
|
592
627
|
HttpResponseFilterModel: self.create_http_response_filter,
|
593
628
|
InlineSchemaLoaderModel: self.create_inline_schema_loader,
|
@@ -617,6 +652,9 @@ class ModelToComponentFactory:
|
|
617
652
|
OffsetIncrementModel: self.create_offset_increment,
|
618
653
|
PageIncrementModel: self.create_page_increment,
|
619
654
|
ParentStreamConfigModel: self.create_parent_stream_config,
|
655
|
+
PropertiesFromEndpointModel: self.create_properties_from_endpoint,
|
656
|
+
PropertyChunkingModel: self.create_property_chunking,
|
657
|
+
QueryPropertiesModel: self.create_query_properties,
|
620
658
|
RecordFilterModel: self.create_record_filter,
|
621
659
|
RecordSelectorModel: self.create_record_selector,
|
622
660
|
RemoveFieldsModel: self.create_remove_fields,
|
@@ -936,8 +974,36 @@ class ModelToComponentFactory:
|
|
936
974
|
)
|
937
975
|
|
938
976
|
@staticmethod
|
939
|
-
def
|
940
|
-
|
977
|
+
def create_dynamic_stream_check_config(
    model: DynamicStreamCheckConfigModel, config: Config, **kwargs: Any
) -> DynamicStreamCheckConfig:
    """Translate a parsed DynamicStreamCheckConfig model into its runtime component.

    :param model: the parsed manifest model to convert
    :param config: the connector configuration (not used by this component)
    :param kwargs: extra factory arguments, accepted and ignored
    :return: a DynamicStreamCheckConfig whose ``stream_count`` falls back to 0 when the
        model does not define one
    """
    stream_count = model.stream_count if model.stream_count else 0
    return DynamicStreamCheckConfig(
        dynamic_stream_name=model.dynamic_stream_name,
        stream_count=stream_count,
    )
|
984
|
+
|
985
|
+
def create_check_stream(
    self, model: CheckStreamModel, config: Config, **kwargs: Any
) -> CheckStream:
    """Build the CheckStream connection checker from its parsed manifest model.

    :param model: the parsed CheckStream model
    :param config: the connector configuration, forwarded to nested components
    :param kwargs: extra factory arguments, accepted and ignored
    :return: a CheckStream covering the static stream names and/or the dynamic stream checks
    :raises ValueError: when neither ``stream_names`` nor ``dynamic_streams_check_configs`` is set
    """
    has_stream_names = model.stream_names is not None
    has_dynamic_checks = model.dynamic_streams_check_configs is not None
    if not (has_stream_names or has_dynamic_checks):
        raise ValueError(
            "Expected either stream_names or dynamic_streams_check_configs to be set for CheckStream"
        )

    # Each dynamic check config is itself a declarative component, so it goes through the factory.
    dynamic_check_components = []
    for check_config_model in model.dynamic_streams_check_configs or []:
        dynamic_check_components.append(
            self._create_component_from_model(model=check_config_model, config=config)
        )

    return CheckStream(
        stream_names=model.stream_names or [],
        dynamic_streams_check_configs=dynamic_check_components,
        parameters={},
    )
|
941
1007
|
|
942
1008
|
@staticmethod
|
943
1009
|
def create_check_dynamic_stream(
|
@@ -2047,8 +2113,8 @@ class ModelToComponentFactory:
|
|
2047
2113
|
parameters=model.parameters or {},
|
2048
2114
|
)
|
2049
2115
|
|
2116
|
+
@staticmethod
|
2050
2117
|
def create_response_to_file_extractor(
|
2051
|
-
self,
|
2052
2118
|
model: ResponseToFileExtractorModel,
|
2053
2119
|
**kwargs: Any,
|
2054
2120
|
) -> ResponseToFileExtractor:
|
@@ -2062,11 +2128,17 @@ class ModelToComponentFactory:
|
|
2062
2128
|
factor=model.factor or 5, parameters=model.parameters or {}, config=config
|
2063
2129
|
)
|
2064
2130
|
|
2131
|
+
@staticmethod
|
2132
|
+
def create_group_by_key(
    model: GroupByKeyMergeStrategyModel, config: Config, **kwargs: Any
) -> GroupByKey:
    """Build the GroupByKey record merge strategy from its parsed manifest model.

    :param model: the parsed GroupByKeyMergeStrategy model
    :param config: the connector configuration
    :param kwargs: extra factory arguments, accepted and ignored. Callers such as
        create_property_chunking forward their own ``**kwargs`` when building this component,
        so rejecting unknown keywords here would surface as a TypeError.
    :return: the GroupByKey strategy for the model's key(s)
    """
    return GroupByKey(key=model.key, config=config, parameters=model.parameters or {})
|
2134
|
+
|
2065
2135
|
def create_http_requester(
|
2066
2136
|
self,
|
2067
2137
|
model: HttpRequesterModel,
|
2068
2138
|
config: Config,
|
2069
2139
|
decoder: Decoder = JsonDecoder(parameters={}),
|
2140
|
+
query_properties_key: Optional[str] = None,
|
2141
|
+
use_cache: Optional[bool] = None,
|
2070
2142
|
*,
|
2071
2143
|
name: str,
|
2072
2144
|
) -> HttpRequester:
|
@@ -2099,6 +2171,7 @@ class ModelToComponentFactory:
|
|
2099
2171
|
request_body_json=model.request_body_json,
|
2100
2172
|
request_headers=model.request_headers,
|
2101
2173
|
request_parameters=model.request_parameters,
|
2174
|
+
query_properties_key=query_properties_key,
|
2102
2175
|
config=config,
|
2103
2176
|
parameters=model.parameters or {},
|
2104
2177
|
)
|
@@ -2106,7 +2179,7 @@ class ModelToComponentFactory:
|
|
2106
2179
|
assert model.use_cache is not None # for mypy
|
2107
2180
|
assert model.http_method is not None # for mypy
|
2108
2181
|
|
2109
|
-
|
2182
|
+
should_use_cache = (model.use_cache or bool(use_cache)) and not self._disable_cache
|
2110
2183
|
|
2111
2184
|
return HttpRequester(
|
2112
2185
|
name=name,
|
@@ -2121,7 +2194,7 @@ class ModelToComponentFactory:
|
|
2121
2194
|
disable_retries=self._disable_retries,
|
2122
2195
|
parameters=model.parameters or {},
|
2123
2196
|
message_repository=self._message_repository,
|
2124
|
-
use_cache=
|
2197
|
+
use_cache=should_use_cache,
|
2125
2198
|
decoder=decoder,
|
2126
2199
|
stream_response=decoder.is_stream_response() if decoder else False,
|
2127
2200
|
)
|
@@ -2225,10 +2298,11 @@ class ModelToComponentFactory:
|
|
2225
2298
|
retriever = self._create_component_from_model(
|
2226
2299
|
model=model.retriever,
|
2227
2300
|
config=config,
|
2228
|
-
name="",
|
2301
|
+
name="dynamic_properties",
|
2229
2302
|
primary_key=None,
|
2230
2303
|
stream_slicer=combined_slicers,
|
2231
2304
|
transformations=[],
|
2305
|
+
use_cache=True,
|
2232
2306
|
)
|
2233
2307
|
schema_type_identifier = self._create_component_from_model(
|
2234
2308
|
model.schema_type_identifier, config=config, parameters=model.parameters or {}
|
@@ -2566,6 +2640,79 @@ class ModelToComponentFactory:
|
|
2566
2640
|
lazy_read_pointer=model_lazy_read_pointer,
|
2567
2641
|
)
|
2568
2642
|
|
2643
|
+
def create_properties_from_endpoint(
    self, model: PropertiesFromEndpointModel, config: Config, **kwargs: Any
) -> PropertiesFromEndpoint:
    """Build the component that fetches the list of query properties from an API endpoint.

    :param model: the parsed PropertiesFromEndpoint model
    :param config: the connector configuration
    :param kwargs: extra factory arguments, accepted and ignored
    :return: a PropertiesFromEndpoint wrapping a retriever built from ``model.retriever``
    """
    # Caching is enabled on the HttpRequester/HttpClient because the properties endpoint is
    # called for every slice being processed, and it is highly unlikely for the response to differ.
    retriever_arguments = {
        "name": "dynamic_properties",
        "primary_key": None,
        "stream_slicer": None,
        "transformations": [],
        "use_cache": True,
    }
    properties_retriever = self._create_component_from_model(
        model=model.retriever,
        config=config,
        **retriever_arguments,
    )
    return PropertiesFromEndpoint(
        property_field_path=model.property_field_path,
        retriever=properties_retriever,
        config=config,
        parameters=model.parameters or {},
    )
|
2661
|
+
|
2662
|
+
def create_property_chunking(
|
2663
|
+
self, model: PropertyChunkingModel, config: Config, **kwargs: Any
|
2664
|
+
) -> PropertyChunking:
|
2665
|
+
record_merge_strategy = (
|
2666
|
+
self._create_component_from_model(
|
2667
|
+
model=model.record_merge_strategy, config=config, **kwargs
|
2668
|
+
)
|
2669
|
+
if model.record_merge_strategy
|
2670
|
+
else None
|
2671
|
+
)
|
2672
|
+
|
2673
|
+
property_limit_type: PropertyLimitType
|
2674
|
+
match model.property_limit_type:
|
2675
|
+
case PropertyLimitTypeModel.property_count:
|
2676
|
+
property_limit_type = PropertyLimitType.property_count
|
2677
|
+
case PropertyLimitTypeModel.characters:
|
2678
|
+
property_limit_type = PropertyLimitType.characters
|
2679
|
+
case _:
|
2680
|
+
raise ValueError(f"Invalid PropertyLimitType {property_limit_type}")
|
2681
|
+
|
2682
|
+
return PropertyChunking(
|
2683
|
+
property_limit_type=property_limit_type,
|
2684
|
+
property_limit=model.property_limit,
|
2685
|
+
record_merge_strategy=record_merge_strategy,
|
2686
|
+
config=config,
|
2687
|
+
parameters=model.parameters or {},
|
2688
|
+
)
|
2689
|
+
|
2690
|
+
def create_query_properties(
    self, model: QueryPropertiesModel, config: Config, **kwargs: Any
) -> QueryProperties:
    """Build the QueryProperties component from its parsed manifest model.

    :param model: the parsed QueryProperties model
    :param config: the connector configuration
    :param kwargs: extra factory arguments, forwarded to nested component creation
    :return: a QueryProperties holding either the static property list or the component that
        resolves it, plus the optional chunking component
    """
    declared_property_list = model.property_list
    # A plain list is used as-is; anything else is a nested component definition.
    property_list = (
        declared_property_list
        if isinstance(declared_property_list, list)
        else self._create_component_from_model(
            model=declared_property_list, config=config, **kwargs
        )
    )

    property_chunking = None
    if model.property_chunking:
        property_chunking = self._create_component_from_model(
            model=model.property_chunking, config=config, **kwargs
        )

    return QueryProperties(
        property_list=property_list,
        always_include_properties=model.always_include_properties,
        property_chunking=property_chunking,
        config=config,
        parameters=model.parameters or {},
    )
|
2715
|
+
|
2569
2716
|
@staticmethod
|
2570
2717
|
def create_record_filter(
|
2571
2718
|
model: RecordFilterModel, config: Config, **kwargs: Any
|
@@ -2711,6 +2858,7 @@ class ModelToComponentFactory:
|
|
2711
2858
|
IncrementingCountCursorModel, DatetimeBasedCursorModel, CustomIncrementalSyncModel
|
2712
2859
|
]
|
2713
2860
|
] = None,
|
2861
|
+
use_cache: Optional[bool] = None,
|
2714
2862
|
**kwargs: Any,
|
2715
2863
|
) -> SimpleRetriever:
|
2716
2864
|
decoder = (
|
@@ -2718,9 +2866,6 @@ class ModelToComponentFactory:
|
|
2718
2866
|
if model.decoder
|
2719
2867
|
else JsonDecoder(parameters={})
|
2720
2868
|
)
|
2721
|
-
requester = self._create_component_from_model(
|
2722
|
-
model=model.requester, decoder=decoder, config=config, name=name
|
2723
|
-
)
|
2724
2869
|
record_selector = self._create_component_from_model(
|
2725
2870
|
model=model.record_selector,
|
2726
2871
|
name=name,
|
@@ -2729,6 +2874,57 @@ class ModelToComponentFactory:
|
|
2729
2874
|
transformations=transformations,
|
2730
2875
|
client_side_incremental_sync=client_side_incremental_sync,
|
2731
2876
|
)
|
2877
|
+
|
2878
|
+
query_properties: Optional[QueryProperties] = None
|
2879
|
+
query_properties_key: Optional[str] = None
|
2880
|
+
if (
|
2881
|
+
hasattr(model.requester, "request_parameters")
|
2882
|
+
and model.requester.request_parameters
|
2883
|
+
and isinstance(model.requester.request_parameters, Mapping)
|
2884
|
+
):
|
2885
|
+
query_properties_definitions = []
|
2886
|
+
for key, request_parameter in model.requester.request_parameters.items():
|
2887
|
+
# When translating JSON schema into Pydantic models, enforcing types for arrays containing both
|
2888
|
+
# concrete strings and complex object definitions like QueryProperties would get resolved to Union[str, Any].
|
2889
|
+
# This adds the extra validation that we couldn't get for free in Pydantic model generation
|
2890
|
+
if (
|
2891
|
+
isinstance(request_parameter, Mapping)
|
2892
|
+
and request_parameter.get("type") == "QueryProperties"
|
2893
|
+
):
|
2894
|
+
query_properties_key = key
|
2895
|
+
query_properties_definitions.append(request_parameter)
|
2896
|
+
elif not isinstance(request_parameter, str):
|
2897
|
+
raise ValueError(
|
2898
|
+
f"Each element of request_parameters should be of type str or QueryProperties, but received {request_parameter.get('type')}"
|
2899
|
+
)
|
2900
|
+
|
2901
|
+
if len(query_properties_definitions) > 1:
|
2902
|
+
raise ValueError(
|
2903
|
+
f"request_parameters only supports defining one QueryProperties field, but found {len(query_properties_definitions)} usages"
|
2904
|
+
)
|
2905
|
+
|
2906
|
+
if len(query_properties_definitions) == 1:
|
2907
|
+
query_properties = self.create_component(
|
2908
|
+
model_type=QueryPropertiesModel,
|
2909
|
+
component_definition=query_properties_definitions[0],
|
2910
|
+
config=config,
|
2911
|
+
)
|
2912
|
+
|
2913
|
+
# Removes QueryProperties components from the interpolated mappings because it will be resolved in
|
2914
|
+
# the provider from the slice directly instead of through jinja interpolation
|
2915
|
+
if isinstance(model.requester.request_parameters, Mapping):
|
2916
|
+
model.requester.request_parameters = self._remove_query_properties(
|
2917
|
+
model.requester.request_parameters
|
2918
|
+
)
|
2919
|
+
|
2920
|
+
requester = self._create_component_from_model(
|
2921
|
+
model=model.requester,
|
2922
|
+
decoder=decoder,
|
2923
|
+
name=name,
|
2924
|
+
query_properties_key=query_properties_key,
|
2925
|
+
use_cache=use_cache,
|
2926
|
+
config=config,
|
2927
|
+
)
|
2732
2928
|
url_base = (
|
2733
2929
|
model.requester.url_base
|
2734
2930
|
if hasattr(model.requester, "url_base")
|
@@ -2834,9 +3030,21 @@ class ModelToComponentFactory:
|
|
2834
3030
|
cursor=cursor,
|
2835
3031
|
config=config,
|
2836
3032
|
ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests,
|
3033
|
+
additional_query_properties=query_properties,
|
2837
3034
|
parameters=model.parameters or {},
|
2838
3035
|
)
|
2839
3036
|
|
3037
|
+
@staticmethod
|
3038
|
+
def _remove_query_properties(
    request_parameters: Mapping[str, Union[Any, str]],
) -> Mapping[str, Union[Any, str]]:
    """Return a copy of ``request_parameters`` without any QueryProperties definitions.

    An entry is dropped only when its value is a mapping whose ``"type"`` is
    ``"QueryProperties"``; every other entry is kept unchanged. The input is not modified.
    """
    remaining_parameters = {}
    for parameter_field, request_parameter in request_parameters.items():
        is_query_properties = (
            isinstance(request_parameter, Mapping)
            and request_parameter.get("type") == "QueryProperties"
        )
        if is_query_properties:
            continue
        remaining_parameters[parameter_field] = request_parameter
    return remaining_parameters
|
3047
|
+
|
2840
3048
|
def create_state_delegating_stream(
|
2841
3049
|
self,
|
2842
3050
|
model: StateDelegatingStreamModel,
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
2
|
+
|
3
|
+
from airbyte_cdk.sources.declarative.requesters.query_properties.properties_from_endpoint import (
|
4
|
+
PropertiesFromEndpoint,
|
5
|
+
)
|
6
|
+
from airbyte_cdk.sources.declarative.requesters.query_properties.property_chunking import (
|
7
|
+
PropertyChunking,
|
8
|
+
)
|
9
|
+
from airbyte_cdk.sources.declarative.requesters.query_properties.query_properties import (
|
10
|
+
QueryProperties,
|
11
|
+
)
|
12
|
+
|
13
|
+
__all__ = ["PropertiesFromEndpoint", "PropertyChunking", "QueryProperties"]
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
2
|
+
|
3
|
+
from dataclasses import InitVar, dataclass
|
4
|
+
from typing import Any, Iterable, List, Mapping, Optional
|
5
|
+
|
6
|
+
import dpath
|
7
|
+
|
8
|
+
from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
|
9
|
+
from airbyte_cdk.sources.declarative.retrievers import Retriever
|
10
|
+
from airbyte_cdk.sources.types import Config, StreamSlice
|
11
|
+
|
12
|
+
|
13
|
+
@dataclass
class PropertiesFromEndpoint:
    """
    Component that defines the behavior around how to dynamically retrieve a set of request properties from an
    API endpoint. The set retrieved can then be injected into the requests to extract records from an API source.
    """

    # Path, within each record returned by the retriever, to the property value
    property_field_path: List[str]
    # Retriever used to read the records describing the available properties
    retriever: Retriever
    config: Config
    parameters: InitVar[Mapping[str, Any]]

    def __post_init__(self, parameters: Mapping[str, Any]) -> None:
        # Each path segment may contain interpolation, so wrap them once up front.
        self._property_field_path = [
            InterpolatedString(string=property_field, parameters=parameters)
            for property_field in self.property_field_path
        ]

    def get_properties_from_endpoint(self, stream_slice: Optional[StreamSlice]) -> Iterable[str]:
        """
        Lazily yield the value found at ``property_field_path`` for every record read from the retriever.

        :param stream_slice: The slice forwarded to the retriever when reading the properties endpoint
        :return: One value per record; a record that does not contain the path yields the ``[]`` default
        """
        response_properties = self.retriever.read_records(
            records_schema={}, stream_slice=stream_slice
        )
        # The path depends only on the config, so resolve it once instead of once per record.
        path = [
            node.eval(self.config) if not isinstance(node, str) else node
            for node in self._property_field_path
        ]
        for property_obj in response_properties:
            yield dpath.get(property_obj, path, default=[])  # type: ignore # dpath.get is loosely typed; the value at the path is expected to be the property name
|
@@ -0,0 +1,69 @@
|
|
1
|
+
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
2
|
+
|
3
|
+
from dataclasses import InitVar, dataclass
|
4
|
+
from enum import Enum
|
5
|
+
from typing import Any, Iterable, List, Mapping, Optional
|
6
|
+
|
7
|
+
from airbyte_cdk.sources.declarative.requesters.query_properties.strategies import GroupByKey
|
8
|
+
from airbyte_cdk.sources.declarative.requesters.query_properties.strategies.merge_strategy import (
|
9
|
+
RecordMergeStrategy,
|
10
|
+
)
|
11
|
+
from airbyte_cdk.sources.types import Config, Record
|
12
|
+
|
13
|
+
|
14
|
+
class PropertyLimitType(Enum):
    """
    The heuristic used to measure the size of the current chunk of properties, which determines when the
    chunk is full and a new one should be started.
    """

    characters = "characters"
    property_count = "property_count"


@dataclass
class PropertyChunking:
    """
    Defines the behavior for how the complete list of properties to query for are broken down into smaller groups
    that will be used for multiple requests to the target API.
    """

    property_limit_type: PropertyLimitType
    # Maximum chunk size measured according to property_limit_type; a falsy value disables chunking
    property_limit: Optional[int]
    record_merge_strategy: Optional[RecordMergeStrategy]
    parameters: InitVar[Mapping[str, Any]]
    config: Config

    def __post_init__(self, parameters: Mapping[str, Any]) -> None:
        # Without an explicit strategy, records are grouped by their "id" field.
        self._record_merge_strategy = self.record_merge_strategy or GroupByKey(
            key="id", config=self.config, parameters=parameters
        )

    def get_request_property_chunks(
        self, property_fields: Iterable[str], always_include_properties: Optional[List[str]]
    ) -> Iterable[List[str]]:
        """
        Split ``property_fields`` into chunks that each stay within ``property_limit``.

        :param property_fields: The properties to distribute over the chunks
        :param always_include_properties: Properties placed in every chunk; they do not count toward the limit
        :return: The chunks in order. When no limit is set, a single chunk with every property followed by
            the always-included ones. With a limit, every chunk starts with the always-included properties.
        """
        if not self.property_limit:
            single_property_chunk = list(property_fields)
            if always_include_properties:
                single_property_chunk.extend(always_include_properties)
            yield single_property_chunk
            return

        current_chunk = list(always_include_properties) if always_include_properties else []
        chunk_size = 0
        properties_in_chunk = 0
        for property_field in property_fields:
            # Only the "characters" limit type measures the field's length; otherwise each property counts as 1
            property_field_size = (
                len(property_field)
                if self.property_limit_type == PropertyLimitType.characters
                else 1
            )
            # Only close a chunk that already holds a requested property. A single property larger than
            # the limit would otherwise emit a chunk with nothing but the always-included properties.
            if properties_in_chunk and chunk_size + property_field_size > self.property_limit:
                yield current_chunk
                current_chunk = list(always_include_properties) if always_include_properties else []
                chunk_size = 0
                properties_in_chunk = 0
            current_chunk.append(property_field)
            chunk_size += property_field_size
            properties_in_chunk += 1
        yield current_chunk

    def get_merge_key(self, record: Record) -> Optional[str]:
        """Return the key used to merge ``record`` with its counterparts from the other property chunks."""
        return self._record_merge_strategy.get_group_key(record=record)
|
@@ -0,0 +1,58 @@
|
|
1
|
+
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
2
|
+
|
3
|
+
from dataclasses import InitVar, dataclass
|
4
|
+
from typing import Any, Iterable, List, Mapping, Optional, Union
|
5
|
+
|
6
|
+
from airbyte_cdk.sources.declarative.requesters.query_properties import (
|
7
|
+
PropertiesFromEndpoint,
|
8
|
+
PropertyChunking,
|
9
|
+
)
|
10
|
+
from airbyte_cdk.sources.types import Config, StreamSlice
|
11
|
+
|
12
|
+
|
13
|
+
@dataclass
class QueryProperties:
    """
    Low-code component responsible for injecting additional property values into outbound API requests.
    The properties are either listed statically in the manifest or resolved dynamically through a
    PropertiesFromEndpoint component. The full set can optionally be split into smaller chunks to satisfy
    API restrictions around the total amount of data retrieved.
    """

    property_list: Optional[Union[List[str], PropertiesFromEndpoint]]
    always_include_properties: Optional[List[str]]
    property_chunking: Optional[PropertyChunking]
    config: Config
    parameters: InitVar[Mapping[str, Any]]

    def get_request_property_chunks(
        self, stream_slice: Optional[StreamSlice] = None
    ) -> Iterable[List[str]]:
        """
        Resolve the full set of properties (from the endpoint component or the static list) and yield it,
        chunked when a property_chunking component is configured and as a single list otherwise.

        :param stream_slice: The StreamSlice of the current partition being processed during the sync. It is
          forwarded to the PropertiesFromEndpoint component when the property list is resolved dynamically
        """
        fields: Union[Iterable[str], List[str]]
        if isinstance(self.property_list, PropertiesFromEndpoint):
            fields = self.property_list.get_properties_from_endpoint(stream_slice=stream_slice)
        else:
            fields = self.property_list or []

        if not self.property_chunking:
            yield list(fields)
            return

        yield from self.property_chunking.get_request_property_chunks(
            property_fields=fields, always_include_properties=self.always_include_properties
        )

    def has_multiple_chunks(self, stream_slice: Optional[StreamSlice]) -> bool:
        """
        Return True when at least two property chunks are produced for ``stream_slice``.

        NOTE: the original author flagged this helper for later removal.
        """
        property_chunks = iter(self.get_request_property_chunks(stream_slice=stream_slice))
        exhausted = object()
        # Pull at most two chunks; running dry on either pull means there is no second chunk.
        if next(property_chunks, exhausted) is exhausted:
            return False
        return next(property_chunks, exhausted) is not exhausted
|
@@ -0,0 +1,10 @@
|
|
1
|
+
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
2
|
+
|
3
|
+
from airbyte_cdk.sources.declarative.requesters.query_properties.strategies.group_by_key import (
|
4
|
+
GroupByKey,
|
5
|
+
)
|
6
|
+
from airbyte_cdk.sources.declarative.requesters.query_properties.strategies.merge_strategy import (
|
7
|
+
RecordMergeStrategy,
|
8
|
+
)
|
9
|
+
|
10
|
+
__all__ = ["GroupByKey", "RecordMergeStrategy"]
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
2
|
+
|
3
|
+
from dataclasses import InitVar, dataclass
|
4
|
+
from typing import Any, List, Mapping, Optional, Union
|
5
|
+
|
6
|
+
from airbyte_cdk.sources.declarative.requesters.query_properties.strategies.merge_strategy import (
|
7
|
+
RecordMergeStrategy,
|
8
|
+
)
|
9
|
+
from airbyte_cdk.sources.types import Config, Record
|
10
|
+
|
11
|
+
|
12
|
+
@dataclass
class GroupByKey(RecordMergeStrategy):
    """
    Record merge strategy that combines records together according to values on the record for one or many keys.
    """

    # A single field name or a list of field names whose values identify a record
    key: Union[str, List[str]]
    parameters: InitVar[Mapping[str, Any]]
    config: Config

    def __post_init__(self, parameters: Mapping[str, Any]) -> None:
        # Normalize to a list so a single key and several keys share one code path.
        self._keys = [self.key] if isinstance(self.key, str) else self.key

    def get_group_key(self, record: Record) -> Optional[str]:
        """
        Build the merge key for ``record`` by joining the values of the configured keys with commas.

        :param record: The record to compute the merge key for
        :return: The comma-joined key values, or None as soon as one key is missing or has a falsy value
        """
        resolved_keys = []
        for key in self._keys:
            key_value = record.data.get(key)
            if not key_value:
                return None
            # Key values are not necessarily strings (e.g. integer ids) and str.join only accepts strings.
            resolved_keys.append(str(key_value))
        return ",".join(resolved_keys)
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
2
|
+
|
3
|
+
from abc import ABC, abstractmethod
|
4
|
+
from dataclasses import dataclass
|
5
|
+
from typing import Optional
|
6
|
+
|
7
|
+
from airbyte_cdk.sources.types import Record
|
8
|
+
|
9
|
+
|
10
|
+
@dataclass
class RecordMergeStrategy(ABC):
    """
    Interface describing how records whose complete set of fields had to be fetched over several requests
    are merged back into a single record.
    """

    @abstractmethod
    def get_group_key(self, record: Record) -> Optional[str]:
        """Return the key under which ``record`` is grouped for merging, or None if it has none."""
|
airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
#
|
2
|
-
# Copyright (c)
|
2
|
+
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
5
|
from dataclasses import InitVar, dataclass, field
|
6
|
-
from typing import Any, Mapping, MutableMapping, Optional, Union
|
6
|
+
from typing import Any, List, Mapping, MutableMapping, Optional, Union
|
7
7
|
|
8
8
|
from airbyte_cdk.sources.declarative.interpolation.interpolated_nested_mapping import NestedMapping
|
9
9
|
from airbyte_cdk.sources.declarative.requesters.request_options.interpolated_nested_request_input_provider import (
|
@@ -40,6 +40,7 @@ class InterpolatedRequestOptionsProvider(RequestOptionsProvider):
|
|
40
40
|
request_headers: Optional[RequestInput] = None
|
41
41
|
request_body_data: Optional[RequestInput] = None
|
42
42
|
request_body_json: Optional[NestedMapping] = None
|
43
|
+
query_properties_key: Optional[str] = None
|
43
44
|
|
44
45
|
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
45
46
|
if self.request_parameters is None:
|
@@ -83,6 +84,28 @@ class InterpolatedRequestOptionsProvider(RequestOptionsProvider):
|
|
83
84
|
valid_value_types=ValidRequestTypes,
|
84
85
|
)
|
85
86
|
if isinstance(interpolated_value, dict):
|
87
|
+
if self.query_properties_key:
|
88
|
+
if not stream_slice:
|
89
|
+
raise ValueError(
|
90
|
+
"stream_slice should not be None if query properties in requests is enabled. Please contact Airbyte Support"
|
91
|
+
)
|
92
|
+
elif (
|
93
|
+
"query_properties" not in stream_slice.extra_fields
|
94
|
+
or stream_slice.extra_fields.get("query_properties") is None
|
95
|
+
):
|
96
|
+
raise ValueError(
|
97
|
+
"QueryProperties component is defined but stream_partition does not contain query_properties. Please contact Airbyte Support"
|
98
|
+
)
|
99
|
+
elif not isinstance(stream_slice.extra_fields.get("query_properties"), List):
|
100
|
+
raise ValueError(
|
101
|
+
"QueryProperties component is defined but stream_slice.extra_fields.query_properties is not a List. Please contact Airbyte Support"
|
102
|
+
)
|
103
|
+
interpolated_value = {
|
104
|
+
**interpolated_value,
|
105
|
+
self.query_properties_key: ",".join(
|
106
|
+
stream_slice.extra_fields.get("query_properties") # type: ignore # Earlier type checks validate query_properties type
|
107
|
+
),
|
108
|
+
}
|
86
109
|
return interpolated_value
|
87
110
|
return {}
|
88
111
|
|