airbyte-cdk 6.43.1__py3-none-any.whl → 6.45.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (20) hide show
  1. airbyte_cdk/sources/declarative/checks/__init__.py +5 -2
  2. airbyte_cdk/sources/declarative/checks/check_stream.py +113 -11
  3. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +139 -2
  4. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +94 -8
  5. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +219 -11
  6. airbyte_cdk/sources/declarative/requesters/query_properties/__init__.py +13 -0
  7. airbyte_cdk/sources/declarative/requesters/query_properties/properties_from_endpoint.py +40 -0
  8. airbyte_cdk/sources/declarative/requesters/query_properties/property_chunking.py +69 -0
  9. airbyte_cdk/sources/declarative/requesters/query_properties/query_properties.py +58 -0
  10. airbyte_cdk/sources/declarative/requesters/query_properties/strategies/__init__.py +10 -0
  11. airbyte_cdk/sources/declarative/requesters/query_properties/strategies/group_by_key.py +33 -0
  12. airbyte_cdk/sources/declarative/requesters/query_properties/strategies/merge_strategy.py +19 -0
  13. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +25 -2
  14. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +101 -30
  15. {airbyte_cdk-6.43.1.dist-info → airbyte_cdk-6.45.0.dist-info}/METADATA +1 -1
  16. {airbyte_cdk-6.43.1.dist-info → airbyte_cdk-6.45.0.dist-info}/RECORD +20 -13
  17. {airbyte_cdk-6.43.1.dist-info → airbyte_cdk-6.45.0.dist-info}/LICENSE.txt +0 -0
  18. {airbyte_cdk-6.43.1.dist-info → airbyte_cdk-6.45.0.dist-info}/LICENSE_SHORT +0 -0
  19. {airbyte_cdk-6.43.1.dist-info → airbyte_cdk-6.45.0.dist-info}/WHEEL +0 -0
  20. {airbyte_cdk-6.43.1.dist-info → airbyte_cdk-6.45.0.dist-info}/entry_points.txt +0 -0
@@ -1,5 +1,5 @@
1
1
  #
2
- # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
2
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
5
  from __future__ import annotations
@@ -54,7 +54,11 @@ from airbyte_cdk.sources.declarative.auth.token_provider import (
54
54
  SessionTokenProvider,
55
55
  TokenProvider,
56
56
  )
57
- from airbyte_cdk.sources.declarative.checks import CheckDynamicStream, CheckStream
57
+ from airbyte_cdk.sources.declarative.checks import (
58
+ CheckDynamicStream,
59
+ CheckStream,
60
+ DynamicStreamCheckConfig,
61
+ )
58
62
  from airbyte_cdk.sources.declarative.concurrency_level import ConcurrencyLevel
59
63
  from airbyte_cdk.sources.declarative.datetime.min_max_datetime import MinMaxDatetime
60
64
  from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
@@ -218,6 +222,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
218
222
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
219
223
  DynamicSchemaLoader as DynamicSchemaLoaderModel,
220
224
  )
225
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
226
+ DynamicStreamCheckConfig as DynamicStreamCheckConfigModel,
227
+ )
221
228
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
222
229
  ExponentialBackoffStrategy as ExponentialBackoffStrategyModel,
223
230
  )
@@ -227,6 +234,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
227
234
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
228
235
  FlattenFields as FlattenFieldsModel,
229
236
  )
237
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
238
+ GroupByKeyMergeStrategy as GroupByKeyMergeStrategyModel,
239
+ )
230
240
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
231
241
  GroupingPartitionRouter as GroupingPartitionRouterModel,
232
242
  )
@@ -317,6 +327,18 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
317
327
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
318
328
  ParentStreamConfig as ParentStreamConfigModel,
319
329
  )
330
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
331
+ PropertiesFromEndpoint as PropertiesFromEndpointModel,
332
+ )
333
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
334
+ PropertyChunking as PropertyChunkingModel,
335
+ )
336
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
337
+ PropertyLimitType as PropertyLimitTypeModel,
338
+ )
339
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
340
+ QueryProperties as QueryPropertiesModel,
341
+ )
320
342
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
321
343
  Rate as RateModel,
322
344
  )
@@ -425,6 +447,17 @@ from airbyte_cdk.sources.declarative.requesters.paginators.strategies import (
425
447
  PageIncrement,
426
448
  StopConditionPaginationStrategyDecorator,
427
449
  )
450
+ from airbyte_cdk.sources.declarative.requesters.query_properties import (
451
+ PropertiesFromEndpoint,
452
+ PropertyChunking,
453
+ QueryProperties,
454
+ )
455
+ from airbyte_cdk.sources.declarative.requesters.query_properties.property_chunking import (
456
+ PropertyLimitType,
457
+ )
458
+ from airbyte_cdk.sources.declarative.requesters.query_properties.strategies import (
459
+ GroupByKey,
460
+ )
428
461
  from airbyte_cdk.sources.declarative.requesters.request_option import RequestOptionType
429
462
  from airbyte_cdk.sources.declarative.requesters.request_options import (
430
463
  DatetimeBasedRequestOptionsProvider,
@@ -559,6 +592,7 @@ class ModelToComponentFactory:
559
592
  BasicHttpAuthenticatorModel: self.create_basic_http_authenticator,
560
593
  BearerAuthenticatorModel: self.create_bearer_authenticator,
561
594
  CheckStreamModel: self.create_check_stream,
595
+ DynamicStreamCheckConfigModel: self.create_dynamic_stream_check_config,
562
596
  CheckDynamicStreamModel: self.create_check_dynamic_stream,
563
597
  CompositeErrorHandlerModel: self.create_composite_error_handler,
564
598
  ConcurrencyLevelModel: self.create_concurrency_level,
@@ -588,6 +622,7 @@ class ModelToComponentFactory:
588
622
  ResponseToFileExtractorModel: self.create_response_to_file_extractor,
589
623
  ExponentialBackoffStrategyModel: self.create_exponential_backoff_strategy,
590
624
  SessionTokenAuthenticatorModel: self.create_session_token_authenticator,
625
+ GroupByKeyMergeStrategyModel: self.create_group_by_key,
591
626
  HttpRequesterModel: self.create_http_requester,
592
627
  HttpResponseFilterModel: self.create_http_response_filter,
593
628
  InlineSchemaLoaderModel: self.create_inline_schema_loader,
@@ -617,6 +652,9 @@ class ModelToComponentFactory:
617
652
  OffsetIncrementModel: self.create_offset_increment,
618
653
  PageIncrementModel: self.create_page_increment,
619
654
  ParentStreamConfigModel: self.create_parent_stream_config,
655
+ PropertiesFromEndpointModel: self.create_properties_from_endpoint,
656
+ PropertyChunkingModel: self.create_property_chunking,
657
+ QueryPropertiesModel: self.create_query_properties,
620
658
  RecordFilterModel: self.create_record_filter,
621
659
  RecordSelectorModel: self.create_record_selector,
622
660
  RemoveFieldsModel: self.create_remove_fields,
@@ -936,8 +974,36 @@ class ModelToComponentFactory:
936
974
  )
937
975
 
938
976
  @staticmethod
939
- def create_check_stream(model: CheckStreamModel, config: Config, **kwargs: Any) -> CheckStream:
940
- return CheckStream(stream_names=model.stream_names, parameters={})
977
def create_dynamic_stream_check_config(
    model: DynamicStreamCheckConfigModel, config: Config, **kwargs: Any
) -> DynamicStreamCheckConfig:
    """Build the runtime DynamicStreamCheckConfig from its parsed manifest model.

    :param model: parsed model carrying ``dynamic_stream_name`` and an optional ``stream_count``
    :param config: connector configuration (not used by this component)
    :param kwargs: extra keyword arguments, accepted and ignored
    :return: the check configuration; an unset (or zero) ``stream_count`` is passed on as ``0``
    """
    stream_count = model.stream_count if model.stream_count else 0
    return DynamicStreamCheckConfig(
        dynamic_stream_name=model.dynamic_stream_name,
        stream_count=stream_count,
    )
984
+
985
def create_check_stream(
    self, model: CheckStreamModel, config: Config, **kwargs: Any
) -> CheckStream:
    """Build a CheckStream component from its parsed manifest model.

    :param model: parsed model with optional ``stream_names`` and ``dynamic_streams_check_configs``
    :param config: connector configuration, forwarded to the nested component factories
    :param kwargs: extra keyword arguments, accepted and ignored
    :return: a CheckStream where unset fields are replaced by empty lists
    :raises ValueError: when both ``stream_names`` and ``dynamic_streams_check_configs`` are None
    """
    check_config_models = model.dynamic_streams_check_configs
    if check_config_models is None and model.stream_names is None:
        raise ValueError(
            "Expected either stream_names or dynamic_streams_check_configs to be set for CheckStream"
        )

    # Each nested check config is built through the generic model-to-component dispatch.
    dynamic_streams_check_configs = []
    if check_config_models:
        for check_config_model in check_config_models:
            dynamic_streams_check_configs.append(
                self._create_component_from_model(model=check_config_model, config=config)
            )

    return CheckStream(
        stream_names=model.stream_names or [],
        dynamic_streams_check_configs=dynamic_streams_check_configs,
        parameters={},
    )
941
1007
 
942
1008
  @staticmethod
943
1009
  def create_check_dynamic_stream(
@@ -2047,8 +2113,8 @@ class ModelToComponentFactory:
2047
2113
  parameters=model.parameters or {},
2048
2114
  )
2049
2115
 
2116
+ @staticmethod
2050
2117
  def create_response_to_file_extractor(
2051
- self,
2052
2118
  model: ResponseToFileExtractorModel,
2053
2119
  **kwargs: Any,
2054
2120
  ) -> ResponseToFileExtractor:
@@ -2062,11 +2128,17 @@ class ModelToComponentFactory:
2062
2128
  factor=model.factor or 5, parameters=model.parameters or {}, config=config
2063
2129
  )
2064
2130
 
2131
@staticmethod
def create_group_by_key(
    model: GroupByKeyMergeStrategyModel, config: Config, **kwargs: Any
) -> GroupByKey:
    """Build the GroupByKey record merge strategy from its parsed manifest model.

    :param model: parsed model carrying the ``key`` (or keys) to group on and optional ``parameters``
    :param config: connector configuration handed to the strategy
    :param kwargs: extra keyword arguments, accepted and ignored. create_property_chunking
        forwards its own ``**kwargs`` when it builds the record merge strategy, so this
        factory tolerates them like the other ``create_*`` methods do instead of failing
        on an unexpected keyword.
    :return: the configured GroupByKey strategy
    """
    return GroupByKey(key=model.key, config=config, parameters=model.parameters or {})
2134
+
2065
2135
  def create_http_requester(
2066
2136
  self,
2067
2137
  model: HttpRequesterModel,
2068
2138
  config: Config,
2069
2139
  decoder: Decoder = JsonDecoder(parameters={}),
2140
+ query_properties_key: Optional[str] = None,
2141
+ use_cache: Optional[bool] = None,
2070
2142
  *,
2071
2143
  name: str,
2072
2144
  ) -> HttpRequester:
@@ -2099,6 +2171,7 @@ class ModelToComponentFactory:
2099
2171
  request_body_json=model.request_body_json,
2100
2172
  request_headers=model.request_headers,
2101
2173
  request_parameters=model.request_parameters,
2174
+ query_properties_key=query_properties_key,
2102
2175
  config=config,
2103
2176
  parameters=model.parameters or {},
2104
2177
  )
@@ -2106,7 +2179,7 @@ class ModelToComponentFactory:
2106
2179
  assert model.use_cache is not None # for mypy
2107
2180
  assert model.http_method is not None # for mypy
2108
2181
 
2109
- use_cache = model.use_cache and not self._disable_cache
2182
+ should_use_cache = (model.use_cache or bool(use_cache)) and not self._disable_cache
2110
2183
 
2111
2184
  return HttpRequester(
2112
2185
  name=name,
@@ -2121,7 +2194,7 @@ class ModelToComponentFactory:
2121
2194
  disable_retries=self._disable_retries,
2122
2195
  parameters=model.parameters or {},
2123
2196
  message_repository=self._message_repository,
2124
- use_cache=use_cache,
2197
+ use_cache=should_use_cache,
2125
2198
  decoder=decoder,
2126
2199
  stream_response=decoder.is_stream_response() if decoder else False,
2127
2200
  )
@@ -2225,10 +2298,11 @@ class ModelToComponentFactory:
2225
2298
  retriever = self._create_component_from_model(
2226
2299
  model=model.retriever,
2227
2300
  config=config,
2228
- name="",
2301
+ name="dynamic_properties",
2229
2302
  primary_key=None,
2230
2303
  stream_slicer=combined_slicers,
2231
2304
  transformations=[],
2305
+ use_cache=True,
2232
2306
  )
2233
2307
  schema_type_identifier = self._create_component_from_model(
2234
2308
  model.schema_type_identifier, config=config, parameters=model.parameters or {}
@@ -2566,6 +2640,79 @@ class ModelToComponentFactory:
2566
2640
  lazy_read_pointer=model_lazy_read_pointer,
2567
2641
  )
2568
2642
 
2643
def create_properties_from_endpoint(
    self, model: PropertiesFromEndpointModel, config: Config, **kwargs: Any
) -> PropertiesFromEndpoint:
    """Build the component that fetches the list of request properties from an API endpoint.

    :param model: parsed model carrying the ``retriever`` definition and ``property_field_path``
    :param config: connector configuration, forwarded to the retriever and the component
    :param kwargs: extra keyword arguments, accepted and ignored
    :return: a PropertiesFromEndpoint wrapping the freshly built retriever
    """
    # Caching is enabled on the HttpRequester/HttpClient because the properties endpoint will
    # be called for every slice being processed, and it is highly unlikely for the response
    # to differ between calls.
    retriever_options = dict(
        name="dynamic_properties",
        primary_key=None,
        stream_slicer=None,
        transformations=[],
        use_cache=True,
    )
    properties_retriever = self._create_component_from_model(
        model=model.retriever, config=config, **retriever_options
    )
    return PropertiesFromEndpoint(
        property_field_path=model.property_field_path,
        retriever=properties_retriever,
        config=config,
        parameters=model.parameters or {},
    )
2661
+
2662
+ def create_property_chunking(
2663
+ self, model: PropertyChunkingModel, config: Config, **kwargs: Any
2664
+ ) -> PropertyChunking:
2665
+ record_merge_strategy = (
2666
+ self._create_component_from_model(
2667
+ model=model.record_merge_strategy, config=config, **kwargs
2668
+ )
2669
+ if model.record_merge_strategy
2670
+ else None
2671
+ )
2672
+
2673
+ property_limit_type: PropertyLimitType
2674
+ match model.property_limit_type:
2675
+ case PropertyLimitTypeModel.property_count:
2676
+ property_limit_type = PropertyLimitType.property_count
2677
+ case PropertyLimitTypeModel.characters:
2678
+ property_limit_type = PropertyLimitType.characters
2679
+ case _:
2680
+ raise ValueError(f"Invalid PropertyLimitType {property_limit_type}")
2681
+
2682
+ return PropertyChunking(
2683
+ property_limit_type=property_limit_type,
2684
+ property_limit=model.property_limit,
2685
+ record_merge_strategy=record_merge_strategy,
2686
+ config=config,
2687
+ parameters=model.parameters or {},
2688
+ )
2689
+
2690
def create_query_properties(
    self, model: QueryPropertiesModel, config: Config, **kwargs: Any
) -> QueryProperties:
    """Build the QueryProperties component from its parsed manifest model.

    :param model: parsed model carrying ``property_list`` (a plain list or a nested component
        model), ``always_include_properties`` and an optional ``property_chunking``
    :param config: connector configuration, forwarded to nested components
    :param kwargs: extra keyword arguments, forwarded to the nested component factories
    :return: the configured QueryProperties component
    """
    # A plain list is used as-is; anything else is treated as a component model to build.
    property_list = model.property_list
    if not isinstance(property_list, list):
        property_list = self._create_component_from_model(
            model=property_list, config=config, **kwargs
        )

    property_chunking = None
    if model.property_chunking:
        property_chunking = self._create_component_from_model(
            model=model.property_chunking, config=config, **kwargs
        )

    return QueryProperties(
        property_list=property_list,
        always_include_properties=model.always_include_properties,
        property_chunking=property_chunking,
        config=config,
        parameters=model.parameters or {},
    )
2715
+
2569
2716
  @staticmethod
2570
2717
  def create_record_filter(
2571
2718
  model: RecordFilterModel, config: Config, **kwargs: Any
@@ -2711,6 +2858,7 @@ class ModelToComponentFactory:
2711
2858
  IncrementingCountCursorModel, DatetimeBasedCursorModel, CustomIncrementalSyncModel
2712
2859
  ]
2713
2860
  ] = None,
2861
+ use_cache: Optional[bool] = None,
2714
2862
  **kwargs: Any,
2715
2863
  ) -> SimpleRetriever:
2716
2864
  decoder = (
@@ -2718,9 +2866,6 @@ class ModelToComponentFactory:
2718
2866
  if model.decoder
2719
2867
  else JsonDecoder(parameters={})
2720
2868
  )
2721
- requester = self._create_component_from_model(
2722
- model=model.requester, decoder=decoder, config=config, name=name
2723
- )
2724
2869
  record_selector = self._create_component_from_model(
2725
2870
  model=model.record_selector,
2726
2871
  name=name,
@@ -2729,6 +2874,57 @@ class ModelToComponentFactory:
2729
2874
  transformations=transformations,
2730
2875
  client_side_incremental_sync=client_side_incremental_sync,
2731
2876
  )
2877
+
2878
+ query_properties: Optional[QueryProperties] = None
2879
+ query_properties_key: Optional[str] = None
2880
+ if (
2881
+ hasattr(model.requester, "request_parameters")
2882
+ and model.requester.request_parameters
2883
+ and isinstance(model.requester.request_parameters, Mapping)
2884
+ ):
2885
+ query_properties_definitions = []
2886
+ for key, request_parameter in model.requester.request_parameters.items():
2887
+ # When translating JSON schema into Pydantic models, enforcing types for arrays containing both
2888
+ # concrete strings and complex object definitions like QueryProperties would get resolved to Union[str, Any].
2889
+ # This adds the extra validation that we couldn't get for free in Pydantic model generation
2890
+ if (
2891
+ isinstance(request_parameter, Mapping)
2892
+ and request_parameter.get("type") == "QueryProperties"
2893
+ ):
2894
+ query_properties_key = key
2895
+ query_properties_definitions.append(request_parameter)
2896
+ elif not isinstance(request_parameter, str):
2897
+ raise ValueError(
2898
+ f"Each element of request_parameters should be of type str or QueryProperties, but received {request_parameter.get('type')}"
2899
+ )
2900
+
2901
+ if len(query_properties_definitions) > 1:
2902
+ raise ValueError(
2903
+ f"request_parameters only supports defining one QueryProperties field, but found {len(query_properties_definitions)} usages"
2904
+ )
2905
+
2906
+ if len(query_properties_definitions) == 1:
2907
+ query_properties = self.create_component(
2908
+ model_type=QueryPropertiesModel,
2909
+ component_definition=query_properties_definitions[0],
2910
+ config=config,
2911
+ )
2912
+
2913
+ # Removes QueryProperties components from the interpolated mappings because it will be resolved in
2914
+ # the provider from the slice directly instead of through jinja interpolation
2915
+ if isinstance(model.requester.request_parameters, Mapping):
2916
+ model.requester.request_parameters = self._remove_query_properties(
2917
+ model.requester.request_parameters
2918
+ )
2919
+
2920
+ requester = self._create_component_from_model(
2921
+ model=model.requester,
2922
+ decoder=decoder,
2923
+ name=name,
2924
+ query_properties_key=query_properties_key,
2925
+ use_cache=use_cache,
2926
+ config=config,
2927
+ )
2732
2928
  url_base = (
2733
2929
  model.requester.url_base
2734
2930
  if hasattr(model.requester, "url_base")
@@ -2834,9 +3030,21 @@ class ModelToComponentFactory:
2834
3030
  cursor=cursor,
2835
3031
  config=config,
2836
3032
  ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests,
3033
+ additional_query_properties=query_properties,
2837
3034
  parameters=model.parameters or {},
2838
3035
  )
2839
3036
 
3037
@staticmethod
def _remove_query_properties(
    request_parameters: Mapping[str, Union[Any, str]],
) -> Mapping[str, Union[Any, str]]:
    """Return a new mapping holding every request parameter that is not a QueryProperties definition.

    A value counts as a QueryProperties definition when it is a Mapping whose ``"type"`` entry
    equals ``"QueryProperties"``. The input mapping is left untouched.

    :param request_parameters: request parameters keyed by parameter name
    :return: a dict with the QueryProperties entries filtered out
    """

    def _is_query_properties(candidate: Any) -> bool:
        return isinstance(candidate, Mapping) and candidate.get("type") == "QueryProperties"

    remaining = {}
    for parameter_field, request_parameter in request_parameters.items():
        if _is_query_properties(request_parameter):
            continue
        remaining[parameter_field] = request_parameter
    return remaining
3047
+
2840
3048
  def create_state_delegating_stream(
2841
3049
  self,
2842
3050
  model: StateDelegatingStreamModel,
@@ -0,0 +1,13 @@
1
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
2
+
3
+ from airbyte_cdk.sources.declarative.requesters.query_properties.properties_from_endpoint import (
4
+ PropertiesFromEndpoint,
5
+ )
6
+ from airbyte_cdk.sources.declarative.requesters.query_properties.property_chunking import (
7
+ PropertyChunking,
8
+ )
9
+ from airbyte_cdk.sources.declarative.requesters.query_properties.query_properties import (
10
+ QueryProperties,
11
+ )
12
+
13
+ __all__ = ["PropertiesFromEndpoint", "PropertyChunking", "QueryProperties"]
@@ -0,0 +1,40 @@
1
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
2
+
3
+ from dataclasses import InitVar, dataclass
4
+ from typing import Any, Iterable, List, Mapping, Optional
5
+
6
+ import dpath
7
+
8
+ from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
9
+ from airbyte_cdk.sources.declarative.retrievers import Retriever
10
+ from airbyte_cdk.sources.types import Config, StreamSlice
11
+
12
+
13
@dataclass
class PropertiesFromEndpoint:
    """
    Component that defines the behavior around how to dynamically retrieve a set of request properties from an
    API endpoint. The set retrieved can then be injected into the requests to extract records from an API source.
    """

    # Path to the property value inside each record read from the endpoint; every segment is
    # wrapped in an InterpolatedString in __post_init__.
    property_field_path: List[str]
    # Retriever used to read the records that describe the available properties.
    retriever: Retriever
    # Connector configuration, used when evaluating the interpolated path segments.
    config: Config
    parameters: InitVar[Mapping[str, Any]]

    def __post_init__(self, parameters: Mapping[str, Any]) -> None:
        self._property_field_path = [
            InterpolatedString(string=property_field, parameters=parameters)
            for property_field in self.property_field_path
        ]

    def get_properties_from_endpoint(self, stream_slice: Optional[StreamSlice]) -> Iterable[str]:
        """
        Read the records of the properties endpoint and yield the value found at property_field_path
        for each of them.

        :param stream_slice: The slice handed to the retriever when reading the endpoint
        :return: One extracted value per record. A record that has nothing at the configured path
            yields the ``[]`` default rather than being skipped
        """
        response_properties = self.retriever.read_records(
            records_schema={}, stream_slice=stream_slice
        )
        # The path only depends on the config, so it is resolved once instead of once per record.
        path = [
            node.eval(self.config) if not isinstance(node, str) else node
            for node in self._property_field_path
        ]
        for property_obj in response_properties:
            yield dpath.get(property_obj, path, default=[])  # type: ignore # dpath.get is untyped; the extracted value is expected to be the property name
@@ -0,0 +1,69 @@
1
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
2
+
3
+ from dataclasses import InitVar, dataclass
4
+ from enum import Enum
5
+ from typing import Any, Iterable, List, Mapping, Optional
6
+
7
+ from airbyte_cdk.sources.declarative.requesters.query_properties.strategies import GroupByKey
8
+ from airbyte_cdk.sources.declarative.requesters.query_properties.strategies.merge_strategy import (
9
+ RecordMergeStrategy,
10
+ )
11
+ from airbyte_cdk.sources.types import Config, Record
12
+
13
+
14
class PropertyLimitType(Enum):
    """
    The heuristic used to measure the size of the current chunk of properties, which determines when the
    chunk is full and a new one should be started.
    """

    characters = "characters"
    property_count = "property_count"


@dataclass
class PropertyChunking:
    """
    Defines the behavior for how the complete list of properties to query for are broken down into smaller groups
    that will be used for multiple requests to the target API.
    """

    # How the size of a chunk is measured: total characters of the property names, or their count.
    property_limit_type: PropertyLimitType
    # Maximum chunk size in the unit above; a falsy value (None or 0) disables chunking.
    property_limit: Optional[int]
    # Strategy used to compute the merge key of a record; falls back to GroupByKey on "id".
    record_merge_strategy: Optional[RecordMergeStrategy]
    parameters: InitVar[Mapping[str, Any]]
    config: Config

    def __post_init__(self, parameters: Mapping[str, Any]) -> None:
        self._record_merge_strategy = self.record_merge_strategy or GroupByKey(
            key="id", config=self.config, parameters=parameters
        )

    def get_request_property_chunks(
        self, property_fields: Iterable[str], always_include_properties: Optional[List[str]]
    ) -> Iterable[List[str]]:
        """
        Split property_fields into chunks that respect property_limit.

        :param property_fields: The full set of properties to request
        :param always_include_properties: Properties added to every chunk. They are not counted against
            the limit. They are appended to the end of the single chunk when chunking is disabled and
            placed at the start of each chunk otherwise
        :return: The chunks of properties, one per request to perform. At least one chunk is always
            yielded, even when property_fields is empty
        """
        if not self.property_limit:
            single_property_chunk = list(property_fields)
            if always_include_properties:
                single_property_chunk.extend(always_include_properties)
            yield single_property_chunk
            return
        current_chunk = list(always_include_properties) if always_include_properties else []
        chunk_size = 0
        for property_field in property_fields:
            # Any limit type other than characters is measured as an incrementing count of properties
            property_field_size = (
                len(property_field)
                if self.property_limit_type == PropertyLimitType.characters
                else 1
            )
            # Only close the chunk once it holds at least one counted property. Without the
            # chunk_size check, a single property larger than the limit would first emit a chunk
            # containing nothing but the always-included properties (or nothing at all).
            if chunk_size > 0 and chunk_size + property_field_size > self.property_limit:
                yield current_chunk
                current_chunk = list(always_include_properties) if always_include_properties else []
                chunk_size = 0
            current_chunk.append(property_field)
            chunk_size += property_field_size
        yield current_chunk

    def get_merge_key(self, record: Record) -> Optional[str]:
        """Return the key used to merge the record, as computed by the record merge strategy."""
        return self._record_merge_strategy.get_group_key(record=record)
@@ -0,0 +1,58 @@
1
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
2
+
3
+ from dataclasses import InitVar, dataclass
4
+ from typing import Any, Iterable, List, Mapping, Optional, Union
5
+
6
+ from airbyte_cdk.sources.declarative.requesters.query_properties import (
7
+ PropertiesFromEndpoint,
8
+ PropertyChunking,
9
+ )
10
+ from airbyte_cdk.sources.types import Config, StreamSlice
11
+
12
+
13
@dataclass
class QueryProperties:
    """
    Low-code component that injects additional property values into the outbound API requests. The
    properties are either listed statically in the manifest or fetched dynamically through a
    PropertiesFromEndpoint component. When a PropertyChunking component is configured, the full set of
    properties is split into smaller chunks, one per request.
    """

    property_list: Optional[Union[List[str], PropertiesFromEndpoint]]
    always_include_properties: Optional[List[str]]
    property_chunking: Optional[PropertyChunking]
    config: Config
    parameters: InitVar[Mapping[str, Any]]

    def get_request_property_chunks(
        self, stream_slice: Optional[StreamSlice] = None
    ) -> Iterable[List[str]]:
        """
        Resolve the full set of properties (statically or from the endpoint) and yield it as chunks.

        :param stream_slice: The StreamSlice of the current partition being processed during the sync. It is
            handed to the PropertiesFromEndpoint component when properties are fetched dynamically
        :return: The chunks produced by property_chunking when it is set, otherwise a single chunk holding
            every property
        """
        property_source = self.property_list
        fields: Union[Iterable[str], List[str]]
        if isinstance(property_source, PropertiesFromEndpoint):
            fields = property_source.get_properties_from_endpoint(stream_slice=stream_slice)
        else:
            fields = property_source or []

        if not self.property_chunking:
            yield list(fields)
            return

        yield from self.property_chunking.get_request_property_chunks(
            property_fields=fields, always_include_properties=self.always_include_properties
        )

    # delete later, but leaving this to keep the discussion thread on the PR from getting hidden
    def has_multiple_chunks(self, stream_slice: Optional[StreamSlice]) -> bool:
        """Return True when at least two chunks of properties are produced for the slice."""
        property_chunks = iter(self.get_request_property_chunks(stream_slice=stream_slice))
        for _ in range(2):
            try:
                next(property_chunks)
            except StopIteration:
                return False
        return True
@@ -0,0 +1,10 @@
1
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
2
+
3
+ from airbyte_cdk.sources.declarative.requesters.query_properties.strategies.group_by_key import (
4
+ GroupByKey,
5
+ )
6
+ from airbyte_cdk.sources.declarative.requesters.query_properties.strategies.merge_strategy import (
7
+ RecordMergeStrategy,
8
+ )
9
+
10
+ __all__ = ["GroupByKey", "RecordMergeStrategy"]
@@ -0,0 +1,33 @@
1
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
2
+
3
+ from dataclasses import InitVar, dataclass
4
+ from typing import Any, List, Mapping, Optional, Union
5
+
6
+ from airbyte_cdk.sources.declarative.requesters.query_properties.strategies.merge_strategy import (
7
+ RecordMergeStrategy,
8
+ )
9
+ from airbyte_cdk.sources.types import Config, Record
10
+
11
+
12
@dataclass
class GroupByKey(RecordMergeStrategy):
    """
    Record merge strategy that combines records together according to values on the record for one or many keys.
    """

    # A single field name or a list of field names whose values make up the group key.
    key: Union[str, List[str]]
    parameters: InitVar[Mapping[str, Any]]
    config: Config

    def __post_init__(self, parameters: Mapping[str, Any]) -> None:
        # Normalize to a list so a single key and several keys are handled the same way.
        self._keys = [self.key] if isinstance(self.key, str) else self.key

    def get_group_key(self, record: Record) -> Optional[str]:
        """
        Build the group key of a record by joining the values of the configured keys with commas.

        :param record: The record to compute the group key for
        :return: The comma-joined key values, or None as soon as one key is missing from the record or
            holds a falsy value
        """
        resolved_keys = []
        for key in self._keys:
            key_value = record.data.get(key)
            # NOTE(review): any falsy value (None, "", 0, ...) is treated as missing, as before.
            # Confirm whether an id of 0 should be considered a valid key.
            if not key_value:
                return None
            # Key values are frequently not strings (e.g. integer ids); str.join only accepts
            # strings, so every value is converted before being joined.
            resolved_keys.append(str(key_value))
        return ",".join(resolved_keys)
@@ -0,0 +1,19 @@
1
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
2
+
3
+ from abc import ABC, abstractmethod
4
+ from dataclasses import dataclass
5
+ from typing import Optional
6
+
7
+ from airbyte_cdk.sources.types import Record
8
+
9
+
10
@dataclass
class RecordMergeStrategy(ABC):
    """
    Interface describing how records whose complete set of fields had to be fetched through several
    requests are matched up so that they can be merged back into a single record.
    """

    @abstractmethod
    def get_group_key(self, record: Record) -> Optional[str]:
        """Return the key identifying the group the record belongs to, or None when it has none."""
        return None
@@ -1,9 +1,9 @@
1
1
  #
2
- # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
2
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
5
  from dataclasses import InitVar, dataclass, field
6
- from typing import Any, Mapping, MutableMapping, Optional, Union
6
+ from typing import Any, List, Mapping, MutableMapping, Optional, Union
7
7
 
8
8
  from airbyte_cdk.sources.declarative.interpolation.interpolated_nested_mapping import NestedMapping
9
9
  from airbyte_cdk.sources.declarative.requesters.request_options.interpolated_nested_request_input_provider import (
@@ -40,6 +40,7 @@ class InterpolatedRequestOptionsProvider(RequestOptionsProvider):
40
40
  request_headers: Optional[RequestInput] = None
41
41
  request_body_data: Optional[RequestInput] = None
42
42
  request_body_json: Optional[NestedMapping] = None
43
+ query_properties_key: Optional[str] = None
43
44
 
44
45
  def __post_init__(self, parameters: Mapping[str, Any]) -> None:
45
46
  if self.request_parameters is None:
@@ -83,6 +84,28 @@ class InterpolatedRequestOptionsProvider(RequestOptionsProvider):
83
84
  valid_value_types=ValidRequestTypes,
84
85
  )
85
86
  if isinstance(interpolated_value, dict):
87
+ if self.query_properties_key:
88
+ if not stream_slice:
89
+ raise ValueError(
90
+ "stream_slice should not be None if query properties in requests is enabled. Please contact Airbyte Support"
91
+ )
92
+ elif (
93
+ "query_properties" not in stream_slice.extra_fields
94
+ or stream_slice.extra_fields.get("query_properties") is None
95
+ ):
96
+ raise ValueError(
97
+ "QueryProperties component is defined but stream_partition does not contain query_properties. Please contact Airbyte Support"
98
+ )
99
+ elif not isinstance(stream_slice.extra_fields.get("query_properties"), List):
100
+ raise ValueError(
101
+ "QueryProperties component is defined but stream_slice.extra_fields.query_properties is not a List. Please contact Airbyte Support"
102
+ )
103
+ interpolated_value = {
104
+ **interpolated_value,
105
+ self.query_properties_key: ",".join(
106
+ stream_slice.extra_fields.get("query_properties") # type: ignore # Earlier type checks validate query_properties type
107
+ ),
108
+ }
86
109
  return interpolated_value
87
110
  return {}
88
111