airbyte-cdk 6.45.0.dev4107__py3-none-any.whl → 6.45.0.post20.dev14369762306__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. airbyte_cdk/connector_builder/connector_builder_handler.py +45 -6
  2. airbyte_cdk/connector_builder/main.py +5 -2
  3. airbyte_cdk/models/__init__.py +0 -1
  4. airbyte_cdk/models/airbyte_protocol.py +3 -1
  5. airbyte_cdk/models/file_transfer_record_message.py +13 -0
  6. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +1 -1
  7. airbyte_cdk/sources/declarative/async_job/job.py +6 -0
  8. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +18 -18
  9. airbyte_cdk/sources/declarative/async_job/job_tracker.py +22 -6
  10. airbyte_cdk/sources/declarative/checks/__init__.py +5 -2
  11. airbyte_cdk/sources/declarative/checks/check_stream.py +113 -11
  12. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +0 -8
  13. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +210 -50
  14. airbyte_cdk/sources/declarative/extractors/record_selector.py +1 -6
  15. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +2 -1
  16. airbyte_cdk/sources/declarative/interpolation/macros.py +10 -4
  17. airbyte_cdk/sources/declarative/manifest_declarative_source.py +23 -2
  18. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +142 -43
  19. airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +16 -4
  20. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +263 -50
  21. airbyte_cdk/sources/declarative/partition_routers/__init__.py +4 -0
  22. airbyte_cdk/sources/declarative/partition_routers/grouping_partition_router.py +150 -0
  23. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +5 -1
  24. airbyte_cdk/sources/declarative/requesters/query_properties/__init__.py +13 -0
  25. airbyte_cdk/sources/declarative/requesters/query_properties/properties_from_endpoint.py +40 -0
  26. airbyte_cdk/sources/declarative/requesters/query_properties/property_chunking.py +69 -0
  27. airbyte_cdk/sources/declarative/requesters/query_properties/query_properties.py +58 -0
  28. airbyte_cdk/sources/declarative/requesters/query_properties/strategies/__init__.py +10 -0
  29. airbyte_cdk/sources/declarative/requesters/query_properties/strategies/group_by_key.py +33 -0
  30. airbyte_cdk/sources/declarative/requesters/query_properties/strategies/merge_strategy.py +19 -0
  31. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +25 -2
  32. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +101 -30
  33. airbyte_cdk/sources/declarative/schema/default_schema_loader.py +1 -1
  34. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +4 -9
  35. airbyte_cdk/sources/declarative/transformations/add_fields.py +3 -1
  36. airbyte_cdk/sources/file_based/file_based_stream_reader.py +15 -38
  37. airbyte_cdk/sources/file_based/file_types/file_transfer.py +15 -8
  38. airbyte_cdk/sources/file_based/schema_helpers.py +1 -9
  39. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +12 -3
  40. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +31 -16
  41. airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +3 -1
  42. airbyte_cdk/sources/streams/concurrent/default_stream.py +0 -3
  43. airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +4 -0
  44. airbyte_cdk/sources/types.py +2 -11
  45. airbyte_cdk/sources/utils/record_helper.py +8 -8
  46. airbyte_cdk/test/declarative/__init__.py +6 -0
  47. airbyte_cdk/test/declarative/models/__init__.py +7 -0
  48. airbyte_cdk/test/declarative/models/scenario.py +74 -0
  49. airbyte_cdk/test/declarative/test_suites/__init__.py +24 -0
  50. airbyte_cdk/test/declarative/test_suites/connector_base.py +197 -0
  51. airbyte_cdk/test/declarative/test_suites/declarative_sources.py +47 -0
  52. airbyte_cdk/test/declarative/test_suites/destination_base.py +12 -0
  53. airbyte_cdk/test/declarative/test_suites/source_base.py +129 -0
  54. airbyte_cdk/test/declarative/utils/__init__.py +0 -0
  55. airbyte_cdk/test/declarative/utils/job_runner.py +128 -0
  56. airbyte_cdk/test/entrypoint_wrapper.py +4 -0
  57. airbyte_cdk/test/fixtures/__init__.py +0 -0
  58. airbyte_cdk/test/fixtures/auto.py +14 -0
  59. airbyte_cdk/test/fixtures/general.py +15 -0
  60. airbyte_cdk/test/mock_http/response_builder.py +0 -8
  61. airbyte_cdk/test/pytest_config/plugin.py +40 -0
  62. {airbyte_cdk-6.45.0.dev4107.dist-info → airbyte_cdk-6.45.0.post20.dev14369762306.dist-info}/METADATA +2 -2
  63. {airbyte_cdk-6.45.0.dev4107.dist-info → airbyte_cdk-6.45.0.post20.dev14369762306.dist-info}/RECORD +67 -47
  64. airbyte_cdk/sources/declarative/retrievers/file_uploader.py +0 -89
  65. airbyte_cdk/sources/file_based/file_record_data.py +0 -22
  66. airbyte_cdk/sources/utils/files_directory.py +0 -15
  67. {airbyte_cdk-6.45.0.dev4107.dist-info → airbyte_cdk-6.45.0.post20.dev14369762306.dist-info}/LICENSE.txt +0 -0
  68. {airbyte_cdk-6.45.0.dev4107.dist-info → airbyte_cdk-6.45.0.post20.dev14369762306.dist-info}/LICENSE_SHORT +0 -0
  69. {airbyte_cdk-6.45.0.dev4107.dist-info → airbyte_cdk-6.45.0.post20.dev14369762306.dist-info}/WHEEL +0 -0
  70. {airbyte_cdk-6.45.0.dev4107.dist-info → airbyte_cdk-6.45.0.post20.dev14369762306.dist-info}/entry_points.txt +0 -0
airbyte_cdk/sources/declarative/partition_routers/grouping_partition_router.py
@@ -0,0 +1,150 @@
+ #
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ #
+
+ from dataclasses import dataclass
+ from typing import Any, Iterable, Mapping, Optional
+
+ from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
+ from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
+
+
+ @dataclass
+ class GroupingPartitionRouter(PartitionRouter):
+     """
+     A partition router that groups partitions from an underlying partition router into batches of a specified size.
+     This is useful for APIs that support filtering by multiple partition keys in a single request.
+
+     Attributes:
+         group_size (int): The number of partitions to include in each group.
+         underlying_partition_router (PartitionRouter): The partition router whose output will be grouped.
+         deduplicate (bool): If True, ensures unique partitions within each group by removing duplicates based on the partition key.
+         config (Config): The connector configuration.
+         parameters (Mapping[str, Any]): Additional parameters for interpolation and configuration.
+     """
+
+     group_size: int
+     underlying_partition_router: PartitionRouter
+     config: Config
+     deduplicate: bool = True
+
+     def __post_init__(self) -> None:
+         self._state: Optional[Mapping[str, StreamState]] = {}
+
+     def stream_slices(self) -> Iterable[StreamSlice]:
+         """
+         Lazily groups partitions from the underlying partition router into batches of size `group_size`.
+
+         This method processes partitions one at a time from the underlying router, maintaining a batch buffer.
+         When the buffer reaches `group_size` or the underlying router is exhausted, it yields a grouped slice.
+         If deduplication is enabled, it tracks seen partition keys to ensure uniqueness within the current batch.
+
+         Yields:
+             Iterable[StreamSlice]: An iterable of StreamSlice objects, where each slice contains a batch of partition values.
+         """
+         batch = []
+         seen_keys = set()
+
+         # Iterate over partitions lazily from the underlying router
+         for partition in self.underlying_partition_router.stream_slices():
+             # Extract the partition key (assuming single key-value pair, e.g., {"board_ids": value})
+             partition_keys = list(partition.partition.keys())
+             # skip parent_slice as it is part of SubstreamPartitionRouter partition
+             if "parent_slice" in partition_keys:
+                 partition_keys.remove("parent_slice")
+             if len(partition_keys) != 1:
+                 raise ValueError(
+                     f"GroupingPartitionRouter expects a single partition key-value pair. Got {partition.partition}"
+                 )
+             key = partition.partition[partition_keys[0]]
+
+             # Skip duplicates if deduplication is enabled
+             if self.deduplicate and key in seen_keys:
+                 continue
+
+             # Add partition to the batch
+             batch.append(partition)
+             if self.deduplicate:
+                 seen_keys.add(key)
+
+             # Yield the batch when it reaches the group_size
+             if len(batch) == self.group_size:
+                 self._state = self.underlying_partition_router.get_stream_state()
+                 yield self._create_grouped_slice(batch)
+                 batch = []  # Reset the batch
+
+         self._state = self.underlying_partition_router.get_stream_state()
+         # Yield any remaining partitions if the batch isn't empty
+         if batch:
+             yield self._create_grouped_slice(batch)
+
+     def _create_grouped_slice(self, batch: list[StreamSlice]) -> StreamSlice:
+         """
+         Creates a grouped StreamSlice from a batch of partitions, aggregating extra fields into a dictionary with list values.
+
+         Args:
+             batch (list[StreamSlice]): A list of StreamSlice objects to group.
+
+         Returns:
+             StreamSlice: A single StreamSlice with combined partition and extra field values.
+         """
+         # Combine partition values into a single dict with lists
+         grouped_partition = {
+             key: [p.partition.get(key) for p in batch] for key in batch[0].partition.keys()
+         }
+
+         # Aggregate extra fields into a dict with list values
+         extra_fields_dict = (
+             {
+                 key: [p.extra_fields.get(key) for p in batch]
+                 for key in set().union(*(p.extra_fields.keys() for p in batch if p.extra_fields))
+             }
+             if any(p.extra_fields for p in batch)
+             else {}
+         )
+         return StreamSlice(
+             partition=grouped_partition,
+             cursor_slice={},  # Cursor is managed by the underlying router or incremental sync
+             extra_fields=extra_fields_dict,
+         )
+
+     def get_request_params(
+         self,
+         stream_state: Optional[StreamState] = None,
+         stream_slice: Optional[StreamSlice] = None,
+         next_page_token: Optional[Mapping[str, Any]] = None,
+     ) -> Mapping[str, Any]:
+         return {}
+
+     def get_request_headers(
+         self,
+         stream_state: Optional[StreamState] = None,
+         stream_slice: Optional[StreamSlice] = None,
+         next_page_token: Optional[Mapping[str, Any]] = None,
+     ) -> Mapping[str, Any]:
+         return {}
+
+     def get_request_body_data(
+         self,
+         stream_state: Optional[StreamState] = None,
+         stream_slice: Optional[StreamSlice] = None,
+         next_page_token: Optional[Mapping[str, Any]] = None,
+     ) -> Mapping[str, Any]:
+         return {}
+
+     def get_request_body_json(
+         self,
+         stream_state: Optional[StreamState] = None,
+         stream_slice: Optional[StreamSlice] = None,
+         next_page_token: Optional[Mapping[str, Any]] = None,
+     ) -> Mapping[str, Any]:
+         return {}
+
+     def set_initial_state(self, stream_state: StreamState) -> None:
+         """Delegate state initialization to the underlying partition router."""
+         self.underlying_partition_router.set_initial_state(stream_state)
+         self._state = self.underlying_partition_router.get_stream_state()
+
+     def get_stream_state(self) -> Optional[Mapping[str, StreamState]]:
+         """Delegate state retrieval to the underlying partition router."""
+         return self._state
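As a rough illustration of the grouping behavior above (not part of the diff; plain dicts stand in for StreamSlice objects and a list stands in for the underlying router), the yielded groups look like this:

def group_partitions(partitions, group_size, deduplicate=True):
    """Group single-key partition dicts and combine their values into lists."""
    batch, seen = [], set()
    for partition in partitions:
        (key, value), = partition.items()  # one key-value pair per partition, as the router requires
        if deduplicate and value in seen:
            continue
        batch.append(partition)
        if deduplicate:
            seen.add(value)
        if len(batch) == group_size:
            yield {key: [p[key] for p in batch]}
            batch = []
    if batch:
        key = next(iter(batch[0]))
        yield {key: [p[key] for p in batch]}

print(list(group_partitions(
    [{"board_ids": 1}, {"board_ids": 2}, {"board_ids": 2}, {"board_ids": 3}], group_size=2
)))
# -> [{'board_ids': [1, 2]}, {'board_ids': [3]}]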
airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py
@@ -374,7 +374,11 @@ class SubstreamPartitionRouter(PartitionRouter):
          # Ignore per-partition states or invalid formats.
          if isinstance(substream_state, (list, dict)) or len(substream_state_values) != 1:
              # If a global state is present under the key "state", use its first value.
-             if "state" in stream_state and isinstance(stream_state["state"], dict):
+             if (
+                 "state" in stream_state
+                 and isinstance(stream_state["state"], dict)
+                 and stream_state["state"] != {}
+             ):
                  substream_state = list(stream_state["state"].values())[0]
              else:
                  return {}
airbyte_cdk/sources/declarative/requesters/query_properties/__init__.py
@@ -0,0 +1,13 @@
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
+
+ from airbyte_cdk.sources.declarative.requesters.query_properties.properties_from_endpoint import (
+     PropertiesFromEndpoint,
+ )
+ from airbyte_cdk.sources.declarative.requesters.query_properties.property_chunking import (
+     PropertyChunking,
+ )
+ from airbyte_cdk.sources.declarative.requesters.query_properties.query_properties import (
+     QueryProperties,
+ )
+
+ __all__ = ["PropertiesFromEndpoint", "PropertyChunking", "QueryProperties"]
airbyte_cdk/sources/declarative/requesters/query_properties/properties_from_endpoint.py
@@ -0,0 +1,40 @@
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
+
+ from dataclasses import InitVar, dataclass
+ from typing import Any, Iterable, List, Mapping, Optional
+
+ import dpath
+
+ from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
+ from airbyte_cdk.sources.declarative.retrievers import Retriever
+ from airbyte_cdk.sources.types import Config, StreamSlice
+
+
+ @dataclass
+ class PropertiesFromEndpoint:
+     """
+     Component that defines the behavior around how to dynamically retrieve a set of request properties from an
+     API endpoint. The set retrieved can then be injected into the requests to extract records from an API source.
+     """
+
+     property_field_path: List[str]
+     retriever: Retriever
+     config: Config
+     parameters: InitVar[Mapping[str, Any]]
+
+     def __post_init__(self, parameters: Mapping[str, Any]) -> None:
+         self._property_field_path = [
+             InterpolatedString(string=property_field, parameters=parameters)
+             for property_field in self.property_field_path
+         ]
+
+     def get_properties_from_endpoint(self, stream_slice: Optional[StreamSlice]) -> Iterable[str]:
+         response_properties = self.retriever.read_records(
+             records_schema={}, stream_slice=stream_slice
+         )
+         for property_obj in response_properties:
+             path = [
+                 node.eval(self.config) if not isinstance(node, str) else node
+                 for node in self._property_field_path
+             ]
+             yield dpath.get(property_obj, path, default=[])  # type: ignore # extracted will be a MutableMapping, given input data structure
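For reference, the dpath lookup at the end of this file resolves the interpolated property_field_path against each record returned by the properties endpoint. A minimal sketch, with hypothetical field names:

import dpath

property_record = {"property": {"name": "firstname", "type": "string"}}
print(dpath.get(property_record, ["property", "name"], default=[]))  # -> "firstname"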
airbyte_cdk/sources/declarative/requesters/query_properties/property_chunking.py
@@ -0,0 +1,69 @@
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
+
+ from dataclasses import InitVar, dataclass
+ from enum import Enum
+ from typing import Any, Iterable, List, Mapping, Optional
+
+ from airbyte_cdk.sources.declarative.requesters.query_properties.strategies import GroupByKey
+ from airbyte_cdk.sources.declarative.requesters.query_properties.strategies.merge_strategy import (
+     RecordMergeStrategy,
+ )
+ from airbyte_cdk.sources.types import Config, Record
+
+
+ class PropertyLimitType(Enum):
+     """
+     The heuristic that determines when the current chunk of properties has reached its maximum size and a new
+     one should be started.
+     """
+
+     characters = "characters"
+     property_count = "property_count"
+
+
+ @dataclass
+ class PropertyChunking:
+     """
+     Defines the behavior for how the complete list of properties to query for are broken down into smaller groups
+     that will be used for multiple requests to the target API.
+     """
+
+     property_limit_type: PropertyLimitType
+     property_limit: Optional[int]
+     record_merge_strategy: Optional[RecordMergeStrategy]
+     parameters: InitVar[Mapping[str, Any]]
+     config: Config
+
+     def __post_init__(self, parameters: Mapping[str, Any]) -> None:
+         self._record_merge_strategy = self.record_merge_strategy or GroupByKey(
+             key="id", config=self.config, parameters=parameters
+         )
+
+     def get_request_property_chunks(
+         self, property_fields: Iterable[str], always_include_properties: Optional[List[str]]
+     ) -> Iterable[List[str]]:
+         if not self.property_limit:
+             single_property_chunk = list(property_fields)
+             if always_include_properties:
+                 single_property_chunk.extend(always_include_properties)
+             yield single_property_chunk
+             return
+         current_chunk = list(always_include_properties) if always_include_properties else []
+         chunk_size = 0
+         for property_field in property_fields:
+             # If property_limit_type is not defined, we default to property_count which is just an incrementing count
+             property_field_size = (
+                 len(property_field)
+                 if self.property_limit_type == PropertyLimitType.characters
+                 else 1
+             )
+             if chunk_size + property_field_size > self.property_limit:
+                 yield current_chunk
+                 current_chunk = list(always_include_properties) if always_include_properties else []
+                 chunk_size = 0
+             current_chunk.append(property_field)
+             chunk_size += property_field_size
+         yield current_chunk
+
+     def get_merge_key(self, record: Record) -> Optional[str]:
+         return self._record_merge_strategy.get_group_key(record=record)
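A minimal sketch of how get_request_property_chunks splits fields under the characters limit type, assuming the module added above is importable from the installed wheel:

from airbyte_cdk.sources.declarative.requesters.query_properties.property_chunking import (
    PropertyChunking,
    PropertyLimitType,
)

chunking = PropertyChunking(
    property_limit_type=PropertyLimitType.characters,
    property_limit=15,
    record_merge_strategy=None,  # falls back to GroupByKey(key="id")
    parameters={},
    config={},
)
fields = ["firstname", "lastname", "email"]
print(list(chunking.get_request_property_chunks(fields, always_include_properties=None)))
# -> [['firstname'], ['lastname', 'email']]  (9 chars, then 8 + 5 chars under the 15-character budget)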
airbyte_cdk/sources/declarative/requesters/query_properties/query_properties.py
@@ -0,0 +1,58 @@
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
+
+ from dataclasses import InitVar, dataclass
+ from typing import Any, Iterable, List, Mapping, Optional, Union
+
+ from airbyte_cdk.sources.declarative.requesters.query_properties import (
+     PropertiesFromEndpoint,
+     PropertyChunking,
+ )
+ from airbyte_cdk.sources.types import Config, StreamSlice
+
+
+ @dataclass
+ class QueryProperties:
+     """
+     Low-code component that encompasses the behavior to inject additional property values into the outbound API
+     requests. Property values can be defined statically within the manifest or dynamically by making requests
+     to a partner API to retrieve the properties. Query properties also allow for splitting of the total set of
+     properties into smaller chunks to satisfy API restrictions around the total amount of data retrieved
+     """
+
+     property_list: Optional[Union[List[str], PropertiesFromEndpoint]]
+     always_include_properties: Optional[List[str]]
+     property_chunking: Optional[PropertyChunking]
+     config: Config
+     parameters: InitVar[Mapping[str, Any]]
+
+     def get_request_property_chunks(
+         self, stream_slice: Optional[StreamSlice] = None
+     ) -> Iterable[List[str]]:
+         """
+         Uses the defined property_list to fetch the total set of properties dynamically or from a static list
+         and based on the resulting properties, performs property chunking if applicable.
+         :param stream_slice: The StreamSlice of the current partition being processed during the sync. This is included
+         because subcomponents of QueryProperties can make use of interpolation of the top-level StreamSlice object
+         """
+         fields: Union[Iterable[str], List[str]]
+         if isinstance(self.property_list, PropertiesFromEndpoint):
+             fields = self.property_list.get_properties_from_endpoint(stream_slice=stream_slice)
+         else:
+             fields = self.property_list if self.property_list else []
+
+         if self.property_chunking:
+             yield from self.property_chunking.get_request_property_chunks(
+                 property_fields=fields, always_include_properties=self.always_include_properties
+             )
+         else:
+             yield list(fields)
+
+     # delete later, but leaving this to keep the discussion thread on the PR from getting hidden
+     def has_multiple_chunks(self, stream_slice: Optional[StreamSlice]) -> bool:
+         property_chunks = iter(self.get_request_property_chunks(stream_slice=stream_slice))
+         try:
+             next(property_chunks)
+             next(property_chunks)
+             return True
+         except StopIteration:
+             return False
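A companion sketch tying the pieces together: a static property_list chunked by property count, with always_include_properties repeated in every chunk and not counted toward the limit. Again assuming the modules in this diff are importable from the installed wheel:

from airbyte_cdk.sources.declarative.requesters.query_properties import PropertyChunking, QueryProperties
from airbyte_cdk.sources.declarative.requesters.query_properties.property_chunking import PropertyLimitType

query_properties = QueryProperties(
    property_list=["firstname", "lastname", "email"],
    always_include_properties=["id"],
    property_chunking=PropertyChunking(
        property_limit_type=PropertyLimitType.property_count,
        property_limit=2,
        record_merge_strategy=None,
        parameters={},
        config={},
    ),
    config={},
    parameters={},
)
print(list(query_properties.get_request_property_chunks()))
# -> [['id', 'firstname', 'lastname'], ['id', 'email']]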
airbyte_cdk/sources/declarative/requesters/query_properties/strategies/__init__.py
@@ -0,0 +1,10 @@
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
+
+ from airbyte_cdk.sources.declarative.requesters.query_properties.strategies.group_by_key import (
+     GroupByKey,
+ )
+ from airbyte_cdk.sources.declarative.requesters.query_properties.strategies.merge_strategy import (
+     RecordMergeStrategy,
+ )
+
+ __all__ = ["GroupByKey", "RecordMergeStrategy"]
airbyte_cdk/sources/declarative/requesters/query_properties/strategies/group_by_key.py
@@ -0,0 +1,33 @@
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
+
+ from dataclasses import InitVar, dataclass
+ from typing import Any, List, Mapping, Optional, Union
+
+ from airbyte_cdk.sources.declarative.requesters.query_properties.strategies.merge_strategy import (
+     RecordMergeStrategy,
+ )
+ from airbyte_cdk.sources.types import Config, Record
+
+
+ @dataclass
+ class GroupByKey(RecordMergeStrategy):
+     """
+     Record merge strategy that combines records together according to values on the record for one or many keys.
+     """
+
+     key: Union[str, List[str]]
+     parameters: InitVar[Mapping[str, Any]]
+     config: Config
+
+     def __post_init__(self, parameters: Mapping[str, Any]) -> None:
+         self._keys = [self.key] if isinstance(self.key, str) else self.key
+
+     def get_group_key(self, record: Record) -> Optional[str]:
+         resolved_keys = []
+         for key in self._keys:
+             key_value = record.data.get(key)
+             if key_value:
+                 resolved_keys.append(key_value)
+             else:
+                 return None
+         return ",".join(resolved_keys)
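Sketch of the merge-key behavior above, using a plain dict in place of the CDK Record type (any missing or empty key value means the record cannot be merged):

def group_key(data, keys):
    resolved = []
    for k in keys:
        value = data.get(k)
        if not value:
            return None  # mirrors GroupByKey: a missing key disqualifies the record from merging
        resolved.append(value)
    return ",".join(resolved)

print(group_key({"id": "42", "portal": "eu1"}, ["id", "portal"]))  # -> "42,eu1"
print(group_key({"id": "42"}, ["id", "portal"]))                   # -> None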
airbyte_cdk/sources/declarative/requesters/query_properties/strategies/merge_strategy.py
@@ -0,0 +1,19 @@
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
+
+ from abc import ABC, abstractmethod
+ from dataclasses import dataclass
+ from typing import Optional
+
+ from airbyte_cdk.sources.types import Record
+
+
+ @dataclass
+ class RecordMergeStrategy(ABC):
+     """
+     Describe the interface for how records that required multiple requests to get the complete set of fields
+     should be merged back into a single record.
+     """
+
+     @abstractmethod
+     def get_group_key(self, record: Record) -> Optional[str]:
+         pass
airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py
@@ -1,9 +1,9 @@
  #
- # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
  #

  from dataclasses import InitVar, dataclass, field
- from typing import Any, Mapping, MutableMapping, Optional, Union
+ from typing import Any, List, Mapping, MutableMapping, Optional, Union

  from airbyte_cdk.sources.declarative.interpolation.interpolated_nested_mapping import NestedMapping
  from airbyte_cdk.sources.declarative.requesters.request_options.interpolated_nested_request_input_provider import (
@@ -40,6 +40,7 @@ class InterpolatedRequestOptionsProvider(RequestOptionsProvider):
      request_headers: Optional[RequestInput] = None
      request_body_data: Optional[RequestInput] = None
      request_body_json: Optional[NestedMapping] = None
+     query_properties_key: Optional[str] = None

      def __post_init__(self, parameters: Mapping[str, Any]) -> None:
          if self.request_parameters is None:
@@ -83,6 +84,28 @@ class InterpolatedRequestOptionsProvider(RequestOptionsProvider):
              valid_value_types=ValidRequestTypes,
          )
          if isinstance(interpolated_value, dict):
+             if self.query_properties_key:
+                 if not stream_slice:
+                     raise ValueError(
+                         "stream_slice should not be None if query properties in requests is enabled. Please contact Airbyte Support"
+                     )
+                 elif (
+                     "query_properties" not in stream_slice.extra_fields
+                     or stream_slice.extra_fields.get("query_properties") is None
+                 ):
+                     raise ValueError(
+                         "QueryProperties component is defined but stream_partition does not contain query_properties. Please contact Airbyte Support"
+                     )
+                 elif not isinstance(stream_slice.extra_fields.get("query_properties"), List):
+                     raise ValueError(
+                         "QueryProperties component is defined but stream_slice.extra_fields.query_properties is not a List. Please contact Airbyte Support"
+                     )
+                 interpolated_value = {
+                     **interpolated_value,
+                     self.query_properties_key: ",".join(
+                         stream_slice.extra_fields.get("query_properties")  # type: ignore # Earlier type checks validate query_properties type
+                     ),
+                 }
              return interpolated_value
          return {}
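The net effect of the new query_properties_key handling, shown on plain dicts (the parameter name "properties" below is hypothetical): a property chunk carried on the slice's extra_fields is joined into one comma-separated request option.

extra_fields = {"query_properties": ["firstname", "lastname", "email"]}
query_properties_key = "properties"
interpolated_value = {"limit": "100"}
interpolated_value = {
    **interpolated_value,
    query_properties_key: ",".join(extra_fields["query_properties"]),
}
print(interpolated_value)  # -> {'limit': '100', 'properties': 'firstname,lastname,email'}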
airbyte_cdk/sources/declarative/retrievers/simple_retriever.py
@@ -1,8 +1,9 @@
  #
- # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
  #

  import json
+ from collections import defaultdict
  from dataclasses import InitVar, dataclass, field
  from functools import partial
  from itertools import islice
@@ -12,6 +13,7 @@ from typing import (
      Iterable,
      List,
      Mapping,
+     MutableMapping,
      Optional,
      Set,
      Tuple,
@@ -31,6 +33,7 @@ from airbyte_cdk.sources.declarative.partition_routers.single_partition_router i
  )
  from airbyte_cdk.sources.declarative.requesters.paginators.no_pagination import NoPagination
  from airbyte_cdk.sources.declarative.requesters.paginators.paginator import Paginator
+ from airbyte_cdk.sources.declarative.requesters.query_properties import QueryProperties
  from airbyte_cdk.sources.declarative.requesters.request_options import (
      DefaultRequestOptionsProvider,
      RequestOptionsProvider,
@@ -88,6 +91,7 @@ class SimpleRetriever(Retriever):
      )
      cursor: Optional[DeclarativeCursor] = None
      ignore_stream_slicer_parameters_on_paginated_requests: bool = False
+     additional_query_properties: Optional[QueryProperties] = None

      def __post_init__(self, parameters: Mapping[str, Any]) -> None:
          self._paginator = self.paginator or NoPagination(parameters=parameters)
@@ -445,43 +449,110 @@
          :param stream_slice: The stream slice to read data for
          :return: The records read from the API source
          """
-         _slice = stream_slice or StreamSlice(partition={}, cursor_slice={})  # None-check

-         most_recent_record_from_slice = None
-         record_generator = partial(
-             self._parse_records,
-             stream_slice=stream_slice,
-             stream_state=self.state or {},
-             records_schema=records_schema,
+         property_chunks = (
+             list(
+                 self.additional_query_properties.get_request_property_chunks(
+                     stream_slice=stream_slice
+                 )
+             )
+             if self.additional_query_properties
+             else []
          )
+         records_without_merge_key = []
+         merged_records: MutableMapping[str, Any] = defaultdict(dict)

-         if self.cursor and isinstance(self.cursor, ResumableFullRefreshCursor):
-             stream_state = self.state
-
-             # Before syncing the RFR stream, we check if the job's prior attempt was successful and don't need to
-             # fetch more records. The platform deletes stream state for full refresh streams before starting a
-             # new job, so we don't need to worry about this value existing for the initial attempt
-             if stream_state.get(FULL_REFRESH_SYNC_COMPLETE_KEY):
-                 return
+         _slice = stream_slice or StreamSlice(partition={}, cursor_slice={})  # None-check
+         most_recent_record_from_slice = None

-             yield from self._read_single_page(record_generator, stream_state, _slice)
-         else:
-             for stream_data in self._read_pages(record_generator, self.state, _slice):
-                 current_record = self._extract_record(stream_data, _slice)
-                 if self.cursor and current_record:
-                     self.cursor.observe(_slice, current_record)
-
-                 # Latest record read, not necessarily within slice boundaries.
-                 # TODO Remove once all custom components implement `observe` method.
-                 # https://github.com/airbytehq/airbyte-internal-issues/issues/6955
-                 most_recent_record_from_slice = self._get_most_recent_record(
-                     most_recent_record_from_slice, current_record, _slice
+         if self.additional_query_properties:
+             for properties in property_chunks:
+                 _slice = StreamSlice(
+                     partition=_slice.partition or {},
+                     cursor_slice=_slice.cursor_slice or {},
+                     extra_fields={"query_properties": properties},
+                 )  # None-check
+
+                 record_generator = partial(
+                     self._parse_records,
+                     stream_slice=_slice,
+                     stream_state=self.state or {},
+                     records_schema=records_schema,
                  )
-                 yield stream_data

+                 for stream_data in self._read_pages(record_generator, self.state, _slice):
+                     current_record = self._extract_record(stream_data, _slice)
+                     if self.cursor and current_record:
+                         self.cursor.observe(_slice, current_record)
+
+                     # Latest record read, not necessarily within slice boundaries.
+                     # TODO Remove once all custom components implement `observe` method.
+                     # https://github.com/airbytehq/airbyte-internal-issues/issues/6955
+                     most_recent_record_from_slice = self._get_most_recent_record(
+                         most_recent_record_from_slice, current_record, _slice
+                     )
+
+                     if current_record and self.additional_query_properties.property_chunking:
+                         merge_key = (
+                             self.additional_query_properties.property_chunking.get_merge_key(
+                                 current_record
+                             )
+                         )
+                         if merge_key:
+                             merged_records[merge_key].update(current_record)
+                         else:
+                             # We should still emit records even if the record did not have a merge key
+                             records_without_merge_key.append(current_record)
+                     else:
+                         yield stream_data
              if self.cursor:
                  self.cursor.close_slice(_slice, most_recent_record_from_slice)
-         return
+
+             if len(merged_records) > 0:
+                 yield from [
+                     Record(data=merged_record, stream_name=self.name, associated_slice=stream_slice)
+                     for merged_record in merged_records.values()
+                 ]
+             if len(records_without_merge_key) > 0:
+                 yield from records_without_merge_key
+         else:
+             _slice = stream_slice or StreamSlice(partition={}, cursor_slice={})  # None-check
+
+             most_recent_record_from_slice = None
+             record_generator = partial(
+                 self._parse_records,
+                 stream_slice=stream_slice,
+                 stream_state=self.state or {},
+                 records_schema=records_schema,
+             )
+
+             if self.cursor and isinstance(self.cursor, ResumableFullRefreshCursor):
+                 stream_state = self.state
+
+                 # Before syncing the RFR stream, we check if the job's prior attempt was successful and don't need to
+                 # fetch more records. The platform deletes stream state for full refresh streams before starting a
+                 # new job, so we don't need to worry about this value existing for the initial attempt
+                 if stream_state.get(FULL_REFRESH_SYNC_COMPLETE_KEY):
+                     return
+
+                 yield from self._read_single_page(record_generator, stream_state, _slice)
+             else:
+                 for stream_data in self._read_pages(record_generator, self.state, _slice):
+                     current_record = self._extract_record(stream_data, _slice)
+                     if self.cursor and current_record:
+                         self.cursor.observe(_slice, current_record)
+
+                     # Latest record read, not necessarily within slice boundaries.
+                     # TODO Remove once all custom components implement `observe` method.
+                     # https://github.com/airbytehq/airbyte-internal-issues/issues/6955
+                     most_recent_record_from_slice = self._get_most_recent_record(
+                         most_recent_record_from_slice, current_record, _slice
+                     )
+                     yield stream_data
+
+                 if self.cursor:
+                     self.cursor.close_slice(_slice, most_recent_record_from_slice)
+             return

      def _get_most_recent_record(
          self,
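The merging path added above collects partial records per property chunk and recombines them by merge key. A plain-Python sketch of that defaultdict(dict).update(...) pattern:

from collections import defaultdict

merged_records = defaultdict(dict)
partials = [
    {"id": "42", "firstname": "Ada"},          # read with chunk ["id", "firstname"]
    {"id": "42", "email": "ada@example.com"},  # read with chunk ["id", "email"]
]
for partial in partials:
    merged_records[partial["id"]].update(partial)

print(list(merged_records.values()))
# -> [{'id': '42', 'firstname': 'Ada', 'email': 'ada@example.com'}]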
airbyte_cdk/sources/declarative/schema/default_schema_loader.py
@@ -37,7 +37,7 @@ class DefaultSchemaLoader(SchemaLoader):

          try:
              return self.default_loader.get_json_schema()
-         except OSError:
+         except (OSError, ValueError):
              # A slight hack since we don't directly have the stream name. However, when building the default filepath we assume the
              # runtime options stores stream name 'name' so we'll do the same here
              stream_name = self._parameters.get("name", "")
airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py
@@ -58,16 +58,11 @@ class DeclarativePartition(Partition):
      def read(self) -> Iterable[Record]:
          for stream_data in self._retriever.read_records(self._json_schema, self._stream_slice):
              if isinstance(stream_data, Mapping):
-                 record = (
-                     stream_data
-                     if isinstance(stream_data, Record)
-                     else Record(
-                         data=stream_data,
-                         stream_name=self.stream_name(),
-                         associated_slice=self._stream_slice,
-                     )
+                 yield Record(
+                     data=stream_data,
+                     stream_name=self.stream_name(),
+                     associated_slice=self._stream_slice,
                  )
-                 yield record
              else:
                  self._message_repository.emit_message(stream_data)