airbyte-cdk 6.26.0.dev4106__py3-none-any.whl → 6.26.0.dev4108__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. airbyte_cdk/cli/source_declarative_manifest/_run.py +3 -3
  2. airbyte_cdk/connector_builder/connector_builder_handler.py +2 -2
  3. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +7 -7
  4. airbyte_cdk/sources/declarative/auth/jwt.py +17 -11
  5. airbyte_cdk/sources/declarative/auth/oauth.py +22 -13
  6. airbyte_cdk/sources/declarative/auth/token.py +3 -8
  7. airbyte_cdk/sources/declarative/auth/token_provider.py +4 -5
  8. airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +19 -9
  9. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +71 -34
  10. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +33 -4
  11. airbyte_cdk/sources/declarative/declarative_stream.py +3 -1
  12. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +93 -27
  13. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +7 -6
  14. airbyte_cdk/sources/declarative/manifest_declarative_source.py +5 -3
  15. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +22 -5
  16. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +138 -38
  17. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +5 -5
  18. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +4 -2
  19. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +49 -25
  20. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +4 -4
  21. airbyte_cdk/sources/declarative/requesters/http_requester.py +5 -1
  22. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +6 -5
  23. airbyte_cdk/sources/declarative/requesters/request_option.py +83 -4
  24. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +7 -6
  25. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +6 -12
  26. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +4 -1
  27. airbyte_cdk/sources/declarative/schema/__init__.py +2 -0
  28. airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +44 -5
  29. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +18 -11
  30. airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +51 -0
  31. airbyte_cdk/sources/file_based/file_based_source.py +16 -55
  32. airbyte_cdk/sources/file_based/file_based_stream_reader.py +19 -31
  33. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +7 -7
  34. airbyte_cdk/sources/file_based/stream/identities_stream.py +5 -2
  35. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +22 -13
  36. airbyte_cdk/sources/streams/core.py +6 -6
  37. airbyte_cdk/sources/streams/http/http.py +1 -2
  38. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +231 -62
  39. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +166 -83
  40. airbyte_cdk/sources/types.py +4 -2
  41. airbyte_cdk/sources/utils/transform.py +23 -2
  42. airbyte_cdk/utils/datetime_helpers.py +499 -0
  43. airbyte_cdk/utils/mapping_helpers.py +86 -27
  44. airbyte_cdk/utils/slice_hasher.py +8 -1
  45. airbyte_cdk-6.26.0.dev4108.dist-info/LICENSE_SHORT +1 -0
  46. {airbyte_cdk-6.26.0.dev4106.dist-info → airbyte_cdk-6.26.0.dev4108.dist-info}/METADATA +5 -5
  47. {airbyte_cdk-6.26.0.dev4106.dist-info → airbyte_cdk-6.26.0.dev4108.dist-info}/RECORD +50 -48
  48. {airbyte_cdk-6.26.0.dev4106.dist-info → airbyte_cdk-6.26.0.dev4108.dist-info}/WHEEL +1 -1
  49. airbyte_cdk/sources/file_based/config/permissions.py +0 -34
  50. {airbyte_cdk-6.26.0.dev4106.dist-info → airbyte_cdk-6.26.0.dev4108.dist-info}/LICENSE.txt +0 -0
  51. {airbyte_cdk-6.26.0.dev4106.dist-info → airbyte_cdk-6.26.0.dev4108.dist-info}/entry_points.txt +0 -0
@@ -4,9 +4,9 @@ from dataclasses import InitVar, dataclass, field
4
4
  from typing import Any, Callable, Iterable, Mapping, Optional
5
5
 
6
6
  from airbyte_cdk.models import FailureType
7
+ from airbyte_cdk.sources.declarative.async_job.job import AsyncJob
7
8
  from airbyte_cdk.sources.declarative.async_job.job_orchestrator import (
8
9
  AsyncJobOrchestrator,
9
- AsyncPartition,
10
10
  )
11
11
  from airbyte_cdk.sources.declarative.partition_routers.single_partition_router import (
12
12
  SinglePartitionRouter,
@@ -42,12 +42,12 @@ class AsyncJobPartitionRouter(StreamSlicer):
42
42
 
43
43
  for completed_partition in self._job_orchestrator.create_and_get_completed_partitions():
44
44
  yield StreamSlice(
45
- partition=dict(completed_partition.stream_slice.partition)
46
- | {"partition": completed_partition},
45
+ partition=dict(completed_partition.stream_slice.partition),
47
46
  cursor_slice=completed_partition.stream_slice.cursor_slice,
47
+ extra_fields={"jobs": list(completed_partition.jobs)},
48
48
  )
49
49
 
50
- def fetch_records(self, partition: AsyncPartition) -> Iterable[Mapping[str, Any]]:
50
+ def fetch_records(self, async_jobs: Iterable[AsyncJob]) -> Iterable[Mapping[str, Any]]:
51
51
  """
52
52
  This method of fetching records extends beyond what a PartitionRouter/StreamSlicer should
53
53
  be responsible for. However, this was added in because the JobOrchestrator is required to
@@ -62,4 +62,4 @@ class AsyncJobPartitionRouter(StreamSlicer):
62
62
  failure_type=FailureType.system_error,
63
63
  )
64
64
 
65
- return self._job_orchestrator.fetch_records(partition=partition)
65
+ return self._job_orchestrator.fetch_records(async_jobs=async_jobs)
@@ -3,7 +3,7 @@
3
3
  #
4
4
 
5
5
  from dataclasses import InitVar, dataclass
6
- from typing import Any, Iterable, List, Mapping, Optional, Union
6
+ from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Union
7
7
 
8
8
  from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
9
9
  from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
@@ -100,7 +100,9 @@ class ListPartitionRouter(PartitionRouter):
100
100
  ):
101
101
  slice_value = stream_slice.get(self._cursor_field.eval(self.config))
102
102
  if slice_value:
103
- return {self.request_option.field_name.eval(self.config): slice_value} # type: ignore # field_name is always casted to InterpolatedString
103
+ options: MutableMapping[str, Any] = {}
104
+ self.request_option.inject_into_request(options, slice_value, self.config)
105
+ return options
104
106
  else:
105
107
  return {}
106
108
  else:
@@ -4,7 +4,7 @@
4
4
  import copy
5
5
  import logging
6
6
  from dataclasses import InitVar, dataclass
7
- from typing import TYPE_CHECKING, Any, Iterable, List, Mapping, Optional, Union
7
+ from typing import TYPE_CHECKING, Any, Iterable, List, Mapping, MutableMapping, Optional, Union
8
8
 
9
9
  import dpath
10
10
 
@@ -118,7 +118,7 @@ class SubstreamPartitionRouter(PartitionRouter):
118
118
  def _get_request_option(
119
119
  self, option_type: RequestOptionType, stream_slice: Optional[StreamSlice]
120
120
  ) -> Mapping[str, Any]:
121
- params = {}
121
+ params: MutableMapping[str, Any] = {}
122
122
  if stream_slice:
123
123
  for parent_config in self.parent_stream_configs:
124
124
  if (
@@ -128,13 +128,7 @@ class SubstreamPartitionRouter(PartitionRouter):
128
128
  key = parent_config.partition_field.eval(self.config) # type: ignore # partition_field is always casted to an interpolated string
129
129
  value = stream_slice.get(key)
130
130
  if value:
131
- params.update(
132
- {
133
- parent_config.request_option.field_name.eval( # type: ignore [union-attr]
134
- config=self.config
135
- ): value
136
- }
137
- )
131
+ parent_config.request_option.inject_into_request(params, value, self.config)
138
132
  return params
139
133
 
140
134
  def stream_slices(self) -> Iterable[StreamSlice]:
@@ -295,28 +289,58 @@ class SubstreamPartitionRouter(PartitionRouter):
295
289
  return
296
290
 
297
291
  if not parent_state and incremental_dependency:
298
- # Attempt to retrieve child state
299
- substream_state_values = list(stream_state.values())
300
- substream_state = substream_state_values[0] if substream_state_values else {}
301
- # Filter out per partition state. Because we pass the state to the parent stream in the format {cursor_field: substream_state}
302
- if isinstance(substream_state, (list, dict)):
303
- substream_state = {}
304
-
305
- parent_state = {}
306
-
307
- # Copy child state to parent streams with incremental dependencies
308
- if substream_state:
309
- for parent_config in self.parent_stream_configs:
310
- if parent_config.incremental_dependency:
311
- parent_state[parent_config.stream.name] = {
312
- parent_config.stream.cursor_field: substream_state
313
- }
292
+ # Migrate child state to parent state format
293
+ parent_state = self._migrate_child_state_to_parent_state(stream_state)
314
294
 
315
295
  # Set state for each parent stream with an incremental dependency
316
296
  for parent_config in self.parent_stream_configs:
317
297
  if parent_config.incremental_dependency:
318
298
  parent_config.stream.state = parent_state.get(parent_config.stream.name, {})
319
299
 
300
+ def _migrate_child_state_to_parent_state(self, stream_state: StreamState) -> StreamState:
301
+ """
302
+ Migrate the child stream state to the parent stream's state format.
303
+
304
+ This method converts the global or child state into a format compatible with parent
305
+ streams. The migration occurs only for parent streams with incremental dependencies.
306
+ The method filters out per-partition states and retains only the global state in the
307
+ format `{cursor_field: cursor_value}`.
308
+
309
+ Args:
310
+ stream_state (StreamState): The state to migrate. Expected formats include:
311
+ - {"updated_at": "2023-05-27T00:00:00Z"}
312
+ - {"states": [...] } (ignored during migration)
313
+
314
+ Returns:
315
+ StreamState: A migrated state for parent streams in the format:
316
+ {
317
+ "parent_stream_name": {"parent_stream_cursor": "2023-05-27T00:00:00Z"}
318
+ }
319
+
320
+ Example:
321
+ Input: {"updated_at": "2023-05-27T00:00:00Z"}
322
+ Output: {
323
+ "parent_stream_name": {"parent_stream_cursor": "2023-05-27T00:00:00Z"}
324
+ }
325
+ """
326
+ substream_state_values = list(stream_state.values())
327
+ substream_state = substream_state_values[0] if substream_state_values else {}
328
+
329
+ # Ignore per-partition states or invalid formats
330
+ if isinstance(substream_state, (list, dict)) or len(substream_state_values) != 1:
331
+ return {}
332
+
333
+ # Copy child state to parent streams with incremental dependencies
334
+ parent_state = {}
335
+ if substream_state:
336
+ for parent_config in self.parent_stream_configs:
337
+ if parent_config.incremental_dependency:
338
+ parent_state[parent_config.stream.name] = {
339
+ parent_config.stream.cursor_field: substream_state
340
+ }
341
+
342
+ return parent_state
343
+
320
344
  def get_stream_state(self) -> Optional[Mapping[str, StreamState]]:
321
345
  """
322
346
  Get the state of the parent streams.
@@ -151,16 +151,16 @@ class HttpResponseFilter:
151
151
  :param response: The HTTP response which can be used during interpolation
152
152
  :return: The evaluated error message string to be emitted
153
153
  """
154
- return self.error_message.eval( # type: ignore [no-any-return, union-attr]
154
+ return self.error_message.eval( # type: ignore[no-any-return, union-attr]
155
155
  self.config, response=self._safe_response_json(response), headers=response.headers
156
156
  )
157
157
 
158
158
  def _response_matches_predicate(self, response: requests.Response) -> bool:
159
159
  return (
160
160
  bool(
161
- self.predicate.condition # type: ignore [union-attr]
162
- and self.predicate.eval( # type: ignore [union-attr]
163
- None, # type: ignore [arg-type]
161
+ self.predicate.condition # type:ignore[union-attr]
162
+ and self.predicate.eval( # type:ignore[union-attr]
163
+ None, # type: ignore[arg-type]
164
164
  response=self._safe_response_json(response),
165
165
  headers=response.headers,
166
166
  )
@@ -199,6 +199,9 @@ class HttpRequester(Requester):
199
199
  Raise a ValueError if there's a key collision
200
200
  Returned merged mapping otherwise
201
201
  """
202
+
203
+ is_body_json = requester_method.__name__ == "get_request_body_json"
204
+
202
205
  return combine_mappings(
203
206
  [
204
207
  requester_method(
@@ -208,7 +211,8 @@ class HttpRequester(Requester):
208
211
  ),
209
212
  auth_options_method(),
210
213
  extra_options,
211
- ]
214
+ ],
215
+ allow_same_value_merge=is_body_json,
212
216
  )
213
217
 
214
218
  def _request_headers(
@@ -187,7 +187,7 @@ class DefaultPaginator(Paginator):
187
187
  def _get_request_options(
188
188
  self, option_type: RequestOptionType, next_page_token: Optional[Mapping[str, Any]]
189
189
  ) -> MutableMapping[str, Any]:
190
- options = {}
190
+ options: MutableMapping[str, Any] = {}
191
191
 
192
192
  token = next_page_token.get("next_page_token") if next_page_token else None
193
193
  if (
@@ -196,15 +196,16 @@ class DefaultPaginator(Paginator):
196
196
  and isinstance(self.page_token_option, RequestOption)
197
197
  and self.page_token_option.inject_into == option_type
198
198
  ):
199
- options[self.page_token_option.field_name.eval(config=self.config)] = token # type: ignore # field_name is always cast to an interpolated string
199
+ self.page_token_option.inject_into_request(options, token, self.config)
200
+
200
201
  if (
201
202
  self.page_size_option
202
203
  and self.pagination_strategy.get_page_size()
203
204
  and self.page_size_option.inject_into == option_type
204
205
  ):
205
- options[self.page_size_option.field_name.eval(config=self.config)] = ( # type: ignore [union-attr]
206
- self.pagination_strategy.get_page_size()
207
- ) # type: ignore # field_name is always cast to an interpolated string
206
+ page_size = self.pagination_strategy.get_page_size()
207
+ self.page_size_option.inject_into_request(options, page_size, self.config)
208
+
208
209
  return options
209
210
 
210
211
 
@@ -4,9 +4,10 @@
4
4
 
5
5
  from dataclasses import InitVar, dataclass
6
6
  from enum import Enum
7
- from typing import Any, Mapping, Union
7
+ from typing import Any, List, Literal, Mapping, MutableMapping, Optional, Union
8
8
 
9
9
  from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
10
+ from airbyte_cdk.sources.types import Config
10
11
 
11
12
 
12
13
  class RequestOptionType(Enum):
@@ -26,13 +27,91 @@ class RequestOption:
26
27
  Describes an option to set on a request
27
28
 
28
29
  Attributes:
29
- field_name (str): Describes the name of the parameter to inject
30
+ field_name (str): Describes the name of the parameter to inject. Mutually exclusive with field_path.
31
+ field_path (list(str)): Describes the path to a nested field as a list of field names.
32
+ Only valid for body_json injection type, and mutually exclusive with field_name.
30
33
  inject_into (RequestOptionType): Describes where in the HTTP request to inject the parameter
31
34
  """
32
35
 
33
- field_name: Union[InterpolatedString, str]
34
36
  inject_into: RequestOptionType
35
37
  parameters: InitVar[Mapping[str, Any]]
38
+ field_name: Optional[Union[InterpolatedString, str]] = None
39
+ field_path: Optional[List[Union[InterpolatedString, str]]] = None
36
40
 
37
41
  def __post_init__(self, parameters: Mapping[str, Any]) -> None:
38
- self.field_name = InterpolatedString.create(self.field_name, parameters=parameters)
42
+ # Validate inputs. We should expect either field_name or field_path, but not both
43
+ if self.field_name is None and self.field_path is None:
44
+ raise ValueError("RequestOption requires either a field_name or field_path")
45
+
46
+ if self.field_name is not None and self.field_path is not None:
47
+ raise ValueError(
48
+ "Only one of field_name or field_path can be provided to RequestOption"
49
+ )
50
+
51
+ # Nested field injection is only supported for body JSON injection
52
+ if self.field_path is not None and self.inject_into != RequestOptionType.body_json:
53
+ raise ValueError(
54
+ "Nested field injection is only supported for body JSON injection. Please use a top-level field_name for other injection types."
55
+ )
56
+
57
+ # Convert field_name and field_path into InterpolatedString objects if they are strings
58
+ if self.field_name is not None:
59
+ self.field_name = InterpolatedString.create(self.field_name, parameters=parameters)
60
+ elif self.field_path is not None:
61
+ self.field_path = [
62
+ InterpolatedString.create(segment, parameters=parameters)
63
+ for segment in self.field_path
64
+ ]
65
+
66
+ @property
67
+ def _is_field_path(self) -> bool:
68
+ """Returns whether this option is a field path (ie, a nested field)"""
69
+ return self.field_path is not None
70
+
71
+ def inject_into_request(
72
+ self,
73
+ target: MutableMapping[str, Any],
74
+ value: Any,
75
+ config: Config,
76
+ ) -> None:
77
+ """
78
+ Inject a request option value into a target request structure using either field_name or field_path.
79
+ For non-body-json injection, only top-level field names are supported.
80
+ For body-json injection, both field names and nested field paths are supported.
81
+
82
+ Args:
83
+ target: The request structure to inject the value into
84
+ value: The value to inject
85
+ config: The config object to use for interpolation
86
+ """
87
+ if self._is_field_path:
88
+ if self.inject_into != RequestOptionType.body_json:
89
+ raise ValueError(
90
+ "Nested field injection is only supported for body JSON injection. Please use a top-level field_name for other injection types."
91
+ )
92
+
93
+ assert self.field_path is not None # for type checker
94
+ current = target
95
+ # Convert path segments into strings, evaluating any interpolated segments
96
+ # Example: ["data", "{{ config[user_type] }}", "id"] -> ["data", "admin", "id"]
97
+ *path_parts, final_key = [
98
+ str(
99
+ segment.eval(config=config)
100
+ if isinstance(segment, InterpolatedString)
101
+ else segment
102
+ )
103
+ for segment in self.field_path
104
+ ]
105
+
106
+ # Build a nested dictionary structure and set the final value at the deepest level
107
+ for part in path_parts:
108
+ current = current.setdefault(part, {})
109
+ current[final_key] = value
110
+ else:
111
+ # For non-nested fields, evaluate the field name if it's an interpolated string
112
+ key = (
113
+ self.field_name.eval(config=config)
114
+ if isinstance(self.field_name, InterpolatedString)
115
+ else self.field_name
116
+ )
117
+ target[str(key)] = value
@@ -80,12 +80,13 @@ class DatetimeBasedRequestOptionsProvider(RequestOptionsProvider):
80
80
  options: MutableMapping[str, Any] = {}
81
81
  if not stream_slice:
82
82
  return options
83
+
83
84
  if self.start_time_option and self.start_time_option.inject_into == option_type:
84
- options[self.start_time_option.field_name.eval(config=self.config)] = stream_slice.get( # type: ignore # field_name is always casted to an interpolated string
85
- self._partition_field_start.eval(self.config)
86
- )
85
+ start_time_value = stream_slice.get(self._partition_field_start.eval(self.config))
86
+ self.start_time_option.inject_into_request(options, start_time_value, self.config)
87
+
87
88
  if self.end_time_option and self.end_time_option.inject_into == option_type:
88
- options[self.end_time_option.field_name.eval(config=self.config)] = stream_slice.get( # type: ignore [union-attr]
89
- self._partition_field_end.eval(self.config)
90
- )
89
+ end_time_value = stream_slice.get(self._partition_field_end.eval(self.config))
90
+ self.end_time_option.inject_into_request(options, end_time_value, self.config)
91
+
91
92
  return options
@@ -6,7 +6,7 @@ from typing import Any, Iterable, Mapping, Optional
6
6
 
7
7
  from typing_extensions import deprecated
8
8
 
9
- from airbyte_cdk.models import FailureType
9
+ from airbyte_cdk.sources.declarative.async_job.job import AsyncJob
10
10
  from airbyte_cdk.sources.declarative.async_job.job_orchestrator import AsyncPartition
11
11
  from airbyte_cdk.sources.declarative.extractors.record_selector import RecordSelector
12
12
  from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_router import (
@@ -16,7 +16,6 @@ from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
16
16
  from airbyte_cdk.sources.source import ExperimentalClassWarning
17
17
  from airbyte_cdk.sources.streams.core import StreamData
18
18
  from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
19
- from airbyte_cdk.utils.traced_exception import AirbyteTracedException
20
19
 
21
20
 
22
21
  @deprecated(
@@ -57,9 +56,9 @@ class AsyncRetriever(Retriever):
57
56
 
58
57
  return self.state
59
58
 
60
- def _validate_and_get_stream_slice_partition(
59
+ def _validate_and_get_stream_slice_jobs(
61
60
  self, stream_slice: Optional[StreamSlice] = None
62
- ) -> AsyncPartition:
61
+ ) -> Iterable[AsyncJob]:
63
62
  """
64
63
  Validates the stream_slice argument and returns the partition from it.
65
64
 
@@ -73,12 +72,7 @@ class AsyncRetriever(Retriever):
73
72
  AirbyteTracedException: If the stream_slice is not an instance of StreamSlice or if the partition is not present in the stream_slice.
74
73
 
75
74
  """
76
- if not isinstance(stream_slice, StreamSlice) or "partition" not in stream_slice.partition:
77
- raise AirbyteTracedException(
78
- message="Invalid arguments to AsyncJobRetriever.read_records: stream_slice is no optional. Please contact Airbyte Support",
79
- failure_type=FailureType.system_error,
80
- )
81
- return stream_slice["partition"] # type: ignore # stream_slice["partition"] has been added as an AsyncPartition as part of stream_slices
75
+ return stream_slice.extra_fields.get("jobs", []) if stream_slice else []
82
76
 
83
77
  def stream_slices(self) -> Iterable[Optional[StreamSlice]]:
84
78
  return self.stream_slicer.stream_slices()
@@ -89,8 +83,8 @@ class AsyncRetriever(Retriever):
89
83
  stream_slice: Optional[StreamSlice] = None,
90
84
  ) -> Iterable[StreamData]:
91
85
  stream_state: StreamState = self._get_stream_state()
92
- partition: AsyncPartition = self._validate_and_get_stream_slice_partition(stream_slice)
93
- records: Iterable[Mapping[str, Any]] = self.stream_slicer.fetch_records(partition)
86
+ jobs: Iterable[AsyncJob] = self._validate_and_get_stream_slice_jobs(stream_slice)
87
+ records: Iterable[Mapping[str, Any]] = self.stream_slicer.fetch_records(jobs)
94
88
 
95
89
  yield from self.record_selector.filter_and_transform(
96
90
  all_data=records,
@@ -128,6 +128,9 @@ class SimpleRetriever(Retriever):
128
128
  Returned merged mapping otherwise
129
129
  """
130
130
  # FIXME we should eventually remove the usage of stream_state as part of the interpolation
131
+
132
+ is_body_json = paginator_method.__name__ == "get_request_body_json"
133
+
131
134
  mappings = [
132
135
  paginator_method(
133
136
  stream_state=stream_state,
@@ -143,7 +146,7 @@ class SimpleRetriever(Retriever):
143
146
  next_page_token=next_page_token,
144
147
  )
145
148
  )
146
- return combine_mappings(mappings)
149
+ return combine_mappings(mappings, allow_same_value_merge=is_body_json)
147
150
 
148
151
  def _request_headers(
149
152
  self,
@@ -4,6 +4,7 @@
4
4
 
5
5
  from airbyte_cdk.sources.declarative.schema.default_schema_loader import DefaultSchemaLoader
6
6
  from airbyte_cdk.sources.declarative.schema.dynamic_schema_loader import (
7
+ ComplexFieldType,
7
8
  DynamicSchemaLoader,
8
9
  SchemaTypeIdentifier,
9
10
  TypesMap,
@@ -18,6 +19,7 @@ __all__ = [
18
19
  "SchemaLoader",
19
20
  "InlineSchemaLoader",
20
21
  "DynamicSchemaLoader",
22
+ "ComplexFieldType",
21
23
  "TypesMap",
22
24
  "SchemaTypeIdentifier",
23
25
  ]
@@ -18,7 +18,7 @@ from airbyte_cdk.sources.declarative.transformations import RecordTransformation
18
18
  from airbyte_cdk.sources.source import ExperimentalClassWarning
19
19
  from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
20
20
 
21
- AIRBYTE_DATA_TYPES: Mapping[str, Mapping[str, Any]] = {
21
+ AIRBYTE_DATA_TYPES: Mapping[str, MutableMapping[str, Any]] = {
22
22
  "string": {"type": ["null", "string"]},
23
23
  "boolean": {"type": ["null", "boolean"]},
24
24
  "date": {"type": ["null", "string"], "format": "date"},
@@ -45,6 +45,25 @@ AIRBYTE_DATA_TYPES: Mapping[str, Mapping[str, Any]] = {
45
45
  }
46
46
 
47
47
 
48
+ @deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
49
+ @dataclass(frozen=True)
50
+ class ComplexFieldType:
51
+ """
52
+ Identifies complex field type
53
+ """
54
+
55
+ field_type: str
56
+ items: Optional[Union[str, "ComplexFieldType"]] = None
57
+
58
+ def __post_init__(self) -> None:
59
+ """
60
+ Enforces that `items` is only used when `field_type` is an array
61
+ """
62
+ # `items_type` is valid only for array target types
63
+ if self.items and self.field_type != "array":
64
+ raise ValueError("'items' can only be used when 'field_type' is an array.")
65
+
66
+
48
67
  @deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
49
68
  @dataclass(frozen=True)
50
69
  class TypesMap:
@@ -52,7 +71,7 @@ class TypesMap:
52
71
  Represents a mapping between a current type and its corresponding target type.
53
72
  """
54
73
 
55
- target_type: Union[List[str], str]
74
+ target_type: Union[List[str], str, ComplexFieldType]
56
75
  current_type: Union[List[str], str]
57
76
  condition: Optional[str]
58
77
 
@@ -135,8 +154,9 @@ class DynamicSchemaLoader(SchemaLoader):
135
154
  transformed_properties = self._transform(properties, {})
136
155
 
137
156
  return {
138
- "$schema": "http://json-schema.org/draft-07/schema#",
157
+ "$schema": "https://json-schema.org/draft-07/schema#",
139
158
  "type": "object",
159
+ "additionalProperties": True,
140
160
  "properties": transformed_properties,
141
161
  }
142
162
 
@@ -188,18 +208,37 @@ class DynamicSchemaLoader(SchemaLoader):
188
208
  first_type = self._get_airbyte_type(mapped_field_type[0])
189
209
  second_type = self._get_airbyte_type(mapped_field_type[1])
190
210
  return {"oneOf": [first_type, second_type]}
211
+
191
212
  elif isinstance(mapped_field_type, str):
192
213
  return self._get_airbyte_type(mapped_field_type)
214
+
215
+ elif isinstance(mapped_field_type, ComplexFieldType):
216
+ return self._resolve_complex_type(mapped_field_type)
217
+
193
218
  else:
194
219
  raise ValueError(
195
220
  f"Invalid data type. Available string or two items list of string. Got {mapped_field_type}."
196
221
  )
197
222
 
223
+ def _resolve_complex_type(self, complex_type: ComplexFieldType) -> Mapping[str, Any]:
224
+ if not complex_type.items:
225
+ return self._get_airbyte_type(complex_type.field_type)
226
+
227
+ field_type = self._get_airbyte_type(complex_type.field_type)
228
+
229
+ field_type["items"] = (
230
+ self._get_airbyte_type(complex_type.items)
231
+ if isinstance(complex_type.items, str)
232
+ else self._resolve_complex_type(complex_type.items)
233
+ )
234
+
235
+ return field_type
236
+
198
237
  def _replace_type_if_not_valid(
199
238
  self,
200
239
  field_type: Union[List[str], str],
201
240
  raw_schema: MutableMapping[str, Any],
202
- ) -> Union[List[str], str]:
241
+ ) -> Union[List[str], str, ComplexFieldType]:
203
242
  """
204
243
  Replaces a field type if it matches a type mapping in `types_map`.
205
244
  """
@@ -216,7 +255,7 @@ class DynamicSchemaLoader(SchemaLoader):
216
255
  return field_type
217
256
 
218
257
  @staticmethod
219
- def _get_airbyte_type(field_type: str) -> Mapping[str, Any]:
258
+ def _get_airbyte_type(field_type: str) -> MutableMapping[str, Any]:
220
259
  """
221
260
  Maps a field type to its corresponding Airbyte type definition.
222
261
  """
@@ -14,6 +14,23 @@ from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileB
14
14
  from airbyte_cdk.sources.utils import schema_helpers
15
15
 
16
16
 
17
+ class DeliverPermissions(BaseModel):
18
+ class Config(OneOfOptionConfig):
19
+ title = "Replicate Permissions ACL"
20
+ description = "Sends one identity stream and one for more permissions (ACL) streams to the destination. This data can be used in downstream systems to recreate permission restrictions mirroring the original source."
21
+ discriminator = "delivery_type"
22
+
23
+ delivery_type: Literal["use_permissions_transfer"] = Field(
24
+ "use_permissions_transfer", const=True
25
+ )
26
+
27
+ include_identities_stream: bool = Field(
28
+ title="Include Identity Stream",
29
+ description="This data can be used in downstream systems to recreate permission restrictions mirroring the original source",
30
+ default=True,
31
+ )
32
+
33
+
17
34
  class DeliverRecords(BaseModel):
18
35
  class Config(OneOfOptionConfig):
19
36
  title = "Replicate Records"
@@ -22,16 +39,6 @@ class DeliverRecords(BaseModel):
22
39
 
23
40
  delivery_type: Literal["use_records_transfer"] = Field("use_records_transfer", const=True)
24
41
 
25
- sync_acl_permissions: bool = Field(
26
- title="Include ACL Permissions",
27
- description="Joins Document allowlists to each stream.",
28
- default=False,
29
- airbyte_hidden=True,
30
- )
31
- domain: Optional[str] = Field(
32
- title="Domain", description="The domain of the identities.", airbyte_hidden=True
33
- )
34
-
35
42
 
36
43
  class DeliverRawFiles(BaseModel):
37
44
  class Config(OneOfOptionConfig):
@@ -75,7 +82,7 @@ class AbstractFileBasedSpec(BaseModel):
75
82
  order=10,
76
83
  )
77
84
 
78
- delivery_method: Union[DeliverRecords, DeliverRawFiles] = Field(
85
+ delivery_method: Union[DeliverRecords, DeliverRawFiles, DeliverPermissions] = Field(
79
86
  title="Delivery Method",
80
87
  discriminator="delivery_type",
81
88
  type="object",
@@ -0,0 +1,51 @@
1
+ #
2
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import AbstractFileBasedSpec
6
+
7
+
8
+ def use_file_transfer(parsed_config: AbstractFileBasedSpec) -> bool:
9
+ return (
10
+ hasattr(parsed_config.delivery_method, "delivery_type")
11
+ and parsed_config.delivery_method.delivery_type == "use_file_transfer"
12
+ )
13
+
14
+
15
+ def preserve_directory_structure(parsed_config: AbstractFileBasedSpec) -> bool:
16
+ """
17
+ Determines whether to preserve directory structure during file transfer.
18
+
19
+ When enabled, files maintain their subdirectory paths in the destination.
20
+ When disabled, files are flattened to the root of the destination.
21
+
22
+ Args:
23
+ parsed_config: The parsed configuration containing delivery method settings
24
+
25
+ Returns:
26
+ True if directory structure should be preserved (default), False otherwise
27
+ """
28
+ if (
29
+ use_file_transfer(parsed_config)
30
+ and hasattr(parsed_config.delivery_method, "preserve_directory_structure")
31
+ and parsed_config.delivery_method.preserve_directory_structure is not None
32
+ ):
33
+ return parsed_config.delivery_method.preserve_directory_structure
34
+ return True
35
+
36
+
37
+ def use_permissions_transfer(parsed_config: AbstractFileBasedSpec) -> bool:
38
+ return (
39
+ hasattr(parsed_config.delivery_method, "delivery_type")
40
+ and parsed_config.delivery_method.delivery_type == "use_permissions_transfer"
41
+ )
42
+
43
+
44
+ def include_identities_stream(parsed_config: AbstractFileBasedSpec) -> bool:
45
+ if (
46
+ use_permissions_transfer(parsed_config)
47
+ and hasattr(parsed_config.delivery_method, "include_identities_stream")
48
+ and parsed_config.delivery_method.include_identities_stream is not None
49
+ ):
50
+ return parsed_config.delivery_method.include_identities_stream
51
+ return False