airbyte-cdk 6.34.0.dev1__py3-none-any.whl → 6.34.1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. airbyte_cdk/connector_builder/connector_builder_handler.py +12 -16
  2. airbyte_cdk/connector_builder/message_grouper.py +448 -0
  3. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +7 -7
  4. airbyte_cdk/sources/declarative/auth/jwt.py +11 -17
  5. airbyte_cdk/sources/declarative/auth/oauth.py +1 -6
  6. airbyte_cdk/sources/declarative/auth/token.py +8 -3
  7. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +19 -30
  8. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +85 -203
  9. airbyte_cdk/sources/declarative/declarative_stream.py +1 -3
  10. airbyte_cdk/sources/declarative/decoders/__init__.py +4 -0
  11. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +2 -7
  12. airbyte_cdk/sources/declarative/decoders/json_decoder.py +58 -12
  13. airbyte_cdk/sources/declarative/extractors/record_selector.py +3 -12
  14. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +25 -56
  15. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +6 -12
  16. airbyte_cdk/sources/declarative/manifest_declarative_source.py +0 -9
  17. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +41 -150
  18. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +84 -234
  19. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +5 -5
  20. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +2 -4
  21. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +18 -26
  22. airbyte_cdk/sources/declarative/requesters/http_requester.py +1 -8
  23. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +5 -16
  24. airbyte_cdk/sources/declarative/requesters/request_option.py +4 -83
  25. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +6 -7
  26. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +12 -6
  27. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +1 -4
  28. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +1 -2
  29. airbyte_cdk/sources/file_based/file_based_source.py +37 -70
  30. airbyte_cdk/sources/file_based/file_based_stream_reader.py +12 -107
  31. airbyte_cdk/sources/file_based/stream/__init__.py +1 -10
  32. airbyte_cdk/sources/streams/call_rate.py +47 -185
  33. airbyte_cdk/sources/streams/http/http.py +2 -1
  34. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +56 -217
  35. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +73 -144
  36. airbyte_cdk/utils/datetime_helpers.py +66 -48
  37. airbyte_cdk/utils/mapping_helpers.py +26 -126
  38. {airbyte_cdk-6.34.0.dev1.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/METADATA +1 -1
  39. {airbyte_cdk-6.34.0.dev1.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/RECORD +43 -52
  40. airbyte_cdk/connector_builder/test_reader/__init__.py +0 -7
  41. airbyte_cdk/connector_builder/test_reader/helpers.py +0 -591
  42. airbyte_cdk/connector_builder/test_reader/message_grouper.py +0 -160
  43. airbyte_cdk/connector_builder/test_reader/reader.py +0 -441
  44. airbyte_cdk/connector_builder/test_reader/types.py +0 -75
  45. airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +0 -81
  46. airbyte_cdk/sources/file_based/stream/identities_stream.py +0 -47
  47. airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +0 -85
  48. airbyte_cdk/sources/specs/transfer_modes.py +0 -26
  49. airbyte_cdk/sources/streams/permissions/identities_stream.py +0 -75
  50. {airbyte_cdk-6.34.0.dev1.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/LICENSE.txt +0 -0
  51. {airbyte_cdk-6.34.0.dev1.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/LICENSE_SHORT +0 -0
  52. {airbyte_cdk-6.34.0.dev1.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/WHEEL +0 -0
  53. {airbyte_cdk-6.34.0.dev1.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/entry_points.txt +0 -0
@@ -4,7 +4,7 @@
4
4
  import copy
5
5
  import logging
6
6
  from dataclasses import InitVar, dataclass
7
- from typing import TYPE_CHECKING, Any, Iterable, List, Mapping, MutableMapping, Optional, Union
7
+ from typing import TYPE_CHECKING, Any, Iterable, List, Mapping, Optional, Union
8
8
 
9
9
  import dpath
10
10
 
@@ -118,7 +118,7 @@ class SubstreamPartitionRouter(PartitionRouter):
118
118
  def _get_request_option(
119
119
  self, option_type: RequestOptionType, stream_slice: Optional[StreamSlice]
120
120
  ) -> Mapping[str, Any]:
121
- params: MutableMapping[str, Any] = {}
121
+ params = {}
122
122
  if stream_slice:
123
123
  for parent_config in self.parent_stream_configs:
124
124
  if (
@@ -128,7 +128,13 @@ class SubstreamPartitionRouter(PartitionRouter):
128
128
  key = parent_config.partition_field.eval(self.config) # type: ignore # partition_field is always casted to an interpolated string
129
129
  value = stream_slice.get(key)
130
130
  if value:
131
- parent_config.request_option.inject_into_request(params, value, self.config)
131
+ params.update(
132
+ {
133
+ parent_config.request_option.field_name.eval( # type: ignore [union-attr]
134
+ config=self.config
135
+ ): value
136
+ }
137
+ )
132
138
  return params
133
139
 
134
140
  def stream_slices(self) -> Iterable[StreamSlice]:
@@ -299,33 +305,23 @@ class SubstreamPartitionRouter(PartitionRouter):
299
305
 
300
306
  def _migrate_child_state_to_parent_state(self, stream_state: StreamState) -> StreamState:
301
307
  """
302
- Migrate the child or global stream state into the parent stream's state format.
303
-
304
- This method converts the child stream state—or, if present, the global state—into a format that is
305
- compatible with parent streams that use incremental synchronization. The migration occurs only for
306
- parent streams with incremental dependencies. It filters out per-partition states and retains only the
307
- global state in the form {cursor_field: cursor_value}.
308
+ Migrate the child stream state to the parent stream's state format.
308
309
 
309
- The method supports multiple input formats:
310
- - A simple global state, e.g.:
311
- {"updated_at": "2023-05-27T00:00:00Z"}
312
- - A state object that contains a "state" key (which is assumed to hold the global state), e.g.:
313
- {"state": {"updated_at": "2023-05-27T00:00:00Z"}, ...}
314
- In this case, the migration uses the first value from the "state" dictionary.
315
- - Any per-partition state formats or other non-simple structures are ignored during migration.
310
+ This method converts the global or child state into a format compatible with parent
311
+ streams. The migration occurs only for parent streams with incremental dependencies.
312
+ The method filters out per-partition states and retains only the global state in the
313
+ format `{cursor_field: cursor_value}`.
316
314
 
317
315
  Args:
318
316
  stream_state (StreamState): The state to migrate. Expected formats include:
319
317
  - {"updated_at": "2023-05-27T00:00:00Z"}
320
- - {"state": {"updated_at": "2023-05-27T00:00:00Z"}, ...}
321
- (In this format, only the first global state value is used, and per-partition states are ignored.)
318
+ - {"states": [...] } (ignored during migration)
322
319
 
323
320
  Returns:
324
321
  StreamState: A migrated state for parent streams in the format:
325
322
  {
326
323
  "parent_stream_name": {"parent_stream_cursor": "2023-05-27T00:00:00Z"}
327
324
  }
328
- where each parent stream with an incremental dependency is assigned its corresponding cursor value.
329
325
 
330
326
  Example:
331
327
  Input: {"updated_at": "2023-05-27T00:00:00Z"}
@@ -336,15 +332,11 @@ class SubstreamPartitionRouter(PartitionRouter):
336
332
  substream_state_values = list(stream_state.values())
337
333
  substream_state = substream_state_values[0] if substream_state_values else {}
338
334
 
339
- # Ignore per-partition states or invalid formats.
335
+ # Ignore per-partition states or invalid formats
340
336
  if isinstance(substream_state, (list, dict)) or len(substream_state_values) != 1:
341
- # If a global state is present under the key "state", use its first value.
342
- if "state" in stream_state and isinstance(stream_state["state"], dict):
343
- substream_state = list(stream_state["state"].values())[0]
344
- else:
345
- return {}
337
+ return {}
346
338
 
347
- # Build the parent state for all parent streams with incremental dependencies.
339
+ # Copy child state to parent streams with incremental dependencies
348
340
  parent_state = {}
349
341
  if substream_state:
350
342
  for parent_config in self.parent_stream_configs:
@@ -22,7 +22,6 @@ from airbyte_cdk.sources.declarative.requesters.request_options.interpolated_req
22
22
  )
23
23
  from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod, Requester
24
24
  from airbyte_cdk.sources.message import MessageRepository, NoopMessageRepository
25
- from airbyte_cdk.sources.streams.call_rate import APIBudget
26
25
  from airbyte_cdk.sources.streams.http import HttpClient
27
26
  from airbyte_cdk.sources.streams.http.error_handlers import ErrorHandler
28
27
  from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
@@ -56,7 +55,6 @@ class HttpRequester(Requester):
56
55
  http_method: Union[str, HttpMethod] = HttpMethod.GET
57
56
  request_options_provider: Optional[InterpolatedRequestOptionsProvider] = None
58
57
  error_handler: Optional[ErrorHandler] = None
59
- api_budget: Optional[APIBudget] = None
60
58
  disable_retries: bool = False
61
59
  message_repository: MessageRepository = NoopMessageRepository()
62
60
  use_cache: bool = False
@@ -93,7 +91,6 @@ class HttpRequester(Requester):
93
91
  name=self.name,
94
92
  logger=self.logger,
95
93
  error_handler=self.error_handler,
96
- api_budget=self.api_budget,
97
94
  authenticator=self._authenticator,
98
95
  use_cache=self.use_cache,
99
96
  backoff_strategy=backoff_strategies,
@@ -202,9 +199,6 @@ class HttpRequester(Requester):
202
199
  Raise a ValueError if there's a key collision
203
200
  Returned merged mapping otherwise
204
201
  """
205
-
206
- is_body_json = requester_method.__name__ == "get_request_body_json"
207
-
208
202
  return combine_mappings(
209
203
  [
210
204
  requester_method(
@@ -214,8 +208,7 @@ class HttpRequester(Requester):
214
208
  ),
215
209
  auth_options_method(),
216
210
  extra_options,
217
- ],
218
- allow_same_value_merge=is_body_json,
211
+ ]
219
212
  )
220
213
 
221
214
  def _request_headers(
@@ -23,9 +23,6 @@ from airbyte_cdk.sources.declarative.requesters.request_option import (
23
23
  )
24
24
  from airbyte_cdk.sources.declarative.requesters.request_path import RequestPath
25
25
  from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState
26
- from airbyte_cdk.utils.mapping_helpers import (
27
- _validate_component_request_option_paths,
28
- )
29
26
 
30
27
 
31
28
  @dataclass
@@ -116,13 +113,6 @@ class DefaultPaginator(Paginator):
116
113
  if isinstance(self.url_base, str):
117
114
  self.url_base = InterpolatedString(string=self.url_base, parameters=parameters)
118
115
 
119
- if self.page_token_option and not isinstance(self.page_token_option, RequestPath):
120
- _validate_component_request_option_paths(
121
- self.config,
122
- self.page_size_option,
123
- self.page_token_option,
124
- )
125
-
126
116
  def get_initial_token(self) -> Optional[Any]:
127
117
  """
128
118
  Return the page token that should be used for the first request of a stream
@@ -197,7 +187,7 @@ class DefaultPaginator(Paginator):
197
187
  def _get_request_options(
198
188
  self, option_type: RequestOptionType, next_page_token: Optional[Mapping[str, Any]]
199
189
  ) -> MutableMapping[str, Any]:
200
- options: MutableMapping[str, Any] = {}
190
+ options = {}
201
191
 
202
192
  token = next_page_token.get("next_page_token") if next_page_token else None
203
193
  if (
@@ -206,16 +196,15 @@ class DefaultPaginator(Paginator):
206
196
  and isinstance(self.page_token_option, RequestOption)
207
197
  and self.page_token_option.inject_into == option_type
208
198
  ):
209
- self.page_token_option.inject_into_request(options, token, self.config)
210
-
199
+ options[self.page_token_option.field_name.eval(config=self.config)] = token # type: ignore # field_name is always cast to an interpolated string
211
200
  if (
212
201
  self.page_size_option
213
202
  and self.pagination_strategy.get_page_size()
214
203
  and self.page_size_option.inject_into == option_type
215
204
  ):
216
- page_size = self.pagination_strategy.get_page_size()
217
- self.page_size_option.inject_into_request(options, page_size, self.config)
218
-
205
+ options[self.page_size_option.field_name.eval(config=self.config)] = ( # type: ignore [union-attr]
206
+ self.pagination_strategy.get_page_size()
207
+ ) # type: ignore # field_name is always cast to an interpolated string
219
208
  return options
220
209
 
221
210
 
@@ -4,10 +4,9 @@
4
4
 
5
5
  from dataclasses import InitVar, dataclass
6
6
  from enum import Enum
7
- from typing import Any, List, Literal, Mapping, MutableMapping, Optional, Union
7
+ from typing import Any, Mapping, Union
8
8
 
9
9
  from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
10
- from airbyte_cdk.sources.types import Config
11
10
 
12
11
 
13
12
  class RequestOptionType(Enum):
@@ -27,91 +26,13 @@ class RequestOption:
27
26
  Describes an option to set on a request
28
27
 
29
28
  Attributes:
30
- field_name (str): Describes the name of the parameter to inject. Mutually exclusive with field_path.
31
- field_path (list(str)): Describes the path to a nested field as a list of field names.
32
- Only valid for body_json injection type, and mutually exclusive with field_name.
29
+ field_name (str): Describes the name of the parameter to inject
33
30
  inject_into (RequestOptionType): Describes where in the HTTP request to inject the parameter
34
31
  """
35
32
 
33
+ field_name: Union[InterpolatedString, str]
36
34
  inject_into: RequestOptionType
37
35
  parameters: InitVar[Mapping[str, Any]]
38
- field_name: Optional[Union[InterpolatedString, str]] = None
39
- field_path: Optional[List[Union[InterpolatedString, str]]] = None
40
36
 
41
37
  def __post_init__(self, parameters: Mapping[str, Any]) -> None:
42
- # Validate inputs. We should expect either field_name or field_path, but not both
43
- if self.field_name is None and self.field_path is None:
44
- raise ValueError("RequestOption requires either a field_name or field_path")
45
-
46
- if self.field_name is not None and self.field_path is not None:
47
- raise ValueError(
48
- "Only one of field_name or field_path can be provided to RequestOption"
49
- )
50
-
51
- # Nested field injection is only supported for body JSON injection
52
- if self.field_path is not None and self.inject_into != RequestOptionType.body_json:
53
- raise ValueError(
54
- "Nested field injection is only supported for body JSON injection. Please use a top-level field_name for other injection types."
55
- )
56
-
57
- # Convert field_name and field_path into InterpolatedString objects if they are strings
58
- if self.field_name is not None:
59
- self.field_name = InterpolatedString.create(self.field_name, parameters=parameters)
60
- elif self.field_path is not None:
61
- self.field_path = [
62
- InterpolatedString.create(segment, parameters=parameters)
63
- for segment in self.field_path
64
- ]
65
-
66
- @property
67
- def _is_field_path(self) -> bool:
68
- """Returns whether this option is a field path (ie, a nested field)"""
69
- return self.field_path is not None
70
-
71
- def inject_into_request(
72
- self,
73
- target: MutableMapping[str, Any],
74
- value: Any,
75
- config: Config,
76
- ) -> None:
77
- """
78
- Inject a request option value into a target request structure using either field_name or field_path.
79
- For non-body-json injection, only top-level field names are supported.
80
- For body-json injection, both field names and nested field paths are supported.
81
-
82
- Args:
83
- target: The request structure to inject the value into
84
- value: The value to inject
85
- config: The config object to use for interpolation
86
- """
87
- if self._is_field_path:
88
- if self.inject_into != RequestOptionType.body_json:
89
- raise ValueError(
90
- "Nested field injection is only supported for body JSON injection. Please use a top-level field_name for other injection types."
91
- )
92
-
93
- assert self.field_path is not None # for type checker
94
- current = target
95
- # Convert path segments into strings, evaluating any interpolated segments
96
- # Example: ["data", "{{ config[user_type] }}", "id"] -> ["data", "admin", "id"]
97
- *path_parts, final_key = [
98
- str(
99
- segment.eval(config=config)
100
- if isinstance(segment, InterpolatedString)
101
- else segment
102
- )
103
- for segment in self.field_path
104
- ]
105
-
106
- # Build a nested dictionary structure and set the final value at the deepest level
107
- for part in path_parts:
108
- current = current.setdefault(part, {})
109
- current[final_key] = value
110
- else:
111
- # For non-nested fields, evaluate the field name if it's an interpolated string
112
- key = (
113
- self.field_name.eval(config=config)
114
- if isinstance(self.field_name, InterpolatedString)
115
- else self.field_name
116
- )
117
- target[str(key)] = value
38
+ self.field_name = InterpolatedString.create(self.field_name, parameters=parameters)
@@ -80,13 +80,12 @@ class DatetimeBasedRequestOptionsProvider(RequestOptionsProvider):
80
80
  options: MutableMapping[str, Any] = {}
81
81
  if not stream_slice:
82
82
  return options
83
-
84
83
  if self.start_time_option and self.start_time_option.inject_into == option_type:
85
- start_time_value = stream_slice.get(self._partition_field_start.eval(self.config))
86
- self.start_time_option.inject_into_request(options, start_time_value, self.config)
87
-
84
+ options[self.start_time_option.field_name.eval(config=self.config)] = stream_slice.get( # type: ignore # field_name is always casted to an interpolated string
85
+ self._partition_field_start.eval(self.config)
86
+ )
88
87
  if self.end_time_option and self.end_time_option.inject_into == option_type:
89
- end_time_value = stream_slice.get(self._partition_field_end.eval(self.config))
90
- self.end_time_option.inject_into_request(options, end_time_value, self.config)
91
-
88
+ options[self.end_time_option.field_name.eval(config=self.config)] = stream_slice.get( # type: ignore [union-attr]
89
+ self._partition_field_end.eval(self.config)
90
+ )
92
91
  return options
@@ -6,7 +6,7 @@ from typing import Any, Iterable, Mapping, Optional
6
6
 
7
7
  from typing_extensions import deprecated
8
8
 
9
- from airbyte_cdk.sources.declarative.async_job.job import AsyncJob
9
+ from airbyte_cdk.models import FailureType
10
10
  from airbyte_cdk.sources.declarative.async_job.job_orchestrator import AsyncPartition
11
11
  from airbyte_cdk.sources.declarative.extractors.record_selector import RecordSelector
12
12
  from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_router import (
@@ -16,6 +16,7 @@ from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
16
16
  from airbyte_cdk.sources.source import ExperimentalClassWarning
17
17
  from airbyte_cdk.sources.streams.core import StreamData
18
18
  from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
19
+ from airbyte_cdk.utils.traced_exception import AirbyteTracedException
19
20
 
20
21
 
21
22
  @deprecated(
@@ -56,9 +57,9 @@ class AsyncRetriever(Retriever):
56
57
 
57
58
  return self.state
58
59
 
59
- def _validate_and_get_stream_slice_jobs(
60
+ def _validate_and_get_stream_slice_partition(
60
61
  self, stream_slice: Optional[StreamSlice] = None
61
- ) -> Iterable[AsyncJob]:
62
+ ) -> AsyncPartition:
62
63
  """
63
64
  Validates the stream_slice argument and returns the partition from it.
64
65
 
@@ -72,7 +73,12 @@ class AsyncRetriever(Retriever):
72
73
  AirbyteTracedException: If the stream_slice is not an instance of StreamSlice or if the partition is not present in the stream_slice.
73
74
 
74
75
  """
75
- return stream_slice.extra_fields.get("jobs", []) if stream_slice else []
76
+ if not isinstance(stream_slice, StreamSlice) or "partition" not in stream_slice.partition:
77
+ raise AirbyteTracedException(
78
+ message="Invalid arguments to AsyncRetriever.read_records: stream_slice is not optional. Please contact Airbyte Support",
79
+ failure_type=FailureType.system_error,
80
+ )
81
+ return stream_slice["partition"] # type: ignore # stream_slice["partition"] has been added as an AsyncPartition as part of stream_slices
76
82
 
77
83
  def stream_slices(self) -> Iterable[Optional[StreamSlice]]:
78
84
  return self.stream_slicer.stream_slices()
@@ -83,8 +89,8 @@ class AsyncRetriever(Retriever):
83
89
  stream_slice: Optional[StreamSlice] = None,
84
90
  ) -> Iterable[StreamData]:
85
91
  stream_state: StreamState = self._get_stream_state()
86
- jobs: Iterable[AsyncJob] = self._validate_and_get_stream_slice_jobs(stream_slice)
87
- records: Iterable[Mapping[str, Any]] = self.stream_slicer.fetch_records(jobs)
92
+ partition: AsyncPartition = self._validate_and_get_stream_slice_partition(stream_slice)
93
+ records: Iterable[Mapping[str, Any]] = self.stream_slicer.fetch_records(partition)
88
94
 
89
95
  yield from self.record_selector.filter_and_transform(
90
96
  all_data=records,
@@ -128,9 +128,6 @@ class SimpleRetriever(Retriever):
128
128
  Returned merged mapping otherwise
129
129
  """
130
130
  # FIXME we should eventually remove the usage of stream_state as part of the interpolation
131
-
132
- is_body_json = paginator_method.__name__ == "get_request_body_json"
133
-
134
131
  mappings = [
135
132
  paginator_method(
136
133
  stream_state=stream_state,
@@ -146,7 +143,7 @@ class SimpleRetriever(Retriever):
146
143
  next_page_token=next_page_token,
147
144
  )
148
145
  )
149
- return combine_mappings(mappings, allow_same_value_merge=is_body_json)
146
+ return combine_mappings(mappings)
150
147
 
151
148
  def _request_headers(
152
149
  self,
@@ -11,7 +11,6 @@ from pydantic.v1 import AnyUrl, BaseModel, Field
11
11
 
12
12
  from airbyte_cdk import OneOfOptionConfig
13
13
  from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
14
- from airbyte_cdk.sources.specs.transfer_modes import DeliverPermissions
15
14
  from airbyte_cdk.sources.utils import schema_helpers
16
15
 
17
16
 
@@ -66,7 +65,7 @@ class AbstractFileBasedSpec(BaseModel):
66
65
  order=10,
67
66
  )
68
67
 
69
- delivery_method: Union[DeliverRecords, DeliverRawFiles, DeliverPermissions] = Field(
68
+ delivery_method: Union[DeliverRecords, DeliverRawFiles] = Field(
70
69
  title="Delivery Method",
71
70
  discriminator="delivery_type",
72
71
  type="object",
@@ -33,12 +33,6 @@ from airbyte_cdk.sources.file_based.config.file_based_stream_config import (
33
33
  FileBasedStreamConfig,
34
34
  ValidationPolicy,
35
35
  )
36
- from airbyte_cdk.sources.file_based.config.validate_config_transfer_modes import (
37
- include_identities_stream,
38
- preserve_directory_structure,
39
- use_file_transfer,
40
- use_permissions_transfer,
41
- )
42
36
  from airbyte_cdk.sources.file_based.discovery_policy import (
43
37
  AbstractDiscoveryPolicy,
44
38
  DefaultDiscoveryPolicy,
@@ -55,12 +49,7 @@ from airbyte_cdk.sources.file_based.schema_validation_policies import (
55
49
  DEFAULT_SCHEMA_VALIDATION_POLICIES,
56
50
  AbstractSchemaValidationPolicy,
57
51
  )
58
- from airbyte_cdk.sources.file_based.stream import (
59
- AbstractFileBasedStream,
60
- DefaultFileBasedStream,
61
- FileIdentitiesStream,
62
- PermissionsFileBasedStream,
63
- )
52
+ from airbyte_cdk.sources.file_based.stream import AbstractFileBasedStream, DefaultFileBasedStream
64
53
  from airbyte_cdk.sources.file_based.stream.concurrent.adapters import FileBasedStreamFacade
65
54
  from airbyte_cdk.sources.file_based.stream.concurrent.cursor import (
66
55
  AbstractConcurrentFileBasedCursor,
@@ -77,7 +66,6 @@ from airbyte_cdk.utils.traced_exception import AirbyteTracedException
77
66
  DEFAULT_CONCURRENCY = 100
78
67
  MAX_CONCURRENCY = 100
79
68
  INITIAL_N_PARTITIONS = MAX_CONCURRENCY // 2
80
- IDENTITIES_STREAM = "identities"
81
69
 
82
70
 
83
71
  class FileBasedSource(ConcurrentSourceAdapter, ABC):
@@ -169,20 +157,13 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
169
157
  errors = []
170
158
  tracebacks = []
171
159
  for stream in streams:
172
- if isinstance(stream, FileIdentitiesStream):
173
- identity = next(iter(stream.load_identity_groups()))
174
- if not identity:
175
- errors.append(
176
- "Unable to get identities for current configuration, please check your credentials"
177
- )
178
- continue
179
160
  if not isinstance(stream, AbstractFileBasedStream):
180
161
  raise ValueError(f"Stream {stream} is not a file-based stream.")
181
162
  try:
182
163
  parsed_config = self._get_parsed_config(config)
183
164
  availability_method = (
184
165
  stream.availability_strategy.check_availability
185
- if use_file_transfer(parsed_config) or use_permissions_transfer(parsed_config)
166
+ if self._use_file_transfer(parsed_config)
186
167
  else stream.availability_strategy.check_availability_and_parsability
187
168
  )
188
169
  (
@@ -258,7 +239,7 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
258
239
  message_repository=self.message_repository,
259
240
  )
260
241
  stream = FileBasedStreamFacade.create_from_stream(
261
- stream=self._make_file_based_stream(
242
+ stream=self._make_default_stream(
262
243
  stream_config=stream_config,
263
244
  cursor=cursor,
264
245
  parsed_config=parsed_config,
@@ -289,7 +270,7 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
289
270
  CursorField(DefaultFileBasedStream.ab_last_mod_col),
290
271
  )
291
272
  stream = FileBasedStreamFacade.create_from_stream(
292
- stream=self._make_file_based_stream(
273
+ stream=self._make_default_stream(
293
274
  stream_config=stream_config,
294
275
  cursor=cursor,
295
276
  parsed_config=parsed_config,
@@ -301,17 +282,13 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
301
282
  )
302
283
  else:
303
284
  cursor = self.cursor_cls(stream_config)
304
- stream = self._make_file_based_stream(
285
+ stream = self._make_default_stream(
305
286
  stream_config=stream_config,
306
287
  cursor=cursor,
307
288
  parsed_config=parsed_config,
308
289
  )
309
290
 
310
291
  streams.append(stream)
311
-
312
- if include_identities_stream(parsed_config):
313
- identities_stream = self._make_identities_stream()
314
- streams.append(identities_stream)
315
292
  return streams
316
293
 
317
294
  except ValidationError as exc:
@@ -333,48 +310,8 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
333
310
  validation_policy=self._validate_and_get_validation_policy(stream_config),
334
311
  errors_collector=self.errors_collector,
335
312
  cursor=cursor,
336
- use_file_transfer=use_file_transfer(parsed_config),
337
- preserve_directory_structure=preserve_directory_structure(parsed_config),
338
- )
339
-
340
- def _make_permissions_stream(
341
- self, stream_config: FileBasedStreamConfig, cursor: Optional[AbstractFileBasedCursor]
342
- ) -> AbstractFileBasedStream:
343
- return PermissionsFileBasedStream(
344
- config=stream_config,
345
- catalog_schema=self.stream_schemas.get(stream_config.name),
346
- stream_reader=self.stream_reader,
347
- availability_strategy=self.availability_strategy,
348
- discovery_policy=self.discovery_policy,
349
- parsers=self.parsers,
350
- validation_policy=self._validate_and_get_validation_policy(stream_config),
351
- errors_collector=self.errors_collector,
352
- cursor=cursor,
353
- )
354
-
355
- def _make_file_based_stream(
356
- self,
357
- stream_config: FileBasedStreamConfig,
358
- cursor: Optional[AbstractFileBasedCursor],
359
- parsed_config: AbstractFileBasedSpec,
360
- ) -> AbstractFileBasedStream:
361
- """
362
- Creates different streams depending on the type of the transfer mode selected
363
- """
364
- if use_permissions_transfer(parsed_config):
365
- return self._make_permissions_stream(stream_config, cursor)
366
- # we should have a stream for File transfer mode to decouple from DefaultFileBasedStream
367
- else:
368
- return self._make_default_stream(stream_config, cursor, parsed_config)
369
-
370
- def _make_identities_stream(
371
- self,
372
- ) -> Stream:
373
- return FileIdentitiesStream(
374
- catalog_schema=self.stream_schemas.get(FileIdentitiesStream.IDENTITIES_STREAM_NAME),
375
- stream_reader=self.stream_reader,
376
- discovery_policy=self.discovery_policy,
377
- errors_collector=self.errors_collector,
313
+ use_file_transfer=self._use_file_transfer(parsed_config),
314
+ preserve_directory_structure=self._preserve_directory_structure(parsed_config),
378
315
  )
379
316
 
380
317
  def _get_stream_from_catalog(
@@ -441,3 +378,33 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
441
378
  "`input_schema` and `schemaless` options cannot both be set",
442
379
  model=FileBasedStreamConfig,
443
380
  )
381
+
382
+ @staticmethod
383
+ def _use_file_transfer(parsed_config: AbstractFileBasedSpec) -> bool:
384
+ use_file_transfer = (
385
+ hasattr(parsed_config.delivery_method, "delivery_type")
386
+ and parsed_config.delivery_method.delivery_type == "use_file_transfer"
387
+ )
388
+ return use_file_transfer
389
+
390
+ @staticmethod
391
+ def _preserve_directory_structure(parsed_config: AbstractFileBasedSpec) -> bool:
392
+ """
393
+ Determines whether to preserve directory structure during file transfer.
394
+
395
+ When enabled, files maintain their subdirectory paths in the destination.
396
+ When disabled, files are flattened to the root of the destination.
397
+
398
+ Args:
399
+ parsed_config: The parsed configuration containing delivery method settings
400
+
401
+ Returns:
402
+ True if directory structure should be preserved (default), False otherwise
403
+ """
404
+ if (
405
+ FileBasedSource._use_file_transfer(parsed_config)
406
+ and hasattr(parsed_config.delivery_method, "preserve_directory_structure")
407
+ and parsed_config.delivery_method.preserve_directory_structure is not None
408
+ ):
409
+ return parsed_config.delivery_method.preserve_directory_structure
410
+ return True