airbyte-cdk 6.34.0.dev0__py3-none-any.whl → 6.34.0.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. airbyte_cdk/connector_builder/connector_builder_handler.py +16 -12
  2. airbyte_cdk/connector_builder/test_reader/__init__.py +7 -0
  3. airbyte_cdk/connector_builder/test_reader/helpers.py +591 -0
  4. airbyte_cdk/connector_builder/test_reader/message_grouper.py +160 -0
  5. airbyte_cdk/connector_builder/test_reader/reader.py +441 -0
  6. airbyte_cdk/connector_builder/test_reader/types.py +75 -0
  7. airbyte_cdk/entrypoint.py +6 -6
  8. airbyte_cdk/logger.py +1 -4
  9. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +56 -25
  10. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +5 -0
  11. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +10 -0
  12. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +2 -1
  13. airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +81 -0
  14. airbyte_cdk/sources/file_based/file_based_source.py +70 -37
  15. airbyte_cdk/sources/file_based/file_based_stream_reader.py +107 -12
  16. airbyte_cdk/sources/file_based/stream/__init__.py +10 -1
  17. airbyte_cdk/sources/file_based/stream/identities_stream.py +47 -0
  18. airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +85 -0
  19. airbyte_cdk/sources/specs/transfer_modes.py +26 -0
  20. airbyte_cdk/sources/streams/permissions/identities_stream.py +75 -0
  21. airbyte_cdk/utils/mapping_helpers.py +43 -2
  22. airbyte_cdk/utils/print_buffer.py +0 -4
  23. {airbyte_cdk-6.34.0.dev0.dist-info → airbyte_cdk-6.34.0.dev1.dist-info}/METADATA +1 -1
  24. {airbyte_cdk-6.34.0.dev0.dist-info → airbyte_cdk-6.34.0.dev1.dist-info}/RECORD +28 -19
  25. airbyte_cdk/connector_builder/message_grouper.py +0 -448
  26. {airbyte_cdk-6.34.0.dev0.dist-info → airbyte_cdk-6.34.0.dev1.dist-info}/LICENSE.txt +0 -0
  27. {airbyte_cdk-6.34.0.dev0.dist-info → airbyte_cdk-6.34.0.dev1.dist-info}/LICENSE_SHORT +0 -0
  28. {airbyte_cdk-6.34.0.dev0.dist-info → airbyte_cdk-6.34.0.dev1.dist-info}/WHEEL +0 -0
  29. {airbyte_cdk-6.34.0.dev0.dist-info → airbyte_cdk-6.34.0.dev1.dist-info}/entry_points.txt +0 -0
airbyte_cdk/entrypoint.py CHANGED
@@ -22,7 +22,7 @@ from requests import PreparedRequest, Response, Session
 
 from airbyte_cdk.connector import TConfig
 from airbyte_cdk.exception_handler import init_uncaught_exception_handler
-from airbyte_cdk.logger import PRINT_BUFFER, init_logger
+from airbyte_cdk.logger import init_logger
 from airbyte_cdk.models import (
     AirbyteConnectionStatus,
     AirbyteMessage,
@@ -337,11 +337,11 @@ def launch(source: Source, args: List[str]) -> None:
     parsed_args = source_entrypoint.parse_args(args)
     # temporarily removes the PrintBuffer because we're seeing weird print behavior for concurrent syncs
     # Refer to: https://github.com/airbytehq/oncall/issues/6235
-    with PRINT_BUFFER:
-        for message in source_entrypoint.run(parsed_args):
-            # simply printing is creating issues for concurrent CDK as Python uses different two instructions to print: one for the message and
-            # the other for the break line. Adding `\n` to the message ensure that both are printed at the same time
-            print(f"{message}\n", end="")
+    # with PrintBuffer():
+    for message in source_entrypoint.run(parsed_args):
+        # simply printing is creating issues for concurrent CDK as Python uses different two instructions to print: one for the message and
+        # the other for the break line. Adding `\n` to the message ensure that both are printed at the same time
+        print(f"{message}\n", end="", flush=True)
 
 
 def _init_internal_request_filter() -> None:
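For context, the fix relies on each message reaching stdout in a single write. A minimal standalone sketch (not from the package) of the pattern the new code uses, folding the newline into the payload so concurrent threads cannot interleave a message and its line break:

    import threading

    def emit(message: str) -> None:
        # print(message) would issue two writes (payload, then "\n"), which
        # concurrent threads can interleave. Folding the newline into the
        # payload and flushing makes each line a single write to stdout.
        print(f"{message}\n", end="", flush=True)

    threads = [threading.Thread(target=emit, args=(f"message {i}",)) for i in range(4)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()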
airbyte_cdk/logger.py CHANGED
@@ -16,11 +16,8 @@ from airbyte_cdk.models import (
     Level,
     Type,
 )
-from airbyte_cdk.utils import PrintBuffer
 from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets
 
-PRINT_BUFFER = PrintBuffer(flush_interval=0.1)
-
 LOGGING_CONFIG = {
     "version": 1,
     "disable_existing_loggers": False,
@@ -30,7 +27,7 @@ LOGGING_CONFIG = {
     "handlers": {
         "console": {
             "class": "logging.StreamHandler",
-            "stream": PRINT_BUFFER,
+            "stream": "ext://sys.stdout",
             "formatter": "airbyte",
         },
     },
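With the PrintBuffer gone, the console handler's stream is given as the string "ext://sys.stdout", which logging.config.dictConfig resolves to the actual sys.stdout object at configuration time. A minimal sketch of that mechanism; the formatter and root sections below are simplified placeholders, not the CDK's real config:

    import logging
    import logging.config

    LOGGING_CONFIG = {
        "version": 1,
        "disable_existing_loggers": False,
        "formatters": {"plain": {"format": "%(message)s"}},  # placeholder formatter
        "handlers": {
            "console": {
                "class": "logging.StreamHandler",
                # "ext://" tells dictConfig to import the named object, so the
                # handler writes directly to sys.stdout instead of a PrintBuffer.
                "stream": "ext://sys.stdout",
                "formatter": "plain",
            },
        },
        "root": {"handlers": ["console"], "level": "INFO"},
    }

    logging.config.dictConfig(LOGGING_CONFIG)
    logging.getLogger("airbyte").info("written straight to stdout")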
airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py CHANGED
@@ -5,6 +5,7 @@
 import copy
 import logging
 import threading
+import time
 from collections import OrderedDict
 from copy import deepcopy
 from datetime import timedelta
@@ -58,7 +59,8 @@ class ConcurrentPerPartitionCursor(Cursor):
     CurrentPerPartitionCursor expects the state of the ConcurrentCursor to follow the format {cursor_field: cursor_value}.
     """
 
-    DEFAULT_MAX_PARTITIONS_NUMBER = 10000
+    DEFAULT_MAX_PARTITIONS_NUMBER = 25_000
+    SWITCH_TO_GLOBAL_LIMIT = 10_000
     _NO_STATE: Mapping[str, Any] = {}
     _NO_CURSOR_STATE: Mapping[str, Any] = {}
     _GLOBAL_STATE_KEY = "state"
@@ -99,9 +101,11 @@ class ConcurrentPerPartitionCursor(Cursor):
         self._new_global_cursor: Optional[StreamState] = None
         self._lookback_window: int = 0
         self._parent_state: Optional[StreamState] = None
-        self._over_limit: int = 0
+        self._number_of_partitions: int = 0
         self._use_global_cursor: bool = False
         self._partition_serializer = PerPartitionKeySerializer()
+        # Track the last time a state message was emitted
+        self._last_emission_time: float = 0.0
 
         self._set_initial_state(stream_state)
 
@@ -141,21 +145,16 @@ class ConcurrentPerPartitionCursor(Cursor):
             raise ValueError("stream_slice cannot be None")
 
         partition_key = self._to_partition_key(stream_slice.partition)
-        self._cursor_per_partition[partition_key].close_partition(partition=partition)
         with self._lock:
             self._semaphore_per_partition[partition_key].acquire()
-            cursor = self._cursor_per_partition[partition_key]
-            if (
-                partition_key in self._finished_partitions
-                and self._semaphore_per_partition[partition_key]._value == 0
-            ):
+            if not self._use_global_cursor:
+                self._cursor_per_partition[partition_key].close_partition(partition=partition)
+                cursor = self._cursor_per_partition[partition_key]
                 if (
-                    self._new_global_cursor is None
-                    or self._new_global_cursor[self.cursor_field.cursor_field_key]
-                    < cursor.state[self.cursor_field.cursor_field_key]
+                    partition_key in self._finished_partitions
+                    and self._semaphore_per_partition[partition_key]._value == 0
                 ):
-                    self._new_global_cursor = copy.deepcopy(cursor.state)
-            if not self._use_global_cursor:
+                    self._update_global_cursor(cursor.state[self.cursor_field.cursor_field_key])
             self._emit_state_message()
 
     def ensure_at_least_one_state_emitted(self) -> None:
@@ -169,9 +168,23 @@ class ConcurrentPerPartitionCursor(Cursor):
             self._global_cursor = self._new_global_cursor
             self._lookback_window = self._timer.finish()
             self._parent_state = self._partition_router.get_stream_state()
-        self._emit_state_message()
+        self._emit_state_message(throttle=False)
 
-    def _emit_state_message(self) -> None:
+    def _throttle_state_message(self) -> Optional[float]:
+        """
+        Throttles the state message emission to once every 60 seconds.
+        """
+        current_time = time.time()
+        if current_time - self._last_emission_time <= 60:
+            return None
+        return current_time
+
+    def _emit_state_message(self, throttle: bool = True) -> None:
+        if throttle:
+            current_time = self._throttle_state_message()
+            if current_time is None:
+                return
+            self._last_emission_time = current_time
         self._connector_state_manager.update_state_for_stream(
             self._stream_name,
             self._stream_namespace,
@@ -202,6 +215,7 @@ class ConcurrentPerPartitionCursor(Cursor):
             self._lookback_window if self._global_cursor else 0,
         )
         with self._lock:
+            self._number_of_partitions += 1
             self._cursor_per_partition[self._to_partition_key(partition.partition)] = cursor
             self._semaphore_per_partition[self._to_partition_key(partition.partition)] = (
                 threading.Semaphore(0)
@@ -232,9 +246,15 @@ class ConcurrentPerPartitionCursor(Cursor):
         - Logs a warning each time a partition is removed, indicating whether it was finished
           or removed due to being the oldest.
         """
+        if not self._use_global_cursor and self.limit_reached():
+            logger.info(
+                f"Exceeded the 'SWITCH_TO_GLOBAL_LIMIT' of {self.SWITCH_TO_GLOBAL_LIMIT}. "
+                f"Switching to global cursor for {self._stream_name}."
+            )
+            self._use_global_cursor = True
+
         with self._lock:
             while len(self._cursor_per_partition) > self.DEFAULT_MAX_PARTITIONS_NUMBER - 1:
-                self._over_limit += 1
                 # Try removing finished partitions first
                 for partition_key in list(self._cursor_per_partition.keys()):
                     if (
@@ -245,7 +265,7 @@ class ConcurrentPerPartitionCursor(Cursor):
                            partition_key
                        )  # Remove the oldest partition
                        logger.warning(
-                            f"The maximum number of partitions has been reached. Dropping the oldest finished partition: {oldest_partition}. Over limit: {self._over_limit}."
+                            f"The maximum number of partitions has been reached. Dropping the oldest finished partition: {oldest_partition}. Over limit: {self._number_of_partitions - self.DEFAULT_MAX_PARTITIONS_NUMBER}."
                        )
                        break
             else:
@@ -254,7 +274,7 @@ class ConcurrentPerPartitionCursor(Cursor):
                    1
                ]  # Remove the oldest partition
                logger.warning(
-                    f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._over_limit}."
+                    f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._number_of_partitions - self.DEFAULT_MAX_PARTITIONS_NUMBER}."
                )
 
     def _set_initial_state(self, stream_state: StreamState) -> None:
@@ -314,6 +334,7 @@ class ConcurrentPerPartitionCursor(Cursor):
             self._lookback_window = int(stream_state.get("lookback_window", 0))
 
             for state in stream_state.get(self._PERPARTITION_STATE_KEY, []):
+                self._number_of_partitions += 1
                 self._cursor_per_partition[self._to_partition_key(state["partition"])] = (
                     self._create_cursor(state["cursor"])
                 )
@@ -354,16 +375,26 @@ class ConcurrentPerPartitionCursor(Cursor):
             self._new_global_cursor = deepcopy(fixed_global_state)
 
     def observe(self, record: Record) -> None:
-        if not self._use_global_cursor and self.limit_reached():
-            self._use_global_cursor = True
-
         if not record.associated_slice:
             raise ValueError(
                 "Invalid state as stream slices that are emitted should refer to an existing cursor"
             )
-        self._cursor_per_partition[
-            self._to_partition_key(record.associated_slice.partition)
-        ].observe(record)
+
+        record_cursor = self._connector_state_converter.output_format(
+            self._connector_state_converter.parse_value(self._cursor_field.extract_value(record))
+        )
+        self._update_global_cursor(record_cursor)
+        if not self._use_global_cursor:
+            self._cursor_per_partition[
+                self._to_partition_key(record.associated_slice.partition)
+            ].observe(record)
+
+    def _update_global_cursor(self, value: Any) -> None:
+        if (
+            self._new_global_cursor is None
+            or self._new_global_cursor[self.cursor_field.cursor_field_key] < value
+        ):
+            self._new_global_cursor = {self.cursor_field.cursor_field_key: copy.deepcopy(value)}
 
     def _to_partition_key(self, partition: Mapping[str, Any]) -> str:
         return self._partition_serializer.to_partition_key(partition)
@@ -397,4 +428,4 @@ class ConcurrentPerPartitionCursor(Cursor):
         return cursor
 
     def limit_reached(self) -> bool:
-        return self._over_limit > self.DEFAULT_MAX_PARTITIONS_NUMBER
+        return self._number_of_partitions > self.SWITCH_TO_GLOBAL_LIMIT
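The net effect of these changes is twofold: state messages are now throttled to at most one per 60 seconds unless emission is forced (as ensure_at_least_one_state_emitted does with throttle=False), and the cursor switches to a single global cursor once the number of partitions seen crosses SWITCH_TO_GLOBAL_LIMIT, instead of counting evictions past the old cap. A standalone sketch of the throttle, with illustrative names:

    import time

    class ThrottledEmitter:
        """Mimics the 60-second state-emission throttle shown above."""

        def __init__(self) -> None:
            self._last_emission_time = 0.0

        def emit(self, throttle: bool = True) -> bool:
            current_time = time.time()
            if throttle and current_time - self._last_emission_time <= 60:
                return False  # suppressed: emitted less than a minute ago
            self._last_emission_time = current_time
            return True

    emitter = ThrottledEmitter()
    assert emitter.emit()                # first emission goes out
    assert not emitter.emit()            # throttled inside the 60s window
    assert emitter.emit(throttle=False)  # a forced (final) emission always goes out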
airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py CHANGED
@@ -21,6 +21,7 @@ from airbyte_cdk.sources.declarative.requesters.request_option import (
 )
 from airbyte_cdk.sources.message import MessageRepository
 from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState
+from airbyte_cdk.utils.mapping_helpers import _validate_component_request_option_paths
 
 
 @dataclass
@@ -122,6 +123,10 @@ class DatetimeBasedCursor(DeclarativeCursor):
         if not self.cursor_datetime_formats:
             self.cursor_datetime_formats = [self.datetime_format]
 
+        _validate_component_request_option_paths(
+            self.config, self.start_time_option, self.end_time_option
+        )
+
     def get_stream_state(self) -> StreamState:
         return {self.cursor_field.eval(self.config): self._cursor} if self._cursor else {}  # type: ignore  # cursor_field is converted to an InterpolatedString in __post_init__
 
airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py CHANGED
@@ -23,6 +23,9 @@ from airbyte_cdk.sources.declarative.requesters.request_option import (
 )
 from airbyte_cdk.sources.declarative.requesters.request_path import RequestPath
 from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState
+from airbyte_cdk.utils.mapping_helpers import (
+    _validate_component_request_option_paths,
+)
 
 
 @dataclass
@@ -113,6 +116,13 @@ class DefaultPaginator(Paginator):
         if isinstance(self.url_base, str):
             self.url_base = InterpolatedString(string=self.url_base, parameters=parameters)
 
+        if self.page_token_option and not isinstance(self.page_token_option, RequestPath):
+            _validate_component_request_option_paths(
+                self.config,
+                self.page_size_option,
+                self.page_token_option,
+            )
+
     def get_initial_token(self) -> Optional[Any]:
         """
         Return the page token that should be used for the first request of a stream
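_validate_component_request_option_paths ships in this release's mapping_helpers.py (listed above with +43 -2); its body is not shown in this diff. Judging from the call sites, it fails fast when a component's request options would inject into the same request location. A hypothetical sketch of that kind of check, using simplified stand-in types rather than the CDK's API:

    from typing import Optional

    class Option:
        # Stand-in for the CDK's RequestOption: where to inject, and the field name.
        def __init__(self, inject_into: str, field_name: str) -> None:
            self.inject_into = inject_into
            self.field_name = field_name

    def validate_unique_paths(*options: Optional[Option]) -> None:
        seen = set()
        for option in options:
            if option is None:
                continue  # components may define only some of their options
            path = (option.inject_into, option.field_name)
            if path in seen:
                raise ValueError(f"Conflicting request option path: {path}")
            seen.add(path)

    # Distinct targets pass; a paginator whose page-size and page-token options
    # write to the same query parameter is rejected at construction time.
    validate_unique_paths(Option("request_parameter", "page_size"), Option("request_parameter", "page"))
    try:
        validate_unique_paths(Option("request_parameter", "page"), Option("request_parameter", "page"))
    except ValueError as exc:
        print(exc)  # Conflicting request option path: ('request_parameter', 'page')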
airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py CHANGED
@@ -11,6 +11,7 @@ from pydantic.v1 import AnyUrl, BaseModel, Field
 
 from airbyte_cdk import OneOfOptionConfig
 from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
+from airbyte_cdk.sources.specs.transfer_modes import DeliverPermissions
 from airbyte_cdk.sources.utils import schema_helpers
 
 
@@ -65,7 +66,7 @@ class AbstractFileBasedSpec(BaseModel):
         order=10,
     )
 
-    delivery_method: Union[DeliverRecords, DeliverRawFiles] = Field(
+    delivery_method: Union[DeliverRecords, DeliverRawFiles, DeliverPermissions] = Field(
         title="Delivery Method",
         discriminator="delivery_type",
         type="object",
airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py ADDED
@@ -0,0 +1,81 @@
+#
+# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
+#
+
+from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import (
+    AbstractFileBasedSpec,
+    DeliverRawFiles,
+)
+from airbyte_cdk.sources.specs.transfer_modes import DeliverPermissions
+
+DELIVERY_TYPE_KEY = "delivery_type"
+DELIVERY_TYPE_PERMISSION_TRANSFER_MODE_VALUE = "use_permissions_transfer"
+DELIVERY_TYPE_FILES_TRANSFER_MODE_VALUE = "use_file_transfer"
+PRESERVE_DIRECTORY_STRUCTURE_KEY = "preserve_directory_structure"
+INCLUDE_IDENTITIES_STREAM_KEY = "include_identities_stream"
+
+
+def use_file_transfer(parsed_config: AbstractFileBasedSpec) -> bool:
+    """Returns `True` if the configuration uses file transfer mode."""
+    return (
+        hasattr(parsed_config.delivery_method, DELIVERY_TYPE_KEY)
+        and parsed_config.delivery_method.delivery_type == DELIVERY_TYPE_FILES_TRANSFER_MODE_VALUE
+    )
+
+
+def preserve_directory_structure(parsed_config: AbstractFileBasedSpec) -> bool:
+    """
+    Determines whether to preserve directory structure during file transfer.
+
+    When enabled, files maintain their subdirectory paths in the destination.
+    When disabled, files are flattened to the root of the destination.
+
+    Args:
+        parsed_config: The parsed configuration containing delivery method settings
+
+    Returns:
+        True if directory structure should be preserved (default), False otherwise
+    """
+    if (
+        use_file_transfer(parsed_config)
+        and hasattr(parsed_config.delivery_method, PRESERVE_DIRECTORY_STRUCTURE_KEY)
+        and isinstance(parsed_config.delivery_method, DeliverRawFiles)
+    ):
+        return parsed_config.delivery_method.preserve_directory_structure
+    return True
+
+
+def use_permissions_transfer(parsed_config: AbstractFileBasedSpec) -> bool:
+    """
+    Determines whether to use permissions transfer to sync ACLs and Identities
+
+    Args:
+        parsed_config: The parsed configuration containing delivery method settings
+
+    Returns:
+        True if permissions transfer should be enabled, False otherwise
+    """
+    return (
+        hasattr(parsed_config.delivery_method, DELIVERY_TYPE_KEY)
+        and parsed_config.delivery_method.delivery_type
+        == DELIVERY_TYPE_PERMISSION_TRANSFER_MODE_VALUE
+    )
+
+
+def include_identities_stream(parsed_config: AbstractFileBasedSpec) -> bool:
+    """
+    There are scenarios where user may not have access to identities but still is valuable to get ACLs
+
+    Args:
+        parsed_config: The parsed configuration containing delivery method settings
+
+    Returns:
+        True if we should include Identities stream.
+    """
+    if (
+        use_permissions_transfer(parsed_config)
+        and hasattr(parsed_config.delivery_method, INCLUDE_IDENTITIES_STREAM_KEY)
+        and isinstance(parsed_config.delivery_method, DeliverPermissions)
+    ):
+        return parsed_config.delivery_method.include_identities_stream
+    return False
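A hypothetical usage sketch of these helpers. The SimpleNamespace stands in for a fully parsed AbstractFileBasedSpec, and the DeliverPermissions keyword argument is assumed from its usage here (the model itself is defined in airbyte_cdk/sources/specs/transfer_modes.py, listed above but not shown in this diff):

    from types import SimpleNamespace

    from airbyte_cdk.sources.file_based.config.validate_config_transfer_modes import (
        include_identities_stream,
        use_file_transfer,
        use_permissions_transfer,
    )
    from airbyte_cdk.sources.specs.transfer_modes import DeliverPermissions

    # Only delivery_method is inspected by the helpers, so a namespace object
    # is enough to stand in for the parsed spec in this sketch.
    parsed_config = SimpleNamespace(
        delivery_method=DeliverPermissions(include_identities_stream=True)  # assumed kwargs
    )

    assert use_permissions_transfer(parsed_config)  # delivery_type == "use_permissions_transfer"
    assert include_identities_stream(parsed_config)
    assert not use_file_transfer(parsed_config)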
airbyte_cdk/sources/file_based/file_based_source.py CHANGED
@@ -33,6 +33,12 @@ from airbyte_cdk.sources.file_based.config.file_based_stream_config import (
     FileBasedStreamConfig,
     ValidationPolicy,
 )
+from airbyte_cdk.sources.file_based.config.validate_config_transfer_modes import (
+    include_identities_stream,
+    preserve_directory_structure,
+    use_file_transfer,
+    use_permissions_transfer,
+)
 from airbyte_cdk.sources.file_based.discovery_policy import (
     AbstractDiscoveryPolicy,
     DefaultDiscoveryPolicy,
@@ -49,7 +55,12 @@ from airbyte_cdk.sources.file_based.schema_validation_policies import (
     DEFAULT_SCHEMA_VALIDATION_POLICIES,
     AbstractSchemaValidationPolicy,
 )
-from airbyte_cdk.sources.file_based.stream import AbstractFileBasedStream, DefaultFileBasedStream
+from airbyte_cdk.sources.file_based.stream import (
+    AbstractFileBasedStream,
+    DefaultFileBasedStream,
+    FileIdentitiesStream,
+    PermissionsFileBasedStream,
+)
 from airbyte_cdk.sources.file_based.stream.concurrent.adapters import FileBasedStreamFacade
 from airbyte_cdk.sources.file_based.stream.concurrent.cursor import (
     AbstractConcurrentFileBasedCursor,
@@ -66,6 +77,7 @@ from airbyte_cdk.utils.traced_exception import AirbyteTracedException
 DEFAULT_CONCURRENCY = 100
 MAX_CONCURRENCY = 100
 INITIAL_N_PARTITIONS = MAX_CONCURRENCY // 2
+IDENTITIES_STREAM = "identities"
 
 
 class FileBasedSource(ConcurrentSourceAdapter, ABC):
@@ -157,13 +169,20 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
         errors = []
         tracebacks = []
         for stream in streams:
+            if isinstance(stream, FileIdentitiesStream):
+                identity = next(iter(stream.load_identity_groups()))
+                if not identity:
+                    errors.append(
+                        "Unable to get identities for current configuration, please check your credentials"
+                    )
+                continue
             if not isinstance(stream, AbstractFileBasedStream):
                 raise ValueError(f"Stream {stream} is not a file-based stream.")
             try:
                 parsed_config = self._get_parsed_config(config)
                 availability_method = (
                     stream.availability_strategy.check_availability
-                    if self._use_file_transfer(parsed_config)
+                    if use_file_transfer(parsed_config) or use_permissions_transfer(parsed_config)
                     else stream.availability_strategy.check_availability_and_parsability
                 )
                 (
@@ -239,7 +258,7 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
                        message_repository=self.message_repository,
                    )
                    stream = FileBasedStreamFacade.create_from_stream(
-                        stream=self._make_default_stream(
+                        stream=self._make_file_based_stream(
                            stream_config=stream_config,
                            cursor=cursor,
                            parsed_config=parsed_config,
@@ -270,7 +289,7 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
                        CursorField(DefaultFileBasedStream.ab_last_mod_col),
                    )
                    stream = FileBasedStreamFacade.create_from_stream(
-                        stream=self._make_default_stream(
+                        stream=self._make_file_based_stream(
                            stream_config=stream_config,
                            cursor=cursor,
                            parsed_config=parsed_config,
@@ -282,13 +301,17 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
                    )
                else:
                    cursor = self.cursor_cls(stream_config)
-                    stream = self._make_default_stream(
+                    stream = self._make_file_based_stream(
                        stream_config=stream_config,
                        cursor=cursor,
                        parsed_config=parsed_config,
                    )
 
                streams.append(stream)
+
+            if include_identities_stream(parsed_config):
+                identities_stream = self._make_identities_stream()
+                streams.append(identities_stream)
             return streams
 
         except ValidationError as exc:
@@ -310,8 +333,48 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
             validation_policy=self._validate_and_get_validation_policy(stream_config),
             errors_collector=self.errors_collector,
             cursor=cursor,
-            use_file_transfer=self._use_file_transfer(parsed_config),
-            preserve_directory_structure=self._preserve_directory_structure(parsed_config),
+            use_file_transfer=use_file_transfer(parsed_config),
+            preserve_directory_structure=preserve_directory_structure(parsed_config),
+        )
+
+    def _make_permissions_stream(
+        self, stream_config: FileBasedStreamConfig, cursor: Optional[AbstractFileBasedCursor]
+    ) -> AbstractFileBasedStream:
+        return PermissionsFileBasedStream(
+            config=stream_config,
+            catalog_schema=self.stream_schemas.get(stream_config.name),
+            stream_reader=self.stream_reader,
+            availability_strategy=self.availability_strategy,
+            discovery_policy=self.discovery_policy,
+            parsers=self.parsers,
+            validation_policy=self._validate_and_get_validation_policy(stream_config),
+            errors_collector=self.errors_collector,
+            cursor=cursor,
+        )
+
+    def _make_file_based_stream(
+        self,
+        stream_config: FileBasedStreamConfig,
+        cursor: Optional[AbstractFileBasedCursor],
+        parsed_config: AbstractFileBasedSpec,
+    ) -> AbstractFileBasedStream:
+        """
+        Creates different streams depending on the type of the transfer mode selected
+        """
+        if use_permissions_transfer(parsed_config):
+            return self._make_permissions_stream(stream_config, cursor)
+        # we should have a stream for File transfer mode to decouple from DefaultFileBasedStream
+        else:
+            return self._make_default_stream(stream_config, cursor, parsed_config)
+
+    def _make_identities_stream(
+        self,
+    ) -> Stream:
+        return FileIdentitiesStream(
+            catalog_schema=self.stream_schemas.get(FileIdentitiesStream.IDENTITIES_STREAM_NAME),
+            stream_reader=self.stream_reader,
+            discovery_policy=self.discovery_policy,
+            errors_collector=self.errors_collector,
         )
 
     def _get_stream_from_catalog(
@@ -378,33 +441,3 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
             "`input_schema` and `schemaless` options cannot both be set",
             model=FileBasedStreamConfig,
         )
-
-    @staticmethod
-    def _use_file_transfer(parsed_config: AbstractFileBasedSpec) -> bool:
-        use_file_transfer = (
-            hasattr(parsed_config.delivery_method, "delivery_type")
-            and parsed_config.delivery_method.delivery_type == "use_file_transfer"
-        )
-        return use_file_transfer
-
-    @staticmethod
-    def _preserve_directory_structure(parsed_config: AbstractFileBasedSpec) -> bool:
-        """
-        Determines whether to preserve directory structure during file transfer.
-
-        When enabled, files maintain their subdirectory paths in the destination.
-        When disabled, files are flattened to the root of the destination.
-
-        Args:
-            parsed_config: The parsed configuration containing delivery method settings
-
-        Returns:
-            True if directory structure should be preserved (default), False otherwise
-        """
-        if (
-            FileBasedSource._use_file_transfer(parsed_config)
-            and hasattr(parsed_config.delivery_method, "preserve_directory_structure")
-            and parsed_config.delivery_method.preserve_directory_structure is not None
-        ):
-            return parsed_config.delivery_method.preserve_directory_structure
-        return True