airbyte-cdk 6.34.1.dev0__py3-none-any.whl → 6.35.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. airbyte_cdk/connector_builder/connector_builder_handler.py +16 -12
  2. airbyte_cdk/connector_builder/test_reader/__init__.py +7 -0
  3. airbyte_cdk/connector_builder/test_reader/helpers.py +591 -0
  4. airbyte_cdk/connector_builder/test_reader/message_grouper.py +160 -0
  5. airbyte_cdk/connector_builder/test_reader/reader.py +441 -0
  6. airbyte_cdk/connector_builder/test_reader/types.py +75 -0
  7. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +7 -7
  8. airbyte_cdk/sources/declarative/auth/jwt.py +17 -11
  9. airbyte_cdk/sources/declarative/auth/oauth.py +6 -1
  10. airbyte_cdk/sources/declarative/auth/token.py +3 -8
  11. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +30 -79
  12. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +213 -100
  13. airbyte_cdk/sources/declarative/declarative_stream.py +3 -1
  14. airbyte_cdk/sources/declarative/decoders/__init__.py +0 -4
  15. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +18 -3
  16. airbyte_cdk/sources/declarative/decoders/json_decoder.py +12 -58
  17. airbyte_cdk/sources/declarative/extractors/record_selector.py +12 -3
  18. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +56 -25
  19. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +12 -6
  20. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +6 -2
  21. airbyte_cdk/sources/declarative/interpolation/__init__.py +1 -1
  22. airbyte_cdk/sources/declarative/interpolation/filters.py +2 -1
  23. airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +1 -1
  24. airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +1 -1
  25. airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +1 -1
  26. airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +1 -1
  27. airbyte_cdk/sources/declarative/interpolation/interpolation.py +2 -1
  28. airbyte_cdk/sources/declarative/interpolation/jinja.py +14 -1
  29. airbyte_cdk/sources/declarative/interpolation/macros.py +19 -4
  30. airbyte_cdk/sources/declarative/manifest_declarative_source.py +9 -0
  31. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +150 -41
  32. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +234 -84
  33. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +5 -5
  34. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +4 -2
  35. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +26 -18
  36. airbyte_cdk/sources/declarative/requesters/http_requester.py +8 -2
  37. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +16 -5
  38. airbyte_cdk/sources/declarative/requesters/request_option.py +83 -4
  39. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +7 -6
  40. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +1 -4
  41. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +0 -3
  42. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +2 -47
  43. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +6 -12
  44. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +4 -3
  45. airbyte_cdk/sources/declarative/transformations/add_fields.py +4 -4
  46. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +2 -1
  47. airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +81 -0
  48. airbyte_cdk/sources/file_based/file_based_source.py +70 -37
  49. airbyte_cdk/sources/file_based/file_based_stream_reader.py +107 -12
  50. airbyte_cdk/sources/file_based/stream/__init__.py +10 -1
  51. airbyte_cdk/sources/file_based/stream/identities_stream.py +47 -0
  52. airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +85 -0
  53. airbyte_cdk/sources/specs/transfer_modes.py +26 -0
  54. airbyte_cdk/sources/streams/call_rate.py +185 -47
  55. airbyte_cdk/sources/streams/http/http.py +1 -2
  56. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +217 -56
  57. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +144 -73
  58. airbyte_cdk/sources/streams/permissions/identities_stream.py +75 -0
  59. airbyte_cdk/test/mock_http/mocker.py +9 -1
  60. airbyte_cdk/test/mock_http/response.py +6 -3
  61. airbyte_cdk/utils/datetime_helpers.py +48 -66
  62. airbyte_cdk/utils/mapping_helpers.py +126 -26
  63. {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.35.0.dist-info}/METADATA +1 -1
  64. {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.35.0.dist-info}/RECORD +68 -59
  65. airbyte_cdk/connector_builder/message_grouper.py +0 -448
  66. {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.35.0.dist-info}/LICENSE.txt +0 -0
  67. {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.35.0.dist-info}/LICENSE_SHORT +0 -0
  68. {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.35.0.dist-info}/WHEEL +0 -0
  69. {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.35.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,81 @@
1
+ #
2
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import (
6
+ AbstractFileBasedSpec,
7
+ DeliverRawFiles,
8
+ )
9
+ from airbyte_cdk.sources.specs.transfer_modes import DeliverPermissions
10
+
11
+ DELIVERY_TYPE_KEY = "delivery_type"
12
+ DELIVERY_TYPE_PERMISSION_TRANSFER_MODE_VALUE = "use_permissions_transfer"
13
+ DELIVERY_TYPE_FILES_TRANSFER_MODE_VALUE = "use_file_transfer"
14
+ PRESERVE_DIRECTORY_STRUCTURE_KEY = "preserve_directory_structure"
15
+ INCLUDE_IDENTITIES_STREAM_KEY = "include_identities_stream"
16
+
17
+
18
+ def use_file_transfer(parsed_config: AbstractFileBasedSpec) -> bool:
19
+ """Returns `True` if the configuration uses file transfer mode."""
20
+ return (
21
+ hasattr(parsed_config.delivery_method, DELIVERY_TYPE_KEY)
22
+ and parsed_config.delivery_method.delivery_type == DELIVERY_TYPE_FILES_TRANSFER_MODE_VALUE
23
+ )
24
+
25
+
26
+ def preserve_directory_structure(parsed_config: AbstractFileBasedSpec) -> bool:
27
+ """
28
+ Determines whether to preserve directory structure during file transfer.
29
+
30
+ When enabled, files maintain their subdirectory paths in the destination.
31
+ When disabled, files are flattened to the root of the destination.
32
+
33
+ Args:
34
+ parsed_config: The parsed configuration containing delivery method settings
35
+
36
+ Returns:
37
+ True if directory structure should be preserved (default), False otherwise
38
+ """
39
+ if (
40
+ use_file_transfer(parsed_config)
41
+ and hasattr(parsed_config.delivery_method, PRESERVE_DIRECTORY_STRUCTURE_KEY)
42
+ and isinstance(parsed_config.delivery_method, DeliverRawFiles)
43
+ ):
44
+ return parsed_config.delivery_method.preserve_directory_structure
45
+ return True
46
+
47
+
48
+ def use_permissions_transfer(parsed_config: AbstractFileBasedSpec) -> bool:
49
+ """
50
+ Determines whether to use permissions transfer to sync ACLs and Identities
51
+
52
+ Args:
53
+ parsed_config: The parsed configuration containing delivery method settings
54
+
55
+ Returns:
56
+ True if permissions transfer should be enabled, False otherwise
57
+ """
58
+ return (
59
+ hasattr(parsed_config.delivery_method, DELIVERY_TYPE_KEY)
60
+ and parsed_config.delivery_method.delivery_type
61
+ == DELIVERY_TYPE_PERMISSION_TRANSFER_MODE_VALUE
62
+ )
63
+
64
+
65
+ def include_identities_stream(parsed_config: AbstractFileBasedSpec) -> bool:
66
+ """
67
+ There are scenarios where the user may not have access to identities, but it is still valuable to get ACLs.
68
+
69
+ Args:
70
+ parsed_config: The parsed configuration containing delivery method settings
71
+
72
+ Returns:
73
+ True if we should include Identities stream.
74
+ """
75
+ if (
76
+ use_permissions_transfer(parsed_config)
77
+ and hasattr(parsed_config.delivery_method, INCLUDE_IDENTITIES_STREAM_KEY)
78
+ and isinstance(parsed_config.delivery_method, DeliverPermissions)
79
+ ):
80
+ return parsed_config.delivery_method.include_identities_stream
81
+ return False
@@ -33,6 +33,12 @@ from airbyte_cdk.sources.file_based.config.file_based_stream_config import (
33
33
  FileBasedStreamConfig,
34
34
  ValidationPolicy,
35
35
  )
36
+ from airbyte_cdk.sources.file_based.config.validate_config_transfer_modes import (
37
+ include_identities_stream,
38
+ preserve_directory_structure,
39
+ use_file_transfer,
40
+ use_permissions_transfer,
41
+ )
36
42
  from airbyte_cdk.sources.file_based.discovery_policy import (
37
43
  AbstractDiscoveryPolicy,
38
44
  DefaultDiscoveryPolicy,
@@ -49,7 +55,12 @@ from airbyte_cdk.sources.file_based.schema_validation_policies import (
49
55
  DEFAULT_SCHEMA_VALIDATION_POLICIES,
50
56
  AbstractSchemaValidationPolicy,
51
57
  )
52
- from airbyte_cdk.sources.file_based.stream import AbstractFileBasedStream, DefaultFileBasedStream
58
+ from airbyte_cdk.sources.file_based.stream import (
59
+ AbstractFileBasedStream,
60
+ DefaultFileBasedStream,
61
+ FileIdentitiesStream,
62
+ PermissionsFileBasedStream,
63
+ )
53
64
  from airbyte_cdk.sources.file_based.stream.concurrent.adapters import FileBasedStreamFacade
54
65
  from airbyte_cdk.sources.file_based.stream.concurrent.cursor import (
55
66
  AbstractConcurrentFileBasedCursor,
@@ -66,6 +77,7 @@ from airbyte_cdk.utils.traced_exception import AirbyteTracedException
66
77
  DEFAULT_CONCURRENCY = 100
67
78
  MAX_CONCURRENCY = 100
68
79
  INITIAL_N_PARTITIONS = MAX_CONCURRENCY // 2
80
+ IDENTITIES_STREAM = "identities"
69
81
 
70
82
 
71
83
  class FileBasedSource(ConcurrentSourceAdapter, ABC):
@@ -157,13 +169,20 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
157
169
  errors = []
158
170
  tracebacks = []
159
171
  for stream in streams:
172
+ if isinstance(stream, FileIdentitiesStream):
173
+ identity = next(iter(stream.load_identity_groups()))
174
+ if not identity:
175
+ errors.append(
176
+ "Unable to get identities for current configuration, please check your credentials"
177
+ )
178
+ continue
160
179
  if not isinstance(stream, AbstractFileBasedStream):
161
180
  raise ValueError(f"Stream {stream} is not a file-based stream.")
162
181
  try:
163
182
  parsed_config = self._get_parsed_config(config)
164
183
  availability_method = (
165
184
  stream.availability_strategy.check_availability
166
- if self._use_file_transfer(parsed_config)
185
+ if use_file_transfer(parsed_config) or use_permissions_transfer(parsed_config)
167
186
  else stream.availability_strategy.check_availability_and_parsability
168
187
  )
169
188
  (
@@ -239,7 +258,7 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
239
258
  message_repository=self.message_repository,
240
259
  )
241
260
  stream = FileBasedStreamFacade.create_from_stream(
242
- stream=self._make_default_stream(
261
+ stream=self._make_file_based_stream(
243
262
  stream_config=stream_config,
244
263
  cursor=cursor,
245
264
  parsed_config=parsed_config,
@@ -270,7 +289,7 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
270
289
  CursorField(DefaultFileBasedStream.ab_last_mod_col),
271
290
  )
272
291
  stream = FileBasedStreamFacade.create_from_stream(
273
- stream=self._make_default_stream(
292
+ stream=self._make_file_based_stream(
274
293
  stream_config=stream_config,
275
294
  cursor=cursor,
276
295
  parsed_config=parsed_config,
@@ -282,13 +301,17 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
282
301
  )
283
302
  else:
284
303
  cursor = self.cursor_cls(stream_config)
285
- stream = self._make_default_stream(
304
+ stream = self._make_file_based_stream(
286
305
  stream_config=stream_config,
287
306
  cursor=cursor,
288
307
  parsed_config=parsed_config,
289
308
  )
290
309
 
291
310
  streams.append(stream)
311
+
312
+ if include_identities_stream(parsed_config):
313
+ identities_stream = self._make_identities_stream()
314
+ streams.append(identities_stream)
292
315
  return streams
293
316
 
294
317
  except ValidationError as exc:
@@ -310,8 +333,48 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
310
333
  validation_policy=self._validate_and_get_validation_policy(stream_config),
311
334
  errors_collector=self.errors_collector,
312
335
  cursor=cursor,
313
- use_file_transfer=self._use_file_transfer(parsed_config),
314
- preserve_directory_structure=self._preserve_directory_structure(parsed_config),
336
+ use_file_transfer=use_file_transfer(parsed_config),
337
+ preserve_directory_structure=preserve_directory_structure(parsed_config),
338
+ )
339
+
340
+ def _make_permissions_stream(
341
+ self, stream_config: FileBasedStreamConfig, cursor: Optional[AbstractFileBasedCursor]
342
+ ) -> AbstractFileBasedStream:
343
+ return PermissionsFileBasedStream(
344
+ config=stream_config,
345
+ catalog_schema=self.stream_schemas.get(stream_config.name),
346
+ stream_reader=self.stream_reader,
347
+ availability_strategy=self.availability_strategy,
348
+ discovery_policy=self.discovery_policy,
349
+ parsers=self.parsers,
350
+ validation_policy=self._validate_and_get_validation_policy(stream_config),
351
+ errors_collector=self.errors_collector,
352
+ cursor=cursor,
353
+ )
354
+
355
+ def _make_file_based_stream(
356
+ self,
357
+ stream_config: FileBasedStreamConfig,
358
+ cursor: Optional[AbstractFileBasedCursor],
359
+ parsed_config: AbstractFileBasedSpec,
360
+ ) -> AbstractFileBasedStream:
361
+ """
362
+ Creates different streams depending on the type of the transfer mode selected
363
+ """
364
+ if use_permissions_transfer(parsed_config):
365
+ return self._make_permissions_stream(stream_config, cursor)
366
+ # we should have a stream for File transfer mode to decouple from DefaultFileBasedStream
367
+ else:
368
+ return self._make_default_stream(stream_config, cursor, parsed_config)
369
+
370
+ def _make_identities_stream(
371
+ self,
372
+ ) -> Stream:
373
+ return FileIdentitiesStream(
374
+ catalog_schema=self.stream_schemas.get(FileIdentitiesStream.IDENTITIES_STREAM_NAME),
375
+ stream_reader=self.stream_reader,
376
+ discovery_policy=self.discovery_policy,
377
+ errors_collector=self.errors_collector,
315
378
  )
316
379
 
317
380
  def _get_stream_from_catalog(
@@ -378,33 +441,3 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
378
441
  "`input_schema` and `schemaless` options cannot both be set",
379
442
  model=FileBasedStreamConfig,
380
443
  )
381
-
382
- @staticmethod
383
- def _use_file_transfer(parsed_config: AbstractFileBasedSpec) -> bool:
384
- use_file_transfer = (
385
- hasattr(parsed_config.delivery_method, "delivery_type")
386
- and parsed_config.delivery_method.delivery_type == "use_file_transfer"
387
- )
388
- return use_file_transfer
389
-
390
- @staticmethod
391
- def _preserve_directory_structure(parsed_config: AbstractFileBasedSpec) -> bool:
392
- """
393
- Determines whether to preserve directory structure during file transfer.
394
-
395
- When enabled, files maintain their subdirectory paths in the destination.
396
- When disabled, files are flattened to the root of the destination.
397
-
398
- Args:
399
- parsed_config: The parsed configuration containing delivery method settings
400
-
401
- Returns:
402
- True if directory structure should be preserved (default), False otherwise
403
- """
404
- if (
405
- FileBasedSource._use_file_transfer(parsed_config)
406
- and hasattr(parsed_config.delivery_method, "preserve_directory_structure")
407
- and parsed_config.delivery_method.preserve_directory_structure is not None
408
- ):
409
- return parsed_config.delivery_method.preserve_directory_structure
410
- return True
@@ -13,6 +13,11 @@ from typing import Any, Dict, Iterable, List, Optional, Set
13
13
  from wcmatch.glob import GLOBSTAR, globmatch
14
14
 
15
15
  from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import AbstractFileBasedSpec
16
+ from airbyte_cdk.sources.file_based.config.validate_config_transfer_modes import (
17
+ include_identities_stream,
18
+ preserve_directory_structure,
19
+ use_file_transfer,
20
+ )
16
21
  from airbyte_cdk.sources.file_based.remote_file import RemoteFile
17
22
 
18
23
 
@@ -128,24 +133,20 @@ class AbstractFileBasedStreamReader(ABC):
128
133
 
129
134
  def use_file_transfer(self) -> bool:
130
135
  if self.config:
131
- use_file_transfer = (
132
- hasattr(self.config.delivery_method, "delivery_type")
133
- and self.config.delivery_method.delivery_type == "use_file_transfer"
134
- )
135
- return use_file_transfer
136
+ return use_file_transfer(self.config)
136
137
  return False
137
138
 
138
139
  def preserve_directory_structure(self) -> bool:
139
140
  # fall back to preserve subdirectories if config is not present or incomplete
140
- if (
141
- self.use_file_transfer()
142
- and self.config
143
- and hasattr(self.config.delivery_method, "preserve_directory_structure")
144
- and self.config.delivery_method.preserve_directory_structure is not None
145
- ):
146
- return self.config.delivery_method.preserve_directory_structure
141
+ if self.config:
142
+ return preserve_directory_structure(self.config)
147
143
  return True
148
144
 
145
+ def include_identities_stream(self) -> bool:
146
+ if self.config:
147
+ return include_identities_stream(self.config)
148
+ return False
149
+
149
150
  @abstractmethod
150
151
  def get_file(
151
152
  self, file: RemoteFile, local_directory: str, logger: logging.Logger
@@ -183,3 +184,97 @@ class AbstractFileBasedStreamReader(ABC):
183
184
  makedirs(path.dirname(local_file_path), exist_ok=True)
184
185
  absolute_file_path = path.abspath(local_file_path)
185
186
  return [file_relative_path, local_file_path, absolute_file_path]
187
+
188
+ @abstractmethod
189
+ def get_file_acl_permissions(self, file: RemoteFile, logger: logging.Logger) -> Dict[str, Any]:
190
+ """
191
+ This function should return the allow list for a given file, i.e. the list of all identities and their permission levels associated with it
192
+
193
+ e.g.
194
+ def get_file_acl_permissions(self, file: RemoteFile, logger: logging.Logger):
195
+ api_conn = some_api.conn(credentials=SOME_CREDENTIALS)
196
+ result = api_conn.get_file_permissions_info(file.id)
197
+ return MyPermissionsModel(
198
+ id=result["id"],
199
+ access_control_list = result["access_control_list"],
200
+ is_public = result["is_public"],
201
+ ).dict()
202
+ """
203
+ raise NotImplementedError(
204
+ f"{self.__class__.__name__} does not implement get_file_acl_permissions(). To support ACL permissions, implement this method and update file_permissions_schema."
205
+ )
206
+
207
+ @abstractmethod
208
+ def load_identity_groups(self, logger: logging.Logger) -> Iterable[Dict[str, Any]]:
209
+ """
210
+ This function should return the Identities in a determined "space" or "domain" where the file metadata (ACLs) is fetched and the ACL items (Identities) exist.
211
+
212
+ e.g.
213
+ def load_identity_groups(self, logger: logging.Logger) -> Iterable[Dict[str, Any]]:
214
+ api_conn = some_api.conn(credentials=SOME_CREDENTIALS)
215
+ users_api = api_conn.users()
216
+ groups_api = api_conn.groups()
217
+ members_api = api_conn.members()
218
+ for user in users_api.list():
219
+ yield my_identity_model(id=user.id, name=user.name, email_address=user.email, type="user").dict()
220
+ for group in groups_api.list():
221
+ group_obj = my_identity_model(id=group.id, name=group.name, email_address=group.email, type="group")
222
+ for member in members_api.list(group=group):
223
+ group_obj.member_email_addresses = group_obj.member_email_addresses or []
224
+ group_obj.member_email_addresses.append(member.email)
225
+ yield group_obj.dict()
226
+ """
227
+ raise NotImplementedError(
228
+ f"{self.__class__.__name__} does not implement load_identity_groups(). To support identities, implement this method and update identities_schema."
229
+ )
230
+
231
+ @property
232
+ @abstractmethod
233
+ def file_permissions_schema(self) -> Dict[str, Any]:
234
+ """
235
+ This function should return the permissions schema for file permissions stream.
236
+
237
+ e.g.
238
+ def file_permissions_schema(self) -> Dict[str, Any]:
239
+ # you can also follow the pattern we have for python connectors and have a json file and read from there e.g. schemas/identities.json
240
+ return {
241
+ "type": "object",
242
+ "properties": {
243
+ "id": { "type": "string" },
244
+ "file_path": { "type": "string" },
245
+ "access_control_list": {
246
+ "type": "array",
247
+ "items": { "type": "string" }
248
+ },
249
+ "publicly_accessible": { "type": "boolean" }
250
+ }
251
+ }
252
+ """
253
+ raise NotImplementedError(
254
+ f"{self.__class__.__name__} does not implement file_permissions_schema, please return json schema for your permissions streams."
255
+ )
256
+
257
+ @property
258
+ @abstractmethod
259
+ def identities_schema(self) -> Dict[str, Any]:
260
+ """
261
+ This function should return the identities schema for file identity stream.
262
+
263
+ e.g.
264
+ def identities_schema(self) -> Dict[str, Any]:
265
+ # you can also follow the pattern we have for python connectors and have a json file and read from there e.g. schemas/identities.json
266
+ return {
267
+ "type": "object",
268
+ "properties": {
269
+ "id": { "type": "string" },
270
+ "remote_id": { "type": "string" },
271
+ "name": { "type": ["null", "string"] },
272
+ "email_address": { "type": ["null", "string"] },
273
+ "member_email_addresses": { "type": ["null", "array"] },
274
+ "type": { "type": "string" },
275
+ }
276
+ }
277
+ """
278
+ raise NotImplementedError(
279
+ f"{self.__class__.__name__} does not implement identities_schema, please return json schema for your identities stream."
280
+ )
@@ -1,4 +1,13 @@
1
1
  from airbyte_cdk.sources.file_based.stream.abstract_file_based_stream import AbstractFileBasedStream
2
2
  from airbyte_cdk.sources.file_based.stream.default_file_based_stream import DefaultFileBasedStream
3
+ from airbyte_cdk.sources.file_based.stream.identities_stream import FileIdentitiesStream
4
+ from airbyte_cdk.sources.file_based.stream.permissions_file_based_stream import (
5
+ PermissionsFileBasedStream,
6
+ )
3
7
 
4
- __all__ = ["AbstractFileBasedStream", "DefaultFileBasedStream"]
8
+ __all__ = [
9
+ "AbstractFileBasedStream",
10
+ "DefaultFileBasedStream",
11
+ "FileIdentitiesStream",
12
+ "PermissionsFileBasedStream",
13
+ ]
@@ -0,0 +1,47 @@
1
+ #
2
+ # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from functools import cache
6
+ from typing import Any, Dict, Iterable, Mapping, MutableMapping, Optional
7
+
8
+ from airbyte_cdk.sources.file_based.config.file_based_stream_config import PrimaryKeyType
9
+ from airbyte_cdk.sources.file_based.discovery_policy import AbstractDiscoveryPolicy
10
+ from airbyte_cdk.sources.file_based.exceptions import FileBasedErrorsCollector
11
+ from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader
12
+ from airbyte_cdk.sources.streams.core import JsonSchema
13
+ from airbyte_cdk.sources.streams.permissions.identities_stream import IdentitiesStream
14
+
15
+
16
+ class FileIdentitiesStream(IdentitiesStream):
17
+ """
18
+ The identities stream. A full refresh stream to sync identities from a certain domain.
19
+ The stream reader manages the logic to get such data, which is implemented on the connector side.
20
+ """
21
+
22
+ is_resumable = False
23
+
24
+ def __init__(
25
+ self,
26
+ catalog_schema: Optional[Mapping[str, Any]],
27
+ stream_reader: AbstractFileBasedStreamReader,
28
+ discovery_policy: AbstractDiscoveryPolicy,
29
+ errors_collector: FileBasedErrorsCollector,
30
+ ) -> None:
31
+ super().__init__()
32
+ self.catalog_schema = catalog_schema
33
+ self.stream_reader = stream_reader
34
+ self._discovery_policy = discovery_policy
35
+ self.errors_collector = errors_collector
36
+ self._cursor: MutableMapping[str, Any] = {}
37
+
38
+ @property
39
+ def primary_key(self) -> PrimaryKeyType:
40
+ return None
41
+
42
+ def load_identity_groups(self) -> Iterable[Dict[str, Any]]:
43
+ return self.stream_reader.load_identity_groups(logger=self.logger)
44
+
45
+ @cache
46
+ def get_json_schema(self) -> JsonSchema:
47
+ return self.stream_reader.identities_schema
@@ -0,0 +1,85 @@
1
+ #
2
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ import traceback
6
+ from typing import Any, Dict, Iterable
7
+
8
+ from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Level
9
+ from airbyte_cdk.models import Type as MessageType
10
+ from airbyte_cdk.sources.file_based.stream import DefaultFileBasedStream
11
+ from airbyte_cdk.sources.file_based.types import StreamSlice
12
+ from airbyte_cdk.sources.streams.core import JsonSchema
13
+ from airbyte_cdk.sources.utils.record_helper import stream_data_to_airbyte_message
14
+
15
+
16
+ class PermissionsFileBasedStream(DefaultFileBasedStream):
17
+ """
18
+ A specialized stream for handling file-based ACL permissions.
19
+
20
+ This stream works with the stream_reader to:
21
+ 1. Fetch ACL permissions for each file in the source
22
+ 2. Transform permissions into a standardized format
23
+ 3. Generate records containing permission information
24
+
25
+ The stream_reader is responsible for the actual implementation of permission retrieval
26
+ and schema definition, while this class handles the streaming interface.
27
+ """
28
+
29
+ def _filter_schema_invalid_properties(
30
+ self, configured_catalog_json_schema: Dict[str, Any]
31
+ ) -> Dict[str, Any]:
32
+ return self.stream_reader.file_permissions_schema
33
+
34
+ def read_records_from_slice(self, stream_slice: StreamSlice) -> Iterable[AirbyteMessage]:
35
+ """
36
+ Yield permissions records from all remote files
37
+ """
38
+
39
+ for file in stream_slice["files"]:
40
+ no_permissions = False
41
+ file_datetime_string = file.last_modified.strftime(self.DATE_TIME_FORMAT)
42
+ try:
43
+ permissions_record = self.stream_reader.get_file_acl_permissions(
44
+ file, logger=self.logger
45
+ )
46
+ if not permissions_record:
47
+ no_permissions = True
48
+ self.logger.warning(
49
+ f"Unable to fetch permissions. stream={self.name} file={file.uri}"
50
+ )
51
+ continue
52
+ permissions_record = self.transform_record(
53
+ permissions_record, file, file_datetime_string
54
+ )
55
+ yield stream_data_to_airbyte_message(
56
+ self.name, permissions_record, is_file_transfer_message=False
57
+ )
58
+ except Exception as e:
59
+ self.logger.error(f"Failed to retrieve permissions for file {file.uri}: {str(e)}")
60
+ yield AirbyteMessage(
61
+ type=MessageType.LOG,
62
+ log=AirbyteLogMessage(
63
+ level=Level.ERROR,
64
+ message=f"Error retrieving files permissions: stream={self.name} file={file.uri}",
65
+ stack_trace=traceback.format_exc(),
66
+ ),
67
+ )
68
+ finally:
69
+ if no_permissions:
70
+ yield AirbyteMessage(
71
+ type=MessageType.LOG,
72
+ log=AirbyteLogMessage(
73
+ level=Level.WARN,
74
+ message=f"Unable to fetch permissions. stream={self.name} file={file.uri}",
75
+ ),
76
+ )
77
+
78
+ def _get_raw_json_schema(self) -> JsonSchema:
79
+ """
80
+ Retrieve the raw JSON schema for file permissions from the stream reader.
81
+
82
+ Returns:
83
+ The file permissions schema that defines the structure of permission records
84
+ """
85
+ return self.stream_reader.file_permissions_schema
@@ -0,0 +1,26 @@
1
+ #
2
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from typing import Literal
6
+
7
+ from pydantic.v1 import AnyUrl, BaseModel, Field
8
+
9
+ from airbyte_cdk import OneOfOptionConfig
10
+
11
+
12
+ class DeliverPermissions(BaseModel):
13
+ class Config(OneOfOptionConfig):
14
+ title = "Replicate Permissions ACL"
15
+ description = "Sends one identity stream and one for more permissions (ACL) streams to the destination. This data can be used in downstream systems to recreate permission restrictions mirroring the original source."
16
+ discriminator = "delivery_type"
17
+
18
+ delivery_type: Literal["use_permissions_transfer"] = Field(
19
+ "use_permissions_transfer", const=True
20
+ )
21
+
22
+ include_identities_stream: bool = Field(
23
+ title="Include Identity Stream",
24
+ description="This data can be used in downstream systems to recreate permission restrictions mirroring the original source",
25
+ default=True,
26
+ )