airbyte-cdk 6.34.1.dev0__py3-none-any.whl → 6.34.1.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/connector_builder/connector_builder_handler.py +16 -12
- airbyte_cdk/connector_builder/test_reader/__init__.py +7 -0
- airbyte_cdk/connector_builder/test_reader/helpers.py +591 -0
- airbyte_cdk/connector_builder/test_reader/message_grouper.py +160 -0
- airbyte_cdk/connector_builder/test_reader/reader.py +441 -0
- airbyte_cdk/connector_builder/test_reader/types.py +75 -0
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +7 -7
- airbyte_cdk/sources/declarative/auth/jwt.py +17 -11
- airbyte_cdk/sources/declarative/auth/oauth.py +6 -1
- airbyte_cdk/sources/declarative/auth/token.py +3 -8
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +30 -79
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +203 -100
- airbyte_cdk/sources/declarative/declarative_stream.py +3 -1
- airbyte_cdk/sources/declarative/decoders/__init__.py +0 -4
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +7 -2
- airbyte_cdk/sources/declarative/decoders/json_decoder.py +12 -58
- airbyte_cdk/sources/declarative/extractors/record_selector.py +12 -3
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +56 -25
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +12 -6
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +6 -2
- airbyte_cdk/sources/declarative/interpolation/jinja.py +13 -0
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +9 -0
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +150 -41
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +234 -84
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +5 -5
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +4 -2
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +26 -18
- airbyte_cdk/sources/declarative/requesters/http_requester.py +8 -2
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +16 -5
- airbyte_cdk/sources/declarative/requesters/request_option.py +83 -4
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +7 -6
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +1 -4
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +0 -3
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +2 -47
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +6 -12
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +4 -3
- airbyte_cdk/sources/declarative/transformations/add_fields.py +4 -4
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +2 -1
- airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +81 -0
- airbyte_cdk/sources/file_based/file_based_source.py +70 -37
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +107 -12
- airbyte_cdk/sources/file_based/stream/__init__.py +10 -1
- airbyte_cdk/sources/file_based/stream/identities_stream.py +47 -0
- airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +85 -0
- airbyte_cdk/sources/specs/transfer_modes.py +26 -0
- airbyte_cdk/sources/streams/call_rate.py +185 -47
- airbyte_cdk/sources/streams/http/http.py +1 -2
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +217 -56
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +144 -73
- airbyte_cdk/sources/streams/permissions/identities_stream.py +75 -0
- airbyte_cdk/test/mock_http/mocker.py +9 -1
- airbyte_cdk/test/mock_http/response.py +6 -3
- airbyte_cdk/utils/datetime_helpers.py +48 -66
- airbyte_cdk/utils/mapping_helpers.py +126 -26
- {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.34.1.dev1.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.34.1.dev1.dist-info}/RECORD +60 -51
- airbyte_cdk/connector_builder/message_grouper.py +0 -448
- {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.34.1.dev1.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.34.1.dev1.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.34.1.dev1.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.34.1.dev1.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,81 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import (
|
6
|
+
AbstractFileBasedSpec,
|
7
|
+
DeliverRawFiles,
|
8
|
+
)
|
9
|
+
from airbyte_cdk.sources.specs.transfer_modes import DeliverPermissions
|
10
|
+
|
11
|
+
DELIVERY_TYPE_KEY = "delivery_type"
|
12
|
+
DELIVERY_TYPE_PERMISSION_TRANSFER_MODE_VALUE = "use_permissions_transfer"
|
13
|
+
DELIVERY_TYPE_FILES_TRANSFER_MODE_VALUE = "use_file_transfer"
|
14
|
+
PRESERVE_DIRECTORY_STRUCTURE_KEY = "preserve_directory_structure"
|
15
|
+
INCLUDE_IDENTITIES_STREAM_KEY = "include_identities_stream"
|
16
|
+
|
17
|
+
|
18
|
+
def use_file_transfer(parsed_config: AbstractFileBasedSpec) -> bool:
|
19
|
+
"""Returns `True` if the configuration uses file transfer mode."""
|
20
|
+
return (
|
21
|
+
hasattr(parsed_config.delivery_method, DELIVERY_TYPE_KEY)
|
22
|
+
and parsed_config.delivery_method.delivery_type == DELIVERY_TYPE_FILES_TRANSFER_MODE_VALUE
|
23
|
+
)
|
24
|
+
|
25
|
+
|
26
|
+
def preserve_directory_structure(parsed_config: AbstractFileBasedSpec) -> bool:
|
27
|
+
"""
|
28
|
+
Determines whether to preserve directory structure during file transfer.
|
29
|
+
|
30
|
+
When enabled, files maintain their subdirectory paths in the destination.
|
31
|
+
When disabled, files are flattened to the root of the destination.
|
32
|
+
|
33
|
+
Args:
|
34
|
+
parsed_config: The parsed configuration containing delivery method settings
|
35
|
+
|
36
|
+
Returns:
|
37
|
+
True if directory structure should be preserved (default), False otherwise
|
38
|
+
"""
|
39
|
+
if (
|
40
|
+
use_file_transfer(parsed_config)
|
41
|
+
and hasattr(parsed_config.delivery_method, PRESERVE_DIRECTORY_STRUCTURE_KEY)
|
42
|
+
and isinstance(parsed_config.delivery_method, DeliverRawFiles)
|
43
|
+
):
|
44
|
+
return parsed_config.delivery_method.preserve_directory_structure
|
45
|
+
return True
|
46
|
+
|
47
|
+
|
48
|
+
def use_permissions_transfer(parsed_config: AbstractFileBasedSpec) -> bool:
|
49
|
+
"""
|
50
|
+
Determines whether to use permissions transfer to sync ACLs and Identities
|
51
|
+
|
52
|
+
Args:
|
53
|
+
parsed_config: The parsed configuration containing delivery method settings
|
54
|
+
|
55
|
+
Returns:
|
56
|
+
True if permissions transfer should be enabled, False otherwise
|
57
|
+
"""
|
58
|
+
return (
|
59
|
+
hasattr(parsed_config.delivery_method, DELIVERY_TYPE_KEY)
|
60
|
+
and parsed_config.delivery_method.delivery_type
|
61
|
+
== DELIVERY_TYPE_PERMISSION_TRANSFER_MODE_VALUE
|
62
|
+
)
|
63
|
+
|
64
|
+
|
65
|
+
def include_identities_stream(parsed_config: AbstractFileBasedSpec) -> bool:
|
66
|
+
"""
|
67
|
+
There are scenarios where the user may not have access to identities, but it is still valuable to get ACLs.
|
68
|
+
|
69
|
+
Args:
|
70
|
+
parsed_config: The parsed configuration containing delivery method settings
|
71
|
+
|
72
|
+
Returns:
|
73
|
+
True if we should include Identities stream.
|
74
|
+
"""
|
75
|
+
if (
|
76
|
+
use_permissions_transfer(parsed_config)
|
77
|
+
and hasattr(parsed_config.delivery_method, INCLUDE_IDENTITIES_STREAM_KEY)
|
78
|
+
and isinstance(parsed_config.delivery_method, DeliverPermissions)
|
79
|
+
):
|
80
|
+
return parsed_config.delivery_method.include_identities_stream
|
81
|
+
return False
|
@@ -33,6 +33,12 @@ from airbyte_cdk.sources.file_based.config.file_based_stream_config import (
|
|
33
33
|
FileBasedStreamConfig,
|
34
34
|
ValidationPolicy,
|
35
35
|
)
|
36
|
+
from airbyte_cdk.sources.file_based.config.validate_config_transfer_modes import (
|
37
|
+
include_identities_stream,
|
38
|
+
preserve_directory_structure,
|
39
|
+
use_file_transfer,
|
40
|
+
use_permissions_transfer,
|
41
|
+
)
|
36
42
|
from airbyte_cdk.sources.file_based.discovery_policy import (
|
37
43
|
AbstractDiscoveryPolicy,
|
38
44
|
DefaultDiscoveryPolicy,
|
@@ -49,7 +55,12 @@ from airbyte_cdk.sources.file_based.schema_validation_policies import (
|
|
49
55
|
DEFAULT_SCHEMA_VALIDATION_POLICIES,
|
50
56
|
AbstractSchemaValidationPolicy,
|
51
57
|
)
|
52
|
-
from airbyte_cdk.sources.file_based.stream import AbstractFileBasedStream, DefaultFileBasedStream
|
58
|
+
from airbyte_cdk.sources.file_based.stream import (
|
59
|
+
AbstractFileBasedStream,
|
60
|
+
DefaultFileBasedStream,
|
61
|
+
FileIdentitiesStream,
|
62
|
+
PermissionsFileBasedStream,
|
63
|
+
)
|
53
64
|
from airbyte_cdk.sources.file_based.stream.concurrent.adapters import FileBasedStreamFacade
|
54
65
|
from airbyte_cdk.sources.file_based.stream.concurrent.cursor import (
|
55
66
|
AbstractConcurrentFileBasedCursor,
|
@@ -66,6 +77,7 @@ from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
|
66
77
|
DEFAULT_CONCURRENCY = 100
|
67
78
|
MAX_CONCURRENCY = 100
|
68
79
|
INITIAL_N_PARTITIONS = MAX_CONCURRENCY // 2
|
80
|
+
IDENTITIES_STREAM = "identities"
|
69
81
|
|
70
82
|
|
71
83
|
class FileBasedSource(ConcurrentSourceAdapter, ABC):
|
@@ -157,13 +169,20 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
|
|
157
169
|
errors = []
|
158
170
|
tracebacks = []
|
159
171
|
for stream in streams:
|
172
|
+
if isinstance(stream, FileIdentitiesStream):
|
173
|
+
identity = next(iter(stream.load_identity_groups()))
|
174
|
+
if not identity:
|
175
|
+
errors.append(
|
176
|
+
"Unable to get identities for current configuration, please check your credentials"
|
177
|
+
)
|
178
|
+
continue
|
160
179
|
if not isinstance(stream, AbstractFileBasedStream):
|
161
180
|
raise ValueError(f"Stream {stream} is not a file-based stream.")
|
162
181
|
try:
|
163
182
|
parsed_config = self._get_parsed_config(config)
|
164
183
|
availability_method = (
|
165
184
|
stream.availability_strategy.check_availability
|
166
|
-
if self._use_file_transfer(parsed_config)
|
185
|
+
if use_file_transfer(parsed_config) or use_permissions_transfer(parsed_config)
|
167
186
|
else stream.availability_strategy.check_availability_and_parsability
|
168
187
|
)
|
169
188
|
(
|
@@ -239,7 +258,7 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
|
|
239
258
|
message_repository=self.message_repository,
|
240
259
|
)
|
241
260
|
stream = FileBasedStreamFacade.create_from_stream(
|
242
|
-
stream=self._make_default_stream(
|
261
|
+
stream=self._make_file_based_stream(
|
243
262
|
stream_config=stream_config,
|
244
263
|
cursor=cursor,
|
245
264
|
parsed_config=parsed_config,
|
@@ -270,7 +289,7 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
|
|
270
289
|
CursorField(DefaultFileBasedStream.ab_last_mod_col),
|
271
290
|
)
|
272
291
|
stream = FileBasedStreamFacade.create_from_stream(
|
273
|
-
stream=self._make_default_stream(
|
292
|
+
stream=self._make_file_based_stream(
|
274
293
|
stream_config=stream_config,
|
275
294
|
cursor=cursor,
|
276
295
|
parsed_config=parsed_config,
|
@@ -282,13 +301,17 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
|
|
282
301
|
)
|
283
302
|
else:
|
284
303
|
cursor = self.cursor_cls(stream_config)
|
285
|
-
stream = self._make_default_stream(
|
304
|
+
stream = self._make_file_based_stream(
|
286
305
|
stream_config=stream_config,
|
287
306
|
cursor=cursor,
|
288
307
|
parsed_config=parsed_config,
|
289
308
|
)
|
290
309
|
|
291
310
|
streams.append(stream)
|
311
|
+
|
312
|
+
if include_identities_stream(parsed_config):
|
313
|
+
identities_stream = self._make_identities_stream()
|
314
|
+
streams.append(identities_stream)
|
292
315
|
return streams
|
293
316
|
|
294
317
|
except ValidationError as exc:
|
@@ -310,8 +333,48 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
|
|
310
333
|
validation_policy=self._validate_and_get_validation_policy(stream_config),
|
311
334
|
errors_collector=self.errors_collector,
|
312
335
|
cursor=cursor,
|
313
|
-
use_file_transfer=self._use_file_transfer(parsed_config),
|
314
|
-
preserve_directory_structure=self._preserve_directory_structure(parsed_config),
|
336
|
+
use_file_transfer=use_file_transfer(parsed_config),
|
337
|
+
preserve_directory_structure=preserve_directory_structure(parsed_config),
|
338
|
+
)
|
339
|
+
|
340
|
+
def _make_permissions_stream(
|
341
|
+
self, stream_config: FileBasedStreamConfig, cursor: Optional[AbstractFileBasedCursor]
|
342
|
+
) -> AbstractFileBasedStream:
|
343
|
+
return PermissionsFileBasedStream(
|
344
|
+
config=stream_config,
|
345
|
+
catalog_schema=self.stream_schemas.get(stream_config.name),
|
346
|
+
stream_reader=self.stream_reader,
|
347
|
+
availability_strategy=self.availability_strategy,
|
348
|
+
discovery_policy=self.discovery_policy,
|
349
|
+
parsers=self.parsers,
|
350
|
+
validation_policy=self._validate_and_get_validation_policy(stream_config),
|
351
|
+
errors_collector=self.errors_collector,
|
352
|
+
cursor=cursor,
|
353
|
+
)
|
354
|
+
|
355
|
+
def _make_file_based_stream(
|
356
|
+
self,
|
357
|
+
stream_config: FileBasedStreamConfig,
|
358
|
+
cursor: Optional[AbstractFileBasedCursor],
|
359
|
+
parsed_config: AbstractFileBasedSpec,
|
360
|
+
) -> AbstractFileBasedStream:
|
361
|
+
"""
|
362
|
+
Creates different streams depending on the type of the transfer mode selected
|
363
|
+
"""
|
364
|
+
if use_permissions_transfer(parsed_config):
|
365
|
+
return self._make_permissions_stream(stream_config, cursor)
|
366
|
+
# we should have a stream for File transfer mode to decouple from DefaultFileBasedStream
|
367
|
+
else:
|
368
|
+
return self._make_default_stream(stream_config, cursor, parsed_config)
|
369
|
+
|
370
|
+
def _make_identities_stream(
|
371
|
+
self,
|
372
|
+
) -> Stream:
|
373
|
+
return FileIdentitiesStream(
|
374
|
+
catalog_schema=self.stream_schemas.get(FileIdentitiesStream.IDENTITIES_STREAM_NAME),
|
375
|
+
stream_reader=self.stream_reader,
|
376
|
+
discovery_policy=self.discovery_policy,
|
377
|
+
errors_collector=self.errors_collector,
|
315
378
|
)
|
316
379
|
|
317
380
|
def _get_stream_from_catalog(
|
@@ -378,33 +441,3 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
|
|
378
441
|
"`input_schema` and `schemaless` options cannot both be set",
|
379
442
|
model=FileBasedStreamConfig,
|
380
443
|
)
|
381
|
-
|
382
|
-
@staticmethod
|
383
|
-
def _use_file_transfer(parsed_config: AbstractFileBasedSpec) -> bool:
|
384
|
-
use_file_transfer = (
|
385
|
-
hasattr(parsed_config.delivery_method, "delivery_type")
|
386
|
-
and parsed_config.delivery_method.delivery_type == "use_file_transfer"
|
387
|
-
)
|
388
|
-
return use_file_transfer
|
389
|
-
|
390
|
-
@staticmethod
|
391
|
-
def _preserve_directory_structure(parsed_config: AbstractFileBasedSpec) -> bool:
|
392
|
-
"""
|
393
|
-
Determines whether to preserve directory structure during file transfer.
|
394
|
-
|
395
|
-
When enabled, files maintain their subdirectory paths in the destination.
|
396
|
-
When disabled, files are flattened to the root of the destination.
|
397
|
-
|
398
|
-
Args:
|
399
|
-
parsed_config: The parsed configuration containing delivery method settings
|
400
|
-
|
401
|
-
Returns:
|
402
|
-
True if directory structure should be preserved (default), False otherwise
|
403
|
-
"""
|
404
|
-
if (
|
405
|
-
FileBasedSource._use_file_transfer(parsed_config)
|
406
|
-
and hasattr(parsed_config.delivery_method, "preserve_directory_structure")
|
407
|
-
and parsed_config.delivery_method.preserve_directory_structure is not None
|
408
|
-
):
|
409
|
-
return parsed_config.delivery_method.preserve_directory_structure
|
410
|
-
return True
|
@@ -13,6 +13,11 @@ from typing import Any, Dict, Iterable, List, Optional, Set
|
|
13
13
|
from wcmatch.glob import GLOBSTAR, globmatch
|
14
14
|
|
15
15
|
from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import AbstractFileBasedSpec
|
16
|
+
from airbyte_cdk.sources.file_based.config.validate_config_transfer_modes import (
|
17
|
+
include_identities_stream,
|
18
|
+
preserve_directory_structure,
|
19
|
+
use_file_transfer,
|
20
|
+
)
|
16
21
|
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
|
17
22
|
|
18
23
|
|
@@ -128,24 +133,20 @@ class AbstractFileBasedStreamReader(ABC):
|
|
128
133
|
|
129
134
|
def use_file_transfer(self) -> bool:
|
130
135
|
if self.config:
|
131
|
-
use_file_transfer = (
|
132
|
-
hasattr(self.config.delivery_method, "delivery_type")
|
133
|
-
and self.config.delivery_method.delivery_type == "use_file_transfer"
|
134
|
-
)
|
135
|
-
return use_file_transfer
|
136
|
+
return use_file_transfer(self.config)
|
136
137
|
return False
|
137
138
|
|
138
139
|
def preserve_directory_structure(self) -> bool:
|
139
140
|
# fall back to preserve subdirectories if config is not present or incomplete
|
140
|
-
if (
|
141
|
-
self.use_file_transfer()
|
142
|
-
and self.config
|
143
|
-
and hasattr(self.config.delivery_method, "preserve_directory_structure")
|
144
|
-
and self.config.delivery_method.preserve_directory_structure is not None
|
145
|
-
):
|
146
|
-
return self.config.delivery_method.preserve_directory_structure
|
141
|
+
if self.config:
|
142
|
+
return preserve_directory_structure(self.config)
|
147
143
|
return True
|
148
144
|
|
145
|
+
def include_identities_stream(self) -> bool:
|
146
|
+
if self.config:
|
147
|
+
return include_identities_stream(self.config)
|
148
|
+
return False
|
149
|
+
|
149
150
|
@abstractmethod
|
150
151
|
def get_file(
|
151
152
|
self, file: RemoteFile, local_directory: str, logger: logging.Logger
|
@@ -183,3 +184,97 @@ class AbstractFileBasedStreamReader(ABC):
|
|
183
184
|
makedirs(path.dirname(local_file_path), exist_ok=True)
|
184
185
|
absolute_file_path = path.abspath(local_file_path)
|
185
186
|
return [file_relative_path, local_file_path, absolute_file_path]
|
187
|
+
|
188
|
+
@abstractmethod
|
189
|
+
def get_file_acl_permissions(self, file: RemoteFile, logger: logging.Logger) -> Dict[str, Any]:
|
190
|
+
"""
|
191
|
+
This function should return the allow list for a given file, i.e. the list of all identities and their permission levels associated with it
|
192
|
+
|
193
|
+
e.g.
|
194
|
+
def get_file_acl_permissions(self, file: RemoteFile, logger: logging.Logger):
|
195
|
+
api_conn = some_api.conn(credentials=SOME_CREDENTIALS)
|
196
|
+
result = api_conn.get_file_permissions_info(file.id)
|
197
|
+
return MyPermissionsModel(
|
198
|
+
id=result["id"],
|
199
|
+
access_control_list = result["access_control_list"],
|
200
|
+
is_public = result["is_public"],
|
201
|
+
).dict()
|
202
|
+
"""
|
203
|
+
raise NotImplementedError(
|
204
|
+
f"{self.__class__.__name__} does not implement get_file_acl_permissions(). To support ACL permissions, implement this method and update file_permissions_schema."
|
205
|
+
)
|
206
|
+
|
207
|
+
@abstractmethod
|
208
|
+
def load_identity_groups(self, logger: logging.Logger) -> Iterable[Dict[str, Any]]:
|
209
|
+
"""
|
210
|
+
This function should return the Identities in a determined "space" or "domain" where the file metadata (ACLs) are fetched and the ACL items (Identities) exist.
|
211
|
+
|
212
|
+
e.g.
|
213
|
+
def load_identity_groups(self, logger: logging.Logger) -> Iterable[Dict[str, Any]]:
|
214
|
+
api_conn = some_api.conn(credentials=SOME_CREDENTIALS)
|
215
|
+
users_api = api_conn.users()
|
216
|
+
groups_api = api_conn.groups()
|
217
|
+
members_api = api_conn.members()
|
218
|
+
for user in users_api.list():
|
219
|
+
yield my_identity_model(id=user.id, name=user.name, email_address=user.email, type="user").dict()
|
220
|
+
for group in groups_api.list():
|
221
|
+
group_obj = my_identity_model(id=group.id, name=group.name, email_address=group.email, type="group")
|
222
|
+
for member in members_api.list(group=group):
|
223
|
+
group_obj.member_email_addresses = group_obj.member_email_addresses or []
|
224
|
+
group_obj.member_email_addresses.append(member.email)
|
225
|
+
yield group_obj.dict()
|
226
|
+
"""
|
227
|
+
raise NotImplementedError(
|
228
|
+
f"{self.__class__.__name__} does not implement load_identity_groups(). To support identities, implement this method and update identities_schema."
|
229
|
+
)
|
230
|
+
|
231
|
+
@property
|
232
|
+
@abstractmethod
|
233
|
+
def file_permissions_schema(self) -> Dict[str, Any]:
|
234
|
+
"""
|
235
|
+
This function should return the permissions schema for file permissions stream.
|
236
|
+
|
237
|
+
e.g.
|
238
|
+
def file_permissions_schema(self) -> Dict[str, Any]:
|
239
|
+
# you can also follow the pattern we have for python connectors and have a json file and read from there e.g. schemas/identities.json
|
240
|
+
return {
|
241
|
+
"type": "object",
|
242
|
+
"properties": {
|
243
|
+
"id": { "type": "string" },
|
244
|
+
"file_path": { "type": "string" },
|
245
|
+
"access_control_list": {
|
246
|
+
"type": "array",
|
247
|
+
"items": { "type": "string" }
|
248
|
+
},
|
249
|
+
"publicly_accessible": { "type": "boolean" }
|
250
|
+
}
|
251
|
+
}
|
252
|
+
"""
|
253
|
+
raise NotImplementedError(
|
254
|
+
f"{self.__class__.__name__} does not implement file_permissions_schema, please return json schema for your permissions streams."
|
255
|
+
)
|
256
|
+
|
257
|
+
@property
|
258
|
+
@abstractmethod
|
259
|
+
def identities_schema(self) -> Dict[str, Any]:
|
260
|
+
"""
|
261
|
+
This function should return the identities schema for file identity stream.
|
262
|
+
|
263
|
+
e.g.
|
264
|
+
def identities_schema(self) -> Dict[str, Any]:
|
265
|
+
# you can also follow the pattern we have for python connectors and have a json file and read from there e.g. schemas/identities.json
|
266
|
+
return {
|
267
|
+
"type": "object",
|
268
|
+
"properties": {
|
269
|
+
"id": { "type": "string" },
|
270
|
+
"remote_id": { "type": "string" },
|
271
|
+
"name": { "type": ["null", "string"] },
|
272
|
+
"email_address": { "type": ["null", "string"] },
|
273
|
+
"member_email_addresses": { "type": ["null", "array"] },
|
274
|
+
"type": { "type": "string" },
|
275
|
+
}
|
276
|
+
}
|
277
|
+
"""
|
278
|
+
raise NotImplementedError(
|
279
|
+
f"{self.__class__.__name__} does not implement identities_schema, please return json schema for your identities stream."
|
280
|
+
)
|
@@ -1,4 +1,13 @@
|
|
1
1
|
from airbyte_cdk.sources.file_based.stream.abstract_file_based_stream import AbstractFileBasedStream
|
2
2
|
from airbyte_cdk.sources.file_based.stream.default_file_based_stream import DefaultFileBasedStream
|
3
|
+
from airbyte_cdk.sources.file_based.stream.identities_stream import FileIdentitiesStream
|
4
|
+
from airbyte_cdk.sources.file_based.stream.permissions_file_based_stream import (
|
5
|
+
PermissionsFileBasedStream,
|
6
|
+
)
|
3
7
|
|
4
|
-
__all__ = ["AbstractFileBasedStream", "DefaultFileBasedStream"]
|
8
|
+
__all__ = [
|
9
|
+
"AbstractFileBasedStream",
|
10
|
+
"DefaultFileBasedStream",
|
11
|
+
"FileIdentitiesStream",
|
12
|
+
"PermissionsFileBasedStream",
|
13
|
+
]
|
@@ -0,0 +1,47 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
from functools import cache
|
6
|
+
from typing import Any, Dict, Iterable, Mapping, MutableMapping, Optional
|
7
|
+
|
8
|
+
from airbyte_cdk.sources.file_based.config.file_based_stream_config import PrimaryKeyType
|
9
|
+
from airbyte_cdk.sources.file_based.discovery_policy import AbstractDiscoveryPolicy
|
10
|
+
from airbyte_cdk.sources.file_based.exceptions import FileBasedErrorsCollector
|
11
|
+
from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader
|
12
|
+
from airbyte_cdk.sources.streams.core import JsonSchema
|
13
|
+
from airbyte_cdk.sources.streams.permissions.identities_stream import IdentitiesStream
|
14
|
+
|
15
|
+
|
16
|
+
class FileIdentitiesStream(IdentitiesStream):
|
17
|
+
"""
|
18
|
+
The identities stream. A full refresh stream to sync identities from a certain domain.
|
19
|
+
The stream reader manages the logic to get such data, which is implemented on the connector side.
|
20
|
+
"""
|
21
|
+
|
22
|
+
is_resumable = False
|
23
|
+
|
24
|
+
def __init__(
|
25
|
+
self,
|
26
|
+
catalog_schema: Optional[Mapping[str, Any]],
|
27
|
+
stream_reader: AbstractFileBasedStreamReader,
|
28
|
+
discovery_policy: AbstractDiscoveryPolicy,
|
29
|
+
errors_collector: FileBasedErrorsCollector,
|
30
|
+
) -> None:
|
31
|
+
super().__init__()
|
32
|
+
self.catalog_schema = catalog_schema
|
33
|
+
self.stream_reader = stream_reader
|
34
|
+
self._discovery_policy = discovery_policy
|
35
|
+
self.errors_collector = errors_collector
|
36
|
+
self._cursor: MutableMapping[str, Any] = {}
|
37
|
+
|
38
|
+
@property
|
39
|
+
def primary_key(self) -> PrimaryKeyType:
|
40
|
+
return None
|
41
|
+
|
42
|
+
def load_identity_groups(self) -> Iterable[Dict[str, Any]]:
|
43
|
+
return self.stream_reader.load_identity_groups(logger=self.logger)
|
44
|
+
|
45
|
+
@cache
|
46
|
+
def get_json_schema(self) -> JsonSchema:
|
47
|
+
return self.stream_reader.identities_schema
|
@@ -0,0 +1,85 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
import traceback
|
6
|
+
from typing import Any, Dict, Iterable
|
7
|
+
|
8
|
+
from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Level
|
9
|
+
from airbyte_cdk.models import Type as MessageType
|
10
|
+
from airbyte_cdk.sources.file_based.stream import DefaultFileBasedStream
|
11
|
+
from airbyte_cdk.sources.file_based.types import StreamSlice
|
12
|
+
from airbyte_cdk.sources.streams.core import JsonSchema
|
13
|
+
from airbyte_cdk.sources.utils.record_helper import stream_data_to_airbyte_message
|
14
|
+
|
15
|
+
|
16
|
+
class PermissionsFileBasedStream(DefaultFileBasedStream):
|
17
|
+
"""
|
18
|
+
A specialized stream for handling file-based ACL permissions.
|
19
|
+
|
20
|
+
This stream works with the stream_reader to:
|
21
|
+
1. Fetch ACL permissions for each file in the source
|
22
|
+
2. Transform permissions into a standardized format
|
23
|
+
3. Generate records containing permission information
|
24
|
+
|
25
|
+
The stream_reader is responsible for the actual implementation of permission retrieval
|
26
|
+
and schema definition, while this class handles the streaming interface.
|
27
|
+
"""
|
28
|
+
|
29
|
+
def _filter_schema_invalid_properties(
|
30
|
+
self, configured_catalog_json_schema: Dict[str, Any]
|
31
|
+
) -> Dict[str, Any]:
|
32
|
+
return self.stream_reader.file_permissions_schema
|
33
|
+
|
34
|
+
def read_records_from_slice(self, stream_slice: StreamSlice) -> Iterable[AirbyteMessage]:
|
35
|
+
"""
|
36
|
+
Yield permissions records from all remote files
|
37
|
+
"""
|
38
|
+
|
39
|
+
for file in stream_slice["files"]:
|
40
|
+
no_permissions = False
|
41
|
+
file_datetime_string = file.last_modified.strftime(self.DATE_TIME_FORMAT)
|
42
|
+
try:
|
43
|
+
permissions_record = self.stream_reader.get_file_acl_permissions(
|
44
|
+
file, logger=self.logger
|
45
|
+
)
|
46
|
+
if not permissions_record:
|
47
|
+
no_permissions = True
|
48
|
+
self.logger.warning(
|
49
|
+
f"Unable to fetch permissions. stream={self.name} file={file.uri}"
|
50
|
+
)
|
51
|
+
continue
|
52
|
+
permissions_record = self.transform_record(
|
53
|
+
permissions_record, file, file_datetime_string
|
54
|
+
)
|
55
|
+
yield stream_data_to_airbyte_message(
|
56
|
+
self.name, permissions_record, is_file_transfer_message=False
|
57
|
+
)
|
58
|
+
except Exception as e:
|
59
|
+
self.logger.error(f"Failed to retrieve permissions for file {file.uri}: {str(e)}")
|
60
|
+
yield AirbyteMessage(
|
61
|
+
type=MessageType.LOG,
|
62
|
+
log=AirbyteLogMessage(
|
63
|
+
level=Level.ERROR,
|
64
|
+
message=f"Error retrieving files permissions: stream={self.name} file={file.uri}",
|
65
|
+
stack_trace=traceback.format_exc(),
|
66
|
+
),
|
67
|
+
)
|
68
|
+
finally:
|
69
|
+
if no_permissions:
|
70
|
+
yield AirbyteMessage(
|
71
|
+
type=MessageType.LOG,
|
72
|
+
log=AirbyteLogMessage(
|
73
|
+
level=Level.WARN,
|
74
|
+
message=f"Unable to fetch permissions. stream={self.name} file={file.uri}",
|
75
|
+
),
|
76
|
+
)
|
77
|
+
|
78
|
+
def _get_raw_json_schema(self) -> JsonSchema:
|
79
|
+
"""
|
80
|
+
Retrieve the raw JSON schema for file permissions from the stream reader.
|
81
|
+
|
82
|
+
Returns:
|
83
|
+
The file permissions schema that defines the structure of permission records
|
84
|
+
"""
|
85
|
+
return self.stream_reader.file_permissions_schema
|
@@ -0,0 +1,26 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
from typing import Literal
|
6
|
+
|
7
|
+
from pydantic.v1 import AnyUrl, BaseModel, Field
|
8
|
+
|
9
|
+
from airbyte_cdk import OneOfOptionConfig
|
10
|
+
|
11
|
+
|
12
|
+
class DeliverPermissions(BaseModel):
|
13
|
+
class Config(OneOfOptionConfig):
|
14
|
+
title = "Replicate Permissions ACL"
|
15
|
+
description = "Sends one identity stream and one for more permissions (ACL) streams to the destination. This data can be used in downstream systems to recreate permission restrictions mirroring the original source."
|
16
|
+
discriminator = "delivery_type"
|
17
|
+
|
18
|
+
delivery_type: Literal["use_permissions_transfer"] = Field(
|
19
|
+
"use_permissions_transfer", const=True
|
20
|
+
)
|
21
|
+
|
22
|
+
include_identities_stream: bool = Field(
|
23
|
+
title="Include Identity Stream",
|
24
|
+
description="This data can be used in downstream systems to recreate permission restrictions mirroring the original source",
|
25
|
+
default=True,
|
26
|
+
)
|