airbyte-cdk 6.26.0.dev4109__py3-none-any.whl → 6.26.0.dev4110__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/file_based/file_based_source.py +32 -4
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +0 -31
- airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +54 -0
- {airbyte_cdk-6.26.0.dev4109.dist-info → airbyte_cdk-6.26.0.dev4110.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.26.0.dev4109.dist-info → airbyte_cdk-6.26.0.dev4110.dist-info}/RECORD +9 -8
- {airbyte_cdk-6.26.0.dev4109.dist-info → airbyte_cdk-6.26.0.dev4110.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.26.0.dev4109.dist-info → airbyte_cdk-6.26.0.dev4110.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.26.0.dev4109.dist-info → airbyte_cdk-6.26.0.dev4110.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.26.0.dev4109.dist-info → airbyte_cdk-6.26.0.dev4110.dist-info}/entry_points.txt +0 -0
@@ -67,6 +67,9 @@ from airbyte_cdk.sources.file_based.stream.concurrent.cursor import (
|
|
67
67
|
FileBasedFinalStateCursor,
|
68
68
|
)
|
69
69
|
from airbyte_cdk.sources.file_based.stream.cursor import AbstractFileBasedCursor
|
70
|
+
from airbyte_cdk.sources.file_based.stream.permissions_file_based_stream import (
|
71
|
+
PermissionsFileBasedStream,
|
72
|
+
)
|
70
73
|
from airbyte_cdk.sources.message.repository import InMemoryMessageRepository, MessageRepository
|
71
74
|
from airbyte_cdk.sources.streams import Stream
|
72
75
|
from airbyte_cdk.sources.streams.concurrent.cursor import CursorField
|
@@ -257,7 +260,7 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
|
|
257
260
|
message_repository=self.message_repository,
|
258
261
|
)
|
259
262
|
stream = FileBasedStreamFacade.create_from_stream(
|
260
|
-
stream=self._make_default_stream(
|
263
|
+
stream=self._make_file_based_stream(
|
261
264
|
stream_config=stream_config,
|
262
265
|
cursor=cursor,
|
263
266
|
parsed_config=parsed_config,
|
@@ -288,7 +291,7 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
|
|
288
291
|
CursorField(DefaultFileBasedStream.ab_last_mod_col),
|
289
292
|
)
|
290
293
|
stream = FileBasedStreamFacade.create_from_stream(
|
291
|
-
stream=self._make_default_stream(
|
294
|
+
stream=self._make_file_based_stream(
|
292
295
|
stream_config=stream_config,
|
293
296
|
cursor=cursor,
|
294
297
|
parsed_config=parsed_config,
|
@@ -300,7 +303,7 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
|
|
300
303
|
)
|
301
304
|
else:
|
302
305
|
cursor = self.cursor_cls(stream_config)
|
303
|
-
stream = self._make_default_stream(
|
306
|
+
stream = self._make_file_based_stream(
|
304
307
|
stream_config=stream_config,
|
305
308
|
cursor=cursor,
|
306
309
|
parsed_config=parsed_config,
|
@@ -334,9 +337,34 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
|
|
334
337
|
cursor=cursor,
|
335
338
|
use_file_transfer=use_file_transfer(parsed_config),
|
336
339
|
preserve_directory_structure=preserve_directory_structure(parsed_config),
|
337
|
-
use_permissions_transfer=use_permissions_transfer(parsed_config),
|
338
340
|
)
|
339
341
|
|
342
|
+
def _make_permissions_stream(
|
343
|
+
self, stream_config: FileBasedStreamConfig, cursor: Optional[AbstractFileBasedCursor]
|
344
|
+
) -> AbstractFileBasedStream:
|
345
|
+
return PermissionsFileBasedStream(
|
346
|
+
config=stream_config,
|
347
|
+
catalog_schema=self.stream_schemas.get(stream_config.name),
|
348
|
+
stream_reader=self.stream_reader,
|
349
|
+
availability_strategy=self.availability_strategy,
|
350
|
+
discovery_policy=self.discovery_policy,
|
351
|
+
parsers=self.parsers,
|
352
|
+
validation_policy=self._validate_and_get_validation_policy(stream_config),
|
353
|
+
errors_collector=self.errors_collector,
|
354
|
+
cursor=cursor,
|
355
|
+
)
|
356
|
+
|
357
|
+
def _make_file_based_stream(
|
358
|
+
self,
|
359
|
+
stream_config: FileBasedStreamConfig,
|
360
|
+
cursor: Optional[AbstractFileBasedCursor],
|
361
|
+
parsed_config: AbstractFileBasedSpec,
|
362
|
+
) -> AbstractFileBasedStream:
|
363
|
+
if use_permissions_transfer(parsed_config):
|
364
|
+
return self._make_permissions_stream(stream_config, cursor)
|
365
|
+
else:
|
366
|
+
return self._make_default_stream(stream_config, cursor, parsed_config)
|
367
|
+
|
340
368
|
def _make_identities_stream(
|
341
369
|
self,
|
342
370
|
) -> Stream:
|
@@ -47,7 +47,6 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
|
|
47
47
|
|
48
48
|
FILE_TRANSFER_KW = "use_file_transfer"
|
49
49
|
PRESERVE_DIRECTORY_STRUCTURE_KW = "preserve_directory_structure"
|
50
|
-
PERMISSIONS_TRANSFER_KW = "use_permissions_transfer"
|
51
50
|
FILES_KEY = "files"
|
52
51
|
DATE_TIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"
|
53
52
|
ab_last_mod_col = "_ab_source_file_last_modified"
|
@@ -57,7 +56,6 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
|
|
57
56
|
airbyte_columns = [ab_last_mod_col, ab_file_name_col]
|
58
57
|
use_file_transfer = False
|
59
58
|
preserve_directory_structure = True
|
60
|
-
use_permissions_transfer = False
|
61
59
|
|
62
60
|
def __init__(self, **kwargs: Any):
|
63
61
|
if self.FILE_TRANSFER_KW in kwargs:
|
@@ -66,8 +64,6 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
|
|
66
64
|
self.preserve_directory_structure = kwargs.pop(
|
67
65
|
self.PRESERVE_DIRECTORY_STRUCTURE_KW, True
|
68
66
|
)
|
69
|
-
if self.PERMISSIONS_TRANSFER_KW in kwargs:
|
70
|
-
self.use_permissions_transfer = kwargs.pop(self.PERMISSIONS_TRANSFER_KW, False)
|
71
67
|
super().__init__(**kwargs)
|
72
68
|
|
73
69
|
@property
|
@@ -109,8 +105,6 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
|
|
109
105
|
self.ab_file_name_col: {"type": "string"},
|
110
106
|
},
|
111
107
|
}
|
112
|
-
elif self.use_permissions_transfer:
|
113
|
-
return self.stream_reader.file_permissions_schema
|
114
108
|
else:
|
115
109
|
return super()._filter_schema_invalid_properties(configured_catalog_json_schema)
|
116
110
|
|
@@ -193,29 +187,6 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
|
|
193
187
|
yield stream_data_to_airbyte_message(
|
194
188
|
self.name, record, is_file_transfer_message=True
|
195
189
|
)
|
196
|
-
elif self.use_permissions_transfer:
|
197
|
-
try:
|
198
|
-
permissions_record = self.stream_reader.get_file_acl_permissions(
|
199
|
-
file, logger=self.logger
|
200
|
-
)
|
201
|
-
permissions_record = self.transform_record(
|
202
|
-
permissions_record, file, file_datetime_string
|
203
|
-
)
|
204
|
-
yield stream_data_to_airbyte_message(
|
205
|
-
self.name, permissions_record, is_file_transfer_message=False
|
206
|
-
)
|
207
|
-
except Exception as e:
|
208
|
-
self.logger.error(
|
209
|
-
f"Failed to retrieve permissions for file {file.uri}: {str(e)}"
|
210
|
-
)
|
211
|
-
yield AirbyteMessage(
|
212
|
-
type=MessageType.LOG,
|
213
|
-
log=AirbyteLogMessage(
|
214
|
-
level=Level.ERROR,
|
215
|
-
message=f"Error retrieving files permissions: stream={self.name} file={file.uri}",
|
216
|
-
stack_trace=traceback.format_exc(),
|
217
|
-
),
|
218
|
-
)
|
219
190
|
else:
|
220
191
|
for record in parser.parse_records(
|
221
192
|
self.config, file, self.stream_reader, self.logger, schema
|
@@ -313,8 +284,6 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
|
|
313
284
|
def _get_raw_json_schema(self) -> JsonSchema:
|
314
285
|
if self.use_file_transfer:
|
315
286
|
return file_transfer_schema
|
316
|
-
elif self.use_permissions_transfer:
|
317
|
-
return self.stream_reader.file_permissions_schema
|
318
287
|
elif self.config.input_schema:
|
319
288
|
return self.config.get_input_schema() # type: ignore
|
320
289
|
elif self.config.schemaless:
|
@@ -0,0 +1,54 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
import traceback
|
6
|
+
from typing import Any, Dict, Iterable
|
7
|
+
|
8
|
+
from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Level
|
9
|
+
from airbyte_cdk.models import Type as MessageType
|
10
|
+
from airbyte_cdk.sources.file_based.stream import DefaultFileBasedStream
|
11
|
+
from airbyte_cdk.sources.file_based.types import StreamSlice
|
12
|
+
from airbyte_cdk.sources.streams.core import JsonSchema
|
13
|
+
from airbyte_cdk.sources.utils.record_helper import stream_data_to_airbyte_message
|
14
|
+
|
15
|
+
|
16
|
+
class PermissionsFileBasedStream(DefaultFileBasedStream):
|
17
|
+
"""
|
18
|
+
The permissions stream, stream_reader on source handles logic for schemas and ACLs permissions.
|
19
|
+
"""
|
20
|
+
|
21
|
+
def _filter_schema_invalid_properties(
|
22
|
+
self, configured_catalog_json_schema: Dict[str, Any]
|
23
|
+
) -> Dict[str, Any]:
|
24
|
+
return self.stream_reader.file_permissions_schema
|
25
|
+
|
26
|
+
def read_records_from_slice(self, stream_slice: StreamSlice) -> Iterable[AirbyteMessage]:
|
27
|
+
"""
|
28
|
+
Yield permissions records from all remote files
|
29
|
+
"""
|
30
|
+
for file in stream_slice["files"]:
|
31
|
+
file_datetime_string = file.last_modified.strftime(self.DATE_TIME_FORMAT)
|
32
|
+
try:
|
33
|
+
permissions_record = self.stream_reader.get_file_acl_permissions(
|
34
|
+
file, logger=self.logger
|
35
|
+
)
|
36
|
+
permissions_record = self.transform_record(
|
37
|
+
permissions_record, file, file_datetime_string
|
38
|
+
)
|
39
|
+
yield stream_data_to_airbyte_message(
|
40
|
+
self.name, permissions_record, is_file_transfer_message=False
|
41
|
+
)
|
42
|
+
except Exception as e:
|
43
|
+
self.logger.error(f"Failed to retrieve permissions for file {file.uri}: {str(e)}")
|
44
|
+
yield AirbyteMessage(
|
45
|
+
type=MessageType.LOG,
|
46
|
+
log=AirbyteLogMessage(
|
47
|
+
level=Level.ERROR,
|
48
|
+
message=f"Error retrieving files permissions: stream={self.name} file={file.uri}",
|
49
|
+
stack_trace=traceback.format_exc(),
|
50
|
+
),
|
51
|
+
)
|
52
|
+
|
53
|
+
def _get_raw_json_schema(self) -> JsonSchema:
|
54
|
+
return self.stream_reader.file_permissions_schema
|
@@ -214,7 +214,7 @@ airbyte_cdk/sources/file_based/discovery_policy/__init__.py,sha256=gl3ey6mZbyfra
|
|
214
214
|
airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py,sha256=dCfXX529Rd5rtopg4VeEgTPJjFtqjtjzPq6LCw18Wt0,605
|
215
215
|
airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py,sha256=-xujTidtrq6HC00WKbjQh1CZdT5LMuzkp5BLjqDmfTY,1007
|
216
216
|
airbyte_cdk/sources/file_based/exceptions.py,sha256=WP0qkG6fpWoBpOyyicgp5YNE393VWyegq5qSy0v4QtM,7362
|
217
|
-
airbyte_cdk/sources/file_based/file_based_source.py,sha256=
|
217
|
+
airbyte_cdk/sources/file_based/file_based_source.py,sha256=nWWTWfwp6SSrcRcJZOWMMfu2Joi-PFZhHlreni-pVR4,18778
|
218
218
|
airbyte_cdk/sources/file_based/file_based_stream_reader.py,sha256=z_DNWGiKUQCgYO9vsOy40aNbTsU_QbOuSHMCGegNLuw,8409
|
219
219
|
airbyte_cdk/sources/file_based/file_types/__init__.py,sha256=blCLn0-2LC-ZdgcNyDEhqM2RiUvEjEBh-G4-t32ZtuM,1268
|
220
220
|
airbyte_cdk/sources/file_based/file_types/avro_parser.py,sha256=XNx-JC-sgzH9u3nOJ2M59FxBXvtig8LN6BIkeDOavZA,10858
|
@@ -241,8 +241,9 @@ airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_c
|
|
241
241
|
airbyte_cdk/sources/file_based/stream/cursor/__init__.py,sha256=MhFB5hOo8sjwvCh8gangaymdg3EJWYt_72brFOZt068,191
|
242
242
|
airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py,sha256=om-x3gZFPgWDpi15S9RxZmR36VHnk8sytgN6LlBQhAw,1934
|
243
243
|
airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py,sha256=VGV7xLyBribuBMVrXtO1xqkWJD86bl7yhXtjnwLMohM,7051
|
244
|
-
airbyte_cdk/sources/file_based/stream/default_file_based_stream.py,sha256=
|
244
|
+
airbyte_cdk/sources/file_based/stream/default_file_based_stream.py,sha256=XLU5cNqQ-5mj243gNzMyXtm_oCtg1ORyoqbCsUo9Dn4,18044
|
245
245
|
airbyte_cdk/sources/file_based/stream/identities_stream.py,sha256=KuhIVV0NFc6RhtzrAvswYvAO_dgPSYkWgOfqr4NAgQw,1765
|
246
|
+
airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py,sha256=QTB3G-Bs6nK37W1yGAygHnUaNzVbF7KUuJyhkdMNKYo,2261
|
246
247
|
airbyte_cdk/sources/file_based/types.py,sha256=INxG7OPnkdUP69oYNKMAbwhvV1AGvLRHs1J6pIia2FI,218
|
247
248
|
airbyte_cdk/sources/http_config.py,sha256=OBZeuyFilm6NlDlBhFQvHhTWabEvZww6OHDIlZujIS0,730
|
248
249
|
airbyte_cdk/sources/http_logger.py,sha256=l_1fk5YwdonZ1wvAsTwjj6d36fj2WrVraIAMj5jTQdM,1575
|
@@ -355,9 +356,9 @@ airbyte_cdk/utils/slice_hasher.py,sha256=EDxgROHDbfG-QKQb59m7h_7crN1tRiawdf5uU7G
|
|
355
356
|
airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
|
356
357
|
airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
|
357
358
|
airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
|
358
|
-
airbyte_cdk-6.26.0.
|
359
|
-
airbyte_cdk-6.26.0.
|
360
|
-
airbyte_cdk-6.26.0.
|
361
|
-
airbyte_cdk-6.26.0.
|
362
|
-
airbyte_cdk-6.26.0.
|
363
|
-
airbyte_cdk-6.26.0.
|
359
|
+
airbyte_cdk-6.26.0.dev4110.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
|
360
|
+
airbyte_cdk-6.26.0.dev4110.dist-info/LICENSE_SHORT,sha256=aqF6D1NcESmpn-cqsxBtszTEnHKnlsp8L4x9wAh3Nxg,55
|
361
|
+
airbyte_cdk-6.26.0.dev4110.dist-info/METADATA,sha256=M8Mus_j1GU55csZYUEiFBsJ9sUcXMQYZGjP65pyv3gE,6018
|
362
|
+
airbyte_cdk-6.26.0.dev4110.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
363
|
+
airbyte_cdk-6.26.0.dev4110.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
|
364
|
+
airbyte_cdk-6.26.0.dev4110.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
{airbyte_cdk-6.26.0.dev4109.dist-info → airbyte_cdk-6.26.0.dev4110.dist-info}/entry_points.txt
RENAMED
File without changes
|