airbyte-cdk 6.38.3.dev4100__py3-none-any.whl → 6.38.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -48,9 +48,6 @@ from airbyte_cdk.sources.file_based.exceptions import (
48
48
  FileBasedErrorsCollector,
49
49
  FileBasedSourceError,
50
50
  )
51
- from airbyte_cdk.sources.file_based.file_based_stream_permissions_reader import (
52
- AbstractFileBasedStreamPermissionsReader,
53
- )
54
51
  from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader
55
52
  from airbyte_cdk.sources.file_based.file_types import default_parsers
56
53
  from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser
@@ -103,10 +100,8 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
103
100
  cursor_cls: Type[
104
101
  Union[AbstractConcurrentFileBasedCursor, AbstractFileBasedCursor]
105
102
  ] = FileBasedConcurrentCursor,
106
- stream_permissions_reader: Optional[AbstractFileBasedStreamPermissionsReader] = None,
107
103
  ):
108
104
  self.stream_reader = stream_reader
109
- self.stream_permissions_reader = stream_permissions_reader
110
105
  self.spec_class = spec_class
111
106
  self.config = config
112
107
  self.catalog = catalog
@@ -342,23 +337,9 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
342
337
  preserve_directory_structure=preserve_directory_structure(parsed_config),
343
338
  )
344
339
 
345
- def _ensure_permissions_reader_available(self) -> None:
346
- """
347
- Validates that a stream permissions reader is available.
348
- Raises a ValueError if the reader is not provided.
349
- """
350
- if not self.stream_permissions_reader:
351
- raise ValueError(
352
- "Stream permissions reader is required for streams that use permissions transfer mode."
353
- )
354
-
355
340
  def _make_permissions_stream(
356
341
  self, stream_config: FileBasedStreamConfig, cursor: Optional[AbstractFileBasedCursor]
357
342
  ) -> AbstractFileBasedStream:
358
- """
359
- Creates a stream that reads permissions from files.
360
- """
361
- self._ensure_permissions_reader_available()
362
343
  return PermissionsFileBasedStream(
363
344
  config=stream_config,
364
345
  catalog_schema=self.stream_schemas.get(stream_config.name),
@@ -369,7 +350,6 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
369
350
  validation_policy=self._validate_and_get_validation_policy(stream_config),
370
351
  errors_collector=self.errors_collector,
371
352
  cursor=cursor,
372
- stream_permissions_reader=self.stream_permissions_reader, # type: ignore
373
353
  )
374
354
 
375
355
  def _make_file_based_stream(
@@ -390,10 +370,9 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
390
370
  def _make_identities_stream(
391
371
  self,
392
372
  ) -> Stream:
393
- self._ensure_permissions_reader_available()
394
373
  return FileIdentitiesStream(
395
374
  catalog_schema=self.stream_schemas.get(FileIdentitiesStream.IDENTITIES_STREAM_NAME),
396
- stream_permissions_reader=self.stream_permissions_reader, # type: ignore
375
+ stream_reader=self.stream_reader,
397
376
  discovery_policy=self.discovery_policy,
398
377
  errors_collector=self.errors_collector,
399
378
  )
@@ -184,3 +184,97 @@ class AbstractFileBasedStreamReader(ABC):
184
184
  makedirs(path.dirname(local_file_path), exist_ok=True)
185
185
  absolute_file_path = path.abspath(local_file_path)
186
186
  return [file_relative_path, local_file_path, absolute_file_path]
187
+
188
+ @abstractmethod
189
+ def get_file_acl_permissions(self, file: RemoteFile, logger: logging.Logger) -> Dict[str, Any]:
190
+ """
191
+ This function should return the allow list for a given file, i.e. the list of all identities and their permission levels associated with it
192
+
193
+ e.g.
194
+ def get_file_acl_permissions(self, file: RemoteFile, logger: logging.Logger):
195
+ api_conn = some_api.conn(credentials=SOME_CREDENTIALS)
196
+ result = api_conn.get_file_permissions_info(file.id)
197
+ return MyPermissionsModel(
198
+ id=result["id"],
199
+ access_control_list = result["access_control_list"],
200
+ is_public = result["is_public"],
201
+ ).dict()
202
+ """
203
+ raise NotImplementedError(
204
+ f"{self.__class__.__name__} does not implement get_file_acl_permissions(). To support ACL permissions, implement this method and update file_permissions_schema."
205
+ )
206
+
207
+ @abstractmethod
208
+ def load_identity_groups(self, logger: logging.Logger) -> Iterable[Dict[str, Any]]:
209
+ """
210
+ This function should return the identities in a given "space" or "domain" where the file metadata (ACLs) is fetched and the ACL items (identities) exist.
211
+
212
+ e.g.
213
+ def load_identity_groups(self, logger: logging.Logger) -> Iterable[Dict[str, Any]]:
214
+ api_conn = some_api.conn(credentials=SOME_CREDENTIALS)
215
+ users_api = api_conn.users()
216
+ groups_api = api_conn.groups()
217
+ members_api = api_conn.members()
218
+ for user in users_api.list():
219
+ yield my_identity_model(id=user.id, name=user.name, email_address=user.email, type="user").dict()
220
+ for group in groups_api.list():
221
+ group_obj = my_identity_model(id=group.id, name=group.name, email_address=group.email, type="group")
222
+ for member in members_api.list(group=group):
223
+ group_obj.member_email_addresses = group_obj.member_email_addresses or []
224
+ group_obj.member_email_addresses.append(member.email)
225
+ yield group_obj.dict()
226
+ """
227
+ raise NotImplementedError(
228
+ f"{self.__class__.__name__} does not implement load_identity_groups(). To support identities, implement this method and update identities_schema."
229
+ )
230
+
231
+ @property
232
+ @abstractmethod
233
+ def file_permissions_schema(self) -> Dict[str, Any]:
234
+ """
235
+ This function should return the permissions schema for file permissions stream.
236
+
237
+ e.g.
238
+ def file_permissions_schema(self) -> Dict[str, Any]:
239
+ # you can also follow the pattern we have for python connectors and have a json file and read from there e.g. schemas/identities.json
240
+ return {
241
+ "type": "object",
242
+ "properties": {
243
+ "id": { "type": "string" },
244
+ "file_path": { "type": "string" },
245
+ "access_control_list": {
246
+ "type": "array",
247
+ "items": { "type": "string" }
248
+ },
249
+ "publicly_accessible": { "type": "boolean" }
250
+ }
251
+ }
252
+ """
253
+ raise NotImplementedError(
254
+ f"{self.__class__.__name__} does not implement file_permissions_schema, please return json schema for your permissions streams."
255
+ )
256
+
257
+ @property
258
+ @abstractmethod
259
+ def identities_schema(self) -> Dict[str, Any]:
260
+ """
261
+ This function should return the identities schema for file identity stream.
262
+
263
+ e.g.
264
+ def identities_schema(self) -> Dict[str, Any]:
265
+ # you can also follow the pattern we have for python connectors and have a json file and read from there e.g. schemas/identities.json
266
+ return {
267
+ "type": "object",
268
+ "properties": {
269
+ "id": { "type": "string" },
270
+ "remote_id": { "type": "string" },
271
+ "name": { "type": ["null", "string"] },
272
+ "email_address": { "type": ["null", "string"] },
273
+ "member_email_addresses": { "type": ["null", "array"] },
274
+ "type": { "type": "string" },
275
+ }
276
+ }
277
+ """
278
+ raise NotImplementedError(
279
+ f"{self.__class__.__name__} does not implement identities_schema, please return json schema for your identities stream."
280
+ )
@@ -8,9 +8,7 @@ from typing import Any, Dict, Iterable, Mapping, MutableMapping, Optional
8
8
  from airbyte_cdk.sources.file_based.config.file_based_stream_config import PrimaryKeyType
9
9
  from airbyte_cdk.sources.file_based.discovery_policy import AbstractDiscoveryPolicy
10
10
  from airbyte_cdk.sources.file_based.exceptions import FileBasedErrorsCollector
11
- from airbyte_cdk.sources.file_based.file_based_stream_permissions_reader import (
12
- AbstractFileBasedStreamPermissionsReader,
13
- )
11
+ from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader
14
12
  from airbyte_cdk.sources.streams.core import JsonSchema
15
13
  from airbyte_cdk.sources.streams.permissions.identities_stream import IdentitiesStream
16
14
 
@@ -26,13 +24,13 @@ class FileIdentitiesStream(IdentitiesStream):
26
24
  def __init__(
27
25
  self,
28
26
  catalog_schema: Optional[Mapping[str, Any]],
29
- stream_permissions_reader: AbstractFileBasedStreamPermissionsReader,
27
+ stream_reader: AbstractFileBasedStreamReader,
30
28
  discovery_policy: AbstractDiscoveryPolicy,
31
29
  errors_collector: FileBasedErrorsCollector,
32
30
  ) -> None:
33
31
  super().__init__()
34
32
  self.catalog_schema = catalog_schema
35
- self.stream_permissions_reader = stream_permissions_reader
33
+ self.stream_reader = stream_reader
36
34
  self._discovery_policy = discovery_policy
37
35
  self.errors_collector = errors_collector
38
36
  self._cursor: MutableMapping[str, Any] = {}
@@ -42,8 +40,8 @@ class FileIdentitiesStream(IdentitiesStream):
42
40
  return None
43
41
 
44
42
  def load_identity_groups(self) -> Iterable[Dict[str, Any]]:
45
- return self.stream_permissions_reader.load_identity_groups(logger=self.logger)
43
+ return self.stream_reader.load_identity_groups(logger=self.logger)
46
44
 
47
45
  @cache
48
46
  def get_json_schema(self) -> JsonSchema:
49
- return self.stream_permissions_reader.identities_schema
47
+ return self.stream_reader.identities_schema
@@ -7,9 +7,6 @@ from typing import Any, Dict, Iterable
7
7
 
8
8
  from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Level
9
9
  from airbyte_cdk.models import Type as MessageType
10
- from airbyte_cdk.sources.file_based.file_based_stream_permissions_reader import (
11
- AbstractFileBasedStreamPermissionsReader,
12
- )
13
10
  from airbyte_cdk.sources.file_based.stream import DefaultFileBasedStream
14
11
  from airbyte_cdk.sources.file_based.types import StreamSlice
15
12
  from airbyte_cdk.sources.streams.core import JsonSchema
@@ -29,16 +26,10 @@ class PermissionsFileBasedStream(DefaultFileBasedStream):
29
26
  and schema definition, while this class handles the streaming interface.
30
27
  """
31
28
 
32
- def __init__(
33
- self, stream_permissions_reader: AbstractFileBasedStreamPermissionsReader, **kwargs: Any
34
- ):
35
- super().__init__(**kwargs)
36
- self.stream_permissions_reader = stream_permissions_reader
37
-
38
29
  def _filter_schema_invalid_properties(
39
30
  self, configured_catalog_json_schema: Dict[str, Any]
40
31
  ) -> Dict[str, Any]:
41
- return self.stream_permissions_reader.file_permissions_schema
32
+ return self.stream_reader.file_permissions_schema
42
33
 
43
34
  def read_records_from_slice(self, stream_slice: StreamSlice) -> Iterable[AirbyteMessage]:
44
35
  """
@@ -49,7 +40,7 @@ class PermissionsFileBasedStream(DefaultFileBasedStream):
49
40
  no_permissions = False
50
41
  file_datetime_string = file.last_modified.strftime(self.DATE_TIME_FORMAT)
51
42
  try:
52
- permissions_record = self.stream_permissions_reader.get_file_acl_permissions(
43
+ permissions_record = self.stream_reader.get_file_acl_permissions(
53
44
  file, logger=self.logger
54
45
  )
55
46
  if not permissions_record:
@@ -91,4 +82,4 @@ class PermissionsFileBasedStream(DefaultFileBasedStream):
91
82
  Returns:
92
83
  The file permissions schema that defines the structure of permission records
93
84
  """
94
- return self.stream_permissions_reader.file_permissions_schema
85
+ return self.stream_reader.file_permissions_schema
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: airbyte-cdk
3
- Version: 6.38.3.dev4100
3
+ Version: 6.38.4
4
4
  Summary: A framework for writing Airbyte Connectors.
5
5
  Home-page: https://airbyte.com
6
6
  License: MIT
@@ -214,9 +214,8 @@ airbyte_cdk/sources/file_based/discovery_policy/__init__.py,sha256=gl3ey6mZbyfra
214
214
  airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py,sha256=dCfXX529Rd5rtopg4VeEgTPJjFtqjtjzPq6LCw18Wt0,605
215
215
  airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py,sha256=-xujTidtrq6HC00WKbjQh1CZdT5LMuzkp5BLjqDmfTY,1007
216
216
  airbyte_cdk/sources/file_based/exceptions.py,sha256=WP0qkG6fpWoBpOyyicgp5YNE393VWyegq5qSy0v4QtM,7362
217
- airbyte_cdk/sources/file_based/file_based_source.py,sha256=HG4wok4kTEQYzoMxa3u_JwkgdUp_eHkqFMEx5Is7jOM,19934
218
- airbyte_cdk/sources/file_based/file_based_stream_permissions_reader.py,sha256=hwwVTi5Ue5cPCzVM8jWOVbsTdjVRW4eDyVv8JMWFnyc,4846
219
- airbyte_cdk/sources/file_based/file_based_stream_reader.py,sha256=0cmppYO3pZlFiJrs5oorF4JXv4ErhOeEMrdLG7P-Gdk,6742
217
+ airbyte_cdk/sources/file_based/file_based_source.py,sha256=JXfwc9KaW7PvjAbm2GJ7Ra3DJnCZH4KaE3WytYvtM1Q,18925
218
+ airbyte_cdk/sources/file_based/file_based_stream_reader.py,sha256=d2UZ3C8M-A591KvBvg8kDpVdpox0rKVlRhVy5bi-auc,11209
220
219
  airbyte_cdk/sources/file_based/file_types/__init__.py,sha256=blCLn0-2LC-ZdgcNyDEhqM2RiUvEjEBh-G4-t32ZtuM,1268
221
220
  airbyte_cdk/sources/file_based/file_types/avro_parser.py,sha256=XNx-JC-sgzH9u3nOJ2M59FxBXvtig8LN6BIkeDOavZA,10858
222
221
  airbyte_cdk/sources/file_based/file_types/csv_parser.py,sha256=QlCXB-ry3np67Q_VerQEPoWDOTcPTB6Go4ydZxY9ae4,20445
@@ -243,8 +242,8 @@ airbyte_cdk/sources/file_based/stream/cursor/__init__.py,sha256=MhFB5hOo8sjwvCh8
243
242
  airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py,sha256=om-x3gZFPgWDpi15S9RxZmR36VHnk8sytgN6LlBQhAw,1934
244
243
  airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py,sha256=VGV7xLyBribuBMVrXtO1xqkWJD86bl7yhXtjnwLMohM,7051
245
244
  airbyte_cdk/sources/file_based/stream/default_file_based_stream.py,sha256=XLU5cNqQ-5mj243gNzMyXtm_oCtg1ORyoqbCsUo9Dn4,18044
246
- airbyte_cdk/sources/file_based/stream/identities_stream.py,sha256=FZH83Geoy3K3nwUk2VVNJERFcXUTnl-4XljjucUM23s,1893
247
- airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py,sha256=ke82qgm7snOlQTDx94Lqsc0cDkHWi3OJDTrPxffpFqc,3914
245
+ airbyte_cdk/sources/file_based/stream/identities_stream.py,sha256=DwgNU-jDp5vZ_WloQSUzBciDnAFMo8bXPjXpQx5-eko,1790
246
+ airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py,sha256=i0Jn0zuAPomLa4pHSu9TQ3gAN5xXhNzPTYVwUDiDEyE,3523
248
247
  airbyte_cdk/sources/file_based/types.py,sha256=INxG7OPnkdUP69oYNKMAbwhvV1AGvLRHs1J6pIia2FI,218
249
248
  airbyte_cdk/sources/http_config.py,sha256=OBZeuyFilm6NlDlBhFQvHhTWabEvZww6OHDIlZujIS0,730
250
249
  airbyte_cdk/sources/http_logger.py,sha256=H93kPAujHhPmXNX0JSFG3D-SL6yEFA5PtKot9Hu3TYA,1690
@@ -358,9 +357,9 @@ airbyte_cdk/utils/slice_hasher.py,sha256=EDxgROHDbfG-QKQb59m7h_7crN1tRiawdf5uU7G
358
357
  airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
359
358
  airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
360
359
  airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
361
- airbyte_cdk-6.38.3.dev4100.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
362
- airbyte_cdk-6.38.3.dev4100.dist-info/LICENSE_SHORT,sha256=aqF6D1NcESmpn-cqsxBtszTEnHKnlsp8L4x9wAh3Nxg,55
363
- airbyte_cdk-6.38.3.dev4100.dist-info/METADATA,sha256=w60PKl3DfFjzvG3uKUtLCCEf0s83i-osbdqy2doggrc,6021
364
- airbyte_cdk-6.38.3.dev4100.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
365
- airbyte_cdk-6.38.3.dev4100.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
366
- airbyte_cdk-6.38.3.dev4100.dist-info/RECORD,,
360
+ airbyte_cdk-6.38.4.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
361
+ airbyte_cdk-6.38.4.dist-info/LICENSE_SHORT,sha256=aqF6D1NcESmpn-cqsxBtszTEnHKnlsp8L4x9wAh3Nxg,55
362
+ airbyte_cdk-6.38.4.dist-info/METADATA,sha256=r625YvczxwtgbhW8dS2s_xslYC-2sxWgmj_FkGvHO0Q,6013
363
+ airbyte_cdk-6.38.4.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
364
+ airbyte_cdk-6.38.4.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
365
+ airbyte_cdk-6.38.4.dist-info/RECORD,,
@@ -1,109 +0,0 @@
1
- #
2
- # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
3
- #
4
-
5
- import logging
6
- from abc import ABC, abstractmethod
7
- from typing import Any, Dict, Iterable
8
-
9
- from airbyte_cdk.sources.file_based.remote_file import RemoteFile
10
-
11
-
12
- class AbstractFileBasedStreamPermissionsReader(ABC):
13
- """
14
- This class is responsible for reading file permissions and Identities from a source.
15
- """
16
-
17
- @abstractmethod
18
- def get_file_acl_permissions(self, file: RemoteFile, logger: logging.Logger) -> Dict[str, Any]:
19
- """
20
- This function should return the allow list for a given file, i.e. the list of all identities and their permission levels associated with it
21
-
22
- e.g.
23
- def get_file_acl_permissions(self, file: RemoteFile, logger: logging.Logger):
24
- api_conn = some_api.conn(credentials=SOME_CREDENTIALS)
25
- result = api_conn.get_file_permissions_info(file.id)
26
- return MyPermissionsModel(
27
- id=result["id"],
28
- access_control_list = result["access_control_list"],
29
- is_public = result["is_public"],
30
- ).dict()
31
- """
32
- raise NotImplementedError(
33
- f"{self.__class__.__name__} does not implement get_file_acl_permissions(). To support ACL permissions, implement this method and update file_permissions_schema."
34
- )
35
-
36
- @abstractmethod
37
- def load_identity_groups(self, logger: logging.Logger) -> Iterable[Dict[str, Any]]:
38
- """
39
- This function should return the Identities in a determined "space" or "domain" where the file metadata (ACLs) are fetched and ACLs items (Identities) exists.
40
-
41
- e.g.
42
- def load_identity_groups(self, logger: logging.Logger) -> Dict[str, Any]:
43
- api_conn = some_api.conn(credentials=SOME_CREDENTIALS)
44
- users_api = api_conn.users()
45
- groups_api = api_conn.groups()
46
- members_api = self.google_directory_service.members()
47
- for user in users_api.list():
48
- yield my_identity_model(id=user.id, name=user.name, email_address=user.email, type="user").dict()
49
- for group in groups_api.list():
50
- group_obj = my_identity_model(id=group.id, name=groups.name, email_address=user.email, type="group").dict()
51
- for member in members_api.list(group=group):
52
- group_obj.member_email_addresses = group_obj.member_email_addresses or []
53
- group_obj.member_email_addresses.append(member.email)
54
- yield group_obj.dict()
55
- """
56
- raise NotImplementedError(
57
- f"{self.__class__.__name__} does not implement load_identity_groups(). To support identities, implement this method and update identities_schema."
58
- )
59
-
60
- @property
61
- @abstractmethod
62
- def file_permissions_schema(self) -> Dict[str, Any]:
63
- """
64
- This function should return the permissions schema for file permissions stream.
65
-
66
- e.g.
67
- def file_permissions_schema(self) -> Dict[str, Any]:
68
- # you can also follow the patter we have for python connectors and have a json file and read from there e.g. schemas/identities.json
69
- return {
70
- "type": "object",
71
- "properties": {
72
- "id": { "type": "string" },
73
- "file_path": { "type": "string" },
74
- "access_control_list": {
75
- "type": "array",
76
- "items": { "type": "string" }
77
- },
78
- "publicly_accessible": { "type": "boolean" }
79
- }
80
- }
81
- """
82
- raise NotImplementedError(
83
- f"{self.__class__.__name__} does not implement file_permissions_schema, please return json schema for your permissions streams."
84
- )
85
-
86
- @property
87
- @abstractmethod
88
- def identities_schema(self) -> Dict[str, Any]:
89
- """
90
- This function should return the identities schema for file identity stream.
91
-
92
- e.g.
93
- def identities_schema(self) -> Dict[str, Any]:
94
- # you can also follow the patter we have for python connectors and have a json file and read from there e.g. schemas/identities.json
95
- return {
96
- "type": "object",
97
- "properties": {
98
- "id": { "type": "string" },
99
- "remote_id": { "type": "string" },
100
- "name": { "type": ["null", "string"] },
101
- "email_address": { "type": ["null", "string"] },
102
- "member_email_addresses": { "type": ["null", "array"] },
103
- "type": { "type": "string" },
104
- }
105
- }
106
- """
107
- raise NotImplementedError(
108
- f"{self.__class__.__name__} does not implement identities_schema, please return json schema for your identities stream."
109
- )