airbyte-source-google-drive 0.2.0.dev202502031727__py3-none-any.whl → 0.2.0.dev202502032236__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of airbyte-source-google-drive might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: airbyte-source-google-drive
3
- Version: 0.2.0.dev202502031727
3
+ Version: 0.2.0.dev202502032236
4
4
  Summary: Source implementation for Google Drive.
5
5
  License: ELv2
6
6
  Author: Airbyte
@@ -10,7 +10,7 @@ Classifier: License :: Other/Proprietary License
10
10
  Classifier: Programming Language :: Python :: 3
11
11
  Classifier: Programming Language :: Python :: 3.10
12
12
  Classifier: Programming Language :: Python :: 3.11
13
- Requires-Dist: airbyte-cdk[file-based] (==6.26.0.dev04106)
13
+ Requires-Dist: airbyte-cdk[file-based] (==6.26.0.dev04107)
14
14
  Requires-Dist: google-api-python-client (==2.104.0)
15
15
  Requires-Dist: google-api-python-client-stubs (==1.18.0)
16
16
  Requires-Dist: google-auth-httplib2 (==0.1.1)
@@ -2,10 +2,10 @@ source_google_drive/__init__.py,sha256=SAgPIoGpjdHTY7mvf__A99FiElBh3WvdKSHLstg-B
2
2
  source_google_drive/exceptions.py,sha256=6sBxbdZhj0CARkctfBaNgGHB_GGcFUkY4cnMUWyOruM,268
3
3
  source_google_drive/run.py,sha256=5AQd906FbSGaNZj0udrr-VKmiSS4nsor7F7noMjsdi0,677
4
4
  source_google_drive/source.py,sha256=OyKTjPg80KR8yZiYiC3YSi6E6egothtAn3YAHAS6Q6A,2839
5
- source_google_drive/spec.py,sha256=YPOmOInQfmwHeB6Ay2z78LNvHsggncwU1bchQOsAZSk,4268
6
- source_google_drive/stream_reader.py,sha256=h4p3aafE8U9BqYgRBGXuADEhm4NCGMqYl2ExnQkj6bU,20266
5
+ source_google_drive/spec.py,sha256=-WkA2zGuQtf3G7uK8uq9BnimUlQh0s3vsqROmIHOgzI,4718
6
+ source_google_drive/stream_reader.py,sha256=8F0jEKz1ElM2wqHxVR91RUJfCSDoLtMQUxbX5lFkEy0,20166
7
7
  source_google_drive/utils.py,sha256=ewR-kBKLmtD-s7zqCfGECfzWYF43tpQdscAQIlUEkR8,1022
8
- airbyte_source_google_drive-0.2.0.dev202502031727.dist-info/METADATA,sha256=rMydjQTVW8_21SbChXDmp3i-LbEYiZi1OwOPvEKHh_k,5536
9
- airbyte_source_google_drive-0.2.0.dev202502031727.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
10
- airbyte_source_google_drive-0.2.0.dev202502031727.dist-info/entry_points.txt,sha256=YgpJf0nA5Mn0B7YC9VOFI847vz1jI6U4q7BeLUOXa54,67
11
- airbyte_source_google_drive-0.2.0.dev202502031727.dist-info/RECORD,,
8
+ airbyte_source_google_drive-0.2.0.dev202502032236.dist-info/METADATA,sha256=Hfb2iDOlay912TAmyj3QZ9GqvyVstYZflM_J2lpf2Lk,5536
9
+ airbyte_source_google_drive-0.2.0.dev202502032236.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
10
+ airbyte_source_google_drive-0.2.0.dev202502032236.dist-info/entry_points.txt,sha256=YgpJf0nA5Mn0B7YC9VOFI847vz1jI6U4q7BeLUOXa54,67
11
+ airbyte_source_google_drive-0.2.0.dev202502032236.dist-info/RECORD,,
@@ -1,7 +1,9 @@
1
1
  #
2
2
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
3
  #
4
-
4
+ import uuid
5
+ from datetime import datetime
6
+ from enum import Enum
5
7
  from typing import Any, Dict, Literal, Optional, Union
6
8
 
7
9
  import dpath.util
@@ -11,22 +13,40 @@ from airbyte_cdk import OneOfOptionConfig
11
13
  from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import (
12
14
  AbstractFileBasedSpec,
13
15
  DeliverRawFiles,
16
+ DeliverRecords,
14
17
  )
15
18
  from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import (
16
- DeliverRecords as DeliverRecordsBase,
19
+ DeliverPermissions as DeliverPermissionsBase,
17
20
  )
18
21
 
19
22
 
20
- class DeliverRecords(DeliverRecordsBase):
21
- # Overriding to make visible with airbyte_hidden=False
22
- sync_acl_permissions: bool = Field(
23
- title="Include ACL Permissions",
24
- description="Joins Document allowlists to each stream.",
25
- default=False,
26
- airbyte_hidden=False,
27
- order=0,
28
- )
29
- domain: Optional[str] = Field(title="Domain", description="The domain of the identities.", airbyte_hidden=False, order=1)
23
+ class RemoteIdentityType(Enum):
24
+ USER = "user"
25
+ GROUP = "group"
26
+
27
+
28
+ class RemoteIdentity(BaseModel):
29
+ id: uuid.UUID
30
+ remote_id: str
31
+ parent_id: str | None = None
32
+ name: str | None = None
33
+ description: str | None = None
34
+ email_address: str | None = None
35
+ member_email_addresses: list[str] | None = None
36
+ type: RemoteIdentityType
37
+ modified_at: datetime
38
+
39
+
40
+ class RemotePermissions(BaseModel):
41
+ id: str
42
+ file_path: str
43
+ allowed_identity_remote_ids: list[str] | None = None
44
+ denied_identity_remote_ids: list[str] | None = None
45
+ publicly_accessible: bool = False
46
+
47
+
48
+ class DeliverPermissions(DeliverPermissionsBase):
49
+ domain: Optional[str] = Field(title="Domain", description="The Google domain of the identities.", airbyte_hidden=False, order=1)
30
50
 
31
51
 
32
52
  class OAuthCredentials(BaseModel):
@@ -77,7 +97,7 @@ class SourceGoogleDriveSpec(AbstractFileBasedSpec, BaseModel):
77
97
  pattern_descriptor="https://drive.google.com/drive/folders/MY-FOLDER-ID",
78
98
  )
79
99
 
80
- delivery_method: DeliverRecords | DeliverRawFiles = Field(
100
+ delivery_method: DeliverRecords | DeliverRawFiles | DeliverPermissions = Field(
81
101
  title="Delivery Method",
82
102
  discriminator="delivery_type",
83
103
  type="object",
@@ -18,14 +18,13 @@ from googleapiclient.discovery import build
18
18
  from googleapiclient.http import MediaIoBaseDownload
19
19
 
20
20
  from airbyte_cdk import AirbyteTracedException, FailureType
21
- from airbyte_cdk.sources.file_based.config.permissions import RemoteFileIdentity, RemoteFileIdentityType, RemoteFilePermissions
22
21
  from airbyte_cdk.sources.file_based.exceptions import FileSizeLimitError
23
22
  from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader, FileReadMode
24
23
  from airbyte_cdk.sources.file_based.remote_file import RemoteFile
25
24
  from source_google_drive.utils import get_folder_id
26
25
 
27
26
  from .exceptions import ErrorDownloadingFile, ErrorFetchingMetadata
28
- from .spec import SourceGoogleDriveSpec
27
+ from .spec import RemoteIdentity, RemoteIdentityType, RemotePermissions, SourceGoogleDriveSpec
29
28
 
30
29
 
31
30
  FOLDER_MIME_TYPE = "application/vnd.google-apps.folder"
@@ -122,7 +121,7 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
122
121
  if self.config.credentials.auth_type == "Client":
123
122
  creds = credentials.Credentials.from_authorized_user_info(self.config.credentials.dict())
124
123
  else:
125
- scopes = PERMISSIONS_API_SCOPES if self.sync_acl_permissions() else None
124
+ scopes = PERMISSIONS_API_SCOPES if self.include_identities_stream() else None
126
125
  creds = service_account.Credentials.from_service_account_info(
127
126
  json.loads(self.config.credentials.service_account_info), scopes=scopes
128
127
  )
@@ -310,7 +309,7 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
310
309
  except Exception as e:
311
310
  raise ErrorDownloadingFile(f"There was an error while trying to download the file {file.uri}: {str(e)}")
312
311
 
313
- def get_file_permissions(self, file_id: str, file_name: str, logger: logging.Logger) -> Tuple[List[RemoteFileIdentity], bool]:
312
+ def get_file_permissions(self, file_id: str, file_name: str, logger: logging.Logger) -> Tuple[List[RemoteIdentity], bool]:
314
313
  """
315
314
  Retrieves the permissions of a file in Google Drive and checks for public access.
316
315
 
@@ -346,13 +345,13 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
346
345
  except Exception as e:
347
346
  raise ErrorFetchingMetadata(f"An error occurred while retrieving file permissions: {str(e)}")
348
347
 
349
- def _to_remote_file_identity(self, identity: dict[str, Any]) -> RemoteFileIdentity | None:
348
+ def _to_remote_file_identity(self, identity: dict[str, Any]) -> RemoteIdentity | None:
350
349
  if identity.get("id") in PUBLIC_PERMISSION_IDS:
351
350
  return None
352
351
  if identity.get("deleted") is True:
353
352
  return None
354
353
 
355
- return RemoteFileIdentity(
354
+ return RemoteIdentity(
356
355
  modified_at=datetime.now(),
357
356
  id=uuid.uuid4(),
358
357
  remote_id=identity.get("emailAddress"),
@@ -364,7 +363,7 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
364
363
 
365
364
  def get_file_acl_permissions(self, file: GoogleDriveRemoteFile, logger: logging.Logger) -> Dict[str, Any]:
366
365
  remote_identities, is_public = self.get_file_permissions(file.id, file_name=file.uri, logger=logger)
367
- return RemoteFilePermissions(
366
+ return RemotePermissions(
368
367
  id=file.id,
369
368
  file_path=file.uri,
370
369
  allowed_identity_remote_ids=[p.remote_id for p in remote_identities],
@@ -410,24 +409,24 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
410
409
  members_api = directory_service.members()
411
410
 
412
411
  for user in self._get_looping_google_api_list_response(users_api, "users", {"domain": domain}, logger):
413
- rfp = RemoteFileIdentity(
412
+ rfp = RemoteIdentity(
414
413
  id=uuid.uuid4(),
415
414
  remote_id=user["primaryEmail"],
416
415
  name=user["name"]["fullName"] if user["name"] is not None else None,
417
416
  email_address=user["primaryEmail"],
418
417
  member_email_addresses=[x["address"] for x in user["emails"]],
419
- type=RemoteFileIdentityType.USER,
418
+ type=RemoteIdentityType.USER,
420
419
  modified_at=datetime_now(),
421
420
  )
422
421
  yield rfp.dict()
423
422
 
424
423
  for group in self._get_looping_google_api_list_response(groups_api, "groups", {"domain": domain}, logger):
425
- rfp = RemoteFileIdentity(
424
+ rfp = RemoteIdentity(
426
425
  id=uuid.uuid4(),
427
426
  remote_id=group["email"],
428
427
  name=group["name"],
429
428
  email_address=group["email"],
430
- type=RemoteFileIdentityType.GROUP,
429
+ type=RemoteIdentityType.GROUP,
431
430
  modified_at=datetime_now(),
432
431
  )
433
432