airbyte-source-google-drive 0.1.0rc1.dev202501201950__py3-none-any.whl → 0.2.0.dev202501241656__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of airbyte-source-google-drive might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: airbyte-source-google-drive
3
- Version: 0.1.0rc1.dev202501201950
3
+ Version: 0.2.0.dev202501241656
4
4
  Summary: Source implementation for Google Drive.
5
5
  License: ELv2
6
6
  Author: Airbyte
@@ -10,7 +10,7 @@ Classifier: License :: Other/Proprietary License
10
10
  Classifier: Programming Language :: Python :: 3
11
11
  Classifier: Programming Language :: Python :: 3.10
12
12
  Classifier: Programming Language :: Python :: 3.11
13
- Requires-Dist: airbyte-cdk[file-based] (>=6.18.2,<7.0.0)
13
+ Requires-Dist: airbyte-cdk[file-based] (==6.26.0.dev04100)
14
14
  Requires-Dist: google-api-python-client (==2.104.0)
15
15
  Requires-Dist: google-api-python-client-stubs (==1.18.0)
16
16
  Requires-Dist: google-auth-httplib2 (==0.1.1)
@@ -0,0 +1,11 @@
1
+ source_google_drive/__init__.py,sha256=SAgPIoGpjdHTY7mvf__A99FiElBh3WvdKSHLstg-BUE,134
2
+ source_google_drive/exceptions.py,sha256=6sBxbdZhj0CARkctfBaNgGHB_GGcFUkY4cnMUWyOruM,268
3
+ source_google_drive/run.py,sha256=5AQd906FbSGaNZj0udrr-VKmiSS4nsor7F7noMjsdi0,677
4
+ source_google_drive/source.py,sha256=OyKTjPg80KR8yZiYiC3YSi6E6egothtAn3YAHAS6Q6A,2839
5
+ source_google_drive/spec.py,sha256=krZ1qRjUP7vV4JLGMgggMkP4kizGG4uj8mX0S0CUwU8,4706
6
+ source_google_drive/stream_reader.py,sha256=8Ibh4vXdPElHfI4KgeBf2HqkQMMVWrV6shLGBdYXUu4,17008
7
+ source_google_drive/utils.py,sha256=1Fe3J4KXI1iIf4rklgLoR4p1xkCzLQI7zj5dkyi4Vt0,942
8
+ airbyte_source_google_drive-0.2.0.dev202501241656.dist-info/METADATA,sha256=3v6W_Q5H627SdajGIzK_u4r2llp7Uc9SAI700QMkOW4,5536
9
+ airbyte_source_google_drive-0.2.0.dev202501241656.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
10
+ airbyte_source_google_drive-0.2.0.dev202501241656.dist-info/entry_points.txt,sha256=YgpJf0nA5Mn0B7YC9VOFI847vz1jI6U4q7BeLUOXa54,67
11
+ airbyte_source_google_drive-0.2.0.dev202501241656.dist-info/RECORD,,
@@ -2,14 +2,51 @@
2
2
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
-
5
+ import uuid
6
+ from datetime import datetime
6
7
  from typing import Any, Dict, Literal, Union
7
8
 
8
9
  import dpath.util
9
10
  from pydantic.v1 import BaseModel, Field
10
11
 
11
12
  from airbyte_cdk import OneOfOptionConfig
12
- from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import AbstractFileBasedSpec, DeliverRawFiles, DeliverRecords
13
+ from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import (
14
+ AbstractFileBasedSpec,
15
+ DeliverRawFiles,
16
+ )
17
+ from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import (
18
+ DeliverRecords as DeliverRecordsBase,
19
+ )
20
+
21
+
22
+ class RemoteFileIdentity(BaseModel):
23
+ id: uuid.UUID
24
+ remote_id: str
25
+ parent_id: str | None = None
26
+ name: str | None = None
27
+ description: str | None = None
28
+ email_address: str | None = None
29
+ member_email_addresses: list[str] | None = None
30
+ type: str
31
+ modified_at: datetime
32
+
33
+
34
+ class RemoteFileMetadata(BaseModel):
35
+ id: str
36
+ file_path: str
37
+ allowed_identity_remote_ids: list[str] | None = None
38
+ denied_identity_remote_ids: list[str] | None = None
39
+ publicly_accessible: bool = False
40
+
41
+
42
+ class DeliverRecords(DeliverRecordsBase):
43
+ # Overriding to make visible with airbyte_hidden=False
44
+ sync_metadata: bool = Field(
45
+ title="Make stream sync files metadata",
46
+ description="If enabled, streams will sync files metadata instead of files data.",
47
+ default=False,
48
+ airbyte_hidden=False,
49
+ )
13
50
 
14
51
 
15
52
  class OAuthCredentials(BaseModel):
@@ -6,10 +6,11 @@
6
6
  import io
7
7
  import json
8
8
  import logging
9
+ import uuid
9
10
  from datetime import datetime
10
11
  from io import IOBase
11
12
  from os.path import getsize
12
- from typing import Dict, Iterable, List, Optional, Set
13
+ from typing import Any, Dict, Iterable, List, Optional, Set, Tuple
13
14
 
14
15
  from google.oauth2 import credentials, service_account
15
16
  from googleapiclient.discovery import build
@@ -22,7 +23,7 @@ from airbyte_cdk.sources.file_based.remote_file import RemoteFile
22
23
  from source_google_drive.utils import get_folder_id
23
24
 
24
25
  from .exceptions import ErrorDownloadingFile, ErrorFetchingMetadata
25
- from .spec import SourceGoogleDriveSpec
26
+ from .spec import RemoteFileIdentity, RemoteFileMetadata, SourceGoogleDriveSpec
26
27
 
27
28
 
28
29
  FOLDER_MIME_TYPE = "application/vnd.google-apps.folder"
@@ -54,6 +55,20 @@ DOWNLOADABLE_DOCUMENTS_MIME_TYPES = {
54
55
  GOOGLE_DRAWING_MIME_TYPE: {EXPORT_MEDIA_MIME_TYPE_KEY: EXPORT_MEDIA_MIME_TYPE_PDF, DOCUMENT_FILE_EXTENSION_KEY: ".pdf"},
55
56
  }
56
57
 
58
+ PUBLIC_PERMISSION_IDS = [
59
+ "anyoneWithLink",
60
+ "anyoneCanFind",
61
+ "domainCanFind",
62
+ "domainWithLink",
63
+ ]
64
+
65
+ PERMISSIONS_API_SCOPES = [
66
+ "https://www.googleapis.com/auth/drive",
67
+ "https://www.googleapis.com/auth/admin.directory.group.readonly",
68
+ "https://www.googleapis.com/auth/admin.directory.group.member.readonly",
69
+ "https://www.googleapis.com/auth/admin.directory.user.readonly",
70
+ ]
71
+
57
72
 
58
73
  class GoogleDriveRemoteFile(RemoteFile):
59
74
  id: str
@@ -98,7 +113,10 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
98
113
  if self.config.credentials.auth_type == "Client":
99
114
  creds = credentials.Credentials.from_authorized_user_info(self.config.credentials.dict())
100
115
  else:
101
- creds = service_account.Credentials.from_service_account_info(json.loads(self.config.credentials.service_account_info))
116
+ scopes = PERMISSIONS_API_SCOPES if self.sync_metadata() else None
117
+ creds = service_account.Credentials.from_service_account_info(
118
+ json.loads(self.config.credentials.service_account_info), scopes=scopes
119
+ )
102
120
  self._drive_service = build("drive", "v3", credentials=creds)
103
121
  except Exception as e:
104
122
  raise AirbyteTracedException(
@@ -281,4 +299,65 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
281
299
  return {"file_url": absolute_file_path, "bytes": file_size, "file_relative_path": file_relative_path}
282
300
 
283
301
  except Exception as e:
284
- raise ErrorDownloadingFile(f"There was an error while trying to download the file: {str(e)}")
302
+ raise ErrorDownloadingFile(f"There was an error while trying to download the file {file.uri}: {str(e)}")
303
+
304
+ def get_file_permissions(self, file_id: str, file_name: str, logger: logging.Logger) -> Tuple[List[RemoteFileIdentity], bool]:
305
+ """
306
+ Retrieves the permissions of a file in Google Drive and checks for public access.
307
+
308
+ Args:
309
+ file_id (str): The file to get permissions for.
310
+ file_name (str): The name of the file to get permissions for.
311
+ logger (logging.Logger): Logger for debugging and information.
312
+
313
+ Returns:
314
+ Tuple(List[RemoteFileIdentity], boolean): A list of RemoteFileIdentity objects containing permission details.
315
+ """
316
+ try:
317
+ request = self.google_drive_service.permissions().list(
318
+ fileId=file_id,
319
+ fields="permissions, permissions/role, permissions/type, permissions/id, permissions/emailAddress",
320
+ supportsAllDrives=True,
321
+ )
322
+ response = request.execute()
323
+ permissions = response.get("permissions", [])
324
+ is_public = False
325
+
326
+ remote_identities = []
327
+
328
+ for p in permissions:
329
+ identity = self._to_remote_file_identity(p)
330
+ if p.get("id") in PUBLIC_PERMISSION_IDS:
331
+ is_public = True
332
+ if identity is not None:
333
+ remote_identities.append(identity)
334
+
335
+ logger.info(f"File {file_name} has {len(remote_identities)} valid permissions")
336
+ return remote_identities, is_public
337
+ except Exception as e:
338
+ raise ErrorFetchingMetadata(f"An error occurred while retrieving file permissions: {str(e)}")
339
+
340
+ def _to_remote_file_identity(self, identity: dict[str, Any]) -> RemoteFileIdentity | None:
341
+ if identity.get("id") in PUBLIC_PERMISSION_IDS:
342
+ return None
343
+ if identity.get("deleted") is True:
344
+ return None
345
+
346
+ return RemoteFileIdentity(
347
+ modified_at=datetime.now(),
348
+ id=uuid.uuid4(),
349
+ remote_id=identity.get("emailAddress"),
350
+ name=identity.get("name"),
351
+ email_address=identity.get("emailAddress"),
352
+ type=identity.get("type"),
353
+ description=None,
354
+ )
355
+
356
+ def get_file_metadata(self, file: GoogleDriveRemoteFile, logger: logging.Logger) -> Dict[str, Any]:
357
+ remote_identities, is_public = self.get_file_permissions(file.id, file_name=file.uri, logger=logger)
358
+ return RemoteFileMetadata(
359
+ id=file.id,
360
+ file_path=file.uri,
361
+ allowed_identity_remote_ids=[p.remote_id for p in remote_identities],
362
+ publicly_accessible=is_public,
363
+ ).dict(exclude_none=True)
@@ -1,11 +0,0 @@
1
- source_google_drive/__init__.py,sha256=SAgPIoGpjdHTY7mvf__A99FiElBh3WvdKSHLstg-BUE,134
2
- source_google_drive/exceptions.py,sha256=6sBxbdZhj0CARkctfBaNgGHB_GGcFUkY4cnMUWyOruM,268
3
- source_google_drive/run.py,sha256=5AQd906FbSGaNZj0udrr-VKmiSS4nsor7F7noMjsdi0,677
4
- source_google_drive/source.py,sha256=OyKTjPg80KR8yZiYiC3YSi6E6egothtAn3YAHAS6Q6A,2839
5
- source_google_drive/spec.py,sha256=W5tUpyt-T9JjxsN6WTgz1SHBcGJnXg__L9M-nsDWOlk,3690
6
- source_google_drive/stream_reader.py,sha256=-BF7GuxyEInv4SeLHZhFvJUq1yaS2P5kRAvdtKwZFm8,13720
7
- source_google_drive/utils.py,sha256=1Fe3J4KXI1iIf4rklgLoR4p1xkCzLQI7zj5dkyi4Vt0,942
8
- airbyte_source_google_drive-0.1.0rc1.dev202501201950.dist-info/METADATA,sha256=RdzBQ6hgFqKmjz3HUky4Orxvi8dpcfoqFoIVmJQJ-AU,5537
9
- airbyte_source_google_drive-0.1.0rc1.dev202501201950.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
10
- airbyte_source_google_drive-0.1.0rc1.dev202501201950.dist-info/entry_points.txt,sha256=YgpJf0nA5Mn0B7YC9VOFI847vz1jI6U4q7BeLUOXa54,67
11
- airbyte_source_google_drive-0.1.0rc1.dev202501201950.dist-info/RECORD,,