airbyte-source-google-drive 0.1.0rc1.dev202501202317__py3-none-any.whl → 0.2.0.dev202501241824__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of airbyte-source-google-drive might be problematic. Click here for more details.
- {airbyte_source_google_drive-0.1.0rc1.dev202501202317.dist-info → airbyte_source_google_drive-0.2.0.dev202501241824.dist-info}/METADATA +2 -2
- airbyte_source_google_drive-0.2.0.dev202501241824.dist-info/RECORD +11 -0
- source_google_drive/spec.py +39 -2
- source_google_drive/stream_reader.py +85 -3
- airbyte_source_google_drive-0.1.0rc1.dev202501202317.dist-info/RECORD +0 -11
- {airbyte_source_google_drive-0.1.0rc1.dev202501202317.dist-info → airbyte_source_google_drive-0.2.0.dev202501241824.dist-info}/WHEEL +0 -0
- {airbyte_source_google_drive-0.1.0rc1.dev202501202317.dist-info → airbyte_source_google_drive-0.2.0.dev202501241824.dist-info}/entry_points.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: airbyte-source-google-drive
|
|
3
|
-
Version: 0.1.0rc1.dev202501202317
|
|
3
|
+
Version: 0.2.0.dev202501241824
|
|
4
4
|
Summary: Source implementation for Google Drive.
|
|
5
5
|
License: ELv2
|
|
6
6
|
Author: Airbyte
|
|
@@ -10,7 +10,7 @@ Classifier: License :: Other/Proprietary License
|
|
|
10
10
|
Classifier: Programming Language :: Python :: 3
|
|
11
11
|
Classifier: Programming Language :: Python :: 3.10
|
|
12
12
|
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
-
Requires-Dist: airbyte-cdk[file-based] (
|
|
13
|
+
Requires-Dist: airbyte-cdk[file-based] (==6.26.0.dev04101)
|
|
14
14
|
Requires-Dist: google-api-python-client (==2.104.0)
|
|
15
15
|
Requires-Dist: google-api-python-client-stubs (==1.18.0)
|
|
16
16
|
Requires-Dist: google-auth-httplib2 (==0.1.1)
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
source_google_drive/__init__.py,sha256=SAgPIoGpjdHTY7mvf__A99FiElBh3WvdKSHLstg-BUE,134
|
|
2
|
+
source_google_drive/exceptions.py,sha256=6sBxbdZhj0CARkctfBaNgGHB_GGcFUkY4cnMUWyOruM,268
|
|
3
|
+
source_google_drive/run.py,sha256=5AQd906FbSGaNZj0udrr-VKmiSS4nsor7F7noMjsdi0,677
|
|
4
|
+
source_google_drive/source.py,sha256=OyKTjPg80KR8yZiYiC3YSi6E6egothtAn3YAHAS6Q6A,2839
|
|
5
|
+
source_google_drive/spec.py,sha256=krZ1qRjUP7vV4JLGMgggMkP4kizGG4uj8mX0S0CUwU8,4706
|
|
6
|
+
source_google_drive/stream_reader.py,sha256=mMcxz8C0J3ePBOCu3HWfaBFzam8kkrp45NxAKEHoiI0,17175
|
|
7
|
+
source_google_drive/utils.py,sha256=1Fe3J4KXI1iIf4rklgLoR4p1xkCzLQI7zj5dkyi4Vt0,942
|
|
8
|
+
airbyte_source_google_drive-0.2.0.dev202501241824.dist-info/METADATA,sha256=MAUR34WJ1R-JdcBVIWm72hif8hAtCJ8uzORoZSQOSH8,5536
|
|
9
|
+
airbyte_source_google_drive-0.2.0.dev202501241824.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
|
|
10
|
+
airbyte_source_google_drive-0.2.0.dev202501241824.dist-info/entry_points.txt,sha256=YgpJf0nA5Mn0B7YC9VOFI847vz1jI6U4q7BeLUOXa54,67
|
|
11
|
+
airbyte_source_google_drive-0.2.0.dev202501241824.dist-info/RECORD,,
|
source_google_drive/spec.py
CHANGED
|
@@ -2,14 +2,51 @@
|
|
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
3
|
#
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
import uuid
|
|
6
|
+
from datetime import datetime
|
|
6
7
|
from typing import Any, Dict, Literal, Union
|
|
7
8
|
|
|
8
9
|
import dpath.util
|
|
9
10
|
from pydantic.v1 import BaseModel, Field
|
|
10
11
|
|
|
11
12
|
from airbyte_cdk import OneOfOptionConfig
|
|
12
|
-
from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import AbstractFileBasedSpec
|
|
13
|
+
from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import (
|
|
14
|
+
AbstractFileBasedSpec,
|
|
15
|
+
DeliverRawFiles,
|
|
16
|
+
)
|
|
17
|
+
from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import (
|
|
18
|
+
DeliverRecords as DeliverRecordsBase,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class RemoteFileIdentity(BaseModel):
|
|
23
|
+
id: uuid.UUID
|
|
24
|
+
remote_id: str
|
|
25
|
+
parent_id: str | None = None
|
|
26
|
+
name: str | None = None
|
|
27
|
+
description: str | None = None
|
|
28
|
+
email_address: str | None = None
|
|
29
|
+
member_email_addresses: list[str] | None = None
|
|
30
|
+
type: str
|
|
31
|
+
modified_at: datetime
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class RemoteFileMetadata(BaseModel):
|
|
35
|
+
id: str
|
|
36
|
+
file_path: str
|
|
37
|
+
allowed_identity_remote_ids: list[str] | None = None
|
|
38
|
+
denied_identity_remote_ids: list[str] | None = None
|
|
39
|
+
publicly_accessible: bool = False
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class DeliverRecords(DeliverRecordsBase):
|
|
43
|
+
# Overriding to make visible with airbyte_hidden=False
|
|
44
|
+
sync_metadata: bool = Field(
|
|
45
|
+
title="Make stream sync files metadata",
|
|
46
|
+
description="If enabled, streams will sync files metadata instead of files data.",
|
|
47
|
+
default=False,
|
|
48
|
+
airbyte_hidden=False,
|
|
49
|
+
)
|
|
13
50
|
|
|
14
51
|
|
|
15
52
|
class OAuthCredentials(BaseModel):
|
|
source_google_drive/stream_reader.py
CHANGED
|
@@ -6,10 +6,11 @@
|
|
|
6
6
|
import io
|
|
7
7
|
import json
|
|
8
8
|
import logging
|
|
9
|
+
import uuid
|
|
9
10
|
from datetime import datetime
|
|
10
11
|
from io import IOBase
|
|
11
12
|
from os.path import getsize
|
|
12
|
-
from typing import Dict, Iterable, List, Optional, Set
|
|
13
|
+
from typing import Any, Dict, Iterable, List, Optional, Set, Tuple
|
|
13
14
|
|
|
14
15
|
from google.oauth2 import credentials, service_account
|
|
15
16
|
from googleapiclient.discovery import build
|
|
@@ -22,7 +23,7 @@ from airbyte_cdk.sources.file_based.remote_file import RemoteFile
|
|
|
22
23
|
from source_google_drive.utils import get_folder_id
|
|
23
24
|
|
|
24
25
|
from .exceptions import ErrorDownloadingFile, ErrorFetchingMetadata
|
|
25
|
-
from .spec import SourceGoogleDriveSpec
|
|
26
|
+
from .spec import RemoteFileIdentity, RemoteFileMetadata, SourceGoogleDriveSpec
|
|
26
27
|
|
|
27
28
|
|
|
28
29
|
FOLDER_MIME_TYPE = "application/vnd.google-apps.folder"
|
|
@@ -54,6 +55,23 @@ DOWNLOADABLE_DOCUMENTS_MIME_TYPES = {
|
|
|
54
55
|
GOOGLE_DRAWING_MIME_TYPE: {EXPORT_MEDIA_MIME_TYPE_KEY: EXPORT_MEDIA_MIME_TYPE_PDF, DOCUMENT_FILE_EXTENSION_KEY: ".pdf"},
|
|
55
56
|
}
|
|
56
57
|
|
|
58
|
+
PUBLIC_PERMISSION_IDS = [
|
|
59
|
+
"anyoneWithLink",
|
|
60
|
+
"anyoneCanFind",
|
|
61
|
+
"domainCanFind",
|
|
62
|
+
"domainWithLink",
|
|
63
|
+
]
|
|
64
|
+
|
|
65
|
+
PERMISSIONS_API_SCOPES = [
|
|
66
|
+
"https://www.googleapis.com/auth/drive",
|
|
67
|
+
"https://www.googleapis.com/auth/drive.readonly",
|
|
68
|
+
"https://www.googleapis.com/auth/drive.metadata.readonly",
|
|
69
|
+
"https://www.googleapis.com/auth/drive.file",
|
|
70
|
+
"https://www.googleapis.com/auth/admin.directory.group.readonly",
|
|
71
|
+
"https://www.googleapis.com/auth/admin.directory.group.member.readonly",
|
|
72
|
+
"https://www.googleapis.com/auth/admin.directory.user.readonly",
|
|
73
|
+
]
|
|
74
|
+
|
|
57
75
|
|
|
58
76
|
class GoogleDriveRemoteFile(RemoteFile):
|
|
59
77
|
id: str
|
|
@@ -98,7 +116,10 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
|
|
|
98
116
|
if self.config.credentials.auth_type == "Client":
|
|
99
117
|
creds = credentials.Credentials.from_authorized_user_info(self.config.credentials.dict())
|
|
100
118
|
else:
|
|
101
|
-
|
|
119
|
+
scopes = PERMISSIONS_API_SCOPES if self.sync_metadata() else None
|
|
120
|
+
creds = service_account.Credentials.from_service_account_info(
|
|
121
|
+
json.loads(self.config.credentials.service_account_info), scopes=scopes
|
|
122
|
+
)
|
|
102
123
|
self._drive_service = build("drive", "v3", credentials=creds)
|
|
103
124
|
except Exception as e:
|
|
104
125
|
raise AirbyteTracedException(
|
|
@@ -282,3 +303,64 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
|
|
|
282
303
|
|
|
283
304
|
except Exception as e:
|
|
284
305
|
raise ErrorDownloadingFile(f"There was an error while trying to download the file {file.uri}: {str(e)}")
|
|
306
|
+
|
|
307
|
+
def get_file_permissions(self, file_id: str, file_name: str, logger: logging.Logger) -> Tuple[List[RemoteFileIdentity], bool]:
|
|
308
|
+
"""
|
|
309
|
+
Retrieves the permissions of a file in Google Drive and checks for public access.
|
|
310
|
+
|
|
311
|
+
Args:
|
|
312
|
+
file_id (str): The file to get permissions for.
|
|
313
|
+
file_name (str): The name of the file to get permissions for.
|
|
314
|
+
logger (logging.Logger): Logger for debugging and information.
|
|
315
|
+
|
|
316
|
+
Returns:
|
|
317
|
+
Tuple(List[RemoteFileIdentity], boolean): A list of RemoteFileIdentity objects containing permission details.
|
|
318
|
+
"""
|
|
319
|
+
try:
|
|
320
|
+
request = self.google_drive_service.permissions().list(
|
|
321
|
+
fileId=file_id,
|
|
322
|
+
fields="permissions, permissions/role, permissions/type, permissions/id, permissions/emailAddress",
|
|
323
|
+
supportsAllDrives=True,
|
|
324
|
+
)
|
|
325
|
+
response = request.execute()
|
|
326
|
+
permissions = response.get("permissions", [])
|
|
327
|
+
is_public = False
|
|
328
|
+
|
|
329
|
+
remote_identities = []
|
|
330
|
+
|
|
331
|
+
for p in permissions:
|
|
332
|
+
identity = self._to_remote_file_identity(p)
|
|
333
|
+
if p.get("id") in PUBLIC_PERMISSION_IDS:
|
|
334
|
+
is_public = True
|
|
335
|
+
if identity is not None:
|
|
336
|
+
remote_identities.append(identity)
|
|
337
|
+
|
|
338
|
+
logger.info(f"File {file_name} has {len(remote_identities)} valid permissions")
|
|
339
|
+
return remote_identities, is_public
|
|
340
|
+
except Exception as e:
|
|
341
|
+
raise ErrorFetchingMetadata(f"An error occurred while retrieving file permissions: {str(e)}")
|
|
342
|
+
|
|
343
|
+
def _to_remote_file_identity(self, identity: dict[str, Any]) -> RemoteFileIdentity | None:
|
|
344
|
+
if identity.get("id") in PUBLIC_PERMISSION_IDS:
|
|
345
|
+
return None
|
|
346
|
+
if identity.get("deleted") is True:
|
|
347
|
+
return None
|
|
348
|
+
|
|
349
|
+
return RemoteFileIdentity(
|
|
350
|
+
modified_at=datetime.now(),
|
|
351
|
+
id=uuid.uuid4(),
|
|
352
|
+
remote_id=identity.get("emailAddress"),
|
|
353
|
+
name=identity.get("name"),
|
|
354
|
+
email_address=identity.get("emailAddress"),
|
|
355
|
+
type=identity.get("type"),
|
|
356
|
+
description=None,
|
|
357
|
+
)
|
|
358
|
+
|
|
359
|
+
def get_file_metadata(self, file: GoogleDriveRemoteFile, logger: logging.Logger) -> Dict[str, Any]:
|
|
360
|
+
remote_identities, is_public = self.get_file_permissions(file.id, file_name=file.uri, logger=logger)
|
|
361
|
+
return RemoteFileMetadata(
|
|
362
|
+
id=file.id,
|
|
363
|
+
file_path=file.uri,
|
|
364
|
+
allowed_identity_remote_ids=[p.remote_id for p in remote_identities],
|
|
365
|
+
publicly_accessible=is_public,
|
|
366
|
+
).dict(exclude_none=True)
|
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
source_google_drive/__init__.py,sha256=SAgPIoGpjdHTY7mvf__A99FiElBh3WvdKSHLstg-BUE,134
|
|
2
|
-
source_google_drive/exceptions.py,sha256=6sBxbdZhj0CARkctfBaNgGHB_GGcFUkY4cnMUWyOruM,268
|
|
3
|
-
source_google_drive/run.py,sha256=5AQd906FbSGaNZj0udrr-VKmiSS4nsor7F7noMjsdi0,677
|
|
4
|
-
source_google_drive/source.py,sha256=OyKTjPg80KR8yZiYiC3YSi6E6egothtAn3YAHAS6Q6A,2839
|
|
5
|
-
source_google_drive/spec.py,sha256=W5tUpyt-T9JjxsN6WTgz1SHBcGJnXg__L9M-nsDWOlk,3690
|
|
6
|
-
source_google_drive/stream_reader.py,sha256=thZEVYsF2sCI4ofFNfWWA24cBzyCm-O-30JM5Coi6Ew,13731
|
|
7
|
-
source_google_drive/utils.py,sha256=1Fe3J4KXI1iIf4rklgLoR4p1xkCzLQI7zj5dkyi4Vt0,942
|
|
8
|
-
airbyte_source_google_drive-0.1.0rc1.dev202501202317.dist-info/METADATA,sha256=lGS2yboaN8ZVkkgL_GlSy6dAVww23cDGdN5hda2B3vw,5537
|
|
9
|
-
airbyte_source_google_drive-0.1.0rc1.dev202501202317.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
|
|
10
|
-
airbyte_source_google_drive-0.1.0rc1.dev202501202317.dist-info/entry_points.txt,sha256=YgpJf0nA5Mn0B7YC9VOFI847vz1jI6U4q7BeLUOXa54,67
|
|
11
|
-
airbyte_source_google_drive-0.1.0rc1.dev202501202317.dist-info/RECORD,,
|
|
File without changes
|