airbyte-source-google-drive 0.2.0.dev202501291717__py3-none-any.whl → 0.2.0.dev202502032236__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of airbyte-source-google-drive might be problematic. Click here for more details.
- {airbyte_source_google_drive-0.2.0.dev202501291717.dist-info → airbyte_source_google_drive-0.2.0.dev202502032236.dist-info}/METADATA +2 -2
- airbyte_source_google_drive-0.2.0.dev202502032236.dist-info/RECORD +11 -0
- source_google_drive/spec.py +33 -13
- source_google_drive/stream_reader.py +72 -74
- source_google_drive/utils.py +2 -0
- airbyte_source_google_drive-0.2.0.dev202501291717.dist-info/RECORD +0 -11
- {airbyte_source_google_drive-0.2.0.dev202501291717.dist-info → airbyte_source_google_drive-0.2.0.dev202502032236.dist-info}/WHEEL +0 -0
- {airbyte_source_google_drive-0.2.0.dev202501291717.dist-info → airbyte_source_google_drive-0.2.0.dev202502032236.dist-info}/entry_points.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: airbyte-source-google-drive
|
|
3
|
-
Version: 0.2.0.dev202501291717
|
|
3
|
+
Version: 0.2.0.dev202502032236
|
|
4
4
|
Summary: Source implementation for Google Drive.
|
|
5
5
|
License: ELv2
|
|
6
6
|
Author: Airbyte
|
|
@@ -10,7 +10,7 @@ Classifier: License :: Other/Proprietary License
|
|
|
10
10
|
Classifier: Programming Language :: Python :: 3
|
|
11
11
|
Classifier: Programming Language :: Python :: 3.10
|
|
12
12
|
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
-
Requires-Dist: airbyte-cdk[file-based] (==6.26.0.
|
|
13
|
+
Requires-Dist: airbyte-cdk[file-based] (==6.26.0.dev04107)
|
|
14
14
|
Requires-Dist: google-api-python-client (==2.104.0)
|
|
15
15
|
Requires-Dist: google-api-python-client-stubs (==1.18.0)
|
|
16
16
|
Requires-Dist: google-auth-httplib2 (==0.1.1)
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
source_google_drive/__init__.py,sha256=SAgPIoGpjdHTY7mvf__A99FiElBh3WvdKSHLstg-BUE,134
|
|
2
|
+
source_google_drive/exceptions.py,sha256=6sBxbdZhj0CARkctfBaNgGHB_GGcFUkY4cnMUWyOruM,268
|
|
3
|
+
source_google_drive/run.py,sha256=5AQd906FbSGaNZj0udrr-VKmiSS4nsor7F7noMjsdi0,677
|
|
4
|
+
source_google_drive/source.py,sha256=OyKTjPg80KR8yZiYiC3YSi6E6egothtAn3YAHAS6Q6A,2839
|
|
5
|
+
source_google_drive/spec.py,sha256=-WkA2zGuQtf3G7uK8uq9BnimUlQh0s3vsqROmIHOgzI,4718
|
|
6
|
+
source_google_drive/stream_reader.py,sha256=8F0jEKz1ElM2wqHxVR91RUJfCSDoLtMQUxbX5lFkEy0,20166
|
|
7
|
+
source_google_drive/utils.py,sha256=ewR-kBKLmtD-s7zqCfGECfzWYF43tpQdscAQIlUEkR8,1022
|
|
8
|
+
airbyte_source_google_drive-0.2.0.dev202502032236.dist-info/METADATA,sha256=Hfb2iDOlay912TAmyj3QZ9GqvyVstYZflM_J2lpf2Lk,5536
|
|
9
|
+
airbyte_source_google_drive-0.2.0.dev202502032236.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
|
|
10
|
+
airbyte_source_google_drive-0.2.0.dev202502032236.dist-info/entry_points.txt,sha256=YgpJf0nA5Mn0B7YC9VOFI847vz1jI6U4q7BeLUOXa54,67
|
|
11
|
+
airbyte_source_google_drive-0.2.0.dev202502032236.dist-info/RECORD,,
|
source_google_drive/spec.py
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
#
|
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
3
|
#
|
|
4
|
-
|
|
4
|
+
import uuid
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
from enum import Enum
|
|
5
7
|
from typing import Any, Dict, Literal, Optional, Union
|
|
6
8
|
|
|
7
9
|
import dpath.util
|
|
@@ -11,22 +13,40 @@ from airbyte_cdk import OneOfOptionConfig
|
|
|
11
13
|
from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import (
|
|
12
14
|
AbstractFileBasedSpec,
|
|
13
15
|
DeliverRawFiles,
|
|
16
|
+
DeliverRecords,
|
|
14
17
|
)
|
|
15
18
|
from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import (
|
|
16
|
-
|
|
19
|
+
DeliverPermissions as DeliverPermissionsBase,
|
|
17
20
|
)
|
|
18
21
|
|
|
19
22
|
|
|
20
|
-
class
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
23
|
+
class RemoteIdentityType(Enum):
|
|
24
|
+
USER = "user"
|
|
25
|
+
GROUP = "group"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class RemoteIdentity(BaseModel):
|
|
29
|
+
id: uuid.UUID
|
|
30
|
+
remote_id: str
|
|
31
|
+
parent_id: str | None = None
|
|
32
|
+
name: str | None = None
|
|
33
|
+
description: str | None = None
|
|
34
|
+
email_address: str | None = None
|
|
35
|
+
member_email_addresses: list[str] | None = None
|
|
36
|
+
type: RemoteIdentityType
|
|
37
|
+
modified_at: datetime
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class RemotePermissions(BaseModel):
|
|
41
|
+
id: str
|
|
42
|
+
file_path: str
|
|
43
|
+
allowed_identity_remote_ids: list[str] | None = None
|
|
44
|
+
denied_identity_remote_ids: list[str] | None = None
|
|
45
|
+
publicly_accessible: bool = False
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class DeliverPermissions(DeliverPermissionsBase):
|
|
49
|
+
domain: Optional[str] = Field(title="Domain", description="The Google domain of the identities.", airbyte_hidden=False, order=1)
|
|
30
50
|
|
|
31
51
|
|
|
32
52
|
class OAuthCredentials(BaseModel):
|
|
@@ -77,7 +97,7 @@ class SourceGoogleDriveSpec(AbstractFileBasedSpec, BaseModel):
|
|
|
77
97
|
pattern_descriptor="https://drive.google.com/drive/folders/MY-FOLDER-ID",
|
|
78
98
|
)
|
|
79
99
|
|
|
80
|
-
delivery_method: DeliverRecords | DeliverRawFiles = Field(
|
|
100
|
+
delivery_method: DeliverRecords | DeliverRawFiles | DeliverPermissions = Field(
|
|
81
101
|
title="Delivery Method",
|
|
82
102
|
discriminator="delivery_type",
|
|
83
103
|
type="object",
|
|
@@ -18,14 +18,13 @@ from googleapiclient.discovery import build
|
|
|
18
18
|
from googleapiclient.http import MediaIoBaseDownload
|
|
19
19
|
|
|
20
20
|
from airbyte_cdk import AirbyteTracedException, FailureType
|
|
21
|
-
from airbyte_cdk.sources.file_based.config.permissions import RemoteFileIdentity, RemoteFileIdentityType, RemoteFilePermissions
|
|
22
21
|
from airbyte_cdk.sources.file_based.exceptions import FileSizeLimitError
|
|
23
22
|
from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader, FileReadMode
|
|
24
23
|
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
|
|
25
24
|
from source_google_drive.utils import get_folder_id
|
|
26
25
|
|
|
27
26
|
from .exceptions import ErrorDownloadingFile, ErrorFetchingMetadata
|
|
28
|
-
from .spec import SourceGoogleDriveSpec
|
|
27
|
+
from .spec import RemoteIdentity, RemoteIdentityType, RemotePermissions, SourceGoogleDriveSpec
|
|
29
28
|
|
|
30
29
|
|
|
31
30
|
FOLDER_MIME_TYPE = "application/vnd.google-apps.folder"
|
|
@@ -122,7 +121,7 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
|
|
|
122
121
|
if self.config.credentials.auth_type == "Client":
|
|
123
122
|
creds = credentials.Credentials.from_authorized_user_info(self.config.credentials.dict())
|
|
124
123
|
else:
|
|
125
|
-
scopes = PERMISSIONS_API_SCOPES if self.
|
|
124
|
+
scopes = PERMISSIONS_API_SCOPES if self.include_identities_stream() else None
|
|
126
125
|
creds = service_account.Credentials.from_service_account_info(
|
|
127
126
|
json.loads(self.config.credentials.service_account_info), scopes=scopes
|
|
128
127
|
)
|
|
@@ -137,73 +136,6 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
|
|
|
137
136
|
|
|
138
137
|
return self._drive_service
|
|
139
138
|
|
|
140
|
-
def _get_looping_google_api_list_response(
|
|
141
|
-
self, service: Any, key: str, args: dict[str, Any], logger: logging.Logger
|
|
142
|
-
) -> Iterator[dict[str, Any]]:
|
|
143
|
-
try:
|
|
144
|
-
looping = True
|
|
145
|
-
next_page_token: str | None = None
|
|
146
|
-
while looping:
|
|
147
|
-
rsp = service.list(pageToken=next_page_token, **args).execute()
|
|
148
|
-
next_page_token = rsp.get("nextPageToken")
|
|
149
|
-
items: list[dict[str, Any]] = rsp.get(key)
|
|
150
|
-
|
|
151
|
-
if items is None or len(items) == 0:
|
|
152
|
-
looping = False
|
|
153
|
-
break
|
|
154
|
-
|
|
155
|
-
if rsp.get("nextPageToken") is None:
|
|
156
|
-
looping = False
|
|
157
|
-
else:
|
|
158
|
-
next_page_token = rsp.get("nextPageToken")
|
|
159
|
-
|
|
160
|
-
for item in items:
|
|
161
|
-
yield item
|
|
162
|
-
except Exception as e:
|
|
163
|
-
logger.error(f"There was an error listing {key} with {args}: {str(e)}")
|
|
164
|
-
raise e
|
|
165
|
-
|
|
166
|
-
def load_identity_groups(self, logger: logging.Logger) -> Dict[str, Any]:
|
|
167
|
-
domain = self.config.delivery_method.domain
|
|
168
|
-
if not domain:
|
|
169
|
-
raise Exception("No domain was provided")
|
|
170
|
-
if self.google_drive_service is None or self.google_drive_service._http.credentials is None:
|
|
171
|
-
raise Exception("No auth found")
|
|
172
|
-
|
|
173
|
-
directory_service = build("admin", "directory_v1", credentials=self.google_drive_service._http.credentials)
|
|
174
|
-
users_api = directory_service.users()
|
|
175
|
-
groups_api = directory_service.groups()
|
|
176
|
-
members_api = directory_service.members()
|
|
177
|
-
|
|
178
|
-
# here is failing
|
|
179
|
-
for user in self._get_looping_google_api_list_response(users_api, "users", {"domain": domain}, logger):
|
|
180
|
-
rfp = RemoteFileIdentity(
|
|
181
|
-
id=uuid.uuid4(),
|
|
182
|
-
remote_id=user["primaryEmail"],
|
|
183
|
-
name=user["name"]["fullName"] if user["name"] is not None else None,
|
|
184
|
-
email_address=user["primaryEmail"],
|
|
185
|
-
member_email_addresses=[x["address"] for x in user["emails"]],
|
|
186
|
-
type=RemoteFileIdentityType.USER,
|
|
187
|
-
modified_at=datetime_now(),
|
|
188
|
-
)
|
|
189
|
-
yield rfp.dict()
|
|
190
|
-
|
|
191
|
-
for group in self._get_looping_google_api_list_response(groups_api, "groups", {"domain": domain}, logger):
|
|
192
|
-
rfp = RemoteFileIdentity(
|
|
193
|
-
id=uuid.uuid4(),
|
|
194
|
-
remote_id=group["email"],
|
|
195
|
-
name=group["name"],
|
|
196
|
-
email_address=group["email"],
|
|
197
|
-
type=RemoteFileIdentityType.GROUP,
|
|
198
|
-
modified_at=datetime_now(),
|
|
199
|
-
)
|
|
200
|
-
|
|
201
|
-
for member in self._get_looping_google_api_list_response(members_api, "members", {"groupKey": group["id"]}, logger):
|
|
202
|
-
rfp.member_email_addresses = rfp.member_email_addresses or []
|
|
203
|
-
rfp.member_email_addresses.append(member["email"])
|
|
204
|
-
|
|
205
|
-
yield rfp.dict()
|
|
206
|
-
|
|
207
139
|
def get_matching_files(self, globs: List[str], prefix: Optional[str], logger: logging.Logger) -> Iterable[RemoteFile]:
|
|
208
140
|
"""
|
|
209
141
|
Get all files matching the specified glob patterns.
|
|
@@ -377,7 +309,7 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
|
|
|
377
309
|
except Exception as e:
|
|
378
310
|
raise ErrorDownloadingFile(f"There was an error while trying to download the file {file.uri}: {str(e)}")
|
|
379
311
|
|
|
380
|
-
def get_file_permissions(self, file_id: str, file_name: str, logger: logging.Logger) -> Tuple[List[
|
|
312
|
+
def get_file_permissions(self, file_id: str, file_name: str, logger: logging.Logger) -> Tuple[List[RemoteIdentity], bool]:
|
|
381
313
|
"""
|
|
382
314
|
Retrieves the permissions of a file in Google Drive and checks for public access.
|
|
383
315
|
|
|
@@ -413,13 +345,13 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
|
|
|
413
345
|
except Exception as e:
|
|
414
346
|
raise ErrorFetchingMetadata(f"An error occurred while retrieving file permissions: {str(e)}")
|
|
415
347
|
|
|
416
|
-
def _to_remote_file_identity(self, identity: dict[str, Any]) ->
|
|
348
|
+
def _to_remote_file_identity(self, identity: dict[str, Any]) -> RemoteIdentity | None:
|
|
417
349
|
if identity.get("id") in PUBLIC_PERMISSION_IDS:
|
|
418
350
|
return None
|
|
419
351
|
if identity.get("deleted") is True:
|
|
420
352
|
return None
|
|
421
353
|
|
|
422
|
-
return
|
|
354
|
+
return RemoteIdentity(
|
|
423
355
|
modified_at=datetime.now(),
|
|
424
356
|
id=uuid.uuid4(),
|
|
425
357
|
remote_id=identity.get("emailAddress"),
|
|
@@ -431,9 +363,75 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
|
|
|
431
363
|
|
|
432
364
|
def get_file_acl_permissions(self, file: GoogleDriveRemoteFile, logger: logging.Logger) -> Dict[str, Any]:
|
|
433
365
|
remote_identities, is_public = self.get_file_permissions(file.id, file_name=file.uri, logger=logger)
|
|
434
|
-
return
|
|
366
|
+
return RemotePermissions(
|
|
435
367
|
id=file.id,
|
|
436
368
|
file_path=file.uri,
|
|
437
369
|
allowed_identity_remote_ids=[p.remote_id for p in remote_identities],
|
|
438
370
|
publicly_accessible=is_public,
|
|
439
371
|
).dict(exclude_none=True)
|
|
372
|
+
|
|
373
|
+
def _get_looping_google_api_list_response(
|
|
374
|
+
self, service: Any, key: str, args: dict[str, Any], logger: logging.Logger
|
|
375
|
+
) -> Iterator[dict[str, Any]]:
|
|
376
|
+
try:
|
|
377
|
+
looping = True
|
|
378
|
+
next_page_token: str | None = None
|
|
379
|
+
while looping:
|
|
380
|
+
rsp = service.list(pageToken=next_page_token, **args).execute()
|
|
381
|
+
next_page_token = rsp.get("nextPageToken")
|
|
382
|
+
items: list[dict[str, Any]] = rsp.get(key)
|
|
383
|
+
|
|
384
|
+
if items is None or len(items) == 0:
|
|
385
|
+
looping = False
|
|
386
|
+
break
|
|
387
|
+
|
|
388
|
+
if rsp.get("nextPageToken") is None:
|
|
389
|
+
looping = False
|
|
390
|
+
else:
|
|
391
|
+
next_page_token = rsp.get("nextPageToken")
|
|
392
|
+
|
|
393
|
+
for item in items:
|
|
394
|
+
yield item
|
|
395
|
+
except Exception as e:
|
|
396
|
+
logger.error(f"There was an error listing {key} with {args}: {str(e)}")
|
|
397
|
+
raise e
|
|
398
|
+
|
|
399
|
+
def load_identity_groups(self, logger: logging.Logger) -> Dict[str, Any]:
|
|
400
|
+
domain = self.config.delivery_method.domain
|
|
401
|
+
if not domain:
|
|
402
|
+
raise Exception("No domain was provided")
|
|
403
|
+
if self.google_drive_service is None or self.google_drive_service._http.credentials is None:
|
|
404
|
+
raise Exception("No auth found")
|
|
405
|
+
|
|
406
|
+
directory_service = build("admin", "directory_v1", credentials=self.google_drive_service._http.credentials)
|
|
407
|
+
users_api = directory_service.users()
|
|
408
|
+
groups_api = directory_service.groups()
|
|
409
|
+
members_api = directory_service.members()
|
|
410
|
+
|
|
411
|
+
for user in self._get_looping_google_api_list_response(users_api, "users", {"domain": domain}, logger):
|
|
412
|
+
rfp = RemoteIdentity(
|
|
413
|
+
id=uuid.uuid4(),
|
|
414
|
+
remote_id=user["primaryEmail"],
|
|
415
|
+
name=user["name"]["fullName"] if user["name"] is not None else None,
|
|
416
|
+
email_address=user["primaryEmail"],
|
|
417
|
+
member_email_addresses=[x["address"] for x in user["emails"]],
|
|
418
|
+
type=RemoteIdentityType.USER,
|
|
419
|
+
modified_at=datetime_now(),
|
|
420
|
+
)
|
|
421
|
+
yield rfp.dict()
|
|
422
|
+
|
|
423
|
+
for group in self._get_looping_google_api_list_response(groups_api, "groups", {"domain": domain}, logger):
|
|
424
|
+
rfp = RemoteIdentity(
|
|
425
|
+
id=uuid.uuid4(),
|
|
426
|
+
remote_id=group["email"],
|
|
427
|
+
name=group["name"],
|
|
428
|
+
email_address=group["email"],
|
|
429
|
+
type=RemoteIdentityType.GROUP,
|
|
430
|
+
modified_at=datetime_now(),
|
|
431
|
+
)
|
|
432
|
+
|
|
433
|
+
for member in self._get_looping_google_api_list_response(members_api, "members", {"groupKey": group["id"]}, logger):
|
|
434
|
+
rfp.member_email_addresses = rfp.member_email_addresses or []
|
|
435
|
+
rfp.member_email_addresses.append(member["email"])
|
|
436
|
+
|
|
437
|
+
yield rfp.dict()
|
source_google_drive/utils.py
CHANGED
|
@@ -15,6 +15,8 @@ def get_folder_id(url_string: str) -> str:
|
|
|
15
15
|
if parsed_url.scheme != "https" or parsed_url.netloc != "drive.google.com":
|
|
16
16
|
raise ValueError("Folder URL has to be of the form https://drive.google.com/drive/folders/<folder_id>")
|
|
17
17
|
path_segments = list(filter(None, parsed_url.path.split("/")))
|
|
18
|
+
if path_segments[-1] in ["my-drive", "home"]:
|
|
19
|
+
return "root"
|
|
18
20
|
if path_segments[-2] != "folders" or len(path_segments) < 3:
|
|
19
21
|
raise ValueError("Folder URL has to be of the form https://drive.google.com/drive/folders/<folder_id>")
|
|
20
22
|
return path_segments[-1]
|
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
source_google_drive/__init__.py,sha256=SAgPIoGpjdHTY7mvf__A99FiElBh3WvdKSHLstg-BUE,134
|
|
2
|
-
source_google_drive/exceptions.py,sha256=6sBxbdZhj0CARkctfBaNgGHB_GGcFUkY4cnMUWyOruM,268
|
|
3
|
-
source_google_drive/run.py,sha256=5AQd906FbSGaNZj0udrr-VKmiSS4nsor7F7noMjsdi0,677
|
|
4
|
-
source_google_drive/source.py,sha256=OyKTjPg80KR8yZiYiC3YSi6E6egothtAn3YAHAS6Q6A,2839
|
|
5
|
-
source_google_drive/spec.py,sha256=YPOmOInQfmwHeB6Ay2z78LNvHsggncwU1bchQOsAZSk,4268
|
|
6
|
-
source_google_drive/stream_reader.py,sha256=DClCknbFqAq1SZQQBJnWVpbIoJkfu7ZeLzLh0E0aE5U,20292
|
|
7
|
-
source_google_drive/utils.py,sha256=1Fe3J4KXI1iIf4rklgLoR4p1xkCzLQI7zj5dkyi4Vt0,942
|
|
8
|
-
airbyte_source_google_drive-0.2.0.dev202501291717.dist-info/METADATA,sha256=gXbq2ilBqoJCjwtdwqmSgFVL3RxFMlxgwUxODMZT4Yg,5536
|
|
9
|
-
airbyte_source_google_drive-0.2.0.dev202501291717.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
|
|
10
|
-
airbyte_source_google_drive-0.2.0.dev202501291717.dist-info/entry_points.txt,sha256=YgpJf0nA5Mn0B7YC9VOFI847vz1jI6U4q7BeLUOXa54,67
|
|
11
|
-
airbyte_source_google_drive-0.2.0.dev202501291717.dist-info/RECORD,,
|
|
File without changes
|