airbyte-source-google-drive 0.2.0.dev202501241656__py3-none-any.whl → 0.2.0.dev202501270320__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of airbyte-source-google-drive might be problematic. Click here for more details.
- {airbyte_source_google_drive-0.2.0.dev202501241656.dist-info → airbyte_source_google_drive-0.2.0.dev202501270320.dist-info}/METADATA +2 -2
- airbyte_source_google_drive-0.2.0.dev202501270320.dist-info/RECORD +12 -0
- source_google_drive/spec.py +5 -26
- source_google_drive/stream_reader.py +87 -5
- source_google_drive/temp_mock.py +36 -0
- airbyte_source_google_drive-0.2.0.dev202501241656.dist-info/RECORD +0 -11
- {airbyte_source_google_drive-0.2.0.dev202501241656.dist-info → airbyte_source_google_drive-0.2.0.dev202501270320.dist-info}/WHEEL +0 -0
- {airbyte_source_google_drive-0.2.0.dev202501241656.dist-info → airbyte_source_google_drive-0.2.0.dev202501270320.dist-info}/entry_points.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: airbyte-source-google-drive
|
|
3
|
-
Version: 0.2.0.dev202501241656
|
|
3
|
+
Version: 0.2.0.dev202501270320
|
|
4
4
|
Summary: Source implementation for Google Drive.
|
|
5
5
|
License: ELv2
|
|
6
6
|
Author: Airbyte
|
|
@@ -10,7 +10,7 @@ Classifier: License :: Other/Proprietary License
|
|
|
10
10
|
Classifier: Programming Language :: Python :: 3
|
|
11
11
|
Classifier: Programming Language :: Python :: 3.10
|
|
12
12
|
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
-
Requires-Dist: airbyte-cdk[file-based] (==6.26.0.
|
|
13
|
+
Requires-Dist: airbyte-cdk[file-based] (==6.26.0.dev04105)
|
|
14
14
|
Requires-Dist: google-api-python-client (==2.104.0)
|
|
15
15
|
Requires-Dist: google-api-python-client-stubs (==1.18.0)
|
|
16
16
|
Requires-Dist: google-auth-httplib2 (==0.1.1)
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
source_google_drive/__init__.py,sha256=SAgPIoGpjdHTY7mvf__A99FiElBh3WvdKSHLstg-BUE,134
|
|
2
|
+
source_google_drive/exceptions.py,sha256=6sBxbdZhj0CARkctfBaNgGHB_GGcFUkY4cnMUWyOruM,268
|
|
3
|
+
source_google_drive/run.py,sha256=5AQd906FbSGaNZj0udrr-VKmiSS4nsor7F7noMjsdi0,677
|
|
4
|
+
source_google_drive/source.py,sha256=OyKTjPg80KR8yZiYiC3YSi6E6egothtAn3YAHAS6Q6A,2839
|
|
5
|
+
source_google_drive/spec.py,sha256=Xt20_aNmOxdL0XwFsGuAEGHCXCygDPPIZ3QKRwnW47k,4242
|
|
6
|
+
source_google_drive/stream_reader.py,sha256=6dQOzlsMB2XoMIAdJH97Rb_dKVyDQaWeepXAN0GxxzA,20575
|
|
7
|
+
source_google_drive/temp_mock.py,sha256=NdvsAltcZBIqnCBOQuU9R3_yEV5v-bHBR9T_BdjPtU4,1109
|
|
8
|
+
source_google_drive/utils.py,sha256=1Fe3J4KXI1iIf4rklgLoR4p1xkCzLQI7zj5dkyi4Vt0,942
|
|
9
|
+
airbyte_source_google_drive-0.2.0.dev202501270320.dist-info/METADATA,sha256=fao8_WncWsO455kN99bXlp33S5bKj-iw8VcVZSnbW1Y,5536
|
|
10
|
+
airbyte_source_google_drive-0.2.0.dev202501270320.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
|
|
11
|
+
airbyte_source_google_drive-0.2.0.dev202501270320.dist-info/entry_points.txt,sha256=YgpJf0nA5Mn0B7YC9VOFI847vz1jI6U4q7BeLUOXa54,67
|
|
12
|
+
airbyte_source_google_drive-0.2.0.dev202501270320.dist-info/RECORD,,
|
source_google_drive/spec.py
CHANGED
|
@@ -2,9 +2,7 @@
|
|
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
3
|
#
|
|
4
4
|
|
|
5
|
-
import
|
|
6
|
-
from datetime import datetime
|
|
7
|
-
from typing import Any, Dict, Literal, Union
|
|
5
|
+
from typing import Any, Dict, Literal, Optional, Union
|
|
8
6
|
|
|
9
7
|
import dpath.util
|
|
10
8
|
from pydantic.v1 import BaseModel, Field
|
|
@@ -19,34 +17,15 @@ from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import (
|
|
|
19
17
|
)
|
|
20
18
|
|
|
21
19
|
|
|
22
|
-
class RemoteFileIdentity(BaseModel):
|
|
23
|
-
id: uuid.UUID
|
|
24
|
-
remote_id: str
|
|
25
|
-
parent_id: str | None = None
|
|
26
|
-
name: str | None = None
|
|
27
|
-
description: str | None = None
|
|
28
|
-
email_address: str | None = None
|
|
29
|
-
member_email_addresses: list[str] | None = None
|
|
30
|
-
type: str
|
|
31
|
-
modified_at: datetime
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
class RemoteFileMetadata(BaseModel):
|
|
35
|
-
id: str
|
|
36
|
-
file_path: str
|
|
37
|
-
allowed_identity_remote_ids: list[str] | None = None
|
|
38
|
-
denied_identity_remote_ids: list[str] | None = None
|
|
39
|
-
publicly_accessible: bool = False
|
|
40
|
-
|
|
41
|
-
|
|
42
20
|
class DeliverRecords(DeliverRecordsBase):
|
|
43
21
|
# Overriding to make visible with airbyte_hidden=False
|
|
44
|
-
|
|
45
|
-
title="
|
|
46
|
-
description="
|
|
22
|
+
sync_acl_permissions: bool = Field(
|
|
23
|
+
title="Include ACL Permissions",
|
|
24
|
+
description="Joins Document allowlists to each stream.",
|
|
47
25
|
default=False,
|
|
48
26
|
airbyte_hidden=False,
|
|
49
27
|
)
|
|
28
|
+
domain: Optional[str] = Field(title="Domain", description="The domain of the identities.", airbyte_hidden=False)
|
|
50
29
|
|
|
51
30
|
|
|
52
31
|
class OAuthCredentials(BaseModel):
|
|
@@ -10,20 +10,25 @@ import uuid
|
|
|
10
10
|
from datetime import datetime
|
|
11
11
|
from io import IOBase
|
|
12
12
|
from os.path import getsize
|
|
13
|
-
from typing import Any, Dict, Iterable, List, Optional, Set, Tuple
|
|
13
|
+
from typing import Any, Dict, Iterable, Iterator, List, Optional, Set, Tuple
|
|
14
14
|
|
|
15
|
+
import pytz
|
|
15
16
|
from google.oauth2 import credentials, service_account
|
|
16
17
|
from googleapiclient.discovery import build
|
|
17
18
|
from googleapiclient.http import MediaIoBaseDownload
|
|
18
19
|
|
|
19
20
|
from airbyte_cdk import AirbyteTracedException, FailureType
|
|
21
|
+
from airbyte_cdk.sources.file_based.config.permissions import RemoteFileIdentity, RemoteFileIdentityType, RemoteFilePermissions
|
|
20
22
|
from airbyte_cdk.sources.file_based.exceptions import FileSizeLimitError
|
|
21
23
|
from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader, FileReadMode
|
|
22
24
|
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
|
|
23
25
|
from source_google_drive.utils import get_folder_id
|
|
24
26
|
|
|
25
27
|
from .exceptions import ErrorDownloadingFile, ErrorFetchingMetadata
|
|
26
|
-
from .spec import
|
|
28
|
+
from .spec import SourceGoogleDriveSpec
|
|
29
|
+
|
|
30
|
+
# remove this, just using while test credentials are prepared
|
|
31
|
+
from .temp_mock import _get_looping_google_api_list_response
|
|
27
32
|
|
|
28
33
|
|
|
29
34
|
FOLDER_MIME_TYPE = "application/vnd.google-apps.folder"
|
|
@@ -64,12 +69,19 @@ PUBLIC_PERMISSION_IDS = [
|
|
|
64
69
|
|
|
65
70
|
PERMISSIONS_API_SCOPES = [
|
|
66
71
|
"https://www.googleapis.com/auth/drive",
|
|
72
|
+
"https://www.googleapis.com/auth/drive.readonly",
|
|
73
|
+
"https://www.googleapis.com/auth/drive.metadata.readonly",
|
|
74
|
+
"https://www.googleapis.com/auth/drive.file",
|
|
67
75
|
"https://www.googleapis.com/auth/admin.directory.group.readonly",
|
|
68
76
|
"https://www.googleapis.com/auth/admin.directory.group.member.readonly",
|
|
69
77
|
"https://www.googleapis.com/auth/admin.directory.user.readonly",
|
|
70
78
|
]
|
|
71
79
|
|
|
72
80
|
|
|
81
|
+
def datetime_now() -> datetime:
    """Return the current time as a timezone-aware datetime in UTC."""
    utc_zone = pytz.UTC
    return datetime.now(tz=utc_zone)
|
|
83
|
+
|
|
84
|
+
|
|
73
85
|
class GoogleDriveRemoteFile(RemoteFile):
|
|
74
86
|
id: str
|
|
75
87
|
# The mime type of the file as returned by the Google Drive API
|
|
@@ -113,7 +125,7 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
|
|
|
113
125
|
if self.config.credentials.auth_type == "Client":
|
|
114
126
|
creds = credentials.Credentials.from_authorized_user_info(self.config.credentials.dict())
|
|
115
127
|
else:
|
|
116
|
-
scopes = PERMISSIONS_API_SCOPES if self.
|
|
128
|
+
scopes = PERMISSIONS_API_SCOPES if self.sync_acl_permissions() else None
|
|
117
129
|
creds = service_account.Credentials.from_service_account_info(
|
|
118
130
|
json.loads(self.config.credentials.service_account_info), scopes=scopes
|
|
119
131
|
)
|
|
@@ -128,6 +140,76 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
|
|
|
128
140
|
|
|
129
141
|
return self._drive_service
|
|
130
142
|
|
|
143
|
+
def _get_looping_google_api_list_response(
    self, service: Any, key: str, args: dict[str, Any], logger: logging.Logger
) -> Iterator[dict[str, Any]]:
    """
    Yield every item from a paginated Google API ``list`` call.

    Calls ``service.list(pageToken=..., **args).execute()`` repeatedly and follows
    ``nextPageToken`` until a response carries no token or no items under ``key``.

    Temporary development behaviour: when the listing fails before any real item
    was produced, the error is logged and mocked data from ``temp_mock`` is yielded
    instead. A failure that happens after real items were already yielded is
    re-raised, so real and mocked records are never mixed in one result.

    :param service: API resource exposing ``list(...)`` whose result has ``execute()``.
    :param key: name of the response field that holds the list of items.
    :param args: extra keyword arguments forwarded to ``service.list``.
    :param logger: logger used to report listing failures.
    :return: iterator over the item dictionaries.
    :raises Exception: the original API error, when it occurs mid-pagination.
    """
    yielded_real_item = False
    try:
        next_page_token: str | None = None
        while True:
            rsp = service.list(pageToken=next_page_token, **args).execute()
            items = rsp.get(key)

            # A missing or empty page ends the listing, even if a token is present.
            if not items:
                return

            for item in items:
                yielded_real_item = True
                yield item

            next_page_token = rsp.get("nextPageToken")
            if next_page_token is None:
                return
    except Exception as e:
        if yielded_real_item:
            # Appending mocked records to a partially fetched real result would
            # silently corrupt the output, so surface the failure instead.
            logger.error(f"There was an error listing {key} with {args}: {str(e)}")
            raise
        logger.warning(f"There was an error listing {key} with {args}: {str(e)}")
        logger.warning("Backing off to mocked data for development purposes")
        # TODO: remove this fallback once test credentials are available.
        yield from _get_looping_google_api_list_response(service, key, args)
|
|
169
|
+
|
|
170
|
+
def load_identity_groups(self, logger: logging.Logger) -> Iterable[Dict[str, Any]]:
    """
    Yield the users and groups of the configured domain as identity records.

    Every user and every group listed by the Admin SDK Directory API for
    ``self.config.delivery_method.domain`` is converted to a ``RemoteFileIdentity``
    and yielded as a plain dictionary. Group records also carry the e-mail
    addresses of their members.

    This is a generator: the validation errors below are raised when iteration
    starts, not when the method is called.

    :param logger: logger forwarded to the paginated listing helper.
    :return: iterator over ``RemoteFileIdentity.dict()`` payloads.
    :raises Exception: if no domain is configured or no credentials are available.
    """
    domain = self.config.delivery_method.domain
    if not domain:
        raise Exception("No domain was provided")
    if self.google_drive_service is None or self.google_drive_service._http.credentials is None:
        raise Exception("No auth found")

    # Reuse the credentials of the Drive client for the Directory API client.
    directory_service = build("admin", "directory_v1", credentials=self.google_drive_service._http.credentials)
    users_api = directory_service.users()
    groups_api = directory_service.groups()
    members_api = directory_service.members()

    # NOTE(review): the original author marked this users listing as the failing
    # call, presumably the reason for the mock fallback in the helper. Confirm.
    for user in self._get_looping_google_api_list_response(users_api, "users", {"domain": domain}, logger):
        # "name" and "emails" may be missing from a user resource; tolerate that
        # instead of failing the whole listing with a KeyError.
        full_name = (user.get("name") or {}).get("fullName")
        rfp = RemoteFileIdentity(
            id=uuid.uuid4(),
            remote_id=user["primaryEmail"],
            name=full_name,
            email_address=user["primaryEmail"],
            member_email_addresses=[x["address"] for x in user.get("emails") or []],
            type=RemoteFileIdentityType.USER,
            modified_at=datetime_now(),
        )
        yield rfp.dict()

    for group in self._get_looping_google_api_list_response(groups_api, "groups", {"domain": domain}, logger):
        rfp = RemoteFileIdentity(
            id=uuid.uuid4(),
            remote_id=group["email"],
            name=group["name"],
            email_address=group["email"],
            type=RemoteFileIdentityType.GROUP,
            modified_at=datetime_now(),
        )

        # Members without an e-mail address are skipped (per the Directory API
        # docs, CUSTOMER-type members are returned without one).
        member_emails = [
            member["email"]
            for member in self._get_looping_google_api_list_response(
                members_api, "members", {"groupKey": group["id"]}, logger
            )
            if member.get("email")
        ]
        # Leave the field at its default when the group has no usable members.
        if member_emails:
            rfp.member_email_addresses = member_emails

        yield rfp.dict()
|
|
212
|
+
|
|
131
213
|
def get_matching_files(self, globs: List[str], prefix: Optional[str], logger: logging.Logger) -> Iterable[RemoteFile]:
|
|
132
214
|
"""
|
|
133
215
|
Get all files matching the specified glob patterns.
|
|
@@ -353,9 +435,9 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
|
|
|
353
435
|
description=None,
|
|
354
436
|
)
|
|
355
437
|
|
|
356
|
-
def
|
|
438
|
+
def get_file_acl_permissions(self, file: GoogleDriveRemoteFile, logger: logging.Logger) -> Dict[str, Any]:
|
|
357
439
|
remote_identities, is_public = self.get_file_permissions(file.id, file_name=file.uri, logger=logger)
|
|
358
|
-
return
|
|
440
|
+
return RemoteFilePermissions(
|
|
359
441
|
id=file.id,
|
|
360
442
|
file_path=file.uri,
|
|
361
443
|
allowed_identity_remote_ids=[p.remote_id for p in remote_identities],
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
|
2
|
+
|
|
3
|
+
from typing import Any, Iterator
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def _get_looping_google_api_list_response(service: Any, key: str, args: dict[str, Any]) -> Iterator[dict[str, Any]]:
    """
    Yield five mocked Directory API items for the requested collection.

    Stand-in for the real paginated listing while test credentials are prepared.
    ``service`` is accepted for signature compatibility and never used. The e-mail
    suffix is ``args["domain"]`` or, when that is absent or empty, ``args["groupKey"]``.
    Any ``key`` other than "users", "groups" or "members" yields nothing.
    """
    suffix = args.get("domain") or args.get("groupKey")
    numbers = range(1, 6)

    if key == "users":
        # Five mocked users.
        for n in numbers:
            address = f"user{n}@{suffix}"
            yield {
                "primaryEmail": address,
                "name": {"fullName": f"User {n}"},
                "emails": [{"address": address}],
            }
        return

    if key == "groups":
        # Five mocked groups.
        yield from (
            {"email": f"group{n}@{suffix}", "name": f"Group {n}", "id": f"group-id-{n}"}
            for n in numbers
        )
        return

    if key == "members":
        # Five mocked members for the requested group.
        owning_group = args.get("groupKey")
        yield from (
            {"email": f"member{n}@{suffix}", "role": "MEMBER", "type": "USER", "groupId": owning_group}
            for n in numbers
        )
|
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
source_google_drive/__init__.py,sha256=SAgPIoGpjdHTY7mvf__A99FiElBh3WvdKSHLstg-BUE,134
|
|
2
|
-
source_google_drive/exceptions.py,sha256=6sBxbdZhj0CARkctfBaNgGHB_GGcFUkY4cnMUWyOruM,268
|
|
3
|
-
source_google_drive/run.py,sha256=5AQd906FbSGaNZj0udrr-VKmiSS4nsor7F7noMjsdi0,677
|
|
4
|
-
source_google_drive/source.py,sha256=OyKTjPg80KR8yZiYiC3YSi6E6egothtAn3YAHAS6Q6A,2839
|
|
5
|
-
source_google_drive/spec.py,sha256=krZ1qRjUP7vV4JLGMgggMkP4kizGG4uj8mX0S0CUwU8,4706
|
|
6
|
-
source_google_drive/stream_reader.py,sha256=8Ibh4vXdPElHfI4KgeBf2HqkQMMVWrV6shLGBdYXUu4,17008
|
|
7
|
-
source_google_drive/utils.py,sha256=1Fe3J4KXI1iIf4rklgLoR4p1xkCzLQI7zj5dkyi4Vt0,942
|
|
8
|
-
airbyte_source_google_drive-0.2.0.dev202501241656.dist-info/METADATA,sha256=3v6W_Q5H627SdajGIzK_u4r2llp7Uc9SAI700QMkOW4,5536
|
|
9
|
-
airbyte_source_google_drive-0.2.0.dev202501241656.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
|
|
10
|
-
airbyte_source_google_drive-0.2.0.dev202501241656.dist-info/entry_points.txt,sha256=YgpJf0nA5Mn0B7YC9VOFI847vz1jI6U4q7BeLUOXa54,67
|
|
11
|
-
airbyte_source_google_drive-0.2.0.dev202501241656.dist-info/RECORD,,
|
|
File without changes
|