airbyte-source-google-drive 0.2.0.dev202501272304__py3-none-any.whl → 0.2.0.dev202502031727__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of airbyte-source-google-drive might be problematic. Click here for more details.
- {airbyte_source_google_drive-0.2.0.dev202501272304.dist-info → airbyte_source_google_drive-0.2.0.dev202502031727.dist-info}/METADATA +2 -2
- airbyte_source_google_drive-0.2.0.dev202502031727.dist-info/RECORD +11 -0
- source_google_drive/stream_reader.py +66 -73
- source_google_drive/utils.py +2 -0
- airbyte_source_google_drive-0.2.0.dev202501272304.dist-info/RECORD +0 -12
- source_google_drive/temp_mock.py +0 -36
- {airbyte_source_google_drive-0.2.0.dev202501272304.dist-info → airbyte_source_google_drive-0.2.0.dev202502031727.dist-info}/WHEEL +0 -0
- {airbyte_source_google_drive-0.2.0.dev202501272304.dist-info → airbyte_source_google_drive-0.2.0.dev202502031727.dist-info}/entry_points.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: airbyte-source-google-drive
|
|
3
|
-
Version: 0.2.0.dev202501272304
|
|
3
|
+
Version: 0.2.0.dev202502031727
|
|
4
4
|
Summary: Source implementation for Google Drive.
|
|
5
5
|
License: ELv2
|
|
6
6
|
Author: Airbyte
|
|
@@ -10,7 +10,7 @@ Classifier: License :: Other/Proprietary License
|
|
|
10
10
|
Classifier: Programming Language :: Python :: 3
|
|
11
11
|
Classifier: Programming Language :: Python :: 3.10
|
|
12
12
|
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
-
Requires-Dist: airbyte-cdk[file-based] (==6.26.0.
|
|
13
|
+
Requires-Dist: airbyte-cdk[file-based] (==6.26.0.dev04106)
|
|
14
14
|
Requires-Dist: google-api-python-client (==2.104.0)
|
|
15
15
|
Requires-Dist: google-api-python-client-stubs (==1.18.0)
|
|
16
16
|
Requires-Dist: google-auth-httplib2 (==0.1.1)
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
source_google_drive/__init__.py,sha256=SAgPIoGpjdHTY7mvf__A99FiElBh3WvdKSHLstg-BUE,134
|
|
2
|
+
source_google_drive/exceptions.py,sha256=6sBxbdZhj0CARkctfBaNgGHB_GGcFUkY4cnMUWyOruM,268
|
|
3
|
+
source_google_drive/run.py,sha256=5AQd906FbSGaNZj0udrr-VKmiSS4nsor7F7noMjsdi0,677
|
|
4
|
+
source_google_drive/source.py,sha256=OyKTjPg80KR8yZiYiC3YSi6E6egothtAn3YAHAS6Q6A,2839
|
|
5
|
+
source_google_drive/spec.py,sha256=YPOmOInQfmwHeB6Ay2z78LNvHsggncwU1bchQOsAZSk,4268
|
|
6
|
+
source_google_drive/stream_reader.py,sha256=h4p3aafE8U9BqYgRBGXuADEhm4NCGMqYl2ExnQkj6bU,20266
|
|
7
|
+
source_google_drive/utils.py,sha256=ewR-kBKLmtD-s7zqCfGECfzWYF43tpQdscAQIlUEkR8,1022
|
|
8
|
+
airbyte_source_google_drive-0.2.0.dev202502031727.dist-info/METADATA,sha256=rMydjQTVW8_21SbChXDmp3i-LbEYiZi1OwOPvEKHh_k,5536
|
|
9
|
+
airbyte_source_google_drive-0.2.0.dev202502031727.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
|
|
10
|
+
airbyte_source_google_drive-0.2.0.dev202502031727.dist-info/entry_points.txt,sha256=YgpJf0nA5Mn0B7YC9VOFI847vz1jI6U4q7BeLUOXa54,67
|
|
11
|
+
airbyte_source_google_drive-0.2.0.dev202502031727.dist-info/RECORD,,
|
|
@@ -27,9 +27,6 @@ from source_google_drive.utils import get_folder_id
|
|
|
27
27
|
from .exceptions import ErrorDownloadingFile, ErrorFetchingMetadata
|
|
28
28
|
from .spec import SourceGoogleDriveSpec
|
|
29
29
|
|
|
30
|
-
# remove this, just using while test credentials are prepared
|
|
31
|
-
from .temp_mock import _get_looping_google_api_list_response
|
|
32
|
-
|
|
33
30
|
|
|
34
31
|
FOLDER_MIME_TYPE = "application/vnd.google-apps.folder"
|
|
35
32
|
GOOGLE_DOC_MIME_TYPE = "application/vnd.google-apps.document"
|
|
@@ -140,76 +137,6 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
|
|
|
140
137
|
|
|
141
138
|
return self._drive_service
|
|
142
139
|
|
|
143
|
-
def _get_looping_google_api_list_response(
|
|
144
|
-
self, service: Any, key: str, args: dict[str, Any], logger: logging.Logger
|
|
145
|
-
) -> Iterator[dict[str, Any]]:
|
|
146
|
-
try:
|
|
147
|
-
looping = True
|
|
148
|
-
next_page_token: str | None = None
|
|
149
|
-
while looping:
|
|
150
|
-
rsp = service.list(pageToken=next_page_token, **args).execute()
|
|
151
|
-
next_page_token = rsp.get("nextPageToken")
|
|
152
|
-
items: list[dict[str, Any]] = rsp.get(key)
|
|
153
|
-
|
|
154
|
-
if items is None or len(items) == 0:
|
|
155
|
-
looping = False
|
|
156
|
-
break
|
|
157
|
-
|
|
158
|
-
if rsp.get("nextPageToken") is None:
|
|
159
|
-
looping = False
|
|
160
|
-
else:
|
|
161
|
-
next_page_token = rsp.get("nextPageToken")
|
|
162
|
-
|
|
163
|
-
for item in items:
|
|
164
|
-
yield item
|
|
165
|
-
except Exception as e:
|
|
166
|
-
logger.info(f"There was an error listing {key} with {args}: {str(e)}")
|
|
167
|
-
logger.info(f"Backing off to mocked data for development purposes")
|
|
168
|
-
yield from _get_looping_google_api_list_response(service, key, args)
|
|
169
|
-
|
|
170
|
-
def load_identity_groups(self, logger: logging.Logger) -> Dict[str, Any]:
|
|
171
|
-
domain = self.config.delivery_method.domain
|
|
172
|
-
if not domain:
|
|
173
|
-
raise Exception("No domain was provided")
|
|
174
|
-
if self.google_drive_service is None or self.google_drive_service._http.credentials is None:
|
|
175
|
-
raise Exception("No auth found")
|
|
176
|
-
|
|
177
|
-
directory_service = build("admin", "directory_v1", credentials=self.google_drive_service._http.credentials)
|
|
178
|
-
users_api = directory_service.users()
|
|
179
|
-
groups_api = directory_service.groups()
|
|
180
|
-
members_api = directory_service.members()
|
|
181
|
-
|
|
182
|
-
# here is failing
|
|
183
|
-
for user in self._get_looping_google_api_list_response(users_api, "users", {"domain": domain}, logger):
|
|
184
|
-
rfp = RemoteFileIdentity(
|
|
185
|
-
id=uuid.uuid4(),
|
|
186
|
-
remote_id=user["primaryEmail"],
|
|
187
|
-
name=user["name"]["fullName"] if user["name"] is not None else None,
|
|
188
|
-
email_address=user["primaryEmail"],
|
|
189
|
-
member_email_addresses=[x["address"] for x in user["emails"]],
|
|
190
|
-
type=RemoteFileIdentityType.USER,
|
|
191
|
-
modified_at=datetime_now(),
|
|
192
|
-
)
|
|
193
|
-
yield rfp.dict()
|
|
194
|
-
|
|
195
|
-
for group in self._get_looping_google_api_list_response(groups_api, "groups", {"domain": domain}, logger):
|
|
196
|
-
rfp = RemoteFileIdentity(
|
|
197
|
-
id=uuid.uuid4(),
|
|
198
|
-
remote_id=group["email"],
|
|
199
|
-
name=group["name"],
|
|
200
|
-
email_address=group["email"],
|
|
201
|
-
type=RemoteFileIdentityType.GROUP,
|
|
202
|
-
modified_at=datetime_now(),
|
|
203
|
-
)
|
|
204
|
-
|
|
205
|
-
for member in self._get_looping_google_api_list_response(members_api, "members", {"groupKey": group["id"]}, logger):
|
|
206
|
-
rfp.member_email_addresses = rfp.member_email_addresses or []
|
|
207
|
-
rfp.member_email_addresses.append(member["email"])
|
|
208
|
-
|
|
209
|
-
yield rfp.dict()
|
|
210
|
-
|
|
211
|
-
return ""
|
|
212
|
-
|
|
213
140
|
def get_matching_files(self, globs: List[str], prefix: Optional[str], logger: logging.Logger) -> Iterable[RemoteFile]:
|
|
214
141
|
"""
|
|
215
142
|
Get all files matching the specified glob patterns.
|
|
@@ -443,3 +370,69 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
|
|
|
443
370
|
allowed_identity_remote_ids=[p.remote_id for p in remote_identities],
|
|
444
371
|
publicly_accessible=is_public,
|
|
445
372
|
).dict(exclude_none=True)
|
|
373
|
+
|
|
374
|
+
def _get_looping_google_api_list_response(
|
|
375
|
+
self, service: Any, key: str, args: dict[str, Any], logger: logging.Logger
|
|
376
|
+
) -> Iterator[dict[str, Any]]:
|
|
377
|
+
try:
|
|
378
|
+
looping = True
|
|
379
|
+
next_page_token: str | None = None
|
|
380
|
+
while looping:
|
|
381
|
+
rsp = service.list(pageToken=next_page_token, **args).execute()
|
|
382
|
+
next_page_token = rsp.get("nextPageToken")
|
|
383
|
+
items: list[dict[str, Any]] = rsp.get(key)
|
|
384
|
+
|
|
385
|
+
if items is None or len(items) == 0:
|
|
386
|
+
looping = False
|
|
387
|
+
break
|
|
388
|
+
|
|
389
|
+
if rsp.get("nextPageToken") is None:
|
|
390
|
+
looping = False
|
|
391
|
+
else:
|
|
392
|
+
next_page_token = rsp.get("nextPageToken")
|
|
393
|
+
|
|
394
|
+
for item in items:
|
|
395
|
+
yield item
|
|
396
|
+
except Exception as e:
|
|
397
|
+
logger.error(f"There was an error listing {key} with {args}: {str(e)}")
|
|
398
|
+
raise e
|
|
399
|
+
|
|
400
|
+
def load_identity_groups(self, logger: logging.Logger) -> Dict[str, Any]:
|
|
401
|
+
domain = self.config.delivery_method.domain
|
|
402
|
+
if not domain:
|
|
403
|
+
raise Exception("No domain was provided")
|
|
404
|
+
if self.google_drive_service is None or self.google_drive_service._http.credentials is None:
|
|
405
|
+
raise Exception("No auth found")
|
|
406
|
+
|
|
407
|
+
directory_service = build("admin", "directory_v1", credentials=self.google_drive_service._http.credentials)
|
|
408
|
+
users_api = directory_service.users()
|
|
409
|
+
groups_api = directory_service.groups()
|
|
410
|
+
members_api = directory_service.members()
|
|
411
|
+
|
|
412
|
+
for user in self._get_looping_google_api_list_response(users_api, "users", {"domain": domain}, logger):
|
|
413
|
+
rfp = RemoteFileIdentity(
|
|
414
|
+
id=uuid.uuid4(),
|
|
415
|
+
remote_id=user["primaryEmail"],
|
|
416
|
+
name=user["name"]["fullName"] if user["name"] is not None else None,
|
|
417
|
+
email_address=user["primaryEmail"],
|
|
418
|
+
member_email_addresses=[x["address"] for x in user["emails"]],
|
|
419
|
+
type=RemoteFileIdentityType.USER,
|
|
420
|
+
modified_at=datetime_now(),
|
|
421
|
+
)
|
|
422
|
+
yield rfp.dict()
|
|
423
|
+
|
|
424
|
+
for group in self._get_looping_google_api_list_response(groups_api, "groups", {"domain": domain}, logger):
|
|
425
|
+
rfp = RemoteFileIdentity(
|
|
426
|
+
id=uuid.uuid4(),
|
|
427
|
+
remote_id=group["email"],
|
|
428
|
+
name=group["name"],
|
|
429
|
+
email_address=group["email"],
|
|
430
|
+
type=RemoteFileIdentityType.GROUP,
|
|
431
|
+
modified_at=datetime_now(),
|
|
432
|
+
)
|
|
433
|
+
|
|
434
|
+
for member in self._get_looping_google_api_list_response(members_api, "members", {"groupKey": group["id"]}, logger):
|
|
435
|
+
rfp.member_email_addresses = rfp.member_email_addresses or []
|
|
436
|
+
rfp.member_email_addresses.append(member["email"])
|
|
437
|
+
|
|
438
|
+
yield rfp.dict()
|
source_google_drive/utils.py
CHANGED
|
@@ -15,6 +15,8 @@ def get_folder_id(url_string: str) -> str:
|
|
|
15
15
|
if parsed_url.scheme != "https" or parsed_url.netloc != "drive.google.com":
|
|
16
16
|
raise ValueError("Folder URL has to be of the form https://drive.google.com/drive/folders/<folder_id>")
|
|
17
17
|
path_segments = list(filter(None, parsed_url.path.split("/")))
|
|
18
|
+
if path_segments[-1] in ["my-drive", "home"]:
|
|
19
|
+
return "root"
|
|
18
20
|
if path_segments[-2] != "folders" or len(path_segments) < 3:
|
|
19
21
|
raise ValueError("Folder URL has to be of the form https://drive.google.com/drive/folders/<folder_id>")
|
|
20
22
|
return path_segments[-1]
|
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
source_google_drive/__init__.py,sha256=SAgPIoGpjdHTY7mvf__A99FiElBh3WvdKSHLstg-BUE,134
|
|
2
|
-
source_google_drive/exceptions.py,sha256=6sBxbdZhj0CARkctfBaNgGHB_GGcFUkY4cnMUWyOruM,268
|
|
3
|
-
source_google_drive/run.py,sha256=5AQd906FbSGaNZj0udrr-VKmiSS4nsor7F7noMjsdi0,677
|
|
4
|
-
source_google_drive/source.py,sha256=OyKTjPg80KR8yZiYiC3YSi6E6egothtAn3YAHAS6Q6A,2839
|
|
5
|
-
source_google_drive/spec.py,sha256=YPOmOInQfmwHeB6Ay2z78LNvHsggncwU1bchQOsAZSk,4268
|
|
6
|
-
source_google_drive/stream_reader.py,sha256=6dQOzlsMB2XoMIAdJH97Rb_dKVyDQaWeepXAN0GxxzA,20575
|
|
7
|
-
source_google_drive/temp_mock.py,sha256=NdvsAltcZBIqnCBOQuU9R3_yEV5v-bHBR9T_BdjPtU4,1109
|
|
8
|
-
source_google_drive/utils.py,sha256=1Fe3J4KXI1iIf4rklgLoR4p1xkCzLQI7zj5dkyi4Vt0,942
|
|
9
|
-
airbyte_source_google_drive-0.2.0.dev202501272304.dist-info/METADATA,sha256=bSvWdrIpQ1GRLS1JDsssYJy2aP0BuaZMe8IfEtyDNP8,5536
|
|
10
|
-
airbyte_source_google_drive-0.2.0.dev202501272304.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
|
|
11
|
-
airbyte_source_google_drive-0.2.0.dev202501272304.dist-info/entry_points.txt,sha256=YgpJf0nA5Mn0B7YC9VOFI847vz1jI6U4q7BeLUOXa54,67
|
|
12
|
-
airbyte_source_google_drive-0.2.0.dev202501272304.dist-info/RECORD,,
|
source_google_drive/temp_mock.py
DELETED
|
@@ -1,36 +0,0 @@
|
|
|
1
|
-
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
|
2
|
-
|
|
3
|
-
from typing import Any, Iterator
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def _get_looping_google_api_list_response(service: Any, key: str, args: dict[str, Any]) -> Iterator[dict[str, Any]]:
|
|
7
|
-
domain = args.get("domain") or args.get("groupKey")
|
|
8
|
-
|
|
9
|
-
if key == "users":
|
|
10
|
-
# Mocking 5 users
|
|
11
|
-
for i in range(1, 6):
|
|
12
|
-
yield {
|
|
13
|
-
"primaryEmail": f"user{i}@{domain}",
|
|
14
|
-
"name": {"fullName": f"User {i}"},
|
|
15
|
-
"emails": [{"address": f"user{i}@{domain}"}],
|
|
16
|
-
}
|
|
17
|
-
|
|
18
|
-
elif key == "groups":
|
|
19
|
-
# Mocking 5 groups
|
|
20
|
-
for i in range(1, 6):
|
|
21
|
-
yield {
|
|
22
|
-
"email": f"group{i}@{domain}",
|
|
23
|
-
"name": f"Group {i}",
|
|
24
|
-
"id": f"group-id-{i}",
|
|
25
|
-
}
|
|
26
|
-
|
|
27
|
-
elif key == "members":
|
|
28
|
-
# Mocking 5 members per group
|
|
29
|
-
group_id = args.get("groupKey")
|
|
30
|
-
for i in range(1, 6):
|
|
31
|
-
yield {
|
|
32
|
-
"email": f"member{i}@{domain}",
|
|
33
|
-
"role": "MEMBER",
|
|
34
|
-
"type": "USER",
|
|
35
|
-
"groupId": group_id,
|
|
36
|
-
}
|
|
File without changes
|