airbyte-source-google-drive 0.2.0.dev202502031727__py3-none-any.whl → 0.2.0.dev202502052009__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of airbyte-source-google-drive might be problematic. Click here for more details.
- {airbyte_source_google_drive-0.2.0.dev202502031727.dist-info → airbyte_source_google_drive-0.2.0.dev202502052009.dist-info}/METADATA +2 -2
- {airbyte_source_google_drive-0.2.0.dev202502031727.dist-info → airbyte_source_google_drive-0.2.0.dev202502052009.dist-info}/RECORD +6 -6
- source_google_drive/spec.py +33 -13
- source_google_drive/stream_reader.py +43 -38
- {airbyte_source_google_drive-0.2.0.dev202502031727.dist-info → airbyte_source_google_drive-0.2.0.dev202502052009.dist-info}/WHEEL +0 -0
- {airbyte_source_google_drive-0.2.0.dev202502031727.dist-info → airbyte_source_google_drive-0.2.0.dev202502052009.dist-info}/entry_points.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: airbyte-source-google-drive
|
|
3
|
-
Version: 0.2.0.dev202502031727
|
|
3
|
+
Version: 0.2.0.dev202502052009
|
|
4
4
|
Summary: Source implementation for Google Drive.
|
|
5
5
|
License: ELv2
|
|
6
6
|
Author: Airbyte
|
|
@@ -10,7 +10,7 @@ Classifier: License :: Other/Proprietary License
|
|
|
10
10
|
Classifier: Programming Language :: Python :: 3
|
|
11
11
|
Classifier: Programming Language :: Python :: 3.10
|
|
12
12
|
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
-
Requires-Dist: airbyte-cdk[file-based] (==6.26.0.
|
|
13
|
+
Requires-Dist: airbyte-cdk[file-based] (==6.26.0.dev04108)
|
|
14
14
|
Requires-Dist: google-api-python-client (==2.104.0)
|
|
15
15
|
Requires-Dist: google-api-python-client-stubs (==1.18.0)
|
|
16
16
|
Requires-Dist: google-auth-httplib2 (==0.1.1)
|
|
@@ -2,10 +2,10 @@ source_google_drive/__init__.py,sha256=SAgPIoGpjdHTY7mvf__A99FiElBh3WvdKSHLstg-B
|
|
|
2
2
|
source_google_drive/exceptions.py,sha256=6sBxbdZhj0CARkctfBaNgGHB_GGcFUkY4cnMUWyOruM,268
|
|
3
3
|
source_google_drive/run.py,sha256=5AQd906FbSGaNZj0udrr-VKmiSS4nsor7F7noMjsdi0,677
|
|
4
4
|
source_google_drive/source.py,sha256=OyKTjPg80KR8yZiYiC3YSi6E6egothtAn3YAHAS6Q6A,2839
|
|
5
|
-
source_google_drive/spec.py,sha256
|
|
6
|
-
source_google_drive/stream_reader.py,sha256=
|
|
5
|
+
source_google_drive/spec.py,sha256=-WkA2zGuQtf3G7uK8uq9BnimUlQh0s3vsqROmIHOgzI,4718
|
|
6
|
+
source_google_drive/stream_reader.py,sha256=jKMibQGjpP-9raJ557TqbOg6iGVuPYv9vamStwXDOCQ,20176
|
|
7
7
|
source_google_drive/utils.py,sha256=ewR-kBKLmtD-s7zqCfGECfzWYF43tpQdscAQIlUEkR8,1022
|
|
8
|
-
airbyte_source_google_drive-0.2.0.
|
|
9
|
-
airbyte_source_google_drive-0.2.0.
|
|
10
|
-
airbyte_source_google_drive-0.2.0.
|
|
11
|
-
airbyte_source_google_drive-0.2.0.
|
|
8
|
+
airbyte_source_google_drive-0.2.0.dev202502052009.dist-info/METADATA,sha256=fiVV_jIGTrIB43tzlnCv3BAkJEbJqr8BYtB25-vIUBU,5536
|
|
9
|
+
airbyte_source_google_drive-0.2.0.dev202502052009.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
|
|
10
|
+
airbyte_source_google_drive-0.2.0.dev202502052009.dist-info/entry_points.txt,sha256=YgpJf0nA5Mn0B7YC9VOFI847vz1jI6U4q7BeLUOXa54,67
|
|
11
|
+
airbyte_source_google_drive-0.2.0.dev202502052009.dist-info/RECORD,,
|
source_google_drive/spec.py
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
#
|
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
3
|
#
|
|
4
|
-
|
|
4
|
+
import uuid
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
from enum import Enum
|
|
5
7
|
from typing import Any, Dict, Literal, Optional, Union
|
|
6
8
|
|
|
7
9
|
import dpath.util
|
|
@@ -11,22 +13,40 @@ from airbyte_cdk import OneOfOptionConfig
|
|
|
11
13
|
from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import (
|
|
12
14
|
AbstractFileBasedSpec,
|
|
13
15
|
DeliverRawFiles,
|
|
16
|
+
DeliverRecords,
|
|
14
17
|
)
|
|
15
18
|
from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import (
|
|
16
|
-
|
|
19
|
+
DeliverPermissions as DeliverPermissionsBase,
|
|
17
20
|
)
|
|
18
21
|
|
|
19
22
|
|
|
20
|
-
class
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
23
|
+
class RemoteIdentityType(Enum):
|
|
24
|
+
USER = "user"
|
|
25
|
+
GROUP = "group"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class RemoteIdentity(BaseModel):
|
|
29
|
+
id: uuid.UUID
|
|
30
|
+
remote_id: str
|
|
31
|
+
parent_id: str | None = None
|
|
32
|
+
name: str | None = None
|
|
33
|
+
description: str | None = None
|
|
34
|
+
email_address: str | None = None
|
|
35
|
+
member_email_addresses: list[str] | None = None
|
|
36
|
+
type: RemoteIdentityType
|
|
37
|
+
modified_at: datetime
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class RemotePermissions(BaseModel):
|
|
41
|
+
id: str
|
|
42
|
+
file_path: str
|
|
43
|
+
allowed_identity_remote_ids: list[str] | None = None
|
|
44
|
+
denied_identity_remote_ids: list[str] | None = None
|
|
45
|
+
publicly_accessible: bool = False
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class DeliverPermissions(DeliverPermissionsBase):
|
|
49
|
+
domain: Optional[str] = Field(title="Domain", description="The Google domain of the identities.", airbyte_hidden=False, order=1)
|
|
30
50
|
|
|
31
51
|
|
|
32
52
|
class OAuthCredentials(BaseModel):
|
|
@@ -77,7 +97,7 @@ class SourceGoogleDriveSpec(AbstractFileBasedSpec, BaseModel):
|
|
|
77
97
|
pattern_descriptor="https://drive.google.com/drive/folders/MY-FOLDER-ID",
|
|
78
98
|
)
|
|
79
99
|
|
|
80
|
-
delivery_method: DeliverRecords | DeliverRawFiles = Field(
|
|
100
|
+
delivery_method: DeliverRecords | DeliverRawFiles | DeliverPermissions = Field(
|
|
81
101
|
title="Delivery Method",
|
|
82
102
|
discriminator="delivery_type",
|
|
83
103
|
type="object",
|
|
source_google_drive/stream_reader.py
CHANGED
|
@@ -18,14 +18,13 @@ from googleapiclient.discovery import build
|
|
|
18
18
|
from googleapiclient.http import MediaIoBaseDownload
|
|
19
19
|
|
|
20
20
|
from airbyte_cdk import AirbyteTracedException, FailureType
|
|
21
|
-
from airbyte_cdk.sources.file_based.config.permissions import RemoteFileIdentity, RemoteFileIdentityType, RemoteFilePermissions
|
|
22
21
|
from airbyte_cdk.sources.file_based.exceptions import FileSizeLimitError
|
|
23
22
|
from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader, FileReadMode
|
|
24
23
|
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
|
|
25
24
|
from source_google_drive.utils import get_folder_id
|
|
26
25
|
|
|
27
26
|
from .exceptions import ErrorDownloadingFile, ErrorFetchingMetadata
|
|
28
|
-
from .spec import SourceGoogleDriveSpec
|
|
27
|
+
from .spec import RemoteIdentity, RemoteIdentityType, RemotePermissions, SourceGoogleDriveSpec
|
|
29
28
|
|
|
30
29
|
|
|
31
30
|
FOLDER_MIME_TYPE = "application/vnd.google-apps.folder"
|
|
@@ -64,11 +63,8 @@ PUBLIC_PERMISSION_IDS = [
|
|
|
64
63
|
"domainWithLink",
|
|
65
64
|
]
|
|
66
65
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
"https://www.googleapis.com/auth/drive.readonly",
|
|
70
|
-
"https://www.googleapis.com/auth/drive.metadata.readonly",
|
|
71
|
-
"https://www.googleapis.com/auth/drive.file",
|
|
66
|
+
|
|
67
|
+
DRIVE_SERVICE_SCOPES = [
|
|
72
68
|
"https://www.googleapis.com/auth/admin.directory.group.readonly",
|
|
73
69
|
"https://www.googleapis.com/auth/admin.directory.group.member.readonly",
|
|
74
70
|
"https://www.googleapis.com/auth/admin.directory.user.readonly",
|
|
@@ -92,6 +88,7 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
|
|
|
92
88
|
def __init__(self):
|
|
93
89
|
super().__init__()
|
|
94
90
|
self._drive_service = None
|
|
91
|
+
self._directory_service = None
|
|
95
92
|
|
|
96
93
|
@property
|
|
97
94
|
def config(self) -> SourceGoogleDriveSpec:
|
|
@@ -111,32 +108,41 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
|
|
|
111
108
|
assert isinstance(value, SourceGoogleDriveSpec)
|
|
112
109
|
self._config = value
|
|
113
110
|
|
|
114
|
-
|
|
115
|
-
def google_drive_service(self):
|
|
111
|
+
def _build_google_service(self, service_name: str, version: str, scopes: List[str] = None):
|
|
116
112
|
if self.config is None:
|
|
117
113
|
# We shouldn't hit this; config should always get set before attempting to
|
|
118
114
|
# list or read files.
|
|
119
|
-
raise ValueError("Source config is missing; cannot create the Google
|
|
115
|
+
raise ValueError(f"Source config is missing; cannot create the Google {service_name} client.")
|
|
120
116
|
try:
|
|
121
|
-
if self.
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
)
|
|
129
|
-
self._drive_service = build("drive", "v3", credentials=creds)
|
|
117
|
+
if self.config.credentials.auth_type == "Client":
|
|
118
|
+
creds = credentials.Credentials.from_authorized_user_info(self.config.credentials.dict())
|
|
119
|
+
else:
|
|
120
|
+
creds = service_account.Credentials.from_service_account_info(
|
|
121
|
+
json.loads(self.config.credentials.service_account_info), scopes=scopes
|
|
122
|
+
)
|
|
123
|
+
google_service = build(service_name, version, credentials=creds)
|
|
130
124
|
except Exception as e:
|
|
131
125
|
raise AirbyteTracedException(
|
|
132
126
|
internal_message=str(e),
|
|
133
|
-
message="Could not authenticate with Google
|
|
127
|
+
message=f"Could not authenticate with Google {service_name}. Please check your credentials.",
|
|
134
128
|
failure_type=FailureType.config_error,
|
|
135
129
|
exception=e,
|
|
136
130
|
)
|
|
137
131
|
|
|
132
|
+
return google_service
|
|
133
|
+
|
|
134
|
+
@property
|
|
135
|
+
def google_drive_service(self):
|
|
136
|
+
if self._drive_service is None:
|
|
137
|
+
self._drive_service = self._build_google_service("drive", "v3")
|
|
138
138
|
return self._drive_service
|
|
139
139
|
|
|
140
|
+
@property
|
|
141
|
+
def google_directory_service(self):
|
|
142
|
+
if self._directory_service is None:
|
|
143
|
+
self._directory_service = self._build_google_service("admin", "directory_v1", DRIVE_SERVICE_SCOPES)
|
|
144
|
+
return self._directory_service
|
|
145
|
+
|
|
140
146
|
def get_matching_files(self, globs: List[str], prefix: Optional[str], logger: logging.Logger) -> Iterable[RemoteFile]:
|
|
141
147
|
"""
|
|
142
148
|
Get all files matching the specified glob patterns.
|
|
@@ -310,7 +316,7 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
|
|
|
310
316
|
except Exception as e:
|
|
311
317
|
raise ErrorDownloadingFile(f"There was an error while trying to download the file {file.uri}: {str(e)}")
|
|
312
318
|
|
|
313
|
-
def get_file_permissions(self, file_id: str, file_name: str, logger: logging.Logger) -> Tuple[List[
|
|
319
|
+
def get_file_permissions(self, file_id: str, file_name: str, logger: logging.Logger) -> Tuple[List[RemoteIdentity], bool]:
|
|
314
320
|
"""
|
|
315
321
|
Retrieves the permissions of a file in Google Drive and checks for public access.
|
|
316
322
|
|
|
@@ -341,18 +347,17 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
|
|
|
341
347
|
if identity is not None:
|
|
342
348
|
remote_identities.append(identity)
|
|
343
349
|
|
|
344
|
-
logger.info(f"File {file_name} has {len(remote_identities)} valid permissions")
|
|
345
350
|
return remote_identities, is_public
|
|
346
351
|
except Exception as e:
|
|
347
352
|
raise ErrorFetchingMetadata(f"An error occurred while retrieving file permissions: {str(e)}")
|
|
348
353
|
|
|
349
|
-
def _to_remote_file_identity(self, identity: dict[str, Any]) ->
|
|
354
|
+
def _to_remote_file_identity(self, identity: dict[str, Any]) -> RemoteIdentity | None:
|
|
350
355
|
if identity.get("id") in PUBLIC_PERMISSION_IDS:
|
|
351
356
|
return None
|
|
352
357
|
if identity.get("deleted") is True:
|
|
353
358
|
return None
|
|
354
359
|
|
|
355
|
-
return
|
|
360
|
+
return RemoteIdentity(
|
|
356
361
|
modified_at=datetime.now(),
|
|
357
362
|
id=uuid.uuid4(),
|
|
358
363
|
remote_id=identity.get("emailAddress"),
|
|
@@ -364,7 +369,7 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
|
|
|
364
369
|
|
|
365
370
|
def get_file_acl_permissions(self, file: GoogleDriveRemoteFile, logger: logging.Logger) -> Dict[str, Any]:
|
|
366
371
|
remote_identities, is_public = self.get_file_permissions(file.id, file_name=file.uri, logger=logger)
|
|
367
|
-
return
|
|
372
|
+
return RemotePermissions(
|
|
368
373
|
id=file.id,
|
|
369
374
|
file_path=file.uri,
|
|
370
375
|
allowed_identity_remote_ids=[p.remote_id for p in remote_identities],
|
|
@@ -400,34 +405,34 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
|
|
|
400
405
|
def load_identity_groups(self, logger: logging.Logger) -> Dict[str, Any]:
|
|
401
406
|
domain = self.config.delivery_method.domain
|
|
402
407
|
if not domain:
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
408
|
+
logger.info("No domain provided. Trying to fetch identities from the user workspace.")
|
|
409
|
+
api_args = {"customer": "my_customer"}
|
|
410
|
+
else:
|
|
411
|
+
api_args = {"domain": domain}
|
|
406
412
|
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
members_api = directory_service.members()
|
|
413
|
+
users_api = self.google_directory_service.users()
|
|
414
|
+
groups_api = self.google_directory_service.groups()
|
|
415
|
+
members_api = self.google_directory_service.members()
|
|
411
416
|
|
|
412
|
-
for user in self._get_looping_google_api_list_response(users_api, "users",
|
|
413
|
-
rfp =
|
|
417
|
+
for user in self._get_looping_google_api_list_response(users_api, "users", args=api_args, logger=logger):
|
|
418
|
+
rfp = RemoteIdentity(
|
|
414
419
|
id=uuid.uuid4(),
|
|
415
420
|
remote_id=user["primaryEmail"],
|
|
416
421
|
name=user["name"]["fullName"] if user["name"] is not None else None,
|
|
417
422
|
email_address=user["primaryEmail"],
|
|
418
423
|
member_email_addresses=[x["address"] for x in user["emails"]],
|
|
419
|
-
type=
|
|
424
|
+
type=RemoteIdentityType.USER,
|
|
420
425
|
modified_at=datetime_now(),
|
|
421
426
|
)
|
|
422
427
|
yield rfp.dict()
|
|
423
428
|
|
|
424
|
-
for group in self._get_looping_google_api_list_response(groups_api, "groups",
|
|
425
|
-
rfp =
|
|
429
|
+
for group in self._get_looping_google_api_list_response(groups_api, "groups", args=api_args, logger=logger):
|
|
430
|
+
rfp = RemoteIdentity(
|
|
426
431
|
id=uuid.uuid4(),
|
|
427
432
|
remote_id=group["email"],
|
|
428
433
|
name=group["name"],
|
|
429
434
|
email_address=group["email"],
|
|
430
|
-
type=
|
|
435
|
+
type=RemoteIdentityType.GROUP,
|
|
431
436
|
modified_at=datetime_now(),
|
|
432
437
|
)
|
|
433
438
|
|
|
File without changes
|