airbyte-source-google-drive 0.2.0.dev202502031727__tar.gz → 0.2.0.dev202502032236__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of airbyte-source-google-drive might be problematic. Click here for more details.
- {airbyte_source_google_drive-0.2.0.dev202502031727 → airbyte_source_google_drive-0.2.0.dev202502032236}/PKG-INFO +2 -2
- {airbyte_source_google_drive-0.2.0.dev202502031727 → airbyte_source_google_drive-0.2.0.dev202502032236}/pyproject.toml +2 -2
- {airbyte_source_google_drive-0.2.0.dev202502031727 → airbyte_source_google_drive-0.2.0.dev202502032236}/source_google_drive/spec.py +33 -13
- {airbyte_source_google_drive-0.2.0.dev202502031727 → airbyte_source_google_drive-0.2.0.dev202502032236}/source_google_drive/stream_reader.py +10 -11
- {airbyte_source_google_drive-0.2.0.dev202502031727 → airbyte_source_google_drive-0.2.0.dev202502032236}/README.md +0 -0
- {airbyte_source_google_drive-0.2.0.dev202502031727 → airbyte_source_google_drive-0.2.0.dev202502032236}/source_google_drive/__init__.py +0 -0
- {airbyte_source_google_drive-0.2.0.dev202502031727 → airbyte_source_google_drive-0.2.0.dev202502032236}/source_google_drive/exceptions.py +0 -0
- {airbyte_source_google_drive-0.2.0.dev202502031727 → airbyte_source_google_drive-0.2.0.dev202502032236}/source_google_drive/run.py +0 -0
- {airbyte_source_google_drive-0.2.0.dev202502031727 → airbyte_source_google_drive-0.2.0.dev202502032236}/source_google_drive/source.py +0 -0
- {airbyte_source_google_drive-0.2.0.dev202502031727 → airbyte_source_google_drive-0.2.0.dev202502032236}/source_google_drive/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: airbyte-source-google-drive
|
|
3
|
-
Version: 0.2.0.dev202502031727
|
|
3
|
+
Version: 0.2.0.dev202502032236
|
|
4
4
|
Summary: Source implementation for Google Drive.
|
|
5
5
|
License: ELv2
|
|
6
6
|
Author: Airbyte
|
|
@@ -10,7 +10,7 @@ Classifier: License :: Other/Proprietary License
|
|
|
10
10
|
Classifier: Programming Language :: Python :: 3
|
|
11
11
|
Classifier: Programming Language :: Python :: 3.10
|
|
12
12
|
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
-
Requires-Dist: airbyte-cdk[file-based] (==6.26.0.
|
|
13
|
+
Requires-Dist: airbyte-cdk[file-based] (==6.26.0.dev04107)
|
|
14
14
|
Requires-Dist: google-api-python-client (==2.104.0)
|
|
15
15
|
Requires-Dist: google-api-python-client-stubs (==1.18.0)
|
|
16
16
|
Requires-Dist: google-auth-httplib2 (==0.1.1)
|
|
@@ -5,7 +5,7 @@ requires = [
|
|
|
5
5
|
build-backend = "poetry.core.masonry.api"
|
|
6
6
|
|
|
7
7
|
[tool.poetry]
|
|
8
|
-
version = "0.2.0.dev202502031727"
|
|
8
|
+
version = "0.2.0.dev202502032236"
|
|
9
9
|
name = "airbyte-source-google-drive"
|
|
10
10
|
description = "Source implementation for Google Drive."
|
|
11
11
|
authors = [
|
|
@@ -31,7 +31,7 @@ google-api-python-client-stubs = "==1.18.0"
|
|
|
31
31
|
extras = [
|
|
32
32
|
"file-based",
|
|
33
33
|
]
|
|
34
|
-
version = "6.26.0.
|
|
34
|
+
version = "6.26.0.dev04107"
|
|
35
35
|
|
|
36
36
|
[tool.poetry.scripts]
|
|
37
37
|
source-google-drive = "source_google_drive.run:run"
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
#
|
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
3
|
#
|
|
4
|
-
|
|
4
|
+
import uuid
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
from enum import Enum
|
|
5
7
|
from typing import Any, Dict, Literal, Optional, Union
|
|
6
8
|
|
|
7
9
|
import dpath.util
|
|
@@ -11,22 +13,40 @@ from airbyte_cdk import OneOfOptionConfig
|
|
|
11
13
|
from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import (
|
|
12
14
|
AbstractFileBasedSpec,
|
|
13
15
|
DeliverRawFiles,
|
|
16
|
+
DeliverRecords,
|
|
14
17
|
)
|
|
15
18
|
from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import (
|
|
16
|
-
|
|
19
|
+
DeliverPermissions as DeliverPermissionsBase,
|
|
17
20
|
)
|
|
18
21
|
|
|
19
22
|
|
|
20
|
-
class
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
23
|
+
class RemoteIdentityType(Enum):
|
|
24
|
+
USER = "user"
|
|
25
|
+
GROUP = "group"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class RemoteIdentity(BaseModel):
|
|
29
|
+
id: uuid.UUID
|
|
30
|
+
remote_id: str
|
|
31
|
+
parent_id: str | None = None
|
|
32
|
+
name: str | None = None
|
|
33
|
+
description: str | None = None
|
|
34
|
+
email_address: str | None = None
|
|
35
|
+
member_email_addresses: list[str] | None = None
|
|
36
|
+
type: RemoteIdentityType
|
|
37
|
+
modified_at: datetime
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class RemotePermissions(BaseModel):
|
|
41
|
+
id: str
|
|
42
|
+
file_path: str
|
|
43
|
+
allowed_identity_remote_ids: list[str] | None = None
|
|
44
|
+
denied_identity_remote_ids: list[str] | None = None
|
|
45
|
+
publicly_accessible: bool = False
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class DeliverPermissions(DeliverPermissionsBase):
|
|
49
|
+
domain: Optional[str] = Field(title="Domain", description="The Google domain of the identities.", airbyte_hidden=False, order=1)
|
|
30
50
|
|
|
31
51
|
|
|
32
52
|
class OAuthCredentials(BaseModel):
|
|
@@ -77,7 +97,7 @@ class SourceGoogleDriveSpec(AbstractFileBasedSpec, BaseModel):
|
|
|
77
97
|
pattern_descriptor="https://drive.google.com/drive/folders/MY-FOLDER-ID",
|
|
78
98
|
)
|
|
79
99
|
|
|
80
|
-
delivery_method: DeliverRecords | DeliverRawFiles = Field(
|
|
100
|
+
delivery_method: DeliverRecords | DeliverRawFiles | DeliverPermissions = Field(
|
|
81
101
|
title="Delivery Method",
|
|
82
102
|
discriminator="delivery_type",
|
|
83
103
|
type="object",
|
|
@@ -18,14 +18,13 @@ from googleapiclient.discovery import build
|
|
|
18
18
|
from googleapiclient.http import MediaIoBaseDownload
|
|
19
19
|
|
|
20
20
|
from airbyte_cdk import AirbyteTracedException, FailureType
|
|
21
|
-
from airbyte_cdk.sources.file_based.config.permissions import RemoteFileIdentity, RemoteFileIdentityType, RemoteFilePermissions
|
|
22
21
|
from airbyte_cdk.sources.file_based.exceptions import FileSizeLimitError
|
|
23
22
|
from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader, FileReadMode
|
|
24
23
|
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
|
|
25
24
|
from source_google_drive.utils import get_folder_id
|
|
26
25
|
|
|
27
26
|
from .exceptions import ErrorDownloadingFile, ErrorFetchingMetadata
|
|
28
|
-
from .spec import SourceGoogleDriveSpec
|
|
27
|
+
from .spec import RemoteIdentity, RemoteIdentityType, RemotePermissions, SourceGoogleDriveSpec
|
|
29
28
|
|
|
30
29
|
|
|
31
30
|
FOLDER_MIME_TYPE = "application/vnd.google-apps.folder"
|
|
@@ -122,7 +121,7 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
|
|
|
122
121
|
if self.config.credentials.auth_type == "Client":
|
|
123
122
|
creds = credentials.Credentials.from_authorized_user_info(self.config.credentials.dict())
|
|
124
123
|
else:
|
|
125
|
-
scopes = PERMISSIONS_API_SCOPES if self.
|
|
124
|
+
scopes = PERMISSIONS_API_SCOPES if self.include_identities_stream() else None
|
|
126
125
|
creds = service_account.Credentials.from_service_account_info(
|
|
127
126
|
json.loads(self.config.credentials.service_account_info), scopes=scopes
|
|
128
127
|
)
|
|
@@ -310,7 +309,7 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
|
|
|
310
309
|
except Exception as e:
|
|
311
310
|
raise ErrorDownloadingFile(f"There was an error while trying to download the file {file.uri}: {str(e)}")
|
|
312
311
|
|
|
313
|
-
def get_file_permissions(self, file_id: str, file_name: str, logger: logging.Logger) -> Tuple[List[RemoteFileIdentity], bool]:
|
|
312
|
+
def get_file_permissions(self, file_id: str, file_name: str, logger: logging.Logger) -> Tuple[List[RemoteIdentity], bool]:
|
|
314
313
|
"""
|
|
315
314
|
Retrieves the permissions of a file in Google Drive and checks for public access.
|
|
316
315
|
|
|
@@ -346,13 +345,13 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
|
|
|
346
345
|
except Exception as e:
|
|
347
346
|
raise ErrorFetchingMetadata(f"An error occurred while retrieving file permissions: {str(e)}")
|
|
348
347
|
|
|
349
|
-
def _to_remote_file_identity(self, identity: dict[str, Any]) -> RemoteFileIdentity | None:
|
|
348
|
+
def _to_remote_file_identity(self, identity: dict[str, Any]) -> RemoteIdentity | None:
|
|
350
349
|
if identity.get("id") in PUBLIC_PERMISSION_IDS:
|
|
351
350
|
return None
|
|
352
351
|
if identity.get("deleted") is True:
|
|
353
352
|
return None
|
|
354
353
|
|
|
355
|
-
return RemoteFileIdentity(
|
|
354
|
+
return RemoteIdentity(
|
|
356
355
|
modified_at=datetime.now(),
|
|
357
356
|
id=uuid.uuid4(),
|
|
358
357
|
remote_id=identity.get("emailAddress"),
|
|
@@ -364,7 +363,7 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
|
|
|
364
363
|
|
|
365
364
|
def get_file_acl_permissions(self, file: GoogleDriveRemoteFile, logger: logging.Logger) -> Dict[str, Any]:
|
|
366
365
|
remote_identities, is_public = self.get_file_permissions(file.id, file_name=file.uri, logger=logger)
|
|
367
|
-
return RemoteFilePermissions(
|
|
366
|
+
return RemotePermissions(
|
|
368
367
|
id=file.id,
|
|
369
368
|
file_path=file.uri,
|
|
370
369
|
allowed_identity_remote_ids=[p.remote_id for p in remote_identities],
|
|
@@ -410,24 +409,24 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
|
|
|
410
409
|
members_api = directory_service.members()
|
|
411
410
|
|
|
412
411
|
for user in self._get_looping_google_api_list_response(users_api, "users", {"domain": domain}, logger):
|
|
413
|
-
rfp = RemoteFileIdentity(
|
|
412
|
+
rfp = RemoteIdentity(
|
|
414
413
|
id=uuid.uuid4(),
|
|
415
414
|
remote_id=user["primaryEmail"],
|
|
416
415
|
name=user["name"]["fullName"] if user["name"] is not None else None,
|
|
417
416
|
email_address=user["primaryEmail"],
|
|
418
417
|
member_email_addresses=[x["address"] for x in user["emails"]],
|
|
419
|
-
type=RemoteFileIdentityType.USER,
|
|
418
|
+
type=RemoteIdentityType.USER,
|
|
420
419
|
modified_at=datetime_now(),
|
|
421
420
|
)
|
|
422
421
|
yield rfp.dict()
|
|
423
422
|
|
|
424
423
|
for group in self._get_looping_google_api_list_response(groups_api, "groups", {"domain": domain}, logger):
|
|
425
|
-
rfp = RemoteFileIdentity(
|
|
424
|
+
rfp = RemoteIdentity(
|
|
426
425
|
id=uuid.uuid4(),
|
|
427
426
|
remote_id=group["email"],
|
|
428
427
|
name=group["name"],
|
|
429
428
|
email_address=group["email"],
|
|
430
|
-
type=RemoteFileIdentityType.GROUP,
|
|
429
|
+
type=RemoteIdentityType.GROUP,
|
|
431
430
|
modified_at=datetime_now(),
|
|
432
431
|
)
|
|
433
432
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|