airbyte-source-google-drive 0.2.0.dev202502032236__py3-none-any.whl → 0.2.0.dev202502061746__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of airbyte-source-google-drive might be problematic. Click here for more details.
- {airbyte_source_google_drive-0.2.0.dev202502032236.dist-info → airbyte_source_google_drive-0.2.0.dev202502061746.dist-info}/METADATA +2 -2
- {airbyte_source_google_drive-0.2.0.dev202502032236.dist-info → airbyte_source_google_drive-0.2.0.dev202502061746.dist-info}/RECORD +6 -6
- source_google_drive/source.py +14 -2
- source_google_drive/stream_reader.py +34 -28
- {airbyte_source_google_drive-0.2.0.dev202502032236.dist-info → airbyte_source_google_drive-0.2.0.dev202502061746.dist-info}/WHEEL +0 -0
- {airbyte_source_google_drive-0.2.0.dev202502032236.dist-info → airbyte_source_google_drive-0.2.0.dev202502061746.dist-info}/entry_points.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: airbyte-source-google-drive
|
|
3
|
-
Version: 0.2.0.dev202502032236
|
|
3
|
+
Version: 0.2.0.dev202502061746
|
|
4
4
|
Summary: Source implementation for Google Drive.
|
|
5
5
|
License: ELv2
|
|
6
6
|
Author: Airbyte
|
|
@@ -10,7 +10,7 @@ Classifier: License :: Other/Proprietary License
|
|
|
10
10
|
Classifier: Programming Language :: Python :: 3
|
|
11
11
|
Classifier: Programming Language :: Python :: 3.10
|
|
12
12
|
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
-
Requires-Dist: airbyte-cdk[file-based] (==6.26.0.
|
|
13
|
+
Requires-Dist: airbyte-cdk[file-based] (==6.26.0.dev04108)
|
|
14
14
|
Requires-Dist: google-api-python-client (==2.104.0)
|
|
15
15
|
Requires-Dist: google-api-python-client-stubs (==1.18.0)
|
|
16
16
|
Requires-Dist: google-auth-httplib2 (==0.1.1)
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
source_google_drive/__init__.py,sha256=SAgPIoGpjdHTY7mvf__A99FiElBh3WvdKSHLstg-BUE,134
|
|
2
2
|
source_google_drive/exceptions.py,sha256=6sBxbdZhj0CARkctfBaNgGHB_GGcFUkY4cnMUWyOruM,268
|
|
3
3
|
source_google_drive/run.py,sha256=5AQd906FbSGaNZj0udrr-VKmiSS4nsor7F7noMjsdi0,677
|
|
4
|
-
source_google_drive/source.py,sha256=
|
|
4
|
+
source_google_drive/source.py,sha256=tOljkgeg8RIxmhrpfa-qgGhI6WPhiL_-I8itTa6JH04,3959
|
|
5
5
|
source_google_drive/spec.py,sha256=-WkA2zGuQtf3G7uK8uq9BnimUlQh0s3vsqROmIHOgzI,4718
|
|
6
|
-
source_google_drive/stream_reader.py,sha256=
|
|
6
|
+
source_google_drive/stream_reader.py,sha256=jKMibQGjpP-9raJ557TqbOg6iGVuPYv9vamStwXDOCQ,20176
|
|
7
7
|
source_google_drive/utils.py,sha256=ewR-kBKLmtD-s7zqCfGECfzWYF43tpQdscAQIlUEkR8,1022
|
|
8
|
-
airbyte_source_google_drive-0.2.0.
|
|
9
|
-
airbyte_source_google_drive-0.2.0.
|
|
10
|
-
airbyte_source_google_drive-0.2.0.
|
|
11
|
-
airbyte_source_google_drive-0.2.0.
|
|
8
|
+
airbyte_source_google_drive-0.2.0.dev202502061746.dist-info/METADATA,sha256=pqQMdjznFYJxr7fcCYHXaLBZcESfHaSS7o0jr2PUH2s,5536
|
|
9
|
+
airbyte_source_google_drive-0.2.0.dev202502061746.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
|
|
10
|
+
airbyte_source_google_drive-0.2.0.dev202502061746.dist-info/entry_points.txt,sha256=YgpJf0nA5Mn0B7YC9VOFI847vz1jI6U4q7BeLUOXa54,67
|
|
11
|
+
airbyte_source_google_drive-0.2.0.dev202502061746.dist-info/RECORD,,
|
source_google_drive/source.py
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
from typing import Any, Mapping, Optional
|
|
7
7
|
|
|
8
8
|
from airbyte_cdk import AdvancedAuth, ConfiguredAirbyteCatalog, ConnectorSpecification, OAuthConfigSpecification, TState
|
|
9
|
-
from airbyte_cdk.models import AuthFlowType
|
|
9
|
+
from airbyte_cdk.models import AuthFlowType, OauthConnectorInputSpecification
|
|
10
10
|
from airbyte_cdk.sources.file_based.file_based_source import FileBasedSource
|
|
11
11
|
from airbyte_cdk.sources.file_based.stream.cursor.default_file_based_cursor import DefaultFileBasedCursor
|
|
12
12
|
from source_google_drive.spec import SourceGoogleDriveSpec
|
|
@@ -28,6 +28,11 @@ class SourceGoogleDrive(FileBasedSource):
|
|
|
28
28
|
"""
|
|
29
29
|
Returns the specification describing what fields can be configured by a user when setting up a file-based source.
|
|
30
30
|
"""
|
|
31
|
+
oauth_connector_input_specification = OauthConnectorInputSpecification(
|
|
32
|
+
consent_url="https://accounts.google.com/o/oauth2/v2/auth?{{client_id_param}}&{{redirect_uri_param}}&response_type=code&{{scope_param}}&access_type=offline&{{state_param}}&include_granted_scopes=true&prompt=consent",
|
|
33
|
+
access_token_url="https://oauth2.googleapis.com/token?{{client_id_param}}&{{client_secret_param}}&{{auth_code_param}}&{{redirect_uri_param}}&grant_type=authorization_code",
|
|
34
|
+
scope="https://www.googleapis.com/auth/drive.readonly https://www.googleapis.com/auth/admin.directory.group.readonly https://www.googleapis.com/auth/admin.directory.group.member.readonly https://www.googleapis.com/auth/admin.directory.user.readonly",
|
|
35
|
+
)
|
|
31
36
|
|
|
32
37
|
return ConnectorSpecification(
|
|
33
38
|
documentationUrl=self.spec_class.documentation_url(),
|
|
@@ -37,10 +42,17 @@ class SourceGoogleDrive(FileBasedSource):
|
|
|
37
42
|
predicate_key=["credentials", "auth_type"],
|
|
38
43
|
predicate_value="Client",
|
|
39
44
|
oauth_config_specification=OAuthConfigSpecification(
|
|
45
|
+
oauth_connector_input_specification=oauth_connector_input_specification,
|
|
40
46
|
complete_oauth_output_specification={
|
|
41
47
|
"type": "object",
|
|
42
48
|
"additionalProperties": False,
|
|
43
|
-
"properties": {
|
|
49
|
+
"properties": {
|
|
50
|
+
"refresh_token": {
|
|
51
|
+
"type": "string",
|
|
52
|
+
"path_in_connector_config": ["credentials", "refresh_token"],
|
|
53
|
+
"path_in_oauth_response": ["refresh_token"],
|
|
54
|
+
}
|
|
55
|
+
},
|
|
44
56
|
},
|
|
45
57
|
complete_oauth_server_input_specification={
|
|
46
58
|
"type": "object",
|
|
@@ -63,11 +63,8 @@ PUBLIC_PERMISSION_IDS = [
|
|
|
63
63
|
"domainWithLink",
|
|
64
64
|
]
|
|
65
65
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
"https://www.googleapis.com/auth/drive.readonly",
|
|
69
|
-
"https://www.googleapis.com/auth/drive.metadata.readonly",
|
|
70
|
-
"https://www.googleapis.com/auth/drive.file",
|
|
66
|
+
|
|
67
|
+
DRIVE_SERVICE_SCOPES = [
|
|
71
68
|
"https://www.googleapis.com/auth/admin.directory.group.readonly",
|
|
72
69
|
"https://www.googleapis.com/auth/admin.directory.group.member.readonly",
|
|
73
70
|
"https://www.googleapis.com/auth/admin.directory.user.readonly",
|
|
@@ -91,6 +88,7 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
|
|
|
91
88
|
def __init__(self):
|
|
92
89
|
super().__init__()
|
|
93
90
|
self._drive_service = None
|
|
91
|
+
self._directory_service = None
|
|
94
92
|
|
|
95
93
|
@property
|
|
96
94
|
def config(self) -> SourceGoogleDriveSpec:
|
|
@@ -110,32 +108,41 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
|
|
|
110
108
|
assert isinstance(value, SourceGoogleDriveSpec)
|
|
111
109
|
self._config = value
|
|
112
110
|
|
|
113
|
-
|
|
114
|
-
def google_drive_service(self):
|
|
111
|
+
def _build_google_service(self, service_name: str, version: str, scopes: List[str] = None):
|
|
115
112
|
if self.config is None:
|
|
116
113
|
# We shouldn't hit this; config should always get set before attempting to
|
|
117
114
|
# list or read files.
|
|
118
|
-
raise ValueError("Source config is missing; cannot create the Google Drive client.")
|
|
115
|
+
raise ValueError(f"Source config is missing; cannot create the Google {service_name} client.")
|
|
119
116
|
try:
|
|
120
|
-
if self.
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
)
|
|
128
|
-
self._drive_service = build("drive", "v3", credentials=creds)
|
|
117
|
+
if self.config.credentials.auth_type == "Client":
|
|
118
|
+
creds = credentials.Credentials.from_authorized_user_info(self.config.credentials.dict())
|
|
119
|
+
else:
|
|
120
|
+
creds = service_account.Credentials.from_service_account_info(
|
|
121
|
+
json.loads(self.config.credentials.service_account_info), scopes=scopes
|
|
122
|
+
)
|
|
123
|
+
google_service = build(service_name, version, credentials=creds)
|
|
129
124
|
except Exception as e:
|
|
130
125
|
raise AirbyteTracedException(
|
|
131
126
|
internal_message=str(e),
|
|
132
|
-
message="Could not authenticate with Google Drive. Please check your credentials.",
|
|
127
|
+
message=f"Could not authenticate with Google {service_name}. Please check your credentials.",
|
|
133
128
|
failure_type=FailureType.config_error,
|
|
134
129
|
exception=e,
|
|
135
130
|
)
|
|
136
131
|
|
|
132
|
+
return google_service
|
|
133
|
+
|
|
134
|
+
@property
|
|
135
|
+
def google_drive_service(self):
|
|
136
|
+
if self._drive_service is None:
|
|
137
|
+
self._drive_service = self._build_google_service("drive", "v3")
|
|
137
138
|
return self._drive_service
|
|
138
139
|
|
|
140
|
+
@property
|
|
141
|
+
def google_directory_service(self):
|
|
142
|
+
if self._directory_service is None:
|
|
143
|
+
self._directory_service = self._build_google_service("admin", "directory_v1", DRIVE_SERVICE_SCOPES)
|
|
144
|
+
return self._directory_service
|
|
145
|
+
|
|
139
146
|
def get_matching_files(self, globs: List[str], prefix: Optional[str], logger: logging.Logger) -> Iterable[RemoteFile]:
|
|
140
147
|
"""
|
|
141
148
|
Get all files matching the specified glob patterns.
|
|
@@ -340,7 +347,6 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
|
|
|
340
347
|
if identity is not None:
|
|
341
348
|
remote_identities.append(identity)
|
|
342
349
|
|
|
343
|
-
logger.info(f"File {file_name} has {len(remote_identities)} valid permissions")
|
|
344
350
|
return remote_identities, is_public
|
|
345
351
|
except Exception as e:
|
|
346
352
|
raise ErrorFetchingMetadata(f"An error occurred while retrieving file permissions: {str(e)}")
|
|
@@ -399,16 +405,16 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
|
|
|
399
405
|
def load_identity_groups(self, logger: logging.Logger) -> Dict[str, Any]:
|
|
400
406
|
domain = self.config.delivery_method.domain
|
|
401
407
|
if not domain:
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
408
|
+
logger.info("No domain provided. Trying to fetch identities from the user workspace.")
|
|
409
|
+
api_args = {"customer": "my_customer"}
|
|
410
|
+
else:
|
|
411
|
+
api_args = {"domain": domain}
|
|
405
412
|
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
members_api = directory_service.members()
|
|
413
|
+
users_api = self.google_directory_service.users()
|
|
414
|
+
groups_api = self.google_directory_service.groups()
|
|
415
|
+
members_api = self.google_directory_service.members()
|
|
410
416
|
|
|
411
|
-
for user in self._get_looping_google_api_list_response(users_api, "users",
|
|
417
|
+
for user in self._get_looping_google_api_list_response(users_api, "users", args=api_args, logger=logger):
|
|
412
418
|
rfp = RemoteIdentity(
|
|
413
419
|
id=uuid.uuid4(),
|
|
414
420
|
remote_id=user["primaryEmail"],
|
|
@@ -420,7 +426,7 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
|
|
|
420
426
|
)
|
|
421
427
|
yield rfp.dict()
|
|
422
428
|
|
|
423
|
-
for group in self._get_looping_google_api_list_response(groups_api, "groups",
|
|
429
|
+
for group in self._get_looping_google_api_list_response(groups_api, "groups", args=api_args, logger=logger):
|
|
424
430
|
rfp = RemoteIdentity(
|
|
425
431
|
id=uuid.uuid4(),
|
|
426
432
|
remote_id=group["email"],
|
|
File without changes
|