airbyte-source-google-drive 0.2.0.dev202502032236__py3-none-any.whl → 0.2.0.dev202502061746__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of airbyte-source-google-drive might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: airbyte-source-google-drive
3
- Version: 0.2.0.dev202502032236
3
+ Version: 0.2.0.dev202502061746
4
4
  Summary: Source implementation for Google Drive.
5
5
  License: ELv2
6
6
  Author: Airbyte
@@ -10,7 +10,7 @@ Classifier: License :: Other/Proprietary License
10
10
  Classifier: Programming Language :: Python :: 3
11
11
  Classifier: Programming Language :: Python :: 3.10
12
12
  Classifier: Programming Language :: Python :: 3.11
13
- Requires-Dist: airbyte-cdk[file-based] (==6.26.0.dev04107)
13
+ Requires-Dist: airbyte-cdk[file-based] (==6.26.0.dev04108)
14
14
  Requires-Dist: google-api-python-client (==2.104.0)
15
15
  Requires-Dist: google-api-python-client-stubs (==1.18.0)
16
16
  Requires-Dist: google-auth-httplib2 (==0.1.1)
@@ -1,11 +1,11 @@
1
1
  source_google_drive/__init__.py,sha256=SAgPIoGpjdHTY7mvf__A99FiElBh3WvdKSHLstg-BUE,134
2
2
  source_google_drive/exceptions.py,sha256=6sBxbdZhj0CARkctfBaNgGHB_GGcFUkY4cnMUWyOruM,268
3
3
  source_google_drive/run.py,sha256=5AQd906FbSGaNZj0udrr-VKmiSS4nsor7F7noMjsdi0,677
4
- source_google_drive/source.py,sha256=OyKTjPg80KR8yZiYiC3YSi6E6egothtAn3YAHAS6Q6A,2839
4
+ source_google_drive/source.py,sha256=tOljkgeg8RIxmhrpfa-qgGhI6WPhiL_-I8itTa6JH04,3959
5
5
  source_google_drive/spec.py,sha256=-WkA2zGuQtf3G7uK8uq9BnimUlQh0s3vsqROmIHOgzI,4718
6
- source_google_drive/stream_reader.py,sha256=8F0jEKz1ElM2wqHxVR91RUJfCSDoLtMQUxbX5lFkEy0,20166
6
+ source_google_drive/stream_reader.py,sha256=jKMibQGjpP-9raJ557TqbOg6iGVuPYv9vamStwXDOCQ,20176
7
7
  source_google_drive/utils.py,sha256=ewR-kBKLmtD-s7zqCfGECfzWYF43tpQdscAQIlUEkR8,1022
8
- airbyte_source_google_drive-0.2.0.dev202502032236.dist-info/METADATA,sha256=Hfb2iDOlay912TAmyj3QZ9GqvyVstYZflM_J2lpf2Lk,5536
9
- airbyte_source_google_drive-0.2.0.dev202502032236.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
10
- airbyte_source_google_drive-0.2.0.dev202502032236.dist-info/entry_points.txt,sha256=YgpJf0nA5Mn0B7YC9VOFI847vz1jI6U4q7BeLUOXa54,67
11
- airbyte_source_google_drive-0.2.0.dev202502032236.dist-info/RECORD,,
8
+ airbyte_source_google_drive-0.2.0.dev202502061746.dist-info/METADATA,sha256=pqQMdjznFYJxr7fcCYHXaLBZcESfHaSS7o0jr2PUH2s,5536
9
+ airbyte_source_google_drive-0.2.0.dev202502061746.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
10
+ airbyte_source_google_drive-0.2.0.dev202502061746.dist-info/entry_points.txt,sha256=YgpJf0nA5Mn0B7YC9VOFI847vz1jI6U4q7BeLUOXa54,67
11
+ airbyte_source_google_drive-0.2.0.dev202502061746.dist-info/RECORD,,
@@ -6,7 +6,7 @@
6
6
  from typing import Any, Mapping, Optional
7
7
 
8
8
  from airbyte_cdk import AdvancedAuth, ConfiguredAirbyteCatalog, ConnectorSpecification, OAuthConfigSpecification, TState
9
- from airbyte_cdk.models import AuthFlowType
9
+ from airbyte_cdk.models import AuthFlowType, OauthConnectorInputSpecification
10
10
  from airbyte_cdk.sources.file_based.file_based_source import FileBasedSource
11
11
  from airbyte_cdk.sources.file_based.stream.cursor.default_file_based_cursor import DefaultFileBasedCursor
12
12
  from source_google_drive.spec import SourceGoogleDriveSpec
@@ -28,6 +28,11 @@ class SourceGoogleDrive(FileBasedSource):
28
28
  """
29
29
  Returns the specification describing what fields can be configured by a user when setting up a file-based source.
30
30
  """
31
+ oauth_connector_input_specification = OauthConnectorInputSpecification(
32
+ consent_url="https://accounts.google.com/o/oauth2/v2/auth?{{client_id_param}}&{{redirect_uri_param}}&response_type=code&{{scope_param}}&access_type=offline&{{state_param}}&include_granted_scopes=true&prompt=consent",
33
+ access_token_url="https://oauth2.googleapis.com/token?{{client_id_param}}&{{client_secret_param}}&{{auth_code_param}}&{{redirect_uri_param}}&grant_type=authorization_code",
34
+ scope="https://www.googleapis.com/auth/drive.readonly https://www.googleapis.com/auth/admin.directory.group.readonly https://www.googleapis.com/auth/admin.directory.group.member.readonly https://www.googleapis.com/auth/admin.directory.user.readonly",
35
+ )
31
36
 
32
37
  return ConnectorSpecification(
33
38
  documentationUrl=self.spec_class.documentation_url(),
@@ -37,10 +42,17 @@ class SourceGoogleDrive(FileBasedSource):
37
42
  predicate_key=["credentials", "auth_type"],
38
43
  predicate_value="Client",
39
44
  oauth_config_specification=OAuthConfigSpecification(
45
+ oauth_connector_input_specification=oauth_connector_input_specification,
40
46
  complete_oauth_output_specification={
41
47
  "type": "object",
42
48
  "additionalProperties": False,
43
- "properties": {"refresh_token": {"type": "string", "path_in_connector_config": ["credentials", "refresh_token"]}},
49
+ "properties": {
50
+ "refresh_token": {
51
+ "type": "string",
52
+ "path_in_connector_config": ["credentials", "refresh_token"],
53
+ "path_in_oauth_response": ["refresh_token"],
54
+ }
55
+ },
44
56
  },
45
57
  complete_oauth_server_input_specification={
46
58
  "type": "object",
@@ -63,11 +63,8 @@ PUBLIC_PERMISSION_IDS = [
63
63
  "domainWithLink",
64
64
  ]
65
65
 
66
- PERMISSIONS_API_SCOPES = [
67
- "https://www.googleapis.com/auth/drive",
68
- "https://www.googleapis.com/auth/drive.readonly",
69
- "https://www.googleapis.com/auth/drive.metadata.readonly",
70
- "https://www.googleapis.com/auth/drive.file",
66
+
67
+ DRIVE_SERVICE_SCOPES = [
71
68
  "https://www.googleapis.com/auth/admin.directory.group.readonly",
72
69
  "https://www.googleapis.com/auth/admin.directory.group.member.readonly",
73
70
  "https://www.googleapis.com/auth/admin.directory.user.readonly",
@@ -91,6 +88,7 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
91
88
  def __init__(self):
92
89
  super().__init__()
93
90
  self._drive_service = None
91
+ self._directory_service = None
94
92
 
95
93
  @property
96
94
  def config(self) -> SourceGoogleDriveSpec:
@@ -110,32 +108,41 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
110
108
  assert isinstance(value, SourceGoogleDriveSpec)
111
109
  self._config = value
112
110
 
113
- @property
114
- def google_drive_service(self):
111
+ def _build_google_service(self, service_name: str, version: str, scopes: List[str] = None):
115
112
  if self.config is None:
116
113
  # We shouldn't hit this; config should always get set before attempting to
117
114
  # list or read files.
118
- raise ValueError("Source config is missing; cannot create the Google Drive client.")
115
+ raise ValueError(f"Source config is missing; cannot create the Google {service_name} client.")
119
116
  try:
120
- if self._drive_service is None:
121
- if self.config.credentials.auth_type == "Client":
122
- creds = credentials.Credentials.from_authorized_user_info(self.config.credentials.dict())
123
- else:
124
- scopes = PERMISSIONS_API_SCOPES if self.include_identities_stream() else None
125
- creds = service_account.Credentials.from_service_account_info(
126
- json.loads(self.config.credentials.service_account_info), scopes=scopes
127
- )
128
- self._drive_service = build("drive", "v3", credentials=creds)
117
+ if self.config.credentials.auth_type == "Client":
118
+ creds = credentials.Credentials.from_authorized_user_info(self.config.credentials.dict())
119
+ else:
120
+ creds = service_account.Credentials.from_service_account_info(
121
+ json.loads(self.config.credentials.service_account_info), scopes=scopes
122
+ )
123
+ google_service = build(service_name, version, credentials=creds)
129
124
  except Exception as e:
130
125
  raise AirbyteTracedException(
131
126
  internal_message=str(e),
132
- message="Could not authenticate with Google Drive. Please check your credentials.",
127
+ message=f"Could not authenticate with Google {service_name}. Please check your credentials.",
133
128
  failure_type=FailureType.config_error,
134
129
  exception=e,
135
130
  )
136
131
 
132
+ return google_service
133
+
134
+ @property
135
+ def google_drive_service(self):
136
+ if self._drive_service is None:
137
+ self._drive_service = self._build_google_service("drive", "v3")
137
138
  return self._drive_service
138
139
 
140
+ @property
141
+ def google_directory_service(self):
142
+ if self._directory_service is None:
143
+ self._directory_service = self._build_google_service("admin", "directory_v1", DRIVE_SERVICE_SCOPES)
144
+ return self._directory_service
145
+
139
146
  def get_matching_files(self, globs: List[str], prefix: Optional[str], logger: logging.Logger) -> Iterable[RemoteFile]:
140
147
  """
141
148
  Get all files matching the specified glob patterns.
@@ -340,7 +347,6 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
340
347
  if identity is not None:
341
348
  remote_identities.append(identity)
342
349
 
343
- logger.info(f"File {file_name} has {len(remote_identities)} valid permissions")
344
350
  return remote_identities, is_public
345
351
  except Exception as e:
346
352
  raise ErrorFetchingMetadata(f"An error occurred while retrieving file permissions: {str(e)}")
@@ -399,16 +405,16 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
399
405
  def load_identity_groups(self, logger: logging.Logger) -> Dict[str, Any]:
400
406
  domain = self.config.delivery_method.domain
401
407
  if not domain:
402
- raise Exception("No domain was provided")
403
- if self.google_drive_service is None or self.google_drive_service._http.credentials is None:
404
- raise Exception("No auth found")
408
+ logger.info("No domain provided. Trying to fetch identities from the user workspace.")
409
+ api_args = {"customer": "my_customer"}
410
+ else:
411
+ api_args = {"domain": domain}
405
412
 
406
- directory_service = build("admin", "directory_v1", credentials=self.google_drive_service._http.credentials)
407
- users_api = directory_service.users()
408
- groups_api = directory_service.groups()
409
- members_api = directory_service.members()
413
+ users_api = self.google_directory_service.users()
414
+ groups_api = self.google_directory_service.groups()
415
+ members_api = self.google_directory_service.members()
410
416
 
411
- for user in self._get_looping_google_api_list_response(users_api, "users", {"domain": domain}, logger):
417
+ for user in self._get_looping_google_api_list_response(users_api, "users", args=api_args, logger=logger):
412
418
  rfp = RemoteIdentity(
413
419
  id=uuid.uuid4(),
414
420
  remote_id=user["primaryEmail"],
@@ -420,7 +426,7 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
420
426
  )
421
427
  yield rfp.dict()
422
428
 
423
- for group in self._get_looping_google_api_list_response(groups_api, "groups", {"domain": domain}, logger):
429
+ for group in self._get_looping_google_api_list_response(groups_api, "groups", args=api_args, logger=logger):
424
430
  rfp = RemoteIdentity(
425
431
  id=uuid.uuid4(),
426
432
  remote_id=group["email"],