airbyte-source-google-drive 0.2.0.dev202501241824__py3-none-any.whl → 0.2.0.dev202501270320__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of airbyte-source-google-drive might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: airbyte-source-google-drive
3
- Version: 0.2.0.dev202501241824
3
+ Version: 0.2.0.dev202501270320
4
4
  Summary: Source implementation for Google Drive.
5
5
  License: ELv2
6
6
  Author: Airbyte
@@ -10,7 +10,7 @@ Classifier: License :: Other/Proprietary License
10
10
  Classifier: Programming Language :: Python :: 3
11
11
  Classifier: Programming Language :: Python :: 3.10
12
12
  Classifier: Programming Language :: Python :: 3.11
13
- Requires-Dist: airbyte-cdk[file-based] (==6.26.0.dev04101)
13
+ Requires-Dist: airbyte-cdk[file-based] (==6.26.0.dev04105)
14
14
  Requires-Dist: google-api-python-client (==2.104.0)
15
15
  Requires-Dist: google-api-python-client-stubs (==1.18.0)
16
16
  Requires-Dist: google-auth-httplib2 (==0.1.1)
@@ -0,0 +1,12 @@
1
+ source_google_drive/__init__.py,sha256=SAgPIoGpjdHTY7mvf__A99FiElBh3WvdKSHLstg-BUE,134
2
+ source_google_drive/exceptions.py,sha256=6sBxbdZhj0CARkctfBaNgGHB_GGcFUkY4cnMUWyOruM,268
3
+ source_google_drive/run.py,sha256=5AQd906FbSGaNZj0udrr-VKmiSS4nsor7F7noMjsdi0,677
4
+ source_google_drive/source.py,sha256=OyKTjPg80KR8yZiYiC3YSi6E6egothtAn3YAHAS6Q6A,2839
5
+ source_google_drive/spec.py,sha256=Xt20_aNmOxdL0XwFsGuAEGHCXCygDPPIZ3QKRwnW47k,4242
6
+ source_google_drive/stream_reader.py,sha256=6dQOzlsMB2XoMIAdJH97Rb_dKVyDQaWeepXAN0GxxzA,20575
7
+ source_google_drive/temp_mock.py,sha256=NdvsAltcZBIqnCBOQuU9R3_yEV5v-bHBR9T_BdjPtU4,1109
8
+ source_google_drive/utils.py,sha256=1Fe3J4KXI1iIf4rklgLoR4p1xkCzLQI7zj5dkyi4Vt0,942
9
+ airbyte_source_google_drive-0.2.0.dev202501270320.dist-info/METADATA,sha256=fao8_WncWsO455kN99bXlp33S5bKj-iw8VcVZSnbW1Y,5536
10
+ airbyte_source_google_drive-0.2.0.dev202501270320.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
11
+ airbyte_source_google_drive-0.2.0.dev202501270320.dist-info/entry_points.txt,sha256=YgpJf0nA5Mn0B7YC9VOFI847vz1jI6U4q7BeLUOXa54,67
12
+ airbyte_source_google_drive-0.2.0.dev202501270320.dist-info/RECORD,,
@@ -2,9 +2,7 @@
2
2
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
- import uuid
6
- from datetime import datetime
7
- from typing import Any, Dict, Literal, Union
5
+ from typing import Any, Dict, Literal, Optional, Union
8
6
 
9
7
  import dpath.util
10
8
  from pydantic.v1 import BaseModel, Field
@@ -19,34 +17,15 @@ from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import (
19
17
  )
20
18
 
21
19
 
22
- class RemoteFileIdentity(BaseModel):
23
- id: uuid.UUID
24
- remote_id: str
25
- parent_id: str | None = None
26
- name: str | None = None
27
- description: str | None = None
28
- email_address: str | None = None
29
- member_email_addresses: list[str] | None = None
30
- type: str
31
- modified_at: datetime
32
-
33
-
34
- class RemoteFileMetadata(BaseModel):
35
- id: str
36
- file_path: str
37
- allowed_identity_remote_ids: list[str] | None = None
38
- denied_identity_remote_ids: list[str] | None = None
39
- publicly_accessible: bool = False
40
-
41
-
42
20
  class DeliverRecords(DeliverRecordsBase):
43
21
  # Overriding to make visible with airbyte_hidden=False
44
- sync_metadata: bool = Field(
45
- title="Make stream sync files metadata",
46
- description="If enabled, streams will sync files metadata instead of files data.",
22
+ sync_acl_permissions: bool = Field(
23
+ title="Include ACL Permissions",
24
+ description="Joins Document allowlists to each stream.",
47
25
  default=False,
48
26
  airbyte_hidden=False,
49
27
  )
28
+ domain: Optional[str] = Field(title="Domain", description="The domain of the identities.", airbyte_hidden=False)
50
29
 
51
30
 
52
31
  class OAuthCredentials(BaseModel):
@@ -10,20 +10,25 @@ import uuid
10
10
  from datetime import datetime
11
11
  from io import IOBase
12
12
  from os.path import getsize
13
- from typing import Any, Dict, Iterable, List, Optional, Set, Tuple
13
+ from typing import Any, Dict, Iterable, Iterator, List, Optional, Set, Tuple
14
14
 
15
+ import pytz
15
16
  from google.oauth2 import credentials, service_account
16
17
  from googleapiclient.discovery import build
17
18
  from googleapiclient.http import MediaIoBaseDownload
18
19
 
19
20
  from airbyte_cdk import AirbyteTracedException, FailureType
21
+ from airbyte_cdk.sources.file_based.config.permissions import RemoteFileIdentity, RemoteFileIdentityType, RemoteFilePermissions
20
22
  from airbyte_cdk.sources.file_based.exceptions import FileSizeLimitError
21
23
  from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader, FileReadMode
22
24
  from airbyte_cdk.sources.file_based.remote_file import RemoteFile
23
25
  from source_google_drive.utils import get_folder_id
24
26
 
25
27
  from .exceptions import ErrorDownloadingFile, ErrorFetchingMetadata
26
- from .spec import RemoteFileIdentity, RemoteFileMetadata, SourceGoogleDriveSpec
28
+ from .spec import SourceGoogleDriveSpec
29
+
30
+ # TODO: remove this import once real test credentials are available; it only supplies mocked API responses for development.
31
+ from .temp_mock import _get_looping_google_api_list_response
27
32
 
28
33
 
29
34
  FOLDER_MIME_TYPE = "application/vnd.google-apps.folder"
@@ -73,6 +78,10 @@ PERMISSIONS_API_SCOPES = [
73
78
  ]
74
79
 
75
80
 
81
def datetime_now() -> datetime:
    """Return the current moment as a timezone-aware ``datetime`` in UTC."""
    # Passing the tz explicitly makes the result aware rather than naive.
    return datetime.now(tz=pytz.UTC)
83
+
84
+
76
85
  class GoogleDriveRemoteFile(RemoteFile):
77
86
  id: str
78
87
  # The mime type of the file as returned by the Google Drive API
@@ -116,7 +125,7 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
116
125
  if self.config.credentials.auth_type == "Client":
117
126
  creds = credentials.Credentials.from_authorized_user_info(self.config.credentials.dict())
118
127
  else:
119
- scopes = PERMISSIONS_API_SCOPES if self.sync_metadata() else None
128
+ scopes = PERMISSIONS_API_SCOPES if self.sync_acl_permissions() else None
120
129
  creds = service_account.Credentials.from_service_account_info(
121
130
  json.loads(self.config.credentials.service_account_info), scopes=scopes
122
131
  )
@@ -131,6 +140,76 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
131
140
 
132
141
  return self._drive_service
133
142
 
143
+ def _get_looping_google_api_list_response(
144
+ self, service: Any, key: str, args: dict[str, Any], logger: logging.Logger
145
+ ) -> Iterator[dict[str, Any]]:
146
+ try:
147
+ looping = True
148
+ next_page_token: str | None = None
149
+ while looping:
150
+ rsp = service.list(pageToken=next_page_token, **args).execute()
151
+ next_page_token = rsp.get("nextPageToken")
152
+ items: list[dict[str, Any]] = rsp.get(key)
153
+
154
+ if items is None or len(items) == 0:
155
+ looping = False
156
+ break
157
+
158
+ if rsp.get("nextPageToken") is None:
159
+ looping = False
160
+ else:
161
+ next_page_token = rsp.get("nextPageToken")
162
+
163
+ for item in items:
164
+ yield item
165
+ except Exception as e:
166
+ logger.info(f"There was an error listing {key} with {args}: {str(e)}")
167
+ logger.info(f"Backing off to mocked data for development purposes")
168
+ yield from _get_looping_google_api_list_response(service, key, args)
169
+
170
def load_identity_groups(self, logger: logging.Logger) -> Iterable[Dict[str, Any]]:
    """
    Yield every user and group of the configured domain as identity records.

    Users are listed first, then groups; each group record carries the email
    addresses of its members. Every record is the ``dict()`` form of a
    ``RemoteFileIdentity``.

    This is a generator, so the validation errors below surface when iteration
    starts, not when the method is called.

    :param logger: logger forwarded to the paginated listing helper.
    :raises Exception: if no domain is configured on the delivery method, or
        if the Drive service has no credentials to reuse.
    """
    domain = self.config.delivery_method.domain
    if not domain:
        raise Exception("No domain was provided")

    drive_service = self.google_drive_service
    if drive_service is None or drive_service._http.credentials is None:
        raise Exception("No auth found")

    # The Admin SDK Directory API is called with the same credentials as Drive.
    directory_service = build("admin", "directory_v1", credentials=drive_service._http.credentials)
    users_api = directory_service.users()
    groups_api = directory_service.groups()
    members_api = directory_service.members()

    # NOTE(review): the original author flagged this listing as failing —
    # presumably missing Directory API scopes/credentials; confirm.
    for user in self._get_looping_google_api_list_response(users_api, "users", {"domain": domain}, logger):
        primary_email = user["primaryEmail"]
        # "name" and "emails" can be absent from a user resource; tolerate that
        # instead of failing the whole sync with a KeyError.
        name_info = user.get("name") or {}
        identity = RemoteFileIdentity(
            id=uuid.uuid4(),
            remote_id=primary_email,
            name=name_info.get("fullName"),
            email_address=primary_email,
            member_email_addresses=[entry["address"] for entry in user.get("emails") or [] if entry.get("address")],
            type=RemoteFileIdentityType.USER,
            modified_at=datetime_now(),
        )
        yield identity.dict()

    for group in self._get_looping_google_api_list_response(groups_api, "groups", {"domain": domain}, logger):
        # Members without an email address are skipped.
        member_emails = [
            member["email"]
            for member in self._get_looping_google_api_list_response(
                members_api, "members", {"groupKey": group["id"]}, logger
            )
            if member.get("email")
        ]
        identity = RemoteFileIdentity(
            id=uuid.uuid4(),
            remote_id=group["email"],
            name=group.get("name"),
            email_address=group["email"],
            # Stays None for a group without members, as before.
            member_email_addresses=member_emails or None,
            type=RemoteFileIdentityType.GROUP,
            modified_at=datetime_now(),
        )
        yield identity.dict()
212
+
134
213
  def get_matching_files(self, globs: List[str], prefix: Optional[str], logger: logging.Logger) -> Iterable[RemoteFile]:
135
214
  """
136
215
  Get all files matching the specified glob patterns.
@@ -356,9 +435,9 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
356
435
  description=None,
357
436
  )
358
437
 
359
- def get_file_metadata(self, file: GoogleDriveRemoteFile, logger: logging.Logger) -> Dict[str, Any]:
438
+ def get_file_acl_permissions(self, file: GoogleDriveRemoteFile, logger: logging.Logger) -> Dict[str, Any]:
360
439
  remote_identities, is_public = self.get_file_permissions(file.id, file_name=file.uri, logger=logger)
361
- return RemoteFileMetadata(
440
+ return RemoteFilePermissions(
362
441
  id=file.id,
363
442
  file_path=file.uri,
364
443
  allowed_identity_remote_ids=[p.remote_id for p in remote_identities],
@@ -0,0 +1,36 @@
1
+ # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
2
+
3
+ from typing import Any, Iterator
4
+
5
+
6
+ def _get_looping_google_api_list_response(service: Any, key: str, args: dict[str, Any]) -> Iterator[dict[str, Any]]:
7
+ domain = args.get("domain") or args.get("groupKey")
8
+
9
+ if key == "users":
10
+ # Mocking 5 users
11
+ for i in range(1, 6):
12
+ yield {
13
+ "primaryEmail": f"user{i}@{domain}",
14
+ "name": {"fullName": f"User {i}"},
15
+ "emails": [{"address": f"user{i}@{domain}"}],
16
+ }
17
+
18
+ elif key == "groups":
19
+ # Mocking 5 groups
20
+ for i in range(1, 6):
21
+ yield {
22
+ "email": f"group{i}@{domain}",
23
+ "name": f"Group {i}",
24
+ "id": f"group-id-{i}",
25
+ }
26
+
27
+ elif key == "members":
28
+ # Mocking 5 members per group
29
+ group_id = args.get("groupKey")
30
+ for i in range(1, 6):
31
+ yield {
32
+ "email": f"member{i}@{domain}",
33
+ "role": "MEMBER",
34
+ "type": "USER",
35
+ "groupId": group_id,
36
+ }
@@ -1,11 +0,0 @@
1
- source_google_drive/__init__.py,sha256=SAgPIoGpjdHTY7mvf__A99FiElBh3WvdKSHLstg-BUE,134
2
- source_google_drive/exceptions.py,sha256=6sBxbdZhj0CARkctfBaNgGHB_GGcFUkY4cnMUWyOruM,268
3
- source_google_drive/run.py,sha256=5AQd906FbSGaNZj0udrr-VKmiSS4nsor7F7noMjsdi0,677
4
- source_google_drive/source.py,sha256=OyKTjPg80KR8yZiYiC3YSi6E6egothtAn3YAHAS6Q6A,2839
5
- source_google_drive/spec.py,sha256=krZ1qRjUP7vV4JLGMgggMkP4kizGG4uj8mX0S0CUwU8,4706
6
- source_google_drive/stream_reader.py,sha256=mMcxz8C0J3ePBOCu3HWfaBFzam8kkrp45NxAKEHoiI0,17175
7
- source_google_drive/utils.py,sha256=1Fe3J4KXI1iIf4rklgLoR4p1xkCzLQI7zj5dkyi4Vt0,942
8
- airbyte_source_google_drive-0.2.0.dev202501241824.dist-info/METADATA,sha256=MAUR34WJ1R-JdcBVIWm72hif8hAtCJ8uzORoZSQOSH8,5536
9
- airbyte_source_google_drive-0.2.0.dev202501241824.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
10
- airbyte_source_google_drive-0.2.0.dev202501241824.dist-info/entry_points.txt,sha256=YgpJf0nA5Mn0B7YC9VOFI847vz1jI6U4q7BeLUOXa54,67
11
- airbyte_source_google_drive-0.2.0.dev202501241824.dist-info/RECORD,,