airbyte-source-google-drive 0.2.0.dev202501291717__tar.gz → 0.2.0.dev202502031727__tar.gz

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Potentially problematic release.


This version of airbyte-source-google-drive might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: airbyte-source-google-drive
3
- Version: 0.2.0.dev202501291717
3
+ Version: 0.2.0.dev202502031727
4
4
  Summary: Source implementation for Google Drive.
5
5
  License: ELv2
6
6
  Author: Airbyte
@@ -5,7 +5,7 @@ requires = [
5
5
  build-backend = "poetry.core.masonry.api"
6
6
 
7
7
  [tool.poetry]
8
- version = "0.2.0.dev202501291717"
8
+ version = "0.2.0.dev202502031727"
9
9
  name = "airbyte-source-google-drive"
10
10
  description = "Source implementation for Google Drive."
11
11
  authors = [
@@ -137,73 +137,6 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
137
137
 
138
138
  return self._drive_service
139
139
 
140
- def _get_looping_google_api_list_response(
141
- self, service: Any, key: str, args: dict[str, Any], logger: logging.Logger
142
- ) -> Iterator[dict[str, Any]]:
143
- try:
144
- looping = True
145
- next_page_token: str | None = None
146
- while looping:
147
- rsp = service.list(pageToken=next_page_token, **args).execute()
148
- next_page_token = rsp.get("nextPageToken")
149
- items: list[dict[str, Any]] = rsp.get(key)
150
-
151
- if items is None or len(items) == 0:
152
- looping = False
153
- break
154
-
155
- if rsp.get("nextPageToken") is None:
156
- looping = False
157
- else:
158
- next_page_token = rsp.get("nextPageToken")
159
-
160
- for item in items:
161
- yield item
162
- except Exception as e:
163
- logger.error(f"There was an error listing {key} with {args}: {str(e)}")
164
- raise e
165
-
166
- def load_identity_groups(self, logger: logging.Logger) -> Dict[str, Any]:
167
- domain = self.config.delivery_method.domain
168
- if not domain:
169
- raise Exception("No domain was provided")
170
- if self.google_drive_service is None or self.google_drive_service._http.credentials is None:
171
- raise Exception("No auth found")
172
-
173
- directory_service = build("admin", "directory_v1", credentials=self.google_drive_service._http.credentials)
174
- users_api = directory_service.users()
175
- groups_api = directory_service.groups()
176
- members_api = directory_service.members()
177
-
178
- # here is failing
179
- for user in self._get_looping_google_api_list_response(users_api, "users", {"domain": domain}, logger):
180
- rfp = RemoteFileIdentity(
181
- id=uuid.uuid4(),
182
- remote_id=user["primaryEmail"],
183
- name=user["name"]["fullName"] if user["name"] is not None else None,
184
- email_address=user["primaryEmail"],
185
- member_email_addresses=[x["address"] for x in user["emails"]],
186
- type=RemoteFileIdentityType.USER,
187
- modified_at=datetime_now(),
188
- )
189
- yield rfp.dict()
190
-
191
- for group in self._get_looping_google_api_list_response(groups_api, "groups", {"domain": domain}, logger):
192
- rfp = RemoteFileIdentity(
193
- id=uuid.uuid4(),
194
- remote_id=group["email"],
195
- name=group["name"],
196
- email_address=group["email"],
197
- type=RemoteFileIdentityType.GROUP,
198
- modified_at=datetime_now(),
199
- )
200
-
201
- for member in self._get_looping_google_api_list_response(members_api, "members", {"groupKey": group["id"]}, logger):
202
- rfp.member_email_addresses = rfp.member_email_addresses or []
203
- rfp.member_email_addresses.append(member["email"])
204
-
205
- yield rfp.dict()
206
-
207
140
  def get_matching_files(self, globs: List[str], prefix: Optional[str], logger: logging.Logger) -> Iterable[RemoteFile]:
208
141
  """
209
142
  Get all files matching the specified glob patterns.
@@ -437,3 +370,69 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
437
370
  allowed_identity_remote_ids=[p.remote_id for p in remote_identities],
438
371
  publicly_accessible=is_public,
439
372
  ).dict(exclude_none=True)
373
+
374
+ def _get_looping_google_api_list_response(
375
+ self, service: Any, key: str, args: dict[str, Any], logger: logging.Logger
376
+ ) -> Iterator[dict[str, Any]]:
377
+ try:
378
+ looping = True
379
+ next_page_token: str | None = None
380
+ while looping:
381
+ rsp = service.list(pageToken=next_page_token, **args).execute()
382
+ next_page_token = rsp.get("nextPageToken")
383
+ items: list[dict[str, Any]] = rsp.get(key)
384
+
385
+ if items is None or len(items) == 0:
386
+ looping = False
387
+ break
388
+
389
+ if rsp.get("nextPageToken") is None:
390
+ looping = False
391
+ else:
392
+ next_page_token = rsp.get("nextPageToken")
393
+
394
+ for item in items:
395
+ yield item
396
+ except Exception as e:
397
+ logger.error(f"There was an error listing {key} with {args}: {str(e)}")
398
+ raise e
399
+
400
+ def load_identity_groups(self, logger: logging.Logger) -> Dict[str, Any]:
401
+ domain = self.config.delivery_method.domain
402
+ if not domain:
403
+ raise Exception("No domain was provided")
404
+ if self.google_drive_service is None or self.google_drive_service._http.credentials is None:
405
+ raise Exception("No auth found")
406
+
407
+ directory_service = build("admin", "directory_v1", credentials=self.google_drive_service._http.credentials)
408
+ users_api = directory_service.users()
409
+ groups_api = directory_service.groups()
410
+ members_api = directory_service.members()
411
+
412
+ for user in self._get_looping_google_api_list_response(users_api, "users", {"domain": domain}, logger):
413
+ rfp = RemoteFileIdentity(
414
+ id=uuid.uuid4(),
415
+ remote_id=user["primaryEmail"],
416
+ name=user["name"]["fullName"] if user["name"] is not None else None,
417
+ email_address=user["primaryEmail"],
418
+ member_email_addresses=[x["address"] for x in user["emails"]],
419
+ type=RemoteFileIdentityType.USER,
420
+ modified_at=datetime_now(),
421
+ )
422
+ yield rfp.dict()
423
+
424
+ for group in self._get_looping_google_api_list_response(groups_api, "groups", {"domain": domain}, logger):
425
+ rfp = RemoteFileIdentity(
426
+ id=uuid.uuid4(),
427
+ remote_id=group["email"],
428
+ name=group["name"],
429
+ email_address=group["email"],
430
+ type=RemoteFileIdentityType.GROUP,
431
+ modified_at=datetime_now(),
432
+ )
433
+
434
+ for member in self._get_looping_google_api_list_response(members_api, "members", {"groupKey": group["id"]}, logger):
435
+ rfp.member_email_addresses = rfp.member_email_addresses or []
436
+ rfp.member_email_addresses.append(member["email"])
437
+
438
+ yield rfp.dict()
@@ -15,6 +15,8 @@ def get_folder_id(url_string: str) -> str:
15
15
  if parsed_url.scheme != "https" or parsed_url.netloc != "drive.google.com":
16
16
  raise ValueError("Folder URL has to be of the form https://drive.google.com/drive/folders/<folder_id>")
17
17
  path_segments = list(filter(None, parsed_url.path.split("/")))
18
+ if path_segments[-1] in ["my-drive", "home"]:
19
+ return "root"
18
20
  if path_segments[-2] != "folders" or len(path_segments) < 3:
19
21
  raise ValueError("Folder URL has to be of the form https://drive.google.com/drive/folders/<folder_id>")
20
22
  return path_segments[-1]