airbyte-source-google-drive 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of airbyte-source-google-drive might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: airbyte-source-google-drive
3
- Version: 0.3.4
3
+ Version: 0.4.0
4
4
  Summary: Source implementation for Google Drive.
5
5
  License: ELv2
6
6
  Author: Airbyte
@@ -10,7 +10,7 @@ Classifier: License :: Other/Proprietary License
10
10
  Classifier: Programming Language :: Python :: 3
11
11
  Classifier: Programming Language :: Python :: 3.10
12
12
  Classifier: Programming Language :: Python :: 3.11
13
- Requires-Dist: airbyte-cdk[file-based] (>=6.38.5,<7.0.0)
13
+ Requires-Dist: airbyte-cdk[file-based] (>=6.45.10,<7.0.0)
14
14
  Requires-Dist: google-api-python-client (==2.104.0)
15
15
  Requires-Dist: google-api-python-client-stubs (==1.18.0)
16
16
  Requires-Dist: google-auth-httplib2 (==0.1.1)
@@ -6,9 +6,9 @@ source_google_drive/schemas/identities.json,sha256=JXBR_v0wpfDKiWVzLoc8bPs33x5CG
6
6
  source_google_drive/source.py,sha256=Hmz60uMCQZyA9AhCN0bOUSW2iL3GW6VTwI5vrK0D1RY,4140
7
7
  source_google_drive/spec.py,sha256=-WkA2zGuQtf3G7uK8uq9BnimUlQh0s3vsqROmIHOgzI,4718
8
8
  source_google_drive/stream_permissions_reader.py,sha256=88bSJBC5n2svioTDwgVqf0x5yXRxityk_i0xokWEJcQ,9026
9
- source_google_drive/stream_reader.py,sha256=d1c-u3iGI5i7RQ3rTCoDV7JXHwyuKIMp0d1GMQgOsfA,14246
9
+ source_google_drive/stream_reader.py,sha256=S9IDtw0Rrz84l8ZAf92Ge41h0nkPUrTRWD32Xe_E4Bo,15804
10
10
  source_google_drive/utils.py,sha256=ewR-kBKLmtD-s7zqCfGECfzWYF43tpQdscAQIlUEkR8,1022
11
- airbyte_source_google_drive-0.3.4.dist-info/METADATA,sha256=e4fdeOvYr6yHObS4pHKMO9BzGjJLAltxV0uVJgyN2u0,5518
12
- airbyte_source_google_drive-0.3.4.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
13
- airbyte_source_google_drive-0.3.4.dist-info/entry_points.txt,sha256=YgpJf0nA5Mn0B7YC9VOFI847vz1jI6U4q7BeLUOXa54,67
14
- airbyte_source_google_drive-0.3.4.dist-info/RECORD,,
11
+ airbyte_source_google_drive-0.4.0.dist-info/METADATA,sha256=SNH1GW0LLL5r3zu91BC1ae-rfTUtlurT8Ire5ZFaOos,5519
12
+ airbyte_source_google_drive-0.4.0.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
13
+ airbyte_source_google_drive-0.4.0.dist-info/entry_points.txt,sha256=YgpJf0nA5Mn0B7YC9VOFI847vz1jI6U4q7BeLUOXa54,67
14
+ airbyte_source_google_drive-0.4.0.dist-info/RECORD,,
@@ -6,27 +6,26 @@
6
6
  import io
7
7
  import json
8
8
  import logging
9
- import uuid
9
+ import os
10
10
  from datetime import datetime
11
11
  from io import IOBase
12
12
  from os.path import getsize
13
- from typing import Any, Dict, Iterable, Iterator, List, Optional, Set, Tuple
13
+ from typing import Dict, Iterable, List, Optional, Set, Tuple
14
14
 
15
- import pytz
16
15
  from google.oauth2 import credentials, service_account
17
16
  from googleapiclient.discovery import build
18
17
  from googleapiclient.http import MediaIoBaseDownload
19
18
 
20
19
  from airbyte_cdk import AirbyteTracedException, FailureType
20
+ from airbyte_cdk.models import AirbyteRecordMessageFileReference
21
21
  from airbyte_cdk.sources.file_based.exceptions import FileSizeLimitError
22
22
  from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader, FileReadMode
23
+ from airbyte_cdk.sources.file_based.file_record_data import FileRecordData
23
24
  from airbyte_cdk.sources.file_based.remote_file import RemoteFile
24
- from airbyte_cdk.sources.streams.core import package_name_from_class
25
- from airbyte_cdk.sources.utils.schema_helpers import InternalConfig, ResourceSchemaLoader
26
25
  from source_google_drive.utils import get_folder_id
27
26
 
28
27
  from .exceptions import ErrorDownloadingFile, ErrorFetchingMetadata
29
- from .spec import RemoteIdentity, RemoteIdentityType, RemotePermissions, SourceGoogleDriveSpec
28
+ from .spec import SourceGoogleDriveSpec
30
29
 
31
30
 
32
31
  FOLDER_MIME_TYPE = "application/vnd.google-apps.folder"
@@ -64,6 +63,16 @@ class GoogleDriveRemoteFile(RemoteFile):
64
63
  # The mime type of the file as returned by the Google Drive API
65
64
  # This is not the same as the mime type when opened by the parser (e.g. google docs is exported as docx)
66
65
  original_mime_type: str
66
+ view_link: str
67
+ # Only populated for items in shared drives.
68
+ drive_id: Optional[str] = None
69
+ created_at: datetime
70
+
71
+ @property
72
+ def url(self) -> str:
73
+ if self.drive_id:
74
+ return f"https://drive.google.com/open?id={self.id}&driveId={self.drive_id}"
75
+ return self.view_link
67
76
 
68
77
 
69
78
  class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
@@ -135,10 +144,11 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
135
144
  (path, folder_id) = folder_id_queue.pop()
136
145
  # fetch all files in this folder (1000 is the max page size)
137
146
  # supportsAllDrives and includeItemsFromAllDrives are required to access files in shared drives
147
+ # ref https://developers.google.com/workspace/drive/api/reference/rest/v3/files#File
138
148
  request = service.files().list(
139
149
  q=f"'{folder_id}' in parents",
140
150
  pageSize=1000,
141
- fields="nextPageToken, files(id, name, modifiedTime, mimeType)",
151
+ fields="nextPageToken, files(id, name, modifiedTime, mimeType, webViewLink, driveId, createdTime)",
142
152
  supportsAllDrives=True,
143
153
  includeItemsFromAllDrives=True,
144
154
  )
@@ -161,6 +171,7 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
161
171
  continue
162
172
  else:
163
173
  last_modified = datetime.strptime(new_file["modifiedTime"], "%Y-%m-%dT%H:%M:%S.%fZ")
174
+ created_at = datetime.strptime(new_file["createdTime"], "%Y-%m-%dT%H:%M:%S.%fZ")
164
175
  original_mime_type = new_file["mimeType"]
165
176
  mime_type = (
166
177
  self._get_export_mime_type(original_mime_type)
@@ -170,9 +181,12 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
170
181
  remote_file = GoogleDriveRemoteFile(
171
182
  uri=file_name,
172
183
  last_modified=last_modified,
184
+ created_at=created_at,
173
185
  id=new_file["id"],
174
186
  original_mime_type=original_mime_type,
175
187
  mime_type=mime_type,
188
+ drive_id=new_file.get("driveId"),
189
+ view_link=new_file.get("webViewLink"),
176
190
  )
177
191
  if self.file_matches_globs(remote_file, globs):
178
192
  yield remote_file
@@ -245,7 +259,9 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
245
259
  except Exception as e:
246
260
  raise ErrorFetchingMetadata(f"An error occurred while retrieving file size: {str(e)}")
247
261
 
248
- def get_file(self, file: GoogleDriveRemoteFile, local_directory: str, logger: logging.Logger) -> Dict[str, str | int]:
262
+ def upload(
263
+ self, file: GoogleDriveRemoteFile, local_directory: str, logger: logging.Logger
264
+ ) -> Tuple[FileRecordData, AirbyteRecordMessageFileReference]:
249
265
  """
250
266
  Downloads a file from Google Drive to a specified local directory.
251
267
 
@@ -265,15 +281,18 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
265
281
  raise FileSizeLimitError(message=message, internal_message=message, failure_type=FailureType.config_error)
266
282
 
267
283
  try:
268
- file_relative_path, local_file_path, absolute_file_path = self._get_file_transfer_paths(file, local_directory)
284
+ file_paths = self._get_file_transfer_paths(source_file_relative_path=file.uri, staging_directory=local_directory)
285
+ local_file_path = file_paths[self.LOCAL_FILE_PATH]
286
+ file_relative_path = file_paths[self.FILE_RELATIVE_PATH]
287
+ file_name = file_paths[self.FILE_NAME]
269
288
 
270
289
  if self._is_exportable_document(file.original_mime_type):
271
290
  request = self.google_drive_service.files().export_media(fileId=file.id, mimeType=file.mime_type)
272
291
 
273
292
  file_extension = DOWNLOADABLE_DOCUMENTS_MIME_TYPES[file.original_mime_type][DOCUMENT_FILE_EXTENSION_KEY]
274
293
  local_file_path += file_extension
275
- absolute_file_path += file_extension
276
294
  file_relative_path += file_extension
295
+ file_name += file_extension
277
296
  else:
278
297
  request = self.google_drive_service.files().get_media(fileId=file.id)
279
298
 
@@ -285,10 +304,26 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
285
304
  progress = status.resumable_progress / status.total_size * 100 if status.total_size else 0
286
305
  logger.info(f"Processing file {file.uri}, progress: {progress:.2f}%")
287
306
 
307
+ logger.info(f"Finished uploading file {file.uri} to {local_file_path}")
288
308
  # native google objects seems to be reporting lower size through the api than the final download size
289
309
  file_size = getsize(local_file_path)
290
310
 
291
- return {"file_url": absolute_file_path, "bytes": file_size, "file_relative_path": file_relative_path}
311
+ file_record_data = FileRecordData(
312
+ folder=file_paths[self.FILE_FOLDER],
313
+ file_name=file_name,
314
+ bytes=file_size,
315
+ id=file.id,
316
+ mime_type=file.mime_type,
317
+ created_at=file.created_at.strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
318
+ updated_at=file.last_modified.strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
319
+ source_uri=file.url,
320
+ )
321
+ file_reference = AirbyteRecordMessageFileReference(
322
+ staging_file_url=local_file_path,
323
+ source_file_relative_path=file_relative_path,
324
+ file_size_bytes=file_size,
325
+ )
326
+ return file_record_data, file_reference
292
327
 
293
328
  except Exception as e:
294
329
  raise ErrorDownloadingFile(f"There was an error while trying to download the file {file.uri}: {str(e)}")