airbyte-source-google-drive 0.3.3__py3-none-any.whl → 0.4.0.dev202504091818__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of airbyte-source-google-drive might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: airbyte-source-google-drive
3
- Version: 0.3.3
3
+ Version: 0.4.0.dev202504091818
4
4
  Summary: Source implementation for Google Drive.
5
5
  License: ELv2
6
6
  Author: Airbyte
@@ -10,7 +10,7 @@ Classifier: License :: Other/Proprietary License
10
10
  Classifier: Programming Language :: Python :: 3
11
11
  Classifier: Programming Language :: Python :: 3.10
12
12
  Classifier: Programming Language :: Python :: 3.11
13
- Requires-Dist: airbyte-cdk[file-based] (>=6.38.5,<7.0.0)
13
+ Requires-Dist: airbyte-cdk[file-based] (==6.45.0.dev04107)
14
14
  Requires-Dist: google-api-python-client (==2.104.0)
15
15
  Requires-Dist: google-api-python-client-stubs (==1.18.0)
16
16
  Requires-Dist: google-auth-httplib2 (==0.1.1)
@@ -6,9 +6,9 @@ source_google_drive/schemas/identities.json,sha256=JXBR_v0wpfDKiWVzLoc8bPs33x5CG
6
6
  source_google_drive/source.py,sha256=Hmz60uMCQZyA9AhCN0bOUSW2iL3GW6VTwI5vrK0D1RY,4140
7
7
  source_google_drive/spec.py,sha256=-WkA2zGuQtf3G7uK8uq9BnimUlQh0s3vsqROmIHOgzI,4718
8
8
  source_google_drive/stream_permissions_reader.py,sha256=88bSJBC5n2svioTDwgVqf0x5yXRxityk_i0xokWEJcQ,9026
9
- source_google_drive/stream_reader.py,sha256=d1c-u3iGI5i7RQ3rTCoDV7JXHwyuKIMp0d1GMQgOsfA,14246
9
+ source_google_drive/stream_reader.py,sha256=r7eSUJ1CTgaZsMg3TljUDjSNidTyAyfRGfv2vxhLdlQ,14909
10
10
  source_google_drive/utils.py,sha256=ewR-kBKLmtD-s7zqCfGECfzWYF43tpQdscAQIlUEkR8,1022
11
- airbyte_source_google_drive-0.3.3.dist-info/METADATA,sha256=qLevpcn6boj2tUWKCHvRqYyFMSlvOiWBxztIF7xDONU,5518
12
- airbyte_source_google_drive-0.3.3.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
13
- airbyte_source_google_drive-0.3.3.dist-info/entry_points.txt,sha256=YgpJf0nA5Mn0B7YC9VOFI847vz1jI6U4q7BeLUOXa54,67
14
- airbyte_source_google_drive-0.3.3.dist-info/RECORD,,
11
+ airbyte_source_google_drive-0.4.0.dev202504091818.dist-info/METADATA,sha256=3kVi44z3GI6xpjSQPUhi44MAUSk6FoldVXQlYvhoK5w,5536
12
+ airbyte_source_google_drive-0.4.0.dev202504091818.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
13
+ airbyte_source_google_drive-0.4.0.dev202504091818.dist-info/entry_points.txt,sha256=YgpJf0nA5Mn0B7YC9VOFI847vz1jI6U4q7BeLUOXa54,67
14
+ airbyte_source_google_drive-0.4.0.dev202504091818.dist-info/RECORD,,
@@ -6,27 +6,26 @@
6
6
  import io
7
7
  import json
8
8
  import logging
9
- import uuid
9
+ import os
10
10
  from datetime import datetime
11
11
  from io import IOBase
12
12
  from os.path import getsize
13
- from typing import Any, Dict, Iterable, Iterator, List, Optional, Set, Tuple
13
+ from typing import Dict, Iterable, List, Optional, Set, Tuple
14
14
 
15
- import pytz
16
15
  from google.oauth2 import credentials, service_account
17
16
  from googleapiclient.discovery import build
18
17
  from googleapiclient.http import MediaIoBaseDownload
19
18
 
20
19
  from airbyte_cdk import AirbyteTracedException, FailureType
20
+ from airbyte_cdk.models import AirbyteRecordMessageFileReference
21
21
  from airbyte_cdk.sources.file_based.exceptions import FileSizeLimitError
22
22
  from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader, FileReadMode
23
+ from airbyte_cdk.sources.file_based.file_record_data import FileRecordData
23
24
  from airbyte_cdk.sources.file_based.remote_file import RemoteFile
24
- from airbyte_cdk.sources.streams.core import package_name_from_class
25
- from airbyte_cdk.sources.utils.schema_helpers import InternalConfig, ResourceSchemaLoader
26
25
  from source_google_drive.utils import get_folder_id
27
26
 
28
27
  from .exceptions import ErrorDownloadingFile, ErrorFetchingMetadata
29
- from .spec import RemoteIdentity, RemoteIdentityType, RemotePermissions, SourceGoogleDriveSpec
28
+ from .spec import SourceGoogleDriveSpec
30
29
 
31
30
 
32
31
  FOLDER_MIME_TYPE = "application/vnd.google-apps.folder"
@@ -245,7 +244,9 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
245
244
  except Exception as e:
246
245
  raise ErrorFetchingMetadata(f"An error occurred while retrieving file size: {str(e)}")
247
246
 
248
- def get_file(self, file: GoogleDriveRemoteFile, local_directory: str, logger: logging.Logger) -> Dict[str, str | int]:
247
+ def upload(
248
+ self, file: GoogleDriveRemoteFile, local_directory: str, logger: logging.Logger
249
+ ) -> Tuple[FileRecordData, AirbyteRecordMessageFileReference]:
249
250
  """
250
251
  Downloads a file from Google Drive to a specified local directory.
251
252
 
@@ -265,15 +266,18 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
265
266
  raise FileSizeLimitError(message=message, internal_message=message, failure_type=FailureType.config_error)
266
267
 
267
268
  try:
268
- file_relative_path, local_file_path, absolute_file_path = self._get_file_transfer_paths(file, local_directory)
269
+ file_paths = self._get_file_transfer_paths(file, local_directory)
270
+ local_file_path = file_paths[self.LOCAL_FILE_PATH]
271
+ file_relative_path = file_paths[self.FILE_RELATIVE_PATH]
272
+ file_name = file_paths[self.FILE_NAME]
269
273
 
270
274
  if self._is_exportable_document(file.original_mime_type):
271
275
  request = self.google_drive_service.files().export_media(fileId=file.id, mimeType=file.mime_type)
272
276
 
273
277
  file_extension = DOWNLOADABLE_DOCUMENTS_MIME_TYPES[file.original_mime_type][DOCUMENT_FILE_EXTENSION_KEY]
274
278
  local_file_path += file_extension
275
- absolute_file_path += file_extension
276
279
  file_relative_path += file_extension
280
+ file_name += file_extension
277
281
  else:
278
282
  request = self.google_drive_service.files().get_media(fileId=file.id)
279
283
 
@@ -285,10 +289,24 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
285
289
  progress = status.resumable_progress / status.total_size * 100 if status.total_size else 0
286
290
  logger.info(f"Processing file {file.uri}, progress: {progress:.2f}%")
287
291
 
292
+ logger.info(f"Finished uploading file {file.uri} to {local_file_path}")
288
293
  # native google objects seems to be reporting lower size through the api than the final download size
289
294
  file_size = getsize(local_file_path)
290
295
 
291
- return {"file_url": absolute_file_path, "bytes": file_size, "file_relative_path": file_relative_path}
296
+ file_record_data = FileRecordData(
297
+ folder=file_paths[self.FILE_FOLDER],
298
+ filename=file_name,
299
+ bytes=file_size,
300
+ id=file.id,
301
+ mime_type=file.mime_type,
302
+ updated_at=file.last_modified.strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
303
+ )
304
+ file_reference = AirbyteRecordMessageFileReference(
305
+ staging_file_url=local_file_path,
306
+ source_file_relative_path=file_relative_path,
307
+ file_size_bytes=file_size,
308
+ )
309
+ return file_record_data, file_reference
292
310
 
293
311
  except Exception as e:
294
312
  raise ErrorDownloadingFile(f"There was an error while trying to download the file {file.uri}: {str(e)}")