airbyte-source-google-drive 0.3.3__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of airbyte-source-google-drive might be problematic. Click here for more details.
- {airbyte_source_google_drive-0.3.3 → airbyte_source_google_drive-0.4.0}/PKG-INFO +2 -2
- {airbyte_source_google_drive-0.3.3 → airbyte_source_google_drive-0.4.0}/pyproject.toml +2 -2
- {airbyte_source_google_drive-0.3.3 → airbyte_source_google_drive-0.4.0}/source_google_drive/stream_reader.py +46 -11
- {airbyte_source_google_drive-0.3.3 → airbyte_source_google_drive-0.4.0}/README.md +0 -0
- {airbyte_source_google_drive-0.3.3 → airbyte_source_google_drive-0.4.0}/source_google_drive/__init__.py +0 -0
- {airbyte_source_google_drive-0.3.3 → airbyte_source_google_drive-0.4.0}/source_google_drive/exceptions.py +0 -0
- {airbyte_source_google_drive-0.3.3 → airbyte_source_google_drive-0.4.0}/source_google_drive/run.py +0 -0
- {airbyte_source_google_drive-0.3.3 → airbyte_source_google_drive-0.4.0}/source_google_drive/schemas/file_permissions.json +0 -0
- {airbyte_source_google_drive-0.3.3 → airbyte_source_google_drive-0.4.0}/source_google_drive/schemas/identities.json +0 -0
- {airbyte_source_google_drive-0.3.3 → airbyte_source_google_drive-0.4.0}/source_google_drive/source.py +0 -0
- {airbyte_source_google_drive-0.3.3 → airbyte_source_google_drive-0.4.0}/source_google_drive/spec.py +0 -0
- {airbyte_source_google_drive-0.3.3 → airbyte_source_google_drive-0.4.0}/source_google_drive/stream_permissions_reader.py +0 -0
- {airbyte_source_google_drive-0.3.3 → airbyte_source_google_drive-0.4.0}/source_google_drive/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: airbyte-source-google-drive
|
|
3
|
-
Version: 0.3.3
|
|
3
|
+
Version: 0.4.0
|
|
4
4
|
Summary: Source implementation for Google Drive.
|
|
5
5
|
License: ELv2
|
|
6
6
|
Author: Airbyte
|
|
@@ -10,7 +10,7 @@ Classifier: License :: Other/Proprietary License
|
|
|
10
10
|
Classifier: Programming Language :: Python :: 3
|
|
11
11
|
Classifier: Programming Language :: Python :: 3.10
|
|
12
12
|
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
-
Requires-Dist: airbyte-cdk[file-based] (>=6.
|
|
13
|
+
Requires-Dist: airbyte-cdk[file-based] (>=6.45.10,<7.0.0)
|
|
14
14
|
Requires-Dist: google-api-python-client (==2.104.0)
|
|
15
15
|
Requires-Dist: google-api-python-client-stubs (==1.18.0)
|
|
16
16
|
Requires-Dist: google-auth-httplib2 (==0.1.1)
|
|
@@ -5,7 +5,7 @@ requires = [
|
|
|
5
5
|
build-backend = "poetry.core.masonry.api"
|
|
6
6
|
|
|
7
7
|
[tool.poetry]
|
|
8
|
-
version = "0.3.3"
|
|
8
|
+
version = "0.4.0"
|
|
9
9
|
name = "airbyte-source-google-drive"
|
|
10
10
|
description = "Source implementation for Google Drive."
|
|
11
11
|
authors = [
|
|
@@ -31,7 +31,7 @@ google-api-python-client-stubs = "==1.18.0"
|
|
|
31
31
|
extras = [
|
|
32
32
|
"file-based",
|
|
33
33
|
]
|
|
34
|
-
version = "^6.
|
|
34
|
+
version = "^6.45.10"
|
|
35
35
|
|
|
36
36
|
[tool.poetry.scripts]
|
|
37
37
|
source-google-drive = "source_google_drive.run:run"
|
|
@@ -6,27 +6,26 @@
|
|
|
6
6
|
import io
|
|
7
7
|
import json
|
|
8
8
|
import logging
|
|
9
|
-
import
|
|
9
|
+
import os
|
|
10
10
|
from datetime import datetime
|
|
11
11
|
from io import IOBase
|
|
12
12
|
from os.path import getsize
|
|
13
|
-
from typing import
|
|
13
|
+
from typing import Dict, Iterable, List, Optional, Set, Tuple
|
|
14
14
|
|
|
15
|
-
import pytz
|
|
16
15
|
from google.oauth2 import credentials, service_account
|
|
17
16
|
from googleapiclient.discovery import build
|
|
18
17
|
from googleapiclient.http import MediaIoBaseDownload
|
|
19
18
|
|
|
20
19
|
from airbyte_cdk import AirbyteTracedException, FailureType
|
|
20
|
+
from airbyte_cdk.models import AirbyteRecordMessageFileReference
|
|
21
21
|
from airbyte_cdk.sources.file_based.exceptions import FileSizeLimitError
|
|
22
22
|
from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader, FileReadMode
|
|
23
|
+
from airbyte_cdk.sources.file_based.file_record_data import FileRecordData
|
|
23
24
|
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
|
|
24
|
-
from airbyte_cdk.sources.streams.core import package_name_from_class
|
|
25
|
-
from airbyte_cdk.sources.utils.schema_helpers import InternalConfig, ResourceSchemaLoader
|
|
26
25
|
from source_google_drive.utils import get_folder_id
|
|
27
26
|
|
|
28
27
|
from .exceptions import ErrorDownloadingFile, ErrorFetchingMetadata
|
|
29
|
-
from .spec import
|
|
28
|
+
from .spec import SourceGoogleDriveSpec
|
|
30
29
|
|
|
31
30
|
|
|
32
31
|
FOLDER_MIME_TYPE = "application/vnd.google-apps.folder"
|
|
@@ -64,6 +63,16 @@ class GoogleDriveRemoteFile(RemoteFile):
|
|
|
64
63
|
# The mime type of the file as returned by the Google Drive API
|
|
65
64
|
# This is not the same as the mime type when opened by the parser (e.g. google docs is exported as docx)
|
|
66
65
|
original_mime_type: str
|
|
66
|
+
view_link: str
|
|
67
|
+
# Only populated for items in shared drives.
|
|
68
|
+
drive_id: Optional[str] = None
|
|
69
|
+
created_at: datetime
|
|
70
|
+
|
|
71
|
+
@property
|
|
72
|
+
def url(self) -> str:
|
|
73
|
+
if self.drive_id:
|
|
74
|
+
return f"https://drive.google.com/open?id={self.id}&driveId={self.drive_id}"
|
|
75
|
+
return self.view_link
|
|
67
76
|
|
|
68
77
|
|
|
69
78
|
class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
|
|
@@ -135,10 +144,11 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
|
|
|
135
144
|
(path, folder_id) = folder_id_queue.pop()
|
|
136
145
|
# fetch all files in this folder (1000 is the max page size)
|
|
137
146
|
# supportsAllDrives and includeItemsFromAllDrives are required to access files in shared drives
|
|
147
|
+
# ref https://developers.google.com/workspace/drive/api/reference/rest/v3/files#File
|
|
138
148
|
request = service.files().list(
|
|
139
149
|
q=f"'{folder_id}' in parents",
|
|
140
150
|
pageSize=1000,
|
|
141
|
-
fields="nextPageToken, files(id, name, modifiedTime, mimeType)",
|
|
151
|
+
fields="nextPageToken, files(id, name, modifiedTime, mimeType, webViewLink, driveId, createdTime)",
|
|
142
152
|
supportsAllDrives=True,
|
|
143
153
|
includeItemsFromAllDrives=True,
|
|
144
154
|
)
|
|
@@ -161,6 +171,7 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
|
|
|
161
171
|
continue
|
|
162
172
|
else:
|
|
163
173
|
last_modified = datetime.strptime(new_file["modifiedTime"], "%Y-%m-%dT%H:%M:%S.%fZ")
|
|
174
|
+
created_at = datetime.strptime(new_file["createdTime"], "%Y-%m-%dT%H:%M:%S.%fZ")
|
|
164
175
|
original_mime_type = new_file["mimeType"]
|
|
165
176
|
mime_type = (
|
|
166
177
|
self._get_export_mime_type(original_mime_type)
|
|
@@ -170,9 +181,12 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
|
|
|
170
181
|
remote_file = GoogleDriveRemoteFile(
|
|
171
182
|
uri=file_name,
|
|
172
183
|
last_modified=last_modified,
|
|
184
|
+
created_at=created_at,
|
|
173
185
|
id=new_file["id"],
|
|
174
186
|
original_mime_type=original_mime_type,
|
|
175
187
|
mime_type=mime_type,
|
|
188
|
+
drive_id=new_file.get("driveId"),
|
|
189
|
+
view_link=new_file.get("webViewLink"),
|
|
176
190
|
)
|
|
177
191
|
if self.file_matches_globs(remote_file, globs):
|
|
178
192
|
yield remote_file
|
|
@@ -245,7 +259,9 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
|
|
|
245
259
|
except Exception as e:
|
|
246
260
|
raise ErrorFetchingMetadata(f"An error occurred while retrieving file size: {str(e)}")
|
|
247
261
|
|
|
248
|
-
def
|
|
262
|
+
def upload(
|
|
263
|
+
self, file: GoogleDriveRemoteFile, local_directory: str, logger: logging.Logger
|
|
264
|
+
) -> Tuple[FileRecordData, AirbyteRecordMessageFileReference]:
|
|
249
265
|
"""
|
|
250
266
|
Downloads a file from Google Drive to a specified local directory.
|
|
251
267
|
|
|
@@ -265,15 +281,18 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
|
|
|
265
281
|
raise FileSizeLimitError(message=message, internal_message=message, failure_type=FailureType.config_error)
|
|
266
282
|
|
|
267
283
|
try:
|
|
268
|
-
|
|
284
|
+
file_paths = self._get_file_transfer_paths(source_file_relative_path=file.uri, staging_directory=local_directory)
|
|
285
|
+
local_file_path = file_paths[self.LOCAL_FILE_PATH]
|
|
286
|
+
file_relative_path = file_paths[self.FILE_RELATIVE_PATH]
|
|
287
|
+
file_name = file_paths[self.FILE_NAME]
|
|
269
288
|
|
|
270
289
|
if self._is_exportable_document(file.original_mime_type):
|
|
271
290
|
request = self.google_drive_service.files().export_media(fileId=file.id, mimeType=file.mime_type)
|
|
272
291
|
|
|
273
292
|
file_extension = DOWNLOADABLE_DOCUMENTS_MIME_TYPES[file.original_mime_type][DOCUMENT_FILE_EXTENSION_KEY]
|
|
274
293
|
local_file_path += file_extension
|
|
275
|
-
absolute_file_path += file_extension
|
|
276
294
|
file_relative_path += file_extension
|
|
295
|
+
file_name += file_extension
|
|
277
296
|
else:
|
|
278
297
|
request = self.google_drive_service.files().get_media(fileId=file.id)
|
|
279
298
|
|
|
@@ -285,10 +304,26 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
|
|
|
285
304
|
progress = status.resumable_progress / status.total_size * 100 if status.total_size else 0
|
|
286
305
|
logger.info(f"Processing file {file.uri}, progress: {progress:.2f}%")
|
|
287
306
|
|
|
307
|
+
logger.info(f"Finished uploading file {file.uri} to {local_file_path}")
|
|
288
308
|
# native google objects seems to be reporting lower size through the api than the final download size
|
|
289
309
|
file_size = getsize(local_file_path)
|
|
290
310
|
|
|
291
|
-
|
|
311
|
+
file_record_data = FileRecordData(
|
|
312
|
+
folder=file_paths[self.FILE_FOLDER],
|
|
313
|
+
file_name=file_name,
|
|
314
|
+
bytes=file_size,
|
|
315
|
+
id=file.id,
|
|
316
|
+
mime_type=file.mime_type,
|
|
317
|
+
created_at=file.created_at.strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
|
|
318
|
+
updated_at=file.last_modified.strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
|
|
319
|
+
source_uri=file.url,
|
|
320
|
+
)
|
|
321
|
+
file_reference = AirbyteRecordMessageFileReference(
|
|
322
|
+
staging_file_url=local_file_path,
|
|
323
|
+
source_file_relative_path=file_relative_path,
|
|
324
|
+
file_size_bytes=file_size,
|
|
325
|
+
)
|
|
326
|
+
return file_record_data, file_reference
|
|
292
327
|
|
|
293
328
|
except Exception as e:
|
|
294
329
|
raise ErrorDownloadingFile(f"There was an error while trying to download the file {file.uri}: {str(e)}")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{airbyte_source_google_drive-0.3.3 → airbyte_source_google_drive-0.4.0}/source_google_drive/run.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{airbyte_source_google_drive-0.3.3 → airbyte_source_google_drive-0.4.0}/source_google_drive/spec.py
RENAMED
|
File without changes
|
|
File without changes
|
{airbyte_source_google_drive-0.3.3 → airbyte_source_google_drive-0.4.0}/source_google_drive/utils.py
RENAMED
|
File without changes
|