airbyte-source-google-drive 0.4.3.dev202504071123__tar.gz → 0.4.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {airbyte_source_google_drive-0.4.3.dev202504071123 → airbyte_source_google_drive-0.4.4}/PKG-INFO +4 -4
- {airbyte_source_google_drive-0.4.3.dev202504071123 → airbyte_source_google_drive-0.4.4}/pyproject.toml +9 -18
- {airbyte_source_google_drive-0.4.3.dev202504071123 → airbyte_source_google_drive-0.4.4}/source_google_drive/stream_reader.py +22 -4
- {airbyte_source_google_drive-0.4.3.dev202504071123 → airbyte_source_google_drive-0.4.4}/README.md +0 -0
- {airbyte_source_google_drive-0.4.3.dev202504071123 → airbyte_source_google_drive-0.4.4}/source_google_drive/__init__.py +0 -0
- {airbyte_source_google_drive-0.4.3.dev202504071123 → airbyte_source_google_drive-0.4.4}/source_google_drive/exceptions.py +0 -0
- {airbyte_source_google_drive-0.4.3.dev202504071123 → airbyte_source_google_drive-0.4.4}/source_google_drive/run.py +0 -0
- {airbyte_source_google_drive-0.4.3.dev202504071123 → airbyte_source_google_drive-0.4.4}/source_google_drive/schemas/file_permissions.json +0 -0
- {airbyte_source_google_drive-0.4.3.dev202504071123 → airbyte_source_google_drive-0.4.4}/source_google_drive/schemas/identities.json +0 -0
- {airbyte_source_google_drive-0.4.3.dev202504071123 → airbyte_source_google_drive-0.4.4}/source_google_drive/source.py +0 -0
- {airbyte_source_google_drive-0.4.3.dev202504071123 → airbyte_source_google_drive-0.4.4}/source_google_drive/spec.py +0 -0
- {airbyte_source_google_drive-0.4.3.dev202504071123 → airbyte_source_google_drive-0.4.4}/source_google_drive/stream_permissions_reader.py +0 -0
- {airbyte_source_google_drive-0.4.3.dev202504071123 → airbyte_source_google_drive-0.4.4}/source_google_drive/utils.py +0 -0
{airbyte_source_google_drive-0.4.3.dev202504071123 → airbyte_source_google_drive-0.4.4}/PKG-INFO
RENAMED
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
2
|
Name: airbyte-source-google-drive
|
|
3
|
-
Version: 0.4.3.dev202504071123
|
|
3
|
+
Version: 0.4.4
|
|
4
4
|
Summary: Source implementation for Google Drive.
|
|
5
|
+
Home-page: https://airbyte.com
|
|
5
6
|
License: ELv2
|
|
6
7
|
Author: Airbyte
|
|
7
8
|
Author-email: contact@airbyte.io
|
|
@@ -10,13 +11,12 @@ Classifier: License :: Other/Proprietary License
|
|
|
10
11
|
Classifier: Programming Language :: Python :: 3
|
|
11
12
|
Classifier: Programming Language :: Python :: 3.10
|
|
12
13
|
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
-
Requires-Dist: airbyte-cdk[file-based] (==6.45.0.dev04101)
|
|
14
|
+
Requires-Dist: airbyte-cdk[file-based] (>=6.45.10,<7.0.0)
|
|
14
15
|
Requires-Dist: google-api-python-client (==2.104.0)
|
|
15
16
|
Requires-Dist: google-api-python-client-stubs (==1.18.0)
|
|
16
17
|
Requires-Dist: google-auth-httplib2 (==0.1.1)
|
|
17
18
|
Requires-Dist: google-auth-oauthlib (==1.1.0)
|
|
18
19
|
Project-URL: Documentation, https://docs.airbyte.com/integrations/sources/google-drive
|
|
19
|
-
Project-URL: Homepage, https://airbyte.com
|
|
20
20
|
Project-URL: Repository, https://github.com/airbytehq/airbyte
|
|
21
21
|
Description-Content-Type: text/markdown
|
|
22
22
|
|
|
@@ -1,27 +1,19 @@
|
|
|
1
1
|
[build-system]
|
|
2
|
-
requires = [
|
|
3
|
-
"poetry-core>=1.0.0",
|
|
4
|
-
]
|
|
2
|
+
requires = [ "poetry-core>=1.0.0",]
|
|
5
3
|
build-backend = "poetry.core.masonry.api"
|
|
6
4
|
|
|
7
5
|
[tool.poetry]
|
|
8
|
-
version = "0.4.3.dev202504071123"
|
|
6
|
+
version = "0.4.4"
|
|
9
7
|
name = "airbyte-source-google-drive"
|
|
10
8
|
description = "Source implementation for Google Drive."
|
|
11
|
-
authors = [
|
|
12
|
-
"Airbyte <contact@airbyte.io>",
|
|
13
|
-
]
|
|
9
|
+
authors = [ "Airbyte <contact@airbyte.io>",]
|
|
14
10
|
license = "ELv2"
|
|
15
11
|
readme = "README.md"
|
|
16
12
|
documentation = "https://docs.airbyte.com/integrations/sources/google-drive"
|
|
17
13
|
homepage = "https://airbyte.com"
|
|
18
14
|
repository = "https://github.com/airbytehq/airbyte"
|
|
19
|
-
packages = [
|
|
20
|
-
|
|
21
|
-
]
|
|
22
|
-
source = [
|
|
23
|
-
{ name = "testpypi", url = "https://test.pypi.org/simple/", priority = "supplemental" },
|
|
24
|
-
]
|
|
15
|
+
[[tool.poetry.packages]]
|
|
16
|
+
include = "source_google_drive"
|
|
25
17
|
|
|
26
18
|
[tool.poetry.dependencies]
|
|
27
19
|
python = "^3.10,<3.12"
|
|
@@ -29,12 +21,8 @@ google-api-python-client = "==2.104.0"
|
|
|
29
21
|
google-auth-httplib2 = "==0.1.1"
|
|
30
22
|
google-auth-oauthlib = "==1.1.0"
|
|
31
23
|
google-api-python-client-stubs = "==1.18.0"
|
|
24
|
+
airbyte-cdk = {extras = ["file-based"], version = "^6.45.10"}
|
|
32
25
|
|
|
33
|
-
[tool.poetry.dependencies.airbyte-cdk]
|
|
34
|
-
extras = [
|
|
35
|
-
"file-based",
|
|
36
|
-
]
|
|
37
|
-
version = "6.45.0.dev04101"
|
|
38
26
|
|
|
39
27
|
[tool.poetry.scripts]
|
|
40
28
|
source-google-drive = "source_google_drive.run:run"
|
|
@@ -43,7 +31,10 @@ source-google-drive = "source_google_drive.run:run"
|
|
|
43
31
|
pytest-mock = "^3.12.0"
|
|
44
32
|
pytest = "^8.0.0"
|
|
45
33
|
|
|
34
|
+
|
|
46
35
|
[tool.poe]
|
|
47
36
|
include = [
|
|
37
|
+
# Shared tasks definition file(s) can be imported here.
|
|
38
|
+
# Run `poe` or `poe --help` to see the list of available tasks.
|
|
48
39
|
"${POE_GIT_DIR}/poe-tasks/poetry-connector-tasks.toml",
|
|
49
40
|
]
|
|
@@ -63,6 +63,16 @@ class GoogleDriveRemoteFile(RemoteFile):
|
|
|
63
63
|
# The mime type of the file as returned by the Google Drive API
|
|
64
64
|
# This is not the same as the mime type when opened by the parser (e.g. google docs is exported as docx)
|
|
65
65
|
original_mime_type: str
|
|
66
|
+
view_link: str
|
|
67
|
+
# Only populated for items in shared drives.
|
|
68
|
+
drive_id: Optional[str] = None
|
|
69
|
+
created_at: datetime
|
|
70
|
+
|
|
71
|
+
@property
|
|
72
|
+
def url(self) -> str:
|
|
73
|
+
if self.drive_id:
|
|
74
|
+
return f"https://drive.google.com/open?id={self.id}&driveId={self.drive_id}"
|
|
75
|
+
return self.view_link
|
|
66
76
|
|
|
67
77
|
|
|
68
78
|
class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
|
|
@@ -134,10 +144,11 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
|
|
|
134
144
|
(path, folder_id) = folder_id_queue.pop()
|
|
135
145
|
# fetch all files in this folder (1000 is the max page size)
|
|
136
146
|
# supportsAllDrives and includeItemsFromAllDrives are required to access files in shared drives
|
|
147
|
+
# ref https://developers.google.com/workspace/drive/api/reference/rest/v3/files#File
|
|
137
148
|
request = service.files().list(
|
|
138
149
|
q=f"'{folder_id}' in parents",
|
|
139
150
|
pageSize=1000,
|
|
140
|
-
fields="nextPageToken, files(id, name, modifiedTime, mimeType)",
|
|
151
|
+
fields="nextPageToken, files(id, name, modifiedTime, mimeType, webViewLink, driveId, createdTime)",
|
|
141
152
|
supportsAllDrives=True,
|
|
142
153
|
includeItemsFromAllDrives=True,
|
|
143
154
|
)
|
|
@@ -160,6 +171,7 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
|
|
|
160
171
|
continue
|
|
161
172
|
else:
|
|
162
173
|
last_modified = datetime.strptime(new_file["modifiedTime"], "%Y-%m-%dT%H:%M:%S.%fZ")
|
|
174
|
+
created_at = datetime.strptime(new_file["createdTime"], "%Y-%m-%dT%H:%M:%S.%fZ")
|
|
163
175
|
original_mime_type = new_file["mimeType"]
|
|
164
176
|
mime_type = (
|
|
165
177
|
self._get_export_mime_type(original_mime_type)
|
|
@@ -169,9 +181,12 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
|
|
|
169
181
|
remote_file = GoogleDriveRemoteFile(
|
|
170
182
|
uri=file_name,
|
|
171
183
|
last_modified=last_modified,
|
|
184
|
+
created_at=created_at,
|
|
172
185
|
id=new_file["id"],
|
|
173
186
|
original_mime_type=original_mime_type,
|
|
174
187
|
mime_type=mime_type,
|
|
188
|
+
drive_id=new_file.get("driveId"),
|
|
189
|
+
view_link=new_file.get("webViewLink"),
|
|
175
190
|
)
|
|
176
191
|
if self.file_matches_globs(remote_file, globs):
|
|
177
192
|
yield remote_file
|
|
@@ -266,7 +281,7 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
|
|
|
266
281
|
raise FileSizeLimitError(message=message, internal_message=message, failure_type=FailureType.config_error)
|
|
267
282
|
|
|
268
283
|
try:
|
|
269
|
-
file_paths = self._get_file_transfer_paths(file, local_directory)
|
|
284
|
+
file_paths = self._get_file_transfer_paths(source_file_relative_path=file.uri, staging_directory=local_directory)
|
|
270
285
|
local_file_path = file_paths[self.LOCAL_FILE_PATH]
|
|
271
286
|
file_relative_path = file_paths[self.FILE_RELATIVE_PATH]
|
|
272
287
|
file_name = file_paths[self.FILE_NAME]
|
|
@@ -289,16 +304,19 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
|
|
|
289
304
|
progress = status.resumable_progress / status.total_size * 100 if status.total_size else 0
|
|
290
305
|
logger.info(f"Processing file {file.uri}, progress: {progress:.2f}%")
|
|
291
306
|
|
|
307
|
+
logger.info(f"Finished uploading file {file.uri} to {local_file_path}")
|
|
292
308
|
# native google objects seems to be reporting lower size through the api than the final download size
|
|
293
309
|
file_size = getsize(local_file_path)
|
|
294
310
|
|
|
295
311
|
file_record_data = FileRecordData(
|
|
296
312
|
folder=file_paths[self.FILE_FOLDER],
|
|
297
|
-
|
|
313
|
+
file_name=file_name,
|
|
298
314
|
bytes=file_size,
|
|
299
315
|
id=file.id,
|
|
300
316
|
mime_type=file.mime_type,
|
|
301
|
-
|
|
317
|
+
created_at=file.created_at.strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
|
|
318
|
+
updated_at=file.last_modified.strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
|
|
319
|
+
source_uri=file.url,
|
|
302
320
|
)
|
|
303
321
|
file_reference = AirbyteRecordMessageFileReference(
|
|
304
322
|
staging_file_url=local_file_path,
|
{airbyte_source_google_drive-0.4.3.dev202504071123 → airbyte_source_google_drive-0.4.4}/README.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|