airbyte-source-google-drive 0.4.3.dev202504071123__tar.gz → 0.4.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of airbyte-source-google-drive might be problematic. Click here for more details.

Files changed (13)
  1. {airbyte_source_google_drive-0.4.3.dev202504071123 → airbyte_source_google_drive-0.4.5}/PKG-INFO +4 -4
  2. {airbyte_source_google_drive-0.4.3.dev202504071123 → airbyte_source_google_drive-0.4.5}/pyproject.toml +9 -18
  3. {airbyte_source_google_drive-0.4.3.dev202504071123 → airbyte_source_google_drive-0.4.5}/source_google_drive/stream_reader.py +22 -4
  4. {airbyte_source_google_drive-0.4.3.dev202504071123 → airbyte_source_google_drive-0.4.5}/README.md +0 -0
  5. {airbyte_source_google_drive-0.4.3.dev202504071123 → airbyte_source_google_drive-0.4.5}/source_google_drive/__init__.py +0 -0
  6. {airbyte_source_google_drive-0.4.3.dev202504071123 → airbyte_source_google_drive-0.4.5}/source_google_drive/exceptions.py +0 -0
  7. {airbyte_source_google_drive-0.4.3.dev202504071123 → airbyte_source_google_drive-0.4.5}/source_google_drive/run.py +0 -0
  8. {airbyte_source_google_drive-0.4.3.dev202504071123 → airbyte_source_google_drive-0.4.5}/source_google_drive/schemas/file_permissions.json +0 -0
  9. {airbyte_source_google_drive-0.4.3.dev202504071123 → airbyte_source_google_drive-0.4.5}/source_google_drive/schemas/identities.json +0 -0
  10. {airbyte_source_google_drive-0.4.3.dev202504071123 → airbyte_source_google_drive-0.4.5}/source_google_drive/source.py +0 -0
  11. {airbyte_source_google_drive-0.4.3.dev202504071123 → airbyte_source_google_drive-0.4.5}/source_google_drive/spec.py +0 -0
  12. {airbyte_source_google_drive-0.4.3.dev202504071123 → airbyte_source_google_drive-0.4.5}/source_google_drive/stream_permissions_reader.py +0 -0
  13. {airbyte_source_google_drive-0.4.3.dev202504071123 → airbyte_source_google_drive-0.4.5}/source_google_drive/utils.py +0 -0
@@ -1,7 +1,8 @@
1
- Metadata-Version: 2.3
1
+ Metadata-Version: 2.1
2
2
  Name: airbyte-source-google-drive
3
- Version: 0.4.3.dev202504071123
3
+ Version: 0.4.5
4
4
  Summary: Source implementation for Google Drive.
5
+ Home-page: https://airbyte.com
5
6
  License: ELv2
6
7
  Author: Airbyte
7
8
  Author-email: contact@airbyte.io
@@ -10,13 +11,12 @@ Classifier: License :: Other/Proprietary License
10
11
  Classifier: Programming Language :: Python :: 3
11
12
  Classifier: Programming Language :: Python :: 3.10
12
13
  Classifier: Programming Language :: Python :: 3.11
13
- Requires-Dist: airbyte-cdk[file-based] (==6.45.0.dev04101)
14
+ Requires-Dist: airbyte-cdk[file-based] (>=6.45.10,<7.0.0)
14
15
  Requires-Dist: google-api-python-client (==2.104.0)
15
16
  Requires-Dist: google-api-python-client-stubs (==1.18.0)
16
17
  Requires-Dist: google-auth-httplib2 (==0.1.1)
17
18
  Requires-Dist: google-auth-oauthlib (==1.1.0)
18
19
  Project-URL: Documentation, https://docs.airbyte.com/integrations/sources/google-drive
19
- Project-URL: Homepage, https://airbyte.com
20
20
  Project-URL: Repository, https://github.com/airbytehq/airbyte
21
21
  Description-Content-Type: text/markdown
22
22
 
@@ -1,27 +1,19 @@
1
1
  [build-system]
2
- requires = [
3
- "poetry-core>=1.0.0",
4
- ]
2
+ requires = [ "poetry-core>=1.0.0",]
5
3
  build-backend = "poetry.core.masonry.api"
6
4
 
7
5
  [tool.poetry]
8
- version = "0.4.3.dev202504071123"
6
+ version = "0.4.5"
9
7
  name = "airbyte-source-google-drive"
10
8
  description = "Source implementation for Google Drive."
11
- authors = [
12
- "Airbyte <contact@airbyte.io>",
13
- ]
9
+ authors = [ "Airbyte <contact@airbyte.io>",]
14
10
  license = "ELv2"
15
11
  readme = "README.md"
16
12
  documentation = "https://docs.airbyte.com/integrations/sources/google-drive"
17
13
  homepage = "https://airbyte.com"
18
14
  repository = "https://github.com/airbytehq/airbyte"
19
- packages = [
20
- { include = "source_google_drive" },
21
- ]
22
- source = [
23
- { name = "testpypi", url = "https://test.pypi.org/simple/", priority = "supplemental" },
24
- ]
15
+ [[tool.poetry.packages]]
16
+ include = "source_google_drive"
25
17
 
26
18
  [tool.poetry.dependencies]
27
19
  python = "^3.10,<3.12"
@@ -29,12 +21,8 @@ google-api-python-client = "==2.104.0"
29
21
  google-auth-httplib2 = "==0.1.1"
30
22
  google-auth-oauthlib = "==1.1.0"
31
23
  google-api-python-client-stubs = "==1.18.0"
24
+ airbyte-cdk = {extras = ["file-based"], version = "^6.45.10"}
32
25
 
33
- [tool.poetry.dependencies.airbyte-cdk]
34
- extras = [
35
- "file-based",
36
- ]
37
- version = "6.45.0.dev04101"
38
26
 
39
27
  [tool.poetry.scripts]
40
28
  source-google-drive = "source_google_drive.run:run"
@@ -43,7 +31,10 @@ source-google-drive = "source_google_drive.run:run"
43
31
  pytest-mock = "^3.12.0"
44
32
  pytest = "^8.0.0"
45
33
 
34
+
46
35
  [tool.poe]
47
36
  include = [
37
+ # Shared tasks definition file(s) can be imported here.
38
+ # Run `poe` or `poe --help` to see the list of available tasks.
48
39
  "${POE_GIT_DIR}/poe-tasks/poetry-connector-tasks.toml",
49
40
  ]
@@ -63,6 +63,16 @@ class GoogleDriveRemoteFile(RemoteFile):
63
63
  # The mime type of the file as returned by the Google Drive API
64
64
  # This is not the same as the mime type when opened by the parser (e.g. google docs is exported as docx)
65
65
  original_mime_type: str
66
+ view_link: str
67
+ # Only populated for items in shared drives.
68
+ drive_id: Optional[str] = None
69
+ created_at: datetime
70
+
71
+ @property
72
+ def url(self) -> str:
73
+ if self.drive_id:
74
+ return f"https://drive.google.com/open?id={self.id}&driveId={self.drive_id}"
75
+ return self.view_link
66
76
 
67
77
 
68
78
  class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
@@ -134,10 +144,11 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
134
144
  (path, folder_id) = folder_id_queue.pop()
135
145
  # fetch all files in this folder (1000 is the max page size)
136
146
  # supportsAllDrives and includeItemsFromAllDrives are required to access files in shared drives
147
+ # ref https://developers.google.com/workspace/drive/api/reference/rest/v3/files#File
137
148
  request = service.files().list(
138
149
  q=f"'{folder_id}' in parents",
139
150
  pageSize=1000,
140
- fields="nextPageToken, files(id, name, modifiedTime, mimeType)",
151
+ fields="nextPageToken, files(id, name, modifiedTime, mimeType, webViewLink, driveId, createdTime)",
141
152
  supportsAllDrives=True,
142
153
  includeItemsFromAllDrives=True,
143
154
  )
@@ -160,6 +171,7 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
160
171
  continue
161
172
  else:
162
173
  last_modified = datetime.strptime(new_file["modifiedTime"], "%Y-%m-%dT%H:%M:%S.%fZ")
174
+ created_at = datetime.strptime(new_file["createdTime"], "%Y-%m-%dT%H:%M:%S.%fZ")
163
175
  original_mime_type = new_file["mimeType"]
164
176
  mime_type = (
165
177
  self._get_export_mime_type(original_mime_type)
@@ -169,9 +181,12 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
169
181
  remote_file = GoogleDriveRemoteFile(
170
182
  uri=file_name,
171
183
  last_modified=last_modified,
184
+ created_at=created_at,
172
185
  id=new_file["id"],
173
186
  original_mime_type=original_mime_type,
174
187
  mime_type=mime_type,
188
+ drive_id=new_file.get("driveId"),
189
+ view_link=new_file.get("webViewLink"),
175
190
  )
176
191
  if self.file_matches_globs(remote_file, globs):
177
192
  yield remote_file
@@ -266,7 +281,7 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
266
281
  raise FileSizeLimitError(message=message, internal_message=message, failure_type=FailureType.config_error)
267
282
 
268
283
  try:
269
- file_paths = self._get_file_transfer_paths(file, local_directory)
284
+ file_paths = self._get_file_transfer_paths(source_file_relative_path=file.uri, staging_directory=local_directory)
270
285
  local_file_path = file_paths[self.LOCAL_FILE_PATH]
271
286
  file_relative_path = file_paths[self.FILE_RELATIVE_PATH]
272
287
  file_name = file_paths[self.FILE_NAME]
@@ -289,16 +304,19 @@ class SourceGoogleDriveStreamReader(AbstractFileBasedStreamReader):
289
304
  progress = status.resumable_progress / status.total_size * 100 if status.total_size else 0
290
305
  logger.info(f"Processing file {file.uri}, progress: {progress:.2f}%")
291
306
 
307
+ logger.info(f"Finished uploading file {file.uri} to {local_file_path}")
292
308
  # native google objects seems to be reporting lower size through the api than the final download size
293
309
  file_size = getsize(local_file_path)
294
310
 
295
311
  file_record_data = FileRecordData(
296
312
  folder=file_paths[self.FILE_FOLDER],
297
- filename=file_name,
313
+ file_name=file_name,
298
314
  bytes=file_size,
299
315
  id=file.id,
300
316
  mime_type=file.mime_type,
301
- updated_at=int(file.last_modified.timestamp() * 1000),
317
+ created_at=file.created_at.strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
318
+ updated_at=file.last_modified.strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
319
+ source_uri=file.url,
302
320
  )
303
321
  file_reference = AirbyteRecordMessageFileReference(
304
322
  staging_file_url=local_file_path,