airbyte-source-s3 4.14.0.dev202504091813__py3-none-any.whl → 4.14.0.dev202504161719__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: airbyte-source-s3
3
- Version: 4.14.0.dev202504091813
3
+ Version: 4.14.0.dev202504161719
4
4
  Summary: Source implementation for S3.
5
5
  License: ELv2
6
6
  Author: Airbyte
@@ -10,7 +10,7 @@ Classifier: License :: Other/Proprietary License
10
10
  Classifier: Programming Language :: Python :: 3
11
11
  Classifier: Programming Language :: Python :: 3.10
12
12
  Classifier: Programming Language :: Python :: 3.11
13
- Requires-Dist: airbyte-cdk[file-based] (==6.45.0.dev04107)
13
+ Requires-Dist: airbyte-cdk[file-based] (==6.45.4.post49.dev14495925594)
14
14
  Requires-Dist: dill (>=0.3.4,<0.4.0)
15
15
  Requires-Dist: pendulum (>=3.0.0,<4.0.0)
16
16
  Requires-Dist: pytz (>=2024.2,<2025.0)
@@ -17,9 +17,9 @@ source_s3/v4/config.py,sha256=v9upt0VvzRT2m50Ym8yS2E1F0hNzP21m3zzo7cAG0Wk,4271
17
17
  source_s3/v4/cursor.py,sha256=kZj_6Wtl2yVAVeKZW67XyYQOk2XtbYazTgswEgGfKnI,7298
18
18
  source_s3/v4/legacy_config_transformer.py,sha256=OjKwGBYPHZvhZRKGO1LOAR7-cAT-9KvDRQY-G93eoic,7840
19
19
  source_s3/v4/source.py,sha256=jugIY53C_G9QhQRwKWBPcXXUgYKF_RESSaewzF_HXhc,8967
20
- source_s3/v4/stream_reader.py,sha256=qoCHJ0uxUqjaBYVo5TKy945ZPVjtlEs2ucGaKI6ZTKM,16320
20
+ source_s3/v4/stream_reader.py,sha256=moJTzT-gZI5X4LmRfDe8Ek5ekbKxjiZxiaHUyk6e-w4,16836
21
21
  source_s3/v4/zip_reader.py,sha256=5Atoxu3Si0w5fNT6l_qQ-C1ZlKYt2DgV4Lxf8tTU5hw,14608
22
- airbyte_source_s3-4.14.0.dev202504091813.dist-info/METADATA,sha256=DqL0tW1qRzp-0UIgJJ76i6P1RABkhQbMWk_0P3N_v4I,5402
23
- airbyte_source_s3-4.14.0.dev202504091813.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
24
- airbyte_source_s3-4.14.0.dev202504091813.dist-info/entry_points.txt,sha256=1ndXy1aXftjfCR82yLKg797X8-O5mD95tz6rNKA_1dE,47
25
- airbyte_source_s3-4.14.0.dev202504091813.dist-info/RECORD,,
22
+ airbyte_source_s3-4.14.0.dev202504161719.dist-info/METADATA,sha256=ZpYgAooKRjEFpnDGXwDu3BLoG0LVZftqABhkHdK5IIU,5415
23
+ airbyte_source_s3-4.14.0.dev202504161719.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
24
+ airbyte_source_s3-4.14.0.dev202504161719.dist-info/entry_points.txt,sha256=1ndXy1aXftjfCR82yLKg797X8-O5mD95tz6rNKA_1dE,47
25
+ airbyte_source_s3-4.14.0.dev202504161719.dist-info/RECORD,,
@@ -168,6 +168,19 @@ class SourceS3StreamReader(AbstractFileBasedStreamReader):
168
168
  endpoint=self.config.endpoint,
169
169
  ) from exc
170
170
 
171
+ def _construct_s3_uri(self, file: RemoteFile) -> str:
172
+ """
173
+ Constructs the S3 URI for a given file, handling both regular files and files inside archives.
174
+
175
+ Args:
176
+ file: The RemoteFile object representing either a regular file or a file inside an archive
177
+
178
+ Returns:
179
+ str: The properly formatted S3 URI
180
+ """
181
+ file_path = file.uri.split("#")[0] if isinstance(file, RemoteFileInsideArchive) else file.uri
182
+ return f"s3://{self.config.bucket}/{file_path}"
183
+
171
184
  def open_file(self, file: RemoteFile, mode: FileReadMode, encoding: Optional[str], logger: logging.Logger) -> IOBase:
172
185
  try:
173
186
  params = {"client": self.s3_client}
@@ -176,14 +189,13 @@ class SourceS3StreamReader(AbstractFileBasedStreamReader):
176
189
 
177
190
  logger.debug(f"try to open {file.uri}")
178
191
  try:
192
+ s3_uri = self._construct_s3_uri(file)
179
193
  if isinstance(file, RemoteFileInsideArchive):
180
- s3_file_object = smart_open.open(f"s3://{self.config.bucket}/{file.uri.split('#')[0]}", transport_params=params, mode="rb")
194
+ s3_file_object = smart_open.open(s3_uri, transport_params=params, mode="rb")
181
195
  decompressed_stream = DecompressedStream(s3_file_object, file)
182
196
  result = ZipContentReader(decompressed_stream, encoding)
183
197
  else:
184
- result = smart_open.open(
185
- f"s3://{self.config.bucket}/{file.uri}", transport_params=params, mode=mode.value, encoding=encoding
186
- )
198
+ result = smart_open.open(s3_uri, transport_params=params, mode=mode.value, encoding=encoding)
187
199
  except OSError:
188
200
  logger.warning(
189
201
  f"We don't have access to {file.uri}. The file appears to have become unreachable during sync."
@@ -247,7 +259,7 @@ class SourceS3StreamReader(AbstractFileBasedStreamReader):
247
259
  message = "File size exceeds the 1 GB limit."
248
260
  raise FileSizeLimitError(message=message, internal_message=message, failure_type=FailureType.config_error)
249
261
 
250
- file_paths = self._get_file_transfer_paths(file, local_directory)
262
+ file_paths = self._get_file_transfer_paths(file.uri, local_directory)
251
263
  local_file_path = file_paths[self.LOCAL_FILE_PATH]
252
264
  file_relative_path = file_paths[self.FILE_RELATIVE_PATH]
253
265
  file_name = file_paths[self.FILE_NAME]
@@ -267,6 +279,7 @@ class SourceS3StreamReader(AbstractFileBasedStreamReader):
267
279
  filename=file_name,
268
280
  bytes=file_size,
269
281
  updated_at=file.last_modified.strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
282
+ source_uri=self._construct_s3_uri(file),
270
283
  )
271
284
 
272
285
  file_reference = AirbyteRecordMessageFileReference(