airbyte-source-s3 4.13.4__tar.gz → 4.14.0.dev202504072343__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of airbyte-source-s3 might be problematic. Click here for more details.

Files changed (24) hide show
  1. {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/PKG-INFO +2 -2
  2. {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/pyproject.toml +2 -2
  3. {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/v4/stream_reader.py +26 -9
  4. {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/README.md +0 -0
  5. {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/__init__.py +0 -0
  6. {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/exceptions.py +0 -0
  7. {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/run.py +0 -0
  8. {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/source.py +0 -0
  9. {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/source_files_abstract/__init__.py +0 -0
  10. {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/source_files_abstract/formats/__init__.py +0 -0
  11. {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/source_files_abstract/formats/avro_spec.py +0 -0
  12. {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/source_files_abstract/formats/csv_spec.py +0 -0
  13. {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/source_files_abstract/formats/jsonl_spec.py +0 -0
  14. {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/source_files_abstract/formats/parquet_spec.py +0 -0
  15. {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/source_files_abstract/source.py +0 -0
  16. {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/source_files_abstract/spec.py +0 -0
  17. {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/stream.py +0 -0
  18. {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/utils.py +0 -0
  19. {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/v4/__init__.py +0 -0
  20. {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/v4/config.py +0 -0
  21. {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/v4/cursor.py +0 -0
  22. {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/v4/legacy_config_transformer.py +0 -0
  23. {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/v4/source.py +0 -0
  24. {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/v4/zip_reader.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: airbyte-source-s3
3
- Version: 4.13.4
3
+ Version: 4.14.0.dev202504072343
4
4
  Summary: Source implementation for S3.
5
5
  License: ELv2
6
6
  Author: Airbyte
@@ -10,7 +10,7 @@ Classifier: License :: Other/Proprietary License
10
10
  Classifier: Programming Language :: Python :: 3
11
11
  Classifier: Programming Language :: Python :: 3.10
12
12
  Classifier: Programming Language :: Python :: 3.11
13
- Requires-Dist: airbyte-cdk[file-based] (>=6,<7)
13
+ Requires-Dist: airbyte-cdk[file-based] (==6.45.0.dev04106)
14
14
  Requires-Dist: dill (>=0.3.4,<0.4.0)
15
15
  Requires-Dist: pendulum (>=3.0.0,<4.0.0)
16
16
  Requires-Dist: pytz (>=2024.2,<2025.0)
@@ -5,7 +5,7 @@ requires = [
5
5
  build-backend = "poetry.core.masonry.api"
6
6
 
7
7
  [tool.poetry]
8
- version = "4.13.4"
8
+ version = "4.14.0.dev202504072343"
9
9
  name = "airbyte-source-s3"
10
10
  description = "Source implementation for S3."
11
11
  authors = [
@@ -33,7 +33,7 @@ pendulum = "^3.0.0"
33
33
  extras = [
34
34
  "file-based",
35
35
  ]
36
- version = "^6"
36
+ version = "6.45.0.dev04106"
37
37
 
38
38
  [tool.poetry.dependencies.smart-open]
39
39
  extras = [
@@ -7,7 +7,8 @@ import time
7
7
  from datetime import datetime
8
8
  from io import IOBase
9
9
  from os import getenv
10
- from typing import Dict, Iterable, List, Optional, Set, cast
10
+ from os.path import basename, dirname
11
+ from typing import Dict, Iterable, List, Optional, Set, Tuple, cast
11
12
 
12
13
  import boto3.session
13
14
  import pendulum
@@ -22,8 +23,10 @@ from botocore.session import get_session
22
23
  from typing_extensions import override
23
24
 
24
25
  from airbyte_cdk import FailureType
26
+ from airbyte_cdk.models import AirbyteRecordMessageFileReference
25
27
  from airbyte_cdk.sources.file_based.exceptions import CustomFileBasedException, ErrorListingFiles, FileBasedSourceError, FileSizeLimitError
26
28
  from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader, FileReadMode
29
+ from airbyte_cdk.sources.file_based.file_record_data import FileRecordData
27
30
  from airbyte_cdk.sources.file_based.remote_file import RemoteFile
28
31
  from source_s3.v4.config import Config
29
32
  from source_s3.v4.zip_reader import DecompressedStream, RemoteFileInsideArchive, ZipContentReader, ZipFileHandler
@@ -221,7 +224,9 @@ class SourceS3StreamReader(AbstractFileBasedStreamReader):
221
224
  return progress_handler
222
225
 
223
226
  @override
224
- def get_file(self, file: RemoteFile, local_directory: str, logger: logging.Logger) -> Dict[str, str | int]:
227
+ def upload(
228
+ self, file: RemoteFile, local_directory: str, logger: logging.Logger
229
+ ) -> Tuple[FileRecordData, AirbyteRecordMessageFileReference]:
225
230
  """
226
231
  Downloads a file from an S3 bucket to a specified local directory.
227
232
 
@@ -231,11 +236,7 @@ class SourceS3StreamReader(AbstractFileBasedStreamReader):
231
236
  logger (logging.Logger): Logger for logging information and errors.
232
237
 
233
238
  Returns:
234
- dict: A dictionary containing the following:
235
- - "file_url" (str): The absolute path of the downloaded file.
236
- - "bytes" (int): The file size in bytes.
237
- - "file_relative_path" (str): The relative path of the file for local storage. Is relative to local_directory as
238
- this a mounted volume in the pod container.
239
+ Tuple[FileRecordData, AirbyteRecordMessageFileReference]: Contains file record data and file reference for Airbyte protocol.
239
240
 
240
241
  Raises:
241
242
  FileSizeLimitError: If the file size exceeds the predefined limit (1 GB).
@@ -246,7 +247,10 @@ class SourceS3StreamReader(AbstractFileBasedStreamReader):
246
247
  message = "File size exceeds the 1 GB limit."
247
248
  raise FileSizeLimitError(message=message, internal_message=message, failure_type=FailureType.config_error)
248
249
 
249
- file_relative_path, local_file_path, absolute_file_path = self._get_file_transfer_paths(file, local_directory)
250
+ file_paths = self._get_file_transfer_paths(file, local_directory)
251
+ local_file_path = file_paths[self.LOCAL_FILE_PATH]
252
+ file_relative_path = file_paths[self.FILE_RELATIVE_PATH]
253
+ file_name = file_paths[self.FILE_NAME]
250
254
 
251
255
  logger.info(
252
256
  f"Starting to download the file {file.uri} with size: {file_size / (1024 * 1024):,.2f} MB ({file_size / (1024 * 1024 * 1024):.2f} GB)"
@@ -258,7 +262,20 @@ class SourceS3StreamReader(AbstractFileBasedStreamReader):
258
262
  write_duration = time.time() - start_download_time
259
263
  logger.info(f"Finished downloading the file {file.uri} and saved to {local_file_path} in {write_duration:,.2f} seconds.")
260
264
 
261
- return {"file_url": absolute_file_path, "bytes": file_size, "file_relative_path": file_relative_path}
265
+ file_record_data = FileRecordData(
266
+ folder=file_paths[self.FILE_FOLDER],
267
+ filename=file_name,
268
+ bytes=file_size,
269
+ updated_at=int(file.last_modified.timestamp() * 1000),
270
+ )
271
+
272
+ file_reference = AirbyteRecordMessageFileReference(
273
+ staging_file_url=local_file_path,
274
+ source_file_relative_path=file_relative_path,
275
+ file_size_bytes=file_size,
276
+ )
277
+
278
+ return file_record_data, file_reference
262
279
 
263
280
  @override
264
281
  def file_size(self, file: RemoteFile) -> int: