airbyte-source-s3 4.13.4__tar.gz → 4.14.0.dev202504072343__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of airbyte-source-s3 might be problematic. Click here for more details.
- {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/PKG-INFO +2 -2
- {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/pyproject.toml +2 -2
- {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/v4/stream_reader.py +26 -9
- {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/README.md +0 -0
- {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/__init__.py +0 -0
- {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/exceptions.py +0 -0
- {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/run.py +0 -0
- {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/source.py +0 -0
- {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/source_files_abstract/__init__.py +0 -0
- {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/source_files_abstract/formats/__init__.py +0 -0
- {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/source_files_abstract/formats/avro_spec.py +0 -0
- {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/source_files_abstract/formats/csv_spec.py +0 -0
- {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/source_files_abstract/formats/jsonl_spec.py +0 -0
- {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/source_files_abstract/formats/parquet_spec.py +0 -0
- {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/source_files_abstract/source.py +0 -0
- {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/source_files_abstract/spec.py +0 -0
- {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/stream.py +0 -0
- {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/utils.py +0 -0
- {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/v4/__init__.py +0 -0
- {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/v4/config.py +0 -0
- {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/v4/cursor.py +0 -0
- {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/v4/legacy_config_transformer.py +0 -0
- {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/v4/source.py +0 -0
- {airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/v4/zip_reader.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: airbyte-source-s3
|
|
3
|
-
Version: 4.13.4
|
|
3
|
+
Version: 4.14.0.dev202504072343
|
|
4
4
|
Summary: Source implementation for S3.
|
|
5
5
|
License: ELv2
|
|
6
6
|
Author: Airbyte
|
|
@@ -10,7 +10,7 @@ Classifier: License :: Other/Proprietary License
|
|
|
10
10
|
Classifier: Programming Language :: Python :: 3
|
|
11
11
|
Classifier: Programming Language :: Python :: 3.10
|
|
12
12
|
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
-
Requires-Dist: airbyte-cdk[file-based] (
|
|
13
|
+
Requires-Dist: airbyte-cdk[file-based] (==6.45.0.dev04106)
|
|
14
14
|
Requires-Dist: dill (>=0.3.4,<0.4.0)
|
|
15
15
|
Requires-Dist: pendulum (>=3.0.0,<4.0.0)
|
|
16
16
|
Requires-Dist: pytz (>=2024.2,<2025.0)
|
|
@@ -5,7 +5,7 @@ requires = [
|
|
|
5
5
|
build-backend = "poetry.core.masonry.api"
|
|
6
6
|
|
|
7
7
|
[tool.poetry]
|
|
8
|
-
version = "4.13.4"
|
|
8
|
+
version = "4.14.0.dev202504072343"
|
|
9
9
|
name = "airbyte-source-s3"
|
|
10
10
|
description = "Source implementation for S3."
|
|
11
11
|
authors = [
|
|
@@ -33,7 +33,7 @@ pendulum = "^3.0.0"
|
|
|
33
33
|
extras = [
|
|
34
34
|
"file-based",
|
|
35
35
|
]
|
|
36
|
-
version = "
|
|
36
|
+
version = "6.45.0.dev04106"
|
|
37
37
|
|
|
38
38
|
[tool.poetry.dependencies.smart-open]
|
|
39
39
|
extras = [
|
{airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/v4/stream_reader.py
RENAMED
|
@@ -7,7 +7,8 @@ import time
|
|
|
7
7
|
from datetime import datetime
|
|
8
8
|
from io import IOBase
|
|
9
9
|
from os import getenv
|
|
10
|
-
from
|
|
10
|
+
from os.path import basename, dirname
|
|
11
|
+
from typing import Dict, Iterable, List, Optional, Set, Tuple, cast
|
|
11
12
|
|
|
12
13
|
import boto3.session
|
|
13
14
|
import pendulum
|
|
@@ -22,8 +23,10 @@ from botocore.session import get_session
|
|
|
22
23
|
from typing_extensions import override
|
|
23
24
|
|
|
24
25
|
from airbyte_cdk import FailureType
|
|
26
|
+
from airbyte_cdk.models import AirbyteRecordMessageFileReference
|
|
25
27
|
from airbyte_cdk.sources.file_based.exceptions import CustomFileBasedException, ErrorListingFiles, FileBasedSourceError, FileSizeLimitError
|
|
26
28
|
from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader, FileReadMode
|
|
29
|
+
from airbyte_cdk.sources.file_based.file_record_data import FileRecordData
|
|
27
30
|
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
|
|
28
31
|
from source_s3.v4.config import Config
|
|
29
32
|
from source_s3.v4.zip_reader import DecompressedStream, RemoteFileInsideArchive, ZipContentReader, ZipFileHandler
|
|
@@ -221,7 +224,9 @@ class SourceS3StreamReader(AbstractFileBasedStreamReader):
|
|
|
221
224
|
return progress_handler
|
|
222
225
|
|
|
223
226
|
@override
|
|
224
|
-
def
|
|
227
|
+
def upload(
|
|
228
|
+
self, file: RemoteFile, local_directory: str, logger: logging.Logger
|
|
229
|
+
) -> Tuple[FileRecordData, AirbyteRecordMessageFileReference]:
|
|
225
230
|
"""
|
|
226
231
|
Downloads a file from an S3 bucket to a specified local directory.
|
|
227
232
|
|
|
@@ -231,11 +236,7 @@ class SourceS3StreamReader(AbstractFileBasedStreamReader):
|
|
|
231
236
|
logger (logging.Logger): Logger for logging information and errors.
|
|
232
237
|
|
|
233
238
|
Returns:
|
|
234
|
-
|
|
235
|
-
- "file_url" (str): The absolute path of the downloaded file.
|
|
236
|
-
- "bytes" (int): The file size in bytes.
|
|
237
|
-
- "file_relative_path" (str): The relative path of the file for local storage. Is relative to local_directory as
|
|
238
|
-
this a mounted volume in the pod container.
|
|
239
|
+
Tuple[FileRecordData, AirbyteRecordMessageFileReference]: Contains file record data and file reference for Airbyte protocol.
|
|
239
240
|
|
|
240
241
|
Raises:
|
|
241
242
|
FileSizeLimitError: If the file size exceeds the predefined limit (1 GB).
|
|
@@ -246,7 +247,10 @@ class SourceS3StreamReader(AbstractFileBasedStreamReader):
|
|
|
246
247
|
message = "File size exceeds the 1 GB limit."
|
|
247
248
|
raise FileSizeLimitError(message=message, internal_message=message, failure_type=FailureType.config_error)
|
|
248
249
|
|
|
249
|
-
|
|
250
|
+
file_paths = self._get_file_transfer_paths(file, local_directory)
|
|
251
|
+
local_file_path = file_paths[self.LOCAL_FILE_PATH]
|
|
252
|
+
file_relative_path = file_paths[self.FILE_RELATIVE_PATH]
|
|
253
|
+
file_name = file_paths[self.FILE_NAME]
|
|
250
254
|
|
|
251
255
|
logger.info(
|
|
252
256
|
f"Starting to download the file {file.uri} with size: {file_size / (1024 * 1024):,.2f} MB ({file_size / (1024 * 1024 * 1024):.2f} GB)"
|
|
@@ -258,7 +262,20 @@ class SourceS3StreamReader(AbstractFileBasedStreamReader):
|
|
|
258
262
|
write_duration = time.time() - start_download_time
|
|
259
263
|
logger.info(f"Finished downloading the file {file.uri} and saved to {local_file_path} in {write_duration:,.2f} seconds.")
|
|
260
264
|
|
|
261
|
-
|
|
265
|
+
file_record_data = FileRecordData(
|
|
266
|
+
folder=file_paths[self.FILE_FOLDER],
|
|
267
|
+
filename=file_name,
|
|
268
|
+
bytes=file_size,
|
|
269
|
+
updated_at=int(file.last_modified.timestamp() * 1000),
|
|
270
|
+
)
|
|
271
|
+
|
|
272
|
+
file_reference = AirbyteRecordMessageFileReference(
|
|
273
|
+
staging_file_url=local_file_path,
|
|
274
|
+
source_file_relative_path=file_relative_path,
|
|
275
|
+
file_size_bytes=file_size,
|
|
276
|
+
)
|
|
277
|
+
|
|
278
|
+
return file_record_data, file_reference
|
|
262
279
|
|
|
263
280
|
@override
|
|
264
281
|
def file_size(self, file: RemoteFile) -> int:
|
|
File without changes
|
|
File without changes
|
{airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/exceptions.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/v4/__init__.py
RENAMED
|
File without changes
|
{airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/v4/config.py
RENAMED
|
File without changes
|
{airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/v4/cursor.py
RENAMED
|
File without changes
|
|
File without changes
|
{airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/v4/source.py
RENAMED
|
File without changes
|
{airbyte_source_s3-4.13.4 → airbyte_source_s3-4.14.0.dev202504072343}/source_s3/v4/zip_reader.py
RENAMED
|
File without changes
|