airbyte-source-s3 4.14.0.dev202504091813__tar.gz → 4.14.0.dev202504161719__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of airbyte-source-s3 might be problematic. Click here for more details.
- {airbyte_source_s3-4.14.0.dev202504091813 → airbyte_source_s3-4.14.0.dev202504161719}/PKG-INFO +2 -2
- {airbyte_source_s3-4.14.0.dev202504091813 → airbyte_source_s3-4.14.0.dev202504161719}/pyproject.toml +2 -2
- {airbyte_source_s3-4.14.0.dev202504091813 → airbyte_source_s3-4.14.0.dev202504161719}/source_s3/v4/stream_reader.py +18 -5
- {airbyte_source_s3-4.14.0.dev202504091813 → airbyte_source_s3-4.14.0.dev202504161719}/README.md +0 -0
- {airbyte_source_s3-4.14.0.dev202504091813 → airbyte_source_s3-4.14.0.dev202504161719}/source_s3/__init__.py +0 -0
- {airbyte_source_s3-4.14.0.dev202504091813 → airbyte_source_s3-4.14.0.dev202504161719}/source_s3/exceptions.py +0 -0
- {airbyte_source_s3-4.14.0.dev202504091813 → airbyte_source_s3-4.14.0.dev202504161719}/source_s3/run.py +0 -0
- {airbyte_source_s3-4.14.0.dev202504091813 → airbyte_source_s3-4.14.0.dev202504161719}/source_s3/source.py +0 -0
- {airbyte_source_s3-4.14.0.dev202504091813 → airbyte_source_s3-4.14.0.dev202504161719}/source_s3/source_files_abstract/__init__.py +0 -0
- {airbyte_source_s3-4.14.0.dev202504091813 → airbyte_source_s3-4.14.0.dev202504161719}/source_s3/source_files_abstract/formats/__init__.py +0 -0
- {airbyte_source_s3-4.14.0.dev202504091813 → airbyte_source_s3-4.14.0.dev202504161719}/source_s3/source_files_abstract/formats/avro_spec.py +0 -0
- {airbyte_source_s3-4.14.0.dev202504091813 → airbyte_source_s3-4.14.0.dev202504161719}/source_s3/source_files_abstract/formats/csv_spec.py +0 -0
- {airbyte_source_s3-4.14.0.dev202504091813 → airbyte_source_s3-4.14.0.dev202504161719}/source_s3/source_files_abstract/formats/jsonl_spec.py +0 -0
- {airbyte_source_s3-4.14.0.dev202504091813 → airbyte_source_s3-4.14.0.dev202504161719}/source_s3/source_files_abstract/formats/parquet_spec.py +0 -0
- {airbyte_source_s3-4.14.0.dev202504091813 → airbyte_source_s3-4.14.0.dev202504161719}/source_s3/source_files_abstract/source.py +0 -0
- {airbyte_source_s3-4.14.0.dev202504091813 → airbyte_source_s3-4.14.0.dev202504161719}/source_s3/source_files_abstract/spec.py +0 -0
- {airbyte_source_s3-4.14.0.dev202504091813 → airbyte_source_s3-4.14.0.dev202504161719}/source_s3/stream.py +0 -0
- {airbyte_source_s3-4.14.0.dev202504091813 → airbyte_source_s3-4.14.0.dev202504161719}/source_s3/utils.py +0 -0
- {airbyte_source_s3-4.14.0.dev202504091813 → airbyte_source_s3-4.14.0.dev202504161719}/source_s3/v4/__init__.py +0 -0
- {airbyte_source_s3-4.14.0.dev202504091813 → airbyte_source_s3-4.14.0.dev202504161719}/source_s3/v4/config.py +0 -0
- {airbyte_source_s3-4.14.0.dev202504091813 → airbyte_source_s3-4.14.0.dev202504161719}/source_s3/v4/cursor.py +0 -0
- {airbyte_source_s3-4.14.0.dev202504091813 → airbyte_source_s3-4.14.0.dev202504161719}/source_s3/v4/legacy_config_transformer.py +0 -0
- {airbyte_source_s3-4.14.0.dev202504091813 → airbyte_source_s3-4.14.0.dev202504161719}/source_s3/v4/source.py +0 -0
- {airbyte_source_s3-4.14.0.dev202504091813 → airbyte_source_s3-4.14.0.dev202504161719}/source_s3/v4/zip_reader.py +0 -0
{airbyte_source_s3-4.14.0.dev202504091813 → airbyte_source_s3-4.14.0.dev202504161719}/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: airbyte-source-s3
|
|
3
|
-
Version: 4.14.0.dev202504091813
|
|
3
|
+
Version: 4.14.0.dev202504161719
|
|
4
4
|
Summary: Source implementation for S3.
|
|
5
5
|
License: ELv2
|
|
6
6
|
Author: Airbyte
|
|
@@ -10,7 +10,7 @@ Classifier: License :: Other/Proprietary License
|
|
|
10
10
|
Classifier: Programming Language :: Python :: 3
|
|
11
11
|
Classifier: Programming Language :: Python :: 3.10
|
|
12
12
|
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
-
Requires-Dist: airbyte-cdk[file-based] (==6.45.
|
|
13
|
+
Requires-Dist: airbyte-cdk[file-based] (==6.45.4.post49.dev14495925594)
|
|
14
14
|
Requires-Dist: dill (>=0.3.4,<0.4.0)
|
|
15
15
|
Requires-Dist: pendulum (>=3.0.0,<4.0.0)
|
|
16
16
|
Requires-Dist: pytz (>=2024.2,<2025.0)
|
{airbyte_source_s3-4.14.0.dev202504091813 → airbyte_source_s3-4.14.0.dev202504161719}/pyproject.toml
RENAMED
|
@@ -5,7 +5,7 @@ requires = [
|
|
|
5
5
|
build-backend = "poetry.core.masonry.api"
|
|
6
6
|
|
|
7
7
|
[tool.poetry]
|
|
8
|
-
version = "4.14.0.dev202504091813"
|
|
8
|
+
version = "4.14.0.dev202504161719"
|
|
9
9
|
name = "airbyte-source-s3"
|
|
10
10
|
description = "Source implementation for S3."
|
|
11
11
|
authors = [
|
|
@@ -33,7 +33,7 @@ pendulum = "^3.0.0"
|
|
|
33
33
|
extras = [
|
|
34
34
|
"file-based",
|
|
35
35
|
]
|
|
36
|
-
version = "6.45.
|
|
36
|
+
version = "6.45.4.post49.dev14495925594"
|
|
37
37
|
|
|
38
38
|
[tool.poetry.dependencies.smart-open]
|
|
39
39
|
extras = [
|
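{airbyte_source_s3-4.14.0.dev202504091813 → airbyte_source_s3-4.14.0.dev202504161719}/source_s3/v4/stream_reader.py
RENAMED
|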
@@ -168,6 +168,19 @@ class SourceS3StreamReader(AbstractFileBasedStreamReader):
|
|
|
168
168
|
endpoint=self.config.endpoint,
|
|
169
169
|
) from exc
|
|
170
170
|
|
|
171
|
+
def _construct_s3_uri(self, file: RemoteFile) -> str:
|
|
172
|
+
"""
|
|
173
|
+
Constructs the S3 URI for a given file, handling both regular files and files inside archives.
|
|
174
|
+
|
|
175
|
+
Args:
|
|
176
|
+
file: The RemoteFile object representing either a regular file or a file inside an archive
|
|
177
|
+
|
|
178
|
+
Returns:
|
|
179
|
+
str: The properly formatted S3 URI
|
|
180
|
+
"""
|
|
181
|
+
file_path = file.uri.split("#")[0] if isinstance(file, RemoteFileInsideArchive) else file.uri
|
|
182
|
+
return f"s3://{self.config.bucket}/{file_path}"
|
|
183
|
+
|
|
171
184
|
def open_file(self, file: RemoteFile, mode: FileReadMode, encoding: Optional[str], logger: logging.Logger) -> IOBase:
|
|
172
185
|
try:
|
|
173
186
|
params = {"client": self.s3_client}
|
|
@@ -176,14 +189,13 @@ class SourceS3StreamReader(AbstractFileBasedStreamReader):
|
|
|
176
189
|
|
|
177
190
|
logger.debug(f"try to open {file.uri}")
|
|
178
191
|
try:
|
|
192
|
+
s3_uri = self._construct_s3_uri(file)
|
|
179
193
|
if isinstance(file, RemoteFileInsideArchive):
|
|
180
|
-
s3_file_object = smart_open.open(
|
|
194
|
+
s3_file_object = smart_open.open(s3_uri, transport_params=params, mode="rb")
|
|
181
195
|
decompressed_stream = DecompressedStream(s3_file_object, file)
|
|
182
196
|
result = ZipContentReader(decompressed_stream, encoding)
|
|
183
197
|
else:
|
|
184
|
-
result = smart_open.open(
|
|
185
|
-
f"s3://{self.config.bucket}/{file.uri}", transport_params=params, mode=mode.value, encoding=encoding
|
|
186
|
-
)
|
|
198
|
+
result = smart_open.open(s3_uri, transport_params=params, mode=mode.value, encoding=encoding)
|
|
187
199
|
except OSError:
|
|
188
200
|
logger.warning(
|
|
189
201
|
f"We don't have access to {file.uri}. The file appears to have become unreachable during sync."
|
|
@@ -247,7 +259,7 @@ class SourceS3StreamReader(AbstractFileBasedStreamReader):
|
|
|
247
259
|
message = "File size exceeds the 1 GB limit."
|
|
248
260
|
raise FileSizeLimitError(message=message, internal_message=message, failure_type=FailureType.config_error)
|
|
249
261
|
|
|
250
|
-
file_paths = self._get_file_transfer_paths(file, local_directory)
|
|
262
|
+
file_paths = self._get_file_transfer_paths(file.uri, local_directory)
|
|
251
263
|
local_file_path = file_paths[self.LOCAL_FILE_PATH]
|
|
252
264
|
file_relative_path = file_paths[self.FILE_RELATIVE_PATH]
|
|
253
265
|
file_name = file_paths[self.FILE_NAME]
|
|
@@ -267,6 +279,7 @@ class SourceS3StreamReader(AbstractFileBasedStreamReader):
|
|
|
267
279
|
filename=file_name,
|
|
268
280
|
bytes=file_size,
|
|
269
281
|
updated_at=file.last_modified.strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
|
|
282
|
+
source_uri=self._construct_s3_uri(file),
|
|
270
283
|
)
|
|
271
284
|
|
|
272
285
|
file_reference = AirbyteRecordMessageFileReference(
|
{airbyte_source_s3-4.14.0.dev202504091813 → airbyte_source_s3-4.14.0.dev202504161719}/README.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|