airbyte-source-s3 4.14.4__tar.gz → 4.14.5__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (24) hide show
  1. {airbyte_source_s3-4.14.4 → airbyte_source_s3-4.14.5}/PKG-INFO +2 -2
  2. {airbyte_source_s3-4.14.4 → airbyte_source_s3-4.14.5}/pyproject.toml +2 -2
  3. {airbyte_source_s3-4.14.4 → airbyte_source_s3-4.14.5}/source_s3/v4/zip_reader.py +15 -1
  4. {airbyte_source_s3-4.14.4 → airbyte_source_s3-4.14.5}/README.md +0 -0
  5. {airbyte_source_s3-4.14.4 → airbyte_source_s3-4.14.5}/source_s3/__init__.py +0 -0
  6. {airbyte_source_s3-4.14.4 → airbyte_source_s3-4.14.5}/source_s3/exceptions.py +0 -0
  7. {airbyte_source_s3-4.14.4 → airbyte_source_s3-4.14.5}/source_s3/run.py +0 -0
  8. {airbyte_source_s3-4.14.4 → airbyte_source_s3-4.14.5}/source_s3/source.py +0 -0
  9. {airbyte_source_s3-4.14.4 → airbyte_source_s3-4.14.5}/source_s3/source_files_abstract/__init__.py +0 -0
  10. {airbyte_source_s3-4.14.4 → airbyte_source_s3-4.14.5}/source_s3/source_files_abstract/formats/__init__.py +0 -0
  11. {airbyte_source_s3-4.14.4 → airbyte_source_s3-4.14.5}/source_s3/source_files_abstract/formats/avro_spec.py +0 -0
  12. {airbyte_source_s3-4.14.4 → airbyte_source_s3-4.14.5}/source_s3/source_files_abstract/formats/csv_spec.py +0 -0
  13. {airbyte_source_s3-4.14.4 → airbyte_source_s3-4.14.5}/source_s3/source_files_abstract/formats/jsonl_spec.py +0 -0
  14. {airbyte_source_s3-4.14.4 → airbyte_source_s3-4.14.5}/source_s3/source_files_abstract/formats/parquet_spec.py +0 -0
  15. {airbyte_source_s3-4.14.4 → airbyte_source_s3-4.14.5}/source_s3/source_files_abstract/source.py +0 -0
  16. {airbyte_source_s3-4.14.4 → airbyte_source_s3-4.14.5}/source_s3/source_files_abstract/spec.py +0 -0
  17. {airbyte_source_s3-4.14.4 → airbyte_source_s3-4.14.5}/source_s3/stream.py +0 -0
  18. {airbyte_source_s3-4.14.4 → airbyte_source_s3-4.14.5}/source_s3/utils.py +0 -0
  19. {airbyte_source_s3-4.14.4 → airbyte_source_s3-4.14.5}/source_s3/v4/__init__.py +0 -0
  20. {airbyte_source_s3-4.14.4 → airbyte_source_s3-4.14.5}/source_s3/v4/config.py +0 -0
  21. {airbyte_source_s3-4.14.4 → airbyte_source_s3-4.14.5}/source_s3/v4/cursor.py +0 -0
  22. {airbyte_source_s3-4.14.4 → airbyte_source_s3-4.14.5}/source_s3/v4/legacy_config_transformer.py +0 -0
  23. {airbyte_source_s3-4.14.4 → airbyte_source_s3-4.14.5}/source_s3/v4/source.py +0 -0
  24. {airbyte_source_s3-4.14.4 → airbyte_source_s3-4.14.5}/source_s3/v4/stream_reader.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: airbyte-source-s3
3
- Version: 4.14.4
3
+ Version: 4.14.5
4
4
  Summary: Source implementation for S3.
5
5
  Home-page: https://airbyte.com
6
6
  License: ELv2
@@ -15,7 +15,7 @@ Requires-Dist: airbyte-cdk[file-based] (>=7.0.4,<8.0.0)
15
15
  Requires-Dist: dill (>=0.3.4,<0.4.0)
16
16
  Requires-Dist: pendulum (>=3.0.0,<4.0.0)
17
17
  Requires-Dist: pytz (>=2024.2,<2025.0)
18
- Requires-Dist: smart-open[s3] (==4.14.4)
18
+ Requires-Dist: smart-open[s3] (==4.14.5)
19
19
  Requires-Dist: transformers (>=4.38.2,<5.0.0)
20
20
  Requires-Dist: urllib3 (<2)
21
21
  Requires-Dist: wcmatch (>=10.0,<11.0)
@@ -3,7 +3,7 @@ requires = [ "poetry-core>=1.0.0",]
3
3
  build-backend = "poetry.core.masonry.api"
4
4
 
5
5
  [tool.poetry]
6
- version = "4.14.4"
6
+ version = "4.14.5"
7
7
  name = "airbyte-source-s3"
8
8
  description = "Source implementation for S3."
9
9
  authors = [ "Airbyte <contact@airbyte.io>",]
@@ -30,7 +30,7 @@ source-s3 = "source_s3.run:run"
30
30
 
31
31
  [tool.poetry.dependencies.smart-open]
32
32
  extras = [ "s3",]
33
- version = "4.14.4"
33
+ version = "4.14.5"
34
34
 
35
35
  [tool.poetry.group.dev.dependencies]
36
36
  pytest = "^8.0.0"
@@ -366,7 +366,21 @@ class ZipContentReader:
366
366
  data = self.buffer[:size]
367
367
  self.buffer = self.buffer[size:]
368
368
 
369
- return data.decode(self.encoding) if self.encoding else bytes(data)
369
+ try:
370
+ return data.decode(self.encoding) if self.encoding else bytes(data)
371
+ except UnicodeDecodeError:
372
+ if self.encoding == "utf_8_sig":
373
+ # utf_8_sig considers `\xef\xbb\xbf` as a single character and therefore calling `bytearray(b'\xef').decode("utf_8_sig")` will
374
+ # cause an exception to be raised.
375
+ number_of_bytes_to_add = size - 1
376
+ if data.endswith(bytearray(b"\xef")):
377
+ number_of_bytes_to_add += 2
378
+ elif data.endswith(bytearray(b"\xbb")):
379
+ number_of_bytes_to_add += 1
380
+ data = data + self.buffer[:number_of_bytes_to_add]
381
+ self.buffer = self.buffer[number_of_bytes_to_add:]
382
+ return data.decode(self.encoding) if self.encoding else bytes(data)
383
+ raise
370
384
 
371
385
  def seek(self, offset: int, whence: int = io.SEEK_SET) -> int:
372
386
  """