deltacat 1.1.30__py3-none-any.whl → 1.1.32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +1 -1
- deltacat/compute/compactor_v2/utils/task_options.py +43 -23
- deltacat/tests/compute/compactor_v2/utils/test_task_options.py +273 -1
- deltacat/tests/utils/test_pyarrow.py +106 -4
- deltacat/utils/pyarrow.py +11 -5
- {deltacat-1.1.30.dist-info → deltacat-1.1.32.dist-info}/METADATA +1 -1
- {deltacat-1.1.30.dist-info → deltacat-1.1.32.dist-info}/RECORD +10 -10
- {deltacat-1.1.30.dist-info → deltacat-1.1.32.dist-info}/LICENSE +0 -0
- {deltacat-1.1.30.dist-info → deltacat-1.1.32.dist-info}/WHEEL +0 -0
- {deltacat-1.1.30.dist-info → deltacat-1.1.32.dist-info}/top_level.txt +0 -0
deltacat/compute/compactor_v2/utils/task_options.py
CHANGED
@@ -1,11 +1,16 @@
 import logging
 from typing import Dict, Optional, List, Tuple, Any
 from deltacat import logs
+from deltacat.constants import PYARROW_INFLATION_MULTIPLIER
+from deltacat.compute.compactor_v2.constants import (
+    AVERAGE_RECORD_SIZE_BYTES as DEFAULT_AVERAGE_RECORD_SIZE_BYTES,
+)
 from deltacat.compute.compactor_v2.model.merge_file_group import (
     LocalMergeFileGroupsProvider,
 )
 from deltacat.storage import (
     Manifest,
+    ManifestEntry,
     interface as unimplemented_deltacat_storage,
 )
 from deltacat.compute.compactor.model.delta_annotated import DeltaAnnotated
@@ -81,16 +86,27 @@ def _get_merge_task_options(
         and compacted_delta_manifest
         and round_completion_info.hb_index_to_entry_range
     ):
-
-
-
-
+        logger.debug_conditional(
+            f"[Merge task {index}]: Using previous compaction rounds to calculate merge memory: {round_completion_info.compacted_pyarrow_write_result}",
+            memory_logs_enabled,
+        )
+        previous_inflation: float = (
+            (
+                round_completion_info.compacted_pyarrow_write_result.pyarrow_bytes
+                / round_completion_info.compacted_pyarrow_write_result.file_bytes
+            )
+            if round_completion_info.compacted_pyarrow_write_result.file_bytes
+            else PYARROW_INFLATION_MULTIPLIER
         )
         debug_memory_params["previous_inflation"] = previous_inflation
 
-        average_record_size = (
-
-
+        average_record_size: float = (
+            (
+                round_completion_info.compacted_pyarrow_write_result.pyarrow_bytes
+                / round_completion_info.compacted_pyarrow_write_result.records
+            )
+            if round_completion_info.compacted_pyarrow_write_result.records
+            else DEFAULT_AVERAGE_RECORD_SIZE_BYTES
         )
         debug_memory_params["average_record_size"] = average_record_size
 
@@ -106,31 +122,36 @@ def _get_merge_task_options(
                 str(hb_idx)
             ]
             for entry_index in range(entry_start, entry_end):
-                entry = compacted_delta_manifest.entries[entry_index]
-
-
-
-
-
+                entry: ManifestEntry = compacted_delta_manifest.entries[entry_index]
+                current_entry_size: float = (
+                    estimate_manifest_entry_size_bytes(
+                        entry=entry,
+                        operation_type=OperationType.PYARROW_DOWNLOAD,
+                        estimate_resources_params=estimate_resources_params,
+                    )
+                    or 0.0
                 )
-                current_entry_rows =
-
-
-
+                current_entry_rows: int = (
+                    estimate_manifest_entry_num_rows(
+                        entry=entry,
+                        operation_type=OperationType.PYARROW_DOWNLOAD,
+                        estimate_resources_params=estimate_resources_params,
+                    )
+                    or 0
                 )
-
+                # NOTE: We can treat the current_entry_size and current_entry_rows as 0 as a None estimated entry size implies a 0 value
                 data_size += current_entry_size
                 num_rows += current_entry_rows
-
                 if primary_keys:
-                    pk_size
+                    pk_size: Optional[
+                        float
+                    ] = estimate_manifest_entry_column_size_bytes(
                         entry=entry,
                         columns=primary_keys,
                         operation_type=OperationType.PYARROW_DOWNLOAD,
                         estimate_resources_params=estimate_resources_params,
                     )
-
-                    if pk_size is None:
+                    if not pk_size:
                         pk_size_bytes += current_entry_size
                     else:
                         pk_size_bytes += pk_size
@@ -159,7 +180,6 @@ def _get_merge_task_options(
         f"[Merge task {index}]: Params used for calculating merge memory: {debug_memory_params}",
         memory_logs_enabled,
     )
-
     return _get_task_options(0.01, total_memory, ray_custom_resources)
 
 
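The substance of the task_options.py change above is the pair of new guards: both ratios previously divided by values taken directly from the previous round's PyArrowWriteResult, so a round that wrote zero files or zero records could divide by zero. Below is a minimal sketch of the new fallback behavior, with hypothetical stand-in values for the two deltacat constants rather than their real values:

# Sketch of the fallback logic in _get_merge_task_options. The two constants
# here are hypothetical placeholders; the real code uses
# deltacat.constants.PYARROW_INFLATION_MULTIPLIER and
# deltacat.compute.compactor_v2.constants.AVERAGE_RECORD_SIZE_BYTES.
PYARROW_INFLATION_MULTIPLIER = 2.5  # assumed placeholder value
DEFAULT_AVERAGE_RECORD_SIZE_BYTES = 512  # assumed placeholder value


def estimate_from_previous_round(pyarrow_bytes: int, file_bytes: int, records: int):
    # Dividing by file_bytes or records would raise ZeroDivisionError when a
    # previous round wrote nothing, so fall back to library-wide defaults.
    previous_inflation = (
        pyarrow_bytes / file_bytes if file_bytes else PYARROW_INFLATION_MULTIPLIER
    )
    average_record_size = (
        pyarrow_bytes / records if records else DEFAULT_AVERAGE_RECORD_SIZE_BYTES
    )
    return previous_inflation, average_record_size


# Normal case: estimates derived from the previous round's write result.
assert estimate_from_previous_round(100, 50, 10) == (2.0, 10.0)
# Degenerate case: an empty previous round falls back to the defaults.
assert estimate_from_previous_round(0, 0, 0) == (2.5, 512)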
|
deltacat/tests/compute/compactor_v2/utils/test_task_options.py
CHANGED
@@ -1,6 +1,37 @@
 import unittest
 import ray
-from deltacat.compute.compactor_v2.utils.task_options import
+from deltacat.compute.compactor_v2.utils.task_options import (
+    _get_task_options,
+    _get_merge_task_options,
+    logger,
+)
+from deltacat.compute.resource_estimation.model import (
+    EstimateResourcesParams,
+    ResourceEstimationMethod,
+)
+from deltacat.constants import PYARROW_INFLATION_MULTIPLIER
+from deltacat.compute.compactor import (
+    PyArrowWriteResult,
+    RoundCompletionInfo,
+)
+from deltacat.types.media import (
+    ContentType,
+    ContentEncoding,
+)
+from deltacat.storage import (
+    DeltaLocator,
+    Manifest,
+    ManifestMeta,
+    ManifestEntry,
+    ManifestEntryList,
+    PartitionValues,
+)
+from unittest.mock import MagicMock
+from typing import Optional
+
+from deltacat.compute.compactor_v2.constants import (
+    AVERAGE_RECORD_SIZE_BYTES as DEFAULT_AVERAGE_RECORD_SIZE_BYTES,
+)
 
 
 @ray.remote
@@ -14,11 +45,95 @@ def throwing_func():
 
 
 class TestTaskOptions(unittest.TestCase):
+    TEST_INDEX = 0
+    TEST_HB_GROUP_IDX = 0
+    TEST_STREAM_POSITION = 1_000_000
+    TEST_NUM_HASH_GROUPS = 1
+
     @classmethod
     def setUpClass(cls):
         ray.init(local_mode=True, ignore_reinit_error=True)
         super().setUpClass()
 
+    @classmethod
+    def tearDownClass(cls) -> None:
+        ray.shutdown()
+
+    def _make_estimate_resource_params(
+        cls,
+        resource_estimation_method: Optional[
+            ResourceEstimationMethod
+        ] = ResourceEstimationMethod.DEFAULT,
+        previous_inflation: Optional[int] = 7,
+        average_record_size_bytes: Optional[int] = 1000,
+    ):
+        return EstimateResourcesParams.of(
+            resource_estimation_method=resource_estimation_method,
+            previous_inflation=previous_inflation,
+            average_record_size_bytes=average_record_size_bytes,
+        )
+
+    def _make_manifest(
+        self,
+        source_content_length: Optional[int] = 1000,
+        content_type: Optional[ContentType] = ContentType.PARQUET,
+        content_encoding: Optional[ContentEncoding] = ContentEncoding.IDENTITY,
+        partition_values: Optional[PartitionValues] = None,
+        uri: Optional[str] = "test",
+        url: Optional[str] = "test",
+        author: Optional[str] = "foo",
+        entry_uuid: Optional[str] = "foo",
+        manifest_uuid: Optional[str] = "bar",
+    ) -> Manifest:
+        meta = ManifestMeta.of(
+            10,
+            10,
+            content_type=content_type,
+            content_encoding=content_encoding,
+            source_content_length=source_content_length,
+            partition_values=partition_values,
+        )
+
+        return Manifest.of(
+            entries=ManifestEntryList.of(
+                [
+                    ManifestEntry.of(
+                        uri=uri, url=url, meta=meta, mandatory=True, uuid=entry_uuid
+                    )
+                ]
+            ),
+            author=author,
+            uuid=manifest_uuid,
+        )
+
+    def make_round_completion_info(
+        self,
+        high_watermark: Optional[int] = 1_000_000,
+        compacted_delta_locator: Optional[DeltaLocator] = None,
+        records_written: Optional[int] = 10,
+        bytes_written: Optional[int] = 10,
+        files_written: Optional[int] = 10,
+        rows_dropped: Optional[int] = 10,
+        sort_keys_bit_width: Optional[int] = 0,
+        hash_bucket_count: Optional[int] = 1,
+        hb_index_to_entry_range: Optional[dict] = None,
+    ) -> RoundCompletionInfo:
+        if compacted_delta_locator is None:
+            compacted_delta_locator = MagicMock(spec=DeltaLocator)
+
+        hb_index_to_entry_range = hb_index_to_entry_range or {"0": (0, 1)}
+
+        return RoundCompletionInfo.of(
+            compacted_delta_locator=compacted_delta_locator,
+            high_watermark=high_watermark,
+            compacted_pyarrow_write_result=PyArrowWriteResult.of(
+                records_written, bytes_written, files_written, rows_dropped
+            ),
+            sort_keys_bit_width=sort_keys_bit_width,
+            hb_index_to_entry_range=hb_index_to_entry_range,
+            hash_bucket_count=hash_bucket_count,
+        )
+
     def test_get_task_options_sanity(self):
         opts = _get_task_options(0.01, 0.01)
         result_ref = valid_func.options(**opts).remote()
@@ -31,3 +146,160 @@ class TestTaskOptions(unittest.TestCase):
         result_ref = throwing_func.options(**opts).remote()
 
         self.assertRaises(ConnectionAbortedError, lambda: ray.get(result_ref))
+
+    def test_get_merge_task_options_memory_logs_enabled_sanity(self):
+        test_index = 0
+        test_hb_group_idx = 0
+        test_debug_memory_params = {"merge_task_index": test_index}
+        test_estimate_memory_params = self._make_estimate_resource_params()
+        test_ray_custom_resources = {}
+        test_rcf = self.make_round_completion_info()
+        test_manifest = self._make_manifest()
+        expected_task_opts = {
+            "max_retries": 3,
+            "memory": 1680.64,
+            "num_cpus": 0.01,
+            "scheduling_strategy": "SPREAD",
+        }
+        expected_previous_inflation = 1.0
+        expected_average_record_size = 1.0
+        with self.assertLogs(logger=logger.name, level="DEBUG") as cm:
+            # At least one log of level DEBUG must be emitted
+            actual_merge_tasks_opts = _get_merge_task_options(
+                index=test_index,
+                hb_group_idx=test_hb_group_idx,
+                data_size=1,
+                pk_size_bytes=1,
+                num_rows=1,
+                num_hash_groups=1,
+                total_memory_buffer_percentage=1,
+                incremental_index_array_size=1,
+                debug_memory_params=test_debug_memory_params,
+                ray_custom_resources=test_ray_custom_resources,
+                estimate_resources_params=test_estimate_memory_params,
+                round_completion_info=test_rcf,
+                compacted_delta_manifest=test_manifest,
+                memory_logs_enabled=True,
+            )
+        assert {k: actual_merge_tasks_opts[k] for k in expected_task_opts}
+        log_message_round_completion_info = cm.records[0].getMessage()
+        log_message_debug_memory_params = cm.records[1].getMessage()
+        self.assertIn(
+            f"[Merge task {test_index}]: Using previous compaction rounds to calculate merge memory",
+            log_message_round_completion_info,
+        )
+        self.assertIn(
+            f"[Merge task {test_index}]: Params used for calculating merge memory",
+            log_message_debug_memory_params,
+        )
+        self.assertIn(
+            f"'previous_inflation': {expected_previous_inflation}",
+            log_message_debug_memory_params,
+        )
+        self.assertIn(
+            f"'average_record_size': {expected_average_record_size}",
+            log_message_debug_memory_params,
+        )
+
+    def test_get_merge_task_options_memory_logs_enabled_fallback_previous_inflation_fallback_average_record_size(
+        self,
+    ):
+        test_index = 0
+        test_hb_group_idx = 0
+        test_debug_memory_params = {"merge_task_index": test_index}
+        test_estimate_memory_params = self._make_estimate_resource_params()
+        test_ray_custom_resources = {}
+        test_rcf = self.make_round_completion_info(
+            bytes_written=0, records_written=0, files_written=0, rows_dropped=0
+        )
+        test_manifest = self._make_manifest()
+        expected_task_opts = {
+            "max_retries": 3,
+            "memory": 1680.64,
+            "num_cpus": 0.01,
+            "scheduling_strategy": "SPREAD",
+        }
+        expected_previous_inflation = PYARROW_INFLATION_MULTIPLIER
+        expected_average_record_size = DEFAULT_AVERAGE_RECORD_SIZE_BYTES
+        with self.assertLogs(logger=logger.name, level="DEBUG") as cm:
+            # At least one log of level DEBUG must be emitted
+            actual_merge_tasks_opts = _get_merge_task_options(
+                index=test_index,
+                hb_group_idx=test_hb_group_idx,
+                data_size=1,
+                pk_size_bytes=1,
+                num_rows=1,
+                num_hash_groups=1,
+                total_memory_buffer_percentage=1,
+                incremental_index_array_size=1,
+                debug_memory_params=test_debug_memory_params,
+                ray_custom_resources=test_ray_custom_resources,
+                estimate_resources_params=test_estimate_memory_params,
+                round_completion_info=test_rcf,
+                compacted_delta_manifest=test_manifest,
+                memory_logs_enabled=True,
+            )
+        assert {k: actual_merge_tasks_opts[k] for k in expected_task_opts}
+        log_message_round_completion_info = cm.records[0].getMessage()
+        log_message_debug_memory_params = cm.records[1].getMessage()
+        self.assertIn(
+            f"[Merge task {test_index}]: Using previous compaction rounds to calculate merge memory",
+            log_message_round_completion_info,
+        )
+        self.assertIn(
+            f"[Merge task {test_index}]: Params used for calculating merge memory",
+            log_message_debug_memory_params,
+        )
+        self.assertIn(
+            f"'previous_inflation': {expected_previous_inflation}",
+            log_message_debug_memory_params,
+        )
+        self.assertIn(
+            f"'average_record_size': {expected_average_record_size}",
+            log_message_debug_memory_params,
+        )
+
+    def test_get_merge_task_options_memory_logs_enabled_not_using_previous_round_completion_info(
+        self,
+    ):
+        test_index = 0
+        test_hb_group_idx = 0
+        test_debug_memory_params = {"merge_task_index": test_index}
+        test_estimate_memory_params = self._make_estimate_resource_params()
+        test_ray_custom_resources = {}
+        test_rcf = None
+        test_manifest = self._make_manifest()
+        expected_task_opts = {
+            "max_retries": 3,
+            "memory": 1680.64,
+            "num_cpus": 0.01,
+            "scheduling_strategy": "SPREAD",
+        }
+        with self.assertLogs(logger=logger.name, level="DEBUG") as cm:
+            # At least one log of level DEBUG must be emitted
+            actual_merge_tasks_opts = _get_merge_task_options(
+                index=test_index,
+                hb_group_idx=test_hb_group_idx,
+                data_size=1,
+                pk_size_bytes=1,
+                num_rows=1,
+                num_hash_groups=1,
+                total_memory_buffer_percentage=1,
+                incremental_index_array_size=1,
+                debug_memory_params=test_debug_memory_params,
+                ray_custom_resources=test_ray_custom_resources,
+                estimate_resources_params=test_estimate_memory_params,
+                round_completion_info=test_rcf,
+                compacted_delta_manifest=test_manifest,
+                memory_logs_enabled=True,
+            )
+        assert {k: actual_merge_tasks_opts[k] for k in expected_task_opts}
+        log_message_debug_memory_params = cm.records[0].getMessage()
+        self.assertIn(
+            f"[Merge task {test_index}]: Params used for calculating merge memory",
+            log_message_debug_memory_params,
+        )
+        self.assertNotIn(
+            "'average_record_size'",
+            log_message_debug_memory_params,
+        )
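These tests capture the conditional debug output with unittest's assertLogs context manager and then assert against individual captured records. A self-contained sketch of that pattern, using a throwaway logger name rather than deltacat's real logger:

# Minimal sketch of the assertLogs pattern used above; the "sketch" logger
# name is a placeholder, not part of deltacat.
import logging
import unittest


class AssertLogsPatternTest(unittest.TestCase):
    def test_captures_debug_records(self):
        log = logging.getLogger("sketch")
        with self.assertLogs(logger=log.name, level="DEBUG") as cm:
            log.debug("[Merge task 0]: Params used for calculating merge memory: {}")
        # cm.records holds every captured LogRecord, in emission order.
        self.assertIn("merge memory", cm.records[0].getMessage())


if __name__ == "__main__":
    unittest.main()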
deltacat/tests/utils/test_pyarrow.py
CHANGED
@@ -2,9 +2,12 @@ from unittest import TestCase
 from deltacat.utils.pyarrow import (
     s3_partial_parquet_file_to_table,
     pyarrow_read_csv,
+    ContentTypeValidationError,
     content_type_to_reader_kwargs,
     _add_column_kwargs,
+    logger,
     s3_file_to_table,
+    s3_file_to_parquet,
     ReadKwargsProviderPyArrowSchemaOverride,
     RAISE_ON_EMPTY_CSV_KWARG,
     RAISE_ON_DECIMAL_OVERFLOW,
@@ -435,7 +438,7 @@ class TestReadCSV(TestCase):
             pa.lib.ArrowInvalid,
             lambda: pyarrow_read_csv(
                 OVERFLOWING_DECIMAL_PRECISION_UTSV_PATH,
-                **{**kwargs, RAISE_ON_DECIMAL_OVERFLOW: True}
+                **{**kwargs, RAISE_ON_DECIMAL_OVERFLOW: True},
             ),
         )
 
@@ -479,7 +482,7 @@ class TestReadCSV(TestCase):
             pa.lib.ArrowInvalid,
             lambda: pyarrow_read_csv(
                 OVERFLOWING_DECIMAL_SCALE_UTSV_PATH,
-                **{**kwargs, RAISE_ON_DECIMAL_OVERFLOW: True}
+                **{**kwargs, RAISE_ON_DECIMAL_OVERFLOW: True},
            ),
         )
 
@@ -590,7 +593,7 @@ class TestReadCSV(TestCase):
             pa.lib.ArrowNotImplementedError,
             lambda: pyarrow_read_csv(
                 OVERFLOWING_DECIMAL_SCALE_UTSV_PATH,
-                **{**kwargs, RAISE_ON_DECIMAL_OVERFLOW: True}
+                **{**kwargs, RAISE_ON_DECIMAL_OVERFLOW: True},
             ),
         )
 
@@ -818,8 +821,11 @@ class TestS3FileToTable(TestCase):
         schema = pa.schema(
             [("is_active", pa.string()), ("ship_datetime_utc", pa.timestamp("us"))]
         )
-
         # OVERRIDE_CONTENT_ENCODING_FOR_PARQUET_KWARG has no effect on uTSV files
+        pa_kwargs_provider = lambda content_type, kwargs: {
+            "reader_type": "pyarrow",
+            **kwargs,
+        }
         pa_kwargs_provider = lambda content_type, kwargs: {
             "reader_type": "pyarrow",
             OVERRIDE_CONTENT_ENCODING_FOR_PARQUET_KWARG: ContentEncoding.IDENTITY.value,
@@ -864,3 +870,99 @@ class TestS3FileToTable(TestCase):
         schema = result.schema
         schema_index = schema.get_field_index("n_legs")
         self.assertEqual(schema.field(schema_index).type, "int64")
+
+
+class TestS3FileToParquet(TestCase):
+    def test_s3_file_to_parquet_sanity(self):
+        test_s3_url = PARQUET_FILE_PATH
+        test_content_type = ContentType.PARQUET.value
+        test_content_encoding = ContentEncoding.IDENTITY.value
+        pa_kwargs_provider = lambda content_type, kwargs: {
+            "reader_type": "pyarrow",
+            **kwargs,
+        }
+        with self.assertLogs(logger=logger.name, level="DEBUG") as cm:
+            result_parquet_file: ParquetFile = s3_file_to_parquet(
+                test_s3_url,
+                test_content_type,
+                test_content_encoding,
+                ["n_legs", "animal"],
+                ["n_legs"],
+                pa_read_func_kwargs_provider=pa_kwargs_provider,
+            )
+        log_message_log_args = cm.records[0].getMessage()
+        log_message_presanitize_kwargs = cm.records[1].getMessage()
+        self.assertIn(
+            f"Reading {test_s3_url} to PyArrow ParquetFile. Content type: {test_content_type}. Encoding: {test_content_encoding}",
+            log_message_log_args,
+        )
+        self.assertIn("{'reader_type': 'pyarrow'}", log_message_presanitize_kwargs)
+        for index, field in enumerate(result_parquet_file.schema_arrow):
+            self.assertEqual(
+                field.name, result_parquet_file.schema_arrow.field(index).name
+            )
+        self.assertEqual(result_parquet_file.schema_arrow.field(0).type, "int64")
+
+    def test_s3_file_to_parquet_when_parquet_gzip_encoding_and_overridden_returns_success(
+        self,
+    ):
+        test_s3_url = PARQUET_FILE_PATH
+        test_content_type = ContentType.PARQUET.value
+        test_content_encoding = ContentEncoding.GZIP.value
+        pa_kwargs_provider = lambda content_type, kwargs: {
+            "reader_type": "pyarrow",
+            OVERRIDE_CONTENT_ENCODING_FOR_PARQUET_KWARG: ContentEncoding.IDENTITY.value,
+            **kwargs,
+        }
+        with self.assertLogs(logger=logger.name, level="DEBUG") as cm:
+            result_parquet_file: ParquetFile = s3_file_to_parquet(
+                test_s3_url,
+                test_content_type,
+                test_content_encoding,
+                ["n_legs", "animal"],
+                ["n_legs"],
+                pa_read_func_kwargs_provider=pa_kwargs_provider,
+            )
+        log_message_log_args = cm.records[0].getMessage()
+        log_message_log_new_content_encoding = cm.records[1].getMessage()
+        log_message_presanitize_kwargs = cm.records[2].getMessage()
+        self.assertIn(
+            f"Reading {test_s3_url} to PyArrow ParquetFile. Content type: {test_content_type}. Encoding: {test_content_encoding}",
+            log_message_log_args,
+        )
+        self.assertIn(
+            f"Overriding {test_s3_url} content encoding from {ContentEncoding.GZIP.value} to {ContentEncoding.IDENTITY.value}",
+            log_message_log_new_content_encoding,
+        )
+        self.assertIn("{'reader_type': 'pyarrow'}", log_message_presanitize_kwargs)
+        for index, field in enumerate(result_parquet_file.schema_arrow):
+            self.assertEqual(
+                field.name, result_parquet_file.schema_arrow.field(index).name
+            )
+        self.assertEqual(result_parquet_file.schema_arrow.field(0).type, "int64")
+
+    def test_s3_file_to_parquet_when_parquet_gzip_encoding_not_overridden_throws_error(
+        self,
+    ):
+        test_s3_url = PARQUET_FILE_PATH
+        test_content_type = ContentType.PARQUET.value
+        test_content_encoding = ContentEncoding.GZIP.value
+        pa_kwargs_provider = lambda content_type, kwargs: {
+            "reader_type": "pyarrow",
+            **kwargs,
+        }
+        with self.assertRaises(ContentTypeValidationError):
+            with self.assertLogs(logger=logger.name, level="DEBUG") as cm:
+                s3_file_to_parquet(
+                    test_s3_url,
+                    test_content_type,
+                    test_content_encoding,
+                    ["n_legs", "animal"],
+                    ["n_legs"],
+                    pa_read_func_kwargs_provider=pa_kwargs_provider,
+                )
+        log_message_log_args = cm.records[0].getMessage()
+        self.assertIn(
+            f"Reading {test_s3_url} to PyArrow ParquetFile. Content type: {test_content_type}. Encoding: {test_content_encoding}",
+            log_message_log_args,
+        )
deltacat/utils/pyarrow.py
CHANGED
@@ -617,7 +617,18 @@ def s3_file_to_parquet(
         f"Reading {s3_url} to PyArrow ParquetFile. "
         f"Content type: {content_type}. Encoding: {content_encoding}"
     )
+    kwargs = {}
+    if pa_read_func_kwargs_provider:
+        kwargs = pa_read_func_kwargs_provider(content_type, kwargs)
 
+    if OVERRIDE_CONTENT_ENCODING_FOR_PARQUET_KWARG in kwargs:
+        new_content_encoding = kwargs.pop(OVERRIDE_CONTENT_ENCODING_FOR_PARQUET_KWARG)
+        if content_type == ContentType.PARQUET.value:
+            logger.debug(
+                f"Overriding {s3_url} content encoding from {content_encoding} "
+                f"to {new_content_encoding}"
+            )
+            content_encoding = new_content_encoding
     if (
         content_type != ContentType.PARQUET.value
         or content_encoding != ContentEncoding.IDENTITY
@@ -630,15 +641,10 @@ def s3_file_to_parquet(
     if s3_client_kwargs is None:
         s3_client_kwargs = {}
 
-    kwargs = {}
-
     if s3_url.startswith("s3://"):
         s3_file_system = create_s3_file_system(s3_client_kwargs)
         kwargs["filesystem"] = s3_file_system
 
-    if pa_read_func_kwargs_provider:
-        kwargs = pa_read_func_kwargs_provider(content_type, kwargs)
-
     logger.debug(f"Pre-sanitize kwargs for {s3_url}: {kwargs}")
 
     kwargs = sanitize_kwargs_to_callable(ParquetFile.__init__, kwargs)
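The effect of this reordering in s3_file_to_parquet is that the kwargs provider now runs before content-type validation, so OVERRIDE_CONTENT_ENCODING_FOR_PARQUET_KWARG can rewrite a mislabeled encoding before the Parquet-plus-identity check raises ContentTypeValidationError. A usage sketch mirroring the new test; the bucket URL is a hypothetical placeholder:

# Sketch only: opens a Parquet file whose catalog metadata mislabels its
# content encoding as GZIP. The S3 URL below is a placeholder.
from pyarrow.parquet import ParquetFile

from deltacat.types.media import ContentEncoding, ContentType
from deltacat.utils.pyarrow import (
    OVERRIDE_CONTENT_ENCODING_FOR_PARQUET_KWARG,
    s3_file_to_parquet,
)

# The provider runs before content-type validation, so the override kwarg can
# rewrite the encoding to IDENTITY before the Parquet/identity check fires.
pa_kwargs_provider = lambda content_type, kwargs: {
    OVERRIDE_CONTENT_ENCODING_FOR_PARQUET_KWARG: ContentEncoding.IDENTITY.value,
    **kwargs,
}

parquet_file: ParquetFile = s3_file_to_parquet(
    "s3://my-bucket/mislabeled-file.parquet",  # placeholder URL
    ContentType.PARQUET.value,
    ContentEncoding.GZIP.value,  # mislabeled encoding, overridden by the provider
    pa_read_func_kwargs_provider=pa_kwargs_provider,
)
print(parquet_file.schema_arrow)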
{deltacat-1.1.30.dist-info → deltacat-1.1.32.dist-info}/RECORD
CHANGED
@@ -1,4 +1,4 @@
-deltacat/__init__.py,sha256=
+deltacat/__init__.py,sha256=amNk91Zxauag8dm3s8SuUKinWdeAA2EaiWG9_SdboQE,1778
 deltacat/constants.py,sha256=TUJLXUJ9xq1Ryil72yLkKR8EDH_Irp5wUg56QstbRNE,2181
 deltacat/exceptions.py,sha256=7sjk3BuMY5Oo-6OvAfHncZx_OcvtEL47BblWr2F7waE,12740
 deltacat/logs.py,sha256=EQSDin1deehzz5xlLV1_TrFJrO_IBZ9Ahp7MdL-4cK8,9363
@@ -77,7 +77,7 @@ deltacat/compute/compactor_v2/utils/delta.py,sha256=I7Yvda8NVbpKXG3nM2Ku1utvR2r2
 deltacat/compute/compactor_v2/utils/io.py,sha256=3m4dorxj-WD6Yu9_3gRE6gz3C-eNJA7nn02sHKwo-J8,6018
 deltacat/compute/compactor_v2/utils/merge.py,sha256=EV_iKhNc3WflgfLW1Q46dXUvyClx8VebWHGtninEfsI,5311
 deltacat/compute/compactor_v2/utils/primary_key_index.py,sha256=SbQ97M1Cxld-zZik2QMSzlj20g6JlENaQx_0PhlCIP8,12034
-deltacat/compute/compactor_v2/utils/task_options.py,sha256=
+deltacat/compute/compactor_v2/utils/task_options.py,sha256=0GoB_DLkCN1q8CVKTlWlDYt55qnpTDIa9fPyXJwB-cU,13801
 deltacat/compute/merge_on_read/__init__.py,sha256=ckbgngmqPjYBYz_NySsR1vNTOb_hNpeL1sYkZKvBI9M,214
 deltacat/compute/merge_on_read/daft.py,sha256=1oC38u5ig_aTrq7EzyWBo8Ui54rb6yERYMk-vEFbpxM,1400
 deltacat/compute/merge_on_read/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -156,7 +156,7 @@ deltacat/tests/compute/compactor_v2/test_compaction_session.py,sha256=y8nNHq9ADH
 deltacat/tests/compute/compactor_v2/test_hashlib.py,sha256=8csF2hFWtBvY2MbX3-6iphCsVXxRp0zP1NTnKhfdmkg,328
 deltacat/tests/compute/compactor_v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py,sha256=aFb9rzT_EK9k8qAMHPtpqd5btyEmll1So1loDmZkotQ,1769
-deltacat/tests/compute/compactor_v2/utils/test_task_options.py,sha256=
+deltacat/tests/compute/compactor_v2/utils/test_task_options.py,sha256=YDQKUKv3Vv8S1fe0YQmjHTrwnWSliqKHIWGu0fEdKnI,11478
 deltacat/tests/compute/resource_estimation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/tests/compute/resource_estimation/test_delta.py,sha256=HCL2oUnCqm0E26T3HLJjMhoAsHTJIWPYGwIKRgM_H7E,25712
 deltacat/tests/compute/resource_estimation/test_manifest.py,sha256=yrMvqDjolExdRf6Vtg5XaKDuaKz9ok15PCZ7_aJOYrI,32893
@@ -180,7 +180,7 @@ deltacat/tests/utils/test_cloudpickle.py,sha256=J0pnBY3-PxlUh6MamZAN1PuquKQPr2iy
 deltacat/tests/utils/test_daft.py,sha256=kY8lkXoQvyWunok8UvOsh1An297rb3jcnstTuIAyAlc,8232
 deltacat/tests/utils/test_metrics.py,sha256=Ym9nOz1EtB180pLmvugihj1sDTNDMb5opIjjr5Nmcls,16339
 deltacat/tests/utils/test_placement.py,sha256=g61wVOMkHe4YJeR9Oxg_BOVQ6bhHHbC3IBYv8YhUu94,597
-deltacat/tests/utils/test_pyarrow.py,sha256=
+deltacat/tests/utils/test_pyarrow.py,sha256=tuh6HzQOuAHPFxK5Mhgjjdm76Z9Z72H3MZPcJ4RnZn8,37372
 deltacat/tests/utils/test_record_batch_tables.py,sha256=AkG1WyljQmjnl-AxhbFWyo5LnMIKRyLScfgC2B_ES-s,11321
 deltacat/tests/utils/test_resources.py,sha256=HtpvDrfPZQNtGDXUlsIzc_yd7Vf1cDscZ3YbN0oTvO8,2560
 deltacat/tests/utils/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -201,7 +201,7 @@ deltacat/utils/numpy.py,sha256=SpHKKvC-K8NINTWGVfTZ5-gBFTGYqaXjjgKFhsdUjwg,2049
 deltacat/utils/pandas.py,sha256=q99mlRB7tymICMcNbfGLfLqFu_C-feyPZKZm2CWJJVc,9574
 deltacat/utils/performance.py,sha256=7ZLaMkS1ehPSIhT5uOQVBHvjC70iKHzoFquFo-KL0PI,645
 deltacat/utils/placement.py,sha256=Lj20fb-eq8rgMdm_M2MBMfDLwhDM1sS1nJj2DvIK56s,12060
-deltacat/utils/pyarrow.py,sha256=
+deltacat/utils/pyarrow.py,sha256=MFCsHJKapqrhaaBeVAvwR2F1MglsNNhVZeCbk7YIdyI,35266
 deltacat/utils/resources.py,sha256=Ax1OgLLbZI4oYpp4Ki27OLaST-7I-AJgZwU87FVfY8g,8253
 deltacat/utils/s3fs.py,sha256=PmUJ5Fm1WmD-_zp_M6yd9VbXvIoJuBeK6ApOdJJApLE,662
 deltacat/utils/schema.py,sha256=m4Wm4ZQcpttzOUxex4dVneGlHy1_E36HspTcjNYzvVM,1564
@@ -211,8 +211,8 @@ deltacat/utils/ray_utils/concurrency.py,sha256=JDVwMiQWrmuSlyCWAoiq9ctoJ0XADEfDD
 deltacat/utils/ray_utils/dataset.py,sha256=waHdtH0c835a-2t51HYRHnulfC0_zBxx8mFSAPvPSPM,3274
 deltacat/utils/ray_utils/performance.py,sha256=d7JFM7vTXHzkGx9qNQcZzUWajnqINvYRwaM088_FpsE,464
 deltacat/utils/ray_utils/runtime.py,sha256=rB0A-tU9WZHz0J11LzJdANYtL397YyuemcA1l-K9dAw,5029
-deltacat-1.1.
-deltacat-1.1.
-deltacat-1.1.
-deltacat-1.1.
-deltacat-1.1.
+deltacat-1.1.32.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+deltacat-1.1.32.dist-info/METADATA,sha256=KqU11gn6r8cnfoyKq4_C8widB7w_wdmfN_ikhHjSZfI,1733
+deltacat-1.1.32.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+deltacat-1.1.32.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
+deltacat-1.1.32.dist-info/RECORD,,
{deltacat-1.1.30.dist-info → deltacat-1.1.32.dist-info}/LICENSE
File without changes
{deltacat-1.1.30.dist-info → deltacat-1.1.32.dist-info}/WHEEL
File without changes
{deltacat-1.1.30.dist-info → deltacat-1.1.32.dist-info}/top_level.txt
File without changes