deltacat 1.1.5__py3-none-any.whl → 1.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. deltacat/__init__.py +1 -1
  2. deltacat/aws/constants.py +21 -2
  3. deltacat/aws/s3u.py +107 -33
  4. deltacat/compute/compactor/model/round_completion_info.py +4 -0
  5. deltacat/compute/compactor_v2/compaction_session.py +51 -25
  6. deltacat/compute/compactor_v2/constants.py +12 -0
  7. deltacat/compute/compactor_v2/model/compaction_session.py +21 -0
  8. deltacat/compute/compactor_v2/steps/hash_bucket.py +6 -0
  9. deltacat/compute/compactor_v2/steps/merge.py +6 -0
  10. deltacat/compute/compactor_v2/utils/task_options.py +8 -2
  11. deltacat/storage/interface.py +10 -3
  12. deltacat/tests/aws/test_s3u.py +193 -0
  13. deltacat/tests/catalog/test_default_catalog_impl.py +2 -0
  14. deltacat/tests/compute/compact_partition_test_cases.py +61 -0
  15. deltacat/tests/compute/compactor_v2/test_compaction_session.py +2 -0
  16. deltacat/tests/compute/test_compact_partition_incremental.py +89 -32
  17. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +21 -26
  18. deltacat/tests/compute/test_util_create_table_deltas_repo.py +45 -2
  19. deltacat/tests/local_deltacat_storage/__init__.py +38 -19
  20. deltacat/tests/utils/ray_utils/__init__.py +0 -0
  21. deltacat/tests/utils/ray_utils/test_concurrency.py +50 -0
  22. deltacat/tests/utils/test_resources.py +28 -0
  23. deltacat/utils/resources.py +45 -0
  24. {deltacat-1.1.5.dist-info → deltacat-1.1.7.dist-info}/METADATA +1 -1
  25. {deltacat-1.1.5.dist-info → deltacat-1.1.7.dist-info}/RECORD +28 -25
  26. {deltacat-1.1.5.dist-info → deltacat-1.1.7.dist-info}/LICENSE +0 -0
  27. {deltacat-1.1.5.dist-info → deltacat-1.1.7.dist-info}/WHEEL +0 -0
  28. {deltacat-1.1.5.dist-info → deltacat-1.1.7.dist-info}/top_level.txt +0 -0
deltacat/__init__.py CHANGED
@@ -44,7 +44,7 @@ from deltacat.types.tables import TableWriteMode
 
  deltacat.logs.configure_deltacat_logger(logging.getLogger(__name__))
 
- __version__ = "1.1.5"
+ __version__ = "1.1.7"
 
 
  __all__ = [
deltacat/aws/constants.py CHANGED
@@ -1,8 +1,27 @@
- from typing import List
+ import botocore
+ from typing import Set
 
  from deltacat.utils.common import env_integer, env_string
 
  DAFT_MAX_S3_CONNECTIONS_PER_FILE = env_integer("DAFT_MAX_S3_CONNECTIONS_PER_FILE", 8)
  BOTO_MAX_RETRIES = env_integer("BOTO_MAX_RETRIES", 5)
- TIMEOUT_ERROR_CODES: List[str] = ["ReadTimeoutError", "ConnectTimeoutError"]
+ BOTO_TIMEOUT_ERROR_CODES: Set[str] = {"ReadTimeoutError", "ConnectTimeoutError"}
+ BOTO_THROTTLING_ERROR_CODES: Set[str] = {"Throttling", "SlowDown"}
+ RETRYABLE_TRANSIENT_ERRORS = (
+     OSError,
+     botocore.exceptions.ConnectionError,
+     botocore.exceptions.HTTPClientError,
+     botocore.exceptions.NoCredentialsError,
+     botocore.exceptions.ConnectTimeoutError,
+     botocore.exceptions.ReadTimeoutError,
+ )
  AWS_REGION = env_string("AWS_REGION", "us-east-1")
+ UPLOAD_DOWNLOAD_RETRY_STOP_AFTER_DELAY = env_integer(
+     "UPLOAD_DOWNLOAD_RETRY_STOP_AFTER_DELAY", 10 * 60
+ )
+ UPLOAD_SLICED_TABLE_RETRY_STOP_AFTER_DELAY = env_integer(
+     "UPLOAD_SLICED_TABLE_RETRY_STOP_AFTER_DELAY", 30 * 60
+ )
+ DOWNLOAD_MANIFEST_ENTRY_RETRY_STOP_AFTER_DELAY = env_integer(
+     "DOWNLOAD_MANIFEST_ENTRY_RETRY_STOP_AFTER_DELAY", 30 * 60
+ )
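The new retry deadlines above are all read through deltacat's env_integer helper, so they can be tuned per deployment without code changes. A minimal sketch, assuming the variables are exported before deltacat.aws.constants is first imported (the values are illustrative, not recommendations):

    import os

    # Hypothetical overrides; env_integer() reads the environment when the
    # constants module is imported, so set these before that import happens.
    os.environ["UPLOAD_DOWNLOAD_RETRY_STOP_AFTER_DELAY"] = str(5 * 60)
    os.environ["DOWNLOAD_MANIFEST_ENTRY_RETRY_STOP_AFTER_DELAY"] = str(15 * 60)

    from deltacat.aws import constants

    assert constants.UPLOAD_DOWNLOAD_RETRY_STOP_AFTER_DELAY == 300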
deltacat/aws/s3u.py CHANGED
@@ -4,7 +4,15 @@ from functools import partial
  from typing import Any, Callable, Dict, Generator, List, Optional, Union
  from uuid import uuid4
  from botocore.config import Config
- from deltacat.aws.constants import BOTO_MAX_RETRIES
+ from deltacat.aws.constants import (
+     BOTO_MAX_RETRIES,
+     UPLOAD_DOWNLOAD_RETRY_STOP_AFTER_DELAY,
+     BOTO_THROTTLING_ERROR_CODES,
+     RETRYABLE_TRANSIENT_ERRORS,
+     BOTO_TIMEOUT_ERROR_CODES,
+     UPLOAD_SLICED_TABLE_RETRY_STOP_AFTER_DELAY,
+     DOWNLOAD_MANIFEST_ENTRY_RETRY_STOP_AFTER_DELAY,
+ )
 
  import pyarrow as pa
  import ray
@@ -25,9 +33,6 @@ from tenacity import (
  from deltacat.utils.ray_utils.concurrency import invoke_parallel
  import deltacat.aws.clients as aws_utils
  from deltacat import logs
- from deltacat.aws.constants import (
-     TIMEOUT_ERROR_CODES,
- )
  from deltacat.exceptions import NonRetryableError, RetryableError
  from deltacat.storage import (
      DistributedDataset,
@@ -253,10 +258,21 @@ def read_file(
          )
          return table
      except ClientError as e:
-         if e.response["Error"]["Code"] in TIMEOUT_ERROR_CODES:
+         if (
+             e.response["Error"]["Code"]
+             in BOTO_TIMEOUT_ERROR_CODES | BOTO_THROTTLING_ERROR_CODES
+         ):
              # Timeout error not caught by botocore
-             raise RetryableError(f"Retry table download from: {s3_url}") from e
-         raise NonRetryableError(f"Failed table download from: {s3_url}") from e
+             raise RetryableError(
+                 f"Retry table download from: {s3_url} after receiving {type(e).__name__}"
+             ) from e
+         raise NonRetryableError(
+             f"Failed table download from: {s3_url} after receiving {type(e).__name__}"
+         ) from e
+     except RETRYABLE_TRANSIENT_ERRORS as e:
+         raise RetryableError(
+             f"Retry upload for: {s3_url} after receiving {type(e).__name__}"
+         ) from e
      except BaseException as e:
          logger.warn(
              f"Read has failed for {s3_url} and content_type={content_type} "
@@ -281,7 +297,7 @@ def upload_sliced_table(
      # @retry decorator can't be pickled by Ray, so wrap upload in Retrying
      retrying = Retrying(
          wait=wait_random_exponential(multiplier=1, max=60),
-         stop=stop_after_delay(30 * 60),
+         stop=stop_after_delay(UPLOAD_SLICED_TABLE_RETRY_STOP_AFTER_DELAY),
          retry=retry_if_exception_type(RetryableError),
      )
 
@@ -315,7 +331,6 @@ def upload_sliced_table(
              **s3_client_kwargs,
          )
          manifest_entries.extend(slice_entries)
-
      return manifest_entries
 
 
@@ -363,8 +378,23 @@ def upload_table(
      except ClientError as e:
          if e.response["Error"]["Code"] == "NoSuchKey":
              # s3fs may swallow S3 errors - we were probably throttled
-             raise RetryableError(f"Retry table upload to: {s3_url}") from e
-         raise NonRetryableError(f"Failed table upload to: {s3_url}") from e
+             raise RetryableError(
+                 f"Retry table download from: {s3_url} after receiving {type(e).__name__}"
+             ) from e
+         if (
+             e.response["Error"]["Code"]
+             in BOTO_TIMEOUT_ERROR_CODES | BOTO_THROTTLING_ERROR_CODES
+         ):
+             raise RetryableError(
+                 f"Retry table download from: {s3_url} after receiving {type(e).__name__}"
+             ) from e
+         raise NonRetryableError(
+             f"Failed table upload to: {s3_url} after receiving {type(e).__name__}"
+         ) from e
+     except RETRYABLE_TRANSIENT_ERRORS as e:
+         raise RetryableError(
+             f"Retry upload for: {s3_url} after receiving {type(e).__name__}"
+         ) from e
      except BaseException as e:
          logger.warn(
              f"Upload has failed for {s3_url} and content_type={content_type}. Error: {e}",
@@ -412,7 +442,7 @@ def download_manifest_entry(
      # @retry decorator can't be pickled by Ray, so wrap download in Retrying
      retrying = Retrying(
          wait=wait_random_exponential(multiplier=1, max=60),
-         stop=stop_after_delay(30 * 60),
+         stop=stop_after_delay(DOWNLOAD_MANIFEST_ENTRY_RETRY_STOP_AFTER_DELAY),
          retry=retry_if_not_exception_type(NonRetryableError),
      )
      table = retrying(
@@ -504,41 +534,85 @@ def download_manifest_entries_distributed(
 
  def upload(s3_url: str, body, **s3_client_kwargs) -> Dict[str, Any]:
 
-     # TODO (pdames): add tenacity retrying
      parsed_s3_url = parse_s3_url(s3_url)
      s3 = s3_client_cache(None, **s3_client_kwargs)
-     return s3.put_object(
-         Body=body,
-         Bucket=parsed_s3_url.bucket,
-         Key=parsed_s3_url.key,
+     retrying = Retrying(
+         wait=wait_random_exponential(multiplier=1, max=15),
+         stop=stop_after_delay(UPLOAD_DOWNLOAD_RETRY_STOP_AFTER_DELAY),
+         retry=retry_if_exception_type(RetryableError),
+     )
+     return retrying(
+         _put_object,
+         s3,
+         body,
+         parsed_s3_url.bucket,
+         parsed_s3_url.key,
      )
 
 
+ def _put_object(
+     s3_client, body: Any, bucket: str, key: str, **s3_put_object_kwargs
+ ) -> Dict[str, Any]:
+     try:
+         return s3_client.put_object(
+             Body=body, Bucket=bucket, Key=key, **s3_put_object_kwargs
+         )
+     except ClientError as e:
+         if e.response["Error"]["Code"] in BOTO_THROTTLING_ERROR_CODES:
+             raise RetryableError(
+                 f"Retry upload for: {bucket}/{key} after receiving {e.response['Error']['Code']}"
+             ) from e
+         raise NonRetryableError(f"Failed table upload to: {bucket}/{key}") from e
+     except RETRYABLE_TRANSIENT_ERRORS as e:
+         raise RetryableError(
+             f"Retry upload for: {bucket}/{key} after receiving {type(e).__name__}"
+         ) from e
+     except BaseException as e:
+         logger.error(
+             f"Upload has failed for {bucket}/{key}. Error: {type(e).__name__}",
+             exc_info=True,
+         )
+         raise NonRetryableError(f"Failed table upload to: {bucket}/{key}") from e
+
+
  def download(
      s3_url: str, fail_if_not_found: bool = True, **s3_client_kwargs
  ) -> Optional[Dict[str, Any]]:
 
-     # TODO (pdames): add tenacity retrying
      parsed_s3_url = parse_s3_url(s3_url)
      s3 = s3_client_cache(None, **s3_client_kwargs)
+     retrying = Retrying(
+         wait=wait_random_exponential(multiplier=1, max=15),
+         stop=stop_after_delay(UPLOAD_DOWNLOAD_RETRY_STOP_AFTER_DELAY),
+         retry=retry_if_exception_type(RetryableError),
+     )
+     return retrying(
+         _get_object,
+         s3,
+         parsed_s3_url.bucket,
+         parsed_s3_url.key,
+         fail_if_not_found=fail_if_not_found,
+     )
+
+
+ def _get_object(s3_client, bucket: str, key: str, fail_if_not_found: bool = True):
      try:
-         return s3.get_object(
-             Bucket=parsed_s3_url.bucket,
-             Key=parsed_s3_url.key,
+         return s3_client.get_object(
+             Bucket=bucket,
+             Key=key,
          )
      except ClientError as e:
-         if fail_if_not_found:
-             raise
-         else:
-             if e.response["Error"]["Code"] != "404":
-                 if e.response["Error"]["Code"] != "NoSuchKey":
-                     raise
-             logger.info(f"file not found: {s3_url}")
-     except s3.exceptions.NoSuchKey:
-         if fail_if_not_found:
-             raise
-         else:
-             logger.info(f"file not found: {s3_url}")
+         if e.response["Error"]["Code"] == "NoSuchKey":
+             if fail_if_not_found:
+                 raise NonRetryableError(
+                     f"Failed get object from: {bucket}/{key}"
+                 ) from e
+             logger.info(f"file not found: {bucket}/{key}")
+     except RETRYABLE_TRANSIENT_ERRORS as e:
+         raise RetryableError(
+             f"Retry get object: {bucket}/{key} after receiving {type(e).__name__}"
+         ) from e
+ 
      return None
 
 
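The upload and download changes above all follow one shape: the raw boto3 call is moved into a private helper that classifies failures into RetryableError or NonRetryableError, and a tenacity Retrying object replays only the retryable ones until a configurable deadline. A self-contained sketch of that pattern, with stand-in exception classes and an illustrative put_with_retries helper (not deltacat API):

    from botocore.exceptions import ClientError
    from tenacity import (
        Retrying,
        retry_if_exception_type,
        stop_after_delay,
        wait_random_exponential,
    )


    class RetryableError(Exception):
        ...


    class NonRetryableError(Exception):
        ...


    def flaky_put(s3_client, bucket: str, key: str, body: bytes):
        # Classify boto3 failures: throttling is retryable, anything else is not.
        try:
            return s3_client.put_object(Bucket=bucket, Key=key, Body=body)
        except ClientError as e:
            if e.response["Error"]["Code"] in {"Throttling", "SlowDown"}:
                raise RetryableError(f"retry {bucket}/{key}") from e
            raise NonRetryableError(f"failed {bucket}/{key}") from e


    def put_with_retries(s3_client, bucket: str, key: str, body: bytes):
        # Replay only RetryableError, with jittered exponential backoff,
        # and give up after a wall-clock deadline (10 minutes here).
        retrying = Retrying(
            wait=wait_random_exponential(multiplier=1, max=15),
            stop=stop_after_delay(10 * 60),
            retry=retry_if_exception_type(RetryableError),
        )
        return retrying(flaky_put, s3_client, bucket, key, body)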
deltacat/compute/compactor/model/round_completion_info.py CHANGED
@@ -128,3 +128,7 @@ class RoundCompletionInfo(dict):
      @property
      def input_average_record_size_bytes(self) -> Optional[float]:
          return self.get("inputAverageRecordSizeBytes")
+ 
+     @staticmethod
+     def get_audit_bucket_name_and_key(compaction_audit_url: str) -> Tuple[str, str]:
+         return compaction_audit_url.replace("s3://", "").split("/", 1)
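The new static helper strips the s3:// scheme and splits once on the first "/", yielding the audit file's bucket and key (as a two-element list from str.split, despite the Tuple annotation, so callers typically unpack it). A hypothetical usage:

    from deltacat.compute.compactor import RoundCompletionInfo

    # Illustrative audit URL; any s3:// URL splits into (bucket, key) the same way.
    bucket, key = RoundCompletionInfo.get_audit_bucket_name_and_key(
        "s3://my-compaction-bucket/audits/session-123.json"
    )
    assert bucket == "my-compaction-bucket"
    assert key == "audits/session-123.json"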
deltacat/compute/compactor_v2/compaction_session.py CHANGED
@@ -24,6 +24,9 @@ from deltacat.compute.compactor import (
  )
  from deltacat.compute.compactor_v2.model.merge_result import MergeResult
  from deltacat.compute.compactor_v2.model.hash_bucket_result import HashBucketResult
+ from deltacat.compute.compactor_v2.model.compaction_session import (
+     ExecutionCompactionResult,
+ )
  from deltacat.compute.compactor.model.materialize_result import MaterializeResult
  from deltacat.compute.compactor_v2.utils.merge import (
      generate_local_merge_input,
@@ -41,8 +44,11 @@ from deltacat.compute.compactor_v2.deletes.utils import prepare_deletes
  from deltacat.storage import (
      Delta,
      DeltaLocator,
+     DeltaType,
      Manifest,
      Partition,
+     Stream,
+     StreamLocator,
  )
  from deltacat.compute.compactor.model.compact_partition_params import (
      CompactPartitionParams,
@@ -57,7 +63,7 @@ from deltacat.compute.compactor_v2.utils import io
  from deltacat.compute.compactor.utils import round_completion_file as rcf
  from deltacat.utils.metrics import metrics
 
- from typing import List, Optional, Tuple
+ from typing import List, Optional
  from collections import defaultdict
  from deltacat.compute.compactor.model.compaction_session_audit_info import (
      CompactionSessionAuditInfo,
@@ -81,35 +87,52 @@ logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
 
  @metrics
  def compact_partition(params: CompactPartitionParams, **kwargs) -> Optional[str]:
-
      assert (
          params.hash_bucket_count is not None and params.hash_bucket_count >= 1
      ), "hash_bucket_count is a required arg for compactor v2"
 
      with memray.Tracker(
-         f"compaction_partition.bin"
+         "compaction_partition.bin"
      ) if params.enable_profiler else nullcontext():
-         (new_partition, new_rci, new_rcf_partition_locator,) = _execute_compaction(
+         execute_compaction_result: ExecutionCompactionResult = _execute_compaction(
              params,
              **kwargs,
          )
-
+         compaction_session_type: str = (
+             "INPLACE"
+             if execute_compaction_result.is_inplace_compacted
+             else "NON-INPLACE"
+         )
          logger.info(
              f"Partition-{params.source_partition_locator} -> "
-             f"Compaction session data processing completed"
+             f"{compaction_session_type} Compaction session data processing completed"
          )
-         round_completion_file_s3_url = None
-         if new_partition:
-             logger.info(f"Committing compacted partition to: {new_partition.locator}")
-             partition: Partition = params.deltacat_storage.commit_partition(
-                 new_partition, **params.deltacat_storage_kwargs
+         round_completion_file_s3_url: Optional[str] = None
+         if execute_compaction_result.new_compacted_partition:
+             previous_partition: Optional[Partition] = None
+             if execute_compaction_result.is_inplace_compacted:
+                 previous_partition: Optional[
+                     Partition
+                 ] = params.deltacat_storage.get_partition(
+                     params.source_partition_locator.stream_locator,
+                     params.source_partition_locator.partition_values,
+                     **params.deltacat_storage_kwargs,
+                 )
+             # NOTE: Retrieving the previous partition again as the partition_id may have changed by the time commit_partition is called.
+             logger.info(
+                 f"Committing compacted partition to: {execute_compaction_result.new_compacted_partition.locator} "
+                 f"using previous partition: {previous_partition.locator if previous_partition else None}"
              )
-             logger.info(f"Committed compacted partition: {partition}")
-
+             commited_partition: Partition = params.deltacat_storage.commit_partition(
+                 execute_compaction_result.new_compacted_partition,
+                 previous_partition,
+                 **params.deltacat_storage_kwargs,
+             )
+             logger.info(f"Committed compacted partition: {commited_partition}")
              round_completion_file_s3_url = rcf.write_round_completion_file(
                  params.compaction_artifact_s3_bucket,
-                 new_rcf_partition_locator,
-                 new_rci,
+                 execute_compaction_result.new_round_completion_file_partition_locator,
+                 execute_compaction_result.new_round_completion_info,
                  **params.s3_client_kwargs,
              )
          else:
@@ -123,7 +146,7 @@ def compact_partition(params: CompactPartitionParams, **kwargs) -> Optional[str]
 
  def _execute_compaction(
      params: CompactPartitionParams, **kwargs
- ) -> Tuple[Optional[Partition], Optional[RoundCompletionInfo], Optional[str]]:
+ ) -> ExecutionCompactionResult:
 
      rcf_source_partition_locator = (
          params.rebase_source_partition_locator or params.source_partition_locator
@@ -142,7 +165,7 @@ def _execute_compaction(
 
      compaction_start = time.monotonic()
 
-     task_max_parallelism = params.task_max_parallelism
+     task_max_parallelism: int = params.task_max_parallelism
 
      if params.pg_config:
          logger.info(
@@ -205,7 +228,7 @@ def _execute_compaction(
      )
      if not input_deltas:
          logger.info("No input deltas found to compact.")
-         return None, None, None
+         return ExecutionCompactionResult(None, None, None, False)
      delete_strategy: Optional[DeleteStrategy] = None
      delete_file_envelopes: Optional[List[DeleteFileEnvelope]] = None
  delete_file_envelopes: Optional[List[DeleteFileEnvelope]] = None
@@ -217,7 +240,7 @@ def _execute_compaction(
          for delete_file_envelope in delete_file_envelopes:
              delete_file_size_bytes += delete_file_envelope.table_size_bytes
          logger.info(
-             f" Input deltas contain DELETE-type deltas. Total delete file size={delete_file_size_bytes}."
+             f" Input deltas contain {DeltaType.DELETE}-type deltas. Total delete file size={delete_file_size_bytes}."
              f" Total length of delete file envelopes={len(delete_file_envelopes)}"
          )
      uniform_deltas: List[DeltaAnnotated] = io.create_uniform_input_deltas(
@@ -247,14 +270,16 @@ def _execute_compaction(
      )
 
      # create a new stream for this round
-     compacted_stream_locator = params.destination_partition_locator.stream_locator
-     compacted_stream = params.deltacat_storage.get_stream(
+     compacted_stream_locator: Optional[
+         StreamLocator
+     ] = params.destination_partition_locator.stream_locator
+     compacted_stream: Stream = params.deltacat_storage.get_stream(
          compacted_stream_locator.namespace,
          compacted_stream_locator.table_name,
          compacted_stream_locator.table_version,
          **params.deltacat_storage_kwargs,
      )
-     compacted_partition = params.deltacat_storage.stage_partition(
+     compacted_partition: Partition = params.deltacat_storage.stage_partition(
          compacted_stream,
          params.destination_partition_locator.partition_values,
          **params.deltacat_storage_kwargs,
@@ -532,7 +557,7 @@ def _execute_compaction(
 
      # Note: An appropriate last stream position must be set
      # to avoid correctness issue.
-     merged_delta = Delta.merge_deltas(
+     merged_delta: Delta = Delta.merge_deltas(
          deltas,
          stream_position=params.last_stream_position_to_compact,
      )
@@ -545,7 +570,7 @@ def _execute_compaction(
      )
      logger.info(record_info_msg)
 
-     compacted_delta = params.deltacat_storage.commit_delta(
+     compacted_delta: Delta = params.deltacat_storage.commit_delta(
          merged_delta,
          properties=kwargs.get("properties", {}),
          **params.deltacat_storage_kwargs,
@@ -653,8 +678,9 @@ def _execute_compaction(
              f"and rcf source partition_id of {rcf_source_partition_locator.partition_id}."
          )
          rcf_source_partition_locator = compacted_partition.locator
-     return (
+     return ExecutionCompactionResult(
          compacted_partition,
          new_round_completion_info,
          rcf_source_partition_locator,
+         is_inplace_compacted,
      )
deltacat/compute/compactor_v2/constants.py CHANGED
@@ -1,3 +1,5 @@
+ from deltacat.utils.common import env_integer
+ 
  TOTAL_BYTES_IN_SHA1_HASH = 20
 
  PK_DELIMITER = "L6kl7u5f"
@@ -41,6 +43,16 @@ DROP_DUPLICATES = True
  # size in metadata to pyarrow table size.
  PARQUET_TO_PYARROW_INFLATION = 4
 
+ # A merge task will fail after this timeout
+ # The default is currently double the observed maximum.
+ # This timeout depends on total data processed per task.
+ MERGE_TASK_TIMEOUT_IN_SECONDS = env_integer("MERGE_TASK_TIMEOUT_IN_SECONDS", 25 * 60)
+ 
+ # A hash bucket task will fail after this timeout
+ HASH_BUCKET_TASK_TIMEOUT_IN_SECONDS = env_integer(
+     "HASH_BUCKET_TASK_TIMEOUT_IN_SECONDS", 25 * 60
+ )
+ 
  # Metric Names
  # Time taken for a hash bucket task
  HASH_BUCKET_TIME_IN_SECONDS = "hash_bucket_time"
deltacat/compute/compactor_v2/model/compaction_session.py CHANGED
@@ -0,0 +1,21 @@
+ from dataclasses import dataclass, fields
+ 
+ from deltacat.storage import (
+     Partition,
+     PartitionLocator,
+ )
+ from deltacat.compute.compactor import (
+     RoundCompletionInfo,
+ )
+ from typing import Optional
+ 
+ 
+ @dataclass(frozen=True)
+ class ExecutionCompactionResult:
+     new_compacted_partition: Optional[Partition]
+     new_round_completion_info: Optional[RoundCompletionInfo]
+     new_round_completion_file_partition_locator: Optional[PartitionLocator]
+     is_inplace_compacted: bool
+ 
+     def __iter__(self):
+         return (getattr(self, field.name) for field in fields(self))
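Because ExecutionCompactionResult is a frozen dataclass that also defines __iter__ over its fields, callers can either read the named attributes or tuple-unpack it much like the old 3-tuple return value (now with a fourth element). A short sketch:

    from deltacat.compute.compactor_v2.model.compaction_session import (
        ExecutionCompactionResult,
    )

    # The empty result _execute_compaction returns when there is nothing to compact.
    result = ExecutionCompactionResult(None, None, None, False)

    # Attribute access...
    assert result.is_inplace_compacted is False

    # ...or positional unpacking via the generator returned by __iter__.
    partition, round_completion_info, rcf_locator, is_inplace = result
    assert partition is None and is_inplace is False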
deltacat/compute/compactor_v2/steps/hash_bucket.py CHANGED
@@ -29,12 +29,14 @@ from deltacat.utils.metrics import emit_timer_metrics, failure_metric, success_m
  from deltacat.utils.resources import (
      get_current_process_peak_memory_usage_in_bytes,
      ProcessUtilizationOverTimeRange,
+     timeout,
  )
  from deltacat.constants import BYTES_PER_GIBIBYTE
  from deltacat.compute.compactor_v2.constants import (
      HASH_BUCKET_TIME_IN_SECONDS,
      HASH_BUCKET_FAILURE_COUNT,
      HASH_BUCKET_SUCCESS_COUNT,
+     HASH_BUCKET_TASK_TIMEOUT_IN_SECONDS,
  )
 
  if importlib.util.find_spec("memray"):
@@ -96,8 +98,12 @@ def _group_file_records_by_pk_hash_bucket(
      return hb_to_delta_file_envelopes, total_record_count, total_size_bytes
 
 
+ # TODO: use timeout parameter in ray.remote
+ # https://github.com/ray-project/ray/issues/18916
+ # Note: order of decorators is important
  @success_metric(name=HASH_BUCKET_SUCCESS_COUNT)
  @failure_metric(name=HASH_BUCKET_FAILURE_COUNT)
+ @timeout(HASH_BUCKET_TASK_TIMEOUT_IN_SECONDS)
  def _timed_hash_bucket(input: HashBucketInput):
      task_id = get_current_ray_task_id()
      worker_id = get_current_ray_worker_id()
deltacat/compute/compactor_v2/steps/merge.py CHANGED
@@ -28,6 +28,7 @@ from deltacat.utils.metrics import emit_timer_metrics, failure_metric, success_m
  from deltacat.utils.resources import (
      get_current_process_peak_memory_usage_in_bytes,
      ProcessUtilizationOverTimeRange,
+     timeout,
  )
  from deltacat.compute.compactor_v2.utils.primary_key_index import (
      generate_pk_hash_column,
@@ -46,6 +47,7 @@ from deltacat.compute.compactor_v2.constants import (
      MERGE_TIME_IN_SECONDS,
      MERGE_SUCCESS_COUNT,
      MERGE_FAILURE_COUNT,
+     MERGE_TASK_TIMEOUT_IN_SECONDS,
  )
 
 
@@ -484,8 +486,12 @@ def _copy_manifests_from_hash_bucketing(
      return materialized_results
 
 
+ # TODO: use timeout parameter in ray.remote
+ # https://github.com/ray-project/ray/issues/18916
+ # Note: order of decorators is important
  @success_metric(name=MERGE_SUCCESS_COUNT)
  @failure_metric(name=MERGE_FAILURE_COUNT)
+ @timeout(MERGE_TASK_TIMEOUT_IN_SECONDS)
  def _timed_merge(input: MergeInput) -> MergeResult:
      task_id = get_current_ray_task_id()
      worker_id = get_current_ray_worker_id()
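Both task entry points now sit under a timeout(...) decorator from deltacat.utils.resources (added in this release; see deltacat/utils/resources.py in the file list), applied inside the success/failure metric decorators, presumably so that an aborted task still surfaces through the failure metric. A rough sketch of how such a decorator can be built with signal.alarm; this is an illustrative stand-in, not deltacat's implementation, and signal.alarm only works on the main thread of a Unix process:

    import functools
    import signal


    def timeout(seconds: int):
        """Raise TimeoutError if the wrapped function runs longer than `seconds`."""

        def decorator(func):
            @functools.wraps(func)
            def wrapper(*args, **kwargs):
                def _handle_alarm(signum, frame):
                    raise TimeoutError(f"{func.__name__} exceeded {seconds}s")

                previous = signal.signal(signal.SIGALRM, _handle_alarm)
                signal.alarm(seconds)  # start the countdown
                try:
                    return func(*args, **kwargs)
                finally:
                    signal.alarm(0)  # cancel any pending alarm
                    signal.signal(signal.SIGALRM, previous)

            return wrapper

        return decorator


    @timeout(2)
    def slow_step():
        import time

        time.sleep(5)  # interrupted after ~2 seconds with TimeoutError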
deltacat/compute/compactor_v2/utils/task_options.py CHANGED
@@ -1,5 +1,6 @@
  import botocore
  import logging
+ import tenacity
  from typing import Dict, Optional, List, Tuple, Any
  from deltacat import logs
  from deltacat.compute.compactor_v2.model.merge_file_group import (
@@ -20,7 +21,6 @@ from deltacat.compute.compactor_v2.utils.primary_key_index import (
  from deltacat.compute.compactor_v2.constants import (
      PARQUET_TO_PYARROW_INFLATION,
  )
-
  from daft.exceptions import DaftTransientError
 
 
@@ -65,7 +65,12 @@ def get_task_options(
      cpu: float, memory: float, ray_custom_resources: Optional[Dict] = None
  ) -> Dict:
 
-     task_opts = {"num_cpus": cpu, "memory": memory}
+     # NOTE: With DEFAULT scheduling strategy in Ray 2.20.0, autoscaler does
+     # not spin up enough nodes fast and hence we see only approximately
+     # 20 tasks get scheduled out of 100 tasks in queue. Hence, we use SPREAD
+     # which is also ideal for merge and hash bucket tasks.
+     # https://docs.ray.io/en/latest/ray-core/scheduling/index.html
+     task_opts = {"num_cpus": cpu, "memory": memory, "scheduling_strategy": "SPREAD"}
 
      if ray_custom_resources:
          task_opts["resources"] = ray_custom_resources
@@ -80,6 +85,7 @@ def get_task_options(
          ConnectionError,
          TimeoutError,
          DaftTransientError,
+         tenacity.RetryError,
      ]
 
      return task_opts
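The SPREAD strategy and the widened retry_exceptions list produced by get_task_options feed directly into ray.remote task options. A hedged sketch of how such a dictionary is typically consumed (the do_work task and the exact option values are illustrative):

    import ray
    import tenacity

    ray.init(ignore_reinit_error=True)

    # Same shape as the options built above: spread tasks across nodes and let
    # Ray retry on the listed transient exception types.
    task_opts = {
        "num_cpus": 1,
        "memory": 2 * 1024**3,
        "scheduling_strategy": "SPREAD",
        "retry_exceptions": [ConnectionError, TimeoutError, tenacity.RetryError],
    }


    @ray.remote
    def do_work(x: int) -> int:
        return x * 2


    # .options(**task_opts) applies the scheduling and retry settings
    # to this invocation only.
    result = ray.get(do_work.options(**task_opts).remote(21))
    assert result == 42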
deltacat/storage/interface.py CHANGED
@@ -414,11 +414,18 @@
      raise NotImplementedError("stage_partition not implemented")
 
 
- def commit_partition(partition: Partition, *args, **kwargs) -> Partition:
+ def commit_partition(
+     partition: Partition,
+     previous_partition: Optional[Partition] = None,
+     *args,
+     **kwargs
+ ) -> Partition:
      """
      Commits the given partition to its associated table version stream,
-     replacing any previous partition registered for the same stream and
-     partition values. Returns the registered partition. If the partition's
+     replacing any previous partition (i.e., "partition being replaced") registered for the same stream and
+     partition values.
+     If the previous_partition is passed as an argument, the specified previous_partition will be the partition being replaced, otherwise it will be retrieved.
+     Returns the registered partition. If the partition's
      previous delta stream position is specified, then the commit will
      be rejected if it does not match the actual previous stream position of
      the partition being replaced. If the partition's previous partition ID is
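For storage implementations, the new optional previous_partition argument is backward compatible: existing callers keep passing only the staged partition, while the compactor can now pin exactly which partition is being replaced. A hypothetical call site, assuming `ds` implements this storage interface and the partition objects already exist:

    from typing import Optional

    from deltacat.storage import Partition


    def commit(ds, staged_partition: Partition, prior_partition: Optional[Partition]):
        if prior_partition is not None:
            # Explicitly name the partition being replaced (new in 1.1.7).
            return ds.commit_partition(staged_partition, prior_partition)
        # Legacy call: the implementation looks up the partition being replaced.
        return ds.commit_partition(staged_partition)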