deltacat 1.1.9__py3-none-any.whl → 1.1.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +1 -1
- deltacat/aws/redshift/model/manifest.py +16 -0
- deltacat/aws/s3u.py +19 -13
- deltacat/compute/compactor/compaction_session.py +5 -1
- deltacat/compute/compactor/repartition_session.py +1 -0
- deltacat/compute/compactor/utils/round_completion_file.py +39 -9
- deltacat/compute/compactor_v2/compaction_session.py +15 -11
- deltacat/compute/compactor_v2/constants.py +3 -0
- deltacat/compute/compactor_v2/model/{compaction_session.py → evaluate_compaction_result.py} +1 -2
- deltacat/io/dataset.py +5 -17
- deltacat/storage/__init__.py +24 -0
- deltacat/storage/interface.py +42 -6
- deltacat/storage/model/delta.py +23 -3
- deltacat/storage/model/partition.py +6 -7
- deltacat/storage/model/partition_spec.py +71 -0
- deltacat/storage/model/stream.py +38 -1
- deltacat/storage/model/transform.py +127 -0
- deltacat/tests/aws/test_s3u.py +2 -0
- deltacat/tests/compute/compactor/utils/test_round_completion_file.py +209 -0
- deltacat/tests/compute/compactor_v2/test_compaction_session.py +201 -36
- deltacat/tests/compute/test_compact_partition_rebase.py +1 -1
- deltacat/tests/compute/test_util_common.py +19 -4
- deltacat/tests/local_deltacat_storage/__init__.py +83 -19
- deltacat/tests/test_utils/pyarrow.py +4 -1
- deltacat/tests/utils/ray_utils/test_dataset.py +66 -0
- deltacat/utils/numpy.py +3 -3
- deltacat/utils/pandas.py +3 -3
- deltacat/utils/pyarrow.py +3 -3
- deltacat/utils/ray_utils/dataset.py +7 -7
- {deltacat-1.1.9.dist-info → deltacat-1.1.10.dist-info}/METADATA +5 -4
- {deltacat-1.1.9.dist-info → deltacat-1.1.10.dist-info}/RECORD +34 -31
- deltacat/io/aws/redshift/redshift_datasource.py +0 -578
- {deltacat-1.1.9.dist-info → deltacat-1.1.10.dist-info}/LICENSE +0 -0
- {deltacat-1.1.9.dist-info → deltacat-1.1.10.dist-info}/WHEEL +0 -0
- {deltacat-1.1.9.dist-info → deltacat-1.1.10.dist-info}/top_level.txt +0 -0
deltacat/aws/redshift/model/manifest.py
CHANGED
@@ -99,6 +99,8 @@ class Manifest(dict):
         total_source_content_length = 0
         content_type = None
         content_encoding = None
+        partition_values_set = set()
+        partition_values = None
         if entries:
             content_type = entries[0].meta.content_type
             content_encoding = entries[0].meta.content_encoding
@@ -127,6 +129,12 @@ class Manifest(dict):
                 total_record_count += meta.record_count or 0
                 total_content_length += meta.content_length or 0
                 total_source_content_length += meta.source_content_length or 0
+                if len(partition_values_set) <= 1:
+                    partition_values_set.add(entry.meta.partition_values)
+
+            if len(partition_values_set) == 1:
+                partition_values = partition_values_set.pop()
+
         meta = ManifestMeta.of(
             total_record_count,
             total_content_length,
@@ -134,6 +142,7 @@ class Manifest(dict):
             content_encoding,
             total_source_content_length,
             entry_type=entry_type,
+            partition_values=partition_values,
         )
         manifest = Manifest._build_manifest(meta, entries, author, uuid, entry_type)
         return manifest
@@ -185,6 +194,7 @@ class ManifestMeta(dict):
         credentials: Optional[Dict[str, str]] = None,
         content_type_parameters: Optional[List[Dict[str, str]]] = None,
         entry_type: Optional[EntryType] = None,
+        partition_values: Optional[List[str]] = None,
     ) -> ManifestMeta:
         manifest_meta = ManifestMeta()
         if record_count is not None:
@@ -203,6 +213,8 @@ class ManifestMeta(dict):
             manifest_meta["credentials"] = credentials
         if entry_type is not None:
             manifest_meta["entry_type"] = entry_type.value
+        if partition_values is not None:
+            manifest_meta["partition_values"] = partition_values
         return manifest_meta
 
     @property
@@ -244,6 +256,10 @@ class ManifestMeta(dict):
             return EntryType(self["entry_type"])
         return val
 
+    @property
+    def partition_values(self) -> Optional[List[str]]:
+        return self.get("partition_values")
+
 
 class ManifestAuthor(dict):
     @staticmethod
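The merge logic above only records partition_values on the aggregated ManifestMeta when every entry agrees on them. A standalone sketch of that rule (plain Python, with entry partition values modeled as hashable tuples rather than the real manifest objects):

```python
from typing import List, Optional, Tuple

PartitionValues = Optional[Tuple[str, ...]]


def aggregate_partition_values(entry_values: List[PartitionValues]) -> PartitionValues:
    """Return the unanimous partition values across entries, or None if they differ."""
    partition_values_set = set()
    for values in entry_values:
        # Mirrors the `<= 1` guard: stop collecting once divergence is detected.
        if len(partition_values_set) <= 1:
            partition_values_set.add(values)
    return partition_values_set.pop() if len(partition_values_set) == 1 else None


# All entries share the same values -> propagated to the merged manifest meta.
assert aggregate_partition_values([("2024-01-01",), ("2024-01-01",)]) == ("2024-01-01",)
# Mixed values -> the merged meta leaves partition_values unset (None).
assert aggregate_partition_values([("2024-01-01",), ("2024-01-02",)]) is None
```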
deltacat/aws/s3u.py
CHANGED
@@ -21,7 +21,7 @@ from boto3.resources.base import ServiceResource
 from botocore.client import BaseClient
 from botocore.exceptions import ClientError
 from ray.data.block import Block, BlockAccessor, BlockMetadata
-from ray.data.datasource import BlockWritePathProvider
+from ray.data.datasource import FilenameProvider
 from ray.types import ObjectRef
 from tenacity import (
     Retrying,
@@ -70,9 +70,6 @@ from deltacat.exceptions import categorize_errors
 
 logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
 
-# TODO(raghumdani): refactor redshift datasource to reuse the
-# same module for writing output files.
-
 
 class CapturedBlockWritePaths:
     def __init__(self):
@@ -100,12 +97,15 @@ class CapturedBlockWritePaths:
         return self._block_refs
 
 
-class UuidBlockWritePathProvider(BlockWritePathProvider):
+class UuidBlockWritePathProvider(FilenameProvider):
     """Block write path provider implementation that writes each
     dataset block out to a file of the form: {base_path}/{uuid}
     """
 
-    def __init__(self, capture_object: CapturedBlockWritePaths):
+    def __init__(
+        self, capture_object: CapturedBlockWritePaths, base_path: Optional[str] = None
+    ):
+        self.base_path = base_path
         self.write_paths: List[str] = []
         self.block_refs: List[ObjectRef[Block]] = []
         self.capture_object = capture_object
@@ -117,6 +117,19 @@ class UuidBlockWritePathProvider(BlockWritePathProvider):
             self.block_refs,
         )
 
+    def get_filename_for_block(
+        self, block: Any, task_index: int, block_index: int
+    ) -> str:
+        if self.base_path is None:
+            raise ValueError(
+                "Base path must be provided to UuidBlockWritePathProvider",
+            )
+        return self._get_write_path_for_block(
+            base_path=self.base_path,
+            block=block,
+            block_index=block_index,
+        )
+
     def _get_write_path_for_block(
         self,
         base_path: str,
@@ -143,13 +156,6 @@ class UuidBlockWritePathProvider(BlockWritePathProvider):
         block_index: Optional[int] = None,
         file_format: Optional[str] = None,
     ) -> str:
-        """
-        TODO: BlockWritePathProvider is deprecated as of Ray version 2.20.0. Please use FilenameProvider.
-        See: https://docs.ray.io/en/master/data/api/doc/ray.data.datasource.FilenameProvider.html
-        Also See: https://github.com/ray-project/deltacat/issues/299
-
-        Hence, this class only works with Ray version 2.20.0 or lower when used in Ray Dataset.
-        """
        return self._get_write_path_for_block(
            base_path,
            filesystem=filesystem,
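UuidBlockWritePathProvider now implements Ray's FilenameProvider interface, so it needs an explicit base_path when Ray Data asks it for a filename. A small usage sketch (bucket path is illustrative; assumes deltacat and a recent Ray are installed):

```python
from deltacat.aws.s3u import CapturedBlockWritePaths, UuidBlockWritePathProvider

capture = CapturedBlockWritePaths()

# New constructor shape: base_path is accepted and required for filename generation.
provider = UuidBlockWritePathProvider(capture, base_path="s3://my-bucket/compacted")

# Without base_path, filename generation fails fast rather than writing to an
# undefined location (the block argument is not consulted before the check).
try:
    UuidBlockWritePathProvider(capture).get_filename_for_block(
        None, task_index=0, block_index=0
    )
except ValueError as err:
    print(err)
```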
deltacat/compute/compactor/compaction_session.py
CHANGED
@@ -193,6 +193,7 @@ def compact_partition(
         round_completion_file_s3_url = rcf.write_round_completion_file(
             compaction_artifact_s3_bucket,
             new_rcf_partition_locator,
+            partition.locator,
             new_rci,
             **s3_client_kwargs,
         )
@@ -312,7 +313,10 @@ def _execute_compaction_round(
     round_completion_info = None
     if not rebase_source_partition_locator:
         round_completion_info = rcf.read_round_completion_file(
-            compaction_artifact_s3_bucket, source_partition_locator, **s3_client_kwargs
+            compaction_artifact_s3_bucket,
+            source_partition_locator,
+            destination_partition_locator,
+            **s3_client_kwargs,
         )
     if not round_completion_info:
         logger.info(
deltacat/compute/compactor/utils/round_completion_file.py
CHANGED
@@ -12,10 +12,17 @@ logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
 
 
 def get_round_completion_file_s3_url(
-    bucket: str, source_partition_locator: PartitionLocator
+    bucket: str,
+    source_partition_locator: PartitionLocator,
+    destination_partition_locator: Optional[PartitionLocator] = None,
 ) -> str:
 
     base_url = source_partition_locator.path(f"s3://{bucket}")
+    if destination_partition_locator:
+        base_url = destination_partition_locator.path(
+            f"s3://{bucket}/{source_partition_locator.hexdigest()}"
+        )
+
     return f"{base_url}.json"
 
 
@@ -23,20 +30,41 @@ def get_round_completion_file_s3_url(
 def read_round_completion_file(
     bucket: str,
     source_partition_locator: PartitionLocator,
+    destination_partition_locator: Optional[PartitionLocator] = None,
     **s3_client_kwargs: Optional[Dict[str, Any]],
 ) -> RoundCompletionInfo:
 
-    round_completion_file_url = get_round_completion_file_s3_url(
+    all_uris = []
+    if destination_partition_locator:
+        round_completion_file_url_with_destination = get_round_completion_file_s3_url(
+            bucket,
+            source_partition_locator,
+            destination_partition_locator,
+        )
+        all_uris.append(round_completion_file_url_with_destination)
+
+    # Note: we read from RCF at two different URI for backward
+    # compatibility reasons.
+    round_completion_file_url_prev = get_round_completion_file_s3_url(
         bucket,
         source_partition_locator,
     )
-    logger.info(f"reading round completion file from: {round_completion_file_url}")
+
+    all_uris.append(round_completion_file_url_prev)
+
     round_completion_info = None
-    result = s3_utils.download(round_completion_file_url, False, **s3_client_kwargs)
-    if result:
-        json_str = result["Body"].read().decode("utf-8")
-        round_completion_info = RoundCompletionInfo(json.loads(json_str))
-        logger.info(f"read round completion info: {round_completion_info}")
+
+    for rcf_uri in all_uris:
+        logger.info(f"Reading round completion file from: {rcf_uri}")
+        result = s3_utils.download(rcf_uri, False, **s3_client_kwargs)
+        if result:
+            json_str = result["Body"].read().decode("utf-8")
+            round_completion_info = RoundCompletionInfo(json.loads(json_str))
+            logger.info(f"Read round completion info: {round_completion_info}")
+            break
+        else:
+            logger.warn(f"Round completion file not present at {rcf_uri}")
+
     return round_completion_info
 
 
@@ -44,8 +72,9 @@ def read_round_completion_file(
 def write_round_completion_file(
     bucket: Optional[str],
     source_partition_locator: Optional[PartitionLocator],
+    destination_partition_locator: Optional[PartitionLocator],
     round_completion_info: RoundCompletionInfo,
-    completion_file_s3_url: str = None,
+    completion_file_s3_url: Optional[str] = None,
     **s3_client_kwargs: Optional[Dict[str, Any]],
 ) -> str:
     if bucket is None and completion_file_s3_url is None:
@@ -56,6 +85,7 @@ def write_round_completion_file(
         completion_file_s3_url = get_round_completion_file_s3_url(
             bucket,
             source_partition_locator,
+            destination_partition_locator,
         )
     logger.info(f"writing round completion file to: {completion_file_s3_url}")
     s3_utils.upload(
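The URL helper now produces a destination-scoped path when a destination locator is given, and readers fall back to the legacy source-only path for files written by older releases. A standalone sketch of the layout (FakeLocator is a stand-in that models only the path() and hexdigest() calls used above; names and digests are illustrative):

```python
from typing import Optional


class FakeLocator:
    """Stand-in for PartitionLocator exposing only path() and hexdigest()."""

    def __init__(self, name: str, digest: str):
        self._name, self._digest = name, digest

    def path(self, prefix: str) -> str:
        return f"{prefix}/{self._name}"

    def hexdigest(self) -> str:
        return self._digest


def rcf_url(bucket: str, source: FakeLocator, destination: Optional[FakeLocator] = None) -> str:
    # Mirrors get_round_completion_file_s3_url(): destination-scoped when a
    # destination locator is provided, otherwise the legacy source-only URL.
    base_url = source.path(f"s3://{bucket}")
    if destination:
        base_url = destination.path(f"s3://{bucket}/{source.hexdigest()}")
    return f"{base_url}.json"


src = FakeLocator("source-partition", "abc123")
dst = FakeLocator("destination-partition", "def456")
print(rcf_url("artifacts", src))       # legacy: s3://artifacts/source-partition.json
print(rcf_url("artifacts", src, dst))  # new: s3://artifacts/abc123/destination-partition.json
```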
deltacat/compute/compactor_v2/compaction_session.py
CHANGED
@@ -24,7 +24,7 @@ from deltacat.compute.compactor import (
 )
 from deltacat.compute.compactor_v2.model.merge_result import MergeResult
 from deltacat.compute.compactor_v2.model.hash_bucket_result import HashBucketResult
-from deltacat.compute.compactor_v2.model.compaction_session import (
+from deltacat.compute.compactor_v2.model.evaluate_compaction_result import (
     ExecutionCompactionResult,
 )
 from deltacat.compute.compactor.model.materialize_result import MaterializeResult
@@ -78,6 +78,7 @@ from deltacat.compute.compactor_v2.utils.task_options import (
 )
 from deltacat.compute.compactor.model.compactor_version import CompactorVersion
 from deltacat.exceptions import categorize_errors
+from deltacat.compute.compactor_v2.constants import COMPACT_PARTITION_METRIC_PREFIX
 
 if importlib.util.find_spec("memray"):
     import memray
@@ -86,7 +87,7 @@ if importlib.util.find_spec("memray"):
 logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
 
 
-@metrics
+@metrics(prefix=COMPACT_PARTITION_METRIC_PREFIX)
 @categorize_errors
 def compact_partition(params: CompactPartitionParams, **kwargs) -> Optional[str]:
     assert (
@@ -109,7 +110,6 @@ def compact_partition(params: CompactPartitionParams, **kwargs) -> Optional[str]
         f"Partition-{params.source_partition_locator} -> "
         f"{compaction_session_type} Compaction session data processing completed"
     )
-    round_completion_file_s3_url: Optional[str] = None
     if execute_compaction_result.new_compacted_partition:
         previous_partition: Optional[Partition] = None
         if execute_compaction_result.is_inplace_compacted:
@@ -131,19 +131,13 @@ def compact_partition(params: CompactPartitionParams, **kwargs) -> Optional[str]
             **params.deltacat_storage_kwargs,
         )
         logger.info(f"Committed compacted partition: {committed_partition}")
-        round_completion_file_s3_url = rcf.write_round_completion_file(
-            params.compaction_artifact_s3_bucket,
-            execute_compaction_result.new_round_completion_file_partition_locator,
-            execute_compaction_result.new_round_completion_info,
-            **params.s3_client_kwargs,
-        )
     else:
         logger.warning("No new partition was committed during compaction.")
 
     logger.info(
         f"Completed compaction session for: {params.source_partition_locator}"
     )
-    return round_completion_file_s3_url
+    return execute_compaction_result.round_completion_file_s3_url
 
 
 def _execute_compaction(
@@ -188,6 +182,7 @@ def _execute_compaction(
         round_completion_info = rcf.read_round_completion_file(
             params.compaction_artifact_s3_bucket,
             params.source_partition_locator,
+            params.destination_partition_locator,
             **params.s3_client_kwargs,
         )
     if not round_completion_info:
@@ -684,9 +679,18 @@ def _execute_compaction(
             f"and rcf source partition_id of {rcf_source_partition_locator.partition_id}."
         )
         rcf_source_partition_locator = compacted_partition.locator
+
+    round_completion_file_s3_url = rcf.write_round_completion_file(
+        params.compaction_artifact_s3_bucket,
+        rcf_source_partition_locator,
+        compacted_partition.locator,
+        new_round_completion_info,
+        **params.s3_client_kwargs,
+    )
+
     return ExecutionCompactionResult(
         compacted_partition,
         new_round_completion_info,
-        rcf_source_partition_locator,
+        round_completion_file_s3_url,
         is_inplace_compacted,
     )
deltacat/compute/compactor_v2/model/{compaction_session.py → evaluate_compaction_result.py}
RENAMED
@@ -2,7 +2,6 @@ from dataclasses import dataclass, fields
 
 from deltacat.storage import (
     Partition,
-    PartitionLocator,
 )
 from deltacat.compute.compactor import (
     RoundCompletionInfo,
@@ -14,7 +13,7 @@ from typing import Optional
 class ExecutionCompactionResult:
     new_compacted_partition: Optional[Partition]
     new_round_completion_info: Optional[RoundCompletionInfo]
-    new_round_completion_file_partition_locator: Optional[PartitionLocator]
+    round_completion_file_s3_url: Optional[str]
     is_inplace_compacted: bool
 
     def __iter__(self):
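With this change, compact_partition() no longer writes the round completion file itself; _execute_compaction() writes it and surfaces the URL through the renamed result model. A minimal sketch of the reshaped record (a simplified stand-in rather than the real class, with illustrative values):

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class ExecutionCompactionResultSketch:
    new_compacted_partition: Optional[object]
    new_round_completion_info: Optional[object]
    # Replaces the old new_round_completion_file_partition_locator field.
    round_completion_file_s3_url: Optional[str]
    is_inplace_compacted: bool


result = ExecutionCompactionResultSketch(
    new_compacted_partition=None,
    new_round_completion_info=None,
    round_completion_file_s3_url="s3://my-artifact-bucket/abc123/destination-partition.json",
    is_inplace_compacted=False,
)
# compact_partition() now simply returns this URL to its caller.
print(result.round_completion_file_s3_url)
```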
deltacat/io/dataset.py
CHANGED
@@ -6,9 +6,6 @@ from typing import Any, Callable, Dict, Optional, TypeVar, Union, cast
 import pyarrow as pa
 import s3fs
 from ray.data import Dataset
-from ray.data.datasource import BlockWritePathProvider, DefaultBlockWritePathProvider
-
-from deltacat.io.aws.redshift.redshift_datasource import RedshiftDatasource
 
 T = TypeVar("T")
 
@@ -27,7 +24,6 @@ class DeltacatDataset(Dataset[T]):
         filesystem: Optional[Union[pa.fs.FileSystem, s3fs.S3FileSystem]] = None,
         try_create_dir: bool = True,
         arrow_open_stream_args: Optional[Dict[str, Any]] = None,
-        block_path_provider: BlockWritePathProvider = DefaultBlockWritePathProvider(),
         arrow_parquet_args_fn: Callable[[], Dict[str, Any]] = lambda: {},
         **arrow_parquet_args,
     ) -> None:
@@ -59,9 +55,8 @@ class DeltacatDataset(Dataset[T]):
             if True. Does nothing if all directories already exist.
         arrow_open_stream_args: kwargs passed to
             pyarrow.fs.FileSystem.open_output_stream
-        block_path_provider: BlockWritePathProvider implementation
-            to write each dataset block to a custom output path.
-            DefaultBlockWritePathProvider if None.
+        filename_provider: FilenameProvider implementation
+            to write each dataset block to a custom output path.
         arrow_parquet_args_fn: Callable that returns a dictionary of write
             arguments to use when writing each block to a file. Overrides
             any duplicate keys from arrow_parquet_args. This should be used
@@ -72,14 +67,7 @@ class DeltacatDataset(Dataset[T]):
             pyarrow.parquet.write_table(), which is used to write out each
             block to a file.
         """
-
-
-
-            dataset_uuid=self._uuid,
-            filesystem=filesystem,
-            try_create_dir=try_create_dir,
-            open_stream_args=arrow_open_stream_args,
-            block_path_provider=block_path_provider,
-            write_args_fn=arrow_parquet_args_fn,
-            **arrow_parquet_args,
+        raise NotImplementedError(
+            "Writing to Redshift is not yet supported. "
+            "Please use DeltacatDataset.write_parquet() instead."
         )
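Since DeltacatDataset.write_redshift() now raises NotImplementedError, existing callers need to switch to the standard Parquet writer that DeltacatDataset inherits from ray.data.Dataset. An illustrative migration sketch (output path is a placeholder):

```python
import ray

# Any Ray dataset works here; DeltacatDataset inherits write_parquet() from ray.data.Dataset.
ds = ray.data.range(100)

# Previously: deltacat_dataset.write_redshift("s3://bucket/prefix", ...)
# Now: write Parquet files directly; Redshift can ingest them via COPY.
ds.write_parquet("/tmp/deltacat_write_parquet_example")
```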
deltacat/storage/__init__.py
CHANGED
@@ -14,6 +14,20 @@ from deltacat.storage.model.stream import Stream, StreamLocator
 from deltacat.storage.model.table import Table, TableLocator
 from deltacat.storage.model.table_version import TableVersion, TableVersionLocator
 from deltacat.storage.model.delete_parameters import DeleteParameters
+from deltacat.storage.model.partition_spec import (
+    PartitionFilter,
+    PartitionValues,
+    DeltaPartitionSpec,
+    StreamPartitionSpec,
+)
+from deltacat.storage.model.transform import (
+    Transform,
+    TransformName,
+    TransformParameters,
+    BucketingStrategy,
+    BucketTransformParameters,
+    IdentityTransformParameters,
+)
 
 from deltacat.storage.model.types import (
     CommitState,
@@ -56,4 +70,14 @@ __all__ = [
     "TableVersionLocator",
     "SortKey",
     "SortOrder",
+    "PartitionFilter",
+    "PartitionValues",
+    "DeltaPartitionSpec",
+    "StreamPartitionSpec",
+    "Transform",
+    "TransformName",
+    "TransformParameters",
+    "BucketingStrategy",
+    "BucketTransformParameters",
+    "IdentityTransformParameters",
 ]
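The new partition-spec and transform primitives are re-exported from the top-level storage package, so downstream code can import them without reaching into the model modules. A minimal import smoke test (imports only, since the constructor signatures are not shown in this diff):

```python
from deltacat.storage import (
    BucketingStrategy,
    BucketTransformParameters,
    DeltaPartitionSpec,
    IdentityTransformParameters,
    PartitionFilter,
    PartitionValues,
    StreamPartitionSpec,
    Transform,
    TransformName,
    TransformParameters,
)

# All ten names resolve from deltacat.storage after this release.
print(StreamPartitionSpec, DeltaPartitionSpec, TransformName)
```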
deltacat/storage/interface.py
CHANGED
@@ -23,6 +23,10 @@ from deltacat.storage import (
     TableVersion,
     SortKey,
     PartitionLocator,
+    PartitionFilter,
+    PartitionValues,
+    DeltaPartitionSpec,
+    StreamPartitionSpec,
 )
 from deltacat.types.media import (
     ContentType,
@@ -86,12 +90,13 @@ def list_stream_partitions(stream: Stream, *args, **kwargs) -> ListResult[Partit
 def list_deltas(
     namespace: str,
     table_name: str,
-    partition_values: Optional[List[Any]] = None,
+    partition_values: Optional[PartitionValues] = None,
     table_version: Optional[str] = None,
     first_stream_position: Optional[int] = None,
     last_stream_position: Optional[int] = None,
     ascending_order: Optional[bool] = None,
     include_manifest: bool = False,
+    partition_filter: Optional[PartitionFilter] = None,
     *args,
     **kwargs
 ) -> ListResult[Delta]:
@@ -107,6 +112,9 @@ def list_deltas(
     To conserve memory, the deltas returned do not include manifests by
     default. The manifests can either be optionally retrieved as part of this
     call or lazily loaded via subsequent calls to `get_delta_manifest`.
+
+    Note: partition_values is deprecated and will be removed in future releases.
+    Use partition_filter instead.
     """
     raise NotImplementedError("list_deltas not implemented")
 
@@ -134,9 +142,10 @@ def get_delta(
     namespace: str,
     table_name: str,
     stream_position: int,
-    partition_values: Optional[List[Any]] = None,
+    partition_values: Optional[PartitionValues] = None,
     table_version: Optional[str] = None,
     include_manifest: bool = False,
+    partition_filter: Optional[PartitionFilter] = None,
     *args,
     **kwargs
 ) -> Optional[Delta]:
@@ -149,6 +158,9 @@ def get_delta(
     To conserve memory, the delta returned does not include a manifest by
     default. The manifest can either be optionally retrieved as part of this
     call or lazily loaded via a subsequent call to `get_delta_manifest`.
+
+    Note: partition_values is deprecated and will be removed in future releases.
+    Use partition_filter instead.
     """
     raise NotImplementedError("get_delta not implemented")
 
@@ -156,9 +168,10 @@ def get_delta(
 def get_latest_delta(
     namespace: str,
     table_name: str,
-    partition_values: Optional[List[Any]] = None,
+    partition_values: Optional[PartitionValues] = None,
     table_version: Optional[str] = None,
     include_manifest: bool = False,
+    partition_filter: Optional[PartitionFilter] = None,
     *args,
     **kwargs
 ) -> Optional[Delta]:
@@ -172,6 +185,9 @@ def get_latest_delta(
     To conserve memory, the delta returned does not include a manifest by
     default. The manifest can either be optionally retrieved as part of this
     call or lazily loaded via a subsequent call to `get_delta_manifest`.
+
+    Note: partition_values is deprecated and will be removed in future releases.
+    Use partition_filter instead.
     """
     raise NotImplementedError("get_latest_delta not implemented")
 
@@ -185,6 +201,7 @@ def download_delta(
     file_reader_kwargs_provider: Optional[ReadKwargsProvider] = None,
     ray_options_provider: Callable[[int, Any], Dict[str, Any]] = None,
     distributed_dataset_type: DistributedDatasetType = DistributedDatasetType.RAY_DATASET,
+    partition_filter: Optional[PartitionFilter] = None,
     *args,
     **kwargs
 ) -> Union[LocalDataset, DistributedDataset]: # type: ignore
@@ -194,6 +211,10 @@ def download_delta(
     across this Ray cluster's object store memory. Ordered table N of a local
     table list, or ordered block N of a distributed dataset, always contain
     the contents of ordered delta manifest entry N.
+
+    partition_filter is an optional parameter which determines which files to
+    download from the delta manifest. A delta manifest contains all the data files
+    for a given delta.
     """
     raise NotImplementedError("download_delta not implemented")
 
@@ -268,6 +289,7 @@ def create_table_version(
     table_description: Optional[str] = None,
     table_properties: Optional[Dict[str, str]] = None,
     supported_content_types: Optional[List[ContentType]] = None,
+    partition_spec: Optional[StreamPartitionSpec] = None,
     *args,
     **kwargs
 ) -> Stream:
@@ -300,6 +322,8 @@ def create_table_version(
 
     Validate: Raise an error for any fields that don't fit the schema. An
     explicit subset of column names to validate may optionally be specified.
+
+    Either partition_keys or partition_spec must be specified but not both.
     """
     raise NotImplementedError("create_table_version not implemented")
 
@@ -402,7 +426,7 @@ def get_stream(
 
 
 def stage_partition(
-    stream: Stream, partition_values: Optional[List[Any]] = None, *args, **kwargs
+    stream: Stream, partition_values: Optional[PartitionValues] = None, *args, **kwargs
 ) -> Partition:
     """
     Stages a new partition for the given stream and partition values. Returns
@@ -410,6 +434,9 @@ def stage_partition(
     with the same partition values, then it will have its previous partition ID
     set to the ID of the partition being replaced. Partition keys should not be
     specified for unpartitioned tables.
+
+    The partition_values must represents the results of transforms in a partition
+    spec specified in the stream.
     """
     raise NotImplementedError("stage_partition not implemented")
 
@@ -439,7 +466,7 @@ def delete_partition(
     namespace: str,
     table_name: str,
     table_version: Optional[str] = None,
-    partition_values: Optional[List[Any]] = None,
+    partition_values: Optional[PartitionValues] = None,
     *args,
     **kwargs
 ) -> None:
@@ -454,7 +481,7 @@ def delete_partition(
 
 def get_partition(
     stream_locator: StreamLocator,
-    partition_values: Optional[List[Any]] = None,
+    partition_values: Optional[PartitionValues] = None,
     *args,
     **kwargs
 ) -> Optional[Partition]:
@@ -477,6 +504,8 @@ def stage_delta(
     s3_table_writer_kwargs: Optional[Dict[str, Any]] = None,
     content_type: ContentType = ContentType.PARQUET,
     delete_parameters: Optional[DeleteParameters] = None,
+    partition_spec: Optional[DeltaPartitionSpec] = None,
+    partition_values: Optional[PartitionValues] = None,
     *args,
     **kwargs
 ) -> Delta:
@@ -484,6 +513,13 @@ def stage_delta(
     Writes the given table to 1 or more S3 files. Returns an unregistered
     delta whose manifest entries point to the uploaded files. Applies any
     schema consistency policies configured for the parent table version.
+
+    The partition spec will be used to split the input table into
+    multiple files. Optionally, partition_values can be provided to avoid
+    this method to recompute partition_values from the provided data.
+
+    Raises an error if the provided data does not conform to a unique ordered
+    list of partition_values
     """
     raise NotImplementedError("stage_delta not implemented")
 
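download_delta's new partition_filter narrows which manifest entries are fetched; combined with the per-entry partition_values recorded in manifest metadata (see the manifest.py change above), an implementation can skip files outside the requested partitions. A standalone sketch of that selection step (simplified stand-ins, not the real PartitionFilter or manifest classes):

```python
from typing import List, Optional, Sequence, Tuple

PartitionValues = Tuple[str, ...]


def select_entry_indices(
    entry_partition_values: Sequence[Optional[PartitionValues]],
    allowed_partition_values: Optional[List[PartitionValues]] = None,
) -> List[int]:
    """Return indices of manifest entries whose partition values pass the filter."""
    if allowed_partition_values is None:
        # No filter: download every entry in the delta manifest.
        return list(range(len(entry_partition_values)))
    allowed = set(allowed_partition_values)
    return [i for i, values in enumerate(entry_partition_values) if values in allowed]


entries = [("2024-01-01",), ("2024-01-02",), ("2024-01-01",)]
print(select_entry_indices(entries, [("2024-01-01",)]))  # -> [0, 2]
```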