deltacat 2.0.0b3__py3-none-any.whl → 2.0.0b7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +1 -1
- deltacat/catalog/iceberg/impl.py +15 -2
- deltacat/catalog/iceberg/overrides.py +12 -14
- deltacat/catalog/main/impl.py +1 -1
- deltacat/daft/__init__.py +0 -0
- deltacat/daft/daft_scan.py +111 -0
- deltacat/daft/model.py +258 -0
- deltacat/examples/iceberg/iceberg_bucket_writer.py +64 -17
- deltacat/experimental/__init__.py +0 -0
- deltacat/experimental/daft/__init__.py +4 -0
- deltacat/experimental/daft/daft_catalog.py +229 -0
- deltacat/storage/model/manifest.py +49 -0
- deltacat/tests/storage/model/test_manifest.py +129 -0
- deltacat/utils/daft.py +2 -5
- {deltacat-2.0.0b3.dist-info → deltacat-2.0.0b7.dist-info}/METADATA +3 -3
- {deltacat-2.0.0b3.dist-info → deltacat-2.0.0b7.dist-info}/RECORD +19 -12
- {deltacat-2.0.0b3.dist-info → deltacat-2.0.0b7.dist-info}/LICENSE +0 -0
- {deltacat-2.0.0b3.dist-info → deltacat-2.0.0b7.dist-info}/WHEEL +0 -0
- {deltacat-2.0.0b3.dist-info → deltacat-2.0.0b7.dist-info}/top_level.txt +0 -0
deltacat/__init__.py
CHANGED
deltacat/catalog/iceberg/impl.py
CHANGED
@@ -2,10 +2,13 @@ import logging
 
 from typing import Any, Dict, List, Optional, Union
 
-from daft import DataFrame
+from daft import DataFrame, context
+from daft.daft import ScanOperatorHandle, StorageConfig
+from daft.logical.builder import LogicalPlanBuilder
 
 from deltacat import logs
 from deltacat.catalog.model.table_definition import TableDefinition
+from deltacat.daft.daft_scan import DeltaCatScanOperator
 from deltacat.exceptions import TableAlreadyExistsError
 from deltacat.storage.iceberg.iceberg_scan_planner import IcebergScanPlanner
 from deltacat.storage.iceberg.model import PartitionSchemeMapper, SchemaMapper
@@ -144,7 +147,17 @@ def read_table(
     table: str, *args, namespace: Optional[str] = None, **kwargs
 ) -> DistributedDataset:
     """Read a table into a distributed dataset."""
-
+    # TODO: more proper IO configuration
+    io_config = context.get_context().daft_planning_config.default_io_config
+    multithreaded_io = context.get_context().get_or_create_runner().name != "ray"
+
+    storage_config = StorageConfig(multithreaded_io, io_config)
+
+    dc_table = get_table(name=table, namespace=namespace, **kwargs)
+    dc_scan_operator = DeltaCatScanOperator(dc_table, storage_config)
+    handle = ScanOperatorHandle.from_python_scan_operator(dc_scan_operator)
+    builder = LogicalPlanBuilder.from_tabular_scan(scan_operator=handle)
+    return DataFrame(builder)
 
 
 def alter_table(
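With this change, `read_table` returns a lazy Daft DataFrame backed by the new `DeltaCatScanOperator`. A minimal usage sketch through the public DeltaCAT API; the table, namespace, and column names below are placeholders, and an initialized catalog is assumed:

```python
import deltacat as dc

# Placeholder identifiers; the table must already exist in the
# initialized DeltaCAT catalog.
df = dc.read_table(table="my_table", namespace="my_namespace")

# The result is a regular lazy Daft DataFrame.
df.select("my_column").show()
```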
deltacat/catalog/iceberg/overrides.py
CHANGED
@@ -5,12 +5,11 @@ from typing import Iterator, List
 from pyarrow.fs import FileSystem
 
 from pyiceberg.io.pyarrow import (
-
+    data_file_statistics_from_parquet_metadata,
     compute_statistics_plan,
     parquet_path_to_id_mapping,
 )
-from pyiceberg.table import Table
-from pyiceberg.table.snapshots import Operation
+from pyiceberg.table import Table
 from pyiceberg.manifest import DataFile, DataFileContent, FileFormat
 from pyiceberg.types import StructType, NestedField, IntegerType
 from pyiceberg.typedef import Record
@@ -24,11 +23,10 @@ def append(table: Table, paths: List[str]) -> None:
     # raise ValueError("Cannot write to tables with a sort-order")
 
     data_files = write_file(table, paths)
-
-
-
-
-    merge.commit()
+    with table.transaction() as txn:
+        with txn.update_snapshot().fast_append() as snapshot_update:
+            for data_file in data_files:
+                snapshot_update.append_data_file(data_file)
 
 
 def write_file(table: Table, paths: Iterator[str]) -> Iterator[DataFile]:
@@ -41,6 +39,11 @@ def write_file(table: Table, paths: Iterator[str]) -> Iterator[DataFile]:
         fs_path = fs_tuple[1]
         with fs.open_input_file(fs_path) as native_file:
             parquet_metadata = pq.read_metadata(native_file)
+            statistics = data_file_statistics_from_parquet_metadata(
+                parquet_metadata=parquet_metadata,
+                stats_columns=compute_statistics_plan(table.schema(), table.properties),
+                parquet_column_mapping=parquet_path_to_id_mapping(table.schema()),
+            )
         data_file = DataFile(
             content=DataFileContent.DATA,
             file_path=file_path,
@@ -63,12 +66,7 @@ def write_file(table: Table, paths: Iterator[str]) -> Iterator[DataFile]:
             spec_id=table.spec().spec_id,
             equality_ids=None,
             key_metadata=None,
-
-        fill_parquet_file_metadata(
-            data_file=data_file,
-            parquet_metadata=parquet_metadata,
-            stats_columns=compute_statistics_plan(table.schema(), table.properties),
-            parquet_column_mapping=parquet_path_to_id_mapping(table.schema()),
+            **statistics.to_serialized_dict(),
         )
         data_files.append(data_file)
     return data_files
deltacat/catalog/main/impl.py
CHANGED
@@ -709,7 +709,7 @@ def _get_deltas_from_partition_filter(
 
 def _get_storage(**kwargs):
     """
-    Returns the implementation of `deltacat.storage.interface` to use with this catalog
+    Returns the implementation of `deltacat.storage.interface` to use with this catalog
 
     This is configured in the `CatalogProperties` stored during initialization and passed through `delegate.py`
     """
deltacat/daft/__init__.py
File without changes
deltacat/daft/daft_scan.py
ADDED
@@ -0,0 +1,111 @@
+from typing import Iterator
+
+from daft import Schema
+from daft.daft import (
+    StorageConfig,
+    PartitionField,
+    Pushdowns,
+    ScanTask,
+    FileFormatConfig,
+    ParquetSourceConfig,
+)
+from daft.io.scan import ScanOperator
+
+from deltacat.catalog.model.table_definition import TableDefinition
+from deltacat.daft.model import DaftPartitionKeyMapper
+
+
+class DeltaCatScanOperator(ScanOperator):
+    def __init__(self, table: TableDefinition, storage_config: StorageConfig) -> None:
+        super().__init__()
+        self.table = table
+        self._schema = self._infer_schema()
+        self.partition_keys = self._infer_partition_keys()
+        self.storage_config = storage_config
+
+    def schema(self) -> Schema:
+        return self._schema
+
+    def name(self) -> str:
+        return "DeltaCatScanOperator"
+
+    def display_name(self) -> str:
+        return f"DeltaCATScanOperator({self.table.table.namespace}.{self.table.table.table_name})"
+
+    def partitioning_keys(self) -> list[PartitionField]:
+        return self.partition_keys
+
+    def multiline_display(self) -> list[str]:
+        return [
+            self.display_name(),
+            f"Schema = {self._schema}",
+            f"Partitioning keys = {self.partitioning_keys}",
+            f"Storage config = {self.storage_config}",
+        ]
+
+    def to_scan_tasks(self, pushdowns: Pushdowns) -> Iterator[ScanTask]:
+        # TODO: implement pushdown predicate on DeltaCAT
+        dc_scan_plan = self.table.create_scan_plan()
+        scan_tasks = []
+        file_format_config = FileFormatConfig.from_parquet_config(
+            # maybe this: ParquetSourceConfig(field_id_mapping=self._field_id_mapping)
+            ParquetSourceConfig()
+        )
+        for dc_scan_task in dc_scan_plan.scan_tasks:
+            for data_file in dc_scan_task.data_files():
+                st = ScanTask.catalog_scan_task(
+                    file=data_file.file_path,
+                    file_format=file_format_config,
+                    schema=self._schema._schema,
+                    storage_config=self.storage_config,
+                    pushdowns=pushdowns,
+                )
+                scan_tasks.append(st)
+        return iter(scan_tasks)
+
+    def can_absorb_filter(self) -> bool:
+        return False
+
+    def can_absorb_limit(self) -> bool:
+        return False
+
+    def can_absorb_select(self) -> bool:
+        return True
+
+    def _infer_schema(self) -> Schema:
+
+        if not (
+            self.table and self.table.table_version and self.table.table_version.schema
+        ):
+            raise RuntimeError(
+                f"Failed to infer schema for DeltaCAT Table "
+                f"{self.table.table.namespace}.{self.table.table.table_name}"
+            )
+
+        return Schema.from_pyarrow_schema(self.table.table_version.schema.arrow)
+
+    def _infer_partition_keys(self) -> list[PartitionField]:
+        if not (
+            self.table
+            and self.table.table_version
+            and self.table.table_version.partition_scheme
+            and self.table.table_version.schema
+        ):
+            raise RuntimeError(
+                f"Failed to infer partition keys for DeltaCAT Table "
+                f"{self.table.table.namespace}.{self.table.table.table_name}"
+            )
+
+        schema = self.table.table_version.schema
+        partition_keys = self.table.table_version.partition_scheme.keys
+        if not partition_keys:
+            return []
+
+        partition_fields = []
+        for key in partition_keys:
+            field = DaftPartitionKeyMapper.unmap(key, schema)
+            # Assert that the returned value is not None.
+            assert field is not None, f"Unmapping failed for key {key}"
+            partition_fields.append(field)
+
+        return partition_fields
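This scan operator is what the new `read_table` in `deltacat/catalog/iceberg/impl.py` plugs into Daft's logical plan. A sketch of the same wiring done by hand; the table and namespace names are placeholders and `multithreaded_io` is hard-coded here for brevity:

```python
from daft import DataFrame, context
from daft.daft import ScanOperatorHandle, StorageConfig
from daft.logical.builder import LogicalPlanBuilder

import deltacat as dc
from deltacat.daft.daft_scan import DeltaCatScanOperator

# Placeholder table; assumes a DeltaCAT catalog is already initialized.
dc_table = dc.get_table(name="my_table", namespace="my_namespace")

io_config = context.get_context().daft_planning_config.default_io_config
storage_config = StorageConfig(True, io_config)  # multithreaded_io=True

handle = ScanOperatorHandle.from_python_scan_operator(
    DeltaCatScanOperator(dc_table, storage_config)
)
df = DataFrame(LogicalPlanBuilder.from_tabular_scan(scan_operator=handle))
```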
deltacat/daft/model.py
ADDED
@@ -0,0 +1,258 @@
+from typing import Optional
+
+import pyarrow as pa
+from pyarrow import Field as PaField
+from daft import Schema as DaftSchema, DataType
+from daft.daft import (
+    PartitionField as DaftPartitionField,
+    PartitionTransform as DaftTransform,
+)
+from daft.logical.schema import Field as DaftField
+from daft.io.scan import make_partition_field
+
+from deltacat.storage.model.schema import Schema
+from deltacat.storage.model.interop import ModelMapper
+from deltacat.storage.model.partition import PartitionKey
+from deltacat.storage.model.transform import (
+    BucketingStrategy,
+    Transform,
+    BucketTransform,
+    HourTransform,
+    DayTransform,
+    MonthTransform,
+    YearTransform,
+    IdentityTransform,
+    TruncateTransform,
+)
+
+
+class DaftFieldMapper(ModelMapper[DaftField, PaField]):
+    @staticmethod
+    def map(
+        obj: Optional[DaftField],
+        **kwargs,
+    ) -> Optional[PaField]:
+        """Convert Daft Field to PyArrow Field.
+
+        Args:
+            obj: The Daft Field to convert
+            **kwargs: Additional arguments
+
+        Returns:
+            Converted PyArrow Field object
+        """
+        if obj is None:
+            return None
+
+        return pa.field(
+            name=obj.name,
+            type=obj.dtype.to_arrow_dtype(),
+        )
+
+    @staticmethod
+    def unmap(
+        obj: Optional[PaField],
+        **kwargs,
+    ) -> Optional[DaftField]:
+        """Convert PyArrow Field to Daft Field.
+
+        Args:
+            obj: The PyArrow Field to convert
+            **kwargs: Additional arguments
+
+        Returns:
+            Converted Daft Field object
+        """
+        if obj is None:
+            return None
+
+        return DaftField.create(
+            name=obj.name,
+            dtype=DataType.from_arrow_type(obj.type),  # type: ignore
+        )
+
+
+class DaftTransformMapper(ModelMapper[DaftTransform, Transform]):
+    @staticmethod
+    def map(
+        obj: Optional[DaftTransform],
+        **kwargs,
+    ) -> Optional[Transform]:
+        """Convert DaftTransform to DeltaCAT Transform.
+
+        Args:
+            obj: The DaftTransform to convert
+            **kwargs: Additional arguments
+
+        Returns:
+            Converted Transform object
+        """
+
+        # daft.PartitionTransform doesn't have a Python interface for accessing its attributes,
+        # thus conversion is not possible.
+        # TODO: request Daft to expose Python friendly interface for daft.PartitionTransform
+        raise NotImplementedError(
+            "Converting transform from Daft to DeltaCAT is not supported"
+        )
+
+    @staticmethod
+    def unmap(
+        obj: Optional[Transform],
+        **kwargs,
+    ) -> Optional[DaftTransform]:
+        """Convert DeltaCAT Transform to DaftTransform.
+
+        Args:
+            obj: The Transform to convert
+            **kwargs: Additional arguments
+
+        Returns:
+            Converted DaftTransform object
+        """
+        if obj is None:
+            return None
+
+        # Map DeltaCAT transforms to Daft transforms using isinstance
+
+        if isinstance(obj, IdentityTransform):
+            return DaftTransform.identity()
+        elif isinstance(obj, HourTransform):
+            return DaftTransform.hour()
+        elif isinstance(obj, DayTransform):
+            return DaftTransform.day()
+        elif isinstance(obj, MonthTransform):
+            return DaftTransform.month()
+        elif isinstance(obj, YearTransform):
+            return DaftTransform.year()
+        elif isinstance(obj, BucketTransform):
+            if obj.parameters.bucketing_strategy == BucketingStrategy.ICEBERG:
+                return DaftTransform.iceberg_bucket(obj.parameters.num_buckets)
+            else:
+                raise ValueError(
+                    f"Unsupported Bucketing Strategy: {obj.parameters.bucketing_strategy}"
+                )
+        elif isinstance(obj, TruncateTransform):
+            return DaftTransform.iceberg_truncate(obj.parameters.width)
+
+        raise ValueError(f"Unsupported Transform: {obj}")
+
+
+class DaftPartitionKeyMapper(ModelMapper[DaftPartitionField, PartitionKey]):
+    @staticmethod
+    def map(
+        obj: Optional[DaftPartitionField],
+        schema: Optional[DaftSchema] = None,
+        **kwargs,
+    ) -> Optional[PartitionKey]:
+        """Convert DaftPartitionField to PartitionKey.
+
+        Args:
+            obj: The DaftPartitionField to convert
+            schema: The Daft schema containing field information
+            **kwargs: Additional arguments
+
+        Returns:
+            Converted PartitionKey object
+        """
+        # Daft PartitionField only exposes 1 attribute `field` which is not enough
+        # to convert to DeltaCAT PartitionKey
+        # TODO: request Daft to expose more Python friendly interface for PartitionField
+        raise NotImplementedError(
+            f"Converting Daft PartitionField to DeltaCAT PartitionKey is not supported"
+        )
+
+    @staticmethod
+    def unmap(
+        obj: Optional[PartitionKey],
+        schema: Optional[Schema] = None,
+        **kwargs,
+    ) -> Optional[DaftPartitionField]:
+        """Convert PartitionKey to DaftPartitionField.
+
+        Args:
+            obj: The DeltaCAT PartitionKey to convert
+            schema: The Schema containing field information
+            **kwargs: Additional arguments
+
+        Returns:
+            Converted DaftPartitionField object
+        """
+        if obj is None:
+            return None
+        if obj.name is None:
+            raise ValueError("Name is required for PartitionKey conversion")
+        if not schema:
+            raise ValueError("Schema is required for PartitionKey conversion")
+        if len(obj.key) < 1:
+            raise ValueError(
+                f"At least 1 PartitionKey FieldLocator is expected, instead got {len(obj.key)}. FieldLocators: {obj.key}."
+            )
+
+        # Get the source field from schema - FieldLocator in PartitionKey.key points to the source field of partition field
+        dc_source_field = schema.field(obj.key[0]).arrow
+        daft_source_field = DaftFieldMapper.unmap(obj=dc_source_field)
+        # Convert transform if present
+        daft_transform = DaftTransformMapper.unmap(obj.transform)
+        daft_partition_field = DaftPartitionKeyMapper.get_daft_partition_field(
+            partition_field_name=obj.name,
+            daft_source_field=daft_source_field,
+            dc_transform=obj.transform,
+        )
+
+        # Create DaftPartitionField
+        return make_partition_field(
+            field=daft_partition_field,
+            source_field=daft_source_field,
+            transform=daft_transform,
+        )
+
+    @staticmethod
+    def get_daft_partition_field(
+        partition_field_name: str,
+        daft_source_field: Optional[DaftField],
+        # TODO: replace DeltaCAT transform with Daft Transform for uniformality
+        # We cannot use Daft Transform here because Daft Transform doesn't have a Python interface for us to
+        # access its attributes.
+        # TODO: request Daft to provide a more python friendly interface for Daft Tranform
+        dc_transform: Optional[Transform],
+    ) -> DaftField:
+        """Generate Daft Partition Field given partition field name, source field and transform.
+        Partition field type is inferred using source field type and transform.
+
+        Args:
+            partition_field_name (str): the specified result field name
+            daft_source_field (DaftField): the source field of the partition field
+            daft_transform (DaftTransform): transform applied on the source field to create partition field
+
+        Returns:
+            DaftField: Daft Field representing the partition field
+        """
+        if daft_source_field is None:
+            raise ValueError("Source field is required for PartitionField conversion")
+        if dc_transform is None:
+            raise ValueError("Transform is required for PartitionField conversion")
+
+        result_type = None
+        # Below type conversion logic references Daft - Iceberg conversion logic:
+        # https://github.com/Eventual-Inc/Daft/blob/7f2e9b5fb50fdfe858be17572f132b37dd6e5ab2/daft/iceberg/iceberg_scan.py#L61-L85
+        if isinstance(dc_transform, IdentityTransform):
+            result_type = daft_source_field.dtype
+        elif isinstance(dc_transform, YearTransform):
+            result_type = DataType.int32()
+        elif isinstance(dc_transform, MonthTransform):
+            result_type = DataType.int32()
+        elif isinstance(dc_transform, DayTransform):
+            result_type = DataType.int32()
+        elif isinstance(dc_transform, HourTransform):
+            result_type = DataType.int32()
+        elif isinstance(dc_transform, BucketTransform):
+            result_type = DataType.int32()
+        elif isinstance(dc_transform, TruncateTransform):
+            result_type = daft_source_field.dtype
+        else:
+            raise ValueError(f"Unsupported transform: {dc_transform}")
+
+        return DaftField.create(
+            name=partition_field_name,
+            dtype=result_type,
+        )
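Of the three mappers, only `DaftFieldMapper` converts in both directions today. A small round-trip sketch; the field name and type are arbitrary:

```python
import pyarrow as pa

from deltacat.daft.model import DaftFieldMapper

# PyArrow -> Daft -> PyArrow round trip of a single field.
pa_field = pa.field("symbol", pa.string())
daft_field = DaftFieldMapper.unmap(pa_field)
restored = DaftFieldMapper.map(daft_field)
assert restored.name == "symbol"
```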
deltacat/examples/iceberg/iceberg_bucket_writer.py
CHANGED
@@ -1,11 +1,15 @@
 import os
 import logging
 
+import uuid
 import daft
+from pyiceberg.catalog import CatalogType
+
 import deltacat as dc
 
 from deltacat import logs
 from deltacat import IcebergCatalog
+from deltacat.catalog.iceberg import IcebergCatalogConfig
 from deltacat.examples.common.fixtures import (
     store_cli_args_in_os_environ,
 )
@@ -30,6 +34,24 @@ driver_logger = logs.configure_application_logger(logging.getLogger(__name__))
 
 
 def run(warehouse="s3://my-bucket/my/key/prefix", **kwargs):
+    """
+    This is an e2e example that
+    1. creates a DeltaCAT Table (backed by an Iceberg Table) in Glue
+    2. writes data into the DeltaCAT Table
+    3. reads data from the DeltaCAT Table using Daft
+
+    To run the script:
+    1. prepare an AWS Account
+        1. prepare a S3 location where the data will be written to, which will be used in Step 3.
+        2. prepare an IAM Role that has access to the S3 location and Glue
+    2. retrieve the IAM Role AWS Credential and cache locally in ~/.aws/credentials
+    3. run below command to execute the example
+    ```
+    make venv && source venv/bin/activate
+    python -m deltacat.examples.iceberg.iceberg_bucket_writer --warehouse=s3://<YOUR_S3_LOCATION>
+    ```
+
+    """
     # create any runtime environment required to run the example
     runtime_env = create_ray_runtime_environment()
 
@@ -38,6 +60,7 @@ def run(warehouse="s3://my-bucket/my/key/prefix", **kwargs):
     # Only the `iceberg` data catalog is provided so it will become the default.
     # If initializing multiple catalogs, use the `default_catalog_name` param
     # to specify which catalog should be the default.
+
     dc.init(
         catalogs={
             # the name of the DeltaCAT catalog is "iceberg"
@@ -49,11 +72,13 @@ def run(warehouse="s3://my-bucket/my/key/prefix", **kwargs):
                 name="example-iceberg-catalog",
                 # for additional properties see:
                 # https://py.iceberg.apache.org/configuration/
-
-
-
-
-
+                config=IcebergCatalogConfig(
+                    type=CatalogType.GLUE,
+                    properties={
+                        "warehouse": warehouse,
+                        "region_name": "us-east-1",
+                    },
+                ),
             )
         },
         # pass the runtime environment into ray.init()
@@ -89,10 +114,10 @@ def run(warehouse="s3://my-bucket/my/key/prefix", **kwargs):
         }
     )
 
-    # write to a table named `test_namespace.test_table_bucketed`
+    # write to a table named `test_namespace.test_table_bucketed-<SUFFIX>`
     # we don't need to specify which catalog to create this table in since
     # only the "iceberg" catalog is available
-    table_name = "test_table_bucketed"
+    table_name = f"test_table_bucketed-{uuid.uuid4().hex[:8]}"
     namespace = "test_namespace"
     print(f"Creating Glue Table: {namespace}.{table_name}")
     dc.write_to_table(
@@ -106,9 +131,40 @@ def run(warehouse="s3://my-bucket/my/key/prefix", **kwargs):
     )
 
     print(f"Getting Glue Table: {namespace}.{table_name}")
-    table_definition = dc.get_table(table_name, namespace)
+    table_definition = dc.get_table(name=table_name, namespace=namespace)
     print(f"Retrieved Glue Table: {table_definition}")
 
+    # Read Data from DeltaCAT Table (backed by Iceberg) using Daft
+    daft_dataframe = dc.read_table(table=table_name, namespace=namespace)
+
+    daft_dataframe.where(df["bid"] > 200.0).show()
+    # Expected result:
+    # ╭────────┬─────────┬─────────╮
+    # │ symbol ┆ bid     ┆ ask     │
+    # │ ---    ┆ ---     ┆ ---     │
+    # │ Utf8   ┆ Float64 ┆ Float64 │
+    # ╞════════╪═════════╪═════════╡
+    # │ meta   ┆ 392.03  ┆ 392.09  │
+    # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
+    # │ msft   ┆ 403.25  ┆ 403.27  │
+    # ╰────────┴─────────┴─────────╯
+
+    daft_dataframe.select("symbol").show()
+    # Expected result:
+    # ╭────────╮
+    # │ symbol │
+    # │ ---    │
+    # │ Utf8   │
+    # ╞════════╡
+    # │ meta   │
+    # ├╌╌╌╌╌╌╌╌┤
+    # │ amzn   │
+    # ├╌╌╌╌╌╌╌╌┤
+    # │ goog   │
+    # ├╌╌╌╌╌╌╌╌┤
+    # │ msft   │
+    # ╰────────╯
+
 
 if __name__ == "__main__":
     example_script_args = [
@@ -121,15 +177,6 @@ if __name__ == "__main__":
                 "type": str,
             },
         ),
-        (
-            [
-                "--STAGE",
-            ],
-            {
-                "help": "Example runtime environment stage (e.g. dev, alpha, beta, prod).",
-                "type": str,
-            },
-        ),
     ]
 
     # store any CLI args in the runtime environment
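For context, the example now passes an explicit `IcebergCatalogConfig` targeting Glue. Below is a sketch of the full `dc.init` call reconstructed from the fragments visible in this hunk; the exact `IcebergCatalog` keyword arguments outside the visible lines are assumptions:

```python
import deltacat as dc
from deltacat import IcebergCatalog
from deltacat.catalog.iceberg import IcebergCatalogConfig
from pyiceberg.catalog import CatalogType

dc.init(
    catalogs={
        # the name of the DeltaCAT catalog is "iceberg"
        "iceberg": IcebergCatalog(
            name="example-iceberg-catalog",
            config=IcebergCatalogConfig(
                type=CatalogType.GLUE,
                properties={
                    "warehouse": "s3://<YOUR_S3_LOCATION>",
                    "region_name": "us-east-1",
                },
            ),
        )
    },
)
```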
deltacat/experimental/__init__.py
File without changes
deltacat/experimental/daft/daft_catalog.py
ADDED
@@ -0,0 +1,229 @@
+from __future__ import annotations
+
+from typing import Tuple, Optional
+
+from deltacat.catalog.model.catalog import Catalog as DCCatalog
+from deltacat.catalog.model.table_definition import TableDefinition
+
+from daft.catalog import Catalog, Identifier, Table
+from daft.dataframe import DataFrame
+from daft.logical.schema import Schema
+from deltacat.constants import DEFAULT_NAMESPACE
+
+
+class DaftCatalog(Catalog):
+    """
+    Wrapper class to create a Daft catalog from a DeltaCAT catalog.
+
+    The initialization of DeltaCAT and Daft catalogs is managed in `deltacat.catalog.catalog.py`. The user
+    is just expected to initialize catalogs through the DeltaCAT public interface (init / put_catalog).
+
+    TODO (mccember) in follow up PR we need to consider how to keep the DeltaCAT Catalogs class and Daft session in sync,
+    and the user-facing entrypoint to get a Daft catalog
+
+    This class itself expects a `Catalog` and will invoke the underlying implementation
+    similar to `deltacat.catalog.delegate.py`, like:
+    catalog.impl.create_namespace(namespace, inner=catalog.inner)
+
+    We cannot route calls through the higher level catalog registry / delegate since this wrapper class is at a lower
+    layer and does not manage registering catalogs.
+    """
+
+    def __init__(self, catalog: DCCatalog, name: str):
+        """
+        Initialize given DeltaCAT catalog. This catalog is also registered with DeltaCAT (via deltacat.put_catalog) given the provided Name
+
+        :param catalog: DeltaCAT Catalog object. If None, the catalog will be fetched from `deltacat.Catalogs`
+            given the catalog name.
+
+        :param name: Name of DeltaCAT catalog. If the name is not yet registered with `deltacat.Catalogs`,
+            it will be registered upon creation to ensure that the DeltaCAT and Daft catalogs keep in sync.
+
+        :param kwargs: Additional keyword arguments passed to deltacat.get_catalog or deltacat.put_catalog,
+            such as 'namespace' for tests.
+        """
+        self.dc_catalog = catalog
+        self._name = name
+
+    @property
+    def name(self) -> str:
+        return self._name
+
+    ###
+    # create_*
+    ###
+    def create_namespace(self, identifier: Identifier | str):
+        """Create a new namespace in the catalog."""
+        if isinstance(identifier, Identifier):
+            identifier = str(identifier)
+        self.dc_catalog.impl.create_namespace(identifier, inner=self.dc_catalog.inner)
+
+    def create_table(
+        self, identifier: Identifier | str, source: Schema | DataFrame, **kwargs
+    ) -> Table:
+        """
+        Create a DeltaCAT table via Daft catalog API
+
+        End users calling create_table through the daft table API may provide kwargs which will be plumbed through
+        to deltacat create_table. For full list of keyword arguments accepted by create_table.
+
+        Note: as of 4/22, Daft create_table does not yet support kwargs. Tracked at: https://github.com/Eventual-Inc/Daft/issues/4195
+
+        :param identifier: Daft table identifier. Sequence of strings of the format (namespace) or (namespace, table)
+            or (namespace, table, table version). If this is a string, it is a dot delimited string of the same format.
+            Identifiers can be created either like Identifier("namespace", "table", "version") OR
+            Identifier.from_str("namespace.table.version")
+
+        :param source: a TableSource, either a Daft DataFrame, Daft Schema, or str (filesystem path)
+        """
+        if isinstance(source, DataFrame):
+            return self._create_table_from_df(identifier, source)
+        elif isinstance(source, Schema):
+            return self._create_table_from_schema(identifier, source)
+        else:
+            raise Exception(
+                f"Expected table source to be Schema or DataFrame. Found: {type(source)}"
+            )
+
+    def _create_table_from_df(
+        self, ident: Identifier | str, source: DataFrame, **kwargs
+    ) -> Table:
+        """
+        Create a table from a DataFrame.
+        """
+        t = self._create_table_from_schema(ident, source.schema(), **kwargs)
+        # TODO (mccember) append data upon creation
+        return t
+
+    def _create_table_from_schema(
+        self, ident: Identifier | str, source: Schema, **kwargs
+    ) -> Table:
+        """
+        Create a table from a schema.
+        """
+        namespace, name, version = self._extract_namespace_name_version(ident)
+
+        # Convert the Daft schema to a DeltaCAT schema
+        # This is a simplified version, would need to be enhanced for production
+        deltacat_schema = self._convert_schema_to_deltacat(source)
+
+        # Create the table in DeltaCAT
+        table_def = self.dc_catalog.impl.create_table(
+            name,
+            namespace=namespace,
+            version=version,
+            schema=deltacat_schema,
+            inner=self.dc_catalog.inner,
+            **kwargs,
+        )
+
+        return DaftTable._from_obj(table_def)
+
+    ###
+    # drop_*
+    ###
+
+    def drop_namespace(self, identifier: Identifier | str):
+        raise NotImplementedError()
+
+    def drop_table(self, identifier: Identifier | str):
+        raise NotImplementedError()
+
+    ###
+    # get_*
+    ###
+
+    def get_table(self, identifier: Identifier | str, **kwargs) -> Table:
+        namespace, table, version = self._extract_namespace_name_version(identifier)
+
+        table_def = self.dc_catalog.impl.get_table(
+            table,
+            namespace=namespace,
+            table_version=version,
+            inner=self.dc_catalog.inner,
+            **kwargs,
+        )
+
+        if not table_def:
+            raise ValueError(f"Table {identifier} not found")
+
+        return DaftTable._from_obj(table_def)
+
+    ###
+    # list_*
+    ###
+
+    def list_namespaces(self, pattern: str | None = None) -> list[Identifier]:
+        raise NotImplementedError("Not implemented")
+
+    def list_tables(self, pattern: str | None = None) -> list[str]:
+        raise NotImplementedError("Not implemented")
+
+    def _extract_namespace_name_version(
+        self, ident: Identifier | str
+    ) -> Tuple[str, str, Optional[str]]:
+        """
+        Extract namespace, name,version from identifier
+
+        Returns a 3-tuple. If no namespace is provided, uses DeltaCAT defualt namespace
+        """
+        default_namespace = DEFAULT_NAMESPACE
+
+        if isinstance(ident, str):
+            ident = Identifier.from_str(ident)
+
+        if isinstance(ident, Identifier):
+            if len(ident) == 1:
+                return (default_namespace, ident[0], None)
+            elif len(ident) == 2:
+                return (ident[0], ident[1], None)
+            elif len(ident) == 3:
+                return (ident[0], ident[1], ident[2])
+            else:
+                raise ValueError(
+                    f"Expected table identifier to be in format (table) or (namespace, table)"
+                    f"or (namespace, table, version). Found: {ident}"
+                )
+
+    def _convert_schema_to_deltacat(self, schema: Schema):
+        """Convert Daft schema to DeltaCAT schema.
+        For now, just use PyArrow schema as intermediary
+        TODO look into how enhancements on schema can be propagated between Daft<=>DeltaCAT
+        """
+        from deltacat.storage.model.schema import Schema as DeltaCATSchema
+
+        return DeltaCATSchema.of(schema=schema.to_pyarrow_schema())
+
+
+class DaftTable(Table):
+    """
+    Wrapper class to create a Daft table from a DeltaCAT table
+    """
+
+    _inner: TableDefinition
+
+    _read_options = set()
+    _write_options = set()
+
+    def __init__(self, inner: TableDefinition):
+        self._inner = inner
+
+    @property
+    def name(self) -> str:
+        """Return the table name."""
+        return self._inner.table_version.table_name
+
+    @staticmethod
+    def _from_obj(obj: object) -> DaftTable:
+        """Returns a DeltaCATTable if the given object can be adapted so."""
+        if isinstance(obj, TableDefinition):
+            t = DaftTable.__new__(DaftTable)
+            t._inner = obj
+            return t
+        raise ValueError(f"Unsupported DeltaCAT table type: {type(obj)}")
+
+    def read(self, **options) -> DataFrame:
+        raise NotImplementedError("Not implemented")
+
+    def write(self, df: DataFrame | object, mode: str = "append", **options):
+        raise NotImplementedError("Not implemented")
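A hedged sketch of wrapping an already-registered DeltaCAT catalog for use through Daft's catalog interface; the catalog, namespace, and table names are placeholders:

```python
import deltacat as dc
from deltacat.experimental.daft.daft_catalog import DaftCatalog

# Assumes "my_catalog" was registered earlier via dc.init() / dc.put_catalog().
daft_catalog = DaftCatalog(dc.get_catalog("my_catalog"), "my_catalog")

daft_catalog.create_namespace("analytics")
table = daft_catalog.get_table("analytics.events")
print(table.name)
```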
deltacat/storage/model/manifest.py
CHANGED
@@ -11,6 +11,8 @@ from deltacat import logs
 
 from deltacat.storage.model.schema import FieldLocator
 
+import json
+
 logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
 
 
@@ -192,6 +194,20 @@ class Manifest(dict):
         manifest = Manifest._build_manifest(meta, entries, author, uuid)
         return manifest
 
+    @staticmethod
+    def from_json(json_string: str) -> Manifest:
+        parsed_dict = json.loads(json_string)
+        return Manifest.of(
+            entries=ManifestEntryList.of(
+                [
+                    ManifestEntry.from_dict(entry)
+                    for entry in parsed_dict.get("entries", [])
+                ]
+            ),
+            author=ManifestAuthor.from_dict(parsed_dict.get("author")),
+            uuid=parsed_dict.get("id"),
+        )
+
     @staticmethod
     def merge_manifests(
         manifests: List[Manifest], author: Optional[ManifestAuthor] = None
@@ -264,6 +280,23 @@ class ManifestMeta(dict):
         manifest_meta["entry_params"] = entry_params
         return manifest_meta
 
+    @staticmethod
+    def from_dict(obj: dict) -> Optional[ManifestMeta]:
+        if obj is None:
+            return None
+
+        return ManifestMeta.of(
+            record_count=obj.get("record_count"),
+            content_length=obj.get("content_length"),
+            content_type=obj.get("content_type"),
+            content_encoding=obj.get("content_encoding"),
+            source_content_length=obj.get("source_content_length"),
+            credentials=obj.get("credentials"),
+            content_type_parameters=obj.get("content_type_parameters"),
+            entry_type=obj.get("entry_type"),
+            entry_params=obj.get("entry_params"),
+        )
+
     @property
     def record_count(self) -> Optional[int]:
         return self.get("record_count")
@@ -358,6 +391,16 @@ class ManifestEntry(dict):
         manifest_entry = ManifestEntry.of(url, manifest_entry_meta)
         return manifest_entry
 
+    @staticmethod
+    def from_dict(obj: dict) -> ManifestEntry:
+        return ManifestEntry.of(
+            url=obj.get("url"),
+            uri=obj.get("uri"),
+            meta=ManifestMeta.from_dict(obj.get("meta")),
+            mandatory=obj.get("mandatory", True),
+            uuid=obj.get("id"),
+        )
+
     @property
     def uri(self) -> Optional[str]:
         return self.get("uri")
@@ -392,6 +435,12 @@ class ManifestAuthor(dict):
         manifest_author["version"] = version
         return manifest_author
 
+    @staticmethod
+    def from_dict(obj: dict) -> Optional[ManifestAuthor]:
+        if obj is None:
+            return None
+        return ManifestAuthor.of(obj.get("name"), obj.get("version"))
+
     @property
     def name(self) -> Optional[str]:
         return self.get("name")
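The new `from_json` / `from_dict` helpers rebuild a manifest directly from its serialized form. A minimal sketch mirroring the test fixtures added below:

```python
from deltacat.storage.model.manifest import Manifest

manifest_json = """
{
    "id": "052f62c0-5082-4935-9937-18a705156123",
    "author": {"name": "Dave", "version": "1.0"},
    "entries": [
        {
            "uri": "s3://test_bucket/file1.tsv.gz",
            "mandatory": true,
            "meta": {"record_count": 0, "content_length": 123}
        }
    ]
}
"""

manifest = Manifest.from_json(manifest_json)
assert manifest.author.name == "Dave"
assert manifest.entries[0].meta.content_length == 123
```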
deltacat/tests/storage/model/test_manifest.py
ADDED
@@ -0,0 +1,129 @@
+import json
+
+import pytest
+
+from deltacat.storage.model.manifest import Manifest, ManifestEntry
+
+
+@pytest.fixture
+def manifest_a():
+    return """
+    {
+        "entries":[
+            {
+                "uri":"s3://test_bucket/file1.tsv.gz",
+                "mandatory":true,
+                "meta":{
+                    "record_count":0,
+                    "content_length":123,
+                    "source_content_length":0,
+                    "content_type":"application/x-amzn-unescaped-tsv",
+                    "content_encoding":"gzip"
+                }
+            },
+            {
+                "uri":"s3://test_bucket/file2.tsv.gz",
+                "mandatory":true,
+                "meta":{
+                    "record_count":0,
+                    "content_length":456,
+                    "source_content_length":0,
+                    "content_type":"application/x-amzn-unescaped-tsv",
+                    "content_encoding":"gzip"
+                }
+            }
+        ],
+        "meta":{
+            "record_count":0,
+            "content_length":579,
+            "source_content_length":0,
+            "content_type":"application/x-amzn-unescaped-tsv",
+            "content_encoding":"gzip"
+        },
+        "id":"052f62c0-5082-4935-9937-18a705156123",
+        "author":{
+            "name":"Dave",
+            "version":"1.0"
+        }
+    }
+    """
+
+
+@pytest.fixture
+def manifest_no_author():
+    return """
+    {
+        "entries":[
+            {
+                "uri":"s3://test_bucket/file1.tsv.gz",
+                "mandatory":true,
+                "meta":{
+                    "record_count":0,
+                    "content_length":123,
+                    "source_content_length":0,
+                    "content_type":"application/x-amzn-unescaped-tsv",
+                    "content_encoding":"gzip"
+                }
+            },
+            {
+                "uri":"s3://test_bucket/file2.tsv.gz",
+                "mandatory":true,
+                "meta":{
+                    "record_count":0,
+                    "content_length":456,
+                    "source_content_length":0,
+                    "content_type":"application/x-amzn-unescaped-tsv",
+                    "content_encoding":"gzip"
+                }
+            }
+        ],
+        "meta":{
+            "record_count":0,
+            "content_length":579,
+            "source_content_length":0,
+            "content_type":"application/x-amzn-unescaped-tsv",
+            "content_encoding":"gzip"
+        },
+        "id":"052f62c0-5082-4935-9937-18a705156123"
+    }
+    """
+
+
+@pytest.fixture()
+def manifest_entry_no_meta():
+    return """
+    {
+        "uri":"s3://test_bucket/file1.tsv.gz",
+        "mandatory":true
+    }
+    """
+
+
+def test_manifest_from_json(manifest_a):
+    manifest = Manifest.from_json(manifest_a)
+
+    assert manifest.entries is not None
+    assert len(manifest.entries) == 2
+    assert manifest.entries[0].uri == "s3://test_bucket/file1.tsv.gz"
+    assert manifest.entries[0].meta.record_count == 0
+    assert manifest.meta.content_length == 579
+    assert manifest.author.name == "Dave"
+
+
+def test_manifest_from_json_no_author(manifest_no_author):
+    manifest = Manifest.from_json(manifest_no_author)
+
+    assert manifest.entries is not None
+    assert len(manifest.entries) == 2
+    assert manifest.entries[0].uri == "s3://test_bucket/file1.tsv.gz"
+    assert manifest.entries[0].meta is not None
+    assert manifest.author is None
+
+
+def test_manifest_entry_from_dict_no_meta(manifest_entry_no_meta):
+    entry = ManifestEntry.from_dict(json.loads(manifest_entry_no_meta))
+
+    assert entry is not None
+    assert entry.meta is None
+    assert entry.uri == "s3://test_bucket/file1.tsv.gz"
+    assert entry.mandatory is True
deltacat/utils/daft.py
CHANGED
@@ -2,7 +2,7 @@ import logging
 from typing import Optional, List, Any, Dict, Callable
 import daft
 import ray
-from daft.
+from daft.recordbatch import read_parquet_into_pyarrow
 from daft import TimeUnit, DataFrame
 from daft.io import IOConfig, S3Config
 import pyarrow as pa
@@ -10,7 +10,6 @@ import pyarrow as pa
 from deltacat import logs
 from deltacat.utils.common import ReadKwargsProvider
 from deltacat.utils.schema import coerce_pyarrow_table_to_schema
-
 from deltacat.types.media import ContentType, ContentEncoding
 from deltacat.aws.constants import (
     BOTO_MAX_RETRIES,
@@ -72,9 +71,7 @@ def s3_files_to_dataframe(
         f"Preparing to read S3 object from {len(uris)} files into daft dataframe"
     )
 
-    df, latency = timed_invocation(
-        daft.read_parquet, path=uris, io_config=io_config, use_native_downloader=True
-    )
+    df, latency = timed_invocation(daft.read_parquet, path=uris, io_config=io_config)
 
     logger.debug(f"Time to create daft dataframe from {len(uris)} files is {latency}s")
 
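The helper now calls `daft.read_parquet` without the retired `use_native_downloader` flag. A standalone sketch of the same call; the S3 path is a placeholder and `IOConfig()` falls back to default credentials:

```python
import daft
from daft.io import IOConfig

io_config = IOConfig()
df = daft.read_parquet(path=["s3://my-bucket/part-0.parquet"], io_config=io_config)
df.show()
```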
{deltacat-2.0.0b3.dist-info → deltacat-2.0.0b7.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: deltacat
-Version: 2.0.0b3
+Version: 2.0.0b7
 Summary: A portable, scalable, fast, and Pythonic Data Lakehouse for AI.
 Home-page: https://github.com/ray-project/deltacat
 Author: Ray Team
@@ -17,11 +17,11 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: aws-embedded-metrics==3.2.0
 Requires-Dist: boto3~=1.34
-Requires-Dist: getdaft
+Requires-Dist: getdaft>=0.4.11
 Requires-Dist: intervaltree==3.1.0
 Requires-Dist: numpy==1.21.5
 Requires-Dist: pandas==1.3.5
-Requires-Dist: pyarrow==
+Requires-Dist: pyarrow==16.0.0
 Requires-Dist: pydantic!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,<3
 Requires-Dist: pymemcache==4.0.0
 Requires-Dist: ray>=2.20.0
{deltacat-2.0.0b3.dist-info → deltacat-2.0.0b7.dist-info}/RECORD
CHANGED
@@ -1,4 +1,4 @@
-deltacat/__init__.py,sha256=
+deltacat/__init__.py,sha256=EXieJNtfMEoIDqJ3wlqkpjUyMXs5y6hHmK0bA-AU_yg,2474
 deltacat/annotations.py,sha256=9lBi34DpIV_RPjCCK2Aiz_6nMyd-e-_CfQ1XtdRQQlM,1196
 deltacat/api.py,sha256=fYKurVlM97VKb_fh7kJ1rDcl-VAAuSflxPeqrsUt1u8,5257
 deltacat/constants.py,sha256=_JfHTRktDTM70Nls-LMnSmLeCRG17UwQYCmY6gQSGBg,3482
@@ -24,10 +24,10 @@ deltacat/catalog/delegate.py,sha256=x3jj_T61gyExuAnbDqhU6smbaAbIN4UxrVMZuBEOg0A,
 deltacat/catalog/interface.py,sha256=YB-qNBFsWupqyWJuHr7eQ-_MshhZZ5HpLphoZ64yn2g,12244
 deltacat/catalog/iceberg/__init__.py,sha256=LOENcLTQQlu_694MvRhMd2TQDLzwfg2vz0D8DuVO3M8,190
 deltacat/catalog/iceberg/iceberg_catalog_config.py,sha256=LfHxv8pk-YmTRQy5LvKFzwSqZ8ek2Y6v0KY7xihhIN0,786
-deltacat/catalog/iceberg/impl.py,sha256=
-deltacat/catalog/iceberg/overrides.py,sha256=
+deltacat/catalog/iceberg/impl.py,sha256=c_ONnLLyh8Vyqo5PusQSHySQ92iM4Qgk-rucHMfdd7s,14288
+deltacat/catalog/iceberg/overrides.py,sha256=WmM2mxf7ihDl8anb5GzBxo5-sxBkot8ZSRTxDpaauRA,2687
 deltacat/catalog/main/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-deltacat/catalog/main/impl.py,sha256=
+deltacat/catalog/main/impl.py,sha256=E9gCPaARJAaiIS2HTdXXz0-GwTjOaWIBX2TK2MsL194,23092
 deltacat/catalog/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/catalog/model/catalog.py,sha256=zGuNl1Czqbk2QQx9qGpMWCTK9ay4b3tm3SJzNkLlw-I,10198
 deltacat/catalog/model/properties.py,sha256=wdXjd39-JEj-zZLL5pH6wyIXAdpph-CD7yEIF96Wn-A,4110
@@ -130,14 +130,20 @@ deltacat/compute/stats/models/delta_stats.py,sha256=hBith8_hbF9TVr6HocLAt6RJ_kZZ
 deltacat/compute/stats/models/delta_stats_cache_result.py,sha256=mbJYxpZd5jaER_BWrCD2hROFy3p1nNdBrj66nUpc6io,1624
 deltacat/compute/stats/models/manifest_entry_stats.py,sha256=NCDAe2nPDEI4kOkuwNkRFgGPS-rqQaQqLuaLoKk20KQ,2419
 deltacat/compute/stats/models/stats_result.py,sha256=XQAlmzhUqRmg4jzEMUAOqcYn1HUOBTMryBH1CCVlet8,3820
+deltacat/daft/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+deltacat/daft/daft_scan.py,sha256=u0RpSZTujF9ScuFkXBLkEXfG2eMkoww5ypG2Eri0HrU,3778
+deltacat/daft/model.py,sha256=6NaKkp9R0ruE0K2x-moyARNrQisswUl6TjMeA6YHtBM,9078
 deltacat/examples/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/examples/basic_logging.py,sha256=IwUa-MwQbmH5vuzRvmz5NtfYXU2qNUID_0zkO5HlUZo,2826
 deltacat/examples/hello_world.py,sha256=hXpMUvJINB2qWTpV3QFPlRNu0uE31BvEs2sLyQ3CWZk,530
 deltacat/examples/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/examples/common/fixtures.py,sha256=MS0Hz1c__f9Axm3JgTajfWuMVeDAQmFmZ7KB7vz_1q4,430
 deltacat/examples/iceberg/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-deltacat/examples/iceberg/iceberg_bucket_writer.py,sha256=
+deltacat/examples/iceberg/iceberg_bucket_writer.py,sha256=PdJG3jXcgPVds4UanfyNWB1egv-Os7LnZCPhdgv9Yyk,6586
 deltacat/examples/iceberg/iceberg_reader.py,sha256=mlF-277vT04at-2jibAjgRJG6Y-zle_NNy1-pXwS2YQ,5023
+deltacat/experimental/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+deltacat/experimental/daft/__init__.py,sha256=0d1SsgjbDher8TKgS0gSBBdy5TGi01fewiwpG0BMwck,108
+deltacat/experimental/daft/daft_catalog.py,sha256=112wDqqzdtxmtZVwiZW59MektbRsFMjSRgqYHrUOuok,8396
 deltacat/io/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/io/file_object_store.py,sha256=YoNL3Qla8uLOHaWnyBmIgotjSGAy3Td3Tumah0kk73Y,1868
 deltacat/io/memcached_object_store.py,sha256=C96t77-4BQe0XZ4vC76Ygi2o1POUoMN4t4BiyPmulz0,10997
@@ -158,7 +164,7 @@ deltacat/storage/model/delta.py,sha256=PhkjME0dItGgPd37SrQbI8VjQcIaYW2OfIq0KJKgD
 deltacat/storage/model/interop.py,sha256=CzXdu1NuJF5ER3IjQJztkNECD6MRDwbmMezlfN4SRH0,536
 deltacat/storage/model/list_result.py,sha256=5DpRAu-c0M48cHtKdTRPSgQiq2nCWfjAY8LOVqp5wxI,2703
 deltacat/storage/model/locator.py,sha256=Q16y-eGSQSZpDPKDYQhOjSA9c5ajwg1jLw_13MIB4SM,4707
-deltacat/storage/model/manifest.py,sha256=
+deltacat/storage/model/manifest.py,sha256=3I4Vohd-PnEQ5NdQu9yN3jvFchqnzb8hQ3bq6w_tO4E,16808
 deltacat/storage/model/metafile.py,sha256=UVWPvvYvA0tj_pM8ig7NKfVFrVWU4l3eDP7I2n9Upeg,53404
 deltacat/storage/model/namespace.py,sha256=gLli1V64O9RHIf-FesmqWA29Wi7P1kwt01uz5sDdJR0,2409
 deltacat/storage/model/partition.py,sha256=qNCvc74o_4pmFVL-FCyKCZMH4lHSjRO560sb3vaF_H0,20759
@@ -277,6 +283,7 @@ deltacat/tests/storage/main/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMp
 deltacat/tests/storage/main/test_main_storage.py,sha256=9dtsAcp9GZ4XQ5-8XhKnAcFF7upowJpTIuqZUB2EYig,58124
 deltacat/tests/storage/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/tests/storage/model/test_delete_parameters.py,sha256=RcNRMIed0zUzkX9tRXDoYPXHb7721OEt8viY9tpWXZM,822
+deltacat/tests/storage/model/test_manifest.py,sha256=udp9YUNvIBpnT-NutjMaF25abEQOXEcPkQm8Aay_UCs,3733
 deltacat/tests/storage/model/test_metafile_io.py,sha256=116U9aNJPzR0JS6iadJyyx0_4KyAi3D47WCNbndag6o,101639
 deltacat/tests/storage/model/test_schema.py,sha256=5m4BscbxbbOiry-lDI8j4vQcnvkG2Y-f0ZfshncPiSI,9599
 deltacat/tests/storage/model/test_shard.py,sha256=6QBr-ws3zQkJjjGyB7QEOhtNC5ql0cdjOPB2wxGNW3Q,755
@@ -323,7 +330,7 @@ deltacat/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/utils/arguments.py,sha256=5y1Xz4HSAD8M8Jt83i6gOEKoYjy_fMQe1V43IhIE4hY,1191
 deltacat/utils/cloudpickle.py,sha256=XE7YDmQe56ksfl3NdYZkzOAhbHSuhNcBZGOehQpgZr0,1187
 deltacat/utils/common.py,sha256=RG_-enXNpLKaYrqyx1ne2lL10lxN9vK7F631oJP6SE8,1375
-deltacat/utils/daft.py,sha256=
+deltacat/utils/daft.py,sha256=RsOGzxI6ltsRcH6SfbK6PDBEaKyLZaUisCBXBlUvjbI,5770
 deltacat/utils/export.py,sha256=As5aiwOw9vLxtfolPLU0yak6W2RVR0rkuaYQ5YCy49U,1952
 deltacat/utils/filesystem.py,sha256=DthBgrVGzIcsQcGnyD3QYEQIpkYFxB19XmpF9DfCaeo,11709
 deltacat/utils/metafile_locator.py,sha256=_3yEW9n49jiEBuXHZmUKsFdYx6RxWWuS-Mu2gs_a1bw,2933
@@ -342,8 +349,8 @@ deltacat/utils/ray_utils/concurrency.py,sha256=JDVwMiQWrmuSlyCWAoiq9ctoJ0XADEfDD
 deltacat/utils/ray_utils/dataset.py,sha256=waHdtH0c835a-2t51HYRHnulfC0_zBxx8mFSAPvPSPM,3274
 deltacat/utils/ray_utils/performance.py,sha256=d7JFM7vTXHzkGx9qNQcZzUWajnqINvYRwaM088_FpsE,464
 deltacat/utils/ray_utils/runtime.py,sha256=rB0A-tU9WZHz0J11LzJdANYtL397YyuemcA1l-K9dAw,5029
-deltacat-2.0.
-deltacat-2.0.
-deltacat-2.0.
-deltacat-2.0.
-deltacat-2.0.
+deltacat-2.0.0b7.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+deltacat-2.0.0b7.dist-info/METADATA,sha256=qLbeNiyQZXKd0ZfQ0AZF9GqIXH8A64YGDUJulu0Rb2k,2809
+deltacat-2.0.0b7.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+deltacat-2.0.0b7.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
+deltacat-2.0.0b7.dist-info/RECORD,,
{deltacat-2.0.0b3.dist-info → deltacat-2.0.0b7.dist-info}/LICENSE
File without changes
{deltacat-2.0.0b3.dist-info → deltacat-2.0.0b7.dist-info}/WHEEL
File without changes
{deltacat-2.0.0b3.dist-info → deltacat-2.0.0b7.dist-info}/top_level.txt
File without changes