PyPI - deltacat - Versions diffs - 2.0.0.post1__py3-none-any.whl → 2.0.0.post2__py3-none-any.whl - Mend

deltacat 2.0.0.post1__py3-none-any.whl → 2.0.0.post2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20)

deltacat/__init__.py +1 -1
deltacat/api.py +44 -7
deltacat/catalog/main/impl.py +34 -110
deltacat/examples/hello_world.py +10 -4
deltacat/examples/indexer/indexer.py +3 -0
deltacat/examples/indexer/job_runner.py +6 -1
deltacat/storage/model/schema.py +17 -4
deltacat/tests/aws/test_s3u.py +9 -1
deltacat/tests/catalog/test_default_catalog_impl.py +198 -7
deltacat/types/media.py +282 -0
deltacat/types/tables.py +5 -11
deltacat/utils/pandas.py +11 -3
deltacat/utils/polars.py +3 -1
deltacat/utils/pyarrow.py +7 -3
deltacat/utils/url.py +22 -0
{deltacat-2.0.0.post1.dist-info → deltacat-2.0.0.post2.dist-info}/METADATA +161 -47
{deltacat-2.0.0.post1.dist-info → deltacat-2.0.0.post2.dist-info}/RECORD +20 -20
{deltacat-2.0.0.post1.dist-info → deltacat-2.0.0.post2.dist-info}/WHEEL +0 -0
{deltacat-2.0.0.post1.dist-info → deltacat-2.0.0.post2.dist-info}/licenses/LICENSE +0 -0
{deltacat-2.0.0.post1.dist-info → deltacat-2.0.0.post2.dist-info}/top_level.txt +0 -0

deltacat/utils/url.py CHANGED Viewed

@@ -1,3 +1,6 @@
+# Allow classes to use self-referencing Type hints in Python 3.7.
+from __future__ import annotations
 import functools
 import json
 from typing import Callable, List, Tuple, Any, Union, Optional
@@ -231,7 +234,18 @@ RAY_DATASTORE_TYPE_TO_WRITER = {
     ),
 }
+def _daft_binary_reader(url_path: str) -> daft.DataFrame:
+    df = daft.from_pydict({"url": [url_path]})
+    return df.with_column("data", df["url"].url.download())
 DAFT_DATASTORE_TYPE_TO_READER = {
+    DatastoreType.BINARY: lambda url: functools.partial(
+        _daft_binary_reader,
+        url.url_path,
+        **url.query_params,
+    ),
     DatastoreType.CSV: lambda url: functools.partial(
         daft.io.read_csv,
         url.url_path,
@@ -629,17 +643,25 @@ class DeltaCatUrl:
     avro+<scheme>://<path>?param1=val1&param2=val2&...
     binary+<scheme>://<path>?param1=val1&param2=val2&...
     csv+<scheme>://<path>?param1=val1&param2=val2&...
+    deltalake+<scheme>://<path>?param1=val1&param2=val2&...
     deltasharing+<scheme>://<path>?param1=val1&param2=val2&...
+    feather+<scheme>://<path>?param1=val1&param2=val2&...
+    hdf+<scheme>://<path>?param1=val1&param2=val2&...
+    html+<scheme>://<path>?param1=val1&param2=val2&...
     hudi+<scheme>://<path>?param1=val1&param2=val2&...
     images+<scheme>://<path>?param1=val1&param2=val2&...
     json+<scheme>://<path>?param1=val1&param2=val2&...
     lance+<scheme>://<path>?param1=val1&param2=val2&...
     numpy+<scheme>://<path>?param1=val1&param2=val2&...
+    orc+<scheme>://<path>?param1=val1&param2=val2&...
     parquet+<scheme>://<path>?param1=val1&param2=val2&...
     text+<scheme>://<path>?param1=val1&param2=val2&...
     tfrecords+<scheme>://<path>?param1=val1&param2=val2&...
+    text+<scheme>://<path>?param1=val1&param2=val2&...
+    warc+<scheme>://<path>?param1=val1&param2=val2&...
     videos+<scheme>://<path>?param1=val1&param2=val2&...
     webdataset+<scheme>://<path>?param1=val1&param2=val2&...
+    xml+<scheme>://<path>?param1=val1&param2=val2&...
     Some DeltaCAT URLs reference special types of external objects
     locatable via custom URLs that don't conform to the usual

{deltacat-2.0.0.post1.dist-info → deltacat-2.0.0.post2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: deltacat
-Version: 2.0.0.post1
+Version: 2.0.0.post2
 Summary: DeltaCAT is a portable Pythonic Data Lakehouse powered by Ray.
 Home-page: https://github.com/ray-project/deltacat
 Author: Ray Team
@@ -57,6 +57,8 @@ DeltaCAT is a portable Pythonic Data Lakehouse powered by [Ray](https://github.c
 fast, scalable, ACID-compliant multimodal data lakes, and has been used to [successfully manage exabyte-scale enterprise
 data lakes](https://aws.amazon.com/blogs/opensource/amazons-exabyte-scale-migration-from-apache-spark-to-ray-on-amazon-ec2/).
+It provides data lake level transactions & time travel, fast schema evolution for feature enrichment, zero-copy multimodal file processing, schemaless dataset management, and transparent dataset optimization. It runs locally for rapid development or in the cloud for production workloads.
 It uses the Ray distributed compute framework together with [Apache Arrow](https://github.com/apache/arrow) and
 [Daft](https://github.com/Eventual-Inc/Daft) to efficiently scale common table management tasks, like petabyte-scale
 merge-on-read and copy-on-write operations.
@@ -79,10 +81,14 @@ Data consumers that prefer to stay within the ecosystem of Pythonic data managem
 ## Getting Started
 DeltaCAT applications run anywhere that Ray runs, including your local laptop, cloud computing cluster, or on-premise cluster.
-DeltaCAT lets you manage **Tables** across one or more **Catalogs**. A **Table** can be thought of as a named collection of one or more data files. A **Catalog** provides a root location (e.g., a local file path or S3 Bucket) to store table information, and can be rooted in any [PyArrow-compatible Filesystem](https://arrow.apache.org/docs/python/filesystems.html). **Tables** can be created, read, and written using the `dc.write` and `dc.read` APIs.
+DeltaCAT lets you manage **Tables** across one or more **Catalogs**. A **Table** can be thought of as a named collection of data files. A **Catalog** can be thought of as a named data lake containing a set of **Tables**. It provides a root location (e.g., a local file path or S3 Bucket) to store table information, and can be rooted in any [PyArrow-compatible Filesystem](https://arrow.apache.org/docs/python/filesystems.html). **Tables** can be created, read, and written using the `dc.write` and `dc.read` APIs.
 ### Quick Start
+Install DeltaCAT with: `pip install deltacat`
+Then run this script to create and read your first table:
 ```python
 import deltacat as dc
 import pandas as pd
@@ -109,7 +115,7 @@ daft_df = dc.read("users")  # Returns Daft DataFrame (default)
 daft_df.show()  # Materialize and print the DataFrame
 # Append more data and add a new column.
-# Compaction and schema evolution are handled automatically.
+# Compaction and zero-copy schema evolution are handled automatically.
 data = pd.DataFrame({
     "id": [4, 5, 6],
     "name": ["Tom", "Simpkin", "Delta"],
@@ -129,7 +135,7 @@ DeltaCAT can do much more than just append data to tables and read it back again
 <details>
-<summary><span style="font-size: 1.25em; font-weight: bold;">Replacing and Dropping Tables</span></summary>
+<summary><span style="font-size: 1.25em; font-weight: bold;">Idempotent Writes</span></summary>
 If you run the quick start example repeatedly from the same working directory, you'll notice that the table it writes to just keeps growing larger. This is because DeltaCAT always **appends** table data by default. One way to prevent this perpetual table growth and make the example idempotent is to use the **REPLACE** write mode if the table already exists:
@@ -239,7 +245,7 @@ assert dc.dataset_length(daft_df) == 6
 <details>
-<summary><span style="font-size: 1.25em; font-weight: bold;">Supported Dataset and File Formats</span></summary>
+<summary><span style="font-size: 1.25em; font-weight: bold;">Multi-Format Data Processing</span></summary>
 DeltaCAT natively supports a variety of open dataset and file formats already integrated with Ray and Arrow. You can use `dc.read` to read tables back as a Daft DataFrame, Ray Dataset, Pandas DataFrame, PyArrow Table, Polars DataFrame, NumPy Array, or list of PyArrow ParquetFile objects:
@@ -329,7 +335,7 @@ print("\n=== NumPy Table ===")
 dc.read("my_numpy_table").show()
 ```
-Or write to different table file formats:
+DeltaCAT tables also support persisting data in heterogeneous table file formats like Avro, ORC, or Feather:
 ```python
 data = pd.DataFrame({"id": [1], "name": ["Cheshire"], "age": [3]})
@@ -372,9 +378,9 @@ print(pandas_df)
 <details>
-<summary><span style="font-size: 1.25em; font-weight: bold;">Merging and Deleting Data</span></summary>
+<summary><span style="font-size: 1.25em; font-weight: bold;">Live Feature Enrichment</span></summary>
-DeltaCAT can automatically merge and delete data by defining a table schema with one or more merge keys:
+DeltaCAT can update your datasets on-the-fly to keep up with a continuous stream of new insights, and support common ML use-cases like feature enrichment. Just define a table schema with one or more merge keys to start updating and deleting existing records:
 ```python
 import deltacat as dc
@@ -385,53 +391,50 @@ import tempfile
 # Initialize DeltaCAT with a fresh temporary catalog
 dc.init_local(tempfile.mkdtemp())
-# Define a schema with user_id as a merge key.
-schema = dc.Schema.of([
+# Start with minimal schema - just user_id as merge key and name
+initial_schema = dc.Schema.of([
     dc.Field.of(pa.field("user_id", pa.int64()), is_merge_key=True),
     dc.Field.of(pa.field("name", pa.string())),
-    dc.Field.of(pa.field("age", pa.int32())),
-    dc.Field.of(pa.field("status", pa.string())),
 ])
-# Initial user data
+# Initial user data - just basic info
 initial_users = pd.DataFrame({
     "user_id": [1, 2, 3],
-    "name": ["Cheshire", "Dinah", "Felix"],
-    "age": [3, 7, 2],
-    "status": ["active", "active", "inactive"]
+    "name": ["Jim", "Dinah", "Bob"],
 })
-# Write initial data with the merge key schema
-dc.write(initial_users, "users", schema=schema)
+# Write initial data with minimal schema
+dc.write(initial_users, "users", schema=initial_schema)
-# Read the data back as a Pandas DataFrame.
+# Read the data back as a Pandas DataFrame
 df = dc.read("users", read_as=dc.DatasetType.PANDAS)
-print("=== Initial Users ===")
+print("=== Initial Users (Basic Info) ===")
 print(df.sort_values("user_id"))
-# Update data for existing users + add new users
-updated_users = pd.DataFrame({
-    "user_id": [2, 3, 4, 5, 6],
-    "name": ["Dinah", "Felix", "Tom", "Simpkin", "Delta"],
-    "age": [7, 2, 5, 12, 4],
-    "status": ["premium", "active", "active", "active", "active"]
+# Later, enrich with new insights: add age/job features + new users
+enriched_data = pd.DataFrame({
+    "user_id": [1, 3, 4, 5, 6],
+    "name": ["Cheshire", "Felix", "Tom", "Simpkin", "Delta"],
+    "age": [3, 2, 5, 12, 4],
+    "job": ["Tour Guide", "Drifter", "Housekeeper", "Mouser", "Engineer"]
 })
-# Write automatically detects that the schema has a merge key and:
-# 1. Updates existing records with matching user IDs.
-# 2. Inserts new records with new user IDs.
-dc.write(updated_users, "users", schema=schema)
+# DeltaCAT automatically evolves the schema and merges by user_id:
+# 1. Enriches existing users (Jim -> Cheshire age=3, job="Tour Guide"; Bob -> Felix)
+# 2. Adds new age/job columns with automatic schema evolution
+# 3. Inserts new users (Tom, Simpkin, Delta) with full feature set
+dc.write(enriched_data, "users")
-# Read back to see merged results
+# Read back to see live feature enrichment results
 df = dc.read("users", read_as=dc.DatasetType.PANDAS)
-print("\n=== After Merge ===")
+print("\n=== Enriched Users (Age & Job) ===")
 print(df.sort_values("user_id"))
-# - Cheshire (user_id=1) remains unchanged
-# - Dinah (user_id=2) status updated to "premium"
-# - Felix (user_id=3) updated to "active"
-# - New users (4,5,6), (Tom, Simpkin, Delta) added
-# - No duplicate user_id values exist
+# - Cheshire (user_id=1) name updated from Jim, gets age=3, job="Tour Guide"
+# - Dinah (user_id=2) keeps original name, gets null age/job (missing features)
+# - Felix (user_id=3) name updated from Bob, gets age=2, job="Drifter"
+# - New users (4,5,6) added with complete feature set
+# - Schema automatically evolved to include age/job columns
 # Specify the users to delete.
 # We only need to specify matching merge key values.
@@ -440,7 +443,7 @@ users_to_delete = pd.DataFrame({
 })
 # Delete the records that match our merge keys.
-dc.write(users_to_delete, "users", schema=schema, mode=dc.TableWriteMode.DELETE)
+dc.write(users_to_delete, "users", mode=dc.TableWriteMode.DELETE)
 # Read the table back to confirm target users have been deleted.
 df = dc.read("users", read_as=dc.DatasetType.PANDAS)
@@ -456,6 +459,117 @@ print(df.sort_values("user_id"))
 <details>
+<summary><span style="font-size: 1.25em; font-weight: bold;">Zero-Copy Multimodal URL Processing</span></summary>
+DeltaCAT can register and process existing multimodal datasets from local or remote URLs. This enables zero-copy distributed processing of images, audio, text, and other file formats:
+```python
+import deltacat as dc
+import pandas as pd
+import pyarrow as pa
+import tempfile
+import ray
+# Initialize DeltaCAT with a fresh temporary catalog
+dc.init_local(tempfile.mkdtemp())
+# Create dataset with DeltaCAT URLs pointing to existing files
+urls_df = pd.DataFrame({
+    "file_id": [1, 2, 3, 4, 5, 6],
+    "url": [
+        # URLs with common file extensions will have their content type inferred.
+        "https://picsum.photos/id/237/400/300.jpg",
+        "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv",
+        "https://raw.githubusercontent.com/SergLam/Audio-Sample-files/master/sample.mp3",
+        "https://raw.githubusercontent.com/burningtree/awesome-json/master/README.md",
+        "https://raw.githubusercontent.com/microsoft/vscode/main/package.json",
+        # URLs without common file extensions will be read as binary by default.
+        "https://picsum.photos/200"
+    ]
+})
+# Create empty table with merge key to efficiently add insights about each file
+dc.create_table(
+    "multimodal_files",
+    schema=dc.Schema.of([
+        dc.Field.of(pa.field("file_id", pa.int64()), is_merge_key=True),
+        dc.Field.of(pa.field("url", pa.string()))
+    ])
+)
+# Write URLs to DeltaCAT table
+dc.write(urls_df, "multimodal_files")
+# UDF to process each file in parallel using Ray Dataset map method
+def analyze_file(row):
+    file_id = row["file_id"]
+    url = row["url"]
+    # DeltaCAT automatically infers the right Ray Data reader for the URL
+    dataset = dc.get(url)
+    records = dataset.take_all()
+    url_type = dc.DatastoreType.from_url(url)
+    # Extract standard Ray Dataset fields for each file type
+    if url_type == dc.DatastoreType.IMAGES:
+        image = records[0]["image"]
+        analysis = f"Image {image.shape[1]}x{image.shape[0]} pixels"
+    elif url_type == dc.DatastoreType.CSV:
+        analysis = f"CSV with {len(records)} rows, {len(records[0].keys())} columns"
+    elif url_type == dc.DatastoreType.AUDIO:
+        sample_rate = records[0]["sample_rate"]
+        duration = len(records[0]["amplitude"][0]) / sample_rate
+        analysis = f"Audio {duration:.1f}s, {sample_rate}Hz"
+    elif url_type == dc.DatastoreType.JSON:
+        analysis = f"JSON with {len(records[0].keys())} fields"
+    elif url_type == dc.DatastoreType.TEXT:
+        analysis = f"Text with {len(records)} records"
+    else:
+        analysis = f"Binary with {len(records[0]['bytes'])} bytes"
+    return {"file_id": file_id, "analysis": analysis}
+# Read the multimodal_files table as a Ray Dataset
+ray_dataset = dc.read("multimodal_files", read_as=dc.DatasetType.RAY_DATASET)
+# Download and analyze each URL in parallel using map
+results_dataset = ray_dataset.map(analyze_file)
+# Write results back to the multimodal_files table
+dc.write(results_dataset, "multimodal_files", mode=dc.TableWriteMode.MERGE)
+# Read final results and compare to initial dataset
+print("\n=== Initial Dataset ===")
+print(dc.to_pandas(ray_dataset))
+print("\n=== Final Results with Analysis ===")
+print(dc.read("multimodal_files", read_as=dc.DatasetType.PANDAS))
+```
+The default dataset type used by `dc.get` is a Ray Dataset but, similar to `dc.read`, `dc.get` can also read URLs into other dataset types like Daft:
+```python
+import deltacat as dc
+# Create dataset with DeltaCAT URLs pointing to existing files
+urls = [
+    # URLs with common file extensions will have their content type inferred.
+    "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv",
+    "https://raw.githubusercontent.com/burningtree/awesome-json/master/README.md",
+    # URLs without common file extensions will be read as binary by default.
+    "https://picsum.photos/200"
+]
+# Download each URL into a Daft DataFrame serially
+for url in urls:
+    dataset = dc.get(url, read_as=dc.DatasetType.DAFT)
+    print(f"\n=== {url} ===")
+    print(dataset.show())
+```
+</details>
+<details>
 <summary><span style="font-size: 1.25em; font-weight: bold;">Organizing Tables with Namespaces</span></summary>
 In DeltaCAT, table **Namespaces** are optional but useful for organizing related tables within a catalog:
@@ -534,9 +648,9 @@ print(finance_df)
 <details>
-<summary><span style="font-size: 1.25em; font-weight: bold;">Multi-Table Transactions</span></summary>
+<summary><span style="font-size: 1.25em; font-weight: bold;">Data Lake Level Transactions</span></summary>
-DeltaCAT transactions can span multiple tables and namespaces. Since all operations within a transaction either succeed or fail together, this simplifies keeping related datasets in sync across your entire catalog.
+DeltaCAT transactions can span multiple tables and namespaces. Since transaction history is maintained at the catalog level, every transaction operates against a consistent snapshot of every object in your data lake. Since all operations within a transaction either succeed or fail together, this simplifies keeping related datasets in sync across your entire catalog.
 Consider the previous example that organized tables with namespaces. One table tracked customer orders, and another table tracked the lifetime payments of each customer. If one table was updated but not the other, then it would result in an accounting discrepancy. This edge case can be eliminated by using multi-table transactions:
@@ -630,7 +744,7 @@ print(dc.read("users", namespace="finance", read_as=dc.DatasetType.PANDAS))
 <details>
-<summary><span style="font-size: 1.25em; font-weight: bold;">Working with Multiple Catalogs</span></summary>
+<summary><span style="font-size: 1.25em; font-weight: bold;">Managing Multiple Data Lakes</span></summary>
 DeltaCAT lets you work with multiple catalogs in a single application. All catalogs registered with DeltaCAT are tracked by a Ray Actor to make them available to all workers in your Ray application.
@@ -652,8 +766,8 @@ dc.init(catalogs={
         filesystem=pa.fs.LocalFileSystem()
     )),
     "prod": dc.Catalog(config=dc.CatalogProperties(
-        root=tempfile.mkdtemp(),  # Use temporary directory for prod
-        filesystem=pa.fs.LocalFileSystem()
+        root="s3://example/deltacat/",  # Use S3 for prod
+        filesystem=pa.fs.S3FileSystem()
     ))
 })
@@ -705,9 +819,9 @@ print(dc.read("financial_data", catalog="prod", read_as=dc.DatasetType.PANDAS))
 <details>
-<summary><span style="font-size: 1.25em; font-weight: bold;">Transaction History & Time Travel</span></summary>
+<summary><span style="font-size: 1.25em; font-weight: bold;">Data Lake Level Time Travel</span></summary>
-DeltaCAT supports time travel queries that let you read all tables in a catalog as they existed at any point in the past. Combined with multi-table transactions, this enables consistent point-in-time views across your entire data catalog.
+DeltaCAT supports time travel queries that let you read all tables in a catalog as they existed at any point in the past. Combined with catalog-level transactions, this enables consistent point-in-time views across your entire data lake.
 ```python
 import deltacat as dc
@@ -847,7 +961,7 @@ print("\nTime travel validation successful!")
 <summary><span style="font-size: 1.25em; font-weight: bold;">Multimodal Batch Inference</span></summary>
-DeltaCAT's support for merging new fields into existing records and multimodal datasets can be used to build a multimodal batch inference pipeline. For example, the following code indexes images of cats, then merges in new fields with breed precitions predictions for each image:
+DeltaCAT's support for merging new fields into existing records and multimodal datasets can be used to build a multimodal batch inference pipeline. For example, the following code indexes images of cats, then merges in new fields with breed predictions for each image:
 > **Requirements**: This example requires PyTorch ≥ 2.8.0 and torchvision ≥ 0.23.0. Install via: `pip install "torch>=2.8.0" "torchvision>=0.23.0"`
@@ -938,7 +1052,7 @@ final_df.show()
 <summary><span style="font-size: 1.25em; font-weight: bold;">LLM Batch Inference</span></summary>
-DeltaCAT multi-table transactions, time travel queries, and automatic schema evolution can be used to create auditable LLM batch inference pipelines. For example, the following code tries different approaches to analyze the overall tone of customer feedback, then generates customer service responses based on the analysis:
+DeltaCAT multi-table transactions, data lake time travel, and automatic schema evolution can be used to create auditable LLM batch inference pipelines. For example, the following code tries different approaches to analyze the overall tone of customer feedback, then generates customer service responses based on the analysis:
 ```python
 import deltacat as dc

{deltacat-2.0.0.post1.dist-info → deltacat-2.0.0.post2.dist-info}/RECORD RENAMED Viewed

@@ -1,6 +1,6 @@
-deltacat/__init__.py,sha256=mTb9CK1GTZuGmALUxQtr717n6eUNusG9tbcPyTpXNI8,4452
+deltacat/__init__.py,sha256=8oHmukh7qFhVfUT89l4zBtonbu_wsoj2hJsPaka0PoA,4452
 deltacat/annotations.py,sha256=9lBi34DpIV_RPjCCK2Aiz_6nMyd-e-_CfQ1XtdRQQlM,1196
-deltacat/api.py,sha256=MwCB60tWzEru-Jv1tTWcxYWuID3e5GbCy1jwn4XiDXs,20497
+deltacat/api.py,sha256=W7u3jeKZsvTWNi9zkhOC2O6BMwZ3TvMRbnbpcn6lFBo,21940
 deltacat/constants.py,sha256=HPE3SbK1-LRjtTu3OKD9s4N__LWMwj3xFP2N3Qy8fzM,4701
 deltacat/env.py,sha256=BJdTt8od3IVR4RMLjBxy4oRUHM7Lb16AzMOz8-hpwOI,2303
 deltacat/exceptions.py,sha256=dqZizcMKC3VwO7EgHXdAC4YUivBKVJgNwQLibMP93MA,16051
@@ -23,7 +23,7 @@ deltacat/catalog/__init__.py,sha256=lsu9N2G6P6HkyvrIpGY34SVkJM8-lwVaNfZanNTRjAc,
 deltacat/catalog/delegate.py,sha256=RDOQHaYvpvwc3RTZNaJhv00yXV1WHgE8YcD4i19H6g0,26870
 deltacat/catalog/interface.py,sha256=rmJSVi8dNORVa0ydzRFRwMcbpXwhDjYEpGAIGi-4O08,18486
 deltacat/catalog/main/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-deltacat/catalog/main/impl.py,sha256=lztAIZ4MRGC2RFtGpbDwJdZgIWPuooX8JW_zG9tyzVk,103157
+deltacat/catalog/main/impl.py,sha256=V-JeQkAIQOVwbupJ_-2sSMUQ5P12crcWF3qArFVHMg8,100224
 deltacat/catalog/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/catalog/model/catalog.py,sha256=gvfczu9yhvDIjPjx5ZE69IUu1I_nhZOHURsOAakzhcQ,12765
 deltacat/catalog/model/properties.py,sha256=Bt7JgmG9UQD9ABqrCXniGrbRWpYWbini9ZCY8dBhifU,5416
@@ -132,7 +132,7 @@ deltacat/docs/autogen/schema/inference/generate_type_mappings.py,sha256=ZH30xcsA
 deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py,sha256=_N37jw5nmNlf40V2mOjDcXdJNhm1qoEa_fQdz_XRk1c,28929
 deltacat/examples/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/examples/basic_logging.py,sha256=Umrum-gvY3gJjDNJ4hOMslMMq9bzeTM-s_DO4dGqJiw,2833
-deltacat/examples/hello_world.py,sha256=FvxkEDB1qVPJv55Fe1I7Coy0VLYJIisU7ZFYYkw9U2g,525
+deltacat/examples/hello_world.py,sha256=dm4GNvNL_HElPtE50sZzaZFrV48BcRL89nZp9SnLSIw,799
 deltacat/examples/compactor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/examples/compactor/bootstrap.py,sha256=6BXDWsvH3QuSDmd31Wc0I4_qLy9lZTW4_029MGRslzA,35126
 deltacat/examples/compactor/compactor.py,sha256=_FbM9paIly4JK3FYP3t5nDPNL98I6K9UbhidachNaAE,12431
@@ -155,8 +155,8 @@ deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py,sha256=N
 deltacat/examples/experimental/iceberg/converter/beam/utils/common.py,sha256=wrUk-8sojz4sudZPMzCHyNVLsw1opBg23C9_q6z8AhA,6388
 deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py,sha256=CAMzNgeDDt4UKVTnUCEu8oRTB57rjBUwK6MxLLO3GBA,10046
 deltacat/examples/indexer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-deltacat/examples/indexer/indexer.py,sha256=A5gfWomVtfu9uRrAB6UbitQ158ZFkviUscSveri1ggs,6334
-deltacat/examples/indexer/job_runner.py,sha256=M3ZKTcPL5gy1u_E3aZPal2u5INWZlBGDrsRhb5-F7n4,5921
+deltacat/examples/indexer/indexer.py,sha256=7SqMfzte-PzSKcOsVQ9k-F9dODio70yzU0M0S3CldH8,6553
+deltacat/examples/indexer/job_runner.py,sha256=Xwm6raw-Bx_Gq-8uMcw8ohdja2L6HgDlBLwCWDsRnbg,6398
 deltacat/examples/indexer/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/examples/indexer/gcp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/experimental/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -251,7 +251,7 @@ deltacat/storage/model/manifest.py,sha256=NnriTMm6waxixsjykfTABh4EWf985tA-A19AA2
 deltacat/storage/model/metafile.py,sha256=g-dgFX5fmW51EGhjiN5jpHR1LTOhf0jhUIMBapRD1Rw,58619
 deltacat/storage/model/namespace.py,sha256=9V1Qj232uc_UrVzZPIRzXyeYhJOYZ25wPLCx15-dx1Y,2630
 deltacat/storage/model/partition.py,sha256=UcHcBQV0Kf_RnVIFzoYQo8MUdOrjZdWsaiGUv8FKXx4,24298
-deltacat/storage/model/schema.py,sha256=jF8LvLvXUDI7pE2kv-LgGvGAuWRWC75JekqcS7147lc,122416
+deltacat/storage/model/schema.py,sha256=bGtrm3xB0cr20HRICcRE6vQ5JKaTTrPGKIQ2cPaaWC8,122763
 deltacat/storage/model/shard.py,sha256=boPOW45bwLwBazfXZpa3-C5SUSlgelpHf8Yl6357Bq0,1575
 deltacat/storage/model/sort_key.py,sha256=68TJavprndKLESnWfCjXaeMwFE6tcq3ZVOHloE9rV6Q,7287
 deltacat/storage/model/stream.py,sha256=VJgqVy4NS6IHLBLRf5OyehldZbIrarqGZYN07XF4Yp4,12609
@@ -285,10 +285,10 @@ deltacat/tests/_io/reader/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJW
 deltacat/tests/_io/reader/test_deltacat_read_api.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/tests/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/tests/aws/test_clients.py,sha256=23GMWfz27WWBDXSqphG9mfputsyS7j3I5P_HRk4YoKE,3790
-deltacat/tests/aws/test_s3u.py,sha256=kL3cL37d-myF_NE0oP3SVTkhEt9yrqmJI83Xhr-i74Q,6869
+deltacat/tests/aws/test_s3u.py,sha256=M27w8BDbv638ReYtr5kA2eXcd3xJmRwfOYxLV0tax_s,7268
 deltacat/tests/catalog/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/tests/catalog/test_catalogs.py,sha256=MFS_fISQq7VGzjaVVM1K9O_QSE-02SN0gHwUtQ7m-HU,11696
-deltacat/tests/catalog/test_default_catalog_impl.py,sha256=ISlJcuBVkYxUDsZuJoH1FyTeWtb7espbcFvcbY9OZ-o,468810
+deltacat/tests/catalog/test_default_catalog_impl.py,sha256=_YiLAHCxECd_lQKaFu22qlH1dDWMQmxo6qRkpxuxPrI,476575
 deltacat/tests/catalog/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/tests/catalog/main/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py,sha256=XvMoW1yjjoIs4-0A8_dqeY7ArysN6HJkSSHk7JnHeUI,4313
@@ -404,9 +404,9 @@ deltacat/tests/utils/ray_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5
 deltacat/tests/utils/ray_utils/test_concurrency.py,sha256=TjZpX0cjMDEIS79p_--j_BfT0zXKNkTLY1ZzNokBTs0,1211
 deltacat/tests/utils/ray_utils/test_dataset.py,sha256=glfihM4FBqqIWcW5SdU-SYqhmeMIPfl8Krfzj0oEviI,6418
 deltacat/types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-deltacat/types/media.py,sha256=8V02OmOhkoWODaB2M6fiJy4FBcVzODuynR4QHtEHbTg,12283
+deltacat/types/media.py,sha256=yIWs6Wcb00bnZeQvwdpQoXYbjvw4BL81dWZU50XE23g,22317
 deltacat/types/partial_download.py,sha256=QIpNTSwaiZ4TVl4A1N4PtblevKT5GwdXtGrouQMQs1E,2510
-deltacat/types/tables.py,sha256=xAS_XBIOLr_Lp0C7kPD8Lk-ubDh6h-ZZtCFZLh-9vMs,85384
+deltacat/types/tables.py,sha256=SrnPQB2-VFxkUSSRIDnImVhV39OnFJYHpZ3yH9Y3BI8,85083
 deltacat/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/utils/arguments.py,sha256=WzEjt8N4rXE5Vkcirz18ppZguBENxYm8F8m97LshV1Y,2162
 deltacat/utils/cloudpickle.py,sha256=XE7YDmQe56ksfl3NdYZkzOAhbHSuhNcBZGOehQpgZr0,1187
@@ -417,23 +417,23 @@ deltacat/utils/filesystem.py,sha256=jQ_vY0lBJcSuKqSOjSwB7q-s52ckYhJSvnGT_aYZvUc,
 deltacat/utils/metafile_locator.py,sha256=AJ6o2V5Cc7rJE89wWyKmsFIWOxmGM2APs8DCynmuTjg,2984
 deltacat/utils/metrics.py,sha256=HYKyZSrtVLu8gXezg_TMNUKJp4h1WWI0VEzn0Xlzf-I,10778
 deltacat/utils/numpy.py,sha256=tgq4j_9q9bERxsr0-h3t55BrciS2ivr1AZe7R1DldkA,5524
-deltacat/utils/pandas.py,sha256=4C9cdGDDUP9SSytjtgSdkhivrmmF43TwxN2qJhxsBKg,31226
+deltacat/utils/pandas.py,sha256=v8wS_pArpyVJ1p3oYFs2uy6qt38httUYXfVHdAGlPso,31418
 deltacat/utils/performance.py,sha256=7ZLaMkS1ehPSIhT5uOQVBHvjC70iKHzoFquFo-KL0PI,645
 deltacat/utils/placement.py,sha256=Lj20fb-eq8rgMdm_M2MBMfDLwhDM1sS1nJj2DvIK56s,12060
-deltacat/utils/polars.py,sha256=iYmgGRWrCjPEqwrf8bFY-oIjKfvo7jZar_GRagYxhTg,28838
-deltacat/utils/pyarrow.py,sha256=YwtYrWCWzEVFipQ-oE7lHpCYjJTdJujl9sPub8xtLYo,74233
+deltacat/utils/polars.py,sha256=-_6CGDhhZ_g8Z2sTc_GBWvHSNXNWOszitxYeckrOQ9g,28906
+deltacat/utils/pyarrow.py,sha256=i3__I5c1UCEjG8N1i2szWc2vHYUM4Hz1T1g1-mfQYfw,74410
 deltacat/utils/reader_compatibility_mapping.py,sha256=fZcNdw4kamkQF-ZzvBC4Zp_sbjxp0yOVIhLgV6V2Ee8,91409
 deltacat/utils/resources.py,sha256=Ax1OgLLbZI4oYpp4Ki27OLaST-7I-AJgZwU87FVfY8g,8253
 deltacat/utils/schema.py,sha256=m4Wm4ZQcpttzOUxex4dVneGlHy1_E36HspTcjNYzvVM,1564
-deltacat/utils/url.py,sha256=Meg4PQGzd_NQa966O0bcdyhalUoZ6-lt_A2g6suuqfI,44832
+deltacat/utils/url.py,sha256=H9L6Pgr8MNtUqFBwPAygogbZ2mZ1Het308J7mu0kpyQ,45690
 deltacat/utils/ray_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/utils/ray_utils/collections.py,sha256=hj20s4D2RF2jZETU_44r6mFbsczA0JI_I_4kWKTmqes,1951
 deltacat/utils/ray_utils/concurrency.py,sha256=Ceui6nQYKHTUOTltHNQIdb0OWHFhD73o8DhSXP-DYRQ,5457
 deltacat/utils/ray_utils/dataset.py,sha256=5RnVqFlKoZ6zabnQfjfXAKWuXDMKvLp4eNcDgpFj3OM,6480
 deltacat/utils/ray_utils/performance.py,sha256=d7JFM7vTXHzkGx9qNQcZzUWajnqINvYRwaM088_FpsE,464
 deltacat/utils/ray_utils/runtime.py,sha256=cf5koY9q4TzRg--BjPtC6y0jztq45F39KcC4K6Wmg4w,6946
-deltacat-2.0.0.post1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-deltacat-2.0.0.post1.dist-info/METADATA,sha256=RPLgM5MfzkMaH55vich_RCULmhvzmDi_2Zq6uK3-5aA,46952
-deltacat-2.0.0.post1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-deltacat-2.0.0.post1.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
-deltacat-2.0.0.post1.dist-info/RECORD,,
+deltacat-2.0.0.post2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+deltacat-2.0.0.post2.dist-info/METADATA,sha256=lIEs05JT0ZWkYJoM2JPh77nkvFhgVQ2yyieQPxA0Ofo,52106
+deltacat-2.0.0.post2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+deltacat-2.0.0.post2.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
+deltacat-2.0.0.post2.dist-info/RECORD,,

{deltacat-2.0.0.post1.dist-info → deltacat-2.0.0.post2.dist-info}/WHEEL RENAMED Viewed

File without changes

{deltacat-2.0.0.post1.dist-info → deltacat-2.0.0.post2.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{deltacat-2.0.0.post1.dist-info → deltacat-2.0.0.post2.dist-info}/top_level.txt RENAMED Viewed

File without changes

deltacat 2.0.0.post1__py3-none-any.whl → 2.0.0.post2__py3-none-any.whl

deltacat 2.0.0.post1__py3-none-any.whl → 2.0.0.post2__py3-none-any.whl