pyspiral 0.8.9__cp311-abi3-macosx_11_0_arm64.whl → 0.9.9__cp311-abi3-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pyspiral-0.8.9.dist-info → pyspiral-0.9.9.dist-info}/METADATA +4 -2
- {pyspiral-0.8.9.dist-info → pyspiral-0.9.9.dist-info}/RECORD +39 -34
- spiral/__init__.py +3 -2
- spiral/_lib.abi3.so +0 -0
- spiral/api/__init__.py +7 -0
- spiral/api/client.py +86 -8
- spiral/api/projects.py +4 -2
- spiral/api/tables.py +77 -0
- spiral/arrow_.py +4 -155
- spiral/cli/app.py +10 -4
- spiral/cli/chooser.py +30 -0
- spiral/cli/fs.py +3 -2
- spiral/cli/iceberg.py +1 -1
- spiral/cli/key_spaces.py +4 -4
- spiral/cli/orgs.py +1 -1
- spiral/cli/projects.py +2 -2
- spiral/cli/tables.py +47 -20
- spiral/cli/telemetry.py +13 -6
- spiral/cli/text.py +4 -4
- spiral/cli/transactions.py +84 -0
- spiral/cli/{types.py → types_.py} +6 -6
- spiral/cli/workloads.py +4 -4
- spiral/client.py +70 -8
- spiral/core/client/__init__.pyi +25 -16
- spiral/core/table/__init__.pyi +24 -22
- spiral/debug/manifests.py +21 -9
- spiral/debug/scan.py +4 -6
- spiral/demo.py +145 -38
- spiral/enrichment.py +18 -23
- spiral/expressions/__init__.py +3 -75
- spiral/expressions/base.py +5 -10
- spiral/huggingface.py +456 -0
- spiral/input.py +131 -0
- spiral/ray_.py +75 -0
- spiral/scan.py +218 -64
- spiral/table.py +5 -4
- spiral/transaction.py +95 -15
- spiral/iterable_dataset.py +0 -106
- {pyspiral-0.8.9.dist-info → pyspiral-0.9.9.dist-info}/WHEEL +0 -0
- {pyspiral-0.8.9.dist-info → pyspiral-0.9.9.dist-info}/entry_points.txt +0 -0
spiral/input.py
ADDED
@@ -0,0 +1,131 @@
+import builtins
+from typing import TYPE_CHECKING, TypeAlias, Union
+
+import numpy as np
+import pyarrow as pa
+
+from spiral import arrow_
+
+if TYPE_CHECKING:
+    import pandas as pd
+
+ArrayLike: TypeAlias = Union[pa.Array, pa.ChunkedArray, builtins.list, np.ndarray, "pd.Series"]
+TableLike: TypeAlias = Union[
+    pa.Table,
+    pa.RecordBatch,
+    pa.RecordBatchReader,
+    pa.StructArray,
+    pa.ChunkedArray,  # must be of struct type
+    builtins.list[dict],  # list of objects, each element is a row
+    dict[str, ArrayLike],  # dot-separated field names are nested
+    "pd.DataFrame",
+]
+
+
+def evaluate(table: TableLike) -> pa.RecordBatchReader:
+    if isinstance(table, pa.RecordBatchReader):
+        return table
+
+    if isinstance(table, pa.Table):
+        return table.to_reader()
+    if isinstance(table, pa.RecordBatch):
+        return pa.RecordBatchReader.from_batches(table.schema, [table])
+
+    if isinstance(table, pa.StructArray):
+        return pa.Table.from_struct_array(table).to_reader()
+    if isinstance(table, pa.ChunkedArray):
+        if not pa.types.is_struct(table.type):
+            raise ValueError(f"Arrow ChunkedArray must have a struct type, got {table.type}.")
+        struct_type: pa.StructType = table.type  # type: ignore[assignment]
+
+        def _iter_batches():
+            for chunk in table.chunks:
+                chunk: pa.StructArray
+                yield pa.RecordBatch.from_struct_array(chunk)
+
+        return pa.RecordBatchReader.from_batches(pa.schema(struct_type.fields), _iter_batches())
+    if isinstance(table, pa.Array):
+        raise ValueError(f"Arrow Array must be a struct array, got {type(table)}.")
+
+    if isinstance(table, builtins.list):
+        # Handle empty array case
+        if len(table) == 0:
+            return pa.RecordBatchReader.from_batches(pa.schema([]), [])
+        return evaluate(pa.array(table))
+
+    if isinstance(table, dict):
+        table: dict = dot_separated_dict_to_nested(table)
+
+        return evaluate(_evaluate_dict(table))
+
+    try:
+        import pandas as pd
+
+        if isinstance(table, pd.DataFrame):
+            return evaluate(pa.Table.from_pandas(table))
+    except ImportError:
+        pass
+
+    raise TypeError(f"Unsupported table-like type: {type(table)}")
+
+
+def _evaluate_dict(table: dict) -> pa.StructArray:
+    """Handle dot-separated field names as nested dictionaries."""
+    table = dot_separated_dict_to_nested(table)
+    return _dict_to_struct_array(table)
+
+
+def _dict_to_struct_array(table) -> pa.StructArray:
+    data = {}
+    for key, value in table.items():
+        data[key] = _evaluate_array_like(value) if not isinstance(value, dict) else _dict_to_struct_array(value)
+    return arrow_.dict_to_struct_array(data)
+
+
+def _evaluate_array_like(array: ArrayLike) -> pa.Array:
+    if isinstance(array, pa.Array):
+        return array
+    if isinstance(array, pa.ChunkedArray):
+        return array.combine_chunks()
+
+    if isinstance(array, np.ndarray):
+        return _evaluate_array_like(pa.array(array))
+    if isinstance(array, builtins.list):
+        return _evaluate_array_like(pa.array(array))
+
+    try:
+        import pandas as pd
+
+        if isinstance(array, pd.Series):
+            return _evaluate_array_like(pa.Array.from_pandas(array))
+    except ImportError:
+        pass
+
+    raise TypeError(f"Unsupported array-like type: {type(array)}")
+
+
+def dot_separated_dict_to_nested(expr: dict) -> dict:
+    """Handle dot-separated field names as nested dictionaries."""
+    data = {}
+
+    for name in expr.keys():
+        if "." not in name:
+            if name in data:
+                raise KeyError(f"Conflicting field name: {name}")
+            data[name] = expr[name]
+            continue
+
+        parts = name.split(".")
+        child_data = data
+        for part in parts[:-1]:
+            if part not in child_data:
+                child_data[part] = {}
+            if not isinstance(child_data[part], dict):
+                raise KeyError(f"Conflicting field name: {name}")
+            child_data = child_data[part]
+
+        if parts[-1] in child_data:
+            raise KeyError(f"Conflicting field name: {name}")
+        child_data[parts[-1]] = expr[name]
+
+    return data
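The new spiral.input module normalizes many table-like inputs into a pyarrow.RecordBatchReader and nests dot-separated column names into struct fields. A minimal usage sketch follows; only the evaluate entry point comes from the diff above, the sample data is illustrative.

import pyarrow as pa

from spiral.input import evaluate

# Dot-separated keys are nested before conversion, so "meta.source" and
# "meta.score" end up as children of a single "meta" struct column.
reader = evaluate({"id": [1, 2], "meta.source": ["a", "b"], "meta.score": [0.1, 0.2]})
table = reader.read_all()
# Expected schema, roughly: id: int64, meta: struct<source: string, score: double>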
spiral/ray_.py
ADDED
@@ -0,0 +1,75 @@
+from __future__ import annotations
+
+from collections.abc import Iterable
+from typing import TYPE_CHECKING
+
+import pyarrow as pa
+import ray
+from ray.data.block import Block
+from ray.data.datasource.datasink import WriteResult
+
+from spiral import Spiral, Transaction
+from spiral.core.config import ClientSettings
+from spiral.transaction import TransactionOps
+from spiral.types_ import Timestamp
+
+if TYPE_CHECKING:
+    from ray.data._internal.execution.interfaces import TaskContext
+
+
+# TODO(DK): we should just ship the serde bytes not JSON-serialized strings.
+class Datasink(ray.data.Datasink[tuple[Timestamp, list[str]]]):
+    def __init__(self, txn: Transaction):
+        super().__init__()
+        self._table_id: str = txn.table.table_id
+        self._spiral_config_json = txn.table.spiral.config.to_json()
+        self._txn = txn
+
+    def __getstate__(self):
+        state = dict(self.__dict__)
+        state["_txn"] = None  # do not serialize the transaction
+        return state
+
+    def __setstate__(self, state):
+        self.__dict__.update(state)
+
+    def on_write_complete(self, write_result: WriteResult[TransactionOps]):
+        assert self._txn is not None  # on_write_complete happens on the driver
+
+        for tx_ops in write_result.write_returns:
+            self._txn.include(tx_ops)
+
+    def on_write_failed(self, error: Exception):
+        pass
+
+    def on_write_start(self, schema: pa.Schema | None = None):
+        pass
+
+    def write(
+        self,
+        blocks: Iterable[Block],
+        ctx: TaskContext,
+    ) -> TransactionOps:
+        assert self._txn is None  # writes happen on workers
+
+        import pyarrow
+
+        sp = Spiral(config=ClientSettings.from_json(self._spiral_config_json))
+
+        # Do *not* use a context manager and do *not* call commit/abort.
+        # We instead `take` and send the operations to the driver node.
+        txn = sp.table(self._table_id).txn()
+
+        for block in blocks:
+            if not isinstance(block, pyarrow.Table):
+                try:
+                    import pandas as pd
+
+                    assert isinstance(block, pd.DataFrame)
+                    block = pyarrow.Table.from_pandas(block)
+                except ImportError:
+                    raise TypeError(f"Expected block to be a pyarrow.Table or pandas.DataFrame, got {type(block)}")
+
+            txn.write(block)
+
+        return txn.take()
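The new spiral.ray_.Datasink lets a Ray Data pipeline write into a Spiral table: workers write blocks inside per-worker transactions and return their TransactionOps, which on_write_complete folds back into the driver-side transaction. A hedged sketch of how it might be wired up; the table identifier is hypothetical, the write call uses Ray's public Dataset.write_datasink API, and commit-on-exit follows the Table.write pattern shown further below.

import ray

from spiral import Spiral
from spiral.ray_ import Datasink

sp = Spiral()  # assumes default client settings resolve from the environment
table = sp.table("my-project.my-table")  # hypothetical table identifier

ds = ray.data.from_items([{"id": i, "value": i * i} for i in range(100)])

# The driver-side transaction collects the TransactionOps returned by workers.
with table.txn() as txn:
    ds.write_datasink(Datasink(txn))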
spiral/scan.py
CHANGED
@@ -1,18 +1,19 @@
-from
-from typing import TYPE_CHECKING, Any, Optional
+from typing import TYPE_CHECKING, Any, TypedDict, cast
 
 import pyarrow as pa
+from typing_extensions import Unpack
 
 from spiral.core.client import Shard, ShuffleConfig
 from spiral.core.table import Scan as CoreScan
 from spiral.core.table.spec import Schema
-from spiral.
+from spiral.input import TableLike, evaluate
 
 if TYPE_CHECKING:
     import dask.dataframe as dd
     import datasets.iterable_dataset as hf  # noqa
     import pandas as pd
     import polars as pl
+    import ray.data
     import streaming  # noqa
     import torch.utils.data as torchdata  # noqa
 
@@ -20,6 +21,25 @@ if TYPE_CHECKING:
     from spiral.dataloader import SpiralDataLoader, World  # noqa
 
 
+class ExecuteKwargs(TypedDict):
+    shards: list[Shard] | None
+    key_table: pa.Table | pa.RecordBatchReader | None
+    batch_readahead: int | None
+    batch_aligned: bool | None
+    hide_progress_bar: bool | None
+
+
+class DistributedExecuteKwargs(TypedDict):
+    shards: list[Shard] | None
+    batch_readahead: int | None
+    hide_progress_bar: bool | None
+
+
+class _PoppedDistributedExecuteKwargs(TypedDict):
+    batch_readahead: int | None
+    hide_progress_bar: bool | None
+
+
 class Scan:
     """Scan object."""
 
@@ -54,10 +74,10 @@ class Scan:
         self,
         *,
         shards: list[Shard] | None = None,
-        key_table:
-        batch_size: int | None = None,
+        key_table: TableLike | None = None,
         batch_readahead: int | None = None,
-
+        batch_aligned: bool | None = None,
+        hide_progress_bar: bool | None = None,
     ) -> pa.RecordBatchReader:
         """Read as a stream of RecordBatches.
 
@@ -67,32 +87,38 @@ class Scan:
                 Must not be provided together with key_table.
             key_table: a table of keys to "take" (including aux columns for cell-push-down).
                 If None, the scan will be executed without a key table.
-            batch_size: the maximum number of rows per returned batch.
-                This is currently only respected when the key_table is used. If key table is a
-                RecordBatchReader, the batch_size argument must be None, and the existing batching is respected.
             batch_readahead: the number of batches to prefetch in the background.
+            batch_aligned: if True, ensures that batches are aligned with key_table batches.
+                The stream will yield batches that correspond exactly to the batches in key_table,
+                but may be less efficient and use more memory (aligning batches requires buffering and maybe a copy).
+                Must only be used when key_table is provided.
             hide_progress_bar: If True, disables the progress bar during reading.
         """
-        if
-
-
-
-
-
-
+        batch_aligned = False if batch_aligned is None else batch_aligned
+        hide_progress_bar = False if hide_progress_bar is None else hide_progress_bar
+
+        if key_table is not None:
+            key_table = evaluate(key_table)
+
+        # NOTE(marko): Uncomment for better debuggability.
+        # rb: pa.RecordBatch = self.core.to_record_batch(shards=shards, key_table=key_table)
+        # return pa.RecordBatchReader.from_batches(rb.schema, [rb])
 
         return self.core.to_record_batches(
-            shards=shards,
+            shards=shards,
+            key_table=key_table,
+            batch_readahead=batch_readahead,
+            batch_aligned=batch_aligned,
+            hide_progress_bar=hide_progress_bar,
         )
 
     def to_unordered_record_batches(
         self,
         *,
         shards: list[Shard] | None = None,
-        key_table:
-        batch_size: int | None = None,
+        key_table: TableLike | None = None,
         batch_readahead: int | None = None,
-        hide_progress_bar: bool =
+        hide_progress_bar: bool | None = None,
    ) -> pa.RecordBatchReader:
         """Read as a stream of RecordBatches, NOT ordered by key.
 
@@ -102,34 +128,43 @@ class Scan:
                 Must not be provided together with key_table.
             key_table: a table of keys to "take" (including aux columns for cell-push-down).
                 If None, the scan will be executed without a key table.
-            batch_size: the maximum number of rows per returned batch.
-                This is currently only respected when the key_table is used. If key table is a
-                RecordBatchReader, the batch_size argument must be None, and the existing batching is respected.
             batch_readahead: the number of batches to prefetch in the background.
             hide_progress_bar: If True, disables the progress bar during reading.
         """
-        if
-
-
-
-        )
-        elif isinstance(key_table, pa.Table):
-            key_table = key_table.to_reader(max_chunksize=batch_size)
+        hide_progress_bar = False if hide_progress_bar is None else hide_progress_bar
+
+        if key_table is not None:
+            key_table = evaluate(key_table)
 
         return self.core.to_unordered_record_batches(
-            shards=shards,
+            shards=shards,
+            key_table=key_table,
+            batch_readahead=batch_readahead,
+            hide_progress_bar=hide_progress_bar,
         )
 
-    def to_table(self, **kwargs) -> pa.Table:
-        """Read into a single PyArrow Table.
-
-
-
-
+    def to_table(self, **kwargs: Unpack[ExecuteKwargs]) -> pa.Table:
+        """Read into a single PyArrow Table.
+
+        Warnings:
+            This downloads the entire Spiral Table into memory on this machine.
+
+        Args:
+            shards: Optional list of shards to evaluate.
+                If provided, only the specified shards will be read.
+                Must not be provided together with key_table.
+            key_table: a table of keys to "take" (including aux columns for cell-push-down).
+                If None, the scan will be executed without a key table.
+            batch_readahead: the number of batches to prefetch in the background.
+            hide_progress_bar: If True, disables the progress bar during reading.
+
+        Returns:
+            pyarrow.Table
 
+        """
         return self.to_record_batches(**kwargs).read_all()
 
-    def to_dask(self) -> "dd.DataFrame":
+    def to_dask(self, **kwargs: Unpack[DistributedExecuteKwargs]) -> "dd.DataFrame":
         """Read into a Dask DataFrame.
 
         Requires the `dask` package to be installed.
@@ -137,31 +172,126 @@ class Scan:
         Dask execution has some limitations, e.g. UDFs are not currently supported. These limitations
         usually manifest as serialization errors when Dask workers attempt to serialize the state. If you are
         encountering such issues, please reach out to the support for assistance.
+
+        Args:
+            shards: Optional list of shards to evaluate.
+                If provided, only the specified shards will be read.
+            batch_readahead: the number of batches to prefetch in the background.
+            hide_progress_bar: If True, disables the progress bar during reading.
+
+        Returns:
+            dask.dataframe.DataFrame
+
         """
         import dask.dataframe as dd
 
-
-
-
-
-        )
-
+        config_json = self.spiral.config.to_json()
+        state_bytes = self.core.plan_context().to_bytes_compressed()
+
+        shards = kwargs.pop("shards", None) or self.shards()
+        task_kwargs = cast(_PoppedDistributedExecuteKwargs, kwargs)
+
+        def _read_shard(shard: Shard) -> "pd.DataFrame":
+            arrow_table = _read_shard_task(
+                shard,
+                config_json=config_json,
+                state_bytes=state_bytes,
+                **task_kwargs,
+            )
+            return arrow_table.to_pandas()
+
+        return dd.from_map(_read_shard, shards)
 
-    def
+    def to_ray_dataset(self, **kwargs: Unpack[DistributedExecuteKwargs]) -> "ray.data.Dataset":
+        """Read into a Ray Dataset.
+
+        Requires the `ray` package to be installed.
+
+        Warnings:
+            If the Scan returns zero rows, the resulting Ray Dataset will have [an empty
+            schema](https://github.com/ray-project/ray/issues/59946).
+
+        Args:
+            shards: Optional list of shards to evaluate.
+                If provided, only the specified shards will be read.
+            batch_readahead: the number of batches to prefetch in the background.
+            hide_progress_bar: If True, disables the progress bar during reading.
+
+        Returns:
+            ray.data.Dataset: A Ray Dataset distributed across shards.
+
+        """
+        import ray
+
+        config_json = self.spiral.config.to_json()
+        state_bytes = self.core.plan_context().to_bytes_compressed()
+
+        shards = kwargs.pop("shards", None) or self.shards()
+        task_kwargs = cast(_PoppedDistributedExecuteKwargs, kwargs)
+
+        read_shard_remote = ray.remote(_read_shard_task)
+        refs = [
+            read_shard_remote.remote(
+                shard,
+                config_json=config_json,
+                state_bytes=state_bytes,
+                **task_kwargs,
+            )
+            for shard in shards
+        ]
+
+        return ray.data.from_arrow_refs(refs)
+
+    def to_pandas(self, **kwargs: Unpack[ExecuteKwargs]) -> "pd.DataFrame":
         """Read into a Pandas DataFrame.
 
         Requires the `pandas` package to be installed.
+
+        Warnings:
+            This downloads the entire Spiral Table into memory on this machine.
+
+        Args:
+            shards: Optional list of shards to evaluate.
+                If provided, only the specified shards will be read.
+                Must not be provided together with key_table.
+            key_table: a table of keys to "take" (including aux columns for cell-push-down).
+                If None, the scan will be executed without a key table.
+            batch_readahead: the number of batches to prefetch in the background.
+            hide_progress_bar: If True, disables the progress bar during reading.
+
+        Returns:
+            pandas.DataFrame
+
         """
         return self.to_record_batches(**kwargs).read_all().to_pandas()
 
-    def to_polars(self, **kwargs) -> "pl.DataFrame":
+    def to_polars(self, **kwargs: Unpack[ExecuteKwargs]) -> "pl.DataFrame":
         """Read into a Polars DataFrame.
 
         Requires the `polars` package to be installed.
+
+        Warnings:
+            This downloads the entire Spiral Table into memory on this machine. To lazily interact
+            with a Spiral Table try Table.to_polars_lazy_frame.
+
+        Args:
+            shards: Optional list of shards to evaluate.
+                If provided, only the specified shards will be read.
+                Must not be provided together with key_table.
+            key_table: a table of keys to "take" (including aux columns for cell-push-down).
+                If None, the scan will be executed without a key table.
+            batch_readahead: the number of batches to prefetch in the background.
+            hide_progress_bar: If True, disables the progress bar during reading.
+
+        Returns:
+            polars.DataFrame
+
         """
         import polars as pl
 
-
+        df = pl.from_arrow(self.to_record_batches(**kwargs))
+        assert isinstance(df, pl.DataFrame)
+        return df
 
     def to_data_loader(
         self, seed: int = 42, shuffle_buffer_size: int = 0, batch_size: int = 32, **kwargs
@@ -186,7 +316,7 @@ class Scan:
 
     def to_distributed_data_loader(
         self,
-        world:
+        world: "World | None" = None,
         shards: list[Shard] | None = None,
         seed: int = 42,
         shuffle_buffer_size: int = 0,
@@ -315,7 +445,8 @@ class Scan:
                 If None, no shuffling is performed.
             batch_readahead: Controls how many batches to read ahead concurrently.
                 If pipeline includes work after reading (e.g. decoding, transforming, ...) this can be set higher.
-                Otherwise, it should be kept low to reduce next batch latency.
+                Otherwise, it should be kept low to reduce next batch latency.
+                Defaults to min(number of CPU cores, 64) or to shuffle.buffer_size/16 if shuffle is not None.
             infinite: If True, the returned IterableDataset will loop infinitely over the data,
                 re-shuffling ranges after exhausting all data.
         """
@@ -326,7 +457,7 @@ class Scan:
             infinite=infinite,
         )
 
-        from spiral.
+        from spiral.huggingface import to_iterable_dataset
 
         return to_iterable_dataset(stream)
 
@@ -342,15 +473,15 @@ class Scan:
         """
         return self.core.shards()
 
-    def
-        """Get the scan state as
+    def state_bytes(self) -> bytes:
+        """Get the scan state as bytes.
 
         This state can be used to resume the scan later using Spiral.resume_scan().
 
         Returns:
-
+            Compressed bytes representing the internal scan state.
         """
-        return self.core.
+        return self.core.plan_context().to_bytes_compressed()
 
     def _debug(self):
         # Visualizes the scan, mainly for debugging purposes.
@@ -358,12 +489,6 @@ class Scan:
 
         show_scan(self.core)
 
-    def _dump_manifests(self):
-        # Print manifests in a human-readable format.
-        from spiral.debug.manifests import display_scan_manifests
-
-        display_scan_manifests(self.core)
-
     def _dump_metrics(self):
         # Print metrics in a human-readable format.
         from spiral.debug.metrics import display_metrics
@@ -372,12 +497,41 @@ class Scan:
 
 
 # NOTE(marko): This function must be picklable!
-
+
+
+def _read_shard_task(
+    shard: Shard,
+    *,
+    config_json: str,
+    state_bytes: bytes,
+    key_table: pa.Table | pa.RecordBatchReader | None = None,
+    batch_readahead: int | None = None,
+    hide_progress_bar: bool | None = None,
+) -> pa.Table:
+    """Ray worker function to read a single shard as Arrow table.
+
+    Args:
+        shard: The shard to read
+        config_json: Serialized ClientSettings
+        state_bytes: Serialized scan state
+        key_table: a table of keys to "take" (including aux columns for cell-push-down).
+            If None, the scan will be executed without a key table.
+        batch_readahead: the number of batches to prefetch in the background.
+        hide_progress_bar: If True, disables the progress bar during reading.
+
+    Returns:
+        PyArrow Table containing the shard data
+    """
     from spiral import Spiral
     from spiral.settings import ClientSettings
 
     config = ClientSettings.from_json(config_json)
     sp = Spiral(config=config)
-    task_scan = sp.resume_scan(
-
-    return task_scan.
+    task_scan = sp.resume_scan(state_bytes)
+
+    return task_scan.to_table(
+        shards=[shard],
+        key_table=key_table,
+        batch_readahead=batch_readahead,
+        hide_progress_bar=hide_progress_bar,
+    )
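Taken together, plan_context().to_bytes_compressed(), resume_scan, and the per-shard _read_shard_task give the distributed readers a common shape: serialize the client settings and the scan plan on the driver, then rebuild a Spiral client and resume the scan on each worker. A hedged sketch of the user-facing side; how the Scan itself is obtained is illustrative and not confirmed by this diff.

from spiral import Spiral

sp = Spiral()  # assumes default client settings
scan = sp.table("my-project.my-table").scan()  # hypothetical: however a Scan is built in your code

# Shard-parallel read into a Ray Dataset; each Ray task runs _read_shard_task
# with the serialized ClientSettings and the compressed plan context.
ray_ds = scan.to_ray_dataset(batch_readahead=4, hide_progress_bar=True)

# The same state can be shipped elsewhere and resumed later.
state = scan.state_bytes()
resumed = sp.resume_scan(state)
first_shard_only = resumed.to_table(shards=resumed.shards()[:1])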
spiral/table.py
CHANGED
@@ -5,6 +5,7 @@ from spiral.core.table import Table as CoreTable
 from spiral.core.table.spec import Schema
 from spiral.enrichment import Enrichment
 from spiral.expressions.base import Expr, ExprLike
+from spiral.input import TableLike
 from spiral.snapshot import Snapshot
 from spiral.transaction import Transaction
 
@@ -99,17 +100,17 @@ class Table(Expr):
         """
         return self.core.get_schema(asof=None)
 
-    def write(self,
+    def write(self, table: TableLike, push_down_nulls: bool = False, **kwargs) -> None:
         """Write an item to the table inside a single transaction.
 
         :param push_down_nulls: Whether to push down nullable structs down its children. E.g. `[{"a": 1}, null]` would
             become `[{"a": 1}, {"a": null}]`. SpiralDB doesn't allow struct-level nullability, so use this option if your
             data contains nullable structs.
 
-        :param
+        :param table: The table to write.
         """
         with self.txn(**kwargs) as txn:
-            txn.write(
+            txn.write(table, push_down_nulls=push_down_nulls)
 
     def enrich(
         self,
@@ -157,7 +158,7 @@ class Table(Expr):
         it is important that the primary key columns are unique within the transaction.
         The behavior is undefined if this is not the case.
         """
-        return Transaction(self.spiral.core.transaction(self.core, **kwargs))
+        return Transaction(self, self.spiral.core.transaction(self.core, **kwargs))
 
     def to_arrow_dataset(self) -> "ds.Dataset":
         """Returns a PyArrow Dataset representing the table."""
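With Table.write now typed against TableLike, the same inputs accepted by spiral.input.evaluate can be written directly. A hedged sketch; the table handle and column names are illustrative.

import pyarrow as pa

from spiral import Spiral

sp = Spiral()  # assumes default client settings
t = sp.table("my-project.my-table")  # hypothetical table identifier

# A dict of columns: dot-separated names become nested struct fields.
t.write({"id": [1, 2, 3], "payload.text": ["a", "b", "c"]})

# An equivalent pyarrow.Table with an explicit struct column.
t.write(pa.table({"id": [4], "payload": [{"text": "d"}]}))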