pyspiral 0.8.9__cp311-abi3-macosx_11_0_arm64.whl → 0.9.9__cp311-abi3-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pyspiral-0.8.9.dist-info → pyspiral-0.9.9.dist-info}/METADATA +4 -2
- {pyspiral-0.8.9.dist-info → pyspiral-0.9.9.dist-info}/RECORD +39 -34
- spiral/__init__.py +3 -2
- spiral/_lib.abi3.so +0 -0
- spiral/api/__init__.py +7 -0
- spiral/api/client.py +86 -8
- spiral/api/projects.py +4 -2
- spiral/api/tables.py +77 -0
- spiral/arrow_.py +4 -155
- spiral/cli/app.py +10 -4
- spiral/cli/chooser.py +30 -0
- spiral/cli/fs.py +3 -2
- spiral/cli/iceberg.py +1 -1
- spiral/cli/key_spaces.py +4 -4
- spiral/cli/orgs.py +1 -1
- spiral/cli/projects.py +2 -2
- spiral/cli/tables.py +47 -20
- spiral/cli/telemetry.py +13 -6
- spiral/cli/text.py +4 -4
- spiral/cli/transactions.py +84 -0
- spiral/cli/{types.py → types_.py} +6 -6
- spiral/cli/workloads.py +4 -4
- spiral/client.py +70 -8
- spiral/core/client/__init__.pyi +25 -16
- spiral/core/table/__init__.pyi +24 -22
- spiral/debug/manifests.py +21 -9
- spiral/debug/scan.py +4 -6
- spiral/demo.py +145 -38
- spiral/enrichment.py +18 -23
- spiral/expressions/__init__.py +3 -75
- spiral/expressions/base.py +5 -10
- spiral/huggingface.py +456 -0
- spiral/input.py +131 -0
- spiral/ray_.py +75 -0
- spiral/scan.py +218 -64
- spiral/table.py +5 -4
- spiral/transaction.py +95 -15
- spiral/iterable_dataset.py +0 -106
- {pyspiral-0.8.9.dist-info → pyspiral-0.9.9.dist-info}/WHEEL +0 -0
- {pyspiral-0.8.9.dist-info → pyspiral-0.9.9.dist-info}/entry_points.txt +0 -0
spiral/core/client/__init__.pyi
CHANGED
@@ -5,7 +5,8 @@ import pyarrow as pa
 from spiral.api.types import DatasetName, IndexName, ProjectId, RootUri, TableId, TableName
 from spiral.core.authn import Authn
 from spiral.core.config import ClientSettings
-from spiral.core.table import
+from spiral.core.table import KeyRange, Scan, ScanContext, Snapshot, Table, Transaction
+from spiral.core.table.manifests import FragmentManifest
 from spiral.core.table.spec import ColumnGroup, Schema
 from spiral.expressions import Expr

@@ -36,16 +37,23 @@ class Spiral:
         asof: int | None = None,
         shard: Shard | None = None,
         key_columns: KeyColumns | None = None,
+        progress: bool = True,
     ) -> Scan:
         """Construct a table scan."""
         ...

-    def load_scan(self,
-        """Load a scan from a serialized scan
+    def load_scan(self, context: ScanContext) -> Scan:
+        """Load a scan from a serialized scan context."""
         ...

-    def transaction(
-
+    def transaction(
+        self,
+        table: Table,
+        *,
+        partition_max_bytes: int | None = None,
+        compact_threshold: int | None = None,
+    ) -> Transaction:
+        """Begin a table transaction."""
         ...

     def search(
@@ -220,31 +228,32 @@ class Internal:
         Flush the write-ahead log of the table.
         """
         ...
-    def
+    def truncate_metadata(self, table: Table) -> None:
         """
-
+        Truncate the column group metadata of the table.
+
+        This removes compacted fragments from metadata.
+        IMPORTANT: The command will break as-of before truncation for the table.
         """
         ...
-    def
+    def update_text_index(self, index: TextIndex, snapshot: Snapshot) -> None:
         """
         Index table changes up to the given snapshot.
         """
         ...
-    def
+    def update_key_space_index(self, index: KeySpaceIndex, snapshot: Snapshot) -> None:
         """
-
+        Index table changes up to the given snapshot.
         """
         ...
-    def
-        self, snapshot: Snapshot, key_space_state: KeySpaceState, column_group: ColumnGroup
-    ) -> ColumnGroupState:
+    def key_space_manifest(self, snapshot: Snapshot) -> FragmentManifest:
         """
-        The
+        The manifest of the key space of the table as of the given snapshot.
         """
         ...
-    def
+    def column_group_manifest(self, snapshot: Snapshot, column_group: ColumnGroup) -> FragmentManifest:
         """
-        The
+        The manifest of the given column group of the table as of the given snapshot.
         """
         ...
     def key_space_index_shards(self, index: KeySpaceIndex) -> list[Shard]:
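Taken together, the stub changes above replace the 0.8.x signatures with an explicit ScanContext argument for load_scan and keyword-only tuning knobs on transaction. A minimal, hedged sketch of driving the new surface, using only the signatures declared above; how the core Spiral client and the Table handle are obtained is assumed and not shown in this diff.

# Hedged sketch based on the 0.9.x stubs above; `sp` is assumed to be a
# spiral.core.client.Spiral instance and `table` a spiral.core.table.Table.
from spiral.core.table import ScanContext, Table, Transaction


def reload_scan(sp, serialized: bytes):
    # load_scan() now takes an explicit, deserialized ScanContext.
    ctx = ScanContext.from_bytes_compressed(serialized)
    return sp.load_scan(ctx)


def begin_write(sp, table: Table) -> Transaction:
    # transaction() now takes the target table plus keyword-only tuning knobs.
    return sp.transaction(table, partition_max_bytes=None, compact_threshold=None)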
spiral/core/table/__init__.pyi
CHANGED
@@ -52,10 +52,12 @@ class Snapshot:
     table: Table
     wal: WriteAheadLog

-
-
+    def column_groups(self) -> list[ColumnGroup]: ...
+
+class ScanContext:
+    def to_bytes_compressed(self) -> bytes: ...
     @staticmethod
-    def
+    def from_bytes_compressed(compressed: bytes) -> ScanContext: ...

 class MaterializablePlan:
     pass
@@ -73,26 +75,36 @@ class Scan:
     def is_empty(self) -> bool: ...
     def shards(self) -> list[Shard]: ...
     def table_ids(self) -> list[str]: ...
+    def context(self) -> ScanContext: ...
     def column_groups(self) -> list[ColumnGroup]: ...
-    def
-
-
+    def key_space_manifest(self, table_id: str) -> FragmentManifest:
+        """
+        Manifest of the key fragments for the given table id.
+        """
+        ...
+    def column_group_manifest(self, column_group: ColumnGroup) -> FragmentManifest:
+        """
+        Manifest of the fragments for the given column group.
+        """
+        ...
+    def plan_context(self) -> ScanContext: ...
     def materializable_plan(self) -> MaterializablePlan: ...
     def to_record_batches(
         self,
         *,
         shards: list[Shard] | None = None,
-        key_table: pa.Table | pa.
+        key_table: pa.Table | pa.RecordBatchReader | None = None,
         batch_readahead: int | None = None,
-
+        batch_aligned: bool = False,
+        hide_progress_bar: bool = False,
     ) -> pa.RecordBatchReader: ...
     def to_unordered_record_batches(
         self,
         *,
         shards: list[Shard] | None = None,
-        key_table: pa.Table | pa.
+        key_table: pa.Table | pa.RecordBatchReader | None = None,
         batch_readahead: int | None = None,
-
+        hide_progress_bar: bool = False,
     ) -> pa.RecordBatchReader: ...
     def to_shuffled_record_batches(
         self,
@@ -115,17 +127,6 @@ class Scan:
     ) -> EvaluatedPlanStream: ...
     def metrics(self) -> dict[str, Any]: ...

-class KeySpaceState:
-    manifest: FragmentManifest
-
-    def key_schema(self) -> Schema: ...
-
-class ColumnGroupState:
-    manifest: FragmentManifest
-    column_group: ColumnGroup
-
-    def schema(self) -> Schema: ...
-
 class Transaction:
     status: str

@@ -137,6 +138,7 @@ class Transaction:
     def ops(self) -> list[Operation]: ...
     def take(self) -> list[Operation]: ...
     def include(self, ops: list[Operation]): ...
-    def commit(self
+    def commit(self): ...
     def abort(self): ...
     def is_empty(self) -> bool: ...
+    def snapshot(self) -> Snapshot: ...
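The removed KeySpaceState/ColumnGroupState classes are superseded by the new ScanContext plus the manifest accessors on Scan. A rough sketch of the serialization round trip and manifest access, assuming a Scan obtained from the client; its construction is outside this diff.

# Hedged sketch using only methods declared in the stubs above.
from spiral.core.table import Scan, ScanContext


def ship_scan(scan: Scan) -> bytes:
    # Serialize the scan's context, e.g. to hand it to a worker process.
    return scan.context().to_bytes_compressed()


def restore_context(payload: bytes) -> ScanContext:
    # The receiving side rebuilds the context; Spiral.load_scan() accepts it.
    return ScanContext.from_bytes_compressed(payload)


def inspect_manifests(scan: Scan) -> None:
    # Fragment manifests are now read straight off the scan.
    for table_id in scan.table_ids():
        print(scan.key_space_manifest(table_id))
    for cg in scan.column_groups():
        print(scan.column_group_manifest(cg))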
spiral/debug/manifests.py
CHANGED
@@ -14,9 +14,9 @@ def display_scan_manifests(scan: Scan):
     if len(scan.table_ids()) != 1:
         raise NotImplementedError("Multiple table scans are not supported.")
     table_id = scan.table_ids()[0]
-    key_space_manifest = scan.
+    key_space_manifest = scan.key_space_manifest(table_id)
     column_group_manifests = [
-        (column_group, scan.
+        (column_group, scan.column_group_manifest(column_group)) for column_group in scan.column_groups()
     ]

     display_manifests(key_space_manifest, column_group_manifests, scan.key_schema(), None)
@@ -61,8 +61,10 @@ def _table_of_fragments(manifest: FragmentManifest, title: str, key_schema: Sche
     # Create rich table
     table = Table(title=None, show_header=True, header_style="bold")
     table.add_column("ID", style="cyan", no_wrap=True)
-    table.add_column("
+    table.add_column("Data", justify="right")
+    table.add_column("Metadata", justify="right")
     table.add_column("Format", justify="center")
+    table.add_column("Key Space", justify="center")
     table.add_column("Key Span", justify="center")
     table.add_column("Key Range", justify="center")
     table.add_column("Level", justify="center")
@@ -74,12 +76,20 @@ def _table_of_fragments(manifest: FragmentManifest, title: str, key_schema: Sche
         if max_rows is not None and i >= max_rows:
             break

-        committed_str =
-
-
-
-
+        committed_str = (
+            datetime_.from_timestamp_micros(fragment.committed_at).strftime("%Y-%m-%d %H:%M:%S")
+            if fragment.committed_at
+            else "N/A"
+        )
+        compacted_str = (
+            datetime_.from_timestamp_micros(fragment.compacted_at).strftime("%Y-%m-%d %H:%M:%S")
+            if fragment.compacted_at
+            else "N/A"
         )
+
+        data_size = _format_bytes(fragment.size_bytes)
+        metadata_size = _format_bytes(len(fragment.format_metadata or b""))
+        key_space = fragment.ks_id
         key_span = f"{fragment.key_span.begin}..{fragment.key_span.end}"
         min_key = pretty_key(bytes(fragment.key_extent.min), key_schema)
         max_key = pretty_key(bytes(fragment.key_extent.max), key_schema)
@@ -91,8 +101,10 @@ def _table_of_fragments(manifest: FragmentManifest, title: str, key_schema: Sche

         table.add_row(
             fragment.id,
-
+            data_size,
+            metadata_size,
             str(fragment.format),
+            key_space,
             key_span,
             key_range,
             str(fragment.level),
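The debug table now reports per-fragment data size, format-metadata size, and key space alongside the key span. Those values are plain attributes on manifest entries, so the same information can be dumped without rich; a hedged sketch, with attribute names taken from the code above and manifest indexing as used in spiral/debug/scan.py.

# Hedged sketch: print the fields the debug table renders, given a FragmentManifest
# obtained from scan.key_space_manifest(...) or scan.column_group_manifest(...).
from spiral.core.table.manifests import FragmentManifest


def dump_fragments(manifest: FragmentManifest) -> None:
    for i in range(len(manifest)):
        fragment = manifest[i]
        print(
            fragment.id,
            fragment.size_bytes,  # rendered as "Data"
            len(fragment.format_metadata or b""),  # rendered as "Metadata"
            fragment.ks_id,  # rendered as "Key Space"
            f"{fragment.key_span.begin}..{fragment.key_span.end}",
            fragment.level,
        )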
spiral/debug/scan.py
CHANGED
@@ -15,18 +15,16 @@ def show_scan(scan: Scan):
     column_groups = scan.column_groups()

     splits = [s.key_range for s in scan.shards()]
-
+    key_space_manifest = scan.key_space_manifest(table_id)

     # Collect all key bounds from all manifests. This makes sure all visualizations are aligned.
     key_points = set()
-    key_space_manifest = key_space_state.manifest
     for i in range(len(key_space_manifest)):
         fragment_file = key_space_manifest[i]
         key_points.add(fragment_file.key_extent.min)
         key_points.add(fragment_file.key_extent.max)
     for cg in column_groups:
-
-        cg_manifest = cg_scan.manifest
+        cg_manifest = scan.column_group_manifest(cg)
         for i in range(len(cg_manifest)):
             fragment_file = cg_manifest[i]
             key_points.add(fragment_file.key_extent.min)
@@ -39,9 +37,9 @@ def show_scan(scan: Scan):

     show_manifest(key_space_manifest, scope="Key space", key_points=key_points, splits=splits)
     for cg in scan.column_groups():
-
+        cg_manifest = scan.column_group_manifest(cg)
         # Skip table id from the start of the column group.
-        show_manifest(
+        show_manifest(cg_manifest, scope=".".join(cg.path[1:]), key_points=key_points, splits=splits)


 def show_manifest(manifest: FragmentManifest, scope: str = None, key_points: list[Key] = None, splits: list = None):
spiral/demo.py
CHANGED
@@ -1,16 +1,69 @@
 """Demo data to play with SpiralDB"""

 import functools
+import hashlib
+import os
 import time
+from pathlib import Path

 import duckdb
+import numpy as np
 import pandas as pd
 import pyarrow as pa
+import pyarrow.parquet as pq
 from datasets import load_dataset

 from spiral import Project, Spiral, Table


+# Cache configuration
+def _get_cache_dir() -> Path | None:
+    """Get cache directory from environment variable, or None if caching is disabled."""
+    cache_dir = os.environ.get("SPIRAL_DEMO_CACHE_DIR")
+    if cache_dir:
+        path = Path(cache_dir)
+        path.mkdir(parents=True, exist_ok=True)
+        return path
+    return None
+
+
+def _cache_key(*parts: str) -> str:
+    """Generate a cache key from components."""
+    return "-".join(str(p).replace("-", "_") for p in parts)
+
+
+def _get_cached_table(cache_key: str) -> pa.Table | None:
+    """Load Arrow table from cache if available."""
+    cache_dir = _get_cache_dir()
+    if not cache_dir:
+        return None
+
+    cache_file = cache_dir / f"{cache_key}.parquet"
+    if not cache_file.exists():
+        return None
+
+    try:
+        return pq.read_table(cache_file)
+    except Exception as e:
+        # On any error (corruption, etc.), return None to trigger re-download
+        print(f"Warning: Failed to load cache {cache_file}: {e}")
+        return None
+
+
+def _save_to_cache(cache_key: str, table: pa.Table) -> None:
+    """Save Arrow table to cache."""
+    cache_dir = _get_cache_dir()
+    if not cache_dir:
+        return
+
+    cache_file = cache_dir / f"{cache_key}.parquet"
+    try:
+        pq.write_table(table, cache_file, compression="zstd")
+        print(f"Cached data to {cache_file}")
+    except Exception as e:
+        print(f"Warning: Failed to save cache {cache_file}: {e}")
+
+
 def _install_duckdb_extension(name: str, max_retries: int = 3) -> None:
     """Install and load a DuckDB extension with retry logic for flaky CI environments."""
     for attempt in range(max_retries):
@@ -30,22 +83,37 @@ def demo_project(sp: Spiral) -> Project:


 @functools.lru_cache(maxsize=1)
-def images(sp: Spiral) -> Table:
+def images(sp: Spiral, limit=10) -> Table:
     table = demo_project(sp).create_table(
         "openimages.images-v1", key_schema=pa.schema([("idx", pa.int64())]), exist_ok=False
     )

-    #
-
-
-
-
-
-
-
-
-
+    # Try to load from cache first
+    # Use a hash of the URL to create a stable cache key
+    url = "https://storage.googleapis.com/cvdf-datasets/oid/open-images-dataset-validation.tsv"
+    url_hash = hashlib.md5(url.encode()).hexdigest()[:8]
+    cache_key = _cache_key("images", "v1", f"url-{url_hash}", f"limit-{limit}")
+    df = _get_cached_table(cache_key)
+
+    if df is None:
+        # Cache miss - download from Google Cloud Storage
+        print(f"Cache miss for {cache_key}, downloading from GCS...")
+        # Load URLs from a TSV file
+        df_pandas = pd.read_csv(
+            url,
+            names=["url", "size", "etag"],
+            skiprows=1,
+            sep="\t",
+            header=None,
+        )
+        # For this example, we load just a few rows, but Spiral can handle many more.
+        df = pa.Table.from_pandas(df_pandas[:limit])
+        df = df.append_column("idx", pa.array(range(len(df))))
+
+        # Save to cache for future runs
+        _save_to_cache(cache_key, df)
+    else:
+        print(f"Cache hit for {cache_key}")

     # Write just the metadata - lightweight and fast
     table.write(df)
@@ -57,30 +125,44 @@ def gharchive(sp: Spiral, limit=100, period=None) -> Table:
     if period is None:
         period = pd.Period("2023-01-01T00:00:00Z", freq="h")

-
+    # Try to load from cache first
+    period_str = f"{period.strftime('%Y-%m-%d')}-{str(period.hour)}"
+    cache_key = _cache_key("gharchive", "v1", f"period-{period_str}", f"limit-{limit}")
+    cached_events = _get_cached_table(cache_key)
+
+    if cached_events is None:
+        # Cache miss - download from gharchive
+        print(f"Cache miss for {cache_key}, downloading from gharchive.org...")
+        _install_duckdb_extension("httpfs")
+
+        json_gz_url = f"https://data.gharchive.org/{period_str}.json.gz"
+        arrow_table = (
+            duckdb.read_json(json_gz_url, union_by_name=True)
+            .limit(limit)
+            .select("""
+                * REPLACE (
+                    cast(created_at AS TIMESTAMP_MS) AS created_at,
+                )
+            """)
+            .to_arrow_table()
+        )

-
-
-
-
-
-
-
+        events = duckdb.from_arrow(arrow_table).order("created_at, id").distinct().to_arrow_table()
+        events = (
+            events.drop_columns("id")
+            .add_column(0, "id", events["id"].cast(pa.large_string()))
+            .drop_columns("created_at")
+            .add_column(0, "created_at", events["created_at"].cast(pa.timestamp("ms")))
+            .drop_columns("org")
         )
-            """)
-        .to_arrow_table()
-    )

-
-
-
-
-
-        .add_column(0, "created_at", events["created_at"].cast(pa.timestamp("ms")))
-        .drop_columns("org")
-    )
+        # Save to cache for future runs
+        _save_to_cache(cache_key, events)
+    else:
+        print(f"Cache hit for {cache_key}")
+        events = cached_events

-    key_schema = pa.schema([("created_at", pa.timestamp("ms")), ("id", pa.
+    key_schema = pa.schema([("created_at", pa.timestamp("ms")), ("id", pa.string())])
     table = demo_project(sp).create_table("gharchive.events", key_schema=key_schema, exist_ok=False)
     table.write(events, push_down_nulls=True)
     return table
@@ -88,13 +170,38 @@ def gharchive(sp: Spiral, limit=100, period=None) -> Table:

 @functools.lru_cache(maxsize=1)
 def fineweb(sp: Spiral, limit=100) -> Table:
-    table = demo_project(sp).create_table(
-
-
+    table = demo_project(sp).create_table("fineweb.v1", key_schema=pa.schema([("id", pa.string())]), exist_ok=False)
+
+    # Try to load from cache first
+    cache_key = _cache_key("fineweb", "v1", f"limit-{limit}")
+    arrow_table = _get_cached_table(cache_key)
+
+    if arrow_table is None:
+        # Cache miss - download from HuggingFace
+        print(f"Cache miss for {cache_key}, downloading from HuggingFace...")
+        ds = load_dataset("HuggingFaceFW/fineweb", "sample-10BT", streaming=True)
+        data = ds["train"].take(limit)
+        arrow_table = pa.Table.from_pylist(data.to_list())

-
-
-
+        # Save to cache for future runs
+        _save_to_cache(cache_key, arrow_table)
+    else:
+        print(f"Cache hit for {cache_key}")

     table.write(arrow_table, push_down_nulls=True)
     return table
+
+
+@functools.lru_cache(maxsize=1)
+def abc(sp: Spiral, limit=100) -> Table:
+    table = demo_project(sp).create_table("abc", key_schema=pa.schema([("a", pa.int64())]), exist_ok=False)
+
+    table.write(
+        {
+            "a": pa.array(np.arange(limit)),
+            "b": pa.array(np.arange(100, 100 + limit)),
+            "c": pa.array(np.repeat(99, limit)),
+        }
+    )
+
+    return table
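The demo module now persists each downloaded dataset as Parquet whenever SPIRAL_DEMO_CACHE_DIR is set, so repeated runs skip the network fetch. A hedged usage sketch; how the Spiral client is configured and authenticated is assumed and not part of this diff.

# Point the demo cache at a local directory before building the demo tables.
import os

from spiral import Spiral, demo

os.environ["SPIRAL_DEMO_CACHE_DIR"] = "/tmp/spiral-demo-cache"

sp = Spiral()  # assumed: a configured/authenticated client
events_table = demo.gharchive(sp, limit=100)  # downloads once, then served from cache
docs_table = demo.fineweb(sp, limit=100)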
spiral/enrichment.py
CHANGED
@@ -2,18 +2,18 @@ from __future__ import annotations

 import dataclasses
 import logging
-from functools import partial
+from functools import partial, reduce
 from typing import TYPE_CHECKING

 from spiral.core.client import Shard
 from spiral.core.table import KeyRange
-from spiral.core.table.spec import Key
+from spiral.core.table.spec import Key
 from spiral.expressions import Expr

 if TYPE_CHECKING:
     import dask.distributed

-    from spiral import Scan, Table
+    from spiral import Scan, Table, TransactionOps

 logger = logging.getLogger(__name__)

@@ -52,12 +52,13 @@ class Enrichment:
         """The filter expression."""
         return self._where

-    def _scan(self) -> Scan:
-        return self._table.spiral.scan(self._projection, where=self._where)
+    def _scan(self, shard: Shard | None = None) -> Scan:
+        return self._table.spiral.scan(self._projection, where=self._where, shard=shard)

     def apply(
         self,
         *,
+        shards: list[Shard] | None = None,
         txn_dump: str | None = None,
     ) -> None:
         """Apply the enrichment onto the table in a streaming fashion.
@@ -65,12 +66,17 @@ class Enrichment:
         For large tables, consider using `apply_dask` for distributed execution.

         Args:
+            shards: Optional list of shards to process.
             txn_dump: Optional path to dump the transaction JSON for debugging.
         """
+        # Combine multiple shards into one covering the full key range.
+        encompassing_shard: Shard | None = None
+        if shards:
+            encompassing_shard = reduce(lambda a, b: a | b, shards)

         txn = self._table.txn()

-        txn.writeback(self._scan())
+        txn.writeback(self._scan(encompassing_shard), shards=shards)

         if txn.is_empty():
             logger.warning("Transaction not committed. No rows were read for enrichment.")
@@ -150,7 +156,7 @@ class Enrichment:
         _compute = partial(
             _enrichment_task,
             config_json=self._table.spiral.config.to_json(),
-
+            state_bytes=plan_scan.core.plan_context().to_bytes_compressed(),
             output_table_id=self._table.table_id,
             incremental=checkpoint_dump is not None,
         )
@@ -210,8 +216,7 @@ class Enrichment:
             logger.warning("Transaction not committed. No rows were read for enrichment.")
             return

-
-        tx.commit(compact=True, txn_dump=txn_dump)
+        tx.commit(txn_dump=txn_dump)


 def _checkpoint_load_key_ranges(checkpoint_dump: str) -> list[KeyRange] | None:
@@ -243,26 +248,16 @@ def _checkpoint_dump_key_ranges(checkpoint_dump: str, ranges: list[KeyRange]):

 @dataclasses.dataclass
 class EnrichmentTaskResult:
-    ops:
+    ops: TransactionOps | None = None
     error: str | None = None

-    def __getstate__(self):
-        return {
-            "ops": [op.to_json() for op in self.ops],
-            "error": self.error,
-        }
-
-    def __setstate__(self, state):
-        self.ops = [Operation.from_json(op_json) for op_json in state["ops"]]
-        self.error = state["error"]
-

 # NOTE(marko): This function must be picklable!
 def _enrichment_task(
     shard: Shard,
     *,
     config_json: str,
-
+    state_bytes: bytes,
     output_table_id,
     incremental: bool,
 ) -> EnrichmentTaskResult:
@@ -272,7 +267,7 @@ def _enrichment_task(

     config = ClientSettings.from_json(config_json)
     sp = Spiral(config=config)
-    task_scan = sp.resume_scan(
+    task_scan = sp.resume_scan(state_bytes)

     table = sp.table(output_table_id)
     task_tx = table.txn()
@@ -284,7 +279,7 @@ def _enrichment_task(
         task_tx.abort()

         if incremental:
-            return EnrichmentTaskResult(
+            return EnrichmentTaskResult(error=str(e))

         logger.error(f"Enrichment task failed for shard {shard}: {e}")
         raise e
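With the scan state now carried as compressed ScanContext bytes (plan_context().to_bytes_compressed() above), enrichment workers receive plain bytes, and apply() accepts an optional shard subset. A hedged sketch of the new entry point; the Enrichment instance and the shard list are assumed to come from application code not shown in this diff.

# Restrict an enrichment run to a subset of shards (signature from the diff above).
from spiral.core.client import Shard


def apply_on_shards(enrichment, shards: list[Shard], txn_dump: str | None = None) -> None:
    # Internally the shards are folded into one encompassing shard for the scan,
    # while the writeback itself stays limited to the given shards.
    enrichment.apply(shards=shards, txn_dump=txn_dump)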
|