pyspiral-0.6.9-cp312-abi3-macosx_11_0_arm64.whl → pyspiral-0.7.12-cp312-abi3-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pyspiral-0.6.9.dist-info → pyspiral-0.7.12.dist-info}/METADATA +9 -8
- {pyspiral-0.6.9.dist-info → pyspiral-0.7.12.dist-info}/RECORD +53 -45
- {pyspiral-0.6.9.dist-info → pyspiral-0.7.12.dist-info}/entry_points.txt +1 -0
- spiral/__init__.py +20 -0
- spiral/_lib.abi3.so +0 -0
- spiral/api/__init__.py +1 -1
- spiral/api/client.py +1 -1
- spiral/api/types.py +1 -0
- spiral/cli/admin.py +2 -2
- spiral/cli/app.py +8 -4
- spiral/cli/fs.py +4 -4
- spiral/cli/iceberg.py +1 -1
- spiral/cli/key_spaces.py +15 -1
- spiral/cli/login.py +4 -3
- spiral/cli/orgs.py +8 -7
- spiral/cli/projects.py +4 -4
- spiral/cli/state.py +5 -3
- spiral/cli/tables.py +59 -36
- spiral/cli/telemetry.py +1 -1
- spiral/cli/types.py +2 -2
- spiral/cli/workloads.py +3 -3
- spiral/client.py +69 -22
- spiral/core/client/__init__.pyi +48 -13
- spiral/core/config/__init__.pyi +47 -0
- spiral/core/expr/__init__.pyi +15 -0
- spiral/core/expr/images/__init__.pyi +3 -0
- spiral/core/expr/list_/__init__.pyi +4 -0
- spiral/core/expr/refs/__init__.pyi +4 -0
- spiral/core/expr/str_/__init__.pyi +3 -0
- spiral/core/expr/struct_/__init__.pyi +6 -0
- spiral/core/expr/text/__init__.pyi +5 -0
- spiral/core/expr/udf/__init__.pyi +14 -0
- spiral/core/expr/video/__init__.pyi +3 -0
- spiral/core/table/__init__.pyi +37 -2
- spiral/core/table/spec/__init__.pyi +6 -4
- spiral/dataloader.py +52 -38
- spiral/dataset.py +10 -1
- spiral/enrichment.py +304 -0
- spiral/expressions/__init__.py +21 -23
- spiral/expressions/base.py +9 -4
- spiral/expressions/file.py +17 -0
- spiral/expressions/http.py +11 -80
- spiral/expressions/s3.py +16 -0
- spiral/expressions/tiff.py +2 -3
- spiral/expressions/udf.py +38 -24
- spiral/iceberg.py +3 -3
- spiral/project.py +34 -6
- spiral/scan.py +80 -33
- spiral/settings.py +19 -97
- spiral/streaming_/stream.py +1 -1
- spiral/table.py +40 -10
- spiral/transaction.py +99 -2
- spiral/expressions/io.py +0 -100
- spiral/expressions/mp4.py +0 -62
- spiral/expressions/png.py +0 -18
- spiral/expressions/qoi.py +0 -18
- spiral/expressions/refs.py +0 -58
- {pyspiral-0.6.9.dist-info → pyspiral-0.7.12.dist-info}/WHEEL +0 -0
spiral/expressions/http.py
CHANGED
@@ -1,86 +1,17 @@
-import
-import httpx
-import pyarrow as pa
-
+from spiral import _lib
 from spiral.expressions.base import Expr, ExprLike
-from spiral.expressions.struct import pack
-from spiral.expressions.udf import UDF
-from spiral.settings import APP_DIR
-
-
-def get(url: ExprLike, headers: ExprLike = None, force_cache: bool = False) -> Expr:
-    """Submit a GET request to either a scalar or vector of URLs."""
-    to_pack = {"url": url}
-    if headers is not None:
-        to_pack["headers"] = headers
-    return HttpGet(force_cache)(pack(to_pack))
-
-
-class HttpGet(UDF):
-    RES_DTYPE: pa.DataType = pa.struct(
-        [
-            pa.field("bytes", pa.large_binary()),
-            pa.field("status", pa.int32()),
-            pa.field("headers", pa.map_(pa.string(), pa.string())),
-        ]
-    )
-
-    def __init__(self, force_cache: bool = False):
-        super().__init__("http.get")
-        self._force_cache = force_cache
-
-    def return_type(self, *input_types: pa.DataType) -> pa.DataType:
-        return HttpGet.RES_DTYPE
-
-    def invoke(self, *input_args: pa.Array) -> pa.Array:
-        if len(input_args) != 1:
-            raise ValueError(f"Expected 1 argument, got {len(input_args)}")
-        result = _http_request(input_args[0], self._force_cache)
-        if isinstance(result, pa.ChunkedArray):
-            result = result.combine_chunks()
-        return result
-
-
-def _http_request(arg: pa.Array, force_cache: bool) -> pa.Array:
-    client = _HttpClient()
-
-    if isinstance(arg, pa.StructArray):
-        # We assume a vector of requests, but with potentially many arguments
-        return pa.array(
-            [
-                _response_dict(
-                    client.request(
-                        req.get("method", "GET").upper(),
-                        req["url"],
-                        headers=req.get("headers", {}),
-                        extensions={"force_cache": force_cache},
-                    )
-                )
-                for req in arg.to_pylist()
-            ],
-            type=HttpGet.RES_DTYPE,
-        )
-
-    raise TypeError(f"Unsupported argument: {arg} ({type(arg)})")
-
 
-def _response_dict(response: httpx.Response) -> dict:
-    if response.status_code != 200:
-        raise ValueError(f"Request failed with status {response.status_code}")
-    return {
-        "bytes": response.read(),
-        "status": response.status_code,
-        "headers": dict(response.headers),
-    }
 
+def get(expr: ExprLike, abort_on_error: bool = False) -> Expr:
+    """Read data from the URL.
 
-
-
+    Args:
+        expr: URLs of the data that needs to be read.
+        abort_on_error: Should the expression abort on errors or just collect them.
+    """
+    from spiral import expressions as se
 
-
-        if not cls._instance:
-            cls._instance = super().__new__(cls)
-        return cls._instance
+    expr = se.lift(expr)
 
-
-
+    # This just works :)
+    return Expr(_lib.expr.s3.get(expr.__expr__, abort_on_error))
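The rewritten `http.get` lifts its argument and delegates to the same native `s3.get` kernel shown in the next file, so HTTP and object-storage reads share one error-collection path. A minimal usage sketch, under the assumption that `se.lift` accepts a plain list of URL strings; the URLs themselves are made up:

```python
from spiral import expressions as se
from spiral.expressions import http

# Assumption: a plain list of URL strings lifts into an expression.
urls = se.lift(["https://example.com/a.bin", "https://example.com/b.bin"])

# By default errors are collected rather than raised; abort_on_error=True fails fast.
responses = http.get(urls)
```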
spiral/expressions/s3.py
ADDED
@@ -0,0 +1,16 @@
+from spiral import _lib
+from spiral.expressions.base import Expr, ExprLike
+
+
+def get(expr: ExprLike, abort_on_error: bool = False) -> Expr:
+    """Read data from object storage by the s3:// URL.
+
+    Args:
+        expr: URLs of the data that needs to be read from object storage.
+        abort_on_error: Should the expression abort on errors or just collect them.
+    """
+    from spiral import expressions as se
+
+    expr = se.lift(expr)
+
+    return Expr(_lib.expr.s3.get(expr.__expr__, abort_on_error))
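A parallel sketch for object storage, under the same `se.lift` assumption; the bucket and key are hypothetical:

```python
from spiral import expressions as se
from spiral.expressions import s3

# Hypothetical s3:// URL; abort the whole expression on the first failed read.
blob = s3.get(se.lift(["s3://my-bucket/images/0001.tiff"]), abort_on_error=True)
```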
spiral/expressions/tiff.py
CHANGED
@@ -2,7 +2,6 @@ import numpy as np
 import pyarrow as pa
 
 from spiral.expressions.base import Expr, ExprLike
-from spiral.expressions.udf import RefUDF
 
 _TIFF_RES_DTYPE: pa.DataType = pa.struct(
     [
@@ -78,7 +77,7 @@ def select(
     return TiffSelectUDF()(expr, shape, indexes)
 
 
-class TiffReadUDF(RefUDF):
+class TiffReadUDF:
    def __init__(self):
        super().__init__("tiff.read")
 
@@ -122,7 +121,7 @@ class TiffReadUDF(RefUDF):
        return _return_result(result, indexes)
 
 
-class TiffSelectUDF(RefUDF):
+class TiffSelectUDF:
    def __init__(self):
        super().__init__("tiff.select")
 
spiral/expressions/udf.py
CHANGED
@@ -3,44 +3,58 @@ import abc
 import pyarrow as pa
 
 from spiral import _lib
-from spiral.expressions.base import Expr
+from spiral.expressions.base import Expr, ExprLike
 
 
-class
-
-        self._udf = udf
+class UDF(abc.ABC):
+    """A User-Defined Function (UDF). This class should be subclassed to define custom UDFs.
 
-
-        """Create an expression that calls this UDF with the given arguments."""
-        from spiral import expressions as se
+    Example:
 
-
-
+        ```python
+        from spiral import expressions as se
+        import pyarrow as pa
 
-
-
+        class MyAdd(se.UDF):
+            def __init__(self):
+                super().__init__("my_add")
 
+            def return_type(self, scope: pa.DataType):
+                if not isinstance(scope, pa.StructType):
+                    raise ValueError("Expected struct type as input")
+                return scope.field(0).type
 
-
-
+            def invoke(self, scope: pa.Array):
+                if not isinstance(scope, pa.StructArray):
+                    raise ValueError("Expected struct array as input")
+                return pa.compute.add(scope.field(0), scope.field(1))
 
-
-        super().__init__(_lib.expr.udf.create(name, return_type=self.return_type, invoke=self.invoke))
+        my_add = MyAdd()
 
-
-
+        expr = my_add(table.select("first_arg", "second_arg"))
+        ```
+    """
 
+    def __init__(self, name: str):
+        self._udf = _lib.expr.udf.create(name, return_type=self.return_type, invoke=self.invoke)
 
-
-
+    def __call__(self, scope: ExprLike) -> Expr:
+        """Create an expression that calls this UDF with the given arguments."""
+        from spiral import expressions as se
 
-
-        super().__init__(_lib.expr.udf.create(name, return_type=self.return_type, invoke=self.invoke, scope="ref"))
+        return Expr(self._udf(se.lift(scope).__expr__))
 
     @abc.abstractmethod
-    def
-        """
+    def return_type(self, scope: pa.DataType) -> pa.DataType:
+        """Must return the return type of the UDF given the input scope type.
 
-
+        IMPORTANT: All expressions in Spiral must return nullable (Arrow default) types,
+        including nested structs, meaning that all fields in structs must also be nullable,
+        and if those fields are structs, their fields must also be nullable, and so on.
        """
        ...
+
+    @abc.abstractmethod
+    def invoke(self, scope: pa.Array) -> pa.Array:
+        """Must implement the UDF logic given the input scope array."""
+        ...
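The nullability requirement in `return_type` is easy to violate when building nested struct types by hand. A small sketch of a compliant return type; it relies only on the fact that `pa.field` defaults to `nullable=True`:

```python
import pyarrow as pa

# Arrow-default (nullable) fields at every nesting level; never pass nullable=False.
result_type = pa.struct(
    [
        pa.field("bytes", pa.large_binary()),
        pa.field("meta", pa.struct([pa.field("status", pa.int32())])),
    ]
)
assert all(result_type.field(i).nullable for i in range(result_type.num_fields))
```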
spiral/iceberg.py
CHANGED
@@ -15,7 +15,7 @@ class Iceberg:
 
     def __init__(self, spiral: "Spiral"):
         self._spiral = spiral
-        self._api = self._spiral.
+        self._api = self._spiral.api
 
     def catalog(self) -> "Catalog":
         """Open the Iceberg catalog."""
@@ -25,7 +25,7 @@ class Iceberg:
             "default",
             **{
                 "type": "rest",
-                "uri": self._spiral.config.
-                "token": self._spiral.
+                "uri": self._spiral.config.server_url + "/iceberg",
+                "token": self._spiral.authn.token().expose_secret(),
             },
         )
spiral/project.py
CHANGED
@@ -53,7 +53,7 @@ class Project:
         res = res[0]
 
         return Table(
-            self._spiral, self._spiral.
+            self._spiral, self._spiral.core.table(res.id), identifier=f"{res.project_id}.{res.dataset}.{res.table}"
         )
 
     def create_table(
@@ -78,7 +78,7 @@ class Project:
             key_schema = pa.schema(key_schema)
         key_schema = Schema.from_arrow(key_schema)
 
-        core_table = self._spiral.
+        core_table = self._spiral.core.create_table(
             project_id=self._id,
             dataset=dataset,
             table=table,
@@ -89,6 +89,34 @@ class Project:
 
         return Table(self._spiral, core_table, identifier=f"{self._id}.{dataset}.{table}")
 
+    def move_table(self, identifier: str, new_dataset: str):
+        """Move a table to a new dataset in the project.
+
+        Args:
+            identifier: The table identifier, in the form `dataset.table` or `table`.
+            new_dataset: The dataset into which to move this table.
+        """
+        table = self.table(identifier)
+
+        self._spiral.core.move_table(
+            table_id=table.table_id,
+            new_dataset=new_dataset,
+        )
+
+    def rename_table(self, identifier: str, new_table: str):
+        """Rename a table within its dataset.
+
+        Args:
+            identifier: The table identifier, in the form `dataset.table` or `table`.
+            new_table: The new name for this table.
+        """
+        table = self.table(identifier)
+
+        self._spiral.core.rename_table(
+            table_id=table.table_id,
+            new_table=new_table,
+        )
+
     def _parse_table_identifier(self, identifier: str) -> tuple[str, str]:
         parts = identifier.split(".")
         if len(parts) == 1:
@@ -105,7 +133,7 @@ class Project:
             raise ValueError(f"Index not found: {name}")
         res = res[0]
 
-        return TextIndex(self._spiral.
+        return TextIndex(self._spiral.core.text_index(res.id), name=name)
 
     def create_text_index(
         self,
@@ -135,7 +163,7 @@ class Project:
         if where is not None:
             where = se.lift(where)
 
-        core_index = self._spiral.
+        core_index = self._spiral.core.create_text_index(
             project_id=self._id,
             name=name,
             projection=projection.__expr__,
@@ -154,7 +182,7 @@ class Project:
             raise ValueError(f"Index not found: {name}")
         res = res[0]
 
-        return KeySpaceIndex(self._spiral.
+        return KeySpaceIndex(self._spiral.core.key_space_index(res.id), name=name)
 
     def create_key_space_index(
         self,
@@ -185,7 +213,7 @@ class Project:
         if where is not None:
             where = se.lift(where)
 
-        core_index = self._spiral.
+        core_index = self._spiral.core.create_key_space_index(
             project_id=self._id,
             name=name,
             granularity=granularity,
spiral/scan.py
CHANGED
@@ -1,8 +1,10 @@
+from functools import partial
 from typing import TYPE_CHECKING, Any, Optional
 
 import pyarrow as pa
 
 from spiral.core.client import Shard, ShuffleConfig
+from spiral.core.table import KeyRange
 from spiral.core.table import Scan as CoreScan
 from spiral.core.table.spec import Schema
 from spiral.settings import CI, DEV
@@ -15,13 +17,15 @@ if TYPE_CHECKING:
     import streaming  # noqa
     import torch.utils.data as torchdata  # noqa
 
+    from spiral.client import Spiral
     from spiral.dataloader import SpiralDataLoader, World  # noqa
 
 
 class Scan:
     """Scan object."""
 
-    def __init__(self, core: CoreScan):
+    def __init__(self, spiral: "Spiral", core: CoreScan):
+        self.spiral = spiral
         self.core = core
 
     @property
@@ -34,6 +38,11 @@ class Scan:
         """Returns the schema of the scan."""
         return self.core.schema()
 
+    @property
+    def key_schema(self) -> Schema:
+        """Returns the key schema of the scan."""
+        return self.core.key_schema()
+
     def is_empty(self) -> bool:
         """Check if the Spiral is empty for the given key range.
 
@@ -44,20 +53,30 @@
 
     def to_record_batches(
         self,
+        *,
+        key_range: KeyRange | None = None,
         key_table: pa.Table | pa.RecordBatchReader | None = None,
         batch_size: int | None = None,
         batch_readahead: int | None = None,
+        hide_progress_bar: bool = False,
     ) -> pa.RecordBatchReader:
         """Read as a stream of RecordBatches.
 
         Args:
+            key_range: Optional key range to filter the scan.
+                If provided, the scan will only return rows within the key range.
+                Only one of key_range or key_table can be provided.
             key_table: a table of keys to "take" (including aux columns for cell-push-down).
                 If None, the scan will be executed without a key table.
             batch_size: the maximum number of rows per returned batch.
                 IMPORTANT: This is currently only respected when the key_table is used. If key table is a
                 RecordBatchReader, the batch_size argument must be None, and the existing batching is respected.
            batch_readahead: the number of batches to prefetch in the background.
+            hide_progress_bar: If True, disables the progress bar during reading.
         """
+        if key_range is not None and key_table is not None:
+            raise ValueError("Only one of key_range or key_table can be provided.")
+
         if isinstance(key_table, pa.RecordBatchReader):
             if batch_size is not None:
                 raise ValueError(
@@ -66,46 +85,56 @@
         elif isinstance(key_table, pa.Table):
             key_table = key_table.to_reader(max_chunksize=batch_size)
 
-        return self.core.to_record_batches(
+        return self.core.to_record_batches(
+            key_range=key_range, key_table=key_table, batch_readahead=batch_readahead, progress=(not hide_progress_bar)
+        )
 
     def to_table(
         self,
+        *,
+        key_range: KeyRange | None = None,
         key_table: pa.Table | pa.RecordBatchReader | None = None,
     ) -> pa.Table:
         """Read into a single PyArrow Table.
 
         Args:
+            key_range: Optional key range to filter the scan.
+                If provided, the scan will only return rows within the key range.
+                Only one of key_range or key_table can be provided.
             key_table: a table of keys to "take" (including aux columns for cell-push-down).
                 If None, the scan will be executed without a key table.
         """
         # NOTE: Evaluates fully on Rust side which improved debuggability.
-        if DEV and not CI and key_table is None:
+        if DEV and not CI and key_table is None and key_range is None:
             rb = self.core.to_record_batch()
             return pa.Table.from_batches([rb])
 
-        return self.to_record_batches(key_table=key_table).read_all()
+        return self.to_record_batches(key_range=key_range, key_table=key_table).read_all()
 
     def to_dask(self) -> "dd.DataFrame":
         """Read into a Dask DataFrame.
 
         Requires the `dask` package to be installed.
+
+        IMPORTANT: Dask execution has some limitations, e.g. UDFs are not currently supported. These limitations
+        usually manifest as serialization errors when Dask workers attempt to serialize the state. If you are
+        encountering such issues, please reach out to support for assistance.
         """
         import dask.dataframe as dd
-        import pandas as pd
-
-        def _read_shard(shard: Shard) -> pd.DataFrame:
-            # TODO(ngates): we need a way to preserve the existing asofs?
-            raise NotImplementedError()
 
-
+        _read_shard = partial(
+            _read_shard_task,
+            settings_json=self.spiral.config.to_json(),
+            state_json=self.core.plan_state().to_json(),
+        )
         return dd.from_map(_read_shard, self.shards())
 
-    def to_pandas(self) -> "pd.DataFrame":
+    def to_pandas(self, *, key_range: KeyRange | None = None) -> "pd.DataFrame":
         """Read into a Pandas DataFrame.
 
         Requires the `pandas` package to be installed.
         """
-        return self.to_table().to_pandas()
+        return self.to_table(key_range=key_range).to_pandas()
 
     def to_polars(self) -> "pl.DataFrame":
         """Read into a Polars DataFrame.
@@ -160,16 +189,18 @@
 
         Returns:
             SpiralDataLoader with shards partitioned for this rank.
-        """
-        # Example usage:
-        #
-        # Auto-detect from PyTorch distributed:
-        # loader: SpiralDataLoader = scan.to_distributed_data_loader(batch_size=32)
-        #
-        # Explicit world configuration:
-        # world = World(rank=0, world_size=4)
-        # loader: SpiralDataLoader = scan.to_distributed_data_loader(world=world, batch_size=32)
 
+        Auto-detect from PyTorch distributed:
+        ```python
+        loader: SpiralDataLoader = scan.to_distributed_data_loader(batch_size=32)
+        ```
+
+        Explicit world configuration:
+        ```python
+        world = World(rank=0, world_size=4)
+        loader: SpiralDataLoader = scan.to_distributed_data_loader(world=world, batch_size=32)
+        ```
+        """
         from spiral.dataloader import SpiralDataLoader, World
 
         if world is None:
@@ -203,19 +234,21 @@
 
         Returns:
             New SpiralDataLoader instance configured to resume from the checkpoint.
+
+        Save checkpoint during training:
+        ```python
+        loader = scan.to_distributed_data_loader(batch_size=32, seed=42)
+        checkpoint = loader.state_dict()
+        ```
+
+        Resume later - uses same shards from checkpoint:
+        ```python
+        resumed_loader = scan.resume_data_loader(
+            checkpoint,
+            batch_size=32,
+            transform_fn=my_transform,
+        )
         """
-        # Example usage:
-        #
-        # Save checkpoint during training:
-        # loader = scan.to_distributed_data_loader(batch_size=32, seed=42)
-        # checkpoint = loader.state_dict()
-        #
-        # Resume later - uses same shards from checkpoint:
-        # resumed_loader = scan.resume_data_loader(
-        #     checkpoint,
-        #     batch_size=32,
-        #     transform_fn=my_transform,
-        # )
         from spiral.dataloader import SpiralDataLoader
 
         return SpiralDataLoader.from_state_dict(self, state, **kwargs)
@@ -283,3 +316,17 @@
         from spiral.debug.metrics import display_metrics
 
         display_metrics(self.metrics)
+
+
+# NOTE(marko): This function must be picklable!
+def _read_shard_task(shard: Shard, *, settings_json: str, state_json: str) -> "pd.DataFrame":
+    from spiral import Spiral
+    from spiral.core.table import ScanState
+    from spiral.settings import ClientSettings
+
+    settings = ClientSettings.from_json(settings_json)
+    sp = Spiral(config=settings)
+    state = ScanState.from_json(state_json)
+    task_scan = Scan(sp, sp.core.load_scan(state))
+
+    return task_scan.to_record_batches(key_range=shard.key_range, hide_progress_bar=True).read_all().to_pandas()
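The new Dask path works by serializing two JSON blobs, the client settings and the scan's plan state, so each worker can rebuild a client and a `Scan` and read only its own shard's key range. A sketch of the new reader options from the driver's side; how the scan itself is obtained is left out:

```python
from spiral.scan import Scan

def read_one_shard(scan: Scan):
    """Sketch: stream batches, then re-read a single shard's key range."""
    # Stream without a progress bar, prefetching a few batches ahead.
    for batch in scan.to_record_batches(batch_readahead=4, hide_progress_bar=True):
        ...  # consume each pa.RecordBatch

    # Restrict a read to one shard's key range, as _read_shard_task does above.
    shard = next(iter(scan.shards()))
    return scan.to_pandas(key_range=shard.key_range)
```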
spiral/settings.py
CHANGED
@@ -1,22 +1,16 @@
+"""Configuration module using Rust ClientSettings via PyO3.
+
+This module provides a simple settings() function that returns a cached
+ClientSettings instance loaded from ~/.spiral.toml and environment variables.
+"""
+
 import functools
 import os
 from pathlib import Path
-from typing import TYPE_CHECKING, Annotated
 
 import typer
-from pydantic import Field, ValidatorFunctionWrapHandler, WrapValidator
-from pydantic_settings import (
-    BaseSettings,
-    InitSettingsSource,
-    PydanticBaseSettingsSource,
-    SettingsConfigDict,
-)
-
-from spiral.core.authn import Authn, DeviceCodeAuth, Token
-from spiral.core.client import Spiral
 
-
-from spiral.api import SpiralAPI
+from spiral.core.config import ClientSettings
 
 DEV = "PYTEST_VERSION" in os.environ or bool(os.environ.get("SPIRAL_DEV", None))
 CI = "GITHUB_ACTIONS" in os.environ
@@ -27,88 +21,16 @@ LOG_DIR = APP_DIR / "logs"
 PACKAGE_NAME = "pyspiral"
 
 
-def validate_token(v, handler: ValidatorFunctionWrapHandler):
-    if isinstance(v, str):
-        return Token(v)
-    else:
-        raise ValueError("Token value must be a string")
-
-
-TokenType = Annotated[Token, WrapValidator(validate_token)]
-
-
-class SpiralDBSettings(BaseSettings):
-    model_config = SettingsConfigDict(frozen=True)
-
-    host: str = "localhost" if DEV else "api.spiraldb.com"
-    port: int = 4279 if DEV else 443
-    ssl: bool = not DEV
-    token: TokenType | None = None
-
-    @property
-    def uri(self) -> str:
-        return f"{'https' if self.ssl else 'http'}://{self.host}:{self.port}"
-
-
-class SpfsSettings(BaseSettings):
-    model_config = SettingsConfigDict(frozen=True)
-
-    host: str = "localhost" if DEV else "spfs.spiraldb.dev"
-    port: int = 4295 if DEV else 443
-    ssl: bool = not DEV
-
-    @property
-    def uri(self) -> str:
-        return f"{'https' if self.ssl else 'http'}://{self.host}:{self.port}"
-
-
-class Settings(BaseSettings):
-    model_config = SettingsConfigDict(
-        env_nested_delimiter="__",
-        env_prefix="SPIRAL__",
-        frozen=True,
-    )
-
-    spiraldb: SpiralDBSettings = Field(default_factory=SpiralDBSettings)
-    spfs: SpfsSettings = Field(default_factory=SpfsSettings)
-    file_format: str = Field(default="vortex")
-
-    @functools.cached_property
-    def api(self) -> "SpiralAPI":
-        from spiral.api import SpiralAPI
-
-        return SpiralAPI(self.authn, base_url=self.spiraldb.uri)
-
-    @functools.cached_property
-    def core(self) -> Spiral:
-        return Spiral(
-            api_url=self.spiraldb.uri,
-            spfs_url=self.spfs.uri,
-            authn=self.authn,
-        )
-
-    @functools.cached_property
-    def authn(self):
-        if self.spiraldb.token:
-            return Authn.from_token(self.spiraldb.token)
-        return Authn.from_fallback(self.spiraldb.uri)
-
-    @functools.cached_property
-    def device_code_auth(self) -> DeviceCodeAuth:
-        return DeviceCodeAuth.default()
-
-    @classmethod
-    def settings_customise_sources(
-        cls,
-        settings_cls: type[BaseSettings],
-        env_settings: PydanticBaseSettingsSource,
-        dotenv_settings: PydanticBaseSettingsSource,
-        init_settings: InitSettingsSource,
-        **kwargs,
-    ) -> tuple[PydanticBaseSettingsSource, ...]:
-        return env_settings, dotenv_settings, init_settings
-
-
 @functools.cache
-def settings() ->
-
+def settings() -> ClientSettings:
+    """Get the global ClientSettings instance.
+
+    Configuration is loaded with the following priority (highest to lowest):
+    1. Environment variables (SPIRAL__*)
+    2. Config file (~/.spiral.toml)
+    3. Default values
+
+    Returns:
+        ClientSettings: The global configuration instance
+    """
+    return ClientSettings.load()
spiral/streaming_/stream.py
CHANGED