PyPI - pyspiral - Versions diffs - 0.4.0__pp310-pypy310_pp73-macosx_10_12_x86_64.whl - Mend

pyspiral 0.4.0__pp310-pypy310_pp73-macosx_10_12_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (98) hide show

pyspiral-0.4.0.dist-info/METADATA +46 -0
pyspiral-0.4.0.dist-info/RECORD +98 -0
pyspiral-0.4.0.dist-info/WHEEL +4 -0
pyspiral-0.4.0.dist-info/entry_points.txt +2 -0
spiral/__init__.py +10 -0
spiral/_lib.pypy310-pp73-darwin.so +0 -0
spiral/adbc.py +393 -0
spiral/api/__init__.py +64 -0
spiral/api/admin.py +15 -0
spiral/api/client.py +160 -0
spiral/api/filesystems.py +153 -0
spiral/api/organizations.py +77 -0
spiral/api/projects.py +197 -0
spiral/api/telemetry.py +19 -0
spiral/api/types.py +20 -0
spiral/api/workloads.py +52 -0
spiral/arrow_.py +221 -0
spiral/cli/__init__.py +79 -0
spiral/cli/__main__.py +4 -0
spiral/cli/admin.py +16 -0
spiral/cli/app.py +65 -0
spiral/cli/console.py +95 -0
spiral/cli/fs.py +112 -0
spiral/cli/iceberg/__init__.py +7 -0
spiral/cli/iceberg/namespaces.py +47 -0
spiral/cli/iceberg/tables.py +60 -0
spiral/cli/indexes/__init__.py +19 -0
spiral/cli/login.py +22 -0
spiral/cli/orgs.py +90 -0
spiral/cli/printer.py +53 -0
spiral/cli/projects.py +136 -0
spiral/cli/state.py +5 -0
spiral/cli/tables/__init__.py +121 -0
spiral/cli/telemetry.py +18 -0
spiral/cli/types.py +51 -0
spiral/cli/workloads.py +59 -0
spiral/client.py +79 -0
spiral/core/__init__.pyi +0 -0
spiral/core/client/__init__.pyi +117 -0
spiral/core/index/__init__.pyi +15 -0
spiral/core/table/__init__.pyi +108 -0
spiral/core/table/manifests/__init__.pyi +35 -0
spiral/core/table/metastore/__init__.pyi +62 -0
spiral/core/table/spec/__init__.pyi +214 -0
spiral/datetime_.py +27 -0
spiral/expressions/__init__.py +245 -0
spiral/expressions/base.py +149 -0
spiral/expressions/http.py +86 -0
spiral/expressions/io.py +100 -0
spiral/expressions/list_.py +68 -0
spiral/expressions/mp4.py +62 -0
spiral/expressions/png.py +18 -0
spiral/expressions/qoi.py +18 -0
spiral/expressions/refs.py +58 -0
spiral/expressions/str_.py +39 -0
spiral/expressions/struct.py +59 -0
spiral/expressions/text.py +62 -0
spiral/expressions/tiff.py +223 -0
spiral/expressions/udf.py +46 -0
spiral/grpc_.py +32 -0
spiral/iceberg/__init__.py +3 -0
spiral/iceberg/client.py +33 -0
spiral/indexes/__init__.py +5 -0
spiral/indexes/client.py +137 -0
spiral/indexes/index.py +34 -0
spiral/indexes/scan.py +22 -0
spiral/project.py +46 -0
spiral/protogen/_/__init__.py +0 -0
spiral/protogen/_/arrow/__init__.py +0 -0
spiral/protogen/_/arrow/flight/__init__.py +0 -0
spiral/protogen/_/arrow/flight/protocol/__init__.py +0 -0
spiral/protogen/_/arrow/flight/protocol/sql/__init__.py +1990 -0
spiral/protogen/_/scandal/__init__.py +178 -0
spiral/protogen/_/spiral/__init__.py +0 -0
spiral/protogen/_/spiral/table/__init__.py +22 -0
spiral/protogen/_/substrait/__init__.py +3399 -0
spiral/protogen/_/substrait/extensions/__init__.py +115 -0
spiral/protogen/__init__.py +0 -0
spiral/protogen/substrait/__init__.py +3399 -0
spiral/protogen/substrait/extensions/__init__.py +115 -0
spiral/protogen/util.py +41 -0
spiral/py.typed +0 -0
spiral/server.py +17 -0
spiral/settings.py +101 -0
spiral/substrait_.py +279 -0
spiral/tables/__init__.py +12 -0
spiral/tables/client.py +130 -0
spiral/tables/dataset.py +250 -0
spiral/tables/debug/__init__.py +0 -0
spiral/tables/debug/manifests.py +70 -0
spiral/tables/debug/metrics.py +56 -0
spiral/tables/debug/scan.py +248 -0
spiral/tables/maintenance.py +12 -0
spiral/tables/scan.py +193 -0
spiral/tables/snapshot.py +78 -0
spiral/tables/table.py +157 -0
spiral/tables/transaction.py +52 -0
spiral/types_.py +6 -0

spiral/expressions/mp4.py ADDED Viewed

@@ -0,0 +1,62 @@
+import pyarrow as pa
+from spiral.expressions.base import Expr, ExprLike
+# Arrow struct layout for one decoded MP4 result: the raw pixel buffer plus the
+# height, width and frame count, each stored as uint32.
+# NOTE(review): not referenced in the visible part of this module - confirm it is still used.
+_MP4_RES_DTYPE: pa.DataType = pa.struct(
+    [
+        pa.field("pixels", pa.large_binary()),
+        pa.field("height", pa.uint32()),
+        pa.field("width", pa.uint32()),
+        pa.field("frames", pa.uint32()),
+    ]
+)
+# TODO(marko): Support optional range and crop.
+#   IMPORTANT: Frames is currently broken and defaults to full.
+def read(expr: ExprLike | str, frames: ExprLike | str, crop: ExprLike | str):
+    """
+    Decode a referenced cell stored in `MP4` format. Requires `ffmpeg`.
+    Args:
+        expr: The referenced `Mp4` bytes.
+            A str is treated as the name of a `se.keyed` expression.
+        frames: The frame range to read. Each element must be a list of two uint32
+            (frame start, frame end), or null / empty list to read all frames.
+            A str is treated as the name of a `se.keyed` expression.
+        crop: The crop to apply to the frames. Each element must be a list of four uint32
+            (x, y, width, height), or null / empty list to read full frames.
+            A str is treated as the name of a `se.keyed` expression.
+    Returns:
+        An array where each element is a decoded, cropped video with fields:
+            pixels: RGB8 bytes, frames * width * height * 3.
+            width: Width of the image with type `pa.uint32()`.
+            height: Height of the image with type `pa.uint32()`.
+            frames: Number of frames with type `pa.uint32()`.
+    """
+    from spiral import _lib
+    from spiral.expressions import keyed, lift
+    if isinstance(expr, str):
+        # A keyed reference cell is a struct wrapping `__ref__` with id / begin / end.
+        ref_span = pa.struct([("id", pa.string()), ("begin", pa.uint64()), ("end", pa.uint64())])
+        expr = keyed(expr, pa.struct([("__ref__", ref_span)]))
+    if isinstance(frames, str):
+        frames = keyed(frames, pa.list_(pa.uint32()))
+    if isinstance(crop, str):
+        crop = keyed(crop, pa.list_(pa.uint32()))
+    # Lift every argument and hand the native expressions over positionally.
+    native_args = [lift(arg).__expr__ for arg in (expr, frames, crop)]
+    return Expr(_lib.expr.video.read(*native_args, format="mp4"))

spiral/expressions/png.py ADDED Viewed

@@ -0,0 +1,18 @@
+from spiral.expressions.base import Expr, ExprLike
+def encode(expr: ExprLike) -> Expr:
+    """Build an expression that encodes image structs as PNG.
+    Args:
+        expr: The image expression to encode.
+            Must evaluate to a struct with `pixels`, `width`, `height`, `channels`
+            and `channel_bit_depth` fields.
+    Returns:
+        An expression yielding the encoded PNG images.
+    """
+    from spiral import _lib
+    from spiral.expressions import lift
+    lifted = lift(expr)
+    native = _lib.expr.img.encode(lifted.__expr__, format="png")
+    return Expr(native)

spiral/expressions/qoi.py ADDED Viewed

@@ -0,0 +1,18 @@
+from spiral.expressions.base import Expr, ExprLike
+def encode(expr: ExprLike) -> Expr:
+    """Build an expression that encodes image structs as QOI.
+    Args:
+        expr: The image expression to encode.
+            Must evaluate to a struct with `pixels`, `width`, `height`, `channels`
+            and `channel_bit_depth` fields.
+    Returns:
+        An expression yielding the encoded QOI images.
+    """
+    from spiral import _lib
+    from spiral.expressions import lift
+    lifted = lift(expr)
+    native = _lib.expr.img.encode(lifted.__expr__, format="qoi")
+    return Expr(native)

spiral/expressions/refs.py ADDED Viewed

@@ -0,0 +1,58 @@
+import pyarrow as pa
+from spiral.expressions.base import Expr, ExprLike
+def ref(expr: ExprLike, field: str | None = None) -> Expr:
+    """Store binary values as references. Only usable in write expressions.
+    Large cell values (for example bytes columns) that never appear in filter expressions are
+    often better stored as references: this enables more efficient scan pruning, and many of
+    Spiral's cell pushdown expressions operate on references.
+    Args:
+        expr: The expression whose values should be stored as references.
+        field: When `expr` evaluates to a struct, the name of the struct field to reference.
+            If `None`, `expr` itself must evaluate to a type that supports referencing.
+    Returns:
+        The reference expression.
+    """
+    from spiral import _lib
+    from spiral.expressions import lift
+    native = _lib.expr.refs.ref(lift(expr).__expr__, field)
+    return Expr(native)
+def deref(expr: ExprLike | str, field: str | None = None) -> Expr:
+    """Turn referenced values back into their original form (e.g. binary).
+    This is the inverse of `ref`; see `ref` for background on Spiral's reference values.
+    Args:
+        expr: The expression to de-reference. A str is treated as the name of a `se.keyed` expression.
+        field: When `expr` evaluates to a struct, the name of the struct field to de-reference.
+            If `None`, `expr` itself must evaluate to a reference type.
+    Returns:
+        The de-referenced expression.
+    """
+    from spiral import _lib
+    from spiral.expressions import keyed, lift
+    if isinstance(expr, str):
+        # A keyed reference cell is a struct wrapping `__ref__` with id / begin / end.
+        ref_span = pa.struct([("id", pa.string()), ("begin", pa.uint64()), ("end", pa.uint64())])
+        expr = keyed(expr, pa.struct([("__ref__", ref_span)]))
+    native = _lib.expr.refs.deref(lift(expr).__expr__, field=field)
+    return Expr(native)
+def nbytes(expr: ExprLike) -> Expr:
+    """Compute the size in bytes of a reference.
+    Args:
+        expr: The ref expression to measure.
+    Returns:
+        An expression equal to the reference's `end` offset minus its `begin` offset.
+    """
+    from spiral.expressions import lift
+    lifted = lift(expr)
+    end_offset = lifted["__ref__"]["end"]
+    begin_offset = lifted["__ref__"]["begin"]
+    return end_offset - begin_offset

spiral/expressions/str_.py ADDED Viewed

@@ -0,0 +1,39 @@
+import pyarrow as pa
+import pyarrow.compute as pc
+import re2 as re
+from spiral import _lib
+from spiral.expressions.base import Expr, ExprLike
+# TODO(ngates): we can add a symmetric "ascii" expression namespace in the future if
+#  the performance is required.
+def substr(expr: ExprLike = None, *, begin: int = 0, end: int | None = None) -> Expr:
+    """Slice a string expression.
+    Args:
+        expr: The string expression to slice.
+        begin: Index at which the slice starts.
+        end: Index at which the slice ends; `None` is forwarded as-is to the native call.
+    Returns:
+        The sliced string expression.
+    """
+    from spiral import expressions as se
+    source = se.lift(expr)
+    # NOTE(review): this goes through `_lib.spql`, while the other expression modules
+    #   call into `_lib.expr` - confirm the namespace is still correct.
+    sliced = _lib.spql.str.substr(source.__expr__, begin=begin, end=end)
+    return Expr(sliced)
+def extract_regex(pattern: str, *, strings: ExprLike) -> Expr:
+    """Extract the first occurrence of a regex pattern from a string.
+    Not implemented yet: calling this always raises `NotImplementedError`.
+    """
+    raise NotImplementedError
+def _extract_regex(arg: pa.Array | pa.Scalar, pattern: str) -> pa.Array | pa.Scalar:
+    """Run `pc.extract_regex` on a string input and cast the result to a struct of strings.
+    Raises:
+        TypeError: If `arg` is not of Arrow string type.
+    """
+    # The result struct has one string field per named capture group of the pattern.
+    compiled = re.compile(pattern)
+    group_fields = [pa.field(name, type=pa.string()) for name in compiled.groupindex.keys()]
+    dtype = pa.struct(group_fields)
+    if not pa.types.is_string(arg.type):
+        raise TypeError("Input argument does not have the expected type")
+    extracted = pc.extract_regex(arg, pattern=pattern)
+    return extracted.cast(dtype)

spiral/expressions/struct.py ADDED Viewed

@@ -0,0 +1,59 @@
+from spiral import _lib
+from spiral.expressions.base import Expr, ExprLike
+def getitem(expr: ExprLike, field: str) -> Expr:
+    """Extract a single field from a struct expression.
+    Args:
+        expr: The struct expression to read from.
+        field: Name of the field to extract. A dot-separated string addresses nested fields.
+    Returns:
+        An expression for the selected field.
+    """
+    from spiral import expressions as se
+    struct_expr = se.lift(expr)
+    native = _lib.expr.struct.getitem(struct_expr.__expr__, field)
+    return Expr(native)
+def pack(fields: dict[str, ExprLike], *, nullable: bool = False) -> Expr:
+    """Build a new struct expression out of named field expressions.
+    Args:
+        fields: Mapping of field name to expression; the keys become the struct field names.
+        nullable: Forwarded unchanged to the native `struct.pack` call.
+    Returns:
+        The assembled struct expression.
+    """
+    from spiral import expressions as se
+    names = list(fields.keys())
+    natives = [se.lift(value).__expr__ for value in fields.values()]
+    return Expr(_lib.expr.struct.pack(names, natives, nullable))
+def merge(*structs: "ExprLike") -> Expr:
+    """Combine the fields of several struct expressions into one struct.
+    Args:
+        *structs: Each expression must evaluate to a struct.
+    Returns:
+        A single struct containing all the fields from the input structs.
+        If a field is present in multiple structs, the value from the last struct is used.
+    """
+    from spiral import expressions as se
+    if len(structs) == 1:
+        # A single input needs no merging; just lift it.
+        (only,) = structs
+        return se.lift(only)
+    natives = [se.lift(struct).__expr__ for struct in structs]
+    return Expr(_lib.expr.struct.merge(natives))
+def select(expr: ExprLike, names: list[str] = None, exclude: list[str] = None) -> Expr:
+    """Keep or drop fields of a struct expression.
+    Args:
+        expr: The struct-like expression to select fields from.
+        names: Field names to keep. A name containing a dot is treated as a nested struct field path.
+        exclude: Field names to drop from the result. Exactly one of `names` or `exclude` must be provided.
+    Returns:
+        The struct expression restricted to the requested fields.
+    """
+    from spiral import expressions as se
+    struct_expr = se.lift(expr)
+    # `names` / `exclude` are forwarded as given; no validation happens on the Python side.
+    native = _lib.expr.struct.select(struct_expr.__expr__, names, exclude)
+    return Expr(native)

spiral/expressions/text.py ADDED Viewed

@@ -0,0 +1,62 @@
+from spiral.expressions.base import Expr, ExprLike
+def field(expr: ExprLike, field_name: str | None = None, tokenizer: str | None = None) -> Expr:
+    """Configure a column for text indexing.
+    Args:
+        expr: An input column. The expression must either evaluate to a UTF-8,
+            or, if a `field_name` is provided, to a struct with a field of that name.
+        field_name: If provided, the expression must evaluate to a struct with a field of that name.
+            Only that field is indexed; the other fields of the struct are kept as they are.
+        tokenizer: If provided, the text will be tokenized using the given tokenizer.
+            Applied both with and without `field_name`.
+    Returns:
+        An expression that can be used to construct a text index.
+    """
+    from spiral import _lib
+    from spiral.expressions import getitem, lift, merge, pack
+    expr = lift(expr)
+    if field_name is None:
+        return Expr(_lib.expr.text.field(expr.__expr__, tokenizer))
+    # Index just the named child. The tokenizer is forwarded here too (it used to be
+    # dropped on this path), and the native result is wrapped in `Expr` so that `pack`
+    # receives the same kind of value as everywhere else in this package.
+    child = Expr(_lib.expr.text.field(getitem(expr, field_name).__expr__, tokenizer))
+    # Merging last lets the indexed child replace the original field of the same name.
+    return merge(
+        expr,
+        pack({field_name: child}),
+    )
+def find(expr: ExprLike, term: str) -> Expr:
+    """Build a text-search expression that looks for a term.
+    Args:
+        expr: An index field.
+        term: The term to search for.
+    Returns:
+        An expression that can be used in ranking for text search.
+    """
+    from spiral import _lib
+    from spiral.expressions import lift
+    index_field = lift(expr)
+    native = _lib.expr.text.find(index_field.__expr__, term)
+    return Expr(native)
+def boost(expr: ExprLike, factor: float) -> Expr:
+    """Scale the relevance of a ranking expression.
+    Args:
+        expr: Rank by expression.
+        factor: The factor by which to boost the relevance.
+    Returns:
+        An expression that can be used in ranking for text search.
+    """
+    from spiral import _lib
+    from spiral.expressions import lift
+    ranking = lift(expr)
+    native = _lib.expr.text.boost(ranking.__expr__, factor)
+    return Expr(native)

spiral/expressions/tiff.py ADDED Viewed

@@ -0,0 +1,223 @@
+import numpy as np
+import pyarrow as pa
+from spiral.expressions.base import Expr, ExprLike
+from spiral.expressions.udf import RefUDF
+# Arrow struct layout of one decoded TIFF image: the raw pixel buffer, its height and
+# width (uint32), the number of channels and the bit depth per channel (uint8).
+# Used as the return type of both TIFF UDFs and as the type of the array built by `_return_result`.
+_TIFF_RES_DTYPE: pa.DataType = pa.struct(
+    [
+        pa.field("pixels", pa.large_binary()),
+        pa.field("height", pa.uint32()),
+        pa.field("width", pa.uint32()),
+        pa.field("channels", pa.uint8()),
+        pa.field("channel_bit_depth", pa.uint8()),
+    ]
+)
+def read(
+    expr: ExprLike,
+    indexes: ExprLike | int | None = None,
+    window: ExprLike | tuple[tuple[int, int], tuple[int, int]] | None = None,
+    boundless: ExprLike | bool | None = None,
+) -> Expr:
+    """
+    Read referenced cell in a `TIFF` format. Requires `rasterio` to be installed.
+    Args:
+        expr: The referenced `TIFF` bytes.
+        indexes: The band indexes to read. Defaults to all.
+        window: The window to read. In format (row_range_tuple, col_range_tuple). Defaults to full window.
+        boundless: If `True`, windows that extend beyond the dataset's extent
+            are permitted and partially or completely filled arrays will be returned as appropriate.
+    Returns:
+        An array where each element is a decoded image with fields:
+            pixels: bytes of an array of shape (channels, height, width).
+            width: Width of the image with type `pa.uint32()`.
+            height: Height of the image with type `pa.uint32()`.
+            channels: Number of channels of the image with type `pa.uint8()`.
+                If `indexes` is not None, this is the length of `indexes` or 1 if `indexes` is an int.
+            channel_bit_depth: Bit depth of the channel with type `pa.uint8()`.
+    Raises:
+        ImportError: If `rasterio` is not installed.
+    """
+    # Fail early, when the expression is built, rather than when the UDF is invoked.
+    try:
+        import rasterio  # noqa: F401
+    except ImportError as err:
+        # Chain explicitly so the underlying import failure is reported as the cause.
+        raise ImportError("`rasterio` is required for tiff.read") from err
+    return TiffReadUDF()(expr, indexes, window, boundless)
+def select(
+    expr: ExprLike,
+    shape: ExprLike | dict,
+    indexes: ExprLike | int | None = None,
+) -> Expr:
+    """
+    Select the shape out of the referenced cell in a `TIFF` format. Requires `rasterio` to be installed.
+    Args:
+        expr: The referenced `TIFF` bytes.
+        shape: [GeoJSON-like](https://geojson.org/) shape.
+        indexes: The band indexes to read. Defaults to all.
+    Returns:
+        An array where each element is a decoded image with fields:
+            pixels: bytes of an array of shape (channels, height, width).
+            width: Width of the image with type `pa.uint32()`.
+            height: Height of the image with type `pa.uint32()`.
+            channels: Number of channels of the image with type `pa.uint8()`.
+                If `indexes` is not None, this is the length of `indexes` or 1 if `indexes` is an int.
+            channel_bit_depth: Bit depth of the channel with type `pa.uint8()`.
+    Raises:
+        ImportError: If `rasterio` is not installed.
+    """
+    # Fail early, when the expression is built, rather than when the UDF is invoked.
+    try:
+        import rasterio  # noqa: F401
+    except ImportError as err:
+        # Chain explicitly so the underlying import failure is reported as the cause.
+        raise ImportError("`rasterio` is required for tiff.select") from err
+    return TiffSelectUDF()(expr, shape, indexes)
+class TiffReadUDF(RefUDF):
+    """Ref-scoped UDF behind `tiff.read`: decodes one TIFF cell with rasterio."""
+    def __init__(self):
+        super().__init__("tiff.read")
+    def return_type(self, *input_types: pa.DataType) -> pa.DataType:
+        """Return the fixed result struct type, independent of the input types."""
+        return _TIFF_RES_DTYPE
+    def invoke(self, fp, *input_args: pa.Array) -> pa.Array:
+        """Decode the TIFF behind `fp` and return a one-element result array.
+        Args:
+            fp: File-like object for the referenced cell; handed to rasterio through `_VsiOpener`.
+            *input_args: Exactly four arrays: expr, indexes, window, boundless. Only the first
+                element of indexes / window / boundless is read.
+        Raises:
+            ImportError: If `rasterio` is not installed.
+            ValueError: If the argument count or any argument value is invalid.
+        """
+        try:
+            import rasterio
+        except ImportError as err:
+            raise ImportError("`rasterio` is required for tiff.read") from err
+        from rasterio.windows import Window
+        if len(input_args) != 4:
+            raise ValueError("tiff.read expects exactly 4 arguments: expr, indexes, window, boundless")
+        _, indexes, window, boundless = input_args
+        indexes = indexes[0].as_py()
+        if indexes is not None and not isinstance(indexes, int) and not isinstance(indexes, list):
+            raise ValueError(f"tiff.read expects indexes to be None or an int or a list, got {indexes}")
+        boundless = boundless[0].as_py()
+        if boundless is not None and not isinstance(boundless, bool):
+            raise ValueError(f"tiff.read expects boundless to be None or a bool, got {boundless}")
+        boundless = bool(boundless)
+        window = window[0].as_py()
+        if window is not None:
+            if len(window) != 2:
+                raise ValueError(f"tiff.read window invalid, got {window}")
+            window = Window.from_slices(slice(*window[0]), slice(*window[1]), boundless=boundless)
+        opener = _VsiOpener(fp)
+        with rasterio.open("ref", opener=opener) as src:
+            src: rasterio.DatasetReader
+            # TODO(marko): We know the size and dtype so we should be able to preallocate the result and read into it.
+            #   This matters more if we want to rewrite this function to work with multiple inputs at once, in which
+            #   case we should first consider using Rust GDAL bindings - I believe rasterio uses GDAL under the hood.
+            # `boundless` has to reach the read itself, not only the window construction; otherwise
+            # the request is not treated as boundless. It only matters when a window is given.
+            result: np.ndarray = src.read(
+                indexes=indexes,
+                window=window,
+                boundless=boundless and window is not None,
+            )
+            if result.ndim == 2:
+                # rasterio returns a 2D (height, width) array for a single int band index;
+                # restore the leading channel axis that the result struct expects.
+                result = result[np.newaxis, ...]
+            return _return_result(result, indexes)
+class TiffSelectUDF(RefUDF):
+    """Ref-scoped UDF behind `tiff.select`: reads the window covering a GeoJSON-like shape."""
+    def __init__(self):
+        super().__init__("tiff.select")
+    def return_type(self, *input_types: pa.DataType) -> pa.DataType:
+        """Return the fixed result struct type, independent of the input types."""
+        return _TIFF_RES_DTYPE
+    def invoke(self, fp, *input_args: pa.Array) -> pa.Array:
+        """Decode the part of the TIFF behind `fp` that covers `shape`.
+        Args:
+            fp: File-like object for the referenced cell; handed to rasterio through `_VsiOpener`.
+            *input_args: Exactly three arrays: expr, shape, indexes. Only the first
+                element of shape / indexes is read.
+        Raises:
+            ImportError: If `rasterio` is not installed.
+            ValueError: If the argument count or any argument value is invalid.
+        """
+        try:
+            import rasterio
+        except ImportError as err:
+            raise ImportError("`rasterio` is required for tiff.select") from err
+        from rasterio.mask import raster_geometry_mask
+        if len(input_args) != 3:
+            raise ValueError("tiff.select expects exactly 3 arguments: expr, shape, indexes")
+        _, shape, indexes = input_args
+        shape = shape[0].as_py()
+        if shape is None:
+            raise ValueError("tiff.select expects shape to be a GeoJSON-like shape")
+        indexes = indexes[0].as_py()
+        if indexes is not None and not isinstance(indexes, int) and not isinstance(indexes, list):
+            raise ValueError(f"tiff.select expects indexes to be None or an int or a list, got {indexes}")
+        opener = _VsiOpener(fp)
+        with rasterio.open("ref", opener=opener) as src:
+            src: rasterio.DatasetReader
+            shape_mask, _, window = raster_geometry_mask(src, [shape], crop=True)
+            # `out_shape` must describe the bands actually requested: all bands when
+            # `indexes` is None, no band axis for a single int, len(indexes) for a list.
+            if indexes is None:
+                band_dims = (src.count,)
+            elif isinstance(indexes, int):
+                band_dims = ()
+            else:
+                band_dims = (len(indexes),)
+            out_shape = band_dims + shape_mask.shape
+            result: np.ndarray = src.read(window=window, indexes=indexes, out_shape=out_shape, masked=True)
+            if result.ndim == 2:
+                # Single int band index yields (height, width); restore the channel axis.
+                result = result[np.newaxis, ...]
+            return _return_result(result, indexes)
+def _return_result(result: np.ndarray, indexes) -> pa.Array:
+    """Wrap a decoded raster into a one-element array of `_TIFF_RES_DTYPE`.
+    Args:
+        result: Pixel data with axes (channels, height, width). A 2D (height, width)
+            array, as produced for a single int band index, is also accepted.
+        indexes: The band selection that was requested (None, an int, or a list); only
+            used to sanity-check the channel count.
+    Raises:
+        ValueError: If the pixel dtype is neither uint8 nor uint16.
+    """
+    if result.ndim == 2:
+        # A single-band read has no channel axis; add it so the axis positions below hold.
+        result = result[np.newaxis, ...]
+    channels = result.shape[0]
+    if indexes is None:
+        pass
+    elif isinstance(indexes, int):
+        assert channels == 1, f"Expected 1 channel, got {channels}"
+    else:
+        assert channels == len(indexes), f"Expected {len(indexes)} channels, got {channels}"
+    if result.dtype == np.uint8:
+        channel_bit_depth = 8
+    elif result.dtype == np.uint16:
+        channel_bit_depth = 16
+    else:
+        raise ValueError(f"Unsupported bit width: {result.dtype}")
+    return pa.array(
+        [
+            {
+                "pixels": result.tobytes(),
+                "height": result.shape[1],
+                "width": result.shape[2],
+                "channels": channels,
+                "channel_bit_depth": channel_bit_depth,
+            }
+        ],
+        type=_TIFF_RES_DTYPE,
+    )
+class _VsiOpener:
+    """
+    Opener for rasterio that always hands back one pre-existing file-like object.
+    Must match https://rasterio.readthedocs.io/en/stable/topics/vsi.html#python-file-and-filesystem-openers spec but
+    only `open` is needed when going through rasterio.
+    """
+    # Only read modes are accepted by `open`.
+    _READ_MODES = frozenset({"r", "rb"})
+    def __init__(self, file_like):
+        self._file_like = file_like
+    def open(self, _path, mode):
+        """Return the wrapped file-like object; the path is ignored."""
+        if mode in self._READ_MODES:
+            return self._file_like
+        raise ValueError(f"Unsupported mode: {mode}")
+    def isdir(self, _):
+        return False
+    def isfile(self, _):
+        return False
+    def mtime(self, _):
+        return 0
+    def size(self, _):
+        """Delegate to the wrapped object's `size()`."""
+        return self._file_like.size()
+    def modified(self, _):
+        raise NotImplementedError

spiral/expressions/udf.py ADDED Viewed

@@ -0,0 +1,46 @@
+import abc
+import pyarrow as pa
+from spiral import _lib
+from spiral.expressions.base import Expr
+class BaseUDF:
+    """Shared base of the UDF flavours: wraps a native UDF handle and makes it callable."""
+    def __init__(self, udf):
+        self._udf = udf
+    def __call__(self, *args) -> Expr:
+        """Build an expression that applies this UDF to the given arguments."""
+        from spiral import expressions as se
+        native_args = [se.lift(arg).__expr__ for arg in args]
+        return Expr(self._udf(native_args))
+    # NOTE(review): the class does not derive from `abc.ABC`, so `abstractmethod` is not
+    #   enforced at instantiation time here.
+    @abc.abstractmethod
+    def return_type(self, *input_types: pa.DataType) -> pa.DataType: ...
+class UDF(BaseUDF):
+    """A User-Defined Function (UDF) registered under a name, with `invoke` as its body."""
+    def __init__(self, name: str):
+        # Register the bound `return_type` / `invoke` methods as the native callbacks.
+        native_udf = _lib.expr.udf.create(
+            name,
+            return_type=self.return_type,
+            invoke=self.invoke,
+        )
+        super().__init__(native_udf)
+    @abc.abstractmethod
+    def invoke(self, *input_args: pa.Array) -> pa.Array: ...
+class RefUDF(BaseUDF):
+    """A UDF over a single ref cell, and therefore can access the file object."""
+    def __init__(self, name: str):
+        # Same registration as a plain UDF, but created with scope "ref".
+        native_udf = _lib.expr.udf.create(
+            name,
+            return_type=self.return_type,
+            invoke=self.invoke,
+            scope="ref",
+        )
+        super().__init__(native_udf)
+    @abc.abstractmethod
+    def invoke(self, fp: _lib.FileObject, *input_args: pa.Array) -> pa.Array:
+        """Run the UDF for one ref cell.
+        NOTE: The first argument is always the ref cell. All array input args will be sliced to the appropriate row.
+        """
+        ...

spiral/grpc_.py ADDED Viewed

@@ -0,0 +1,32 @@
+from collections.abc import AsyncIterator, Awaitable, Callable
+from typing import TypeVar
+R = TypeVar("R")
+T = TypeVar("T")
+async def paged(stub_fn: Callable[[R], Awaitable[T]], request: R, page_size: int = None) -> AsyncIterator[T]:
+    """Yield every response page of a gRPC paged API, one call per page.
+    Assumes fields exist as per https://cloud.google.com/apis/design/design_patterns#list_pagination
+    The `request` object is updated in place: `page_size` and `page_token` are set before each call.
+    """
+    token: str | None = None
+    has_more = True
+    while has_more:
+        request.page_size = page_size
+        request.page_token = token
+        page = await stub_fn(request)
+        # An empty / missing next token marks the final page.
+        token = page.next_page_token
+        has_more = bool(token)
+        yield page
+async def paged_items(
+    stub_fn: Callable[[R], Awaitable[T]], request: R, collection_name: str, page_size: int = None
+) -> AsyncIterator:
+    """Yield the individual items of a paged gRPC API.
+    Iterates over the pages produced by `paged` and yields each element of the
+    page attribute named `collection_name`.
+    """
+    pages = paged(stub_fn, request, page_size=page_size)
+    async for page in pages:
+        items = getattr(page, collection_name)
+        for item in items:
+            yield item

spiral/iceberg/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+from spiral.iceberg.client import Iceberg
+__all__ = ["Iceberg"]

spiral/iceberg/client.py ADDED Viewed

@@ -0,0 +1,33 @@
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+    from pyiceberg.catalog import Catalog
+    from spiral.client import Spiral
+class Iceberg:
+    """
+    Apache Iceberg is a powerful open-source table format designed for high-performance data lakes.
+    Iceberg brings reliability, scalability, and advanced features like time travel, schema evolution,
+    and ACID transactions to your warehouse.
+    This class is the entry point for opening the Iceberg REST catalog.
+    """
+    def __init__(self, spiral: "Spiral", *, project_id: str | None = None):
+        self._spiral = spiral
+        self._project_id = project_id
+        self._api = self._spiral.config.api
+    def catalog(self) -> "Catalog":
+        """Open the Iceberg catalog."""
+        from pyiceberg.catalog import load_catalog
+        config = self._spiral.config
+        # REST catalog rooted at `<spiraldb uri>/iceberg`, authenticated with the current token.
+        properties = {
+            "type": "rest",
+            "uri": config.spiraldb.uri + "/iceberg",
+            "token": config.authn.token().expose_secret(),
+        }
+        return load_catalog("default", **properties)

spiral/indexes/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+from spiral.indexes.client import Indexes
+from spiral.indexes.index import TextIndex
+from spiral.indexes.scan import SearchScan
+__all__ = ["Indexes", "SearchScan", "TextIndex"]