pyspiral 0.1.0__cp310-abi3-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyspiral-0.1.0.dist-info/METADATA +48 -0
- pyspiral-0.1.0.dist-info/RECORD +81 -0
- pyspiral-0.1.0.dist-info/WHEEL +4 -0
- pyspiral-0.1.0.dist-info/entry_points.txt +2 -0
- spiral/__init__.py +11 -0
- spiral/_lib.abi3.so +0 -0
- spiral/adbc.py +386 -0
- spiral/api/__init__.py +221 -0
- spiral/api/admin.py +29 -0
- spiral/api/filesystems.py +125 -0
- spiral/api/organizations.py +90 -0
- spiral/api/projects.py +160 -0
- spiral/api/tables.py +94 -0
- spiral/api/tokens.py +56 -0
- spiral/api/workloads.py +45 -0
- spiral/arrow.py +209 -0
- spiral/authn/__init__.py +0 -0
- spiral/authn/authn.py +89 -0
- spiral/authn/device.py +206 -0
- spiral/authn/github_.py +33 -0
- spiral/authn/modal_.py +18 -0
- spiral/catalog.py +78 -0
- spiral/cli/__init__.py +82 -0
- spiral/cli/__main__.py +4 -0
- spiral/cli/admin.py +21 -0
- spiral/cli/app.py +48 -0
- spiral/cli/console.py +95 -0
- spiral/cli/fs.py +47 -0
- spiral/cli/login.py +13 -0
- spiral/cli/org.py +90 -0
- spiral/cli/printer.py +45 -0
- spiral/cli/project.py +107 -0
- spiral/cli/state.py +3 -0
- spiral/cli/table.py +20 -0
- spiral/cli/token.py +27 -0
- spiral/cli/types.py +53 -0
- spiral/cli/workload.py +59 -0
- spiral/config.py +26 -0
- spiral/core/__init__.py +0 -0
- spiral/core/core/__init__.pyi +53 -0
- spiral/core/manifests/__init__.pyi +53 -0
- spiral/core/metastore/__init__.pyi +91 -0
- spiral/core/spec/__init__.pyi +257 -0
- spiral/dataset.py +239 -0
- spiral/debug.py +251 -0
- spiral/expressions/__init__.py +222 -0
- spiral/expressions/base.py +149 -0
- spiral/expressions/http.py +86 -0
- spiral/expressions/io.py +100 -0
- spiral/expressions/list_.py +68 -0
- spiral/expressions/refs.py +44 -0
- spiral/expressions/str_.py +39 -0
- spiral/expressions/struct.py +57 -0
- spiral/expressions/tiff.py +223 -0
- spiral/expressions/udf.py +46 -0
- spiral/grpc_.py +32 -0
- spiral/project.py +137 -0
- spiral/proto/_/__init__.py +0 -0
- spiral/proto/_/arrow/__init__.py +0 -0
- spiral/proto/_/arrow/flight/__init__.py +0 -0
- spiral/proto/_/arrow/flight/protocol/__init__.py +0 -0
- spiral/proto/_/arrow/flight/protocol/sql/__init__.py +1990 -0
- spiral/proto/_/scandal/__init__.py +223 -0
- spiral/proto/_/spfs/__init__.py +36 -0
- spiral/proto/_/spiral/__init__.py +0 -0
- spiral/proto/_/spiral/table/__init__.py +225 -0
- spiral/proto/_/spiraldb/__init__.py +0 -0
- spiral/proto/_/spiraldb/metastore/__init__.py +499 -0
- spiral/proto/__init__.py +0 -0
- spiral/proto/scandal/__init__.py +45 -0
- spiral/proto/spiral/__init__.py +0 -0
- spiral/proto/spiral/table/__init__.py +96 -0
- spiral/proto/substrait/__init__.py +3399 -0
- spiral/proto/substrait/extensions/__init__.py +115 -0
- spiral/proto/util.py +41 -0
- spiral/py.typed +0 -0
- spiral/scan_.py +168 -0
- spiral/settings.py +157 -0
- spiral/substrait_.py +275 -0
- spiral/table.py +157 -0
- spiral/types_.py +6 -0
@@ -0,0 +1,149 @@
|
|
1
|
+
import datetime
|
2
|
+
from typing import TypeAlias
|
3
|
+
|
4
|
+
import pyarrow as pa
|
5
|
+
|
6
|
+
from spiral import _lib
|
7
|
+
|
8
|
+
# Alias for the expression type provided by the `_lib` module (`_lib.spql.expr.Expr`).
# `Expr.__init__` checks its argument against this type with `isinstance`.
NativeExpr: TypeAlias = _lib.spql.expr.Expr
|
9
|
+
|
10
|
+
|
11
|
+
class Expr:
    """Base class for Spiral expressions. All expressions support comparison and basic arithmetic operations.

    An ``Expr`` wraps a native expression object and builds new expressions through Python operators.
    Comparison operators (including ``==`` and ``!=``) return a new ``Expr`` rather than a ``bool``.
    Because ``__eq__`` is overridden without ``__hash__``, instances are not hashable.
    """

    def __init__(self, native: NativeExpr) -> None:
        """Wrap a native expression.

        Raises:
            TypeError: If `native` is not a `NativeExpr` instance.
        """
        if not isinstance(native, NativeExpr):
            raise TypeError(f"Expected a native expression, got {type(native)}")
        self._native = native

    @property
    def __expr__(self) -> NativeExpr:
        """The wrapped native expression."""
        return self._native

    def __str__(self):
        return str(self.__expr__)

    def __repr__(self):
        return repr(self.__expr__)

    def __getitem__(self, item: str | int) -> "Expr":
        """
        Get an item from a struct or list.

        Args:
            item: The key or index to get.
                If item is a string, it is assumed to be a field in a struct. Dot-separated string is supported
                to access nested fields. If item is an integer, it is assumed to be an index in a list.
        """
        from spiral import expressions as se

        expr = self

        if isinstance(item, int):
            # Assume list and get an element.
            expr = se.list_.element_at(expr, item)
        else:
            # Walk into the struct.
            for part in item.split("."):
                expr = se.getitem(expr, part)

        return expr

    def __eq__(self, other: "ExprLike") -> "Expr":
        return self._binary("eq", other)

    def __ne__(self, other: "ExprLike") -> "Expr":
        return self._binary("neq", other)

    def __lt__(self, other: "ExprLike") -> "Expr":
        return self._binary("lt", other)

    def __le__(self, other: "ExprLike") -> "Expr":
        return self._binary("lte", other)

    def __gt__(self, other: "ExprLike") -> "Expr":
        return self._binary("gt", other)

    def __ge__(self, other: "ExprLike") -> "Expr":
        return self._binary("gte", other)

    def __and__(self, other: "ExprLike") -> "Expr":
        return self._binary("and", other)

    def __or__(self, other: "ExprLike") -> "Expr":
        return self._binary("or", other)

    def __xor__(self, other: "ExprLike") -> "Expr":
        # XOR is not supported yet.
        raise NotImplementedError

    def __add__(self, other: "ExprLike") -> "Expr":
        return self._binary("add", other)

    def __sub__(self, other: "ExprLike") -> "Expr":
        return self._binary("sub", other)

    def __mul__(self, other: "ExprLike") -> "Expr":
        return self._binary("mul", other)

    def __truediv__(self, other: "ExprLike") -> "Expr":
        return self._binary("div", other)

    def __mod__(self, other: "ExprLike") -> "Expr":
        return self._binary("mod", other)

    def __neg__(self) -> "Expr":
        return Expr(_lib.spql.expr.unary("neg", self.__expr__))

    def in_(self, other: "ExprLike") -> "Expr":
        """Build an expression checking whether this value is contained in the list expression `other`."""
        from spiral import expressions as se

        other = se.lift(other)
        return Expr(_lib.spql.expr.list.contains(other.__expr__, self.__expr__))

    def contains(self, other: "ExprLike") -> "Expr":
        """Build an expression checking whether `other` is contained in this list expression."""
        from spiral import expressions as se

        return se.lift(other).in_(self)

    def cast(self, dtype: pa.DataType) -> "Expr":
        """Cast the expression result to a different data type."""
        return Expr(_lib.spql.expr.cast(self.__expr__, dtype))

    def select(self, *paths: str, exclude: list[str] | None = None) -> "Expr":
        """Select fields from a struct-like expression.

        Args:
            *paths: Field names to select. If a path contains a dot, it is assumed to be a nested struct field.
                Several nested paths may share a parent (e.g. ``"a.b", "a.c"``); they are combined into a single
                nested selection. If a field is requested both whole (``"a"``) and nested (``"a.b"``), the whole
                field is selected.
            exclude: List of field names to exclude from result.

        Returns:
            The expression itself when neither `paths` nor `exclude` is given, otherwise a new expression.
        """
        if not paths and not exclude:
            # Nothing to select and nothing to drop.
            return self

        from spiral import expressions as se

        # If any of the paths contain nested fields, then we re-pack nested select statements.
        if any("." in p for p in paths):
            # Top-level field name -> child paths to select inside it, or None to take the whole field.
            # Grouping by parent keeps "a.b" and "a.c" from overwriting each other.
            children_by_field: dict[str, list[str] | None] = {}
            for p in paths:
                parent, dot, child = p.partition(".")
                if not dot:
                    children_by_field[parent] = None
                elif parent not in children_by_field:
                    children_by_field[parent] = [child]
                elif children_by_field[parent] is not None:
                    children_by_field[parent].append(child)

            fields = {
                name: self[name] if children is None else self[name].select(*children)
                for name, children in children_by_field.items()
            }
            packed = se.pack(fields)
            if exclude:
                packed = packed.select(exclude=exclude)
            return packed

        if not paths:
            # Only `exclude` was given: drop those fields instead of silently returning self.
            return se.select(self, exclude=exclude)

        return se.select(self, names=list(paths), exclude=exclude)

    def _binary(self, op: str, rhs: "ExprLike") -> "Expr":
        """Create a binary expression (comparison, logical or arithmetic) with this expression on the left.

        Args:
            op: Operator name passed to the native layer, e.g. "eq", "and", "add".
            rhs: Right-hand operand; lifted to an expression with `se.lift` first.
        """
        from spiral import expressions as se

        rhs = se.lift(rhs)
        return Expr(_lib.spql.expr.binary(op, self.__expr__, rhs.__expr__))
|
146
|
+
|
147
|
+
|
148
|
+
# Plain Python values accepted where a scalar operand is expected.
ScalarLike: TypeAlias = bool | int | float | str | list | datetime.datetime | None
# Anything accepted where an expression is expected: an `Expr`, a dict, or a `ScalarLike` value.
# Methods on `Expr` pass such values through `se.lift` before using them.
ExprLike: TypeAlias = Expr | dict | ScalarLike
|
@@ -0,0 +1,86 @@
|
|
1
|
+
import hishel
|
2
|
+
import httpx
|
3
|
+
import pyarrow as pa
|
4
|
+
|
5
|
+
from spiral.expressions.base import Expr, ExprLike
|
6
|
+
from spiral.expressions.struct import pack
|
7
|
+
from spiral.expressions.udf import UDF
|
8
|
+
from spiral.settings import APP_DIR
|
9
|
+
|
10
|
+
|
11
|
+
def get(url: ExprLike, headers: ExprLike = None, force_cache: bool = False) -> Expr:
    """Submit a GET request to either a scalar or vector of URLs.

    Args:
        url: Expression evaluating to the URL(s) to fetch.
        headers: Optional expression with request headers; omitted from the request struct when `None`.
        force_cache: Forwarded to `HttpGet`, which passes it on with each request as the
            "force_cache" extension.
    """
    request = {"url": url} if headers is None else {"url": url, "headers": headers}
    udf = HttpGet(force_cache)
    return udf(pack(request))
|
17
|
+
|
18
|
+
|
19
|
+
class HttpGet(UDF):
    """UDF performing HTTP requests; passes "http.get" to the `UDF` base constructor."""

    # Result struct: response body, status code and response headers.
    RES_DTYPE: pa.DataType = pa.struct(
        [
            pa.field("bytes", pa.large_binary()),
            pa.field("status", pa.int32()),
            pa.field("headers", pa.map_(pa.string(), pa.string())),
        ]
    )

    def __init__(self, force_cache: bool = False):
        super().__init__("http.get")
        self._force_cache = force_cache

    def return_type(self, *input_types: pa.DataType) -> pa.DataType:
        """Return the fixed response struct type, regardless of the input types."""
        return HttpGet.RES_DTYPE

    def invoke(self, *input_args: pa.Array) -> pa.Array:
        """Run the requests described by the single input array and return the responses.

        Raises:
            ValueError: If the number of arguments is not exactly one.
        """
        n_args = len(input_args)
        if n_args != 1:
            raise ValueError(f"Expected 1 argument, got {n_args}")

        (requests,) = input_args
        responses = _http_request(requests, self._force_cache)
        # Flatten a chunked result into one contiguous array.
        return responses.combine_chunks() if isinstance(responses, pa.ChunkedArray) else responses
|
42
|
+
|
43
|
+
|
44
|
+
def _http_request(arg: pa.Array, force_cache: bool) -> pa.Array:
    """Issue one HTTP request per element of a struct array and collect the responses.

    Each element must provide "url"; "method" (default "GET") and "headers" (default `{}`) are optional.

    Raises:
        TypeError: If `arg` is not a `pa.StructArray`.
    """
    client = _HttpClient()

    if not isinstance(arg, pa.StructArray):
        raise TypeError(f"Unsupported argument: {arg} ({type(arg)})")

    # We assume a vector of requests, but with potentially many arguments
    responses = []
    for req in arg.to_pylist():
        response = client.request(
            req.get("method", "GET").upper(),
            req["url"],
            headers=req.get("headers", {}),
            extensions={"force_cache": force_cache},
        )
        responses.append(_response_dict(response))

    return pa.array(responses, type=HttpGet.RES_DTYPE)
|
65
|
+
|
66
|
+
|
67
|
+
def _response_dict(response: httpx.Response) -> dict:
    """Convert a successful response into a dict with "bytes", "status" and "headers" keys.

    Raises:
        ValueError: If the response status code is anything other than 200.
    """
    status = response.status_code
    if status != 200:
        raise ValueError(f"Request failed with status {status}")

    body = response.read()
    return {"bytes": body, "status": status, "headers": dict(response.headers)}
|
75
|
+
|
76
|
+
|
77
|
+
class _HttpClient(hishel.CacheClient):
    """Process-wide singleton caching HTTP client.

    Responses are cached on disk under `APP_DIR / "http.cache"` with a TTL of 3600.
    """

    _instance: "_HttpClient | None" = None
    # Set to True on the instance once the underlying client has been constructed.
    _initialized: bool = False

    def __new__(cls, *args, **kwargs):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self):
        # Python calls __init__ on every `_HttpClient()` even though __new__ returns the shared
        # instance. Without this guard each call would rebuild the client and discard the
        # previous one without closing it.
        if self._initialized:
            return
        super().__init__(storage=hishel.FileStorage(base_path=APP_DIR / "http.cache", ttl=3600))
        self._initialized = True
|
spiral/expressions/io.py
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
import tarfile
|
2
|
+
from io import BytesIO
|
3
|
+
|
4
|
+
import pyarrow as pa
|
5
|
+
|
6
|
+
from spiral.expressions.base import Expr, ExprLike
|
7
|
+
from spiral.expressions.struct import pack
|
8
|
+
from spiral.expressions.udf import UDF
|
9
|
+
|
10
|
+
|
11
|
+
def read_file(path: ExprLike) -> Expr:
    """
    Read file path(s) from disk into a struct with a single field "bytes" containing the file contents.

    Args:
        path: Expression evaluating to an array of strings representing local disk paths.
    """
    udf = FileRead()
    return udf(pack({"path": path}))
|
20
|
+
|
21
|
+
|
22
|
+
class FileRead(UDF):
    """UDF reading local files; passes "file.read" to the `UDF` base constructor."""

    # Result struct with a single "bytes" field holding the file contents.
    RES_DTYPE: pa.DataType = pa.struct(
        [
            pa.field("bytes", pa.large_binary()),
        ]
    )

    def __init__(self):
        super().__init__("file.read")

    def return_type(self, *input_types: pa.DataType) -> pa.DataType:
        """Return the fixed result struct type, regardless of the input types."""
        return FileRead.RES_DTYPE

    def invoke(self, *input_args: pa.Array) -> pa.Array:
        """Read the file named by the "path" field of every input element.

        Raises:
            ValueError: If the number of arguments is not exactly one.
        """
        n_args = len(input_args)
        if n_args != 1:
            raise ValueError(f"Expected 1 argument, got {n_args}")
        (requests,) = input_args

        contents = []
        for row in requests:
            with open(row["path"].as_py(), "rb") as handle:
                contents.append({"bytes": handle.read()})

        return pa.array(contents, type=FileRead.RES_DTYPE)
|
46
|
+
|
47
|
+
|
48
|
+
def read_tar(path: ExprLike = None, bytes_: ExprLike = None) -> "Expr":
    """Untar a vector of paths / byte arrays representing tarballs.

    Args:
        path: Expression with tarball paths; packed as the "path" field when given.
        bytes_: Expression with tarball contents; packed as the "bytes" field when given.

    Raises:
        ValueError: If neither `path` nor `bytes_` is provided.
    """
    if path is None and bytes_ is None:
        raise ValueError("Expected either path or bytes_ to be provided")

    # Keep only the inputs that were supplied, "path" first.
    candidates = (("path", path), ("bytes", bytes_))
    to_pack = {key: value for key, value in candidates if value is not None}
    return TarRead()(pack(to_pack))
|
58
|
+
|
59
|
+
|
60
|
+
class TarRead(UDF):
    """UDF expanding tarballs into lists of members; passes "tar.read" to the `UDF` base constructor."""

    # One list per input element; each entry holds a member's name and contents.
    RES_DTYPE = pa.list_(
        pa.struct(
            [
                pa.field("name", pa.string()),
                pa.field("bytes", pa.large_binary()),
            ]
        )
    )

    def __init__(self):
        super().__init__("tar.read")

    def return_type(self, *input_types: pa.DataType) -> pa.DataType:
        """Return the fixed result list type, regardless of the input types."""
        return TarRead.RES_DTYPE

    def invoke(self, *input_args: pa.Array) -> pa.Array:
        """Open each input tarball (by "path", else from "bytes") and read its members.

        Directories are skipped, as are members for which `extractfile` yields no file object
        (anything that is not a regular file or link).

        Raises:
            ValueError: If the number of arguments is not exactly one, or an element has neither
                a "path" nor a "bytes" field.
        """
        if len(input_args) != 1:
            raise ValueError(f"Expected 1 argument, got {len(input_args)}")
        arg = input_args[0]

        res = []
        for req in arg:
            if "path" in req:
                kwargs = {"name": req["path"].as_py()}
            elif "bytes" in req:
                kwargs = {"fileobj": BytesIO(req["bytes"].as_py())}
            else:
                raise ValueError("Expected path or bytes_ to be provided")

            files = []
            with tarfile.open(**kwargs) as f:
                for m in f.getmembers():
                    if m.type == tarfile.DIRTYPE:
                        continue
                    # TODO(ngates): skip other types too maybe? Why are we even skipping directories?
                    member_file = f.extractfile(m)
                    if member_file is None:
                        # extractfile returns None for members that are neither regular files nor
                        # links (e.g. FIFOs, devices); they have no contents to read.
                        continue
                    with member_file:
                        files.append({"name": m.name, "bytes": member_file.read()})
            res.append(files)

        return pa.array(res, type=TarRead.RES_DTYPE)
|
@@ -0,0 +1,68 @@
|
|
1
|
+
from spiral.expressions.base import Expr, ExprLike
|
2
|
+
|
3
|
+
|
4
|
+
def in_(expr: ExprLike, values: ExprLike) -> Expr:
    """Check if a value is in a list.

    Args:
        expr: The value to check.
        values: The list array expression to check against.
    """
    # `se.list.in_(Array[2, 4], Array[[1, 2], [1, 2]]) -> Array[True, False]`
    from spiral.expressions import lift

    return lift(expr).in_(values)
|
16
|
+
|
17
|
+
|
18
|
+
def element_at(expr: ExprLike, index: ExprLike) -> Expr:
    """Get the element at the given index.

    Args:
        expr: The list array expression.
        index: The index to get.
    """
    # e.g. `se.list.element_at([1, 2, 3], 1) -> 2`
    from spiral import _lib
    from spiral.expressions import lift

    list_expr = lift(expr)
    position = lift(index)
    native = _lib.spql.expr.list.element_at(list_expr.__expr__, position.__expr__)
    return Expr(native)
|
33
|
+
|
34
|
+
|
35
|
+
def of(*expr: ExprLike) -> Expr:
    """Create an array or scalar list from a series of expressions (not implemented yet).

    All values must be of the same type. The expressions must all also have the same length
    (1 for scalars).

    e.g. `se.list.of(1+3, 2, 3) -> [4, 2, 3]`

    Raises:
        NotImplementedError: Always; raised explicitly so callers do not receive `None`.
    """
    raise NotImplementedError
|
41
|
+
|
42
|
+
|
43
|
+
def zip(*lists: ExprLike) -> Expr:
    """Merge the given lists, with duplicates (not implemented yet).

    Original example: `se.list.merge([1, 2], [3, 4]) -> [(1, 2), (3, 4)]`

    NOTE(review): the example names `merge` rather than `zip`, and its output does not look like an
    element-wise zip. Confirm the intended semantics before implementing.

    Raises:
        NotImplementedError: Always; raised explicitly so callers do not receive `None`.
    """
    raise NotImplementedError
|
48
|
+
|
49
|
+
|
50
|
+
def concat(*lists: ExprLike) -> Expr:
    """Concatenate the given lists (not implemented yet).

    The types of all the lists must be the same.

    e.g. `se.list.concat([1, 2], [3, 4]) -> [1, 2, 3, 4]`

    Raises:
        NotImplementedError: Always; raised explicitly so callers do not receive `None`.
    """
    raise NotImplementedError
|
55
|
+
|
56
|
+
|
57
|
+
def slice_(expr: ExprLike, start: int | None = None, stop: int | None = None) -> Expr:
    """Slice a list (not implemented yet).

    e.g. `se.list.slice_([0, 1, 2], slice(0,2)) -> [0, 1]`

    Args:
        expr: The list expression to slice.
        start: Start of the slice, or `None`.
        stop: End of the slice, or `None`.

    Raises:
        NotImplementedError: Always; raised explicitly so callers do not receive `None`.
    """
    raise NotImplementedError
|
62
|
+
|
63
|
+
|
64
|
+
def length(expr: ExprLike) -> Expr:
    """Get the length of a list (not implemented yet).

    e.g. `se.list.length([1, 2, 3]) -> 3`

    Raises:
        NotImplementedError: Always; raised explicitly so callers do not receive `None`.
    """
    raise NotImplementedError
|
@@ -0,0 +1,44 @@
|
|
1
|
+
from typing import TYPE_CHECKING
|
2
|
+
|
3
|
+
from spiral.expressions.base import Expr, ExprLike
|
4
|
+
|
5
|
+
if TYPE_CHECKING:
|
6
|
+
from spiral import Table
|
7
|
+
|
8
|
+
|
9
|
+
def ref(expr: ExprLike, field: str | None = None) -> Expr:
    """Store binary values as references. This expression can only be used on write.

    It is often better to store large cell values, such as bytes columns, that aren't used in filter
    expressions as references. This enables more efficient scan pruning. Many of Spiral's cell pushdown
    expressions work over references.

    Args:
        expr: The expression to store as a reference.
        field: If the expr evaluates into struct, the field name of that struct that should be referenced.
            If `None`, the expr must evaluate into a type that supports referencing.
    """
    from spiral import _lib
    from spiral.expressions import lift

    source = lift(expr)
    native = _lib.spql.expr.ref(source.__expr__, field)
    return Expr(native)
|
26
|
+
|
27
|
+
|
28
|
+
def deref(expr: ExprLike, field: str | None = None, table: "Table" = None) -> Expr:
    """De-reference referenced values.

    See `ref` for more information on Spiral's reference values. This expression is used to de-reference
    referenced columns back into their original form, e.g. binary.

    Args:
        expr: The expression to de-reference.
        field: If the expr evaluates into struct, the field name of that struct that should be de-referenced.
            If `None`, the expr must evaluate into a reference type.
        table (optional): The table to de-reference from, if not available in input expression.
    """
    from spiral import _lib
    from spiral.expressions import lift

    source = lift(expr)
    # The native call receives the table's `_table` attribute, or None when no table was given.
    native_table = None if table is None else table._table
    return Expr(_lib.spql.expr.deref(source.__expr__, field, native_table))
|
@@ -0,0 +1,39 @@
|
|
1
|
+
import pyarrow as pa
|
2
|
+
import pyarrow.compute as pc
|
3
|
+
import re2 as re
|
4
|
+
|
5
|
+
from spiral import _lib
|
6
|
+
from spiral.expressions.base import Expr, ExprLike
|
7
|
+
|
8
|
+
# TODO(ngates): we can add a symmetric "ascii" expression namespace in the future if
|
9
|
+
# the performance is required.
|
10
|
+
|
11
|
+
|
12
|
+
def substr(expr: ExprLike = None, *, begin: int = 0, end: int | None = None) -> Expr:
    """Slice a string.

    Args:
        expr: The string expression to slice.
        begin: The starting index of the slice.
        end: The ending index of the slice.
    """
    from spiral import expressions as se

    source = se.lift(expr)
    native = _lib.spql.str.substr(source.__expr__, begin=begin, end=end)
    return Expr(native)
|
24
|
+
|
25
|
+
|
26
|
+
def extract_regex(pattern: str, *, strings: ExprLike) -> Expr:
    """Extract the first occurrence of a regex pattern from a string.

    Raises:
        NotImplementedError: Always; this expression is not implemented yet.
    """
    raise NotImplementedError
|
29
|
+
|
30
|
+
|
31
|
+
def _extract_regex(arg: pa.Array | pa.Scalar, pattern: str) -> pa.Array | pa.Scalar:
    """Apply `pc.extract_regex` to a string input and cast the result to a struct of string fields.

    The result struct has one string field per named group of `pattern`.

    Raises:
        TypeError: If `arg` is not of Arrow string type.
    """
    # Compute the return type based on the regex groups
    compiled = re.compile(pattern)
    group_fields = [pa.field(group, type=pa.string()) for group in compiled.groupindex]
    result_type = pa.struct(group_fields)

    if not pa.types.is_string(arg.type):
        raise TypeError("Input argument does not have the expected type")

    extracted = pc.extract_regex(arg, pattern=pattern)
    return extracted.cast(result_type)
|
@@ -0,0 +1,57 @@
|
|
1
|
+
from spiral import _lib
|
2
|
+
from spiral.expressions.base import Expr, ExprLike
|
3
|
+
|
4
|
+
|
5
|
+
def getitem(expr: ExprLike, field: str) -> Expr:
    """Get field from a struct.

    Args:
        expr: The struct expression to get the field from.
        field: The field to get. Dot-separated string is supported to access nested fields.
            NOTE(review): `field` is handed to the native call unchanged; confirm that the native
            layer resolves dotted names.
    """
    from spiral import expressions as se

    struct_expr = se.lift(expr)
    native = _lib.spql.expr.struct.getitem(struct_expr.__expr__, field)
    return Expr(native)
|
16
|
+
|
17
|
+
|
18
|
+
def pack(fields: dict[str, ExprLike]) -> Expr:
    """Assemble a new struct from the given named fields.

    Args:
        fields: A dictionary of field names to expressions. The field names will be used as the struct
            field names.
    """
    from spiral import expressions as se

    names = list(fields)
    # Lift every value to an expression and unwrap it to its native form.
    values = [se.lift(value).__expr__ for value in fields.values()]
    return Expr(_lib.spql.expr.struct.pack(names, values))
|
27
|
+
|
28
|
+
|
29
|
+
def merge(*structs: "ExprLike") -> Expr:
    """Merge fields from the given structs into a single struct.

    Args:
        *structs: Each expression must evaluate to a struct.

    Returns:
        A single struct containing all the fields from the input structs.
        If a field is present in multiple structs, the value from the last struct is used.
    """
    from spiral import expressions as se

    if len(structs) == 1:
        # A single struct needs no merging; just lift it.
        (only,) = structs
        return se.lift(only)

    natives = [se.lift(struct).__expr__ for struct in structs]
    return Expr(_lib.spql.expr.struct.merge(natives))
|
44
|
+
|
45
|
+
|
46
|
+
def select(expr: ExprLike, names: list[str] = None, exclude: list[str] = None) -> Expr:
    """Select fields from a struct.

    Args:
        expr: The struct-like expression to select fields from.
        names: Field names to select. If a path contains a dot, it is assumed to be a nested struct field.
        exclude: List of field names to exclude from result. Exactly one of `names` or `exclude` must be
            provided.
    """
    from spiral import expressions as se

    struct_expr = se.lift(expr)
    native = _lib.spql.expr.struct.select(struct_expr.__expr__, names, exclude)
    return Expr(native)
|