pyspiral 0.2.5__pp310-pypy310_pp73-macosx_10_13_x86_64.whl
Sign up to get free protection for your applications and to get access to all the features.
- pyspiral-0.2.5.dist-info/METADATA +48 -0
- pyspiral-0.2.5.dist-info/RECORD +81 -0
- pyspiral-0.2.5.dist-info/WHEEL +4 -0
- pyspiral-0.2.5.dist-info/entry_points.txt +2 -0
- spiral/__init__.py +11 -0
- spiral/_lib.pypy310-pp73-darwin.so +0 -0
- spiral/adbc.py +386 -0
- spiral/api/__init__.py +221 -0
- spiral/api/admin.py +29 -0
- spiral/api/filesystems.py +125 -0
- spiral/api/organizations.py +90 -0
- spiral/api/projects.py +160 -0
- spiral/api/tables.py +94 -0
- spiral/api/tokens.py +56 -0
- spiral/api/workloads.py +45 -0
- spiral/arrow.py +209 -0
- spiral/authn/__init__.py +0 -0
- spiral/authn/authn.py +89 -0
- spiral/authn/device.py +206 -0
- spiral/authn/github_.py +33 -0
- spiral/authn/modal_.py +18 -0
- spiral/catalog.py +78 -0
- spiral/cli/__init__.py +82 -0
- spiral/cli/__main__.py +4 -0
- spiral/cli/admin.py +21 -0
- spiral/cli/app.py +48 -0
- spiral/cli/console.py +95 -0
- spiral/cli/fs.py +47 -0
- spiral/cli/login.py +13 -0
- spiral/cli/org.py +90 -0
- spiral/cli/printer.py +45 -0
- spiral/cli/project.py +107 -0
- spiral/cli/state.py +3 -0
- spiral/cli/table.py +20 -0
- spiral/cli/token.py +27 -0
- spiral/cli/types.py +53 -0
- spiral/cli/workload.py +59 -0
- spiral/config.py +26 -0
- spiral/core/__init__.py +0 -0
- spiral/core/core/__init__.pyi +53 -0
- spiral/core/manifests/__init__.pyi +53 -0
- spiral/core/metastore/__init__.pyi +91 -0
- spiral/core/spec/__init__.pyi +257 -0
- spiral/dataset.py +239 -0
- spiral/debug.py +251 -0
- spiral/expressions/__init__.py +222 -0
- spiral/expressions/base.py +149 -0
- spiral/expressions/http.py +86 -0
- spiral/expressions/io.py +100 -0
- spiral/expressions/list_.py +68 -0
- spiral/expressions/refs.py +44 -0
- spiral/expressions/str_.py +39 -0
- spiral/expressions/struct.py +57 -0
- spiral/expressions/tiff.py +223 -0
- spiral/expressions/udf.py +46 -0
- spiral/grpc_.py +32 -0
- spiral/project.py +137 -0
- spiral/proto/_/__init__.py +0 -0
- spiral/proto/_/arrow/__init__.py +0 -0
- spiral/proto/_/arrow/flight/__init__.py +0 -0
- spiral/proto/_/arrow/flight/protocol/__init__.py +0 -0
- spiral/proto/_/arrow/flight/protocol/sql/__init__.py +1990 -0
- spiral/proto/_/scandal/__init__.py +223 -0
- spiral/proto/_/spfs/__init__.py +36 -0
- spiral/proto/_/spiral/__init__.py +0 -0
- spiral/proto/_/spiral/table/__init__.py +225 -0
- spiral/proto/_/spiraldb/__init__.py +0 -0
- spiral/proto/_/spiraldb/metastore/__init__.py +499 -0
- spiral/proto/__init__.py +0 -0
- spiral/proto/scandal/__init__.py +45 -0
- spiral/proto/spiral/__init__.py +0 -0
- spiral/proto/spiral/table/__init__.py +96 -0
- spiral/proto/substrait/__init__.py +3399 -0
- spiral/proto/substrait/extensions/__init__.py +115 -0
- spiral/proto/util.py +41 -0
- spiral/py.typed +0 -0
- spiral/scan_.py +168 -0
- spiral/settings.py +157 -0
- spiral/substrait_.py +275 -0
- spiral/table.py +157 -0
- spiral/types_.py +6 -0
@@ -0,0 +1,149 @@
|
|
1
|
+
import datetime
|
2
|
+
from typing import TypeAlias
|
3
|
+
|
4
|
+
import pyarrow as pa
|
5
|
+
|
6
|
+
from spiral import _lib
|
7
|
+
|
8
|
+
NativeExpr: TypeAlias = _lib.spql.expr.Expr
|
9
|
+
|
10
|
+
|
11
|
+
class Expr:
    """Base class for Spiral expressions.

    Wraps a native expression and overloads Python's comparison, ``&``/``|`` and
    arithmetic operators so that applying them builds a new expression.

    Note:
        Comparison operators return an ``Expr`` rather than a ``bool``. The class
        defines ``__eq__`` without ``__hash__``, so ``Expr`` instances are unhashable.
    """

    def __init__(self, native: NativeExpr) -> None:
        """Wrap ``native``.

        Raises:
            TypeError: If ``native`` is not a native expression object.
        """
        if not isinstance(native, NativeExpr):
            raise TypeError(f"Expected a native expression, got {type(native)}")
        self._native = native

    @property
    def __expr__(self) -> NativeExpr:
        """The wrapped native expression."""
        return self._native

    def __str__(self) -> str:
        return str(self.__expr__)

    def __repr__(self) -> str:
        return repr(self.__expr__)

    def __getitem__(self, item: str | int) -> "Expr":
        """
        Get an item from a struct or list.

        Args:
            item: The key or index to get.
                If item is a string, it is assumed to be a field in a struct. Dot-separated string is supported
                to access nested fields. If item is an integer, it is assumed to be an index in a list.
        """
        from spiral import expressions as se

        if isinstance(item, int):
            # Assume list and get an element.
            return se.list_.element_at(self, item)

        # Walk into the struct, one path component at a time.
        expr = self
        for part in item.split("."):
            expr = se.getitem(expr, part)
        return expr

    def __eq__(self, other: "ExprLike") -> "Expr":
        return self._binary("eq", other)

    def __ne__(self, other: "ExprLike") -> "Expr":
        return self._binary("neq", other)

    def __lt__(self, other: "ExprLike") -> "Expr":
        return self._binary("lt", other)

    def __le__(self, other: "ExprLike") -> "Expr":
        return self._binary("lte", other)

    def __gt__(self, other: "ExprLike") -> "Expr":
        return self._binary("gt", other)

    def __ge__(self, other: "ExprLike") -> "Expr":
        return self._binary("gte", other)

    def __and__(self, other: "ExprLike") -> "Expr":
        return self._binary("and", other)

    def __or__(self, other: "ExprLike") -> "Expr":
        return self._binary("or", other)

    def __xor__(self, other: "ExprLike") -> "Expr":
        # Not supported yet.
        raise NotImplementedError

    def __add__(self, other: "ExprLike") -> "Expr":
        return self._binary("add", other)

    def __sub__(self, other: "ExprLike") -> "Expr":
        return self._binary("sub", other)

    def __mul__(self, other: "ExprLike") -> "Expr":
        return self._binary("mul", other)

    def __truediv__(self, other: "ExprLike") -> "Expr":
        return self._binary("div", other)

    def __mod__(self, other: "ExprLike") -> "Expr":
        return self._binary("mod", other)

    def __neg__(self) -> "Expr":
        return Expr(_lib.spql.expr.unary("neg", self.__expr__))

    def in_(self, other: "ExprLike") -> "Expr":
        """Build a "list contains" expression testing whether this value is in ``other``."""
        from spiral import expressions as se

        other = se.lift(other)
        return Expr(_lib.spql.expr.list.contains(other.__expr__, self.__expr__))

    def contains(self, other: "ExprLike") -> "Expr":
        """Build a "list contains" expression testing whether ``other`` is in this list."""
        from spiral import expressions as se

        return se.lift(other).in_(self)

    def cast(self, dtype: pa.DataType) -> "Expr":
        """Cast the expression result to a different data type."""
        return Expr(_lib.spql.expr.cast(self.__expr__, dtype))

    def select(self, *paths: str, exclude: list[str] | None = None) -> "Expr":
        """Select fields from a struct-like expression.

        Args:
            *paths: Field names to select. If a path contains a dot, it is assumed to be a nested struct field.
                Several nested paths under the same parent (``"a.b", "a.c"``) are combined into one
                nested selection.
            exclude: List of field names to exclude from result.

        Returns:
            The selected struct expression, or ``self`` when neither paths nor
            exclusions are given.
        """
        from spiral import expressions as se

        if not paths:
            # Exclusion-only selection. This used to return `self` unconditionally,
            # silently ignoring `exclude`.
            return se.select(self, exclude=exclude) if exclude else self

        # If any of the paths contain nested fields, then we re-pack nested select statements.
        if any("." in p for p in paths):
            # Top-level field -> nested child paths, or None for "the whole field".
            # A later whole-field path replaces earlier nested ones and vice versa
            # (last one wins), but consecutive nested paths accumulate.
            wanted: dict[str, list[str] | None] = {}
            for p in paths:
                if "." in p:
                    parent, child = p.split(".", 1)
                    children = wanted.get(parent)
                    if children is None:
                        wanted[parent] = [child]
                    else:
                        children.append(child)
                else:
                    wanted[p] = None

            fields = {
                name: self[name] if children is None else self[name].select(*children)
                for name, children in wanted.items()
            }
            packed = se.pack(fields)
            if exclude:
                packed = packed.select(exclude=exclude)
            return packed

        return se.select(self, names=list(paths), exclude=exclude)

    def _binary(self, op: str, rhs: "ExprLike") -> "Expr":
        """Create a binary expression ``op(self, rhs)``; ``rhs`` is lifted to an expression first."""
        from spiral import expressions as se

        rhs = se.lift(rhs)
        return Expr(_lib.spql.expr.binary(op, self.__expr__, rhs.__expr__))
|
146
|
+
|
147
|
+
|
148
|
+
ScalarLike: TypeAlias = bool | int | float | str | list | datetime.datetime | None
|
149
|
+
ExprLike: TypeAlias = Expr | dict | ScalarLike
|
@@ -0,0 +1,86 @@
|
|
1
|
+
import hishel
|
2
|
+
import httpx
|
3
|
+
import pyarrow as pa
|
4
|
+
|
5
|
+
from spiral.expressions.base import Expr, ExprLike
|
6
|
+
from spiral.expressions.struct import pack
|
7
|
+
from spiral.expressions.udf import UDF
|
8
|
+
from spiral.settings import APP_DIR
|
9
|
+
|
10
|
+
|
11
|
+
def get(url: ExprLike, headers: ExprLike = None, force_cache: bool = False) -> Expr:
    """Submit a GET request to either a scalar or vector of URLs.

    Args:
        url: Expression (or liftable value) giving the URL(s) to fetch.
        headers: Optional request headers; omitted from the packed request when ``None``.
        force_cache: Forwarded to ``HttpGet``.
    """
    request_fields = {"url": url} if headers is None else {"url": url, "headers": headers}
    udf = HttpGet(force_cache)
    return udf(pack(request_fields))
|
17
|
+
|
18
|
+
|
19
|
+
class HttpGet(UDF):
    """UDF that performs HTTP requests for a struct array of request descriptions.

    Passes ``"http.get"`` to the ``UDF`` initializer (presumably the UDF's name).
    """

    # Shape of every response row produced by `invoke`.
    RES_DTYPE: pa.DataType = pa.struct(
        [
            pa.field("bytes", pa.large_binary()),
            pa.field("status", pa.int32()),
            pa.field("headers", pa.map_(pa.string(), pa.string())),
        ]
    )

    def __init__(self, force_cache: bool = False):
        super().__init__("http.get")
        self._force_cache = force_cache

    def return_type(self, *input_types: pa.DataType) -> pa.DataType:
        """Return the response struct type, regardless of the input types."""
        return HttpGet.RES_DTYPE

    def invoke(self, *input_args: pa.Array) -> pa.Array:
        """Issue the requests described by the single input array.

        Raises:
            ValueError: If the number of arguments is not exactly one.
        """
        n_args = len(input_args)
        if n_args != 1:
            raise ValueError(f"Expected 1 argument, got {n_args}")

        responses = _http_request(input_args[0], self._force_cache)
        # Flatten a chunked result into one contiguous array.
        return responses.combine_chunks() if isinstance(responses, pa.ChunkedArray) else responses
|
42
|
+
|
43
|
+
|
44
|
+
def _http_request(arg: pa.Array, force_cache: bool) -> pa.Array:
    """Perform one HTTP request per row of ``arg`` and return the responses.

    Each row is read as a dict with a required ``"url"`` key and optional
    ``"method"`` (default ``"GET"``) and ``"headers"`` keys. Requests are issued
    one after another through the shared ``_HttpClient``.

    Raises:
        TypeError: If ``arg`` is not a ``pa.StructArray``.
    """
    http = _HttpClient()

    if not isinstance(arg, pa.StructArray):
        raise TypeError(f"Unsupported argument: {arg} ({type(arg)})")

    # We assume a vector of requests, but with potentially many arguments
    rows = []
    for request in arg.to_pylist():
        response = http.request(
            request.get("method", "GET").upper(),
            request["url"],
            headers=request.get("headers", {}),
            # Presumably consumed by hishel's caching layer.
            extensions={"force_cache": force_cache},
        )
        rows.append(_response_dict(response))

    return pa.array(rows, type=HttpGet.RES_DTYPE)
|
65
|
+
|
66
|
+
|
67
|
+
def _response_dict(response: httpx.Response) -> dict:
|
68
|
+
if response.status_code != 200:
|
69
|
+
raise ValueError(f"Request failed with status {response.status_code}")
|
70
|
+
return {
|
71
|
+
"bytes": response.read(),
|
72
|
+
"status": response.status_code,
|
73
|
+
"headers": dict(response.headers),
|
74
|
+
}
|
75
|
+
|
76
|
+
|
77
|
+
class _HttpClient(hishel.CacheClient):
    """Shared caching HTTP client (one instance per class).

    Responses are cached on disk under ``APP_DIR / "http.cache"`` with
    ``ttl=3600`` (presumably seconds).
    """

    _instance: "_HttpClient | None" = None

    def __new__(cls, *args, **kwargs):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self):
        # Python calls __init__ every time `_HttpClient()` is evaluated, even when
        # __new__ hands back the existing instance. Without this guard each call
        # would rebuild the storage and underlying client, abandoning the previous
        # ones without closing them.
        if getattr(self, "_spiral_initialized", False):
            return
        super().__init__(storage=hishel.FileStorage(base_path=APP_DIR / "http.cache", ttl=3600))
        self._spiral_initialized = True
|
spiral/expressions/io.py
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
import tarfile
|
2
|
+
from io import BytesIO
|
3
|
+
|
4
|
+
import pyarrow as pa
|
5
|
+
|
6
|
+
from spiral.expressions.base import Expr, ExprLike
|
7
|
+
from spiral.expressions.struct import pack
|
8
|
+
from spiral.expressions.udf import UDF
|
9
|
+
|
10
|
+
|
11
|
+
def read_file(path: ExprLike) -> Expr:
    """
    Read file path(s) from disk into a struct with a single field "bytes" containing the file contents.

    Args:
        path: Expression evaluating to an array of strings representing local disk paths.
    """
    packed = pack({"path": path})
    return FileRead()(packed)
|
20
|
+
|
21
|
+
|
22
|
+
class FileRead(UDF):
    """UDF that reads local files into ``{"bytes": <contents>}`` structs.

    Passes ``"file.read"`` to the ``UDF`` initializer (presumably the UDF's name).
    """

    # Shape of every row produced by `invoke`.
    RES_DTYPE: pa.DataType = pa.struct(
        [
            pa.field("bytes", pa.large_binary()),
        ]
    )

    def __init__(self):
        super().__init__("file.read")

    def return_type(self, *input_types: pa.DataType) -> pa.DataType:
        """Return the result struct type, regardless of the input types."""
        return FileRead.RES_DTYPE

    def invoke(self, *input_args: pa.Array) -> pa.Array:
        """Read the file named by the ``"path"`` field of each input row.

        Raises:
            ValueError: If the number of arguments is not exactly one.
        """
        n_args = len(input_args)
        if n_args != 1:
            raise ValueError(f"Expected 1 argument, got {n_args}")

        contents = []
        for row in input_args[0]:
            with open(row["path"].as_py(), "rb") as handle:
                contents.append({"bytes": handle.read()})

        return pa.array(contents, type=FileRead.RES_DTYPE)
|
46
|
+
|
47
|
+
|
48
|
+
def read_tar(path: ExprLike = None, bytes_: ExprLike = None) -> "Expr":
    """Untar a vector of paths / byte arrays representing tarballs.

    Args:
        path: Expression giving tarball path(s).
        bytes_: Expression giving tarball contents.

    Raises:
        ValueError: If neither ``path`` nor ``bytes_`` is provided.
    """
    if path is None and bytes_ is None:
        raise ValueError("Expected either path or bytes_ to be provided")

    candidates = (("path", path), ("bytes", bytes_))
    to_pack = {key: value for key, value in candidates if value is not None}
    return TarRead()(pack(to_pack))
|
58
|
+
|
59
|
+
|
60
|
+
class TarRead(UDF):
    """UDF that expands tarballs into lists of ``{"name", "bytes"}`` structs.

    Passes ``"tar.read"`` to the ``UDF`` initializer (presumably the UDF's name).
    Archive members are read into memory only; nothing is extracted to disk.
    """

    # One list of (name, bytes) entries per input tarball.
    RES_DTYPE = pa.list_(
        pa.struct(
            [
                pa.field("name", pa.string()),
                pa.field("bytes", pa.large_binary()),
            ]
        )
    )

    def __init__(self):
        super().__init__("tar.read")

    def return_type(self, *input_types: pa.DataType) -> pa.DataType:
        """Return the result list type, regardless of the input types."""
        return TarRead.RES_DTYPE

    def invoke(self, *input_args: pa.Array) -> pa.Array:
        """Read every tarball described by the single input array.

        Each row must carry a ``"path"`` field (preferred when both exist) or a
        ``"bytes"`` field.

        Raises:
            ValueError: If the number of arguments is not exactly one, or a row
                has neither a ``"path"`` nor a ``"bytes"`` field.
        """
        if len(input_args) != 1:
            raise ValueError(f"Expected 1 argument, got {len(input_args)}")
        arg = input_args[0]

        res = []
        for req in arg:
            if "path" in req:
                kwargs = {"name": req["path"].as_py()}
            elif "bytes" in req:
                kwargs = {"fileobj": BytesIO(req["bytes"].as_py())}
            else:
                raise ValueError("Expected path or bytes_ to be provided")

            files = []
            with tarfile.open(**kwargs) as f:
                for m in f.getmembers():
                    # extractfile() returns None for members that carry no file
                    # content (directories, FIFOs, device nodes); skip those
                    # instead of failing on `None.read()`.
                    member = f.extractfile(m)
                    if member is None:
                        continue
                    with member:
                        files.append({"name": m.name, "bytes": member.read()})
            res.append(files)

        return pa.array(res, type=TarRead.RES_DTYPE)
|
@@ -0,0 +1,68 @@
|
|
1
|
+
from spiral.expressions.base import Expr, ExprLike
|
2
|
+
|
3
|
+
|
4
|
+
def in_(expr: ExprLike, values: ExprLike) -> Expr:
    """Check if a value is in a list.

    e.g. `se.list.in_(Array[2, 4], Array[[1, 2], [1, 2]]) -> Array[True, False]`

    Args:
        expr: The value to check.
        values: The list array expression to check against.
    """
    from spiral.expressions import lift

    return lift(expr).in_(values)
|
16
|
+
|
17
|
+
|
18
|
+
def element_at(expr: ExprLike, index: ExprLike) -> Expr:
    """Get the element at the given index.

    e.g. `se.list.element_at([1, 2, 3], 1) -> 2`

    Args:
        expr: The list array expression.
        index: The index to get.
    """
    from spiral import _lib
    from spiral.expressions import lift

    list_expr = lift(expr)
    index_expr = lift(index)
    native = _lib.spql.expr.list.element_at(list_expr.__expr__, index_expr.__expr__)
    return Expr(native)
|
33
|
+
|
34
|
+
|
35
|
+
def of(*expr: ExprLike) -> Expr:
    """Create an array or scalar list from a series of expressions (not implemented yet).

    Intended semantics: all values must be of the same type, and the expressions
    must all have the same length (1 for scalars).

    e.g. `se.list.of(1+3, 2, 3) -> [4, 2, 3]`

    Raises:
        NotImplementedError: Always. Previously this stub silently returned ``None``.
    """
    raise NotImplementedError("spiral.expressions.list_.of is not implemented yet")
|
41
|
+
|
42
|
+
|
43
|
+
def zip(*lists: ExprLike) -> Expr:
    """Merge the given lists, with duplicates (not implemented yet).

    NOTE(review): the original example read
    `se.list.merge([1, 2], [3, 4]) -> [(1, 2), (3, 4)]`, which names a different
    function and does not look like an element-wise zip. Confirm the intended
    semantics before implementing.

    Raises:
        NotImplementedError: Always. Previously this stub silently returned ``None``.
    """
    raise NotImplementedError("spiral.expressions.list_.zip is not implemented yet")
|
48
|
+
|
49
|
+
|
50
|
+
def concat(*lists: ExprLike) -> Expr:
    """Concatenate the given lists (not implemented yet).

    Intended semantics: the types of all the lists must be the same.

    e.g. `se.list.concat([1, 2], [3, 4]) -> [1, 2, 3, 4]`

    Raises:
        NotImplementedError: Always. Previously this stub silently returned ``None``.
    """
    raise NotImplementedError("spiral.expressions.list_.concat is not implemented yet")
|
55
|
+
|
56
|
+
|
57
|
+
def slice_(expr: ExprLike, start: int | None = None, stop: int | None = None) -> Expr:
    """Slice a list (not implemented yet).

    e.g. `se.list.slice_([0, 1, 2], start=0, stop=2) -> [0, 1]`

    Args:
        expr: The list expression to slice.
        start: Index of the first element to keep.
        stop: Index one past the last element to keep (presumably, as in Python slicing).

    Raises:
        NotImplementedError: Always. Previously this stub silently returned ``None``.
    """
    raise NotImplementedError("spiral.expressions.list_.slice_ is not implemented yet")
|
62
|
+
|
63
|
+
|
64
|
+
def length(expr: ExprLike) -> Expr:
    """Get the length of a list (not implemented yet).

    e.g. `se.list.length([1, 2, 3]) -> 3`

    Raises:
        NotImplementedError: Always. Previously this stub silently returned ``None``.
    """
    raise NotImplementedError("spiral.expressions.list_.length is not implemented yet")
|
@@ -0,0 +1,44 @@
|
|
1
|
+
from typing import TYPE_CHECKING
|
2
|
+
|
3
|
+
from spiral.expressions.base import Expr, ExprLike
|
4
|
+
|
5
|
+
if TYPE_CHECKING:
|
6
|
+
from spiral import Table
|
7
|
+
|
8
|
+
|
9
|
+
def ref(expr: ExprLike, field: str | None = None) -> Expr:
    """Store binary values as references. This expression can only be used on write.

    It is often better to store large cell values, such as bytes columns, that aren't used in filter expressions as
    references. This enables more efficient scan pruning. Many of Spiral's cell pushdown expressions work
    over references.

    Args:
        expr: The expression to store as a reference.
        field: If the expr evaluates into struct, the field name of that struct that should be referenced.
            If `None`, the expr must evaluate into a type that supports referencing.
    """
    from spiral import _lib
    from spiral.expressions import lift

    lifted = lift(expr)
    native = _lib.spql.expr.ref(lifted.__expr__, field)
    return Expr(native)
|
26
|
+
|
27
|
+
|
28
|
+
def deref(expr: ExprLike, field: str | None = None, table: "Table" = None) -> Expr:
    """De-reference referenced values.

    See `ref` for more information on Spiral's reference values. This expression is used to de-reference referenced
    columns back into their original form, e.g. binary.

    Args:
        expr: The expression to de-reference.
        field: If the expr evaluates into struct, the field name of that struct that should be de-referenced.
            If `None`, the expr must evaluate into a reference type.
        table (optional): The table to de-reference from, if not available in input expression.
    """
    from spiral import _lib
    from spiral.expressions import lift

    lifted = lift(expr)
    # Only the table's underlying `_table` handle is handed to the native layer.
    native_table = None if table is None else table._table
    return Expr(_lib.spql.expr.deref(lifted.__expr__, field, native_table))
|
@@ -0,0 +1,39 @@
|
|
1
|
+
import pyarrow as pa
|
2
|
+
import pyarrow.compute as pc
|
3
|
+
import re2 as re
|
4
|
+
|
5
|
+
from spiral import _lib
|
6
|
+
from spiral.expressions.base import Expr, ExprLike
|
7
|
+
|
8
|
+
# TODO(ngates): we can add a symmetric "ascii" expression namespace in the future if
|
9
|
+
# the performance is required.
|
10
|
+
|
11
|
+
|
12
|
+
def substr(expr: ExprLike = None, *, begin: int = 0, end: int | None = None) -> Expr:
    """Slice a string.

    Args:
        expr: The string expression to slice.
        begin: The starting index of the slice.
        end: The ending index of the slice.
    """
    from spiral import expressions as se

    lifted = se.lift(expr)
    # NOTE(review): other wrappers call into `_lib.spql.expr.*`; this one uses
    # `_lib.spql.str` - confirm the module path is intentional.
    native = _lib.spql.str.substr(lifted.__expr__, begin=begin, end=end)
    return Expr(native)
|
24
|
+
|
25
|
+
|
26
|
+
def extract_regex(pattern: str, *, strings: ExprLike) -> Expr:
    """Extract the first occurrence of a regex pattern from a string.

    Raises:
        NotImplementedError: Always; the expression is not implemented yet.
    """
    raise NotImplementedError
|
29
|
+
|
30
|
+
|
31
|
+
def _extract_regex(arg: pa.Array | pa.Scalar, pattern: str) -> pa.Array | pa.Scalar:
    """Apply ``pattern`` to string input and return a struct of its named groups.

    The result is cast to a struct type with one string field per named group
    in ``pattern``.

    Raises:
        TypeError: If ``arg`` is not of Arrow string type.
    """
    # Compute the return type based on the regex groups
    compiled = re.compile(pattern)
    group_fields = [pa.field(name, type=pa.string()) for name in compiled.groupindex.keys()]
    result_type = pa.struct(group_fields)

    if not pa.types.is_string(arg.type):
        raise TypeError("Input argument does not have the expected type")

    extracted = pc.extract_regex(arg, pattern=pattern)
    return extracted.cast(result_type)
|
@@ -0,0 +1,57 @@
|
|
1
|
+
from spiral import _lib
|
2
|
+
from spiral.expressions.base import Expr, ExprLike
|
3
|
+
|
4
|
+
|
5
|
+
def getitem(expr: ExprLike, field: str) -> Expr:
    """Get field from a struct.

    Args:
        expr: The struct expression to get the field from.
        field: The field to get. The name is forwarded to the native layer unchanged; this
            function does not split on dots itself (``Expr.__getitem__`` does).
            NOTE(review): the previous docstring said dot-separated names are supported here - confirm.
    """
    from spiral import expressions as se

    struct_expr = se.lift(expr)
    native = _lib.spql.expr.struct.getitem(struct_expr.__expr__, field)
    return Expr(native)
|
16
|
+
|
17
|
+
|
18
|
+
def pack(fields: dict[str, ExprLike]) -> Expr:
    """Assemble a new struct from the given named fields.

    Args:
        fields: A dictionary of field names to expressions. The field names will be used as the struct field names.
    """
    from spiral import expressions as se

    names = list(fields.keys())
    natives = [se.lift(value).__expr__ for value in fields.values()]
    return Expr(_lib.spql.expr.struct.pack(names, natives))
|
27
|
+
|
28
|
+
|
29
|
+
def merge(*structs: "ExprLike") -> Expr:
    """Merge fields from the given structs into a single struct.

    Args:
        *structs: Each expression must evaluate to a struct.

    Returns:
        A single struct containing all the fields from the input structs.
        If a field is present in multiple structs, the value from the last struct is used.
    """
    from spiral import expressions as se

    # A single input needs no merge; just lift it.
    if len(structs) == 1:
        (only,) = structs
        return se.lift(only)

    natives = [se.lift(struct).__expr__ for struct in structs]
    return Expr(_lib.spql.expr.struct.merge(natives))
|
44
|
+
|
45
|
+
|
46
|
+
def select(expr: ExprLike, names: list[str] = None, exclude: list[str] = None) -> Expr:
    """Select fields from a struct.

    Args:
        expr: The struct-like expression to select fields from.
        names: Field names to select. If a path contains a dot, it is assumed to be a nested struct field.
        exclude: List of field names to exclude from result. Exactly one of `names` or `exclude` must be provided.
    """
    from spiral import expressions as se

    struct_expr = se.lift(expr)
    native = _lib.spql.expr.struct.select(struct_expr.__expr__, names, exclude)
    return Expr(native)
|