pyspiral 0.7.18__cp312-abi3-manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyspiral-0.7.18.dist-info/METADATA +52 -0
- pyspiral-0.7.18.dist-info/RECORD +110 -0
- pyspiral-0.7.18.dist-info/WHEEL +4 -0
- pyspiral-0.7.18.dist-info/entry_points.txt +3 -0
- spiral/__init__.py +55 -0
- spiral/_lib.abi3.so +0 -0
- spiral/adbc.py +411 -0
- spiral/api/__init__.py +78 -0
- spiral/api/admin.py +15 -0
- spiral/api/client.py +164 -0
- spiral/api/filesystems.py +134 -0
- spiral/api/key_space_indexes.py +23 -0
- spiral/api/organizations.py +77 -0
- spiral/api/projects.py +219 -0
- spiral/api/telemetry.py +19 -0
- spiral/api/text_indexes.py +56 -0
- spiral/api/types.py +23 -0
- spiral/api/workers.py +40 -0
- spiral/api/workloads.py +52 -0
- spiral/arrow_.py +216 -0
- spiral/cli/__init__.py +88 -0
- spiral/cli/__main__.py +4 -0
- spiral/cli/admin.py +14 -0
- spiral/cli/app.py +108 -0
- spiral/cli/console.py +95 -0
- spiral/cli/fs.py +76 -0
- spiral/cli/iceberg.py +97 -0
- spiral/cli/key_spaces.py +103 -0
- spiral/cli/login.py +25 -0
- spiral/cli/orgs.py +90 -0
- spiral/cli/printer.py +53 -0
- spiral/cli/projects.py +147 -0
- spiral/cli/state.py +7 -0
- spiral/cli/tables.py +197 -0
- spiral/cli/telemetry.py +17 -0
- spiral/cli/text.py +115 -0
- spiral/cli/types.py +50 -0
- spiral/cli/workloads.py +58 -0
- spiral/client.py +256 -0
- spiral/core/__init__.pyi +0 -0
- spiral/core/_tools/__init__.pyi +5 -0
- spiral/core/authn/__init__.pyi +21 -0
- spiral/core/client/__init__.pyi +285 -0
- spiral/core/config/__init__.pyi +35 -0
- spiral/core/expr/__init__.pyi +15 -0
- spiral/core/expr/images/__init__.pyi +3 -0
- spiral/core/expr/list_/__init__.pyi +4 -0
- spiral/core/expr/refs/__init__.pyi +4 -0
- spiral/core/expr/str_/__init__.pyi +3 -0
- spiral/core/expr/struct_/__init__.pyi +6 -0
- spiral/core/expr/text/__init__.pyi +5 -0
- spiral/core/expr/udf/__init__.pyi +14 -0
- spiral/core/expr/video/__init__.pyi +3 -0
- spiral/core/table/__init__.pyi +141 -0
- spiral/core/table/manifests/__init__.pyi +35 -0
- spiral/core/table/metastore/__init__.pyi +58 -0
- spiral/core/table/spec/__init__.pyi +215 -0
- spiral/dataloader.py +299 -0
- spiral/dataset.py +264 -0
- spiral/datetime_.py +27 -0
- spiral/debug/__init__.py +0 -0
- spiral/debug/manifests.py +87 -0
- spiral/debug/metrics.py +56 -0
- spiral/debug/scan.py +266 -0
- spiral/enrichment.py +306 -0
- spiral/expressions/__init__.py +274 -0
- spiral/expressions/base.py +167 -0
- spiral/expressions/file.py +17 -0
- spiral/expressions/http.py +17 -0
- spiral/expressions/list_.py +68 -0
- spiral/expressions/s3.py +16 -0
- spiral/expressions/str_.py +39 -0
- spiral/expressions/struct.py +59 -0
- spiral/expressions/text.py +62 -0
- spiral/expressions/tiff.py +222 -0
- spiral/expressions/udf.py +60 -0
- spiral/grpc_.py +32 -0
- spiral/iceberg.py +31 -0
- spiral/iterable_dataset.py +106 -0
- spiral/key_space_index.py +44 -0
- spiral/project.py +227 -0
- spiral/protogen/_/__init__.py +0 -0
- spiral/protogen/_/arrow/__init__.py +0 -0
- spiral/protogen/_/arrow/flight/__init__.py +0 -0
- spiral/protogen/_/arrow/flight/protocol/__init__.py +0 -0
- spiral/protogen/_/arrow/flight/protocol/sql/__init__.py +2548 -0
- spiral/protogen/_/google/__init__.py +0 -0
- spiral/protogen/_/google/protobuf/__init__.py +2310 -0
- spiral/protogen/_/message_pool.py +3 -0
- spiral/protogen/_/py.typed +0 -0
- spiral/protogen/_/scandal/__init__.py +190 -0
- spiral/protogen/_/spfs/__init__.py +72 -0
- spiral/protogen/_/spql/__init__.py +61 -0
- spiral/protogen/_/substrait/__init__.py +6196 -0
- spiral/protogen/_/substrait/extensions/__init__.py +169 -0
- spiral/protogen/__init__.py +0 -0
- spiral/protogen/util.py +41 -0
- spiral/py.typed +0 -0
- spiral/scan.py +363 -0
- spiral/server.py +17 -0
- spiral/settings.py +36 -0
- spiral/snapshot.py +56 -0
- spiral/streaming_/__init__.py +3 -0
- spiral/streaming_/reader.py +133 -0
- spiral/streaming_/stream.py +157 -0
- spiral/substrait_.py +274 -0
- spiral/table.py +224 -0
- spiral/text_index.py +17 -0
- spiral/transaction.py +155 -0
- spiral/types_.py +6 -0
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
import builtins
|
|
2
|
+
import functools
|
|
3
|
+
import operator
|
|
4
|
+
import warnings
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
import pyarrow as pa
|
|
8
|
+
|
|
9
|
+
from spiral import _lib, arrow_
|
|
10
|
+
|
|
11
|
+
from . import file as file
|
|
12
|
+
from . import http as http
|
|
13
|
+
from . import list_ as list
|
|
14
|
+
from . import s3 as s3
|
|
15
|
+
from . import str_ as str
|
|
16
|
+
from . import struct as struct
|
|
17
|
+
from . import text as text
|
|
18
|
+
from .base import Expr, ExprLike, NativeExpr
|
|
19
|
+
from .udf import UDF
|
|
20
|
+
|
|
21
|
+
__all__ = [
|
|
22
|
+
"Expr",
|
|
23
|
+
"add",
|
|
24
|
+
"and_",
|
|
25
|
+
"divide",
|
|
26
|
+
"eq",
|
|
27
|
+
"getitem",
|
|
28
|
+
"gt",
|
|
29
|
+
"gte",
|
|
30
|
+
"is_not_null",
|
|
31
|
+
"is_null",
|
|
32
|
+
"lift",
|
|
33
|
+
"list",
|
|
34
|
+
"lt",
|
|
35
|
+
"lte",
|
|
36
|
+
"merge",
|
|
37
|
+
"modulo",
|
|
38
|
+
"multiply",
|
|
39
|
+
"negate",
|
|
40
|
+
"neq",
|
|
41
|
+
"not_",
|
|
42
|
+
"or_",
|
|
43
|
+
"pack",
|
|
44
|
+
"aux",
|
|
45
|
+
"scalar",
|
|
46
|
+
"select",
|
|
47
|
+
"str",
|
|
48
|
+
"struct",
|
|
49
|
+
"subtract",
|
|
50
|
+
"xor",
|
|
51
|
+
"text",
|
|
52
|
+
"s3",
|
|
53
|
+
"http",
|
|
54
|
+
"file",
|
|
55
|
+
"UDF",
|
|
56
|
+
]
|
|
57
|
+
|
|
58
|
+
# Inline some of the struct expressions since they're so common
|
|
59
|
+
getitem = struct.getitem
|
|
60
|
+
merge = struct.merge
|
|
61
|
+
pack = struct.pack
|
|
62
|
+
select = struct.select
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def lift(expr: ExprLike) -> Expr:
    """Convert an ``ExprLike`` value into an ``Expr``.

    Conversion rules, checked in order:

    * ``Expr`` is returned as-is; a native expression is wrapped in ``Expr``.
    * ``dict`` is treated as a struct expression: dot-separated keys are nested
      via ``arrow_.nest_structs`` and every value is lifted recursively.
    * ``list`` is converted with ``pa.array`` and lifted again.
    * Arrow tables / record batches are turned into struct arrays and chunked
      arrays are combined; struct arrays and struct scalars are nested and
      lifted again (a warning is emitted when they contain nulls).
    * Any other Arrow array becomes an array literal.
    * Everything else is handed to ``scalar``.
    """
    # Already an expression (wrapped or native): nothing to convert.
    if isinstance(expr, Expr):
        return expr
    if isinstance(expr, NativeExpr):
        return Expr(expr)

    if isinstance(expr, dict):
        # NOTE: a dict is assumed to be a struct expression rather than a struct
        # scalar; callers who need the latter can build one explicitly.
        nested = arrow_.nest_structs(expr)
        lifted_fields = {}
        for name, value in nested.items():
            lifted_fields[name] = lift(value)
        return pack(lifted_fields)

    if isinstance(expr, builtins.list):
        return lift(pa.array(expr))

    # Normalise tabular and chunked inputs to a single Arrow array.
    value = expr
    if isinstance(value, (pa.Table, pa.RecordBatch)):
        value = value.to_struct_array()
    if isinstance(value, pa.ChunkedArray):
        value = value.combine_chunks()

    if isinstance(value, (pa.StructArray, pa.StructScalar)):
        # TODO(marko): Figure out what to do with nullable struct arrays when unpacking them.
        #   We need to merge struct validity into the child validity?
        if isinstance(value, pa.StructArray):
            if value.null_count != 0:
                warnings.warn("found a struct array with nulls", stacklevel=2)
        elif not value.is_valid:
            warnings.warn("found a struct scalar with nulls", stacklevel=2)
        # Struct-like values have their dot-separated field names nested and
        # are lifted again.
        return lift(arrow_.nest_structs(value))

    if isinstance(value, pa.Array):
        return Expr(_lib.expr.array_lit(value))

    # Anything left is assumed to be a scalar.
    return scalar(value)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def evaluate(expr: ExprLike) -> pa.RecordBatchReader:
    """Turn an ``ExprLike`` value into a ``pa.RecordBatchReader``.

    Only Arrow-backed, struct-shaped inputs are supported at the moment:
    readers, tables, record batches, struct arrays and struct-typed chunked
    arrays. Dicts, lists and Arrow scalars are converted to Arrow and
    re-evaluated. Any other value is wrapped with ``scalar`` and re-evaluated,
    which ends in the ``NotImplementedError`` branch because ``scalar`` returns
    an ``Expr``.

    Raises:
        ValueError: for a non-struct Arrow array or chunked array.
        NotImplementedError: for ``Expr`` / native expression inputs.
    """
    # TODO(marko): This implementation is currently minimal and most ExprLike-s fail.
    if isinstance(expr, pa.RecordBatchReader):
        return expr
    if isinstance(expr, pa.Table):
        return expr.to_reader()
    if isinstance(expr, pa.RecordBatch):
        return pa.RecordBatchReader.from_batches(expr.schema, [expr])
    if isinstance(expr, pa.StructArray):
        return pa.Table.from_struct_array(expr).to_reader()

    if isinstance(expr, pa.ChunkedArray):
        if not pa.types.is_struct(expr.type):
            raise ValueError("Arrow chunked array must be a struct type.")
        # One record batch per chunk, produced lazily.
        schema = pa.schema(expr.type.fields)
        batches = map(pa.RecordBatch.from_struct_array, expr.chunks)
        return pa.RecordBatchReader.from_batches(schema, batches)

    if isinstance(expr, pa.Array):
        raise ValueError("Arrow array must be a struct array.")

    if isinstance(expr, (Expr, NativeExpr)):
        raise NotImplementedError(
            "Expr evaluation not supported yet. Use Arrow to write instead. Reach out if you require this feature."
        )

    if isinstance(expr, dict):
        # NOTE: a dict is assumed to be a struct expression; dot-separated keys
        # are nested before the dict is converted to a table.
        nested = arrow_.nest_structs(expr)
        return evaluate(arrow_.dict_to_table(nested))

    if isinstance(expr, builtins.list):
        return evaluate(pa.array(expr))

    if isinstance(expr, pa.Scalar):
        return evaluate(pa.array([expr]))

    # Last resort: treat the value as a scalar.
    return evaluate(scalar(expr))
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def aux(name: builtins.str, dtype: pa.DataType) -> Expr:
    """Build a variable expression that refers to a column of the auxiliary table.

    The auxiliary table is optionally passed to `Scan#to_record_batches` when
    reading only specific keys or doing cell pushdown.

    Args:
        name: variable name
        dtype: must match dtype of the column in the auxiliary table.
    """
    native = _lib.expr.aux(name, dtype)
    return Expr(native)
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def scalar(value: Any) -> Expr:
    """Create a scalar expression from a Python value or an Arrow scalar."""
    arrow_value = value if isinstance(value, pa.Scalar) else pa.scalar(value)
    # TODO(marko): Use Vortex scalar instead of passing as array.
    # The native layer receives the scalar as a one-element array of its type.
    single = pa.array([arrow_value.as_py()], type=arrow_value.type)
    return Expr(_lib.expr.scalar(single))
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def cast(expr: ExprLike, dtype: pa.DataType) -> Expr:
    """Cast an expression to the given PyArrow data type."""
    lifted = lift(expr)
    native = _lib.expr.cast(lifted.__expr__, dtype)
    return Expr(native)
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def and_(expr: ExprLike, *exprs: ExprLike) -> Expr:
    """Combine one or more expressions into a conjunction (left fold with ``&``)."""
    remaining = [lift(e) for e in exprs]
    combined = lift(expr)
    for term in remaining:
        combined = combined & term
    return combined
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def or_(expr: ExprLike, *exprs: ExprLike) -> Expr:
    """Combine one or more expressions into a disjunction (left fold with ``|``)."""
    remaining = [lift(e) for e in exprs]
    combined = lift(expr)
    for term in remaining:
        combined = combined | term
    return combined
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def eq(lhs: ExprLike, rhs: ExprLike) -> Expr:
    """Build an equality comparison between ``lhs`` and ``rhs``."""
    left = lift(lhs)
    return left == rhs
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def neq(lhs: ExprLike, rhs: ExprLike) -> Expr:
    """Build a not-equal comparison between ``lhs`` and ``rhs``."""
    left = lift(lhs)
    return left != rhs
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def xor(lhs: ExprLike, rhs: ExprLike) -> Expr:
    """Build an XOR of ``lhs`` and ``rhs`` by applying ``^`` to the lifted ``lhs``."""
    left = lift(lhs)
    return left ^ rhs
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def lt(lhs: ExprLike, rhs: ExprLike) -> Expr:
    """Build a less-than comparison between ``lhs`` and ``rhs``."""
    left = lift(lhs)
    return left < rhs
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def lte(lhs: ExprLike, rhs: ExprLike) -> Expr:
    """Build a less-than-or-equal comparison between ``lhs`` and ``rhs``."""
    left = lift(lhs)
    return left <= rhs
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def gt(lhs: ExprLike, rhs: ExprLike) -> Expr:
    """Build a greater-than comparison between ``lhs`` and ``rhs``."""
    left = lift(lhs)
    return left > rhs
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def gte(lhs: ExprLike, rhs: ExprLike) -> Expr:
    """Build a greater-than-or-equal comparison between ``lhs`` and ``rhs``."""
    left = lift(lhs)
    return left >= rhs
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def negate(expr: ExprLike) -> Expr:
    """Arithmetically negate the expression (unary minus)."""
    operand = lift(expr)
    return -operand
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def not_(expr: ExprLike) -> Expr:
    """Apply the native ``not_`` operation (logical NOT) to the expression."""
    operand = lift(expr)
    native = _lib.expr.not_(operand.__expr__)
    return Expr(native)
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
def is_null(expr: ExprLike) -> Expr:
    """Build an expression that tests whether ``expr`` is null."""
    operand = lift(expr)
    native = _lib.expr.is_null(operand.__expr__)
    return Expr(native)
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def is_not_null(expr: ExprLike) -> Expr:
    """Build an expression that tests whether ``expr`` is not null.

    Implemented as the logical NOT of ``is_null``.
    """
    null_check = is_null(expr)
    return not_(null_check)
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
def add(lhs: ExprLike, rhs: ExprLike) -> Expr:
    """Build the sum of ``lhs`` and ``rhs``."""
    left = lift(lhs)
    return left + rhs
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
def subtract(lhs: ExprLike, rhs: ExprLike) -> Expr:
    """Build the difference ``lhs - rhs``."""
    left = lift(lhs)
    return left - rhs
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def multiply(lhs: ExprLike, rhs: ExprLike) -> Expr:
    """Build the product of ``lhs`` and ``rhs``."""
    left = lift(lhs)
    return left * rhs
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def divide(lhs: ExprLike, rhs: ExprLike) -> Expr:
    """Build the quotient ``lhs / rhs``."""
    left = lift(lhs)
    return left / rhs
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def modulo(lhs: ExprLike, rhs: ExprLike) -> Expr:
    """Build the remainder ``lhs % rhs``."""
    left = lift(lhs)
    return left % rhs
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
from typing import TypeAlias, Union
|
|
3
|
+
|
|
4
|
+
import pyarrow as pa
|
|
5
|
+
|
|
6
|
+
from spiral import _lib
|
|
7
|
+
|
|
8
|
+
NativeExpr: TypeAlias = _lib.expr.Expr
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class Expr:
    """Base class for Spiral expressions. All expressions support comparison and basic arithmetic operations.

    The comparison and arithmetic operators build new expressions instead of
    computing values, so ``a == b`` returns an ``Expr`` rather than a ``bool``.
    Because ``__eq__`` is overridden without ``__hash__``, instances are
    unhashable.
    """

    def __init__(self, native: NativeExpr) -> None:
        """Wrap a native expression.

        Raises:
            TypeError: if ``native`` is not a native expression.
        """
        if not isinstance(native, NativeExpr):
            raise TypeError(f"Expected a native expression, got {type(native)}")
        self._native = native

    @property
    def __expr__(self) -> NativeExpr:
        """The wrapped native expression."""
        return self._native

    def __str__(self):
        return str(self.__expr__)

    def __repr__(self):
        return repr(self.__expr__)

    def __getitem__(self, item: str | int | list[str]) -> "Expr":
        """
        Get an item from a struct or list.

        Args:
            item: The key or index to get.
                If item is a string, it is assumed to be a field in a struct. Dot-separated string is supported
                to access nested fields.
                If item is a list of strings, it is assumed to be a list of fields in a struct.
                If item is an integer, it is assumed to be an index in a list.

        Raises:
            TypeError: if ``item`` is none of the supported types.
        """
        from spiral import expressions as se

        expr = self

        if isinstance(item, int):
            # Assume list and get an element.
            expr = se.list_.element_at(expr, item)
        elif isinstance(item, str):
            # Walk into the struct, one dot-separated component at a time.
            for part in item.split("."):
                expr = se.getitem(expr, part)
        elif isinstance(item, list) and all(isinstance(i, str) for i in item):
            # Re-pack the requested fields into a new struct.
            expr = se.pack({k: expr[k] for k in item})
        else:
            raise TypeError(f"Invalid item type: {type(item)}")

        return expr

    def __eq__(self, other: "ExprLike") -> "Expr":
        return self._binary("eq", other)

    def __ne__(self, other: "ExprLike") -> "Expr":
        return self._binary("neq", other)

    def __lt__(self, other: "ExprLike") -> "Expr":
        return self._binary("lt", other)

    def __le__(self, other: "ExprLike") -> "Expr":
        return self._binary("lte", other)

    def __gt__(self, other: "ExprLike") -> "Expr":
        return self._binary("gt", other)

    def __ge__(self, other: "ExprLike") -> "Expr":
        return self._binary("gte", other)

    def __and__(self, other: "ExprLike") -> "Expr":
        return self._binary("and", other)

    def __or__(self, other: "ExprLike") -> "Expr":
        return self._binary("or", other)

    def __xor__(self, other: "ExprLike") -> "Expr":
        # XOR is not supported yet.
        raise NotImplementedError

    def __add__(self, other: "ExprLike") -> "Expr":
        return self._binary("add", other)

    def __sub__(self, other: "ExprLike") -> "Expr":
        return self._binary("sub", other)

    def __mul__(self, other: "ExprLike") -> "Expr":
        return self._binary("mul", other)

    def __truediv__(self, other: "ExprLike") -> "Expr":
        return self._binary("div", other)

    def __mod__(self, other: "ExprLike") -> "Expr":
        return self._binary("mod", other)

    def __neg__(self):
        return Expr(_lib.expr.unary("neg", self.__expr__))

    def in_(self, other: "ExprLike") -> "Expr":
        """Check whether this expression's value is contained in the list expression ``other``."""
        from spiral import expressions as se

        other = se.lift(other)
        return Expr(_lib.expr.list.contains(other.__expr__, self.__expr__))

    def contains(self, other: "ExprLike") -> "Expr":
        """Check whether this list expression contains ``other`` (the mirror of ``in_``)."""
        from spiral import expressions as se

        return se.lift(other).in_(self)

    def cast(self, dtype: pa.DataType) -> "Expr":
        """Cast the expression result to a different data type."""
        return Expr(_lib.expr.cast(self.__expr__, dtype))

    def select(self, *paths: str, exclude: list[str] | None = None) -> "Expr":
        """Select fields from a struct-like expression.

        Args:
            *paths: Field names to select. If a path contains a dot, it is assumed to be a nested struct field.
                Several nested paths under the same parent (``"a.b", "a.c"``) are combined into one nested
                selection. If a parent is requested both whole (``"a"``) and partially (``"a.b"``), the whole
                field is kept.
            exclude: List of field names to exclude from result.

        Raises:
            ValueError: if both plain (non-nested) paths and ``exclude`` are given.
        """
        from spiral import expressions as se

        # If any of the paths contain nested fields, then we re-pack nested select statements.
        if any("." in p for p in paths):
            # Group children by parent so that repeated parents accumulate
            # instead of overwriting each other. Dict insertion order keeps the
            # parents in order of first appearance.
            children: dict[str, list[str]] = {}
            whole_parents: set[str] = set()
            for p in paths:
                parent, dot, child = p.partition(".")
                bucket = children.setdefault(parent, [])
                if dot:
                    bucket.append(child)
                else:
                    whole_parents.add(parent)

            fields = {}
            for parent, nested in children.items():
                if parent in whole_parents:
                    fields[parent] = self[parent]
                else:
                    fields[parent] = self[parent].select(*nested)

            packed = se.pack(fields)
            if exclude:
                packed = packed.select(exclude=exclude)
            return packed

        if paths:
            if exclude:
                raise ValueError("Cannot specify both selection and exclusion.")
            return se.select(self, names=list(paths))

        if exclude:
            return se.select(self, exclude=exclude)

        # Nothing to select or exclude: the expression is returned unchanged.
        return self

    def _binary(self, op: str, rhs: "ExprLike") -> "Expr":
        """Create a binary expression ``self <op> rhs``; ``rhs`` is lifted first."""
        from spiral import expressions as se

        rhs = se.lift(rhs)
        return Expr(_lib.expr.binary(op, self.__expr__, rhs.__expr__))
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
ScalarLike: TypeAlias = bool | int | float | str | list["ScalarLike"] | datetime.datetime | None
|
|
160
|
+
ArrowLike: TypeAlias = Union[
|
|
161
|
+
pa.RecordBatch,
|
|
162
|
+
"pa.Array[pa.Scalar[pa.DataType]]",
|
|
163
|
+
"pa.ChunkedArray[pa.Scalar[pa.DataType]]",
|
|
164
|
+
"pa.Scalar[pa.DataType]",
|
|
165
|
+
pa.Table,
|
|
166
|
+
]
|
|
167
|
+
ExprLike: TypeAlias = Expr | dict[str, "ExprLike"] | list["ExprLike"] | ArrowLike | ScalarLike
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from spiral import _lib
|
|
2
|
+
from spiral.expressions.base import Expr, ExprLike
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def get(expr: ExprLike, abort_on_error: bool = False) -> Expr:
    """Read data from the local filesystem by the file:// URL.

    Args:
        expr: URLs of the data that needs to be read.
        abort_on_error: Should the expression abort on errors or just collect them.
    """
    from spiral import expressions as se

    urls = se.lift(expr)
    # The s3 native getter is reused here; the original author notes that it
    # "just works" for these URLs.
    # NOTE(review): presumably the native reader dispatches on the URL scheme - confirm.
    native = _lib.expr.s3.get(urls.__expr__, abort_on_error)
    return Expr(native)
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from spiral import _lib
|
|
2
|
+
from spiral.expressions.base import Expr, ExprLike
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def get(expr: ExprLike, abort_on_error: bool = False) -> Expr:
    """Read data from the URL.

    Args:
        expr: URLs of the data that needs to be read.
        abort_on_error: Should the expression abort on errors or just collect them.
    """
    from spiral import expressions as se

    urls = se.lift(expr)
    # The s3 native getter is reused here; the original author notes that it
    # "just works" for these URLs.
    # NOTE(review): presumably the native reader dispatches on the URL scheme - confirm.
    native = _lib.expr.s3.get(urls.__expr__, abort_on_error)
    return Expr(native)
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
from spiral.expressions.base import Expr, ExprLike
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def in_(expr: ExprLike, values: ExprLike) -> Expr:
    """Check if a value is in a list.

    Example: `se.list.in_(Array[2, 4], Array[[1, 2], [1, 2]]) -> Array[True, False]`

    Args:
        expr: The value to check.
        values: The list array expression to check against.
    """
    from spiral.expressions import lift

    needle = lift(expr)
    return needle.in_(values)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def element_at(expr: ExprLike, index: ExprLike) -> Expr:
    """Get the element at the given index.

    Example: `se.list.element_at([1, 2, 3], 1) -> 2`

    Args:
        expr: The list array expression.
        index: The index to get.
    """
    from spiral import _lib
    from spiral.expressions import lift

    list_expr = lift(expr)
    index_expr = lift(index)
    native = _lib.expr.list.element_at(list_expr.__expr__, index_expr.__expr__)
    return Expr(native)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def of(*expr: ExprLike) -> Expr:
    """Create an array or scalar list from a series of expressions (not implemented yet).

    Intended contract: all values must be of the same type, and the expressions
    must all have the same length (1 for scalars).

    e.g. `se.list.of(1+3, 2, 3) -> [4, 2, 3]`

    Raises:
        NotImplementedError: always; the operation is not implemented yet.
    """
    # Fail loudly instead of silently returning None from a function declared
    # to return an Expr.
    raise NotImplementedError("list.of is not implemented yet")
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def zip(*lists: ExprLike) -> Expr:
    """Merge the given lists, with duplicates (not implemented yet).

    NOTE(review): the original example read
    `se.list.merge([1, 2], [3, 4]) -> [(1, 2), (3, 4)]`, which names a different
    function; element-wise zipping would give `[(1, 3), (2, 4)]`. Confirm the
    intended semantics before implementing.

    Raises:
        NotImplementedError: always; the operation is not implemented yet.
    """
    # Fail loudly instead of silently returning None from a function declared
    # to return an Expr.
    raise NotImplementedError("list.zip is not implemented yet")
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def concat(*lists: ExprLike) -> Expr:
    """Concatenate the given lists (not implemented yet).

    Intended contract: the types of all the lists must be the same.

    e.g. `se.list.concat([1, 2], [3, 4]) -> [1, 2, 3, 4]`

    Raises:
        NotImplementedError: always; the operation is not implemented yet.
    """
    # Fail loudly instead of silently returning None from a function declared
    # to return an Expr.
    raise NotImplementedError("list.concat is not implemented yet")
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def slice_(expr: ExprLike, start: int | None = None, stop: int | None = None) -> Expr:
    """Slice a list (not implemented yet).

    e.g. `se.list.slice_([0, 1, 2], start=0, stop=2) -> [0, 1]`

    Args:
        expr: The list expression to slice.
        start: Start index of the slice, or None.
        stop: Stop index of the slice, or None.

    Raises:
        NotImplementedError: always; the operation is not implemented yet.
    """
    # Fail loudly instead of silently returning None from a function declared
    # to return an Expr.
    raise NotImplementedError("list.slice_ is not implemented yet")
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def length(expr: ExprLike) -> Expr:
    """Get the length of a list (not implemented yet).

    e.g. `se.list.length([1, 2, 3]) -> 3`

    Raises:
        NotImplementedError: always; the operation is not implemented yet.
    """
    # Fail loudly instead of silently returning None from a function declared
    # to return an Expr.
    raise NotImplementedError("list.length is not implemented yet")
|
spiral/expressions/s3.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from spiral import _lib
|
|
2
|
+
from spiral.expressions.base import Expr, ExprLike
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def get(expr: ExprLike, abort_on_error: bool = False) -> Expr:
    """Read data from object storage by the s3:// URL.

    Args:
        expr: URLs of the data that needs to be read from object storage.
        abort_on_error: Should the expression abort on errors or just collect them.
    """
    from spiral import expressions as se

    urls = se.lift(expr)
    native = _lib.expr.s3.get(urls.__expr__, abort_on_error)
    return Expr(native)
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import pyarrow as pa
|
|
2
|
+
import pyarrow.compute as pc
|
|
3
|
+
import re2 as re
|
|
4
|
+
|
|
5
|
+
from spiral import _lib
|
|
6
|
+
from spiral.expressions.base import Expr, ExprLike
|
|
7
|
+
|
|
8
|
+
# TODO(ngates): we can add a symmetric "ascii" expression namespace in the future if
|
|
9
|
+
# the performance is required.
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def substr(expr: ExprLike = None, *, begin: int = 0, end: int | None = None) -> Expr:
    """Slice a string.

    Args:
        expr: The string expression to slice.
        begin: The starting index of the slice.
        end: The ending index of the slice.
    """
    from spiral import expressions as se

    source = se.lift(expr)
    native = _lib.spql.str.substr(source.__expr__, begin=begin, end=end)
    return Expr(native)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def extract_regex(pattern: str, *, strings: ExprLike) -> Expr:
    """Extract the first occurrence of a regex pattern from a string.

    Raises:
        NotImplementedError: always; the expression is not implemented yet.
    """
    raise NotImplementedError
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _extract_regex(arg: pa.Array | pa.Scalar, pattern: str) -> pa.Array | pa.Scalar:
    """Run ``pc.extract_regex`` on a string input and cast the result to a struct of strings.

    The struct has one string field per named group of ``pattern``.

    Raises:
        TypeError: if ``arg`` is not of Arrow string type.
    """
    # The return type is derived from the named groups of the compiled pattern.
    compiled = re.compile(pattern)
    group_fields = []
    for group_name in compiled.groupindex.keys():
        group_fields.append(pa.field(group_name, type=pa.string()))
    result_type = pa.struct(group_fields)

    if not pa.types.is_string(arg.type):
        raise TypeError("Input argument does not have the expected type")

    extracted = pc.extract_regex(arg, pattern=pattern)
    return extracted.cast(result_type)
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
from spiral import _lib
|
|
2
|
+
from spiral.expressions.base import Expr, ExprLike
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def getitem(expr: ExprLike, field: str) -> Expr:
    """Get field from a struct.

    Args:
        expr: The struct expression to get the field from.
        field: The field to get. It is passed to the native ``getitem`` unchanged.
    """
    from spiral import expressions as se

    struct_expr = se.lift(expr)
    native = _lib.expr.struct.getitem(struct_expr.__expr__, field)
    return Expr(native)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def pack(fields: dict[str, ExprLike], *, nullable: bool = False) -> Expr:
    """Assemble a new struct from the given named fields.

    Args:
        fields: A dictionary of field names to expressions. The field names will be used as the struct field names.
        nullable: Forwarded to the native ``pack`` call.
            NOTE(review): presumably marks the resulting struct as nullable - confirm.
    """
    from spiral import expressions as se

    names = list(fields.keys())
    natives = []
    for value in fields.values():
        natives.append(se.lift(value).__expr__)
    return Expr(_lib.expr.struct.pack(names, natives, nullable))
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def merge(*structs: "ExprLike") -> Expr:
    """Merge fields from the given structs into a single struct.

    Args:
        *structs: Each expression must evaluate to a struct.

    Returns:
        A single struct containing all the fields from the input structs.
        If a field is present in multiple structs, the value from the last struct is used.
    """
    from spiral import expressions as se

    # A single input needs no merging; it is lifted and returned.
    if len(structs) == 1:
        (only,) = structs
        return se.lift(only)

    natives = []
    for item in structs:
        natives.append(se.lift(item).__expr__)
    return Expr(_lib.expr.struct.merge(natives))
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def select(expr: ExprLike, names: list[str] = None, exclude: list[str] = None) -> Expr:
    """Select fields from a struct.

    Args:
        expr: The struct-like expression to select fields from.
        names: Field names to select. If a path contains a dot, it is assumed to be a nested struct field.
        exclude: List of field names to exclude from result. Exactly one of `names` or `exclude` must be provided.
    """
    from spiral import expressions as se

    struct_expr = se.lift(expr)
    # Both arguments are forwarded to the native call as given; no validation
    # happens at this level.
    native = _lib.expr.struct.select(struct_expr.__expr__, names, exclude)
    return Expr(native)
|