pyspiral 0.3.1__cp310-abi3-macosx_11_0_arm64.whl → 0.4.1__cp310-abi3-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pyspiral-0.3.1.dist-info → pyspiral-0.4.1.dist-info}/METADATA +9 -13
- pyspiral-0.4.1.dist-info/RECORD +98 -0
- {pyspiral-0.3.1.dist-info → pyspiral-0.4.1.dist-info}/WHEEL +1 -1
- spiral/__init__.py +6 -9
- spiral/_lib.abi3.so +0 -0
- spiral/adbc.py +21 -14
- spiral/api/__init__.py +14 -175
- spiral/api/admin.py +12 -26
- spiral/api/client.py +160 -0
- spiral/api/filesystems.py +100 -72
- spiral/api/organizations.py +45 -58
- spiral/api/projects.py +171 -134
- spiral/api/telemetry.py +19 -0
- spiral/api/types.py +20 -0
- spiral/api/workloads.py +32 -25
- spiral/{arrow.py → arrow_.py} +12 -0
- spiral/cli/__init__.py +2 -5
- spiral/cli/admin.py +7 -12
- spiral/cli/app.py +23 -6
- spiral/cli/console.py +1 -1
- spiral/cli/fs.py +82 -17
- spiral/cli/iceberg/__init__.py +7 -0
- spiral/cli/iceberg/namespaces.py +47 -0
- spiral/cli/iceberg/tables.py +60 -0
- spiral/cli/indexes/__init__.py +19 -0
- spiral/cli/login.py +14 -5
- spiral/cli/orgs.py +90 -0
- spiral/cli/printer.py +9 -1
- spiral/cli/projects.py +136 -0
- spiral/cli/state.py +2 -0
- spiral/cli/tables/__init__.py +121 -0
- spiral/cli/telemetry.py +18 -0
- spiral/cli/types.py +8 -10
- spiral/cli/{workload.py → workloads.py} +11 -11
- spiral/{catalog.py → client.py} +23 -37
- spiral/core/client/__init__.pyi +117 -0
- spiral/core/index/__init__.pyi +15 -0
- spiral/core/{core → table}/__init__.pyi +44 -17
- spiral/core/{manifests → table/manifests}/__init__.pyi +5 -23
- spiral/core/table/metastore/__init__.pyi +62 -0
- spiral/core/{spec → table/spec}/__init__.pyi +41 -66
- spiral/datetime_.py +27 -0
- spiral/expressions/__init__.py +26 -18
- spiral/expressions/base.py +5 -5
- spiral/expressions/list_.py +1 -1
- spiral/expressions/mp4.py +2 -9
- spiral/expressions/png.py +1 -1
- spiral/expressions/qoi.py +1 -1
- spiral/expressions/refs.py +3 -9
- spiral/expressions/struct.py +7 -5
- spiral/expressions/text.py +62 -0
- spiral/expressions/udf.py +3 -3
- spiral/iceberg/__init__.py +3 -0
- spiral/iceberg/client.py +33 -0
- spiral/indexes/__init__.py +5 -0
- spiral/indexes/client.py +137 -0
- spiral/indexes/index.py +34 -0
- spiral/indexes/scan.py +22 -0
- spiral/project.py +19 -110
- spiral/{proto → protogen}/_/scandal/__init__.py +23 -135
- spiral/protogen/_/spiral/table/__init__.py +22 -0
- spiral/protogen/substrait/__init__.py +3399 -0
- spiral/protogen/substrait/extensions/__init__.py +115 -0
- spiral/server.py +17 -0
- spiral/settings.py +29 -91
- spiral/substrait_.py +9 -5
- spiral/tables/__init__.py +12 -0
- spiral/tables/client.py +130 -0
- spiral/{dataset.py → tables/dataset.py} +9 -199
- spiral/tables/debug/manifests.py +70 -0
- spiral/tables/debug/metrics.py +56 -0
- spiral/{debug.py → tables/debug/scan.py} +6 -9
- spiral/{maintenance.py → tables/maintenance.py} +1 -1
- spiral/{scan_.py → tables/scan.py} +63 -89
- spiral/tables/snapshot.py +78 -0
- spiral/{table.py → tables/table.py} +59 -73
- spiral/{txn.py → tables/transaction.py} +7 -3
- pyspiral-0.3.1.dist-info/RECORD +0 -85
- spiral/api/tables.py +0 -91
- spiral/api/tokens.py +0 -56
- spiral/authn/authn.py +0 -89
- spiral/authn/device.py +0 -206
- spiral/authn/github_.py +0 -33
- spiral/authn/modal_.py +0 -18
- spiral/cli/org.py +0 -90
- spiral/cli/project.py +0 -109
- spiral/cli/table.py +0 -20
- spiral/cli/token.py +0 -27
- spiral/core/metastore/__init__.pyi +0 -91
- spiral/proto/_/spfs/__init__.py +0 -36
- spiral/proto/_/spiral/table/__init__.py +0 -276
- spiral/proto/_/spiraldb/metastore/__init__.py +0 -499
- spiral/proto/__init__.py +0 -0
- spiral/proto/scandal/__init__.py +0 -45
- spiral/proto/spiral/__init__.py +0 -0
- spiral/proto/spiral/table/__init__.py +0 -96
- {pyspiral-0.3.1.dist-info → pyspiral-0.4.1.dist-info}/entry_points.txt +0 -0
- /spiral/{authn/__init__.py → core/__init__.pyi} +0 -0
- /spiral/{core → protogen/_}/__init__.py +0 -0
- /spiral/{proto/_ → protogen/_/arrow}/__init__.py +0 -0
- /spiral/{proto/_/arrow → protogen/_/arrow/flight}/__init__.py +0 -0
- /spiral/{proto/_/arrow/flight → protogen/_/arrow/flight/protocol}/__init__.py +0 -0
- /spiral/{proto → protogen}/_/arrow/flight/protocol/sql/__init__.py +0 -0
- /spiral/{proto/_/arrow/flight/protocol → protogen/_/spiral}/__init__.py +0 -0
- /spiral/{proto → protogen/_}/substrait/__init__.py +0 -0
- /spiral/{proto → protogen/_}/substrait/extensions/__init__.py +0 -0
- /spiral/{proto/_/spiral → protogen}/__init__.py +0 -0
- /spiral/{proto → protogen}/util.py +0 -0
- /spiral/{proto/_/spiraldb → tables/debug}/__init__.py +0 -0
@@ -14,6 +14,24 @@ class ColumnGroup:
|
|
14
14
|
@staticmethod
|
15
15
|
def from_str(path: str) -> ColumnGroup: ...
|
16
16
|
|
17
|
+
class KeySpaceMetadata:
|
18
|
+
def __init__(
|
19
|
+
self,
|
20
|
+
*,
|
21
|
+
manifest_handle: ManifestHandle | None,
|
22
|
+
last_modified_at: int,
|
23
|
+
): ...
|
24
|
+
|
25
|
+
manifest_handle: ManifestHandle | None
|
26
|
+
last_modified_at: int
|
27
|
+
|
28
|
+
def asof(self, asof: int) -> KeySpaceMetadata:
|
29
|
+
"""Returns the metadata as of a given timestamp. Currently just filtering versioned schemas."""
|
30
|
+
...
|
31
|
+
|
32
|
+
def apply_wal(self, wal: WriteAheadLog) -> KeySpaceMetadata:
|
33
|
+
"""Applies the given WAL to the metadata."""
|
34
|
+
|
17
35
|
class ColumnGroupMetadata:
|
18
36
|
def __init__(
|
19
37
|
self,
|
@@ -44,17 +62,16 @@ class ColumnGroupMetadata:
|
|
44
62
|
def apply_wal(self, wal: WriteAheadLog) -> ColumnGroupMetadata:
|
45
63
|
"""Applies the given WAL to the metadata."""
|
46
64
|
|
47
|
-
def __bytes__(self):
|
48
|
-
"""Serializes the ColumnGroupMetadata to a protobuf buffer."""
|
49
|
-
|
50
|
-
@staticmethod
|
51
|
-
def from_proto(buffer: bytes) -> ColumnGroupMetadata:
|
52
|
-
"""Deserializes a ColumnGroupMetadata from a protobuf buffer."""
|
53
|
-
...
|
54
|
-
|
55
65
|
class LogEntry:
|
56
66
|
ts: int
|
57
|
-
operation:
|
67
|
+
operation: (
|
68
|
+
KeySpaceWriteOp
|
69
|
+
| ColumnGroupWriteOp
|
70
|
+
| SchemaEvolutionOp
|
71
|
+
| SchemaBreakOp
|
72
|
+
| KeySpaceCompactOp
|
73
|
+
| ColumnGroupCompactOp
|
74
|
+
)
|
58
75
|
|
59
76
|
def column_group(self) -> ColumnGroup | None:
|
60
77
|
"""Returns the column group of the entry if it is associated with one."""
|
@@ -71,6 +88,10 @@ class FileFormat:
|
|
71
88
|
"""Returns the protobuf enum int value."""
|
72
89
|
...
|
73
90
|
|
91
|
+
def __str__(self) -> str:
|
92
|
+
"""Returns the string representation of the file format."""
|
93
|
+
...
|
94
|
+
|
74
95
|
class FragmentLevel:
|
75
96
|
L0: FragmentLevel
|
76
97
|
L1: FragmentLevel
|
@@ -80,15 +101,9 @@ class FragmentLevel:
|
|
80
101
|
...
|
81
102
|
|
82
103
|
class Key:
|
83
|
-
def __init__(self, key: bytes): ...
|
84
|
-
|
85
104
|
key: bytes
|
86
105
|
|
87
|
-
def
|
88
|
-
"""Concatenates two keys.
|
89
|
-
|
90
|
-
TODO(ngates): remove this function. It should not be necessary to concatenate keys."""
|
91
|
-
|
106
|
+
def __init__(self, key: bytes): ...
|
92
107
|
def __bytes__(self): ...
|
93
108
|
def step(self) -> Key:
|
94
109
|
"""Returns the next key in the key space."""
|
@@ -97,8 +112,6 @@ class Key:
|
|
97
112
|
def min() -> Key: ...
|
98
113
|
@staticmethod
|
99
114
|
def max() -> Key: ...
|
100
|
-
@staticmethod
|
101
|
-
def from_array_tuple(array_tuple: tuple[pa.Array]) -> Key: ...
|
102
115
|
|
103
116
|
class KeyExtent:
|
104
117
|
"""An inclusive range of keys."""
|
@@ -108,9 +121,6 @@ class KeyExtent:
|
|
108
121
|
min: Key
|
109
122
|
max: Key
|
110
123
|
|
111
|
-
def to_range(self) -> KeyRange:
|
112
|
-
"""Turn this inclusive key extent into an exclusive key range."""
|
113
|
-
|
114
124
|
def union(self, key_extent: KeyExtent) -> KeyExtent: ...
|
115
125
|
def __or__(self, other: KeyExtent) -> KeyExtent: ...
|
116
126
|
def intersection(self, key_extent: KeyExtent) -> KeyExtent | None: ...
|
@@ -118,30 +128,6 @@ class KeyExtent:
|
|
118
128
|
def contains(self, item: Key) -> bool: ...
|
119
129
|
def __contains__(self, item: Key) -> bool: ...
|
120
130
|
|
121
|
-
class KeyRange:
|
122
|
-
"""A right-exclusive range of keys."""
|
123
|
-
|
124
|
-
def __init__(self, *, begin: Key, end: Key): ...
|
125
|
-
|
126
|
-
begin: Key
|
127
|
-
end: Key
|
128
|
-
|
129
|
-
def union(self, other: KeyRange) -> KeyRange: ...
|
130
|
-
def __or__(self, other: KeyRange) -> KeyRange: ...
|
131
|
-
def intersection(self, key_extent: KeyRange) -> KeyRange | None: ...
|
132
|
-
def __and__(self, other: KeyRange) -> KeyRange | None: ...
|
133
|
-
def contains(self, item: Key) -> bool: ...
|
134
|
-
def __contains__(self, item: Key) -> bool: ...
|
135
|
-
def is_disjoint(self, key_range: KeyRange) -> bool:
|
136
|
-
return self.end <= key_range.begin or self.begin >= key_range.end
|
137
|
-
|
138
|
-
@staticmethod
|
139
|
-
def beginning_with(begin: Key) -> KeyRange: ...
|
140
|
-
@staticmethod
|
141
|
-
def ending_with(end: Key) -> KeyRange: ...
|
142
|
-
@staticmethod
|
143
|
-
def full() -> KeyRange: ...
|
144
|
-
|
145
131
|
class KeySpan:
|
146
132
|
"""An exclusive range of keys as indexed by their position in a key space."""
|
147
133
|
|
@@ -155,16 +141,10 @@ class KeySpan:
|
|
155
141
|
def union(self, other: KeySpan) -> KeySpan: ...
|
156
142
|
def __or__(self, other: KeySpan) -> KeySpan: ...
|
157
143
|
|
158
|
-
class KeyMap:
|
159
|
-
"""Displacement map."""
|
160
|
-
|
161
144
|
class ManifestHandle:
|
162
|
-
def __init__(self, id: str, format: FileFormat, file_size: int, spfs_format_metadata: bytes | None): ...
|
163
|
-
|
164
145
|
id: str
|
165
146
|
format: FileFormat
|
166
147
|
file_size: int
|
167
|
-
spfs_format_metadata: bytes | None
|
168
148
|
|
169
149
|
class Schema:
|
170
150
|
def to_arrow(self) -> pa.Schema:
|
@@ -191,24 +171,28 @@ class KeySpaceWriteOp:
|
|
191
171
|
ks_id: str
|
192
172
|
manifest_handle: ManifestHandle
|
193
173
|
|
194
|
-
class
|
174
|
+
class ColumnGroupWriteOp:
|
195
175
|
column_group: ColumnGroup
|
196
|
-
|
197
|
-
fs_level: FragmentLevel
|
176
|
+
level: FragmentLevel
|
198
177
|
manifest_handle: ManifestHandle
|
199
178
|
key_span: KeySpan
|
200
179
|
key_extent: KeyExtent
|
201
180
|
column_ids: list[str]
|
202
181
|
|
203
|
-
class ConfigurationOp:
|
204
|
-
column_group: ColumnGroup
|
205
|
-
|
206
182
|
class SchemaEvolutionOp:
|
207
183
|
column_group: ColumnGroup
|
208
184
|
|
209
185
|
class SchemaBreakOp:
|
210
186
|
column_group: ColumnGroup
|
211
187
|
|
188
|
+
class KeySpaceCompactOp:
|
189
|
+
ks_ids: list[str]
|
190
|
+
moved_ks_ids: list[str]
|
191
|
+
|
192
|
+
class ColumnGroupCompactOp:
|
193
|
+
column_group: ColumnGroup
|
194
|
+
fragment_ids: list[int]
|
195
|
+
|
212
196
|
class WriteAheadLog:
|
213
197
|
def __init__(
|
214
198
|
self,
|
@@ -219,7 +203,6 @@ class WriteAheadLog:
|
|
219
203
|
|
220
204
|
entries: list[LogEntry]
|
221
205
|
truncated_up_to: int
|
222
|
-
ks_manifest_handle: ManifestHandle | None
|
223
206
|
|
224
207
|
@property
|
225
208
|
def last_modified_at(self) -> int:
|
@@ -229,11 +212,3 @@ class WriteAheadLog:
|
|
229
212
|
self, asof: int | None = None, since: int | None = None, column_group: ColumnGroup | None = None
|
230
213
|
) -> WriteAheadLog:
|
231
214
|
"""Filters the WAL to entries by the given parameters."""
|
232
|
-
|
233
|
-
def __bytes__(self):
|
234
|
-
"""Serializes the ColumnGroupMetadata to a protobuf buffer."""
|
235
|
-
|
236
|
-
@staticmethod
|
237
|
-
def from_proto(buffer: bytes) -> WriteAheadLog:
|
238
|
-
"""Deserializes a WriteAheadLog from a protobuf buffer."""
|
239
|
-
...
|
spiral/datetime_.py
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
import warnings
|
2
|
+
from datetime import datetime, timedelta, timezone, tzinfo
|
3
|
+
|
4
|
+
_THE_EPOCH = datetime.fromtimestamp(0, tz=timezone.utc)
|
5
|
+
|
6
|
+
|
7
|
+
def local_tz() -> tzinfo:
|
8
|
+
"""Determine this machine's local timezone."""
|
9
|
+
tz = datetime.now().astimezone().tzinfo
|
10
|
+
if tz is None:
|
11
|
+
raise ValueError("Could not determine this machine's local timezone.")
|
12
|
+
return tz
|
13
|
+
|
14
|
+
|
15
|
+
def timestamp_micros(instant: datetime) -> int:
|
16
|
+
"""The number of microseconds between the epoch and the given instant."""
|
17
|
+
if instant.tzinfo is None:
|
18
|
+
warnings.warn("assuming timezone-naive datetime is local time", stacklevel=2)
|
19
|
+
instant = instant.replace(tzinfo=local_tz())
|
20
|
+
return (instant - _THE_EPOCH) // timedelta(microseconds=1)
|
21
|
+
|
22
|
+
|
23
|
+
def from_timestamp_micros(ts: int) -> datetime:
|
24
|
+
"""Convert a timestamp in microseconds to a datetime."""
|
25
|
+
if ts < 0:
|
26
|
+
raise ValueError("Timestamp must be non-negative")
|
27
|
+
return _THE_EPOCH + timedelta(microseconds=ts)
|
spiral/expressions/__init__.py
CHANGED
@@ -1,11 +1,12 @@
|
|
1
1
|
import builtins
|
2
2
|
import functools
|
3
3
|
import operator
|
4
|
+
import warnings
|
4
5
|
from typing import Any
|
5
6
|
|
6
7
|
import pyarrow as pa
|
7
8
|
|
8
|
-
from spiral import _lib,
|
9
|
+
from spiral import _lib, arrow_
|
9
10
|
|
10
11
|
from . import http as http
|
11
12
|
from . import io as io
|
@@ -16,8 +17,9 @@ from . import qoi as qoi
|
|
16
17
|
from . import refs as refs
|
17
18
|
from . import str_ as str
|
18
19
|
from . import struct as struct
|
20
|
+
from . import text as text
|
19
21
|
from . import tiff as tiff
|
20
|
-
from .base import Expr, ExprLike
|
22
|
+
from .base import Expr, ExprLike, NativeExpr
|
21
23
|
|
22
24
|
__all__ = [
|
23
25
|
"Expr",
|
@@ -54,11 +56,11 @@ __all__ = [
|
|
54
56
|
"struct",
|
55
57
|
"subtract",
|
56
58
|
"tiff",
|
57
|
-
"var",
|
58
59
|
"xor",
|
59
60
|
"png",
|
60
61
|
"qoi",
|
61
62
|
"mp4",
|
63
|
+
"text",
|
62
64
|
]
|
63
65
|
|
64
66
|
# Inline some of the struct expressions since they're so common
|
@@ -74,13 +76,15 @@ def lift(expr: ExprLike) -> Expr:
|
|
74
76
|
# Convert an ExprLike into an Expr.
|
75
77
|
if isinstance(expr, Expr):
|
76
78
|
return expr
|
79
|
+
if isinstance(expr, NativeExpr):
|
80
|
+
return Expr(expr)
|
77
81
|
|
78
82
|
if isinstance(expr, dict):
|
79
83
|
# NOTE: we assume this is a struct expression. We could be smarter and be context aware to determine if
|
80
84
|
# this is in fact a struct scalar, but the user can always create one of those manually.
|
81
85
|
|
82
86
|
# First we un-nest any dot-separated field names
|
83
|
-
expr: dict =
|
87
|
+
expr: dict = arrow_.nest_structs(expr)
|
84
88
|
|
85
89
|
return pack({k: lift(v) for k, v in expr.items()})
|
86
90
|
|
@@ -95,14 +99,18 @@ def lift(expr: ExprLike) -> Expr:
|
|
95
99
|
|
96
100
|
# If the value is struct-like, we un-nest any dot-separated field names
|
97
101
|
if isinstance(expr, pa.StructArray | pa.StructScalar):
|
102
|
+
# TODO(marko): Figure out what to do with nullable struct arrays when unpacking them.
|
103
|
+
# We need to merge struct validity into the child validity?
|
98
104
|
if isinstance(expr, pa.StructArray) and expr.null_count != 0:
|
99
|
-
raise ValueError("lift: cannot lift a struct array with nulls.")
|
100
|
-
|
101
|
-
|
102
|
-
|
105
|
+
# raise ValueError("lift: cannot lift a struct array with nulls.")
|
106
|
+
warnings.warn("found a struct array with nulls", stacklevel=2)
|
107
|
+
if isinstance(expr, pa.StructScalar) and not expr.is_valid():
|
108
|
+
# raise ValueError("lift: cannot lift a struct scalar with nulls.")
|
109
|
+
warnings.warn("found a struct scalar with nulls", stacklevel=2)
|
110
|
+
return lift(arrow_.nest_structs(expr))
|
103
111
|
|
104
112
|
if isinstance(expr, pa.Array):
|
105
|
-
return Expr(_lib.
|
113
|
+
return Expr(_lib.expr.array_lit(expr))
|
106
114
|
|
107
115
|
# Otherwise, assume it's a scalar.
|
108
116
|
return scalar(expr)
|
@@ -114,7 +122,7 @@ def key(name: builtins.str) -> Expr:
|
|
114
122
|
Args:
|
115
123
|
name: variable name
|
116
124
|
"""
|
117
|
-
return Expr(_lib.
|
125
|
+
return Expr(_lib.expr.keyed(name))
|
118
126
|
|
119
127
|
|
120
128
|
def keyed(name: builtins.str, dtype: pa.DataType) -> Expr:
|
@@ -127,20 +135,21 @@ def keyed(name: builtins.str, dtype: pa.DataType) -> Expr:
|
|
127
135
|
name: variable name
|
128
136
|
dtype: must match dtype of the column in the key table.
|
129
137
|
"""
|
130
|
-
return Expr(_lib.
|
138
|
+
return Expr(_lib.expr.keyed(name, dtype))
|
131
139
|
|
132
140
|
|
133
141
|
def scalar(value: Any) -> Expr:
|
134
142
|
"""Create a scalar expression."""
|
135
143
|
if not isinstance(value, pa.Scalar):
|
136
144
|
value = pa.scalar(value)
|
137
|
-
|
145
|
+
# TODO(marko): Use Vortex scalar instead of passing as array.
|
146
|
+
return Expr(_lib.expr.scalar(pa.array([value.as_py()], type=value.type)))
|
138
147
|
|
139
148
|
|
140
149
|
def cast(expr: ExprLike, dtype: pa.DataType) -> Expr:
|
141
150
|
"""Cast an expression into another PyArrow DataType."""
|
142
151
|
expr = lift(expr)
|
143
|
-
return Expr(_lib.
|
152
|
+
return Expr(_lib.expr.cast(expr.__expr__, dtype))
|
144
153
|
|
145
154
|
|
146
155
|
def and_(expr: ExprLike, *exprs: ExprLike) -> Expr:
|
@@ -197,19 +206,18 @@ def negate(expr: ExprLike) -> Expr:
|
|
197
206
|
def not_(expr: ExprLike) -> Expr:
|
198
207
|
"""Negate the given expression."""
|
199
208
|
expr = lift(expr)
|
200
|
-
return Expr(_lib.
|
209
|
+
return Expr(_lib.expr.not_(expr.__expr__))
|
201
210
|
|
202
211
|
|
203
212
|
def is_null(expr: ExprLike) -> Expr:
|
204
213
|
"""Check if the given expression is null."""
|
205
214
|
expr = lift(expr)
|
206
|
-
return Expr(_lib.
|
215
|
+
return Expr(_lib.expr.is_null(expr.__expr__))
|
207
216
|
|
208
217
|
|
209
218
|
def is_not_null(expr: ExprLike) -> Expr:
|
210
|
-
"""Check if the given expression is null."""
|
211
|
-
|
212
|
-
return Expr(_lib.spql.expr.unary("is_not_null", expr.__expr__))
|
219
|
+
"""Check if the given expression is not null."""
|
220
|
+
return not_(is_null(expr))
|
213
221
|
|
214
222
|
|
215
223
|
def add(lhs: ExprLike, rhs: ExprLike) -> Expr:
|
spiral/expressions/base.py
CHANGED
@@ -5,7 +5,7 @@ import pyarrow as pa
|
|
5
5
|
|
6
6
|
from spiral import _lib
|
7
7
|
|
8
|
-
NativeExpr: TypeAlias = _lib.
|
8
|
+
NativeExpr: TypeAlias = _lib.expr.Expr
|
9
9
|
|
10
10
|
|
11
11
|
class Expr:
|
@@ -92,13 +92,13 @@ class Expr:
|
|
92
92
|
return self._binary("mod", other)
|
93
93
|
|
94
94
|
def __neg__(self):
|
95
|
-
return Expr(_lib.
|
95
|
+
return Expr(_lib.expr.unary("neg", self.__expr__))
|
96
96
|
|
97
97
|
def in_(self, other: "ExprLike") -> "Expr":
|
98
98
|
from spiral import expressions as se
|
99
99
|
|
100
100
|
other = se.lift(other)
|
101
|
-
return Expr(_lib.
|
101
|
+
return Expr(_lib.expr.list.contains(other.__expr__, self.__expr__))
|
102
102
|
|
103
103
|
def contains(self, other: "ExprLike") -> "Expr":
|
104
104
|
from spiral import expressions as se
|
@@ -107,7 +107,7 @@ class Expr:
|
|
107
107
|
|
108
108
|
def cast(self, dtype: pa.DataType) -> "Expr":
|
109
109
|
"""Cast the expression result to a different data type."""
|
110
|
-
return Expr(_lib.
|
110
|
+
return Expr(_lib.expr.cast(self.__expr__, dtype))
|
111
111
|
|
112
112
|
def select(self, *paths: str, exclude: list[str] = None) -> "Expr":
|
113
113
|
"""Select fields from a struct-like expression.
|
@@ -142,7 +142,7 @@ class Expr:
|
|
142
142
|
from spiral import expressions as se
|
143
143
|
|
144
144
|
rhs = se.lift(rhs)
|
145
|
-
return Expr(_lib.
|
145
|
+
return Expr(_lib.expr.binary(op, self.__expr__, rhs.__expr__))
|
146
146
|
|
147
147
|
|
148
148
|
ScalarLike: TypeAlias = bool | int | float | str | list | datetime.datetime | None
|
spiral/expressions/list_.py
CHANGED
@@ -29,7 +29,7 @@ def element_at(expr: ExprLike, index: ExprLike) -> Expr:
|
|
29
29
|
|
30
30
|
expr = lift(expr)
|
31
31
|
index = lift(index)
|
32
|
-
return Expr(_lib.
|
32
|
+
return Expr(_lib.expr.list.element_at(expr.__expr__, index.__expr__))
|
33
33
|
|
34
34
|
|
35
35
|
def of(*expr: ExprLike) -> Expr:
|
spiral/expressions/mp4.py
CHANGED
@@ -1,12 +1,7 @@
|
|
1
|
-
from typing import TYPE_CHECKING
|
2
|
-
|
3
1
|
import pyarrow as pa
|
4
2
|
|
5
3
|
from spiral.expressions.base import Expr, ExprLike
|
6
4
|
|
7
|
-
if TYPE_CHECKING:
|
8
|
-
from spiral import Table
|
9
|
-
|
10
5
|
_MP4_RES_DTYPE: pa.DataType = pa.struct(
|
11
6
|
[
|
12
7
|
pa.field("pixels", pa.large_binary()),
|
@@ -19,7 +14,7 @@ _MP4_RES_DTYPE: pa.DataType = pa.struct(
|
|
19
14
|
|
20
15
|
# TODO(marko): Support optional range and crop.
|
21
16
|
# IMPORTANT: Frames is currently broken and defaults to full.
|
22
|
-
def read(expr: ExprLike | str, frames: ExprLike | str, crop: ExprLike | str
|
17
|
+
def read(expr: ExprLike | str, frames: ExprLike | str, crop: ExprLike | str):
|
23
18
|
"""
|
24
19
|
Read referenced cell in a `MP4` format. Requires `ffmpeg`.
|
25
20
|
|
@@ -32,7 +27,6 @@ def read(expr: ExprLike | str, frames: ExprLike | str, crop: ExprLike | str, *,
|
|
32
27
|
crop: The crop of the frames to read. Each element must be a list of four uint32,
|
33
28
|
x, y, width, height or null / empty list to read full frames.
|
34
29
|
A str is assumed to be the `se.keyed` expression.
|
35
|
-
table (optional): The table to de-reference from, if not available in input expression.
|
36
30
|
|
37
31
|
Returns:
|
38
32
|
An array where each element is a decoded cropped video with fields:
|
@@ -59,11 +53,10 @@ def read(expr: ExprLike | str, frames: ExprLike | str, crop: ExprLike | str, *,
|
|
59
53
|
crop = lift(crop)
|
60
54
|
|
61
55
|
return Expr(
|
62
|
-
_lib.
|
56
|
+
_lib.expr.video.read(
|
63
57
|
expr.__expr__,
|
64
58
|
frames.__expr__,
|
65
59
|
crop.__expr__,
|
66
60
|
format="mp4",
|
67
|
-
table=table._table if table is not None else None,
|
68
61
|
)
|
69
62
|
)
|
spiral/expressions/png.py
CHANGED
spiral/expressions/qoi.py
CHANGED
spiral/expressions/refs.py
CHANGED
@@ -1,12 +1,7 @@
|
|
1
|
-
from typing import TYPE_CHECKING
|
2
|
-
|
3
1
|
import pyarrow as pa
|
4
2
|
|
5
3
|
from spiral.expressions.base import Expr, ExprLike
|
6
4
|
|
7
|
-
if TYPE_CHECKING:
|
8
|
-
from spiral import Table
|
9
|
-
|
10
5
|
|
11
6
|
def ref(expr: ExprLike, field: str | None = None) -> Expr:
|
12
7
|
"""Store binary values as references. This expression can only be used on write.
|
@@ -24,10 +19,10 @@ def ref(expr: ExprLike, field: str | None = None) -> Expr:
|
|
24
19
|
from spiral.expressions import lift
|
25
20
|
|
26
21
|
expr = lift(expr)
|
27
|
-
return Expr(_lib.
|
22
|
+
return Expr(_lib.expr.refs.ref(expr.__expr__, field))
|
28
23
|
|
29
24
|
|
30
|
-
def deref(expr: ExprLike | str, field: str | None = None
|
25
|
+
def deref(expr: ExprLike | str, field: str | None = None) -> Expr:
|
31
26
|
"""De-reference referenced values.
|
32
27
|
|
33
28
|
See `ref` for more information on Spiral's reference values. This expression is used to de-reference referenced
|
@@ -37,7 +32,6 @@ def deref(expr: ExprLike | str, field: str | None = None, *, table: "Table" = No
|
|
37
32
|
expr: The expression to de-reference. A str is assumed to be the `se.keyed` expression.
|
38
33
|
field: If the expr evaluates into struct, the field name of that struct that should be de-referenced.
|
39
34
|
If `None`, the expr must evaluate into a reference type.
|
40
|
-
table (optional): The table to de-reference from, if not available in input expression.
|
41
35
|
"""
|
42
36
|
from spiral import _lib
|
43
37
|
from spiral.expressions import keyed, lift
|
@@ -49,7 +43,7 @@ def deref(expr: ExprLike | str, field: str | None = None, *, table: "Table" = No
|
|
49
43
|
)
|
50
44
|
|
51
45
|
expr = lift(expr)
|
52
|
-
return Expr(_lib.
|
46
|
+
return Expr(_lib.expr.refs.deref(expr.__expr__, field=field))
|
53
47
|
|
54
48
|
|
55
49
|
def nbytes(expr: ExprLike) -> Expr:
|
spiral/expressions/struct.py
CHANGED
@@ -12,10 +12,10 @@ def getitem(expr: ExprLike, field: str) -> Expr:
|
|
12
12
|
from spiral import expressions as se
|
13
13
|
|
14
14
|
expr = se.lift(expr)
|
15
|
-
return Expr(_lib.
|
15
|
+
return Expr(_lib.expr.struct.getitem(expr.__expr__, field))
|
16
16
|
|
17
17
|
|
18
|
-
def pack(fields: dict[str, ExprLike]) -> Expr:
|
18
|
+
def pack(fields: dict[str, ExprLike], *, nullable: bool = False) -> Expr:
|
19
19
|
"""Assemble a new struct from the given named fields.
|
20
20
|
|
21
21
|
Args:
|
@@ -23,7 +23,9 @@ def pack(fields: dict[str, ExprLike]) -> Expr:
|
|
23
23
|
"""
|
24
24
|
from spiral import expressions as se
|
25
25
|
|
26
|
-
return Expr(
|
26
|
+
return Expr(
|
27
|
+
_lib.expr.struct.pack(list(fields.keys()), [se.lift(expr).__expr__ for expr in fields.values()], nullable)
|
28
|
+
)
|
27
29
|
|
28
30
|
|
29
31
|
def merge(*structs: "ExprLike") -> Expr:
|
@@ -40,7 +42,7 @@ def merge(*structs: "ExprLike") -> Expr:
|
|
40
42
|
|
41
43
|
if len(structs) == 1:
|
42
44
|
return se.lift(structs[0])
|
43
|
-
return Expr(_lib.
|
45
|
+
return Expr(_lib.expr.struct.merge([se.lift(struct).__expr__ for struct in structs]))
|
44
46
|
|
45
47
|
|
46
48
|
def select(expr: ExprLike, names: list[str] = None, exclude: list[str] = None) -> Expr:
|
@@ -54,4 +56,4 @@ def select(expr: ExprLike, names: list[str] = None, exclude: list[str] = None) -
|
|
54
56
|
from spiral import expressions as se
|
55
57
|
|
56
58
|
expr = se.lift(expr)
|
57
|
-
return Expr(_lib.
|
59
|
+
return Expr(_lib.expr.struct.select(expr.__expr__, names, exclude))
|
@@ -0,0 +1,62 @@
|
|
1
|
+
from spiral.expressions.base import Expr, ExprLike
|
2
|
+
|
3
|
+
|
4
|
+
def field(expr: ExprLike, field_name: str | None = None, tokenizer: str | None = None) -> Expr:
|
5
|
+
"""Configure a column for text indexing.
|
6
|
+
|
7
|
+
Args:
|
8
|
+
expr: An input column. The expression must either evaluate to a UTF-8,
|
9
|
+
or, if a `field_name` is provided, to a struct with a field of that name.
|
10
|
+
field_name: If provided, the expression must evaluate to a struct with a field of that name.
|
11
|
+
The given field will be indexed.
|
12
|
+
tokenizer: If provided, the text will be tokenized using the given tokenizer.
|
13
|
+
|
14
|
+
Returns:
|
15
|
+
An expression that can be used to construct a text index.
|
16
|
+
"""
|
17
|
+
from spiral import _lib
|
18
|
+
from spiral.expressions import getitem, lift, merge, pack
|
19
|
+
|
20
|
+
expr = lift(expr)
|
21
|
+
if field_name is None:
|
22
|
+
return Expr(_lib.expr.text.field(expr.__expr__, tokenizer))
|
23
|
+
|
24
|
+
child = _lib.expr.text.field(getitem(expr, field_name).__expr__)
|
25
|
+
return merge(
|
26
|
+
expr,
|
27
|
+
pack({field_name: child}),
|
28
|
+
)
|
29
|
+
|
30
|
+
|
31
|
+
def find(expr: ExprLike, term: str) -> Expr:
|
32
|
+
"""Search for a term in the text.
|
33
|
+
|
34
|
+
Args:
|
35
|
+
expr: An index field.
|
36
|
+
term: The term to search for.
|
37
|
+
|
38
|
+
Returns:
|
39
|
+
An expression that can be used in ranking for text search.
|
40
|
+
"""
|
41
|
+
from spiral import _lib
|
42
|
+
from spiral.expressions import lift
|
43
|
+
|
44
|
+
expr = lift(expr)
|
45
|
+
return Expr(_lib.expr.text.find(expr.__expr__, term))
|
46
|
+
|
47
|
+
|
48
|
+
def boost(expr: ExprLike, factor: float) -> Expr:
|
49
|
+
"""Boost the relevance of a ranking expression.
|
50
|
+
|
51
|
+
Args:
|
52
|
+
expr: Rank by expression.
|
53
|
+
factor: The factor by which to boost the relevance.
|
54
|
+
|
55
|
+
Returns:
|
56
|
+
An expression that can be used in ranking for text search.
|
57
|
+
"""
|
58
|
+
from spiral import _lib
|
59
|
+
from spiral.expressions import lift
|
60
|
+
|
61
|
+
expr = lift(expr)
|
62
|
+
return Expr(_lib.expr.text.boost(expr.__expr__, factor))
|
spiral/expressions/udf.py
CHANGED
@@ -25,7 +25,7 @@ class UDF(BaseUDF):
|
|
25
25
|
"""A User-Defined Function (UDF)."""
|
26
26
|
|
27
27
|
def __init__(self, name: str):
|
28
|
-
super().__init__(_lib.
|
28
|
+
super().__init__(_lib.expr.udf.create(name, return_type=self.return_type, invoke=self.invoke))
|
29
29
|
|
30
30
|
@abc.abstractmethod
|
31
31
|
def invoke(self, *input_args: pa.Array) -> pa.Array: ...
|
@@ -35,10 +35,10 @@ class RefUDF(BaseUDF):
|
|
35
35
|
"""A UDF over a single ref cell, and therefore can access the file object."""
|
36
36
|
|
37
37
|
def __init__(self, name: str):
|
38
|
-
super().__init__(_lib.
|
38
|
+
super().__init__(_lib.expr.udf.create(name, return_type=self.return_type, invoke=self.invoke, scope="ref"))
|
39
39
|
|
40
40
|
@abc.abstractmethod
|
41
|
-
def invoke(self, fp, *input_args: pa.Array) -> pa.Array:
|
41
|
+
def invoke(self, fp: _lib.FileObject, *input_args: pa.Array) -> pa.Array:
|
42
42
|
"""Invoke the UDF with the given arguments.
|
43
43
|
|
44
44
|
NOTE: The first argument is always the ref cell. All array input args will be sliced to the appropriate row.
|
spiral/iceberg/client.py
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
from typing import TYPE_CHECKING
|
2
|
+
|
3
|
+
if TYPE_CHECKING:
|
4
|
+
from pyiceberg.catalog import Catalog
|
5
|
+
|
6
|
+
from spiral.client import Spiral
|
7
|
+
|
8
|
+
|
9
|
+
class Iceberg:
|
10
|
+
"""
|
11
|
+
Apache Iceberg is a powerful open-source table format designed for high-performance data lakes.
|
12
|
+
Iceberg brings reliability, scalability, and advanced features like time travel, schema evolution,
|
13
|
+
and ACID transactions to your warehouse.
|
14
|
+
"""
|
15
|
+
|
16
|
+
def __init__(self, spiral: "Spiral", *, project_id: str | None = None):
|
17
|
+
self._spiral = spiral
|
18
|
+
self._project_id = project_id
|
19
|
+
|
20
|
+
self._api = self._spiral.config.api
|
21
|
+
|
22
|
+
def catalog(self) -> "Catalog":
|
23
|
+
"""Open the Iceberg catalog."""
|
24
|
+
from pyiceberg.catalog import load_catalog
|
25
|
+
|
26
|
+
return load_catalog(
|
27
|
+
"default",
|
28
|
+
**{
|
29
|
+
"type": "rest",
|
30
|
+
"uri": self._spiral.config.spiraldb.uri + "/iceberg",
|
31
|
+
"token": self._spiral.config.authn.token().expose_secret(),
|
32
|
+
},
|
33
|
+
)
|