pyspiral 0.2.5__cp310-abi3-macosx_11_0_arm64.whl → 0.4.0__cp310-abi3-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pyspiral-0.2.5.dist-info → pyspiral-0.4.0.dist-info}/METADATA +12 -14
- pyspiral-0.4.0.dist-info/RECORD +98 -0
- {pyspiral-0.2.5.dist-info → pyspiral-0.4.0.dist-info}/WHEEL +1 -1
- spiral/__init__.py +6 -7
- spiral/_lib.abi3.so +0 -0
- spiral/adbc.py +21 -14
- spiral/api/__init__.py +15 -172
- spiral/api/admin.py +12 -26
- spiral/api/client.py +160 -0
- spiral/api/filesystems.py +100 -72
- spiral/api/organizations.py +45 -58
- spiral/api/projects.py +171 -134
- spiral/api/telemetry.py +19 -0
- spiral/api/types.py +20 -0
- spiral/api/workloads.py +32 -25
- spiral/{arrow.py → arrow_.py} +12 -0
- spiral/cli/__init__.py +2 -5
- spiral/cli/admin.py +7 -12
- spiral/cli/app.py +23 -6
- spiral/cli/console.py +1 -1
- spiral/cli/fs.py +83 -18
- spiral/cli/iceberg/__init__.py +7 -0
- spiral/cli/iceberg/namespaces.py +47 -0
- spiral/cli/iceberg/tables.py +60 -0
- spiral/cli/indexes/__init__.py +19 -0
- spiral/cli/login.py +14 -5
- spiral/cli/orgs.py +90 -0
- spiral/cli/printer.py +9 -1
- spiral/cli/projects.py +136 -0
- spiral/cli/state.py +2 -0
- spiral/cli/tables/__init__.py +121 -0
- spiral/cli/telemetry.py +18 -0
- spiral/cli/types.py +8 -10
- spiral/cli/{workload.py → workloads.py} +11 -11
- spiral/{catalog.py → client.py} +22 -21
- spiral/core/client/__init__.pyi +117 -0
- spiral/core/index/__init__.pyi +15 -0
- spiral/core/table/__init__.pyi +108 -0
- spiral/core/{manifests → table/manifests}/__init__.pyi +5 -23
- spiral/core/table/metastore/__init__.pyi +62 -0
- spiral/core/{spec → table/spec}/__init__.pyi +49 -92
- spiral/datetime_.py +27 -0
- spiral/expressions/__init__.py +40 -17
- spiral/expressions/base.py +5 -5
- spiral/expressions/list_.py +1 -1
- spiral/expressions/mp4.py +62 -0
- spiral/expressions/png.py +18 -0
- spiral/expressions/qoi.py +18 -0
- spiral/expressions/refs.py +23 -9
- spiral/expressions/struct.py +7 -5
- spiral/expressions/text.py +62 -0
- spiral/expressions/tiff.py +88 -88
- spiral/expressions/udf.py +3 -3
- spiral/iceberg/__init__.py +3 -0
- spiral/iceberg/client.py +33 -0
- spiral/indexes/__init__.py +5 -0
- spiral/indexes/client.py +137 -0
- spiral/indexes/index.py +34 -0
- spiral/indexes/scan.py +22 -0
- spiral/project.py +19 -110
- spiral/{proto → protogen}/_/scandal/__init__.py +32 -77
- spiral/protogen/_/spiral/table/__init__.py +22 -0
- spiral/protogen/substrait/__init__.py +3399 -0
- spiral/protogen/substrait/extensions/__init__.py +115 -0
- spiral/server.py +17 -0
- spiral/settings.py +31 -87
- spiral/substrait_.py +10 -6
- spiral/tables/__init__.py +12 -0
- spiral/tables/client.py +130 -0
- spiral/{dataset.py → tables/dataset.py} +36 -25
- spiral/tables/debug/manifests.py +70 -0
- spiral/tables/debug/metrics.py +56 -0
- spiral/{debug.py → tables/debug/scan.py} +6 -9
- spiral/tables/maintenance.py +12 -0
- spiral/tables/scan.py +193 -0
- spiral/tables/snapshot.py +78 -0
- spiral/tables/table.py +157 -0
- spiral/tables/transaction.py +52 -0
- pyspiral-0.2.5.dist-info/RECORD +0 -81
- spiral/api/tables.py +0 -94
- spiral/api/tokens.py +0 -56
- spiral/authn/authn.py +0 -89
- spiral/authn/device.py +0 -206
- spiral/authn/github_.py +0 -33
- spiral/authn/modal_.py +0 -18
- spiral/cli/org.py +0 -90
- spiral/cli/project.py +0 -107
- spiral/cli/table.py +0 -20
- spiral/cli/token.py +0 -27
- spiral/config.py +0 -26
- spiral/core/core/__init__.pyi +0 -53
- spiral/core/metastore/__init__.pyi +0 -91
- spiral/proto/_/spfs/__init__.py +0 -36
- spiral/proto/_/spiral/table/__init__.py +0 -225
- spiral/proto/_/spiraldb/metastore/__init__.py +0 -499
- spiral/proto/__init__.py +0 -0
- spiral/proto/scandal/__init__.py +0 -45
- spiral/proto/spiral/__init__.py +0 -0
- spiral/proto/spiral/table/__init__.py +0 -96
- spiral/scan_.py +0 -168
- spiral/table.py +0 -157
- {pyspiral-0.2.5.dist-info → pyspiral-0.4.0.dist-info}/entry_points.txt +0 -0
- /spiral/{authn/__init__.py → core/__init__.pyi} +0 -0
- /spiral/{core → protogen/_}/__init__.py +0 -0
- /spiral/{proto/_ → protogen/_/arrow}/__init__.py +0 -0
- /spiral/{proto/_/arrow → protogen/_/arrow/flight}/__init__.py +0 -0
- /spiral/{proto/_/arrow/flight → protogen/_/arrow/flight/protocol}/__init__.py +0 -0
- /spiral/{proto → protogen}/_/arrow/flight/protocol/sql/__init__.py +0 -0
- /spiral/{proto/_/arrow/flight/protocol → protogen/_/spiral}/__init__.py +0 -0
- /spiral/{proto → protogen/_}/substrait/__init__.py +0 -0
- /spiral/{proto → protogen/_}/substrait/extensions/__init__.py +0 -0
- /spiral/{proto/_/spiral → protogen}/__init__.py +0 -0
- /spiral/{proto → protogen}/util.py +0 -0
- /spiral/{proto/_/spiraldb → tables/debug}/__init__.py +0 -0
@@ -14,6 +14,24 @@ class ColumnGroup:
|
|
14
14
|
@staticmethod
|
15
15
|
def from_str(path: str) -> ColumnGroup: ...
|
16
16
|
|
17
|
+
class KeySpaceMetadata:
|
18
|
+
def __init__(
|
19
|
+
self,
|
20
|
+
*,
|
21
|
+
manifest_handle: ManifestHandle | None,
|
22
|
+
last_modified_at: int,
|
23
|
+
): ...
|
24
|
+
|
25
|
+
manifest_handle: ManifestHandle | None
|
26
|
+
last_modified_at: int
|
27
|
+
|
28
|
+
def asof(self, asof: int) -> KeySpaceMetadata:
|
29
|
+
"""Returns the metadata as of a given timestamp. Currently just filtering versioned schemas."""
|
30
|
+
...
|
31
|
+
|
32
|
+
def apply_wal(self, wal: WriteAheadLog) -> KeySpaceMetadata:
|
33
|
+
"""Applies the given WAL to the metadata."""
|
34
|
+
|
17
35
|
class ColumnGroupMetadata:
|
18
36
|
def __init__(
|
19
37
|
self,
|
@@ -44,57 +62,36 @@ class ColumnGroupMetadata:
|
|
44
62
|
def apply_wal(self, wal: WriteAheadLog) -> ColumnGroupMetadata:
|
45
63
|
"""Applies the given WAL to the metadata."""
|
46
64
|
|
47
|
-
def __bytes__(self):
|
48
|
-
"""Serializes the ColumnGroupMetadata to a protobuf buffer."""
|
49
|
-
|
50
|
-
@staticmethod
|
51
|
-
def from_proto(buffer: bytes) -> ColumnGroupMetadata:
|
52
|
-
"""Deserializes a ColumnGroupMetadata from a protobuf buffer."""
|
53
|
-
...
|
54
|
-
|
55
65
|
class LogEntry:
|
56
66
|
ts: int
|
57
|
-
operation:
|
67
|
+
operation: (
|
68
|
+
KeySpaceWriteOp
|
69
|
+
| ColumnGroupWriteOp
|
70
|
+
| SchemaEvolutionOp
|
71
|
+
| SchemaBreakOp
|
72
|
+
| KeySpaceCompactOp
|
73
|
+
| ColumnGroupCompactOp
|
74
|
+
)
|
58
75
|
|
59
76
|
def column_group(self) -> ColumnGroup | None:
|
60
77
|
"""Returns the column group of the entry if it is associated with one."""
|
61
78
|
|
62
|
-
def replace_timestamp(self, ts: int) -> LogEntry:
|
63
|
-
"""Returns a copy of the entry with the timestamp replaced."""
|
64
|
-
|
65
|
-
@staticmethod
|
66
|
-
def schema_break(*, column_group: ColumnGroup, removed_column_names: list[str]) -> LogEntry: ...
|
67
|
-
@staticmethod
|
68
|
-
def schema_evolution(*, column_group: ColumnGroup, new_schema: Schema) -> LogEntry: ...
|
69
|
-
@staticmethod
|
70
|
-
def ks_write(
|
71
|
-
*,
|
72
|
-
ks_id: str,
|
73
|
-
manifest_handle: ManifestHandle,
|
74
|
-
) -> LogEntry: ...
|
75
|
-
@staticmethod
|
76
|
-
def fs_write(
|
77
|
-
*,
|
78
|
-
column_group: ColumnGroup,
|
79
|
-
fs_id: str,
|
80
|
-
fs_level: FragmentLevel,
|
81
|
-
manifest_handle: ManifestHandle,
|
82
|
-
key_span: KeySpan,
|
83
|
-
key_extent: KeyExtent,
|
84
|
-
column_ids: list[str],
|
85
|
-
) -> LogEntry: ...
|
86
|
-
|
87
79
|
class FileFormat:
|
88
80
|
def __init__(self, value: int): ...
|
89
81
|
|
90
82
|
Parquet: FileFormat
|
91
83
|
Protobuf: FileFormat
|
92
84
|
BinaryArray: FileFormat
|
85
|
+
Vortex: FileFormat
|
93
86
|
|
94
87
|
def __int__(self) -> int:
|
95
88
|
"""Returns the protobuf enum int value."""
|
96
89
|
...
|
97
90
|
|
91
|
+
def __str__(self) -> str:
|
92
|
+
"""Returns the string representation of the file format."""
|
93
|
+
...
|
94
|
+
|
98
95
|
class FragmentLevel:
|
99
96
|
L0: FragmentLevel
|
100
97
|
L1: FragmentLevel
|
@@ -104,15 +101,9 @@ class FragmentLevel:
|
|
104
101
|
...
|
105
102
|
|
106
103
|
class Key:
|
107
|
-
def __init__(self, key: bytes): ...
|
108
|
-
|
109
104
|
key: bytes
|
110
105
|
|
111
|
-
def
|
112
|
-
"""Concatenates two keys.
|
113
|
-
|
114
|
-
TODO(ngates): remove this function. It should not be necessary to concatenate keys."""
|
115
|
-
|
106
|
+
def __init__(self, key: bytes): ...
|
116
107
|
def __bytes__(self): ...
|
117
108
|
def step(self) -> Key:
|
118
109
|
"""Returns the next key in the key space."""
|
@@ -121,8 +112,6 @@ class Key:
|
|
121
112
|
def min() -> Key: ...
|
122
113
|
@staticmethod
|
123
114
|
def max() -> Key: ...
|
124
|
-
@staticmethod
|
125
|
-
def from_array_tuple(array_tuple: tuple[pa.Array]) -> Key: ...
|
126
115
|
|
127
116
|
class KeyExtent:
|
128
117
|
"""An inclusive range of keys."""
|
@@ -132,9 +121,6 @@ class KeyExtent:
|
|
132
121
|
min: Key
|
133
122
|
max: Key
|
134
123
|
|
135
|
-
def to_range(self) -> KeyRange:
|
136
|
-
"""Turn this inclusive key extent into an exclusive key range."""
|
137
|
-
|
138
124
|
def union(self, key_extent: KeyExtent) -> KeyExtent: ...
|
139
125
|
def __or__(self, other: KeyExtent) -> KeyExtent: ...
|
140
126
|
def intersection(self, key_extent: KeyExtent) -> KeyExtent | None: ...
|
@@ -142,30 +128,6 @@ class KeyExtent:
|
|
142
128
|
def contains(self, item: Key) -> bool: ...
|
143
129
|
def __contains__(self, item: Key) -> bool: ...
|
144
130
|
|
145
|
-
class KeyRange:
|
146
|
-
"""A right-exclusive range of keys."""
|
147
|
-
|
148
|
-
def __init__(self, *, begin: Key, end: Key): ...
|
149
|
-
|
150
|
-
begin: Key
|
151
|
-
end: Key
|
152
|
-
|
153
|
-
def union(self, other: KeyRange) -> KeyRange: ...
|
154
|
-
def __or__(self, other: KeyRange) -> KeyRange: ...
|
155
|
-
def intersection(self, key_extent: KeyRange) -> KeyRange | None: ...
|
156
|
-
def __and__(self, other: KeyRange) -> KeyRange | None: ...
|
157
|
-
def contains(self, item: Key) -> bool: ...
|
158
|
-
def __contains__(self, item: Key) -> bool: ...
|
159
|
-
def is_disjoint(self, key_range: KeyRange) -> bool:
|
160
|
-
return self.end <= key_range.begin or self.begin >= key_range.end
|
161
|
-
|
162
|
-
@staticmethod
|
163
|
-
def beginning_with(begin: Key) -> KeyRange: ...
|
164
|
-
@staticmethod
|
165
|
-
def ending_with(end: Key) -> KeyRange: ...
|
166
|
-
@staticmethod
|
167
|
-
def full() -> KeyRange: ...
|
168
|
-
|
169
131
|
class KeySpan:
|
170
132
|
"""An exclusive range of keys as indexed by their position in a key space."""
|
171
133
|
|
@@ -179,26 +141,26 @@ class KeySpan:
|
|
179
141
|
def union(self, other: KeySpan) -> KeySpan: ...
|
180
142
|
def __or__(self, other: KeySpan) -> KeySpan: ...
|
181
143
|
|
182
|
-
class KeyMap:
|
183
|
-
"""Displacement map."""
|
184
|
-
|
185
144
|
class ManifestHandle:
|
186
|
-
def __init__(self, id: str, format: FileFormat, file_size: int, spfs_format_metadata: bytes | None): ...
|
187
|
-
|
188
145
|
id: str
|
189
146
|
format: FileFormat
|
190
147
|
file_size: int
|
191
|
-
spfs_format_metadata: bytes | None
|
192
148
|
|
193
149
|
class Schema:
|
194
150
|
def to_arrow(self) -> pa.Schema:
|
195
151
|
"""Returns the Arrow schema."""
|
196
152
|
...
|
197
|
-
|
198
153
|
@staticmethod
|
199
154
|
def from_arrow(arrow: pa.Schema) -> Schema:
|
200
155
|
"""Creates a Schema from an Arrow schema."""
|
201
156
|
...
|
157
|
+
def __len__(self):
|
158
|
+
"""Returns the number of columns in the schema."""
|
159
|
+
...
|
160
|
+
@property
|
161
|
+
def names(self) -> list[str]:
|
162
|
+
"""Returns the names of the columns in the schema."""
|
163
|
+
...
|
202
164
|
|
203
165
|
class VersionedSchema:
|
204
166
|
ts: int
|
@@ -209,24 +171,28 @@ class KeySpaceWriteOp:
|
|
209
171
|
ks_id: str
|
210
172
|
manifest_handle: ManifestHandle
|
211
173
|
|
212
|
-
class
|
174
|
+
class ColumnGroupWriteOp:
|
213
175
|
column_group: ColumnGroup
|
214
|
-
|
215
|
-
fs_level: FragmentLevel
|
176
|
+
level: FragmentLevel
|
216
177
|
manifest_handle: ManifestHandle
|
217
178
|
key_span: KeySpan
|
218
179
|
key_extent: KeyExtent
|
219
180
|
column_ids: list[str]
|
220
181
|
|
221
|
-
class ConfigurationOp:
|
222
|
-
column_group: ColumnGroup
|
223
|
-
|
224
182
|
class SchemaEvolutionOp:
|
225
183
|
column_group: ColumnGroup
|
226
184
|
|
227
185
|
class SchemaBreakOp:
|
228
186
|
column_group: ColumnGroup
|
229
187
|
|
188
|
+
class KeySpaceCompactOp:
|
189
|
+
ks_ids: list[str]
|
190
|
+
moved_ks_ids: list[str]
|
191
|
+
|
192
|
+
class ColumnGroupCompactOp:
|
193
|
+
column_group: ColumnGroup
|
194
|
+
fragment_ids: list[int]
|
195
|
+
|
230
196
|
class WriteAheadLog:
|
231
197
|
def __init__(
|
232
198
|
self,
|
@@ -237,7 +203,6 @@ class WriteAheadLog:
|
|
237
203
|
|
238
204
|
entries: list[LogEntry]
|
239
205
|
truncated_up_to: int
|
240
|
-
ks_manifest_handle: ManifestHandle | None
|
241
206
|
|
242
207
|
@property
|
243
208
|
def last_modified_at(self) -> int:
|
@@ -247,11 +212,3 @@ class WriteAheadLog:
|
|
247
212
|
self, asof: int | None = None, since: int | None = None, column_group: ColumnGroup | None = None
|
248
213
|
) -> WriteAheadLog:
|
249
214
|
"""Filters the WAL to entries by the given parameters."""
|
250
|
-
|
251
|
-
def __bytes__(self):
|
252
|
-
"""Serializes the ColumnGroupMetadata to a protobuf buffer."""
|
253
|
-
|
254
|
-
@staticmethod
|
255
|
-
def from_proto(buffer: bytes) -> WriteAheadLog:
|
256
|
-
"""Deserializes a WriteAheadLog from a protobuf buffer."""
|
257
|
-
...
|
spiral/datetime_.py
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
import warnings
|
2
|
+
from datetime import datetime, timedelta, timezone, tzinfo
|
3
|
+
|
4
|
+
_THE_EPOCH = datetime.fromtimestamp(0, tz=timezone.utc)
|
5
|
+
|
6
|
+
|
7
|
+
def local_tz() -> tzinfo:
|
8
|
+
"""Determine this machine's local timezone."""
|
9
|
+
tz = datetime.now().astimezone().tzinfo
|
10
|
+
if tz is None:
|
11
|
+
raise ValueError("Could not determine this machine's local timezone.")
|
12
|
+
return tz
|
13
|
+
|
14
|
+
|
15
|
+
def timestamp_micros(instant: datetime) -> int:
|
16
|
+
"""The number of microseconds between the epoch and the given instant."""
|
17
|
+
if instant.tzinfo is None:
|
18
|
+
warnings.warn("assuming timezone-naive datetime is local time", stacklevel=2)
|
19
|
+
instant = instant.replace(tzinfo=local_tz())
|
20
|
+
return (instant - _THE_EPOCH) // timedelta(microseconds=1)
|
21
|
+
|
22
|
+
|
23
|
+
def from_timestamp_micros(ts: int) -> datetime:
|
24
|
+
"""Convert a timestamp in microseconds to a datetime."""
|
25
|
+
if ts < 0:
|
26
|
+
raise ValueError("Timestamp must be non-negative")
|
27
|
+
return _THE_EPOCH + timedelta(microseconds=ts)
|
spiral/expressions/__init__.py
CHANGED
@@ -1,20 +1,25 @@
|
|
1
1
|
import builtins
|
2
2
|
import functools
|
3
3
|
import operator
|
4
|
+
import warnings
|
4
5
|
from typing import Any
|
5
6
|
|
6
7
|
import pyarrow as pa
|
7
8
|
|
8
|
-
from spiral import _lib,
|
9
|
+
from spiral import _lib, arrow_
|
9
10
|
|
10
11
|
from . import http as http
|
11
12
|
from . import io as io
|
12
13
|
from . import list_ as list
|
14
|
+
from . import mp4 as mp4
|
15
|
+
from . import png as png
|
16
|
+
from . import qoi as qoi
|
13
17
|
from . import refs as refs
|
14
18
|
from . import str_ as str
|
15
19
|
from . import struct as struct
|
20
|
+
from . import text as text
|
16
21
|
from . import tiff as tiff
|
17
|
-
from .base import Expr, ExprLike
|
22
|
+
from .base import Expr, ExprLike, NativeExpr
|
18
23
|
|
19
24
|
__all__ = [
|
20
25
|
"Expr",
|
@@ -42,6 +47,7 @@ __all__ = [
|
|
42
47
|
"not_",
|
43
48
|
"or_",
|
44
49
|
"pack",
|
50
|
+
"keyed",
|
45
51
|
"ref",
|
46
52
|
"refs",
|
47
53
|
"scalar",
|
@@ -50,8 +56,11 @@ __all__ = [
|
|
50
56
|
"struct",
|
51
57
|
"subtract",
|
52
58
|
"tiff",
|
53
|
-
"var",
|
54
59
|
"xor",
|
60
|
+
"png",
|
61
|
+
"qoi",
|
62
|
+
"mp4",
|
63
|
+
"text",
|
55
64
|
]
|
56
65
|
|
57
66
|
# Inline some of the struct expressions since they're so common
|
@@ -67,13 +76,15 @@ def lift(expr: ExprLike) -> Expr:
|
|
67
76
|
# Convert an ExprLike into an Expr.
|
68
77
|
if isinstance(expr, Expr):
|
69
78
|
return expr
|
79
|
+
if isinstance(expr, NativeExpr):
|
80
|
+
return Expr(expr)
|
70
81
|
|
71
82
|
if isinstance(expr, dict):
|
72
83
|
# NOTE: we assume this is a struct expression. We could be smarter and be context aware to determine if
|
73
84
|
# this is in fact a struct scalar, but the user can always create one of those manually.
|
74
85
|
|
75
86
|
# First we un-nest any dot-separated field names
|
76
|
-
expr: dict =
|
87
|
+
expr: dict = arrow_.nest_structs(expr)
|
77
88
|
|
78
89
|
return pack({k: lift(v) for k, v in expr.items()})
|
79
90
|
|
@@ -88,18 +99,30 @@ def lift(expr: ExprLike) -> Expr:
|
|
88
99
|
|
89
100
|
# If the value is struct-like, we un-nest any dot-separated field names
|
90
101
|
if isinstance(expr, pa.StructArray | pa.StructScalar):
|
91
|
-
|
102
|
+
# TODO(marko): Figure out what to do with nullable struct arrays when unpacking them.
|
103
|
+
# We need to merge struct validity into the child validity?
|
104
|
+
if isinstance(expr, pa.StructArray) and expr.null_count != 0:
|
105
|
+
# raise ValueError("lift: cannot lift a struct array with nulls.")
|
106
|
+
warnings.warn("found a struct array with nulls", stacklevel=2)
|
107
|
+
if isinstance(expr, pa.StructScalar) and not expr.is_valid():
|
108
|
+
# raise ValueError("lift: cannot lift a struct scalar with nulls.")
|
109
|
+
warnings.warn("found a struct scalar with nulls", stacklevel=2)
|
110
|
+
return lift(arrow_.nest_structs(expr))
|
92
111
|
|
93
112
|
if isinstance(expr, pa.Array):
|
94
|
-
return Expr(_lib.
|
113
|
+
return Expr(_lib.expr.array_lit(expr))
|
95
114
|
|
96
115
|
# Otherwise, assume it's a scalar.
|
97
116
|
return scalar(expr)
|
98
117
|
|
99
118
|
|
100
|
-
def
|
101
|
-
"""Create a variable expression.
|
102
|
-
|
119
|
+
def key(name: builtins.str) -> Expr:
|
120
|
+
"""Create a variable expression referencing a key column.
|
121
|
+
|
122
|
+
Args:
|
123
|
+
name: variable name
|
124
|
+
"""
|
125
|
+
return Expr(_lib.expr.keyed(name))
|
103
126
|
|
104
127
|
|
105
128
|
def keyed(name: builtins.str, dtype: pa.DataType) -> Expr:
|
@@ -112,20 +135,21 @@ def keyed(name: builtins.str, dtype: pa.DataType) -> Expr:
|
|
112
135
|
name: variable name
|
113
136
|
dtype: must match dtype of the column in the key table.
|
114
137
|
"""
|
115
|
-
return Expr(_lib.
|
138
|
+
return Expr(_lib.expr.keyed(name, dtype))
|
116
139
|
|
117
140
|
|
118
141
|
def scalar(value: Any) -> Expr:
|
119
142
|
"""Create a scalar expression."""
|
120
143
|
if not isinstance(value, pa.Scalar):
|
121
144
|
value = pa.scalar(value)
|
122
|
-
|
145
|
+
# TODO(marko): Use Vortex scalar instead of passing as array.
|
146
|
+
return Expr(_lib.expr.scalar(pa.array([value.as_py()], type=value.type)))
|
123
147
|
|
124
148
|
|
125
149
|
def cast(expr: ExprLike, dtype: pa.DataType) -> Expr:
|
126
150
|
"""Cast an expression into another PyArrow DataType."""
|
127
151
|
expr = lift(expr)
|
128
|
-
return Expr(_lib.
|
152
|
+
return Expr(_lib.expr.cast(expr.__expr__, dtype))
|
129
153
|
|
130
154
|
|
131
155
|
def and_(expr: ExprLike, *exprs: ExprLike) -> Expr:
|
@@ -182,19 +206,18 @@ def negate(expr: ExprLike) -> Expr:
|
|
182
206
|
def not_(expr: ExprLike) -> Expr:
|
183
207
|
"""Negate the given expression."""
|
184
208
|
expr = lift(expr)
|
185
|
-
return Expr(_lib.
|
209
|
+
return Expr(_lib.expr.not_(expr.__expr__))
|
186
210
|
|
187
211
|
|
188
212
|
def is_null(expr: ExprLike) -> Expr:
|
189
213
|
"""Check if the given expression is null."""
|
190
214
|
expr = lift(expr)
|
191
|
-
return Expr(_lib.
|
215
|
+
return Expr(_lib.expr.is_null(expr.__expr__))
|
192
216
|
|
193
217
|
|
194
218
|
def is_not_null(expr: ExprLike) -> Expr:
|
195
|
-
"""Check if the given expression is null."""
|
196
|
-
|
197
|
-
return Expr(_lib.spql.expr.unary("is_not_null", expr.__expr__))
|
219
|
+
"""Check if the given expression is not null."""
|
220
|
+
return not_(is_null(expr))
|
198
221
|
|
199
222
|
|
200
223
|
def add(lhs: ExprLike, rhs: ExprLike) -> Expr:
|
spiral/expressions/base.py
CHANGED
@@ -5,7 +5,7 @@ import pyarrow as pa
|
|
5
5
|
|
6
6
|
from spiral import _lib
|
7
7
|
|
8
|
-
NativeExpr: TypeAlias = _lib.
|
8
|
+
NativeExpr: TypeAlias = _lib.expr.Expr
|
9
9
|
|
10
10
|
|
11
11
|
class Expr:
|
@@ -92,13 +92,13 @@ class Expr:
|
|
92
92
|
return self._binary("mod", other)
|
93
93
|
|
94
94
|
def __neg__(self):
|
95
|
-
return Expr(_lib.
|
95
|
+
return Expr(_lib.expr.unary("neg", self.__expr__))
|
96
96
|
|
97
97
|
def in_(self, other: "ExprLike") -> "Expr":
|
98
98
|
from spiral import expressions as se
|
99
99
|
|
100
100
|
other = se.lift(other)
|
101
|
-
return Expr(_lib.
|
101
|
+
return Expr(_lib.expr.list.contains(other.__expr__, self.__expr__))
|
102
102
|
|
103
103
|
def contains(self, other: "ExprLike") -> "Expr":
|
104
104
|
from spiral import expressions as se
|
@@ -107,7 +107,7 @@ class Expr:
|
|
107
107
|
|
108
108
|
def cast(self, dtype: pa.DataType) -> "Expr":
|
109
109
|
"""Cast the expression result to a different data type."""
|
110
|
-
return Expr(_lib.
|
110
|
+
return Expr(_lib.expr.cast(self.__expr__, dtype))
|
111
111
|
|
112
112
|
def select(self, *paths: str, exclude: list[str] = None) -> "Expr":
|
113
113
|
"""Select fields from a struct-like expression.
|
@@ -142,7 +142,7 @@ class Expr:
|
|
142
142
|
from spiral import expressions as se
|
143
143
|
|
144
144
|
rhs = se.lift(rhs)
|
145
|
-
return Expr(_lib.
|
145
|
+
return Expr(_lib.expr.binary(op, self.__expr__, rhs.__expr__))
|
146
146
|
|
147
147
|
|
148
148
|
ScalarLike: TypeAlias = bool | int | float | str | list | datetime.datetime | None
|
spiral/expressions/list_.py
CHANGED
@@ -29,7 +29,7 @@ def element_at(expr: ExprLike, index: ExprLike) -> Expr:
|
|
29
29
|
|
30
30
|
expr = lift(expr)
|
31
31
|
index = lift(index)
|
32
|
-
return Expr(_lib.
|
32
|
+
return Expr(_lib.expr.list.element_at(expr.__expr__, index.__expr__))
|
33
33
|
|
34
34
|
|
35
35
|
def of(*expr: ExprLike) -> Expr:
|
@@ -0,0 +1,62 @@
|
|
1
|
+
import pyarrow as pa
|
2
|
+
|
3
|
+
from spiral.expressions.base import Expr, ExprLike
|
4
|
+
|
5
|
+
_MP4_RES_DTYPE: pa.DataType = pa.struct(
|
6
|
+
[
|
7
|
+
pa.field("pixels", pa.large_binary()),
|
8
|
+
pa.field("height", pa.uint32()),
|
9
|
+
pa.field("width", pa.uint32()),
|
10
|
+
pa.field("frames", pa.uint32()),
|
11
|
+
]
|
12
|
+
)
|
13
|
+
|
14
|
+
|
15
|
+
# TODO(marko): Support optional range and crop.
|
16
|
+
# IMPORTANT: Frames is currently broken and defaults to full.
|
17
|
+
def read(expr: ExprLike | str, frames: ExprLike | str, crop: ExprLike | str):
|
18
|
+
"""
|
19
|
+
Read referenced cell in a `MP4` format. Requires `ffmpeg`.
|
20
|
+
|
21
|
+
Args:
|
22
|
+
expr: The referenced `Mp4` bytes.
|
23
|
+
A str is assumed to be the `se.keyed` expression.
|
24
|
+
frames: The range of frames to read. Each element must be a list of two uint32,
|
25
|
+
frame start and frame end, or null / empty list to read all frames.
|
26
|
+
A str is assumed to be the `se.keyed` expression.
|
27
|
+
crop: The crop of the frames to read. Each element must be a list of four uint32,
|
28
|
+
x, y, width, height or null / empty list to read full frames.
|
29
|
+
A str is assumed to be the `se.keyed` expression.
|
30
|
+
|
31
|
+
Returns:
|
32
|
+
An array where each element is a decoded cropped video with fields:
|
33
|
+
pixels: RGB8 bytes, frames * width * height * 3.
|
34
|
+
width: Width of the image with type `pa.uint32()`.
|
35
|
+
height: Height of the image with type `pa.uint32()`.
|
36
|
+
frames: Number of frames with type `pa.uint32()`.
|
37
|
+
"""
|
38
|
+
from spiral import _lib
|
39
|
+
from spiral.expressions import keyed, lift
|
40
|
+
|
41
|
+
if isinstance(expr, str):
|
42
|
+
expr = keyed(
|
43
|
+
expr,
|
44
|
+
pa.struct([("__ref__", pa.struct([("id", pa.string()), ("begin", pa.uint64()), ("end", pa.uint64())]))]),
|
45
|
+
)
|
46
|
+
if isinstance(frames, str):
|
47
|
+
frames = keyed(frames, pa.list_(pa.uint32()))
|
48
|
+
if isinstance(crop, str):
|
49
|
+
crop = keyed(crop, pa.list_(pa.uint32()))
|
50
|
+
|
51
|
+
expr = lift(expr)
|
52
|
+
frames = lift(frames)
|
53
|
+
crop = lift(crop)
|
54
|
+
|
55
|
+
return Expr(
|
56
|
+
_lib.expr.video.read(
|
57
|
+
expr.__expr__,
|
58
|
+
frames.__expr__,
|
59
|
+
crop.__expr__,
|
60
|
+
format="mp4",
|
61
|
+
)
|
62
|
+
)
|
@@ -0,0 +1,18 @@
|
|
1
|
+
from spiral.expressions.base import Expr, ExprLike
|
2
|
+
|
3
|
+
|
4
|
+
def encode(expr: ExprLike) -> Expr:
|
5
|
+
"""Encode the given expression as a PNG image.
|
6
|
+
|
7
|
+
Args:
|
8
|
+
expr: The expression to encode.
|
9
|
+
Expects a struct with `pixels`, `width`, `height`, `channels`, `channel_bit_depth` fields.
|
10
|
+
|
11
|
+
Returns:
|
12
|
+
The encoded PNG images.
|
13
|
+
"""
|
14
|
+
from spiral import _lib
|
15
|
+
from spiral.expressions import lift
|
16
|
+
|
17
|
+
expr = lift(expr)
|
18
|
+
return Expr(_lib.expr.img.encode(expr.__expr__, format="png"))
|
@@ -0,0 +1,18 @@
|
|
1
|
+
from spiral.expressions.base import Expr, ExprLike
|
2
|
+
|
3
|
+
|
4
|
+
def encode(expr: ExprLike) -> Expr:
|
5
|
+
"""Encode the given expression as a QOI image.
|
6
|
+
|
7
|
+
Args:
|
8
|
+
expr: The expression to encode.
|
9
|
+
Expects a struct with `pixels`, `width`, `height`, `channels`, `channel_bit_depth` fields.
|
10
|
+
|
11
|
+
Returns:
|
12
|
+
The encoded QOI images.
|
13
|
+
"""
|
14
|
+
from spiral import _lib
|
15
|
+
from spiral.expressions import lift
|
16
|
+
|
17
|
+
expr = lift(expr)
|
18
|
+
return Expr(_lib.expr.img.encode(expr.__expr__, format="qoi"))
|
spiral/expressions/refs.py
CHANGED
@@ -1,10 +1,7 @@
|
|
1
|
-
|
1
|
+
import pyarrow as pa
|
2
2
|
|
3
3
|
from spiral.expressions.base import Expr, ExprLike
|
4
4
|
|
5
|
-
if TYPE_CHECKING:
|
6
|
-
from spiral import Table
|
7
|
-
|
8
5
|
|
9
6
|
def ref(expr: ExprLike, field: str | None = None) -> Expr:
|
10
7
|
"""Store binary values as references. This expression can only be used on write.
|
@@ -22,23 +19,40 @@ def ref(expr: ExprLike, field: str | None = None) -> Expr:
|
|
22
19
|
from spiral.expressions import lift
|
23
20
|
|
24
21
|
expr = lift(expr)
|
25
|
-
return Expr(_lib.
|
22
|
+
return Expr(_lib.expr.refs.ref(expr.__expr__, field))
|
26
23
|
|
27
24
|
|
28
|
-
def deref(expr: ExprLike, field: str | None = None
|
25
|
+
def deref(expr: ExprLike | str, field: str | None = None) -> Expr:
|
29
26
|
"""De-reference referenced values.
|
30
27
|
|
31
28
|
See `ref` for more information on Spiral's reference values. This expression is used to de-reference referenced
|
32
29
|
column back into their original form, e.g. binary.
|
33
30
|
|
34
31
|
Args:
|
35
|
-
expr: The expression to de-reference.
|
32
|
+
expr: The expression to de-reference. A str is assumed to be the `se.keyed` expression.
|
36
33
|
field: If the expr evaluates into struct, the field name of that struct that should be de-referenced.
|
37
34
|
If `None`, the expr must evaluate into a reference type.
|
38
|
-
table (optional): The table to de-reference from, if not available in input expression.
|
39
35
|
"""
|
40
36
|
from spiral import _lib
|
37
|
+
from spiral.expressions import keyed, lift
|
38
|
+
|
39
|
+
if isinstance(expr, str):
|
40
|
+
expr = keyed(
|
41
|
+
expr,
|
42
|
+
pa.struct([("__ref__", pa.struct([("id", pa.string()), ("begin", pa.uint64()), ("end", pa.uint64())]))]),
|
43
|
+
)
|
44
|
+
|
45
|
+
expr = lift(expr)
|
46
|
+
return Expr(_lib.expr.refs.deref(expr.__expr__, field=field))
|
47
|
+
|
48
|
+
|
49
|
+
def nbytes(expr: ExprLike) -> Expr:
|
50
|
+
"""Return the number of bytes in a reference.
|
51
|
+
|
52
|
+
Args:
|
53
|
+
expr: The ref expression to get the number of bytes from.
|
54
|
+
"""
|
41
55
|
from spiral.expressions import lift
|
42
56
|
|
43
57
|
expr = lift(expr)
|
44
|
-
return
|
58
|
+
return expr["__ref__"]["end"] - expr["__ref__"]["begin"]
|
spiral/expressions/struct.py
CHANGED
@@ -12,10 +12,10 @@ def getitem(expr: ExprLike, field: str) -> Expr:
|
|
12
12
|
from spiral import expressions as se
|
13
13
|
|
14
14
|
expr = se.lift(expr)
|
15
|
-
return Expr(_lib.
|
15
|
+
return Expr(_lib.expr.struct.getitem(expr.__expr__, field))
|
16
16
|
|
17
17
|
|
18
|
-
def pack(fields: dict[str, ExprLike]) -> Expr:
|
18
|
+
def pack(fields: dict[str, ExprLike], *, nullable: bool = False) -> Expr:
|
19
19
|
"""Assemble a new struct from the given named fields.
|
20
20
|
|
21
21
|
Args:
|
@@ -23,7 +23,9 @@ def pack(fields: dict[str, ExprLike]) -> Expr:
|
|
23
23
|
"""
|
24
24
|
from spiral import expressions as se
|
25
25
|
|
26
|
-
return Expr(
|
26
|
+
return Expr(
|
27
|
+
_lib.expr.struct.pack(list(fields.keys()), [se.lift(expr).__expr__ for expr in fields.values()], nullable)
|
28
|
+
)
|
27
29
|
|
28
30
|
|
29
31
|
def merge(*structs: "ExprLike") -> Expr:
|
@@ -40,7 +42,7 @@ def merge(*structs: "ExprLike") -> Expr:
|
|
40
42
|
|
41
43
|
if len(structs) == 1:
|
42
44
|
return se.lift(structs[0])
|
43
|
-
return Expr(_lib.
|
45
|
+
return Expr(_lib.expr.struct.merge([se.lift(struct).__expr__ for struct in structs]))
|
44
46
|
|
45
47
|
|
46
48
|
def select(expr: ExprLike, names: list[str] = None, exclude: list[str] = None) -> Expr:
|
@@ -54,4 +56,4 @@ def select(expr: ExprLike, names: list[str] = None, exclude: list[str] = None) -
|
|
54
56
|
from spiral import expressions as se
|
55
57
|
|
56
58
|
expr = se.lift(expr)
|
57
|
-
return Expr(_lib.
|
59
|
+
return Expr(_lib.expr.struct.select(expr.__expr__, names, exclude))
|