pyspiral-0.2.5-cp310-abi3-macosx_11_0_arm64.whl → pyspiral-0.3.1-cp310-abi3-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pyspiral-0.2.5.dist-info → pyspiral-0.3.1.dist-info}/METADATA +8 -6
- {pyspiral-0.2.5.dist-info → pyspiral-0.3.1.dist-info}/RECORD +29 -25
- {pyspiral-0.2.5.dist-info → pyspiral-0.3.1.dist-info}/WHEEL +1 -1
- spiral/__init__.py +3 -1
- spiral/_lib.abi3.so +0 -0
- spiral/api/__init__.py +8 -4
- spiral/api/filesystems.py +1 -1
- spiral/api/tables.py +3 -6
- spiral/catalog.py +15 -0
- spiral/cli/fs.py +2 -2
- spiral/cli/project.py +5 -3
- spiral/core/core/__init__.pyi +34 -6
- spiral/core/spec/__init__.pyi +8 -26
- spiral/dataset.py +221 -20
- spiral/expressions/__init__.py +19 -4
- spiral/expressions/mp4.py +69 -0
- spiral/expressions/png.py +18 -0
- spiral/expressions/qoi.py +18 -0
- spiral/expressions/refs.py +23 -3
- spiral/expressions/tiff.py +88 -88
- spiral/maintenance.py +12 -0
- spiral/proto/_/scandal/__init__.py +78 -11
- spiral/proto/_/spiral/table/__init__.py +53 -2
- spiral/scan_.py +75 -24
- spiral/settings.py +6 -0
- spiral/substrait_.py +1 -1
- spiral/table.py +35 -21
- spiral/txn.py +48 -0
- spiral/config.py +0 -26
- {pyspiral-0.2.5.dist-info → pyspiral-0.3.1.dist-info}/entry_points.txt +0 -0
spiral/dataset.py
CHANGED
@@ -1,22 +1,23 @@
-from typing import
+from typing import Any
 
 import pyarrow as pa
 import pyarrow.compute as pc
-
-if TYPE_CHECKING:
-    import pyarrow.dataset
+import pyarrow.dataset as ds
 
 from spiral import Scan, Table
 
 
-class TableDataset(pa.dataset.Dataset):
+class TableDataset(ds.Dataset):
     def __init__(self, table: Table):
         self._table = table
-
+        # Once table is converted to a dataset, used pinned snapshot.
+        self._asof = table.last_modified_at
+        self._schema: pa.Schema = table._table.get_schema(asof=self._asof).to_arrow()
 
         # We don't actually initialize a Dataset, we just implement enough of the API
         # to fool both DuckDB and Polars.
         # super().__init__()
+        self._last_scan = None
 
     @property
     def schema(self) -> pa.Schema:
@@ -28,7 +29,7 @@ class TableDataset(pa.dataset.Dataset):
         batch_size: int | None = None,
         batch_readahead: int | None = None,
         fragment_readahead: int | None = None,
-        fragment_scan_options:
+        fragment_scan_options: ds.FragmentScanOptions | None = None,
         use_threads: bool = True,
         memory_pool: pa.MemoryPool = None,
     ):
@@ -58,11 +59,11 @@ class TableDataset(pa.dataset.Dataset):
         batch_size: int | None = None,
         batch_readahead: int | None = None,
         fragment_readahead: int | None = None,
-        fragment_scan_options:
+        fragment_scan_options: ds.FragmentScanOptions | None = None,
         use_threads: bool = True,
         memory_pool: pa.MemoryPool = None,
     ):
-        self.scanner(
+        return self.scanner(
             columns,
             filter,
             batch_size,
@@ -99,7 +100,7 @@ class TableDataset(pa.dataset.Dataset):
         batch_size: int | None = None,
         batch_readahead: int | None = None,
         fragment_readahead: int | None = None,
-        fragment_scan_options:
+        fragment_scan_options: ds.FragmentScanOptions | None = None,
         use_threads: bool = True,
         memory_pool: pa.MemoryPool = None,
     ) -> "TableScanner":
@@ -111,11 +112,18 @@ class TableDataset(pa.dataset.Dataset):
             filter.to_substrait(self._schema, allow_arrow_extensions=True),
         )
 
-        scan =
-
-
-
+        scan = (
+            self._table.scan(
+                {c: self._table[c] for c in columns},
+                where=filter,
+                exclude_keys=True,
+                asof=self._asof,
+            )
+            if columns
+            else self._table.scan(where=filter, asof=self._asof)
         )
+        self._last_scan = scan
+
         return TableScanner(scan)
 
     def sort_by(self, sorting, **kwargs):
@@ -129,7 +137,7 @@ class TableDataset(pa.dataset.Dataset):
         batch_size: int | None = None,
         batch_readahead: int | None = None,
         fragment_readahead: int | None = None,
-        fragment_scan_options:
+        fragment_scan_options: ds.FragmentScanOptions | None = None,
         use_threads: bool = True,
         memory_pool: pa.MemoryPool = None,
     ):
@@ -151,7 +159,7 @@ class TableDataset(pa.dataset.Dataset):
         batch_size: int | None = None,
         batch_readahead: int | None = None,
         fragment_readahead: int | None = None,
-        fragment_scan_options:
+        fragment_scan_options: ds.FragmentScanOptions | None = None,
         use_threads: bool = True,
         memory_pool: pa.MemoryPool = None,
     ):
@@ -173,7 +181,7 @@ class TableDataset(pa.dataset.Dataset):
         batch_size: int | None = None,
         batch_readahead: int | None = None,
         fragment_readahead: int | None = None,
-        fragment_scan_options:
+        fragment_scan_options: ds.FragmentScanOptions | None = None,
         use_threads: bool = True,
         memory_pool: pa.MemoryPool = None,
     ):
@@ -189,12 +197,17 @@ class TableDataset(pa.dataset.Dataset):
         ).to_table()
 
 
-class TableScanner(pa.dataset.Scanner):
+class TableScanner(ds.Scanner):
     """A PyArrow Dataset Scanner that reads from a Spiral Table."""
 
-    def __init__(
+    def __init__(
+        self,
+        scan: Scan,
+        key_table: pa.Table | pa.RecordBatchReader | None = None,
+    ):
         self._scan = scan
         self._schema = scan.schema
+        self.key_table = key_table
 
         # We don't actually initialize a Dataset, we just implement enough of the API
         # to fool both DuckDB and Polars.
@@ -233,7 +246,195 @@ class TableScanner(pa.dataset.Scanner):
         return self.to_reader()
 
     def to_reader(self):
-        return self._scan.to_record_batches()
+        return self._scan.to_record_batches(key_table=self.key_table)
 
     def to_table(self):
         return self.to_reader().read_all()
+
+
+class ScanDataset(ds.Dataset):
+    def __init__(
+        self,
+        scan: Scan,
+        key_table: pa.Table | pa.RecordBatchReader | None = None,
+    ):
+        self._scan = scan
+        self._schema: pa.Schema = scan.schema.to_arrow()
+        self._key_table = key_table
+
+        # We don't actually initialize a Dataset, we just implement enough of the API
+        # to fool both DuckDB and Polars.
+        # super().__init__()
+
+    @property
+    def schema(self) -> pa.Schema:
+        return self._schema
+
+    def count_rows(
+        self,
+        filter: pc.Expression | None = None,
+        batch_size: int | None = None,
+        batch_readahead: int | None = None,
+        fragment_readahead: int | None = None,
+        fragment_scan_options: ds.FragmentScanOptions | None = None,
+        use_threads: bool = True,
+        memory_pool: pa.MemoryPool = None,
+    ):
+        return self.scanner(
+            None,
+            filter,
+            batch_size,
+            batch_readahead,
+            fragment_readahead,
+            fragment_scan_options,
+            use_threads,
+            memory_pool,
+        ).count_rows()
+
+    def filter(self, expression: pc.Expression) -> "TableDataset":
+        raise NotImplementedError("filter not implemented")
+
+    def get_fragments(self, filter: pc.Expression | None = None):
+        """TODO(ngates): perhaps we should return ranges as per our split API?"""
+        raise NotImplementedError("get_fragments not implemented")
+
+    def head(
+        self,
+        num_rows: int,
+        columns: list[str] | None = None,
+        filter: pc.Expression | None = None,
+        batch_size: int | None = None,
+        batch_readahead: int | None = None,
+        fragment_readahead: int | None = None,
+        fragment_scan_options: ds.FragmentScanOptions | None = None,
+        use_threads: bool = True,
+        memory_pool: pa.MemoryPool = None,
+    ):
+        return self.scanner(
+            columns,
+            filter,
+            batch_size,
+            batch_readahead,
+            fragment_readahead,
+            fragment_scan_options,
+            use_threads,
+            memory_pool,
+        ).head(num_rows)
+
+    def join(
+        self,
+        right_dataset,
+        keys,
+        right_keys=None,
+        join_type=None,
+        left_suffix=None,
+        right_suffix=None,
+        coalesce_keys=True,
+        use_threads=True,
+    ):
+        raise NotImplementedError("join not implemented")
+
+    def join_asof(self, right_dataset, on, by, tolerance, right_on=None, right_by=None):
+        raise NotImplementedError("join_asof not implemented")
+
+    def replace_schema(self, schema: pa.Schema) -> "TableDataset":
+        raise NotImplementedError("replace_schema not implemented")
+
+    def scanner(
+        self,
+        columns: list[str] | None = None,
+        filter: pc.Expression | None = None,
+        batch_size: int | None = None,
+        batch_readahead: int | None = None,
+        fragment_readahead: int | None = None,
+        fragment_scan_options: ds.FragmentScanOptions | None = None,
+        use_threads: bool = True,
+        memory_pool: pa.MemoryPool = None,
+    ) -> "TableScanner":
+        if columns is not None:
+            columns = set(columns)
+            names = set(self.schema.names)
+            if len(columns - names) != 0 or len(names - columns) != 0:
+                raise NotImplementedError("columns", columns, self.schema)
+        if filter is not None:
+            raise NotImplementedError("filter")
+        if batch_size is not None:
+            raise NotImplementedError("batch_size")
+        if batch_readahead is not None:
+            raise NotImplementedError("batch_readahead")
+        if fragment_readahead is not None:
+            raise NotImplementedError("fragment_readahead")
+        if fragment_scan_options is not None:
+            raise NotImplementedError("fragment_scan_options")
+
+        return TableScanner(self._scan, key_table=self._key_table)
+
+    def sort_by(self, sorting, **kwargs):
+        raise NotImplementedError("sort_by not implemented")
+
+    def take(
+        self,
+        indices: pa.Array | Any,
+        columns: list[str] | None = None,
+        filter: pc.Expression | None = None,
+        batch_size: int | None = None,
+        batch_readahead: int | None = None,
+        fragment_readahead: int | None = None,
+        fragment_scan_options: ds.FragmentScanOptions | None = None,
+        use_threads: bool = True,
+        memory_pool: pa.MemoryPool = None,
+    ):
+        return self.scanner(
+            columns,
+            filter,
+            batch_size,
+            batch_readahead,
+            fragment_readahead,
+            fragment_scan_options,
+            use_threads,
+            memory_pool,
+        ).take(indices)
+
+    def to_batches(
+        self,
+        columns: list[str] | None = None,
+        filter: pc.Expression | None = None,
+        batch_size: int | None = None,
+        batch_readahead: int | None = None,
+        fragment_readahead: int | None = None,
+        fragment_scan_options: ds.FragmentScanOptions | None = None,
+        use_threads: bool = True,
+        memory_pool: pa.MemoryPool = None,
+    ):
+        return self.scanner(
+            columns,
+            filter,
+            batch_size,
+            batch_readahead,
+            fragment_readahead,
+            fragment_scan_options,
+            use_threads,
+            memory_pool,
+        ).to_batches()
+
+    def to_table(
+        self,
+        columns=None,
+        filter: pc.Expression | None = None,
+        batch_size: int | None = None,
+        batch_readahead: int | None = None,
+        fragment_readahead: int | None = None,
+        fragment_scan_options: ds.FragmentScanOptions | None = None,
+        use_threads: bool = True,
+        memory_pool: pa.MemoryPool = None,
+    ):
+        return self.scanner(
+            columns,
+            filter,
+            batch_size,
+            batch_readahead,
+            fragment_readahead,
+            fragment_scan_options,
+            use_threads,
+            memory_pool,
+        ).to_table()
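Taken together, `TableDataset` now pins a snapshot at construction time (`asof = table.last_modified_at`) so later queries see a stable view, and the new `ScanDataset` wraps an already-built `Scan`, forwarding an optional key table through `TableScanner.to_reader`. A minimal usage sketch, assuming a `spiral.Table` is already in hand; the DuckDB and Polars entry points are their standard public APIs, while the `table` variable itself is a placeholder, not something this diff provides:

    import duckdb
    import polars as pl

    from spiral.dataset import TableDataset

    table = ...  # a spiral.Table obtained elsewhere (placeholder)
    dataset = TableDataset(table)  # pins the snapshot at table.last_modified_at

    # DuckDB's replacement scans only need the object to quack like a
    # pyarrow.dataset.Dataset: schema, scanner(), and the scanner's reader.
    count = duckdb.sql("SELECT COUNT(*) FROM dataset").fetchone()

    # Polars drives the same shim through its pyarrow-dataset entry point.
    frames = pl.scan_pyarrow_dataset(dataset).collect()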
spiral/expressions/__init__.py
CHANGED
@@ -10,6 +10,9 @@ from spiral import _lib, arrow
 from . import http as http
 from . import io as io
 from . import list_ as list
+from . import mp4 as mp4
+from . import png as png
+from . import qoi as qoi
 from . import refs as refs
 from . import str_ as str
 from . import struct as struct
@@ -42,6 +45,7 @@ __all__ = [
     "not_",
     "or_",
     "pack",
+    "keyed",
     "ref",
     "refs",
     "scalar",
@@ -52,6 +56,9 @@ __all__ = [
     "tiff",
     "var",
     "xor",
+    "png",
+    "qoi",
+    "mp4",
 ]
 
 # Inline some of the struct expressions since they're so common
@@ -88,6 +95,10 @@ def lift(expr: ExprLike) -> Expr:
 
     # If the value is struct-like, we un-nest any dot-separated field names
     if isinstance(expr, pa.StructArray | pa.StructScalar):
+        if isinstance(expr, pa.StructArray) and expr.null_count != 0:
+            raise ValueError("lift: cannot lift a struct array with nulls.")
+        if isinstance(expr, pa.StructArray) and not expr.is_valid():
+            raise ValueError("lift: cannot lift a struct scalar with nulls.")
         return lift(arrow.nest_structs(expr))
 
     if isinstance(expr, pa.Array):
@@ -97,9 +108,13 @@ def lift(expr: ExprLike) -> Expr:
         return scalar(expr)
 
 
-def
-    """Create a variable expression.
-
+def key(name: builtins.str) -> Expr:
+    """Create a variable expression referencing a key column.
+
+    Args:
+        name: variable name
+    """
+    return Expr(_lib.spql.expr.keyed(name))
 
 
 def keyed(name: builtins.str, dtype: pa.DataType) -> Expr:
@@ -112,7 +127,7 @@ def keyed(name: builtins.str, dtype: pa.DataType) -> Expr:
         name: variable name
         dtype: must match dtype of the column in the key table.
     """
-    return Expr(_lib.spql.expr.keyed(
+    return Expr(_lib.spql.expr.keyed(name, dtype))
 
 
 def scalar(value: Any) -> Expr:
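`keyed` now passes both the key-column name and its Arrow dtype through to `_lib.spql.expr.keyed`, while the new `key` helper takes only a name and leaves the dtype to be inferred from the key table. A short sketch, assuming a key column named `uri` (the column name is hypothetical):

    import pyarrow as pa

    import spiral.expressions as se

    by_name = se.key("uri")               # dtype inferred from the key table
    typed = se.keyed("uri", pa.string())  # dtype pinned; must match the key table's column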
spiral/expressions/mp4.py
ADDED
@@ -0,0 +1,69 @@
+from typing import TYPE_CHECKING
+
+import pyarrow as pa
+
+from spiral.expressions.base import Expr, ExprLike
+
+if TYPE_CHECKING:
+    from spiral import Table
+
+_MP4_RES_DTYPE: pa.DataType = pa.struct(
+    [
+        pa.field("pixels", pa.large_binary()),
+        pa.field("height", pa.uint32()),
+        pa.field("width", pa.uint32()),
+        pa.field("frames", pa.uint32()),
+    ]
+)
+
+
+# TODO(marko): Support optional range and crop.
+# IMPORTANT: Frames is currently broken and defaults to full.
+def read(expr: ExprLike | str, frames: ExprLike | str, crop: ExprLike | str, *, table: "Table" = None):
+    """
+    Read referenced cell in a `MP4` format. Requires `ffmpeg`.
+
+    Args:
+        expr: The referenced `Mp4` bytes.
+            A str is assumed to be the `se.keyed` expression.
+        frames: The range of frames to read. Each element must be a list of two uint32,
+            frame start and frame end, or null / empty list to read all frames.
+            A str is assumed to be the `se.keyed` expression.
+        crop: The crop of the frames to read. Each element must be a list of four uint32,
+            x, y, width, height or null / empty list to read full frames.
+            A str is assumed to be the `se.keyed` expression.
+        table (optional): The table to de-reference from, if not available in input expression.
+
+    Returns:
+        An array where each element is a decoded cropped video with fields:
+            pixels: RGB8 bytes, frames * width * height * 3.
+            width: Width of the image with type `pa.uint32()`.
+            height: Height of the image with type `pa.uint32()`.
+            frames: Number of frames with type `pa.uint32()`.
+    """
+    from spiral import _lib
+    from spiral.expressions import keyed, lift
+
+    if isinstance(expr, str):
+        expr = keyed(
+            expr,
+            pa.struct([("__ref__", pa.struct([("id", pa.string()), ("begin", pa.uint64()), ("end", pa.uint64())]))]),
+        )
+    if isinstance(frames, str):
+        frames = keyed(frames, pa.list_(pa.uint32()))
+    if isinstance(crop, str):
+        crop = keyed(crop, pa.list_(pa.uint32()))
+
+    expr = lift(expr)
+    frames = lift(frames)
+    crop = lift(crop)
+
+    return Expr(
+        _lib.spql.expr.video.read(
+            expr.__expr__,
+            frames.__expr__,
+            crop.__expr__,
+            format="mp4",
+            table=table._table if table is not None else None,
+        )
+    )
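A call sketch for the new module; the column names below are hypothetical, and note the module's own warning that the `frames` range is currently broken and defaults to reading everything:

    import spiral.expressions as se

    # Per the docstring, plain strings are treated as `se.keyed` expressions
    # over the key table.
    videos = se.mp4.read(
        "video_ref",    # referenced MP4 bytes
        "frame_range",  # [start, end] per row; null/empty reads all frames
        "crop_rect",    # [x, y, width, height] per row; null/empty reads full frames
    )
    # Each result element is a struct matching _MP4_RES_DTYPE: pixels
    # (RGB8 bytes, frames * width * height * 3), height, width, frames.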
spiral/expressions/png.py
ADDED
@@ -0,0 +1,18 @@
+from spiral.expressions.base import Expr, ExprLike
+
+
+def encode(expr: ExprLike) -> Expr:
+    """Encode the given expression as a PNG image.
+
+    Args:
+        expr: The expression to encode.
+            Expects a struct with `pixels`, `width`, `height`, `channels`, `channel_bit_depth` fields.
+
+    Returns:
+        The encoded PNG images.
+    """
+    from spiral import _lib
+    from spiral.expressions import lift
+
+    expr = lift(expr)
+    return Expr(_lib.spql.expr.img.encode(expr.__expr__, format="png"))
spiral/expressions/qoi.py
ADDED
@@ -0,0 +1,18 @@
+from spiral.expressions.base import Expr, ExprLike
+
+
+def encode(expr: ExprLike) -> Expr:
+    """Encode the given expression as a QOI image.
+
+    Args:
+        expr: The expression to encode.
+            Expects a struct with `pixels`, `width`, `height`, `channels`, `channel_bit_depth` fields.
+
+    Returns:
+        The encoded QOI images.
+    """
+    from spiral import _lib
+    from spiral.expressions import lift
+
+    expr = lift(expr)
+    return Expr(_lib.spql.expr.img.encode(expr.__expr__, format="qoi"))
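`png.encode` and `qoi.encode` are thin wrappers over the same `img.encode` kernel, differing only in the `format` argument. A sketch with a hand-built one-pixel image; the field names come from the docstrings, but the exact Arrow dtypes used below are assumptions:

    import pyarrow as pa

    import spiral.expressions as se

    # One 1x1 white RGB pixel, shaped as the documented input struct.
    images = se.lift(
        pa.array(
            [{"pixels": b"\xff\xff\xff", "width": 1, "height": 1, "channels": 3, "channel_bit_depth": 8}],
            type=pa.struct(
                [
                    pa.field("pixels", pa.large_binary()),
                    pa.field("width", pa.uint32()),
                    pa.field("height", pa.uint32()),
                    pa.field("channels", pa.uint8()),           # dtype assumed
                    pa.field("channel_bit_depth", pa.uint8()),  # dtype assumed
                ]
            ),
        )
    )
    pngs = se.png.encode(images)
    qois = se.qoi.encode(images)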
spiral/expressions/refs.py
CHANGED
@@ -1,5 +1,7 @@
 from typing import TYPE_CHECKING
 
+import pyarrow as pa
+
 from spiral.expressions.base import Expr, ExprLike
 
 if TYPE_CHECKING:
@@ -25,20 +27,38 @@ def ref(expr: ExprLike, field: str | None = None) -> Expr:
     return Expr(_lib.spql.expr.ref(expr.__expr__, field))
 
 
-def deref(expr: ExprLike, field: str | None = None, table: "Table" = None) -> Expr:
+def deref(expr: ExprLike | str, field: str | None = None, *, table: "Table" = None) -> Expr:
     """De-reference referenced values.
 
     See `ref` for more information on Spiral's reference values. This expression is used to de-reference referenced
     column back into their original form, e.g. binary.
 
     Args:
-        expr: The expression to de-reference.
+        expr: The expression to de-reference. A str is assumed to be the `se.keyed` expression.
         field: If the expr evaluates into struct, the field name of that struct that should be de-referenced.
             If `None`, the expr must evaluate into a reference type.
         table (optional): The table to de-reference from, if not available in input expression.
     """
     from spiral import _lib
+    from spiral.expressions import keyed, lift
+
+    if isinstance(expr, str):
+        expr = keyed(
+            expr,
+            pa.struct([("__ref__", pa.struct([("id", pa.string()), ("begin", pa.uint64()), ("end", pa.uint64())]))]),
+        )
+
+    expr = lift(expr)
+    return Expr(_lib.spql.expr.deref(expr.__expr__, field=field, table=table._table if table is not None else None))
+
+
+def nbytes(expr: ExprLike) -> Expr:
+    """Return the number of bytes in a reference.
+
+    Args:
+        expr: The ref expression to get the number of bytes from.
+    """
     from spiral.expressions import lift
 
     expr = lift(expr)
-    return
+    return expr["__ref__"]["end"] - expr["__ref__"]["begin"]
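Because a Spiral reference is just a struct carrying `id`, `begin`, and `end`, `nbytes` can report sizes without touching storage: it is literally `end - begin` over the `__ref__` field. A sketch; the `blob` column name is hypothetical, and the dtype mirrors the one `deref` builds internally for string arguments:

    import pyarrow as pa

    from spiral.expressions import keyed, refs

    ref_dtype = pa.struct(
        [("__ref__", pa.struct([("id", pa.string()), ("begin", pa.uint64()), ("end", pa.uint64())]))]
    )

    blob_ref = keyed("blob", ref_dtype)  # equivalent to passing "blob" straight to deref
    data = refs.deref(blob_ref)          # materializes the referenced bytes
    size = refs.nbytes(blob_ref)         # end - begin; no payload is read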