pyspiral 0.6.9__cp312-abi3-macosx_11_0_arm64.whl → 0.7.12__cp312-abi3-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pyspiral-0.6.9.dist-info → pyspiral-0.7.12.dist-info}/METADATA +9 -8
- {pyspiral-0.6.9.dist-info → pyspiral-0.7.12.dist-info}/RECORD +53 -45
- {pyspiral-0.6.9.dist-info → pyspiral-0.7.12.dist-info}/entry_points.txt +1 -0
- spiral/__init__.py +20 -0
- spiral/_lib.abi3.so +0 -0
- spiral/api/__init__.py +1 -1
- spiral/api/client.py +1 -1
- spiral/api/types.py +1 -0
- spiral/cli/admin.py +2 -2
- spiral/cli/app.py +8 -4
- spiral/cli/fs.py +4 -4
- spiral/cli/iceberg.py +1 -1
- spiral/cli/key_spaces.py +15 -1
- spiral/cli/login.py +4 -3
- spiral/cli/orgs.py +8 -7
- spiral/cli/projects.py +4 -4
- spiral/cli/state.py +5 -3
- spiral/cli/tables.py +59 -36
- spiral/cli/telemetry.py +1 -1
- spiral/cli/types.py +2 -2
- spiral/cli/workloads.py +3 -3
- spiral/client.py +69 -22
- spiral/core/client/__init__.pyi +48 -13
- spiral/core/config/__init__.pyi +47 -0
- spiral/core/expr/__init__.pyi +15 -0
- spiral/core/expr/images/__init__.pyi +3 -0
- spiral/core/expr/list_/__init__.pyi +4 -0
- spiral/core/expr/refs/__init__.pyi +4 -0
- spiral/core/expr/str_/__init__.pyi +3 -0
- spiral/core/expr/struct_/__init__.pyi +6 -0
- spiral/core/expr/text/__init__.pyi +5 -0
- spiral/core/expr/udf/__init__.pyi +14 -0
- spiral/core/expr/video/__init__.pyi +3 -0
- spiral/core/table/__init__.pyi +37 -2
- spiral/core/table/spec/__init__.pyi +6 -4
- spiral/dataloader.py +52 -38
- spiral/dataset.py +10 -1
- spiral/enrichment.py +304 -0
- spiral/expressions/__init__.py +21 -23
- spiral/expressions/base.py +9 -4
- spiral/expressions/file.py +17 -0
- spiral/expressions/http.py +11 -80
- spiral/expressions/s3.py +16 -0
- spiral/expressions/tiff.py +2 -3
- spiral/expressions/udf.py +38 -24
- spiral/iceberg.py +3 -3
- spiral/project.py +34 -6
- spiral/scan.py +80 -33
- spiral/settings.py +19 -97
- spiral/streaming_/stream.py +1 -1
- spiral/table.py +40 -10
- spiral/transaction.py +99 -2
- spiral/expressions/io.py +0 -100
- spiral/expressions/mp4.py +0 -62
- spiral/expressions/png.py +0 -18
- spiral/expressions/qoi.py +0 -18
- spiral/expressions/refs.py +0 -58
- {pyspiral-0.6.9.dist-info → pyspiral-0.7.12.dist-info}/WHEEL +0 -0
spiral/table.py
CHANGED
|
@@ -3,6 +3,7 @@ from typing import TYPE_CHECKING, Any
|
|
|
3
3
|
|
|
4
4
|
from spiral.core.table import Table as CoreTable
|
|
5
5
|
from spiral.core.table.spec import Schema
|
|
6
|
+
from spiral.enrichment import Enrichment
|
|
6
7
|
from spiral.expressions.base import Expr, ExprLike
|
|
7
8
|
from spiral.settings import settings
|
|
8
9
|
from spiral.snapshot import Snapshot
|
|
@@ -12,12 +13,11 @@ if TYPE_CHECKING:
|
|
|
12
13
|
import duckdb
|
|
13
14
|
import polars as pl
|
|
14
15
|
import pyarrow.dataset as ds
|
|
15
|
-
import streaming
|
|
16
|
-
import torch.utils.data as torchdata # noqa
|
|
17
16
|
|
|
18
17
|
from spiral.client import Spiral
|
|
19
18
|
from spiral.dataloader import SpiralDataLoader
|
|
20
19
|
from spiral.key_space_index import KeySpaceIndex
|
|
20
|
+
from spiral.streaming_ import SpiralStream
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
class Table(Expr):
|
|
@@ -50,6 +50,14 @@ class Table(Expr):
|
|
|
50
50
|
"""Returns the fully qualified identifier of the table."""
|
|
51
51
|
return self._identifier or self.table_id
|
|
52
52
|
|
|
53
|
+
@property
|
|
54
|
+
def project(self) -> str | None:
|
|
55
|
+
"""Returns the project of the table."""
|
|
56
|
+
if self._identifier is None:
|
|
57
|
+
return None
|
|
58
|
+
project, _, _ = self._identifier.split(".")
|
|
59
|
+
return project
|
|
60
|
+
|
|
53
61
|
@property
|
|
54
62
|
def dataset(self) -> str | None:
|
|
55
63
|
"""Returns the dataset of the table."""
|
|
@@ -75,7 +83,7 @@ class Table(Expr):
|
|
|
75
83
|
def __repr__(self):
|
|
76
84
|
return f'Table("{self.identifier}")'
|
|
77
85
|
|
|
78
|
-
def __getitem__(self, item: str) -> Expr:
|
|
86
|
+
def __getitem__(self, item: str | int | list[str]) -> Expr:
|
|
79
87
|
return super().__getitem__(item)
|
|
80
88
|
|
|
81
89
|
def select(self, *paths: str, exclude: list[str] = None) -> "Expr":
|
|
@@ -110,6 +118,28 @@ class Table(Expr):
|
|
|
110
118
|
partition_size_bytes=partition_size_bytes,
|
|
111
119
|
)
|
|
112
120
|
|
|
121
|
+
def enrich(
|
|
122
|
+
self,
|
|
123
|
+
*projections: ExprLike,
|
|
124
|
+
where: ExprLike | None = None,
|
|
125
|
+
) -> Enrichment:
|
|
126
|
+
"""Returns an Enrichment object that, when applied, produces new columns.
|
|
127
|
+
|
|
128
|
+
Enrichment can be applied in different ways, e.g. distributed.
|
|
129
|
+
|
|
130
|
+
:param projections: Projection expressions deriving new columns to write back.
|
|
131
|
+
Expressions can be over multiple Spiral tables, but all tables including
|
|
132
|
+
this one must share the same key schema.
|
|
133
|
+
:param where: Optional filter expression to apply when reading the input tables.
|
|
134
|
+
"""
|
|
135
|
+
from spiral import expressions as se
|
|
136
|
+
|
|
137
|
+
projection = se.merge(*projections)
|
|
138
|
+
if where is not None:
|
|
139
|
+
where = se.lift(where)
|
|
140
|
+
|
|
141
|
+
return Enrichment(self, projection, where)
|
|
142
|
+
|
|
113
143
|
def drop_columns(self, column_paths: list[str]) -> None:
|
|
114
144
|
"""
|
|
115
145
|
Drops the specified columns from the table.
|
|
@@ -136,7 +166,7 @@ class Table(Expr):
|
|
|
136
166
|
it is important that the primary key columns are unique within the transaction.
|
|
137
167
|
The behavior is undefined if this is not the case.
|
|
138
168
|
"""
|
|
139
|
-
return Transaction(self.spiral.
|
|
169
|
+
return Transaction(self.spiral.core.transaction(self.core, settings().file_format, retries=retries))
|
|
140
170
|
|
|
141
171
|
def to_dataset(self) -> "ds.Dataset":
|
|
142
172
|
"""Returns a PyArrow Dataset representing the table."""
|
|
@@ -175,7 +205,7 @@ class Table(Expr):
|
|
|
175
205
|
if index.asof == 0:
|
|
176
206
|
raise ValueError("Index have to be synced before it can be used.")
|
|
177
207
|
|
|
178
|
-
shards = self.spiral.
|
|
208
|
+
shards = self.spiral.internal.compute_shards(index=index.core)
|
|
179
209
|
|
|
180
210
|
return self.spiral.scan(
|
|
181
211
|
projection if projection is not None else index.projection,
|
|
@@ -208,7 +238,7 @@ class Table(Expr):
|
|
|
208
238
|
if index.asof == 0:
|
|
209
239
|
raise ValueError("Index have to be synced before it can be used.")
|
|
210
240
|
|
|
211
|
-
shards = self.spiral.
|
|
241
|
+
shards = self.spiral.core.internal.compute_shards(index=index.core)
|
|
212
242
|
|
|
213
243
|
return self.spiral.scan(
|
|
214
244
|
projection if projection is not None else index.projection,
|
|
@@ -240,7 +270,7 @@ class Table(Expr):
|
|
|
240
270
|
if index.asof == 0:
|
|
241
271
|
raise ValueError("Index have to be synced before it can be used.")
|
|
242
272
|
|
|
243
|
-
shards = self.spiral.
|
|
273
|
+
shards = self.spiral.core.internal.compute_shards(index=index.core)
|
|
244
274
|
|
|
245
275
|
return self.spiral.scan(
|
|
246
276
|
index.projection,
|
|
@@ -255,7 +285,7 @@ class Table(Expr):
|
|
|
255
285
|
projection: Expr | None = None,
|
|
256
286
|
cache_dir: str | None = None,
|
|
257
287
|
shard_row_block_size: int | None = None,
|
|
258
|
-
) -> "
|
|
288
|
+
) -> "SpiralStream":
|
|
259
289
|
"""Returns a stream to be used with MosaicML's StreamingDataset.
|
|
260
290
|
|
|
261
291
|
Requires `streaming` package to be installed.
|
|
@@ -282,7 +312,7 @@ class Table(Expr):
|
|
|
282
312
|
where=index.filter,
|
|
283
313
|
asof=index.asof,
|
|
284
314
|
)
|
|
285
|
-
shards = self.spiral.
|
|
315
|
+
shards = self.spiral.internal.compute_shards(index=index.core)
|
|
286
316
|
|
|
287
317
|
return SpiralStream(
|
|
288
318
|
sp=self.spiral,
|
|
@@ -290,4 +320,4 @@ class Table(Expr):
|
|
|
290
320
|
shards=shards,
|
|
291
321
|
cache_dir=cache_dir,
|
|
292
322
|
shard_row_block_size=shard_row_block_size,
|
|
293
|
-
)
|
|
323
|
+
)
|
spiral/transaction.py
CHANGED
|
@@ -1,5 +1,13 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
from spiral.core.table import KeyRange
|
|
1
5
|
from spiral.core.table import Transaction as CoreTransaction
|
|
6
|
+
from spiral.core.table.spec import Operation
|
|
2
7
|
from spiral.expressions.base import ExprLike
|
|
8
|
+
from spiral.scan import Scan
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)
|
|
3
11
|
|
|
4
12
|
|
|
5
13
|
class Transaction:
|
|
@@ -17,6 +25,10 @@ class Transaction:
|
|
|
17
25
|
"""The status of the transaction."""
|
|
18
26
|
return self._core.status
|
|
19
27
|
|
|
28
|
+
def is_empty(self) -> bool:
|
|
29
|
+
"""Check if the transaction has no operations."""
|
|
30
|
+
return self._core.is_empty()
|
|
31
|
+
|
|
20
32
|
def __enter__(self):
|
|
21
33
|
return self
|
|
22
34
|
|
|
@@ -39,6 +51,27 @@ class Transaction:
|
|
|
39
51
|
|
|
40
52
|
self._core.write(record_batches, partition_size_bytes=partition_size_bytes)
|
|
41
53
|
|
|
54
|
+
def writeback(
|
|
55
|
+
self,
|
|
56
|
+
scan: Scan,
|
|
57
|
+
*,
|
|
58
|
+
key_range: KeyRange | None = None,
|
|
59
|
+
partition_size_bytes: int | None = None,
|
|
60
|
+
batch_readahead: int | None = None,
|
|
61
|
+
):
|
|
62
|
+
"""Write back the results of a scan to the table.
|
|
63
|
+
|
|
64
|
+
:param scan: The scan to write back.
|
|
65
|
+
The scan does NOT need to be over the same table as transaction,
|
|
66
|
+
but it does need to have the same key schema.
|
|
67
|
+
:param key_range: Optional key range to limit the writeback to.
|
|
68
|
+
:param partition_size_bytes: The maximum partition size in bytes.
|
|
69
|
+
:param batch_readahead: The number of batches to read ahead when evaluating the scan.
|
|
70
|
+
"""
|
|
71
|
+
self._core.writeback(
|
|
72
|
+
scan.core, key_range=key_range, partition_size_bytes=partition_size_bytes, batch_readahead=batch_readahead
|
|
73
|
+
)
|
|
74
|
+
|
|
42
75
|
def drop_columns(self, column_paths: list[str]):
|
|
43
76
|
"""
|
|
44
77
|
Drops the specified columns from the table.
|
|
@@ -49,9 +82,73 @@ class Transaction:
|
|
|
49
82
|
"""
|
|
50
83
|
self._core.drop_columns(column_paths)
|
|
51
84
|
|
|
52
|
-
def
|
|
85
|
+
def take(self) -> list[Operation]:
|
|
86
|
+
"""Take the operations from the transaction
|
|
87
|
+
|
|
88
|
+
Transaction can no longer be committed or aborted after calling this method.
|
|
89
|
+
."""
|
|
90
|
+
return self._core.take()
|
|
91
|
+
|
|
92
|
+
def include(self, ops: list[Operation]):
|
|
93
|
+
"""Include the given operations in the transaction.
|
|
94
|
+
|
|
95
|
+
Checks for conflicts between the included operations and any existing operations.
|
|
96
|
+
"""
|
|
97
|
+
self._core.include(ops)
|
|
98
|
+
|
|
99
|
+
def commit(self, *, compact: bool = False, tx_dump: str | None = None):
|
|
53
100
|
"""Commit the transaction."""
|
|
54
|
-
|
|
101
|
+
if tx_dump is not None:
|
|
102
|
+
try:
|
|
103
|
+
# Create parent directories if they don't exist
|
|
104
|
+
dump_path = Path(tx_dump)
|
|
105
|
+
dump_path.parent.mkdir(parents=True, exist_ok=True)
|
|
106
|
+
|
|
107
|
+
# Write operations to a JSONL file
|
|
108
|
+
with open(dump_path, "w") as f:
|
|
109
|
+
for op in self._core.ops():
|
|
110
|
+
f.write(op.to_json() + "\n")
|
|
111
|
+
|
|
112
|
+
logger.info(f"Transaction dumped to {tx_dump}")
|
|
113
|
+
except Exception as e:
|
|
114
|
+
logger.error(f"Failed to dump transaction to {tx_dump}: {e}")
|
|
115
|
+
|
|
116
|
+
self._core.commit(compact=compact)
|
|
117
|
+
|
|
118
|
+
@staticmethod
|
|
119
|
+
def load_dumps(*tx_dump: str) -> list[Operation]:
|
|
120
|
+
"""Load a transaction from a dump file."""
|
|
121
|
+
import json
|
|
122
|
+
|
|
123
|
+
dumps = list(tx_dump)
|
|
124
|
+
ops: list[Operation] = []
|
|
125
|
+
|
|
126
|
+
for dump in dumps:
|
|
127
|
+
with open(dump) as f:
|
|
128
|
+
lines = f.readlines()
|
|
129
|
+
|
|
130
|
+
for line in lines:
|
|
131
|
+
line = line.strip()
|
|
132
|
+
if not line:
|
|
133
|
+
continue
|
|
134
|
+
|
|
135
|
+
# Each line may contain multiple JSON objects concatenated together
|
|
136
|
+
# This is due to a bug in the dump writing code.
|
|
137
|
+
# Use JSONDecoder to parse them one by one
|
|
138
|
+
decoder = json.JSONDecoder()
|
|
139
|
+
idx = 0
|
|
140
|
+
while idx < len(line):
|
|
141
|
+
try:
|
|
142
|
+
obj, end_idx = decoder.raw_decode(line, idx)
|
|
143
|
+
ops.append(Operation.from_json(json.dumps(obj)))
|
|
144
|
+
idx = end_idx
|
|
145
|
+
# Skip whitespace between JSON objects
|
|
146
|
+
while idx < len(line) and line[idx].isspace():
|
|
147
|
+
idx += 1
|
|
148
|
+
except json.JSONDecodeError as e:
|
|
149
|
+
raise ValueError(f"Failed to parse JSON at position {idx} in line: {line}") from e
|
|
150
|
+
|
|
151
|
+
return ops
|
|
55
152
|
|
|
56
153
|
def abort(self):
|
|
57
154
|
"""Abort the transaction."""
|
spiral/expressions/io.py
DELETED
|
@@ -1,100 +0,0 @@
|
|
|
1
|
-
import tarfile
|
|
2
|
-
from io import BytesIO
|
|
3
|
-
|
|
4
|
-
import pyarrow as pa
|
|
5
|
-
|
|
6
|
-
from spiral.expressions.base import Expr, ExprLike
|
|
7
|
-
from spiral.expressions.struct import pack
|
|
8
|
-
from spiral.expressions.udf import UDF
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
def read_file(path: ExprLike) -> Expr:
|
|
12
|
-
"""
|
|
13
|
-
Read file path(s) from disk into a struct with a single field "bytes" containing the file contents.
|
|
14
|
-
|
|
15
|
-
Args:
|
|
16
|
-
path: Expression evaluating to an array of strings representing local disk paths.
|
|
17
|
-
"""
|
|
18
|
-
to_pack = {"path": path}
|
|
19
|
-
return FileRead()(pack(to_pack))
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
class FileRead(UDF):
|
|
23
|
-
RES_DTYPE: pa.DataType = pa.struct(
|
|
24
|
-
[
|
|
25
|
-
pa.field("bytes", pa.large_binary()),
|
|
26
|
-
]
|
|
27
|
-
)
|
|
28
|
-
|
|
29
|
-
def __init__(self):
|
|
30
|
-
super().__init__("file.read")
|
|
31
|
-
|
|
32
|
-
def return_type(self, *input_types: pa.DataType) -> pa.DataType:
|
|
33
|
-
return FileRead.RES_DTYPE
|
|
34
|
-
|
|
35
|
-
def invoke(self, *input_args: pa.Array) -> pa.Array:
|
|
36
|
-
if len(input_args) != 1:
|
|
37
|
-
raise ValueError(f"Expected 1 argument, got {len(input_args)}")
|
|
38
|
-
arg = input_args[0]
|
|
39
|
-
|
|
40
|
-
res = []
|
|
41
|
-
for req in arg:
|
|
42
|
-
with open(req["path"].as_py(), "rb") as f:
|
|
43
|
-
res.append({"bytes": f.read()})
|
|
44
|
-
|
|
45
|
-
return pa.array(res, type=FileRead.RES_DTYPE)
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
def read_tar(path: ExprLike = None, bytes_: ExprLike = None) -> "Expr":
|
|
49
|
-
# Untar a vector of paths / byte arrays representing tarballs.
|
|
50
|
-
if path is None and bytes_ is None:
|
|
51
|
-
raise ValueError("Expected either path or bytes_ to be provided")
|
|
52
|
-
to_pack = {}
|
|
53
|
-
if path is not None:
|
|
54
|
-
to_pack["path"] = path
|
|
55
|
-
if bytes_ is not None:
|
|
56
|
-
to_pack["bytes"] = bytes_
|
|
57
|
-
return TarRead()(pack(to_pack))
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
class TarRead(UDF):
|
|
61
|
-
RES_DTYPE = pa.list_(
|
|
62
|
-
pa.struct(
|
|
63
|
-
[
|
|
64
|
-
pa.field("name", pa.string()),
|
|
65
|
-
pa.field("bytes", pa.large_binary()),
|
|
66
|
-
]
|
|
67
|
-
)
|
|
68
|
-
)
|
|
69
|
-
|
|
70
|
-
def __init__(self):
|
|
71
|
-
super().__init__("tar.read")
|
|
72
|
-
|
|
73
|
-
def return_type(self, *input_types: pa.DataType) -> pa.DataType:
|
|
74
|
-
return TarRead.RES_DTYPE
|
|
75
|
-
|
|
76
|
-
def invoke(self, *input_args: pa.Array) -> pa.Array:
|
|
77
|
-
if len(input_args) != 1:
|
|
78
|
-
raise ValueError(f"Expected 1 argument, got {len(input_args)}")
|
|
79
|
-
arg = input_args[0]
|
|
80
|
-
|
|
81
|
-
res = []
|
|
82
|
-
for req in arg:
|
|
83
|
-
if "path" in req:
|
|
84
|
-
kwargs = {"name": req["path"].as_py()}
|
|
85
|
-
elif "bytes" in req:
|
|
86
|
-
kwargs = {"fileobj": BytesIO(req["bytes"].as_py())}
|
|
87
|
-
else:
|
|
88
|
-
raise ValueError("Expected path or bytes_ to be provided")
|
|
89
|
-
|
|
90
|
-
files = []
|
|
91
|
-
with tarfile.open(**kwargs) as f:
|
|
92
|
-
for m in f.getmembers():
|
|
93
|
-
m: tarfile.TarInfo
|
|
94
|
-
if m.type == tarfile.DIRTYPE:
|
|
95
|
-
continue
|
|
96
|
-
# TODO(ngates): skip other types too maybe? Why are we even skipping directories?
|
|
97
|
-
files.append({"name": m.name, "bytes": f.extractfile(m).read()})
|
|
98
|
-
res.append(files)
|
|
99
|
-
|
|
100
|
-
return pa.array(res, type=TarRead.RES_DTYPE)
|
spiral/expressions/mp4.py
DELETED
|
@@ -1,62 +0,0 @@
|
|
|
1
|
-
import pyarrow as pa
|
|
2
|
-
|
|
3
|
-
from spiral.expressions.base import Expr, ExprLike
|
|
4
|
-
|
|
5
|
-
_MP4_RES_DTYPE: pa.DataType = pa.struct(
|
|
6
|
-
[
|
|
7
|
-
pa.field("pixels", pa.large_binary()),
|
|
8
|
-
pa.field("height", pa.uint32()),
|
|
9
|
-
pa.field("width", pa.uint32()),
|
|
10
|
-
pa.field("frames", pa.uint32()),
|
|
11
|
-
]
|
|
12
|
-
)
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
# TODO(marko): Support optional range and crop.
|
|
16
|
-
# IMPORTANT: Frames is currently broken and defaults to full.
|
|
17
|
-
def read(expr: ExprLike | str, frames: ExprLike | str, crop: ExprLike | str):
|
|
18
|
-
"""
|
|
19
|
-
Read referenced cell in a `MP4` format. Requires `ffmpeg`.
|
|
20
|
-
|
|
21
|
-
Args:
|
|
22
|
-
expr: The referenced `Mp4` bytes.
|
|
23
|
-
A str is assumed to be the `se.aux` expression.
|
|
24
|
-
frames: The range of frames to read. Each element must be a list of two uint32,
|
|
25
|
-
frame start and frame end, or null / empty list to read all frames.
|
|
26
|
-
A str is assumed to be the `se.aux` expression.
|
|
27
|
-
crop: The crop of the frames to read. Each element must be a list of four uint32,
|
|
28
|
-
x, y, width, height or null / empty list to read full frames.
|
|
29
|
-
A str is assumed to be the `se.aux` expression.
|
|
30
|
-
|
|
31
|
-
Returns:
|
|
32
|
-
An array where each element is a decoded cropped video with fields:
|
|
33
|
-
pixels: RGB8 bytes, frames * width * height * 3.
|
|
34
|
-
width: Width of the image with type `pa.uint32()`.
|
|
35
|
-
height: Height of the image with type `pa.uint32()`.
|
|
36
|
-
frames: Number of frames with type `pa.uint32()`.
|
|
37
|
-
"""
|
|
38
|
-
from spiral import _lib
|
|
39
|
-
from spiral.expressions import aux, lift
|
|
40
|
-
|
|
41
|
-
if isinstance(expr, str):
|
|
42
|
-
expr = aux(
|
|
43
|
-
expr,
|
|
44
|
-
pa.struct([("__ref__", pa.struct([("id", pa.string()), ("begin", pa.uint64()), ("end", pa.uint64())]))]),
|
|
45
|
-
)
|
|
46
|
-
if isinstance(frames, str):
|
|
47
|
-
frames = aux(frames, pa.list_(pa.uint32()))
|
|
48
|
-
if isinstance(crop, str):
|
|
49
|
-
crop = aux(crop, pa.list_(pa.uint32()))
|
|
50
|
-
|
|
51
|
-
expr = lift(expr)
|
|
52
|
-
frames = lift(frames)
|
|
53
|
-
crop = lift(crop)
|
|
54
|
-
|
|
55
|
-
return Expr(
|
|
56
|
-
_lib.expr.video.read(
|
|
57
|
-
expr.__expr__,
|
|
58
|
-
frames.__expr__,
|
|
59
|
-
crop.__expr__,
|
|
60
|
-
format="mp4",
|
|
61
|
-
)
|
|
62
|
-
)
|
spiral/expressions/png.py
DELETED
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
from spiral.expressions.base import Expr, ExprLike
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
def encode(expr: ExprLike) -> Expr:
|
|
5
|
-
"""Encode the given expression as a PNG image.
|
|
6
|
-
|
|
7
|
-
Args:
|
|
8
|
-
expr: The expression to encode.
|
|
9
|
-
Expects a struct with `pixels`, `width`, `height`, `channels`, `channel_bit_depth` fields.
|
|
10
|
-
|
|
11
|
-
Returns:
|
|
12
|
-
The encoded PNG images.
|
|
13
|
-
"""
|
|
14
|
-
from spiral import _lib
|
|
15
|
-
from spiral.expressions import lift
|
|
16
|
-
|
|
17
|
-
expr = lift(expr)
|
|
18
|
-
return Expr(_lib.expr.img.encode(expr.__expr__, format="png"))
|
spiral/expressions/qoi.py
DELETED
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
from spiral.expressions.base import Expr, ExprLike
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
def encode(expr: ExprLike) -> Expr:
|
|
5
|
-
"""Encode the given expression as a QOI image.
|
|
6
|
-
|
|
7
|
-
Args:
|
|
8
|
-
expr: The expression to encode.
|
|
9
|
-
Expects a struct with `pixels`, `width`, `height`, `channels`, `channel_bit_depth` fields.
|
|
10
|
-
|
|
11
|
-
Returns:
|
|
12
|
-
The encoded QOI images.
|
|
13
|
-
"""
|
|
14
|
-
from spiral import _lib
|
|
15
|
-
from spiral.expressions import lift
|
|
16
|
-
|
|
17
|
-
expr = lift(expr)
|
|
18
|
-
return Expr(_lib.expr.img.encode(expr.__expr__, format="qoi"))
|
spiral/expressions/refs.py
DELETED
|
@@ -1,58 +0,0 @@
|
|
|
1
|
-
import pyarrow as pa
|
|
2
|
-
|
|
3
|
-
from spiral.expressions.base import Expr, ExprLike
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def ref(expr: ExprLike, field: str | None = None) -> Expr:
|
|
7
|
-
"""Store binary values as references. This expression can only be used on write.
|
|
8
|
-
|
|
9
|
-
It is often better to store large cell values, such as bytes columns, that aren't used in filter expressions as
|
|
10
|
-
references. This enables more efficient scan pruning. Many of the Spiral's cell pushdown expressions work
|
|
11
|
-
over references.
|
|
12
|
-
|
|
13
|
-
Args:
|
|
14
|
-
expr: The expression to store as a reference.
|
|
15
|
-
field: If the expr evaluates into struct, the field name of that struct that should be referenced.
|
|
16
|
-
If `None`, the expr must evaluate into a type that supports referencing.
|
|
17
|
-
"""
|
|
18
|
-
from spiral import _lib
|
|
19
|
-
from spiral.expressions import lift
|
|
20
|
-
|
|
21
|
-
expr = lift(expr)
|
|
22
|
-
return Expr(_lib.expr.refs.ref(expr.__expr__, field))
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
def deref(expr: ExprLike | str, field: str | None = None) -> Expr:
|
|
26
|
-
"""De-reference referenced values.
|
|
27
|
-
|
|
28
|
-
See `ref` for more information on Spiral's reference values. This expression is used to de-reference referenced
|
|
29
|
-
column back into their original form, e.g. binary.
|
|
30
|
-
|
|
31
|
-
Args:
|
|
32
|
-
expr: The expression to de-reference. A str is assumed to be the `se.aux` expression.
|
|
33
|
-
field: If the expr evaluates into struct, the field name of that struct that should be de-referenced.
|
|
34
|
-
If `None`, the expr must evaluate into a reference type.
|
|
35
|
-
"""
|
|
36
|
-
from spiral import _lib
|
|
37
|
-
from spiral.expressions import aux, lift
|
|
38
|
-
|
|
39
|
-
if isinstance(expr, str):
|
|
40
|
-
expr = aux(
|
|
41
|
-
expr,
|
|
42
|
-
pa.struct([("__ref__", pa.struct([("id", pa.string()), ("begin", pa.uint64()), ("end", pa.uint64())]))]),
|
|
43
|
-
)
|
|
44
|
-
|
|
45
|
-
expr = lift(expr)
|
|
46
|
-
return Expr(_lib.expr.refs.deref(expr.__expr__, field=field))
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
def nbytes(expr: ExprLike) -> Expr:
|
|
50
|
-
"""Return the number of bytes in a reference.
|
|
51
|
-
|
|
52
|
-
Args:
|
|
53
|
-
expr: The ref expression to get the number of bytes from.
|
|
54
|
-
"""
|
|
55
|
-
from spiral.expressions import lift
|
|
56
|
-
|
|
57
|
-
expr = lift(expr)
|
|
58
|
-
return expr["__ref__"]["end"] - expr["__ref__"]["begin"]
|
|
File without changes
|