pyspiral 0.2.5__cp310-abi3-macosx_11_0_arm64.whl → 0.3.1__cp310-abi3-macosx_11_0_arm64.whl
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {pyspiral-0.2.5.dist-info → pyspiral-0.3.1.dist-info}/METADATA +8 -6
- {pyspiral-0.2.5.dist-info → pyspiral-0.3.1.dist-info}/RECORD +29 -25
- {pyspiral-0.2.5.dist-info → pyspiral-0.3.1.dist-info}/WHEEL +1 -1
- spiral/__init__.py +3 -1
- spiral/_lib.abi3.so +0 -0
- spiral/api/__init__.py +8 -4
- spiral/api/filesystems.py +1 -1
- spiral/api/tables.py +3 -6
- spiral/catalog.py +15 -0
- spiral/cli/fs.py +2 -2
- spiral/cli/project.py +5 -3
- spiral/core/core/__init__.pyi +34 -6
- spiral/core/spec/__init__.pyi +8 -26
- spiral/dataset.py +221 -20
- spiral/expressions/__init__.py +19 -4
- spiral/expressions/mp4.py +69 -0
- spiral/expressions/png.py +18 -0
- spiral/expressions/qoi.py +18 -0
- spiral/expressions/refs.py +23 -3
- spiral/expressions/tiff.py +88 -88
- spiral/maintenance.py +12 -0
- spiral/proto/_/scandal/__init__.py +78 -11
- spiral/proto/_/spiral/table/__init__.py +53 -2
- spiral/scan_.py +75 -24
- spiral/settings.py +6 -0
- spiral/substrait_.py +1 -1
- spiral/table.py +35 -21
- spiral/txn.py +48 -0
- spiral/config.py +0 -26
- {pyspiral-0.2.5.dist-info → pyspiral-0.3.1.dist-info}/entry_points.txt +0 -0
spiral/substrait_.py
CHANGED
@@ -251,7 +251,7 @@ class SubstraitConverter:
|
|
251
251
|
|
252
252
|
if field_name in self.key_names:
|
253
253
|
# This is a key column, so we need to select it from the scope.
|
254
|
-
return se.
|
254
|
+
return se.key(field_name)
|
255
255
|
|
256
256
|
scope = se.getitem(scope, field_name)
|
257
257
|
scope_type = scope_type.field(ref.field).type
|
spiral/table.py
CHANGED
@@ -3,11 +3,13 @@ from typing import TYPE_CHECKING, Literal
|
|
3
3
|
|
4
4
|
import pyarrow as pa
|
5
5
|
|
6
|
-
from spiral import expressions as se
|
7
|
-
from spiral.config import FILE_FORMAT, Config
|
8
6
|
from spiral.core.core import Table as CoreTable
|
9
|
-
from spiral.core.core import
|
7
|
+
from spiral.core.core import TableMaintenance, TableTransaction
|
8
|
+
from spiral.core.spec import Schema
|
10
9
|
from spiral.expressions.base import Expr, ExprLike
|
10
|
+
from spiral.maintenance import Maintenance
|
11
|
+
from spiral.settings import FILE_FORMAT
|
12
|
+
from spiral.txn import Transaction
|
11
13
|
|
12
14
|
if TYPE_CHECKING:
|
13
15
|
import duckdb
|
@@ -53,7 +55,7 @@ class Table(Expr):
|
|
53
55
|
from spiral import expressions as se
|
54
56
|
|
55
57
|
if item in self._key_columns:
|
56
|
-
return se.
|
58
|
+
return se.key(name=item)
|
57
59
|
|
58
60
|
return super().__getitem__(item)
|
59
61
|
|
@@ -72,7 +74,7 @@ class Table(Expr):
|
|
72
74
|
|
73
75
|
from spiral import expressions as se
|
74
76
|
|
75
|
-
return se.merge(se.pack({key: se.
|
77
|
+
return se.merge(se.pack({key: se.key(key) for key in key_paths}), super().select(*other_paths, exclude=exclude))
|
76
78
|
|
77
79
|
@property
|
78
80
|
def key_schema(self) -> pa.Schema:
|
@@ -80,7 +82,7 @@ class Table(Expr):
|
|
80
82
|
return self._key_schema
|
81
83
|
|
82
84
|
@property
|
83
|
-
def schema(self) ->
|
85
|
+
def schema(self) -> Schema:
|
84
86
|
"""Returns the FULL schema of the table.
|
85
87
|
|
86
88
|
NOTE: This can be expensive for large tables.
|
@@ -111,8 +113,6 @@ class Table(Expr):
|
|
111
113
|
where: ExprLike | None = None,
|
112
114
|
asof: datetime | int | str = None,
|
113
115
|
exclude_keys: bool = False,
|
114
|
-
# TODO(marko): Support config.
|
115
|
-
# config: Config | None = None,
|
116
116
|
) -> "Scan":
|
117
117
|
"""Reads the table. If projections are not provided, the entire table is read.
|
118
118
|
|
@@ -128,7 +128,6 @@ class Table(Expr):
|
|
128
128
|
where=where,
|
129
129
|
asof=asof,
|
130
130
|
exclude_keys=exclude_keys,
|
131
|
-
# config=config,
|
132
131
|
)
|
133
132
|
|
134
133
|
# NOTE: "vortex" is valid format. We don't want that visible in the API docs.
|
@@ -137,21 +136,36 @@ class Table(Expr):
|
|
137
136
|
expr: ExprLike,
|
138
137
|
*,
|
139
138
|
format: Literal["parquet"] | None = None,
|
140
|
-
|
141
|
-
config: Config | None = None,
|
139
|
+
partition_size_bytes: int | None = None,
|
142
140
|
) -> None:
|
143
141
|
"""Write an item to the table inside a single transaction.
|
144
142
|
|
145
143
|
:param expr: The expression to write. Must evaluate to a struct array.
|
146
144
|
:param format: the format to write the data in. Defaults to "parquet".
|
147
|
-
:param
|
145
|
+
:param partition_size_bytes: The maximum partition size in bytes.
|
148
146
|
"""
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
147
|
+
format = format or FILE_FORMAT
|
148
|
+
|
149
|
+
with self.txn(format=format) as txn:
|
150
|
+
txn.write(
|
151
|
+
expr,
|
152
|
+
partition_size_bytes=partition_size_bytes,
|
153
|
+
)
|
154
|
+
|
155
|
+
# NOTE: "vortex" is valid format. We don't want that visible in the API docs.
|
156
|
+
def txn(self, format: Literal["parquet"] | None = None) -> Transaction:
|
157
|
+
"""Begins a new transaction. Transaction must be committed for writes to become visible.
|
158
|
+
|
159
|
+
IMPORTANT: While transaction can be used to atomically write data to the table,
|
160
|
+
it is important that the primary key columns are unique within the transaction.
|
161
|
+
|
162
|
+
:param format: The format to use for the transaction. Defaults to "parquet".
|
163
|
+
"""
|
164
|
+
return Transaction(TableTransaction(self._table.metastore, format or FILE_FORMAT))
|
165
|
+
|
166
|
+
def maintenance(self, format: Literal["parquet"] | None = None) -> Maintenance:
|
167
|
+
"""Maintenance tasks for the table.
|
168
|
+
|
169
|
+
:param format: The format to use. Defaults to "parquet".
|
170
|
+
"""
|
171
|
+
return Maintenance(TableMaintenance(self._table.metastore, format or FILE_FORMAT))
|
spiral/txn.py
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
from spiral.core.core import TableTransaction
|
2
|
+
from spiral.expressions import ExprLike
|
3
|
+
|
4
|
+
|
5
|
+
class Transaction:
|
6
|
+
"""Spiral table transaction.
|
7
|
+
|
8
|
+
IMPORTANT: While transaction can be used to atomically write data to the table,
|
9
|
+
it is important that the primary key columns are unique within the transaction.
|
10
|
+
"""
|
11
|
+
|
12
|
+
def __init__(self, transaction: TableTransaction):
|
13
|
+
self._transaction = transaction
|
14
|
+
|
15
|
+
@property
|
16
|
+
def status(self) -> str:
|
17
|
+
"""The status of the transaction."""
|
18
|
+
return self._transaction.status
|
19
|
+
|
20
|
+
def __enter__(self):
|
21
|
+
return self
|
22
|
+
|
23
|
+
def __exit__(self, exc_type, exc_value, traceback):
|
24
|
+
if exc_type is None:
|
25
|
+
self._transaction.commit()
|
26
|
+
else:
|
27
|
+
self._transaction.abort()
|
28
|
+
|
29
|
+
def write(self, expr: ExprLike, *, partition_size_bytes: int | None = None):
|
30
|
+
"""Write an item to the table inside a single transaction.
|
31
|
+
|
32
|
+
:param expr: The expression to write. Must evaluate to a struct array.
|
33
|
+
:param partition_size_bytes: The maximum partition size in bytes.
|
34
|
+
If not provided, the default partition size is used.
|
35
|
+
"""
|
36
|
+
from spiral import expressions as se
|
37
|
+
|
38
|
+
expr = se.lift(expr)
|
39
|
+
|
40
|
+
self._transaction.write(expr.__expr__, partition_size_bytes=partition_size_bytes)
|
41
|
+
|
42
|
+
def commit(self):
|
43
|
+
"""Commit the transaction."""
|
44
|
+
self._transaction.commit()
|
45
|
+
|
46
|
+
def abort(self):
|
47
|
+
"""Abort the transaction."""
|
48
|
+
self._transaction.abort()
|
spiral/config.py
DELETED
@@ -1,26 +0,0 @@
|
|
1
|
-
import os
|
2
|
-
|
3
|
-
from pydantic_settings import BaseSettings, SettingsConfigDict
|
4
|
-
|
5
|
-
FILE_FORMAT = os.environ.get("SPIRAL_FILE_FORMAT", "parquet")
|
6
|
-
|
7
|
-
|
8
|
-
class Config(BaseSettings):
|
9
|
-
model_config = SettingsConfigDict(
|
10
|
-
env_nested_delimiter="__",
|
11
|
-
env_prefix="SPIRAL_CORE__",
|
12
|
-
frozen=True,
|
13
|
-
)
|
14
|
-
|
15
|
-
partition_file_min_size: int = 256 * 1024 * 1024 # 256MB
|
16
|
-
flush_wal_on_write: bool = False
|
17
|
-
|
18
|
-
# TODO(marko): Support config. Unused after migration to Rust.
|
19
|
-
# #: Defaults to ThreadPoolExecutor's default (based on os.cpu_count().
|
20
|
-
# scan_num_threads: int | None = 61 # 61 is used by Golang and Tokio, for some reason...
|
21
|
-
#
|
22
|
-
# #: The duration of WAL that is preserved to allow for txn conflict resolution.
|
23
|
-
# transaction_window: int = 0 if DEV else timedelta(days=1).total_seconds()
|
24
|
-
#
|
25
|
-
# #: Truncation length of string statistics.
|
26
|
-
# string_truncation_length: int = 1024
|
File without changes
|