pyspiral 0.7.9__cp312-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl → 0.7.11__cp312-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyspiral might be problematic. Click here for more details.
- {pyspiral-0.7.9.dist-info → pyspiral-0.7.11.dist-info}/METADATA +1 -1
- {pyspiral-0.7.9.dist-info → pyspiral-0.7.11.dist-info}/RECORD +10 -10
- spiral/_lib.abi3.so +0 -0
- spiral/client.py +3 -4
- spiral/core/client/__init__.pyi +8 -0
- spiral/enrichment.py +66 -26
- spiral/settings.py +5 -3
- spiral/table.py +1 -3
- {pyspiral-0.7.9.dist-info → pyspiral-0.7.11.dist-info}/WHEEL +0 -0
- {pyspiral-0.7.9.dist-info → pyspiral-0.7.11.dist-info}/entry_points.txt +0 -0
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
pyspiral-0.7.
|
|
2
|
-
pyspiral-0.7.
|
|
3
|
-
pyspiral-0.7.
|
|
1
|
+
pyspiral-0.7.11.dist-info/METADATA,sha256=Xok1DuwVcaDpgNdpQWSbppaxmOjqWVawQOSgRcIG96o,1875
|
|
2
|
+
pyspiral-0.7.11.dist-info/WHEEL,sha256=0ecHyBdkJfSXYIVmWsPh7S-4h4fSrB4FlXhlnIu9c_A,130
|
|
3
|
+
pyspiral-0.7.11.dist-info/entry_points.txt,sha256=R96Y3FpYX6XbQu9qMPfUTgiCcf4qM9OBQQZTDdBkZwA,74
|
|
4
4
|
spiral/__init__.py,sha256=PwaYBWFBtB7cYi7peMmhk_Lm5XzjRoLwOtLbUhc1ZDo,1449
|
|
5
|
-
spiral/_lib.abi3.so,sha256=
|
|
5
|
+
spiral/_lib.abi3.so,sha256=NeGs3eqjki0bSsmAPuiieCjmLSxbUL_bzPCZ7bBk3GY,61740360
|
|
6
6
|
spiral/adbc.py,sha256=7IxfWIeQN-fh0W5OdN_PP2x3pzQYg6ZUOLsHg3jktqw,14842
|
|
7
7
|
spiral/api/__init__.py,sha256=ULBlVq3PnfNOO6T5naE_ULmmii-83--qTuN2PpAUQN0,2241
|
|
8
8
|
spiral/api/admin.py,sha256=A1iVR1XYJSObZivPAD5UzmPuMgupXc9kaHNYYa_kwfs,585
|
|
@@ -35,11 +35,11 @@ spiral/cli/telemetry.py,sha256=Uxo1Q1FkKJ6n6QNGOUmL3j_pRRWRx0qWIhoP-U9BuR0,589
|
|
|
35
35
|
spiral/cli/text.py,sha256=DlWGe4JrkdERAiqyITNpk91Wqb63Re99rNYlIFsIamc,4031
|
|
36
36
|
spiral/cli/types.py,sha256=XYzo1GgX7dBBItoBSrHI4vO5C2lLmS2sktb-2GnGH3E,1362
|
|
37
37
|
spiral/cli/workloads.py,sha256=2_SLfQTFN6y73R9H0i9dk8VIOVagKxSxOpHXC56yptY,2015
|
|
38
|
-
spiral/client.py,sha256=
|
|
38
|
+
spiral/client.py,sha256=fkgauF1UhmQGkcWmKjBT1SVQ-4g9K6qVtjLBMpfHYKU,6479
|
|
39
39
|
spiral/core/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
40
40
|
spiral/core/_tools/__init__.pyi,sha256=b2KLfTOQ67pjfbYt07o0IGiTu5o2bZw69lllV8v0Dps,143
|
|
41
41
|
spiral/core/authn/__init__.pyi,sha256=z_GWyIS62fuiYQrYO8hzw4W8oGaiciqS1u5qtAt54VY,769
|
|
42
|
-
spiral/core/client/__init__.pyi,sha256=
|
|
42
|
+
spiral/core/client/__init__.pyi,sha256=WfHQ8xL6x6NGq32h9ERMCdYEwBVtEWWbmk3A5icE0l4,7153
|
|
43
43
|
spiral/core/expr/__init__.pyi,sha256=3HSKjkotiEkxBvGBALXEBIie0JiyI9bCpehwA3nMQkU,571
|
|
44
44
|
spiral/core/expr/images/__init__.pyi,sha256=wnE_wZXq7a4iqTg3SVm-ssxGw1WQZyk5dGOPaP4Btko,73
|
|
45
45
|
spiral/core/expr/list_/__init__.pyi,sha256=Q_9c87eIQfZbqlaw_rq3fvs93YEsW7K5VYk6VZ4g6mU,126
|
|
@@ -60,7 +60,7 @@ spiral/debug/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
|
60
60
|
spiral/debug/manifests.py,sha256=7f1O3ba9mrA5nXpOF9cEIQuUAteP5wiBkFy_diQJ7No,3216
|
|
61
61
|
spiral/debug/metrics.py,sha256=XdRDcjggtsLNGCAjam6IxG9072pz_d2C8iLApNRFUtk,2044
|
|
62
62
|
spiral/debug/scan.py,sha256=UEm_aRnql5pwDPTpZgakMLNjlzkKL4RurBFFqH_BLAQ,9526
|
|
63
|
-
spiral/enrichment.py,sha256=
|
|
63
|
+
spiral/enrichment.py,sha256=hFK9wlWWiGkp3ST18WN7QTKwMc7EO7peUwW3AAiC0Gc,11650
|
|
64
64
|
spiral/expressions/__init__.py,sha256=ZsD8g7vB0G7xy19GUiH4m79kw7KEkTQRwJl5Gn1cgtw,8049
|
|
65
65
|
spiral/expressions/base.py,sha256=PvhJkcUSsPSIaxirHVzM9zlqyBXiaiia1HXohXdOmL4,5377
|
|
66
66
|
spiral/expressions/file.py,sha256=7D9jIENJcoT0KFharBLkzK9dZgO4DYn5K_KCt0twefg,518
|
|
@@ -96,14 +96,14 @@ spiral/protogen/util.py,sha256=smnvVo6nYH3FfDm9jqhNLaXz4bbTBaQezHQDCTvZyiQ,1486
|
|
|
96
96
|
spiral/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
97
97
|
spiral/scan.py,sha256=8IFuhqxzVGdXPW8uWWTFKiC38mTLpwkBZvK1YHqtbHM,12574
|
|
98
98
|
spiral/server.py,sha256=ztBmB5lBnUz-smQxR_tC8AI5SOhz17wH0MI3GuzDUdM,600
|
|
99
|
-
spiral/settings.py,sha256=
|
|
99
|
+
spiral/settings.py,sha256=QXVyu5uU-sClICyoAtnbOtJuOzbhCAH9rIvbjAi0I5c,3274
|
|
100
100
|
spiral/snapshot.py,sha256=cTobi5jtiANxalGA-isokQHblNmXGtuUvgUGGNVybsI,1555
|
|
101
101
|
spiral/streaming_/__init__.py,sha256=s7MlW2ERsuZmZGExLFL6RcZon2e0tNBocBg5ANgki7k,61
|
|
102
102
|
spiral/streaming_/reader.py,sha256=tl_lC9xgh1-QFhsZn4xQT7It3PVTzHCEUT2BG2dWBRQ,4166
|
|
103
103
|
spiral/streaming_/stream.py,sha256=DM1hBDHnWm1ZFKZ-hZ4zxeSXITcUI6kWzwdJZvywI8o,5915
|
|
104
104
|
spiral/substrait_.py,sha256=AKeOD4KIXvz2J4TYxnIneOiHddtBIyOhuNxVO_uH0eg,12592
|
|
105
|
-
spiral/table.py,sha256=
|
|
105
|
+
spiral/table.py,sha256=g6y1iV2Esk6kYhsIJfW7Wje0EGs7jgA0bVuZaSzNX1A,12068
|
|
106
106
|
spiral/text_index.py,sha256=FQ9rgIEGLSJryS9lFdMhKtPFey18BXoWbPXyvZPJJ04,442
|
|
107
107
|
spiral/transaction.py,sha256=bI5oqBAmPMSF0yOOYcPfGbV37Xc1-_V-wQNKw1xOlTA,4136
|
|
108
108
|
spiral/types_.py,sha256=W_jyO7F6rpPiH69jhgSgV7OxQZbOlb1Ho3InpKUP6Eo,155
|
|
109
|
-
pyspiral-0.7.
|
|
109
|
+
pyspiral-0.7.11.dist-info/RECORD,,
|
spiral/_lib.abi3.so
CHANGED
|
Binary file
|
spiral/client.py
CHANGED
|
@@ -6,7 +6,7 @@ import pyarrow as pa
|
|
|
6
6
|
|
|
7
7
|
from spiral.api import SpiralAPI
|
|
8
8
|
from spiral.api.projects import CreateProjectRequest, CreateProjectResponse
|
|
9
|
-
from spiral.core.client import Internal
|
|
9
|
+
from spiral.core.client import Internal, KeyColumns
|
|
10
10
|
from spiral.core.client import Spiral as CoreSpiral
|
|
11
11
|
from spiral.datetime_ import timestamp_micros
|
|
12
12
|
from spiral.expressions import ExprLike
|
|
@@ -102,6 +102,7 @@ class Spiral:
|
|
|
102
102
|
*projections: ExprLike,
|
|
103
103
|
where: ExprLike | None = None,
|
|
104
104
|
asof: datetime | int | None = None,
|
|
105
|
+
_key_columns: KeyColumns | None = None,
|
|
105
106
|
) -> Scan:
|
|
106
107
|
"""Starts a read transaction on the Spiral.
|
|
107
108
|
|
|
@@ -125,9 +126,7 @@ class Spiral:
|
|
|
125
126
|
return Scan(
|
|
126
127
|
self,
|
|
127
128
|
self.core.scan(
|
|
128
|
-
projection.__expr__,
|
|
129
|
-
filter=where.__expr__ if where else None,
|
|
130
|
-
asof=asof,
|
|
129
|
+
projection.__expr__, filter=where.__expr__ if where else None, asof=asof, key_columns=_key_columns
|
|
131
130
|
),
|
|
132
131
|
)
|
|
133
132
|
|
spiral/core/client/__init__.pyi
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
from enum import Enum
|
|
1
2
|
from typing import Any, Literal
|
|
2
3
|
|
|
3
4
|
import pyarrow as pa
|
|
@@ -7,6 +8,12 @@ from spiral.core.table import ColumnGroupState, KeyRange, KeySpaceState, Scan, S
|
|
|
7
8
|
from spiral.core.table.spec import ColumnGroup, Schema
|
|
8
9
|
from spiral.expressions import Expr
|
|
9
10
|
|
|
11
|
+
# Only for typing, the actual definition is in Rust.
|
|
12
|
+
class KeyColumns(Enum):
|
|
13
|
+
IfProjected = 0
|
|
14
|
+
Included = 1
|
|
15
|
+
Only = 2
|
|
16
|
+
|
|
10
17
|
class Spiral:
|
|
11
18
|
"""A client for Spiral database"""
|
|
12
19
|
def __init__(
|
|
@@ -26,6 +33,7 @@ class Spiral:
|
|
|
26
33
|
projection: Expr,
|
|
27
34
|
filter: Expr | None = None,
|
|
28
35
|
asof: int | None = None,
|
|
36
|
+
key_columns: KeyColumns | None = None,
|
|
29
37
|
) -> Scan:
|
|
30
38
|
"""Construct a table scan."""
|
|
31
39
|
...
|
spiral/enrichment.py
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import dataclasses
|
|
2
4
|
import logging
|
|
3
5
|
from functools import partial
|
|
4
|
-
from typing import TYPE_CHECKING
|
|
6
|
+
from typing import TYPE_CHECKING
|
|
5
7
|
|
|
6
|
-
from spiral.core.client import Shard
|
|
8
|
+
from spiral.core.client import KeyColumns, Shard
|
|
7
9
|
from spiral.core.table import KeyRange
|
|
8
10
|
from spiral.core.table.spec import Key, Operation
|
|
9
11
|
from spiral.expressions import Expr
|
|
@@ -27,7 +29,7 @@ class Enrichment:
|
|
|
27
29
|
|
|
28
30
|
def __init__(
|
|
29
31
|
self,
|
|
30
|
-
table:
|
|
32
|
+
table: Table,
|
|
31
33
|
projection: Expr,
|
|
32
34
|
where: Expr | None,
|
|
33
35
|
):
|
|
@@ -36,7 +38,7 @@ class Enrichment:
|
|
|
36
38
|
self._where = where
|
|
37
39
|
|
|
38
40
|
@property
|
|
39
|
-
def table(self) ->
|
|
41
|
+
def table(self) -> Table:
|
|
40
42
|
"""The table to write back into."""
|
|
41
43
|
return self._table
|
|
42
44
|
|
|
@@ -50,8 +52,8 @@ class Enrichment:
|
|
|
50
52
|
"""The filter expression."""
|
|
51
53
|
return self._where
|
|
52
54
|
|
|
53
|
-
def _scan(self) ->
|
|
54
|
-
return self._table.spiral.scan(self._projection, where=self._where)
|
|
55
|
+
def _scan(self) -> Scan:
|
|
56
|
+
return self._table.spiral.scan(self._projection, where=self._where, _key_columns=KeyColumns.Included)
|
|
55
57
|
|
|
56
58
|
def apply(
|
|
57
59
|
self, *, batch_readahead: int | None = None, partition_size_bytes: int | None = None, tx_dump: str | None = None
|
|
@@ -87,11 +89,11 @@ class Enrichment:
|
|
|
87
89
|
def apply_dask(
|
|
88
90
|
self,
|
|
89
91
|
*,
|
|
90
|
-
index:
|
|
92
|
+
index: KeySpaceIndex | None = None,
|
|
91
93
|
partition_size_bytes: int | None = None,
|
|
92
94
|
tx_dump: str | None = None,
|
|
93
95
|
checkpoint_dump: str | None = None,
|
|
94
|
-
client:
|
|
96
|
+
client: dask.distributed.Client | None = None,
|
|
95
97
|
**kwargs,
|
|
96
98
|
) -> None:
|
|
97
99
|
"""Use distributed Dask to apply the enrichment. Requires `dask[distributed]` to be installed.
|
|
@@ -146,10 +148,21 @@ class Enrichment:
|
|
|
146
148
|
if shards is None:
|
|
147
149
|
shards = plan_scan.shards()
|
|
148
150
|
|
|
151
|
+
# TODO(marko): This is temporary workaround. Passing token is a bad idea.
|
|
152
|
+
# Token can expire during long-running enrichments.
|
|
153
|
+
# Maybe if device code is used, we can pass something.
|
|
154
|
+
token = self._table.spiral.config.authn.token()
|
|
155
|
+
if token is None:
|
|
156
|
+
raise ValueError("Spiral client is not authenticated.")
|
|
157
|
+
settings_dict = self._table.spiral.config.model_dump()
|
|
158
|
+
spiraldb_dict = settings_dict.get("spiraldb", {})
|
|
159
|
+
spiraldb_dict["token"] = token.expose_secret()
|
|
160
|
+
settings_dict["spiraldb"] = spiraldb_dict
|
|
161
|
+
|
|
149
162
|
# Partially bind the enrichment function.
|
|
150
163
|
_compute = partial(
|
|
151
164
|
_enrichment_task,
|
|
152
|
-
settings_dict=
|
|
165
|
+
settings_dict=settings_dict,
|
|
153
166
|
state_json=plan_scan.core.plan_state().to_json(),
|
|
154
167
|
output_table_id=self._table.table_id,
|
|
155
168
|
partition_size_bytes=partition_size_bytes,
|
|
@@ -160,19 +173,51 @@ class Enrichment:
|
|
|
160
173
|
logger.info(f"Applying enrichment with {len(shards)} shards. Follow progress at {client.dashboard_link}")
|
|
161
174
|
|
|
162
175
|
failed_ranges = []
|
|
163
|
-
|
|
164
|
-
result:
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
176
|
+
try:
|
|
177
|
+
for result, shard in zip(client.gather(enrichments), shards):
|
|
178
|
+
result: EnrichmentTaskResult
|
|
179
|
+
|
|
180
|
+
if result.error is not None:
|
|
181
|
+
logger.error(f"Enrichment task failed for range {shard.key_range}: {result.error}")
|
|
182
|
+
failed_ranges.append(shard.key_range)
|
|
183
|
+
continue
|
|
184
|
+
|
|
185
|
+
tx.include(result.ops)
|
|
186
|
+
except Exception as e:
|
|
187
|
+
# If not incremental, re-raise the exception.
|
|
188
|
+
if checkpoint_dump is None:
|
|
189
|
+
raise e
|
|
190
|
+
|
|
191
|
+
# Handle worker failures (e.g., KilledWorker from Dask)
|
|
192
|
+
from dask.distributed import KilledWorker
|
|
193
|
+
|
|
194
|
+
if isinstance(e, KilledWorker):
|
|
195
|
+
logger.error(f"Dask worker was killed during enrichment: {e}")
|
|
196
|
+
|
|
197
|
+
# Try to gather partial results and mark remaining tasks as failed
|
|
198
|
+
for future, shard in zip(enrichments, shards):
|
|
199
|
+
if future.done() and not future.exception():
|
|
200
|
+
try:
|
|
201
|
+
result = future.result()
|
|
202
|
+
|
|
203
|
+
if result.error is not None:
|
|
204
|
+
logger.error(f"Enrichment task failed for range {shard.key_range}: {result.error}")
|
|
205
|
+
failed_ranges.append(shard.key_range)
|
|
206
|
+
continue
|
|
207
|
+
|
|
208
|
+
tx.include(result.ops)
|
|
209
|
+
except Exception:
|
|
210
|
+
# Task failed or incomplete, add to failed ranges
|
|
211
|
+
failed_ranges.append(shard.key_range)
|
|
212
|
+
else:
|
|
213
|
+
# Task didn't complete, add to failed ranges
|
|
214
|
+
failed_ranges.append(shard.key_range)
|
|
172
215
|
|
|
173
216
|
# Dump checkpoint of failed ranges, if any.
|
|
174
217
|
if checkpoint_dump is not None:
|
|
175
|
-
logger.info(
|
|
218
|
+
logger.info(
|
|
219
|
+
f"Dumping checkpoint with failed {len(failed_ranges)}/{len(shards)} ranges to {checkpoint_dump}."
|
|
220
|
+
)
|
|
176
221
|
_checkpoint_dump_key_ranges(checkpoint_dump, failed_ranges)
|
|
177
222
|
|
|
178
223
|
if tx.is_empty():
|
|
@@ -212,7 +257,6 @@ def _checkpoint_dump_key_ranges(checkpoint_dump: str, ranges: list[KeyRange]):
|
|
|
212
257
|
|
|
213
258
|
@dataclasses.dataclass
|
|
214
259
|
class EnrichmentTaskResult:
|
|
215
|
-
key_range: KeyRange
|
|
216
260
|
ops: list[Operation]
|
|
217
261
|
error: str | None = None
|
|
218
262
|
|
|
@@ -220,12 +264,9 @@ class EnrichmentTaskResult:
|
|
|
220
264
|
return {
|
|
221
265
|
"ops": [op.to_json() for op in self.ops],
|
|
222
266
|
"error": self.error,
|
|
223
|
-
"begin": bytes(self.key_range.begin),
|
|
224
|
-
"end": bytes(self.key_range.end),
|
|
225
267
|
}
|
|
226
268
|
|
|
227
269
|
def __setstate__(self, state):
|
|
228
|
-
self.key_range = KeyRange(begin=Key(state["begin"]), end=Key(state["end"]))
|
|
229
270
|
self.ops = [Operation.from_json(op_json) for op_json in state["ops"]]
|
|
230
271
|
self.error = state["error"]
|
|
231
272
|
|
|
@@ -250,17 +291,16 @@ def _enrichment_task(
|
|
|
250
291
|
state = ScanState.from_json(state_json)
|
|
251
292
|
task_scan = Scan(sp, sp.core.load_scan(state))
|
|
252
293
|
table = sp.table(output_table_id)
|
|
253
|
-
|
|
254
294
|
task_tx = table.txn()
|
|
255
295
|
|
|
256
296
|
try:
|
|
257
297
|
task_tx.writeback(task_scan, key_range=shard.key_range, partition_size_bytes=partition_size_bytes)
|
|
258
|
-
return EnrichmentTaskResult(
|
|
298
|
+
return EnrichmentTaskResult(ops=task_tx.take())
|
|
259
299
|
except Exception as e:
|
|
260
300
|
task_tx.abort()
|
|
261
301
|
|
|
262
302
|
if incremental:
|
|
263
|
-
return EnrichmentTaskResult(
|
|
303
|
+
return EnrichmentTaskResult(ops=[], error=str(e))
|
|
264
304
|
|
|
265
305
|
logger.error(f"Enrichment task failed for shard {shard}: {e}")
|
|
266
306
|
raise e
|
spiral/settings.py
CHANGED
|
@@ -28,9 +28,11 @@ PACKAGE_NAME = "pyspiral"
|
|
|
28
28
|
|
|
29
29
|
|
|
30
30
|
def validate_token(v, handler: ValidatorFunctionWrapHandler):
|
|
31
|
-
if
|
|
32
|
-
|
|
33
|
-
|
|
31
|
+
if isinstance(v, Token):
|
|
32
|
+
return v
|
|
33
|
+
if isinstance(v, str):
|
|
34
|
+
return Token(v)
|
|
35
|
+
raise ValueError("Token value (SPIRAL__SPIRALDB__TOKEN) must be a string")
|
|
34
36
|
|
|
35
37
|
|
|
36
38
|
TokenType = Annotated[
|
spiral/table.py
CHANGED
|
@@ -134,9 +134,7 @@ class Table(Expr):
|
|
|
134
134
|
"""
|
|
135
135
|
from spiral import expressions as se
|
|
136
136
|
|
|
137
|
-
|
|
138
|
-
# Include key columns in the projection.
|
|
139
|
-
projection = se.merge(self.select(*self.key_schema.names), *projections)
|
|
137
|
+
projection = se.merge(*projections)
|
|
140
138
|
if where is not None:
|
|
141
139
|
where = se.lift(where)
|
|
142
140
|
|
|
File without changes
|
|
File without changes
|