pyspiral 0.7.9__cp312-abi3-manylinux_2_28_x86_64.whl → 0.7.11__cp312-abi3-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyspiral might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pyspiral
3
- Version: 0.7.9
3
+ Version: 0.7.11
4
4
  Classifier: Intended Audience :: Science/Research
5
5
  Classifier: Operating System :: OS Independent
6
6
  Classifier: Programming Language :: Python
@@ -1,8 +1,8 @@
1
- pyspiral-0.7.9.dist-info/METADATA,sha256=La_MkKyTCJ_qrYYT2eGLaPYkaf9OSpNa36nYOfMaN_4,1874
2
- pyspiral-0.7.9.dist-info/WHEEL,sha256=ydlpo1_yEJ2g1Axq3LoOd_OfioJa2swc2j5IDCa4uho,107
3
- pyspiral-0.7.9.dist-info/entry_points.txt,sha256=R96Y3FpYX6XbQu9qMPfUTgiCcf4qM9OBQQZTDdBkZwA,74
1
+ pyspiral-0.7.11.dist-info/METADATA,sha256=Xok1DuwVcaDpgNdpQWSbppaxmOjqWVawQOSgRcIG96o,1875
2
+ pyspiral-0.7.11.dist-info/WHEEL,sha256=ydlpo1_yEJ2g1Axq3LoOd_OfioJa2swc2j5IDCa4uho,107
3
+ pyspiral-0.7.11.dist-info/entry_points.txt,sha256=R96Y3FpYX6XbQu9qMPfUTgiCcf4qM9OBQQZTDdBkZwA,74
4
4
  spiral/__init__.py,sha256=PwaYBWFBtB7cYi7peMmhk_Lm5XzjRoLwOtLbUhc1ZDo,1449
5
- spiral/_lib.abi3.so,sha256=jpQ5vqK5-SgdNp-oHrMgE2oJSuPxJamtjVNAmAfn8oI,68064024
5
+ spiral/_lib.abi3.so,sha256=KOjwXxKQFTN9ct3I5i_V69GOZriJ8haV4l11HOMFeUA,67991272
6
6
  spiral/adbc.py,sha256=7IxfWIeQN-fh0W5OdN_PP2x3pzQYg6ZUOLsHg3jktqw,14842
7
7
  spiral/api/__init__.py,sha256=ULBlVq3PnfNOO6T5naE_ULmmii-83--qTuN2PpAUQN0,2241
8
8
  spiral/api/admin.py,sha256=A1iVR1XYJSObZivPAD5UzmPuMgupXc9kaHNYYa_kwfs,585
@@ -35,11 +35,11 @@ spiral/cli/telemetry.py,sha256=Uxo1Q1FkKJ6n6QNGOUmL3j_pRRWRx0qWIhoP-U9BuR0,589
35
35
  spiral/cli/text.py,sha256=DlWGe4JrkdERAiqyITNpk91Wqb63Re99rNYlIFsIamc,4031
36
36
  spiral/cli/types.py,sha256=XYzo1GgX7dBBItoBSrHI4vO5C2lLmS2sktb-2GnGH3E,1362
37
37
  spiral/cli/workloads.py,sha256=2_SLfQTFN6y73R9H0i9dk8VIOVagKxSxOpHXC56yptY,2015
38
- spiral/client.py,sha256=53dVv8wxYMmozUfR8MVcUufKGqdVIdb0yZ0gchczBoQ,6426
38
+ spiral/client.py,sha256=fkgauF1UhmQGkcWmKjBT1SVQ-4g9K6qVtjLBMpfHYKU,6479
39
39
  spiral/core/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
40
40
  spiral/core/_tools/__init__.pyi,sha256=b2KLfTOQ67pjfbYt07o0IGiTu5o2bZw69lllV8v0Dps,143
41
41
  spiral/core/authn/__init__.pyi,sha256=z_GWyIS62fuiYQrYO8hzw4W8oGaiciqS1u5qtAt54VY,769
42
- spiral/core/client/__init__.pyi,sha256=YgDM-MoIt3J-QKxvsfs5gRiaTBtOA6TphbNBIAnrFCw,6956
42
+ spiral/core/client/__init__.pyi,sha256=WfHQ8xL6x6NGq32h9ERMCdYEwBVtEWWbmk3A5icE0l4,7153
43
43
  spiral/core/expr/__init__.pyi,sha256=3HSKjkotiEkxBvGBALXEBIie0JiyI9bCpehwA3nMQkU,571
44
44
  spiral/core/expr/images/__init__.pyi,sha256=wnE_wZXq7a4iqTg3SVm-ssxGw1WQZyk5dGOPaP4Btko,73
45
45
  spiral/core/expr/list_/__init__.pyi,sha256=Q_9c87eIQfZbqlaw_rq3fvs93YEsW7K5VYk6VZ4g6mU,126
@@ -60,7 +60,7 @@ spiral/debug/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
60
60
  spiral/debug/manifests.py,sha256=7f1O3ba9mrA5nXpOF9cEIQuUAteP5wiBkFy_diQJ7No,3216
61
61
  spiral/debug/metrics.py,sha256=XdRDcjggtsLNGCAjam6IxG9072pz_d2C8iLApNRFUtk,2044
62
62
  spiral/debug/scan.py,sha256=UEm_aRnql5pwDPTpZgakMLNjlzkKL4RurBFFqH_BLAQ,9526
63
- spiral/enrichment.py,sha256=j8CzWJqLRq0Zko-qz3NzicsFOAvdzQSRJ58vdmqxPsk,9879
63
+ spiral/enrichment.py,sha256=hFK9wlWWiGkp3ST18WN7QTKwMc7EO7peUwW3AAiC0Gc,11650
64
64
  spiral/expressions/__init__.py,sha256=ZsD8g7vB0G7xy19GUiH4m79kw7KEkTQRwJl5Gn1cgtw,8049
65
65
  spiral/expressions/base.py,sha256=PvhJkcUSsPSIaxirHVzM9zlqyBXiaiia1HXohXdOmL4,5377
66
66
  spiral/expressions/file.py,sha256=7D9jIENJcoT0KFharBLkzK9dZgO4DYn5K_KCt0twefg,518
@@ -96,14 +96,14 @@ spiral/protogen/util.py,sha256=smnvVo6nYH3FfDm9jqhNLaXz4bbTBaQezHQDCTvZyiQ,1486
96
96
  spiral/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
97
97
  spiral/scan.py,sha256=8IFuhqxzVGdXPW8uWWTFKiC38mTLpwkBZvK1YHqtbHM,12574
98
98
  spiral/server.py,sha256=ztBmB5lBnUz-smQxR_tC8AI5SOhz17wH0MI3GuzDUdM,600
99
- spiral/settings.py,sha256=sUhMMBCXaPvUYztN_gztD9TjeUYJwVeEcJrq4FLy6M0,3232
99
+ spiral/settings.py,sha256=QXVyu5uU-sClICyoAtnbOtJuOzbhCAH9rIvbjAi0I5c,3274
100
100
  spiral/snapshot.py,sha256=cTobi5jtiANxalGA-isokQHblNmXGtuUvgUGGNVybsI,1555
101
101
  spiral/streaming_/__init__.py,sha256=s7MlW2ERsuZmZGExLFL6RcZon2e0tNBocBg5ANgki7k,61
102
102
  spiral/streaming_/reader.py,sha256=tl_lC9xgh1-QFhsZn4xQT7It3PVTzHCEUT2BG2dWBRQ,4166
103
103
  spiral/streaming_/stream.py,sha256=DM1hBDHnWm1ZFKZ-hZ4zxeSXITcUI6kWzwdJZvywI8o,5915
104
104
  spiral/substrait_.py,sha256=AKeOD4KIXvz2J4TYxnIneOiHddtBIyOhuNxVO_uH0eg,12592
105
- spiral/table.py,sha256=p95AYv6b7e14F3t7j-B-r45k9CtG84ngikdlAhh9WxA,12260
105
+ spiral/table.py,sha256=g6y1iV2Esk6kYhsIJfW7Wje0EGs7jgA0bVuZaSzNX1A,12068
106
106
  spiral/text_index.py,sha256=FQ9rgIEGLSJryS9lFdMhKtPFey18BXoWbPXyvZPJJ04,442
107
107
  spiral/transaction.py,sha256=bI5oqBAmPMSF0yOOYcPfGbV37Xc1-_V-wQNKw1xOlTA,4136
108
108
  spiral/types_.py,sha256=W_jyO7F6rpPiH69jhgSgV7OxQZbOlb1Ho3InpKUP6Eo,155
109
- pyspiral-0.7.9.dist-info/RECORD,,
109
+ pyspiral-0.7.11.dist-info/RECORD,,
spiral/_lib.abi3.so CHANGED
Binary file
spiral/client.py CHANGED
@@ -6,7 +6,7 @@ import pyarrow as pa
6
6
 
7
7
  from spiral.api import SpiralAPI
8
8
  from spiral.api.projects import CreateProjectRequest, CreateProjectResponse
9
- from spiral.core.client import Internal
9
+ from spiral.core.client import Internal, KeyColumns
10
10
  from spiral.core.client import Spiral as CoreSpiral
11
11
  from spiral.datetime_ import timestamp_micros
12
12
  from spiral.expressions import ExprLike
@@ -102,6 +102,7 @@ class Spiral:
102
102
  *projections: ExprLike,
103
103
  where: ExprLike | None = None,
104
104
  asof: datetime | int | None = None,
105
+ _key_columns: KeyColumns | None = None,
105
106
  ) -> Scan:
106
107
  """Starts a read transaction on the Spiral.
107
108
 
@@ -125,9 +126,7 @@ class Spiral:
125
126
  return Scan(
126
127
  self,
127
128
  self.core.scan(
128
- projection.__expr__,
129
- filter=where.__expr__ if where else None,
130
- asof=asof,
129
+ projection.__expr__, filter=where.__expr__ if where else None, asof=asof, key_columns=_key_columns
131
130
  ),
132
131
  )
133
132
 
@@ -1,3 +1,4 @@
1
+ from enum import Enum
1
2
  from typing import Any, Literal
2
3
 
3
4
  import pyarrow as pa
@@ -7,6 +8,12 @@ from spiral.core.table import ColumnGroupState, KeyRange, KeySpaceState, Scan, S
7
8
  from spiral.core.table.spec import ColumnGroup, Schema
8
9
  from spiral.expressions import Expr
9
10
 
11
+ # Only for typing, the actual definition is in Rust.
12
+ class KeyColumns(Enum):
13
+ IfProjected = 0
14
+ Included = 1
15
+ Only = 2
16
+
10
17
  class Spiral:
11
18
  """A client for Spiral database"""
12
19
  def __init__(
@@ -26,6 +33,7 @@ class Spiral:
26
33
  projection: Expr,
27
34
  filter: Expr | None = None,
28
35
  asof: int | None = None,
36
+ key_columns: KeyColumns | None = None,
29
37
  ) -> Scan:
30
38
  """Construct a table scan."""
31
39
  ...
spiral/enrichment.py CHANGED
@@ -1,9 +1,11 @@
1
+ from __future__ import annotations
2
+
1
3
  import dataclasses
2
4
  import logging
3
5
  from functools import partial
4
- from typing import TYPE_CHECKING, Optional
6
+ from typing import TYPE_CHECKING
5
7
 
6
- from spiral.core.client import Shard
8
+ from spiral.core.client import KeyColumns, Shard
7
9
  from spiral.core.table import KeyRange
8
10
  from spiral.core.table.spec import Key, Operation
9
11
  from spiral.expressions import Expr
@@ -27,7 +29,7 @@ class Enrichment:
27
29
 
28
30
  def __init__(
29
31
  self,
30
- table: "Table",
32
+ table: Table,
31
33
  projection: Expr,
32
34
  where: Expr | None,
33
35
  ):
@@ -36,7 +38,7 @@ class Enrichment:
36
38
  self._where = where
37
39
 
38
40
  @property
39
- def table(self) -> "Table":
41
+ def table(self) -> Table:
40
42
  """The table to write back into."""
41
43
  return self._table
42
44
 
@@ -50,8 +52,8 @@ class Enrichment:
50
52
  """The filter expression."""
51
53
  return self._where
52
54
 
53
- def _scan(self) -> "Scan":
54
- return self._table.spiral.scan(self._projection, where=self._where)
55
+ def _scan(self) -> Scan:
56
+ return self._table.spiral.scan(self._projection, where=self._where, _key_columns=KeyColumns.Included)
55
57
 
56
58
  def apply(
57
59
  self, *, batch_readahead: int | None = None, partition_size_bytes: int | None = None, tx_dump: str | None = None
@@ -87,11 +89,11 @@ class Enrichment:
87
89
  def apply_dask(
88
90
  self,
89
91
  *,
90
- index: Optional["KeySpaceIndex"] = None,
92
+ index: KeySpaceIndex | None = None,
91
93
  partition_size_bytes: int | None = None,
92
94
  tx_dump: str | None = None,
93
95
  checkpoint_dump: str | None = None,
94
- client: Optional["dask.distributed.Client"] = None,
96
+ client: dask.distributed.Client | None = None,
95
97
  **kwargs,
96
98
  ) -> None:
97
99
  """Use distributed Dask to apply the enrichment. Requires `dask[distributed]` to be installed.
@@ -146,10 +148,21 @@ class Enrichment:
146
148
  if shards is None:
147
149
  shards = plan_scan.shards()
148
150
 
151
+ # TODO(marko): This is temporary workaround. Passing token is a bad idea.
152
+ # Token can expire during long-running enrichments.
153
+ # Maybe if device code is used, we can pass something.
154
+ token = self._table.spiral.config.authn.token()
155
+ if token is None:
156
+ raise ValueError("Spiral client is not authenticated.")
157
+ settings_dict = self._table.spiral.config.model_dump()
158
+ spiraldb_dict = settings_dict.get("spiraldb", {})
159
+ spiraldb_dict["token"] = token.expose_secret()
160
+ settings_dict["spiraldb"] = spiraldb_dict
161
+
149
162
  # Partially bind the enrichment function.
150
163
  _compute = partial(
151
164
  _enrichment_task,
152
- settings_dict=self._table.spiral.config.model_dump(),
165
+ settings_dict=settings_dict,
153
166
  state_json=plan_scan.core.plan_state().to_json(),
154
167
  output_table_id=self._table.table_id,
155
168
  partition_size_bytes=partition_size_bytes,
@@ -160,19 +173,51 @@ class Enrichment:
160
173
  logger.info(f"Applying enrichment with {len(shards)} shards. Follow progress at {client.dashboard_link}")
161
174
 
162
175
  failed_ranges = []
163
- for result in client.gather(enrichments):
164
- result: EnrichmentTaskResult
165
-
166
- if result.error is not None:
167
- logger.error(f"Enrichment task failed for range {result.key_range}: {result.error}")
168
- failed_ranges.append(result.key_range)
169
- continue
170
-
171
- tx.include(result.ops)
176
+ try:
177
+ for result, shard in zip(client.gather(enrichments), shards):
178
+ result: EnrichmentTaskResult
179
+
180
+ if result.error is not None:
181
+ logger.error(f"Enrichment task failed for range {shard.key_range}: {result.error}")
182
+ failed_ranges.append(shard.key_range)
183
+ continue
184
+
185
+ tx.include(result.ops)
186
+ except Exception as e:
187
+ # If not incremental, re-raise the exception.
188
+ if checkpoint_dump is None:
189
+ raise e
190
+
191
+ # Handle worker failures (e.g., KilledWorker from Dask)
192
+ from dask.distributed import KilledWorker
193
+
194
+ if isinstance(e, KilledWorker):
195
+ logger.error(f"Dask worker was killed during enrichment: {e}")
196
+
197
+ # Try to gather partial results and mark remaining tasks as failed
198
+ for future, shard in zip(enrichments, shards):
199
+ if future.done() and not future.exception():
200
+ try:
201
+ result = future.result()
202
+
203
+ if result.error is not None:
204
+ logger.error(f"Enrichment task failed for range {shard.key_range}: {result.error}")
205
+ failed_ranges.append(shard.key_range)
206
+ continue
207
+
208
+ tx.include(result.ops)
209
+ except Exception:
210
+ # Task failed or incomplete, add to failed ranges
211
+ failed_ranges.append(shard.key_range)
212
+ else:
213
+ # Task didn't complete, add to failed ranges
214
+ failed_ranges.append(shard.key_range)
172
215
 
173
216
  # Dump checkpoint of failed ranges, if any.
174
217
  if checkpoint_dump is not None:
175
- logger.info(f"Dumping checkpoint with {len(failed_ranges)} failed ranges to {checkpoint_dump}.")
218
+ logger.info(
219
+ f"Dumping checkpoint with failed {len(failed_ranges)}/{len(shards)} ranges to {checkpoint_dump}."
220
+ )
176
221
  _checkpoint_dump_key_ranges(checkpoint_dump, failed_ranges)
177
222
 
178
223
  if tx.is_empty():
@@ -212,7 +257,6 @@ def _checkpoint_dump_key_ranges(checkpoint_dump: str, ranges: list[KeyRange]):
212
257
 
213
258
  @dataclasses.dataclass
214
259
  class EnrichmentTaskResult:
215
- key_range: KeyRange
216
260
  ops: list[Operation]
217
261
  error: str | None = None
218
262
 
@@ -220,12 +264,9 @@ class EnrichmentTaskResult:
220
264
  return {
221
265
  "ops": [op.to_json() for op in self.ops],
222
266
  "error": self.error,
223
- "begin": bytes(self.key_range.begin),
224
- "end": bytes(self.key_range.end),
225
267
  }
226
268
 
227
269
  def __setstate__(self, state):
228
- self.key_range = KeyRange(begin=Key(state["begin"]), end=Key(state["end"]))
229
270
  self.ops = [Operation.from_json(op_json) for op_json in state["ops"]]
230
271
  self.error = state["error"]
231
272
 
@@ -250,17 +291,16 @@ def _enrichment_task(
250
291
  state = ScanState.from_json(state_json)
251
292
  task_scan = Scan(sp, sp.core.load_scan(state))
252
293
  table = sp.table(output_table_id)
253
-
254
294
  task_tx = table.txn()
255
295
 
256
296
  try:
257
297
  task_tx.writeback(task_scan, key_range=shard.key_range, partition_size_bytes=partition_size_bytes)
258
- return EnrichmentTaskResult(key_range=shard.key_range, ops=task_tx.take())
298
+ return EnrichmentTaskResult(ops=task_tx.take())
259
299
  except Exception as e:
260
300
  task_tx.abort()
261
301
 
262
302
  if incremental:
263
- return EnrichmentTaskResult(key_range=shard.key_range, ops=[], error=str(e))
303
+ return EnrichmentTaskResult(ops=[], error=str(e))
264
304
 
265
305
  logger.error(f"Enrichment task failed for shard {shard}: {e}")
266
306
  raise e
spiral/settings.py CHANGED
@@ -28,9 +28,11 @@ PACKAGE_NAME = "pyspiral"
28
28
 
29
29
 
30
30
  def validate_token(v, handler: ValidatorFunctionWrapHandler):
31
- if not isinstance(v, str):
32
- raise ValueError("Token value (SPIRAL__SPIRALDB__TOKEN) must be a string")
33
- return Token(v)
31
+ if isinstance(v, Token):
32
+ return v
33
+ if isinstance(v, str):
34
+ return Token(v)
35
+ raise ValueError("Token value (SPIRAL__SPIRALDB__TOKEN) must be a string")
34
36
 
35
37
 
36
38
  TokenType = Annotated[
spiral/table.py CHANGED
@@ -134,9 +134,7 @@ class Table(Expr):
134
134
  """
135
135
  from spiral import expressions as se
136
136
 
137
- # TODO(marko): This shouldn't need to happen. We should be able to read keys from writeback scan.
138
- # Include key columns in the projection.
139
- projection = se.merge(self.select(*self.key_schema.names), *projections)
137
+ projection = se.merge(*projections)
140
138
  if where is not None:
141
139
  where = se.lift(where)
142
140