pyspiral 0.7.7__cp312-abi3-manylinux_2_28_aarch64.whl → 0.7.9__cp312-abi3-manylinux_2_28_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyspiral might be problematic. Click here for more details.
- {pyspiral-0.7.7.dist-info → pyspiral-0.7.9.dist-info}/METADATA +1 -1
- {pyspiral-0.7.7.dist-info → pyspiral-0.7.9.dist-info}/RECORD +10 -10
- spiral/_lib.abi3.so +0 -0
- spiral/cli/tables.py +17 -34
- spiral/core/client/__init__.pyi +1 -1
- spiral/core/table/__init__.pyi +11 -0
- spiral/dataset.py +10 -1
- spiral/enrichment.py +91 -11
- {pyspiral-0.7.7.dist-info → pyspiral-0.7.9.dist-info}/WHEEL +0 -0
- {pyspiral-0.7.7.dist-info → pyspiral-0.7.9.dist-info}/entry_points.txt +0 -0
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
pyspiral-0.7.
|
|
2
|
-
pyspiral-0.7.
|
|
3
|
-
pyspiral-0.7.
|
|
1
|
+
pyspiral-0.7.9.dist-info/METADATA,sha256=La_MkKyTCJ_qrYYT2eGLaPYkaf9OSpNa36nYOfMaN_4,1874
|
|
2
|
+
pyspiral-0.7.9.dist-info/WHEEL,sha256=I5JYpyYzeAl2SOerY_wvkm-HJti0rDQc6zMeJs35MpM,108
|
|
3
|
+
pyspiral-0.7.9.dist-info/entry_points.txt,sha256=R96Y3FpYX6XbQu9qMPfUTgiCcf4qM9OBQQZTDdBkZwA,74
|
|
4
4
|
spiral/__init__.py,sha256=PwaYBWFBtB7cYi7peMmhk_Lm5XzjRoLwOtLbUhc1ZDo,1449
|
|
5
|
-
spiral/_lib.abi3.so,sha256=
|
|
5
|
+
spiral/_lib.abi3.so,sha256=mAjWINjLl5rrbGEDln3oZW4RJUV77O48x4mVDJuYr2g,61819128
|
|
6
6
|
spiral/adbc.py,sha256=7IxfWIeQN-fh0W5OdN_PP2x3pzQYg6ZUOLsHg3jktqw,14842
|
|
7
7
|
spiral/api/__init__.py,sha256=ULBlVq3PnfNOO6T5naE_ULmmii-83--qTuN2PpAUQN0,2241
|
|
8
8
|
spiral/api/admin.py,sha256=A1iVR1XYJSObZivPAD5UzmPuMgupXc9kaHNYYa_kwfs,585
|
|
@@ -30,7 +30,7 @@ spiral/cli/orgs.py,sha256=fmOuLxpeIFfKqePRi292Gv9k-EF5pPn_tbKd2BLl2Ig,2869
|
|
|
30
30
|
spiral/cli/printer.py,sha256=aosc763hDFgoXJGkiANmNyO3kAsecAS1JWgjEhn8GCM,1784
|
|
31
31
|
spiral/cli/projects.py,sha256=1M1nGrBT-t0aY9RV5Cnmzy7YrhIvmHwdkpa3y9j8rG8,5756
|
|
32
32
|
spiral/cli/state.py,sha256=10wTIVQ0SJkY67Z6-KQ1LFlt3aVIPmZhoHFdTwp4kNA,130
|
|
33
|
-
spiral/cli/tables.py,sha256=
|
|
33
|
+
spiral/cli/tables.py,sha256=6vt6EBGt7I9b0kAQ6sQORbmWiKbRdH4ubQYjjuNBXEg,6900
|
|
34
34
|
spiral/cli/telemetry.py,sha256=Uxo1Q1FkKJ6n6QNGOUmL3j_pRRWRx0qWIhoP-U9BuR0,589
|
|
35
35
|
spiral/cli/text.py,sha256=DlWGe4JrkdERAiqyITNpk91Wqb63Re99rNYlIFsIamc,4031
|
|
36
36
|
spiral/cli/types.py,sha256=XYzo1GgX7dBBItoBSrHI4vO5C2lLmS2sktb-2GnGH3E,1362
|
|
@@ -39,7 +39,7 @@ spiral/client.py,sha256=53dVv8wxYMmozUfR8MVcUufKGqdVIdb0yZ0gchczBoQ,6426
|
|
|
39
39
|
spiral/core/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
40
40
|
spiral/core/_tools/__init__.pyi,sha256=b2KLfTOQ67pjfbYt07o0IGiTu5o2bZw69lllV8v0Dps,143
|
|
41
41
|
spiral/core/authn/__init__.pyi,sha256=z_GWyIS62fuiYQrYO8hzw4W8oGaiciqS1u5qtAt54VY,769
|
|
42
|
-
spiral/core/client/__init__.pyi,sha256=
|
|
42
|
+
spiral/core/client/__init__.pyi,sha256=YgDM-MoIt3J-QKxvsfs5gRiaTBtOA6TphbNBIAnrFCw,6956
|
|
43
43
|
spiral/core/expr/__init__.pyi,sha256=3HSKjkotiEkxBvGBALXEBIie0JiyI9bCpehwA3nMQkU,571
|
|
44
44
|
spiral/core/expr/images/__init__.pyi,sha256=wnE_wZXq7a4iqTg3SVm-ssxGw1WQZyk5dGOPaP4Btko,73
|
|
45
45
|
spiral/core/expr/list_/__init__.pyi,sha256=Q_9c87eIQfZbqlaw_rq3fvs93YEsW7K5VYk6VZ4g6mU,126
|
|
@@ -49,18 +49,18 @@ spiral/core/expr/struct_/__init__.pyi,sha256=MXckd98eV_x3X0RhEWvlkA3DcDXRtLs5pNn
|
|
|
49
49
|
spiral/core/expr/text/__init__.pyi,sha256=ed83n1xcsGY7_QDhMmJGnSQ20UrJFXcdv1AveSEcS1c,175
|
|
50
50
|
spiral/core/expr/udf/__init__.pyi,sha256=zsZs081KVhY3-1JidqTkWMW81Qd_ScoTGZvasIhIK-4,358
|
|
51
51
|
spiral/core/expr/video/__init__.pyi,sha256=nQJEcSsigZuRpMjkI_O4EEtMK_n2zRvorcL_KEeD5vU,95
|
|
52
|
-
spiral/core/table/__init__.pyi,sha256=
|
|
52
|
+
spiral/core/table/__init__.pyi,sha256=h84QDg6hLuPcmRpavx5zOZM77ZCi2-YwIlrrUZJp1sE,4374
|
|
53
53
|
spiral/core/table/manifests/__init__.pyi,sha256=eVfDpmhYSjafIvvALqAkZe5baN3Y1HpKpxYEbjwd4gQ,1043
|
|
54
54
|
spiral/core/table/metastore/__init__.pyi,sha256=rc3u9MwEKRvL2kxOc8lBorddFRnM8o_o1frqtae86a4,1697
|
|
55
55
|
spiral/core/table/spec/__init__.pyi,sha256=fVuc2j3uoTdWfYNm720OfUIgrLYw9fRwj44maI5bgdY,5709
|
|
56
56
|
spiral/dataloader.py,sha256=W9siY4BF4p_rwTTSS4KgsaQsPLxxza6XmQhrdBzzMJ8,10592
|
|
57
|
-
spiral/dataset.py,sha256=
|
|
57
|
+
spiral/dataset.py,sha256=S8pdiBXIhwMxQiJYgF7UI_8HkN7pZO798UzlO1LNXy4,8409
|
|
58
58
|
spiral/datetime_.py,sha256=elXaUWtZuuLVcu9E0aXnvYRPB9XWqZbLDToozQYQYjU,950
|
|
59
59
|
spiral/debug/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
60
60
|
spiral/debug/manifests.py,sha256=7f1O3ba9mrA5nXpOF9cEIQuUAteP5wiBkFy_diQJ7No,3216
|
|
61
61
|
spiral/debug/metrics.py,sha256=XdRDcjggtsLNGCAjam6IxG9072pz_d2C8iLApNRFUtk,2044
|
|
62
62
|
spiral/debug/scan.py,sha256=UEm_aRnql5pwDPTpZgakMLNjlzkKL4RurBFFqH_BLAQ,9526
|
|
63
|
-
spiral/enrichment.py,sha256=
|
|
63
|
+
spiral/enrichment.py,sha256=j8CzWJqLRq0Zko-qz3NzicsFOAvdzQSRJ58vdmqxPsk,9879
|
|
64
64
|
spiral/expressions/__init__.py,sha256=ZsD8g7vB0G7xy19GUiH4m79kw7KEkTQRwJl5Gn1cgtw,8049
|
|
65
65
|
spiral/expressions/base.py,sha256=PvhJkcUSsPSIaxirHVzM9zlqyBXiaiia1HXohXdOmL4,5377
|
|
66
66
|
spiral/expressions/file.py,sha256=7D9jIENJcoT0KFharBLkzK9dZgO4DYn5K_KCt0twefg,518
|
|
@@ -106,4 +106,4 @@ spiral/table.py,sha256=p95AYv6b7e14F3t7j-B-r45k9CtG84ngikdlAhh9WxA,12260
|
|
|
106
106
|
spiral/text_index.py,sha256=FQ9rgIEGLSJryS9lFdMhKtPFey18BXoWbPXyvZPJJ04,442
|
|
107
107
|
spiral/transaction.py,sha256=bI5oqBAmPMSF0yOOYcPfGbV37Xc1-_V-wQNKw1xOlTA,4136
|
|
108
108
|
spiral/types_.py,sha256=W_jyO7F6rpPiH69jhgSgV7OxQZbOlb1Ho3InpKUP6Eo,155
|
|
109
|
-
pyspiral-0.7.
|
|
109
|
+
pyspiral-0.7.9.dist-info/RECORD,,
|
spiral/_lib.abi3.so
CHANGED
|
Binary file
|
spiral/cli/tables.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
|
-
import datetime
|
|
2
1
|
from collections.abc import Callable
|
|
3
|
-
from typing import Annotated
|
|
2
|
+
from typing import Annotated
|
|
4
3
|
|
|
5
4
|
import questionary
|
|
6
5
|
import rich
|
|
@@ -61,6 +60,21 @@ def ls(
|
|
|
61
60
|
CONSOLE.print(rich_table)
|
|
62
61
|
|
|
63
62
|
|
|
63
|
+
@app.command(help="Show the leading rows of the table.")
|
|
64
|
+
def head(
|
|
65
|
+
project: ProjectArg,
|
|
66
|
+
table: Annotated[str | None, Option(help="Table name.")] = None,
|
|
67
|
+
dataset: Annotated[str | None, Option(help="Dataset name.")] = None,
|
|
68
|
+
n: Annotated[int, Option("-n", help="Maximum number of rows to show. Defaults to 10.")] = 10,
|
|
69
|
+
):
|
|
70
|
+
import polars as pl
|
|
71
|
+
|
|
72
|
+
_, t = get_table(project, table, dataset)
|
|
73
|
+
|
|
74
|
+
with pl.Config(tbl_rows=-1):
|
|
75
|
+
CONSOLE.print(t.to_polars().limit(n).collect())
|
|
76
|
+
|
|
77
|
+
|
|
64
78
|
def validate_non_empty_str(text: str) -> bool | str:
|
|
65
79
|
if len(text) > 0:
|
|
66
80
|
return True
|
|
@@ -137,40 +151,9 @@ def flush(
|
|
|
137
151
|
project: ProjectArg,
|
|
138
152
|
table: Annotated[str | None, Option(help="Table name.")] = None,
|
|
139
153
|
dataset: Annotated[str | None, Option(help="Dataset name.")] = None,
|
|
140
|
-
keep: Annotated[
|
|
141
|
-
Literal["1h", "2h", "4h"] | None,
|
|
142
|
-
Option(help="Duration string that indicates how much WAL to keep. Defaults to 24h."),
|
|
143
|
-
] = None,
|
|
144
|
-
full: Annotated[bool, Option(help="Flush full Write-Ahead-Log.")] = False,
|
|
145
154
|
):
|
|
146
|
-
# TODO(marko): Use some human-readable duration parsing library.
|
|
147
|
-
duration = None
|
|
148
|
-
if keep is not None:
|
|
149
|
-
if full:
|
|
150
|
-
raise ValueError("Cannot specify both --keep and --full")
|
|
151
|
-
match keep:
|
|
152
|
-
case "1h":
|
|
153
|
-
duration = datetime.timedelta(hours=1)
|
|
154
|
-
case "2h":
|
|
155
|
-
duration = datetime.timedelta(hours=2)
|
|
156
|
-
case "4h":
|
|
157
|
-
duration = datetime.timedelta(hours=4)
|
|
158
|
-
case _:
|
|
159
|
-
raise ValueError(f"Invalid duration string: {keep}")
|
|
160
|
-
|
|
161
|
-
if full:
|
|
162
|
-
# Warn and wait for confirmation.
|
|
163
|
-
ERR_CONSOLE.print("[bold yellow]Warning: All currently open transaction will fail to commit.[/bold yellow]")
|
|
164
|
-
if not questionary.confirm("Are you sure you want to continue?", default=False).ask(): # pyright: ignore[reportAny]
|
|
165
|
-
ERR_CONSOLE.print("Aborting.")
|
|
166
|
-
raise typer.Exit(1)
|
|
167
|
-
|
|
168
|
-
duration = datetime.timedelta(hours=0)
|
|
169
|
-
|
|
170
|
-
keep_latest_s = int(duration.total_seconds()) if duration is not None else None
|
|
171
|
-
|
|
172
155
|
identifier, t = get_table(project, table, dataset)
|
|
173
|
-
state.spiral.internal.flush_wal(t.core
|
|
156
|
+
state.spiral.internal.flush_wal(t.core) # pyright: ignore[reportPrivateUsage]
|
|
174
157
|
CONSOLE.print(f"Flushed WAL for table {identifier} in project {project}.")
|
|
175
158
|
|
|
176
159
|
|
spiral/core/client/__init__.pyi
CHANGED
spiral/core/table/__init__.pyi
CHANGED
|
@@ -60,6 +60,13 @@ class ScanState:
|
|
|
60
60
|
class MaterializablePlan:
|
|
61
61
|
pass
|
|
62
62
|
|
|
63
|
+
class EvaluatedExecutablePlan:
|
|
64
|
+
pass
|
|
65
|
+
|
|
66
|
+
class EvaluatedPlanStream:
|
|
67
|
+
def __next__(self) -> EvaluatedExecutablePlan: ...
|
|
68
|
+
def __iter__(self) -> EvaluatedPlanStream: ...
|
|
69
|
+
|
|
63
70
|
class Scan:
|
|
64
71
|
def key_schema(self) -> Schema: ...
|
|
65
72
|
def schema(self) -> Schema: ...
|
|
@@ -90,6 +97,10 @@ class Scan:
|
|
|
90
97
|
# If `infinite` is True, shards are shuffled after exhausted but not before the first pass.
|
|
91
98
|
# Otherwise, shards are not shuffle and shuffle config is only used for shuffle buffer.
|
|
92
99
|
...
|
|
100
|
+
|
|
101
|
+
def evaluate_analyze(
|
|
102
|
+
self, key_table: pa.Table | pa.RecordBatch | None = None, batch_readahead: int | None = None
|
|
103
|
+
) -> EvaluatedPlanStream: ...
|
|
93
104
|
def metrics(self) -> dict[str, Any]: ...
|
|
94
105
|
|
|
95
106
|
class KeySpaceState:
|
spiral/dataset.py
CHANGED
|
@@ -226,7 +226,16 @@ class TableScanner(ds.Scanner):
|
|
|
226
226
|
|
|
227
227
|
def head(self, num_rows: int):
|
|
228
228
|
"""Return the first `num_rows` rows of the dataset."""
|
|
229
|
-
|
|
229
|
+
|
|
230
|
+
kwargs = {}
|
|
231
|
+
if num_rows <= 10_000:
|
|
232
|
+
# We are unlikely to need more than a couple batches
|
|
233
|
+
kwargs["batch_readahead"] = 1
|
|
234
|
+
# The progress bar length is the total number of splits in this dataset. We will likely
|
|
235
|
+
# stop streaming early. As a result, the progress bar is misleading.
|
|
236
|
+
kwargs["hide_progress_bar"] = True
|
|
237
|
+
|
|
238
|
+
reader = self._scan.to_record_batches(key_table=self.key_table, **kwargs)
|
|
230
239
|
batches = []
|
|
231
240
|
row_count = 0
|
|
232
241
|
for batch in reader:
|
spiral/enrichment.py
CHANGED
|
@@ -4,14 +4,14 @@ from functools import partial
|
|
|
4
4
|
from typing import TYPE_CHECKING, Optional
|
|
5
5
|
|
|
6
6
|
from spiral.core.client import Shard
|
|
7
|
-
from spiral.core.table import
|
|
8
|
-
from spiral.core.table.spec import Operation
|
|
7
|
+
from spiral.core.table import KeyRange
|
|
8
|
+
from spiral.core.table.spec import Key, Operation
|
|
9
9
|
from spiral.expressions import Expr
|
|
10
10
|
|
|
11
11
|
if TYPE_CHECKING:
|
|
12
12
|
import dask.distributed
|
|
13
13
|
|
|
14
|
-
from spiral import KeySpaceIndex, Table
|
|
14
|
+
from spiral import KeySpaceIndex, Scan, Table
|
|
15
15
|
|
|
16
16
|
logger = logging.getLogger(__name__)
|
|
17
17
|
|
|
@@ -50,7 +50,7 @@ class Enrichment:
|
|
|
50
50
|
"""The filter expression."""
|
|
51
51
|
return self._where
|
|
52
52
|
|
|
53
|
-
def _scan(self) -> Scan:
|
|
53
|
+
def _scan(self) -> "Scan":
|
|
54
54
|
return self._table.spiral.scan(self._projection, where=self._where)
|
|
55
55
|
|
|
56
56
|
def apply(
|
|
@@ -90,6 +90,7 @@ class Enrichment:
|
|
|
90
90
|
index: Optional["KeySpaceIndex"] = None,
|
|
91
91
|
partition_size_bytes: int | None = None,
|
|
92
92
|
tx_dump: str | None = None,
|
|
93
|
+
checkpoint_dump: str | None = None,
|
|
93
94
|
client: Optional["dask.distributed.Client"] = None,
|
|
94
95
|
**kwargs,
|
|
95
96
|
) -> None:
|
|
@@ -109,6 +110,7 @@ class Enrichment:
|
|
|
109
110
|
partition_size_bytes: The maximum partition size in bytes.
|
|
110
111
|
If not provided, the default partition size is used.
|
|
111
112
|
tx_dump: Optional path to dump the transaction JSON for debugging.
|
|
113
|
+
checkpoint_dump: Optional path to dump intermediate checkpoints for incremental progress.
|
|
112
114
|
client: Optional Dask distributed client. If not provided, a new client will be created
|
|
113
115
|
**kwargs: Additional keyword arguments to pass to `dask.distributed.Client`
|
|
114
116
|
such as `address` to connect to an existing cluster.
|
|
@@ -126,11 +128,23 @@ class Enrichment:
|
|
|
126
128
|
tx = self._table.txn()
|
|
127
129
|
plan_scan = self._scan()
|
|
128
130
|
|
|
129
|
-
# Determine the "tasks".
|
|
130
|
-
shards =
|
|
131
|
-
|
|
131
|
+
# Determine the "tasks".
|
|
132
|
+
shards = None
|
|
133
|
+
# Use checkpoint, if provided.
|
|
134
|
+
if checkpoint_dump is not None:
|
|
135
|
+
checkpoint: list[KeyRange] | None = _checkpoint_load_key_ranges(checkpoint_dump)
|
|
136
|
+
if checkpoint is None:
|
|
137
|
+
logger.info(f"No existing checkpoint found at {checkpoint_dump}. Starting from scratch.")
|
|
138
|
+
else:
|
|
139
|
+
logger.info(f"Resuming enrichment from checkpoint at {checkpoint_dump} with {len(checkpoint)} ranges.")
|
|
140
|
+
shards = [Shard(kr, None) for kr in checkpoint]
|
|
141
|
+
# Fallback to index-based sharding.
|
|
142
|
+
if shards is None and index is not None:
|
|
132
143
|
# TODO(marko): This will use index's asof automatically.
|
|
133
144
|
shards = self._table.spiral.internal.compute_shards(index.core)
|
|
145
|
+
# Fallback to default sharding.
|
|
146
|
+
if shards is None:
|
|
147
|
+
shards = plan_scan.shards()
|
|
134
148
|
|
|
135
149
|
# Partially bind the enrichment function.
|
|
136
150
|
_compute = partial(
|
|
@@ -139,14 +153,28 @@ class Enrichment:
|
|
|
139
153
|
state_json=plan_scan.core.plan_state().to_json(),
|
|
140
154
|
output_table_id=self._table.table_id,
|
|
141
155
|
partition_size_bytes=partition_size_bytes,
|
|
156
|
+
incremental=checkpoint_dump is not None,
|
|
142
157
|
)
|
|
143
158
|
enrichments = client.map(_compute, shards)
|
|
144
159
|
|
|
145
160
|
logger.info(f"Applying enrichment with {len(shards)} shards. Follow progress at {client.dashboard_link}")
|
|
161
|
+
|
|
162
|
+
failed_ranges = []
|
|
146
163
|
for result in client.gather(enrichments):
|
|
147
164
|
result: EnrichmentTaskResult
|
|
165
|
+
|
|
166
|
+
if result.error is not None:
|
|
167
|
+
logger.error(f"Enrichment task failed for range {result.key_range}: {result.error}")
|
|
168
|
+
failed_ranges.append(result.key_range)
|
|
169
|
+
continue
|
|
170
|
+
|
|
148
171
|
tx.include(result.ops)
|
|
149
172
|
|
|
173
|
+
# Dump checkpoint of failed ranges, if any.
|
|
174
|
+
if checkpoint_dump is not None:
|
|
175
|
+
logger.info(f"Dumping checkpoint with {len(failed_ranges)} failed ranges to {checkpoint_dump}.")
|
|
176
|
+
_checkpoint_dump_key_ranges(checkpoint_dump, failed_ranges)
|
|
177
|
+
|
|
150
178
|
if tx.is_empty():
|
|
151
179
|
logger.warning("Transaction not committed. No rows were read for enrichment.")
|
|
152
180
|
return
|
|
@@ -155,20 +183,62 @@ class Enrichment:
|
|
|
155
183
|
tx.commit(compact=True, tx_dump=tx_dump)
|
|
156
184
|
|
|
157
185
|
|
|
186
|
+
def _checkpoint_load_key_ranges(checkpoint_dump: str) -> list[KeyRange] | None:
|
|
187
|
+
import json
|
|
188
|
+
import os
|
|
189
|
+
|
|
190
|
+
if not os.path.exists(checkpoint_dump):
|
|
191
|
+
return None
|
|
192
|
+
|
|
193
|
+
with open(checkpoint_dump) as f:
|
|
194
|
+
data = json.load(f)
|
|
195
|
+
return [
|
|
196
|
+
KeyRange(begin=Key(bytes.fromhex(r["begin"])), end=Key(bytes.fromhex(r["end"])))
|
|
197
|
+
for r in data.get("key_ranges", [])
|
|
198
|
+
]
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def _checkpoint_dump_key_ranges(checkpoint_dump: str, ranges: list[KeyRange]):
|
|
202
|
+
import json
|
|
203
|
+
import os
|
|
204
|
+
|
|
205
|
+
os.makedirs(os.path.dirname(checkpoint_dump), exist_ok=True)
|
|
206
|
+
with open(checkpoint_dump, "w") as f:
|
|
207
|
+
json.dump(
|
|
208
|
+
{"key_ranges": [{"begin": bytes(r.begin).hex(), "end": bytes(r.end).hex()} for r in ranges]},
|
|
209
|
+
f,
|
|
210
|
+
)
|
|
211
|
+
|
|
212
|
+
|
|
158
213
|
@dataclasses.dataclass
|
|
159
214
|
class EnrichmentTaskResult:
|
|
215
|
+
key_range: KeyRange
|
|
160
216
|
ops: list[Operation]
|
|
217
|
+
error: str | None = None
|
|
161
218
|
|
|
162
219
|
def __getstate__(self):
|
|
163
|
-
return {
|
|
220
|
+
return {
|
|
221
|
+
"ops": [op.to_json() for op in self.ops],
|
|
222
|
+
"error": self.error,
|
|
223
|
+
"begin": bytes(self.key_range.begin),
|
|
224
|
+
"end": bytes(self.key_range.end),
|
|
225
|
+
}
|
|
164
226
|
|
|
165
227
|
def __setstate__(self, state):
|
|
228
|
+
self.key_range = KeyRange(begin=Key(state["begin"]), end=Key(state["end"]))
|
|
166
229
|
self.ops = [Operation.from_json(op_json) for op_json in state["ops"]]
|
|
230
|
+
self.error = state["error"]
|
|
167
231
|
|
|
168
232
|
|
|
169
233
|
# NOTE(marko): This function must be picklable!
|
|
170
234
|
def _enrichment_task(
|
|
171
|
-
shard: Shard,
|
|
235
|
+
shard: Shard,
|
|
236
|
+
*,
|
|
237
|
+
settings_dict,
|
|
238
|
+
state_json,
|
|
239
|
+
output_table_id,
|
|
240
|
+
partition_size_bytes: int | None,
|
|
241
|
+
incremental: bool,
|
|
172
242
|
) -> EnrichmentTaskResult:
|
|
173
243
|
# Returns operations that can be included in a transaction.
|
|
174
244
|
from spiral import Scan, Spiral
|
|
@@ -182,5 +252,15 @@ def _enrichment_task(
|
|
|
182
252
|
table = sp.table(output_table_id)
|
|
183
253
|
|
|
184
254
|
task_tx = table.txn()
|
|
185
|
-
|
|
186
|
-
|
|
255
|
+
|
|
256
|
+
try:
|
|
257
|
+
task_tx.writeback(task_scan, key_range=shard.key_range, partition_size_bytes=partition_size_bytes)
|
|
258
|
+
return EnrichmentTaskResult(key_range=shard.key_range, ops=task_tx.take())
|
|
259
|
+
except Exception as e:
|
|
260
|
+
task_tx.abort()
|
|
261
|
+
|
|
262
|
+
if incremental:
|
|
263
|
+
return EnrichmentTaskResult(key_range=shard.key_range, ops=[], error=str(e))
|
|
264
|
+
|
|
265
|
+
logger.error(f"Enrichment task failed for shard {shard}: {e}")
|
|
266
|
+
raise e
|
|
File without changes
|
|
File without changes
|