pyspiral 0.6.18__cp312-abi3-manylinux_2_28_aarch64.whl → 0.6.20__cp312-abi3-manylinux_2_28_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyspiral might be problematic. Click here for more details.
- {pyspiral-0.6.18.dist-info → pyspiral-0.6.20.dist-info}/METADATA +1 -1
- {pyspiral-0.6.18.dist-info → pyspiral-0.6.20.dist-info}/RECORD +9 -9
- {pyspiral-0.6.18.dist-info → pyspiral-0.6.20.dist-info}/entry_points.txt +1 -0
- spiral/_lib.abi3.so +0 -0
- spiral/core/table/__init__.pyi +2 -0
- spiral/enrichment.py +1 -10
- spiral/scan.py +6 -2
- spiral/transaction.py +14 -1
- {pyspiral-0.6.18.dist-info → pyspiral-0.6.20.dist-info}/WHEEL +0 -0
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
pyspiral-0.6.
|
|
2
|
-
pyspiral-0.6.
|
|
3
|
-
pyspiral-0.6.
|
|
1
|
+
pyspiral-0.6.20.dist-info/METADATA,sha256=1lIl3Ge03ZbmZSes7OFOIAreoQnR8jrYm7je4lmXVS4,1875
|
|
2
|
+
pyspiral-0.6.20.dist-info/WHEEL,sha256=I5JYpyYzeAl2SOerY_wvkm-HJti0rDQc6zMeJs35MpM,108
|
|
3
|
+
pyspiral-0.6.20.dist-info/entry_points.txt,sha256=R96Y3FpYX6XbQu9qMPfUTgiCcf4qM9OBQQZTDdBkZwA,74
|
|
4
4
|
spiral/__init__.py,sha256=gAysTwG_oEeKVMdCOfOzDhl0bM2miiK8Ds2vvUihBWw,1153
|
|
5
|
-
spiral/_lib.abi3.so,sha256=
|
|
5
|
+
spiral/_lib.abi3.so,sha256=vBtNqnSXnFcNs6fbPXs-P9i9H5DHOQd7ZWNtXckf_qw,61320232
|
|
6
6
|
spiral/adbc.py,sha256=7IxfWIeQN-fh0W5OdN_PP2x3pzQYg6ZUOLsHg3jktqw,14842
|
|
7
7
|
spiral/api/__init__.py,sha256=ULBlVq3PnfNOO6T5naE_ULmmii-83--qTuN2PpAUQN0,2241
|
|
8
8
|
spiral/api/admin.py,sha256=A1iVR1XYJSObZivPAD5UzmPuMgupXc9kaHNYYa_kwfs,585
|
|
@@ -49,7 +49,7 @@ spiral/core/expr/struct_/__init__.pyi,sha256=MXckd98eV_x3X0RhEWvlkA3DcDXRtLs5pNn
|
|
|
49
49
|
spiral/core/expr/text/__init__.pyi,sha256=ed83n1xcsGY7_QDhMmJGnSQ20UrJFXcdv1AveSEcS1c,175
|
|
50
50
|
spiral/core/expr/udf/__init__.pyi,sha256=zsZs081KVhY3-1JidqTkWMW81Qd_ScoTGZvasIhIK-4,358
|
|
51
51
|
spiral/core/expr/video/__init__.pyi,sha256=nQJEcSsigZuRpMjkI_O4EEtMK_n2zRvorcL_KEeD5vU,95
|
|
52
|
-
spiral/core/table/__init__.pyi,sha256=
|
|
52
|
+
spiral/core/table/__init__.pyi,sha256=szCtZqZ_L0vF_99x7d3olwazJdEn5LwkgRK-8QEOzrI,3914
|
|
53
53
|
spiral/core/table/manifests/__init__.pyi,sha256=eVfDpmhYSjafIvvALqAkZe5baN3Y1HpKpxYEbjwd4gQ,1043
|
|
54
54
|
spiral/core/table/metastore/__init__.pyi,sha256=rc3u9MwEKRvL2kxOc8lBorddFRnM8o_o1frqtae86a4,1697
|
|
55
55
|
spiral/core/table/spec/__init__.pyi,sha256=fVuc2j3uoTdWfYNm720OfUIgrLYw9fRwj44maI5bgdY,5709
|
|
@@ -60,7 +60,7 @@ spiral/debug/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
|
60
60
|
spiral/debug/manifests.py,sha256=7f1O3ba9mrA5nXpOF9cEIQuUAteP5wiBkFy_diQJ7No,3216
|
|
61
61
|
spiral/debug/metrics.py,sha256=XdRDcjggtsLNGCAjam6IxG9072pz_d2C8iLApNRFUtk,2044
|
|
62
62
|
spiral/debug/scan.py,sha256=UEm_aRnql5pwDPTpZgakMLNjlzkKL4RurBFFqH_BLAQ,9526
|
|
63
|
-
spiral/enrichment.py,sha256=
|
|
63
|
+
spiral/enrichment.py,sha256=aXkKgV24w_0XmhTel_SOshJ2xCgkmXsFGZvaE7yXQ8k,6075
|
|
64
64
|
spiral/expressions/__init__.py,sha256=vMNFeeozkWph3dBpEkHPThUhZdT9ZZzxHe71HnkWlDU,8020
|
|
65
65
|
spiral/expressions/base.py,sha256=PvhJkcUSsPSIaxirHVzM9zlqyBXiaiia1HXohXdOmL4,5377
|
|
66
66
|
spiral/expressions/file.py,sha256=HRzGjc3goIlUlKjysoirexDaflNdnj9OoZ6j2uTKZnA,388
|
|
@@ -94,7 +94,7 @@ spiral/protogen/_/substrait/extensions/__init__.py,sha256=nhnEnho70GAT8WPj2xtwJU
|
|
|
94
94
|
spiral/protogen/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
95
95
|
spiral/protogen/util.py,sha256=smnvVo6nYH3FfDm9jqhNLaXz4bbTBaQezHQDCTvZyiQ,1486
|
|
96
96
|
spiral/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
97
|
-
spiral/scan.py,sha256=
|
|
97
|
+
spiral/scan.py,sha256=0ZgzPX5gZ3uvbCyifpjHKXYk_ERphf9xHbrOVKRNkXc,12574
|
|
98
98
|
spiral/server.py,sha256=ztBmB5lBnUz-smQxR_tC8AI5SOhz17wH0MI3GuzDUdM,600
|
|
99
99
|
spiral/settings.py,sha256=sUhMMBCXaPvUYztN_gztD9TjeUYJwVeEcJrq4FLy6M0,3232
|
|
100
100
|
spiral/snapshot.py,sha256=cTobi5jtiANxalGA-isokQHblNmXGtuUvgUGGNVybsI,1555
|
|
@@ -104,6 +104,6 @@ spiral/streaming_/stream.py,sha256=DM1hBDHnWm1ZFKZ-hZ4zxeSXITcUI6kWzwdJZvywI8o,5
|
|
|
104
104
|
spiral/substrait_.py,sha256=AKeOD4KIXvz2J4TYxnIneOiHddtBIyOhuNxVO_uH0eg,12592
|
|
105
105
|
spiral/table.py,sha256=prjDBcm6Qerdq3ypXzfbXb7ngAcO0j-Z9aTeZvzKoqs,12209
|
|
106
106
|
spiral/text_index.py,sha256=FQ9rgIEGLSJryS9lFdMhKtPFey18BXoWbPXyvZPJJ04,442
|
|
107
|
-
spiral/transaction.py,sha256=
|
|
107
|
+
spiral/transaction.py,sha256=KQhx3DvQyxG2C8md-YGsF_PgBRfayI0r_7ebMItDHdI,3938
|
|
108
108
|
spiral/types_.py,sha256=W_jyO7F6rpPiH69jhgSgV7OxQZbOlb1Ho3InpKUP6Eo,155
|
|
109
|
-
pyspiral-0.6.
|
|
109
|
+
pyspiral-0.6.20.dist-info/RECORD,,
|
spiral/_lib.abi3.so
CHANGED
|
Binary file
|
spiral/core/table/__init__.pyi
CHANGED
|
@@ -73,6 +73,7 @@ class Scan:
|
|
|
73
73
|
key_range: KeyRange | None = None,
|
|
74
74
|
key_table: pa.Table | pa.RecordBatch | None = None,
|
|
75
75
|
batch_readahead: int | None = None,
|
|
76
|
+
progress: bool = True,
|
|
76
77
|
) -> pa.RecordBatchReader: ...
|
|
77
78
|
def to_shuffled_record_batches(
|
|
78
79
|
self,
|
|
@@ -111,6 +112,7 @@ class Transaction:
|
|
|
111
112
|
batch_readahead: int | None = None,
|
|
112
113
|
): ...
|
|
113
114
|
def drop_columns(self, column_paths: list[str]): ...
|
|
115
|
+
def ops(self) -> list[Operation]: ...
|
|
114
116
|
def take(self) -> list[Operation]: ...
|
|
115
117
|
def include(self, ops: list[Operation]): ...
|
|
116
118
|
def commit(self): ...
|
spiral/enrichment.py
CHANGED
|
@@ -86,7 +86,6 @@ class Enrichment:
|
|
|
86
86
|
If not provided, the table's default sharding will be used.
|
|
87
87
|
partition_size_bytes: The maximum partition size in bytes.
|
|
88
88
|
If not provided, the default partition size is used.
|
|
89
|
-
tx_dump: Optional path to dump the transaction operations as a JSON file for debugging.
|
|
90
89
|
**kwargs: Additional keyword arguments to pass to `dask.distributed.Client`
|
|
91
90
|
such as `address` to connect to an existing cluster.
|
|
92
91
|
"""
|
|
@@ -100,7 +99,6 @@ class Enrichment:
|
|
|
100
99
|
|
|
101
100
|
# Start a transaction BEFORE the planning scan.
|
|
102
101
|
tx = self._table.txn()
|
|
103
|
-
backup_ops = []
|
|
104
102
|
plan_scan = self._table.spiral.scan(self._projection, where=self._where)
|
|
105
103
|
|
|
106
104
|
# Determine the "tasks". Use the index if provided.
|
|
@@ -123,19 +121,12 @@ class Enrichment:
|
|
|
123
121
|
for result in dask_client.gather(enrichments):
|
|
124
122
|
result: EnrichmentTaskResult
|
|
125
123
|
tx.include(result.ops)
|
|
126
|
-
backup_ops.extend(result.ops)
|
|
127
124
|
|
|
128
125
|
if tx.is_empty():
|
|
129
126
|
logger.warning("Transaction not committed. No rows were read for enrichment.")
|
|
130
127
|
return
|
|
131
128
|
|
|
132
|
-
|
|
133
|
-
if tx_dump is not None:
|
|
134
|
-
with open(tx_dump, "w") as f:
|
|
135
|
-
f.writelines([op.to_json() for op in backup_ops])
|
|
136
|
-
logger.info(f"Transaction dumped to {tx_dump}")
|
|
137
|
-
|
|
138
|
-
tx.commit()
|
|
129
|
+
tx.commit(tx_dump=tx_dump)
|
|
139
130
|
|
|
140
131
|
|
|
141
132
|
@dataclasses.dataclass
|
spiral/scan.py
CHANGED
|
@@ -58,6 +58,7 @@ class Scan:
|
|
|
58
58
|
key_table: pa.Table | pa.RecordBatchReader | None = None,
|
|
59
59
|
batch_size: int | None = None,
|
|
60
60
|
batch_readahead: int | None = None,
|
|
61
|
+
hide_progress_bar: bool = False,
|
|
61
62
|
) -> pa.RecordBatchReader:
|
|
62
63
|
"""Read as a stream of RecordBatches.
|
|
63
64
|
|
|
@@ -71,6 +72,7 @@ class Scan:
|
|
|
71
72
|
IMPORTANT: This is currently only respected when the key_table is used. If key table is a
|
|
72
73
|
RecordBatchReader, the batch_size argument must be None, and the existing batching is respected.
|
|
73
74
|
batch_readahead: the number of batches to prefetch in the background.
|
|
75
|
+
hide_progress_bar: If True, disables the progress bar during reading.
|
|
74
76
|
"""
|
|
75
77
|
if key_range is not None and key_table is not None:
|
|
76
78
|
raise ValueError("Only one of key_range or key_table can be provided.")
|
|
@@ -83,7 +85,9 @@ class Scan:
|
|
|
83
85
|
elif isinstance(key_table, pa.Table):
|
|
84
86
|
key_table = key_table.to_reader(max_chunksize=batch_size)
|
|
85
87
|
|
|
86
|
-
return self.core.to_record_batches(
|
|
88
|
+
return self.core.to_record_batches(
|
|
89
|
+
key_range=key_range, key_table=key_table, batch_readahead=batch_readahead, progress=(not hide_progress_bar)
|
|
90
|
+
)
|
|
87
91
|
|
|
88
92
|
def to_table(
|
|
89
93
|
self,
|
|
@@ -325,4 +329,4 @@ def _read_shard_task(shard: Shard, *, settings_dict, state_json) -> "pd.DataFram
|
|
|
325
329
|
state = ScanState.from_json(state_json)
|
|
326
330
|
task_scan = Scan(sp, sp.core.load_scan(state))
|
|
327
331
|
|
|
328
|
-
return task_scan.
|
|
332
|
+
return task_scan.to_record_batches(key_range=shard.key_range, hide_progress_bar=True).read_all().to_pandas()
|
spiral/transaction.py
CHANGED
|
@@ -1,9 +1,13 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
1
3
|
from spiral.core.table import KeyRange
|
|
2
4
|
from spiral.core.table import Transaction as CoreTransaction
|
|
3
5
|
from spiral.core.table.spec import Operation
|
|
4
6
|
from spiral.expressions.base import ExprLike
|
|
5
7
|
from spiral.scan import Scan
|
|
6
8
|
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
7
11
|
|
|
8
12
|
class Transaction:
|
|
9
13
|
"""Spiral table transaction.
|
|
@@ -91,8 +95,17 @@ class Transaction:
|
|
|
91
95
|
"""
|
|
92
96
|
self._core.include(ops)
|
|
93
97
|
|
|
94
|
-
def commit(self):
|
|
98
|
+
def commit(self, *, tx_dump: str | None = None):
|
|
95
99
|
"""Commit the transaction."""
|
|
100
|
+
# TODO(marko): We can remove this when I have more trust in large tx commits.
|
|
101
|
+
if tx_dump is not None:
|
|
102
|
+
try:
|
|
103
|
+
with open(tx_dump, "w") as f:
|
|
104
|
+
f.writelines([op.to_json() for op in self._core.ops()])
|
|
105
|
+
logger.info(f"Transaction dumped to {tx_dump}")
|
|
106
|
+
except Exception as e:
|
|
107
|
+
logger.error(f"Failed to dump transaction to {tx_dump}: {e}")
|
|
108
|
+
|
|
96
109
|
self._core.commit()
|
|
97
110
|
|
|
98
111
|
def abort(self):
|
|
File without changes
|