pyspiral 0.6.18__cp312-abi3-macosx_11_0_arm64.whl → 0.6.20__cp312-abi3-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyspiral might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pyspiral
3
- Version: 0.6.18
3
+ Version: 0.6.20
4
4
  Classifier: Intended Audience :: Science/Research
5
5
  Classifier: Operating System :: OS Independent
6
6
  Classifier: Programming Language :: Python
@@ -1,8 +1,8 @@
1
- pyspiral-0.6.18.dist-info/METADATA,sha256=25UcwQPTisubAD48g4I_FVeNQ7PhcM5NFyE1loBsGxo,1875
2
- pyspiral-0.6.18.dist-info/WHEEL,sha256=KQvxBiy7GLcML6Ad3w_ZPrgSvER1uXd7aYb6wy6b44Y,103
3
- pyspiral-0.6.18.dist-info/entry_points.txt,sha256=uft7u-a6g40NLt4Q6BleWbK4NY0M8nZuYPpP8DV0EOk,45
1
+ pyspiral-0.6.20.dist-info/METADATA,sha256=1lIl3Ge03ZbmZSes7OFOIAreoQnR8jrYm7je4lmXVS4,1875
2
+ pyspiral-0.6.20.dist-info/WHEEL,sha256=KQvxBiy7GLcML6Ad3w_ZPrgSvER1uXd7aYb6wy6b44Y,103
3
+ pyspiral-0.6.20.dist-info/entry_points.txt,sha256=R96Y3FpYX6XbQu9qMPfUTgiCcf4qM9OBQQZTDdBkZwA,74
4
4
  spiral/__init__.py,sha256=gAysTwG_oEeKVMdCOfOzDhl0bM2miiK8Ds2vvUihBWw,1153
5
- spiral/_lib.abi3.so,sha256=pLhJoDJIT_f0Ibvp2FwaTnI6VWXBdb-GPABakjheHHM,70263280
5
+ spiral/_lib.abi3.so,sha256=OjvPM5bRKKcb2gqVePTP3-DLnCQvLkv_ZrZBGdgQ-sQ,70404048
6
6
  spiral/adbc.py,sha256=7IxfWIeQN-fh0W5OdN_PP2x3pzQYg6ZUOLsHg3jktqw,14842
7
7
  spiral/api/__init__.py,sha256=ULBlVq3PnfNOO6T5naE_ULmmii-83--qTuN2PpAUQN0,2241
8
8
  spiral/api/admin.py,sha256=A1iVR1XYJSObZivPAD5UzmPuMgupXc9kaHNYYa_kwfs,585
@@ -49,7 +49,7 @@ spiral/core/expr/struct_/__init__.pyi,sha256=MXckd98eV_x3X0RhEWvlkA3DcDXRtLs5pNn
49
49
  spiral/core/expr/text/__init__.pyi,sha256=ed83n1xcsGY7_QDhMmJGnSQ20UrJFXcdv1AveSEcS1c,175
50
50
  spiral/core/expr/udf/__init__.pyi,sha256=zsZs081KVhY3-1JidqTkWMW81Qd_ScoTGZvasIhIK-4,358
51
51
  spiral/core/expr/video/__init__.pyi,sha256=nQJEcSsigZuRpMjkI_O4EEtMK_n2zRvorcL_KEeD5vU,95
52
- spiral/core/table/__init__.pyi,sha256=YBL12_JPTWz2mNbqlDqbT1exxVJYzwfXdHCi6Z37JxA,3841
52
+ spiral/core/table/__init__.pyi,sha256=szCtZqZ_L0vF_99x7d3olwazJdEn5LwkgRK-8QEOzrI,3914
53
53
  spiral/core/table/manifests/__init__.pyi,sha256=eVfDpmhYSjafIvvALqAkZe5baN3Y1HpKpxYEbjwd4gQ,1043
54
54
  spiral/core/table/metastore/__init__.pyi,sha256=rc3u9MwEKRvL2kxOc8lBorddFRnM8o_o1frqtae86a4,1697
55
55
  spiral/core/table/spec/__init__.pyi,sha256=fVuc2j3uoTdWfYNm720OfUIgrLYw9fRwj44maI5bgdY,5709
@@ -60,7 +60,7 @@ spiral/debug/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
60
60
  spiral/debug/manifests.py,sha256=7f1O3ba9mrA5nXpOF9cEIQuUAteP5wiBkFy_diQJ7No,3216
61
61
  spiral/debug/metrics.py,sha256=XdRDcjggtsLNGCAjam6IxG9072pz_d2C8iLApNRFUtk,2044
62
62
  spiral/debug/scan.py,sha256=UEm_aRnql5pwDPTpZgakMLNjlzkKL4RurBFFqH_BLAQ,9526
63
- spiral/enrichment.py,sha256=YDaXcJPtmJzpLrYmn2pdllVcRIkXlb578KKgkIb38Eo,6518
63
+ spiral/enrichment.py,sha256=aXkKgV24w_0XmhTel_SOshJ2xCgkmXsFGZvaE7yXQ8k,6075
64
64
  spiral/expressions/__init__.py,sha256=vMNFeeozkWph3dBpEkHPThUhZdT9ZZzxHe71HnkWlDU,8020
65
65
  spiral/expressions/base.py,sha256=PvhJkcUSsPSIaxirHVzM9zlqyBXiaiia1HXohXdOmL4,5377
66
66
  spiral/expressions/file.py,sha256=HRzGjc3goIlUlKjysoirexDaflNdnj9OoZ6j2uTKZnA,388
@@ -94,7 +94,7 @@ spiral/protogen/_/substrait/extensions/__init__.py,sha256=nhnEnho70GAT8WPj2xtwJU
94
94
  spiral/protogen/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
95
95
  spiral/protogen/util.py,sha256=smnvVo6nYH3FfDm9jqhNLaXz4bbTBaQezHQDCTvZyiQ,1486
96
96
  spiral/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
97
- spiral/scan.py,sha256=csbk5ePbU-RlEVIF7isccF2zRBB8L8ZY_HEpalMjgLY,12340
97
+ spiral/scan.py,sha256=0ZgzPX5gZ3uvbCyifpjHKXYk_ERphf9xHbrOVKRNkXc,12574
98
98
  spiral/server.py,sha256=ztBmB5lBnUz-smQxR_tC8AI5SOhz17wH0MI3GuzDUdM,600
99
99
  spiral/settings.py,sha256=sUhMMBCXaPvUYztN_gztD9TjeUYJwVeEcJrq4FLy6M0,3232
100
100
  spiral/snapshot.py,sha256=cTobi5jtiANxalGA-isokQHblNmXGtuUvgUGGNVybsI,1555
@@ -104,6 +104,6 @@ spiral/streaming_/stream.py,sha256=DM1hBDHnWm1ZFKZ-hZ4zxeSXITcUI6kWzwdJZvywI8o,5
104
104
  spiral/substrait_.py,sha256=AKeOD4KIXvz2J4TYxnIneOiHddtBIyOhuNxVO_uH0eg,12592
105
105
  spiral/table.py,sha256=prjDBcm6Qerdq3ypXzfbXb7ngAcO0j-Z9aTeZvzKoqs,12209
106
106
  spiral/text_index.py,sha256=FQ9rgIEGLSJryS9lFdMhKtPFey18BXoWbPXyvZPJJ04,442
107
- spiral/transaction.py,sha256=hQm6DfCklMDpIYJ9qA2wR45cCuUPGCiJy1tHGE3AsEY,3418
107
+ spiral/transaction.py,sha256=KQhx3DvQyxG2C8md-YGsF_PgBRfayI0r_7ebMItDHdI,3938
108
108
  spiral/types_.py,sha256=W_jyO7F6rpPiH69jhgSgV7OxQZbOlb1Ho3InpKUP6Eo,155
109
- pyspiral-0.6.18.dist-info/RECORD,,
109
+ pyspiral-0.6.20.dist-info/RECORD,,
@@ -1,2 +1,3 @@
1
1
  [console_scripts]
2
2
  spiral=spiral.cli.app:main
3
+ pyspiral=spiral.cli.app:main
spiral/_lib.abi3.so CHANGED
Binary file
@@ -73,6 +73,7 @@ class Scan:
73
73
  key_range: KeyRange | None = None,
74
74
  key_table: pa.Table | pa.RecordBatch | None = None,
75
75
  batch_readahead: int | None = None,
76
+ progress: bool = True,
76
77
  ) -> pa.RecordBatchReader: ...
77
78
  def to_shuffled_record_batches(
78
79
  self,
@@ -111,6 +112,7 @@ class Transaction:
111
112
  batch_readahead: int | None = None,
112
113
  ): ...
113
114
  def drop_columns(self, column_paths: list[str]): ...
115
+ def ops(self) -> list[Operation]: ...
114
116
  def take(self) -> list[Operation]: ...
115
117
  def include(self, ops: list[Operation]): ...
116
118
  def commit(self): ...
spiral/enrichment.py CHANGED
@@ -86,7 +86,6 @@ class Enrichment:
86
86
  If not provided, the table's default sharding will be used.
87
87
  partition_size_bytes: The maximum partition size in bytes.
88
88
  If not provided, the default partition size is used.
89
- tx_dump: Optional path to dump the transaction operations as a JSON file for debugging.
90
89
  **kwargs: Additional keyword arguments to pass to `dask.distributed.Client`
91
90
  such as `address` to connect to an existing cluster.
92
91
  """
@@ -100,7 +99,6 @@ class Enrichment:
100
99
 
101
100
  # Start a transaction BEFORE the planning scan.
102
101
  tx = self._table.txn()
103
- backup_ops = []
104
102
  plan_scan = self._table.spiral.scan(self._projection, where=self._where)
105
103
 
106
104
  # Determine the "tasks". Use the index if provided.
@@ -123,19 +121,12 @@ class Enrichment:
123
121
  for result in dask_client.gather(enrichments):
124
122
  result: EnrichmentTaskResult
125
123
  tx.include(result.ops)
126
- backup_ops.extend(result.ops)
127
124
 
128
125
  if tx.is_empty():
129
126
  logger.warning("Transaction not committed. No rows were read for enrichment.")
130
127
  return
131
128
 
132
- # TODO(marko): We can remove this when I have more trust in very large tx commits.
133
- if tx_dump is not None:
134
- with open(tx_dump, "w") as f:
135
- f.writelines([op.to_json() for op in backup_ops])
136
- logger.info(f"Transaction dumped to {tx_dump}")
137
-
138
- tx.commit()
129
+ tx.commit(tx_dump=tx_dump)
139
130
 
140
131
 
141
132
  @dataclasses.dataclass
spiral/scan.py CHANGED
@@ -58,6 +58,7 @@ class Scan:
58
58
  key_table: pa.Table | pa.RecordBatchReader | None = None,
59
59
  batch_size: int | None = None,
60
60
  batch_readahead: int | None = None,
61
+ hide_progress_bar: bool = False,
61
62
  ) -> pa.RecordBatchReader:
62
63
  """Read as a stream of RecordBatches.
63
64
 
@@ -71,6 +72,7 @@ class Scan:
71
72
  IMPORTANT: This is currently only respected when the key_table is used. If key table is a
72
73
  RecordBatchReader, the batch_size argument must be None, and the existing batching is respected.
73
74
  batch_readahead: the number of batches to prefetch in the background.
75
+ hide_progress_bar: If True, disables the progress bar during reading.
74
76
  """
75
77
  if key_range is not None and key_table is not None:
76
78
  raise ValueError("Only one of key_range or key_table can be provided.")
@@ -83,7 +85,9 @@ class Scan:
83
85
  elif isinstance(key_table, pa.Table):
84
86
  key_table = key_table.to_reader(max_chunksize=batch_size)
85
87
 
86
- return self.core.to_record_batches(key_range=key_range, key_table=key_table, batch_readahead=batch_readahead)
88
+ return self.core.to_record_batches(
89
+ key_range=key_range, key_table=key_table, batch_readahead=batch_readahead, progress=(not hide_progress_bar)
90
+ )
87
91
 
88
92
  def to_table(
89
93
  self,
@@ -325,4 +329,4 @@ def _read_shard_task(shard: Shard, *, settings_dict, state_json) -> "pd.DataFram
325
329
  state = ScanState.from_json(state_json)
326
330
  task_scan = Scan(sp, sp.core.load_scan(state))
327
331
 
328
- return task_scan.to_pandas(key_range=shard.key_range)
332
+ return task_scan.to_record_batches(key_range=shard.key_range, hide_progress_bar=True).read_all().to_pandas()
spiral/transaction.py CHANGED
@@ -1,9 +1,13 @@
1
+ import logging
2
+
1
3
  from spiral.core.table import KeyRange
2
4
  from spiral.core.table import Transaction as CoreTransaction
3
5
  from spiral.core.table.spec import Operation
4
6
  from spiral.expressions.base import ExprLike
5
7
  from spiral.scan import Scan
6
8
 
9
+ logger = logging.getLogger(__name__)
10
+
7
11
 
8
12
  class Transaction:
9
13
  """Spiral table transaction.
@@ -91,8 +95,17 @@ class Transaction:
91
95
  """
92
96
  self._core.include(ops)
93
97
 
94
- def commit(self):
98
+ def commit(self, *, tx_dump: str | None = None):
95
99
  """Commit the transaction."""
100
+ # TODO(marko): We can remove this when I have more trust in large tx commits.
101
+ if tx_dump is not None:
102
+ try:
103
+ with open(tx_dump, "w") as f:
104
+ f.writelines([op.to_json() for op in self._core.ops()])
105
+ logger.info(f"Transaction dumped to {tx_dump}")
106
+ except Exception as e:
107
+ logger.error(f"Failed to dump transaction to {tx_dump}: {e}")
108
+
96
109
  self._core.commit()
97
110
 
98
111
  def abort(self):