pyspiral 0.6.1__cp310-abi3-macosx_11_0_arm64.whl → 0.6.3__cp310-abi3-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pyspiral
3
- Version: 0.6.1
3
+ Version: 0.6.3
4
4
  Classifier: Intended Audience :: Science/Research
5
5
  Classifier: Operating System :: OS Independent
6
6
  Classifier: Programming Language :: Python
@@ -1,8 +1,8 @@
1
- pyspiral-0.6.1.dist-info/METADATA,sha256=dvU4zmSKjosVkzzsl8kYTMvaVneaplCKGJ0ml4SBRxo,1836
2
- pyspiral-0.6.1.dist-info/WHEEL,sha256=Eg6gwEJKNVa1g53Yg4W5oLzLA6e9MoXTlIiHdDxmtOw,103
3
- pyspiral-0.6.1.dist-info/entry_points.txt,sha256=uft7u-a6g40NLt4Q6BleWbK4NY0M8nZuYPpP8DV0EOk,45
1
+ pyspiral-0.6.3.dist-info/METADATA,sha256=93TxmaIrXRNq5xlBQAx76ClIGjWdnh9bfL_f6KrB9K0,1836
2
+ pyspiral-0.6.3.dist-info/WHEEL,sha256=Eg6gwEJKNVa1g53Yg4W5oLzLA6e9MoXTlIiHdDxmtOw,103
3
+ pyspiral-0.6.3.dist-info/entry_points.txt,sha256=uft7u-a6g40NLt4Q6BleWbK4NY0M8nZuYPpP8DV0EOk,45
4
4
  spiral/__init__.py,sha256=5c0faqg-kHZBDwriQ7LzLAMcFolIucp-IA1EzNvCZ3k,711
5
- spiral/_lib.abi3.so,sha256=PBqFvphQEfKc4zSGihmAZIq_t1uO1EvVJYzZXz1P5dI,62642240
5
+ spiral/_lib.abi3.so,sha256=mUXCHvNLIOssJZROGFsmyfHMqOeuLEmUXjgV1_8HORk,63655744
6
6
  spiral/adbc.py,sha256=7IxfWIeQN-fh0W5OdN_PP2x3pzQYg6ZUOLsHg3jktqw,14842
7
7
  spiral/api/__init__.py,sha256=ULBlVq3PnfNOO6T5naE_ULmmii-83--qTuN2PpAUQN0,2241
8
8
  spiral/api/admin.py,sha256=A1iVR1XYJSObZivPAD5UzmPuMgupXc9kaHNYYa_kwfs,585
@@ -39,8 +39,8 @@ spiral/client.py,sha256=Po9xgCH3NwVsCeRZMm3eJUPV77Rknyj-9MfCS1TbdTg,6623
39
39
  spiral/core/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
40
40
  spiral/core/authn/__init__.pyi,sha256=Jw_8ywTMDTwgAtGxMtFED63rU0jOgrv-eZtaZ5sR5t4,757
41
41
  spiral/core/client/__init__.pyi,sha256=iEhZgbySG5LScfrtkiiHW1iHghgehsrVmPP-v5Pv_vk,5740
42
- spiral/core/table/__init__.pyi,sha256=_N9JyGoJXcbJWXjZal0HSSxNbbMptrqKZJHj5GA_mQg,2974
43
- spiral/core/table/manifests/__init__.pyi,sha256=3V59-K1qr1z2dGfgRKXaHSVheK8NNw8Q8PFhfbeQd_4,1065
42
+ spiral/core/table/__init__.pyi,sha256=sjjShdgM_Uh8Roou1k02MnrqYpdAX4QuyRlIRlnyp1M,3073
43
+ spiral/core/table/manifests/__init__.pyi,sha256=eVfDpmhYSjafIvvALqAkZe5baN3Y1HpKpxYEbjwd4gQ,1043
44
44
  spiral/core/table/metastore/__init__.pyi,sha256=rc3u9MwEKRvL2kxOc8lBorddFRnM8o_o1frqtae86a4,1697
45
45
  spiral/core/table/spec/__init__.pyi,sha256=0NyGeyEhV_ebwKWVU3sqSvdF2D9v8kEVwo6wYAHF99M,5579
46
46
  spiral/dataset.py,sha256=NNqG-oOrhbmNC2OMZ9AYAm4YkwwBozeRI6zXtz4cspA,8008
@@ -48,7 +48,7 @@ spiral/datetime_.py,sha256=1TA1RYIRU22qcUuipIjVhAtGnPDVn2z9WttuhkmfkwY,964
48
48
  spiral/debug/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
49
49
  spiral/debug/manifests.py,sha256=oaPB4534pQdqvPXCZetVNSvvhpdXTrv_1pN-_bAkeAo,2893
50
50
  spiral/debug/metrics.py,sha256=XdRDcjggtsLNGCAjam6IxG9072pz_d2C8iLApNRFUtk,2044
51
- spiral/debug/scan.py,sha256=9bMmVQFs5M6Rldm0fmrmmvn9LbSSTKBV5tIu37mEn78,8938
51
+ spiral/debug/scan.py,sha256=UEm_aRnql5pwDPTpZgakMLNjlzkKL4RurBFFqH_BLAQ,9526
52
52
  spiral/expressions/__init__.py,sha256=T8PIb0_UB9kynK0dpWbUD4No5lKRTG-wKnao8xOcXjY,6381
53
53
  spiral/expressions/base.py,sha256=OOUDrbkLBE0lSkAmM-6FP2F2N8zhN_in3S_UDrWLDeQ,4805
54
54
  spiral/expressions/http.py,sha256=begUydWoFHEqjeLkATvI_v66Ez6_rR-OQBWO5cHbb9c,2742
@@ -90,10 +90,10 @@ spiral/settings.py,sha256=Nap68xM-1ZvF3yDhkyRnNDIAVMIgxmIksglg_1iT0-0,3069
90
90
  spiral/snapshot.py,sha256=_l2wrqUXz2RARjIDxOWw4aQpegJohvggIoWuCllzStA,1506
91
91
  spiral/streaming_/__init__.py,sha256=s7MlW2ERsuZmZGExLFL6RcZon2e0tNBocBg5ANgki7k,61
92
92
  spiral/streaming_/reader.py,sha256=Kpqknv2jn12jUhHOEEDArj0JZwrWb8XjoOGs9HrdVyA,4047
93
- spiral/streaming_/stream.py,sha256=xFTtGB6CspEKstzBeyyaOeOR3KDiJc21m07ZpD1AXZQ,5669
93
+ spiral/streaming_/stream.py,sha256=nxJEisPfZ2-Ebkm83hz_3v8NH27FxBku-1jw7UDlQuM,5881
94
94
  spiral/substrait_.py,sha256=AKeOD4KIXvz2J4TYxnIneOiHddtBIyOhuNxVO_uH0eg,12592
95
- spiral/table.py,sha256=knJjSCPrFLXmjCbUzslGZ7avH0V_Z0o-rsctWDzmGK4,9821
95
+ spiral/table.py,sha256=ZQFq5tuovDjQcpi38b5FUMuHNGI5XV0MnZbC6vbza1o,10312
96
96
  spiral/text_index.py,sha256=FQ9rgIEGLSJryS9lFdMhKtPFey18BXoWbPXyvZPJJ04,442
97
- spiral/transaction.py,sha256=O3vSaTc7zpeC5qbqnj-VWKwK6rrp_mYV2JuPHp2ZJ80,1464
97
+ spiral/transaction.py,sha256=nSykH4UGs9hGtWuSWK9YyT9jfEuvzfkKoUgMM5Xt4zU,1841
98
98
  spiral/types_.py,sha256=W_jyO7F6rpPiH69jhgSgV7OxQZbOlb1Ho3InpKUP6Eo,155
99
- pyspiral-0.6.1.dist-info/RECORD,,
99
+ pyspiral-0.6.3.dist-info/RECORD,,
spiral/_lib.abi3.so CHANGED
Binary file
@@ -77,6 +77,7 @@ class Scan:
77
77
  output_path: str,
78
78
  key_range: KeyRange,
79
79
  expected_cardinality: int | None = None,
80
+ shard_row_block_size: int = 8192,
80
81
  ) -> None: ...
81
82
 
82
83
  class KeySpaceState:
@@ -94,6 +95,7 @@ class Transaction:
94
95
  status: str
95
96
 
96
97
  def write(self, expr: Expr, *, partition_size_bytes: int | None = None): ...
98
+ def drop_columns(self, column_paths: list[str]): ...
97
99
  def commit(self): ...
98
100
  def abort(self): ...
99
101
  def metrics(self) -> dict[str, Any]: ...
@@ -8,7 +8,7 @@ class FragmentManifest:
8
8
  def __getitem__(self, idx: int): ...
9
9
  def to_arrow(self) -> pa.RecordBatchReader: ...
10
10
  @staticmethod
11
- def compute_schema(data_schema: pa.Schema) -> pa.Schema: ...
11
+ def compute_schema() -> pa.Schema: ...
12
12
  @staticmethod
13
13
  def from_fragment(fragment_file: FragmentFile) -> FragmentManifest: ...
14
14
  @staticmethod
spiral/debug/scan.py CHANGED
@@ -146,14 +146,32 @@ def _get_fragment_color(manifest_file: FragmentFile, color_index, total_colors):
146
146
  return cm.viridis(color_index / total_colors)
147
147
 
148
148
 
149
+ def _get_human_size(size_bytes: int) -> str:
150
+ # Convert bytes to a human-readable format
151
+ for unit in ["B", "KB", "MB", "GB", "TB"]:
152
+ if size_bytes < 1024:
153
+ return f"{size_bytes:.2f} {unit}"
154
+ size_bytes /= 1024
155
+ return f"{size_bytes:.2f} PB"
156
+
157
+
158
+ def _maybe_truncate(text, max_length: int = 30) -> str:
159
+ text = str(text)
160
+ if len(text) <= max_length:
161
+ return text
162
+
163
+ half_length = (max_length - 3) // 2
164
+ return text[:half_length] + "..." + text[-half_length:]
165
+
166
+
149
167
  def _get_fragment_legend(manifest_file: FragmentFile):
150
168
  return "\n".join(
151
169
  [
152
170
  f"id: {manifest_file.id}",
153
- f"size: {manifest_file.size_bytes:,} bytes",
171
+ f"size: {_get_human_size(manifest_file.size_bytes)} ({manifest_file.size_bytes} bytes)",
154
172
  f"key_span: {manifest_file.key_span}",
155
- f"key_min: {manifest_file.key_extent.min}",
156
- f"key_max: {manifest_file.key_extent.max}",
173
+ f"key_min: {_maybe_truncate(manifest_file.key_extent.min)}",
174
+ f"key_max: {_maybe_truncate(manifest_file.key_extent.max)}",
157
175
  f"format: {manifest_file.format}",
158
176
  f"level: {manifest_file.level}",
159
177
  f"committed_at: {_format_timestamp(manifest_file.committed_at)}",
@@ -24,10 +24,13 @@ class SpiralStream:
24
24
  Stream can be passed to MDS's StreamingDataset in `streams` argument.
25
25
  """
26
26
 
27
- def __init__(self, scan: CoreScan, shards: list[Shard], cache_dir: str | None = None):
27
+ def __init__(
28
+ self, scan: CoreScan, shards: list[Shard], cache_dir: str | None = None, shard_row_block_size: int = 8192
29
+ ):
28
30
  self._scan = scan
29
31
  # TODO(marko): Read shards only on world.is_local_leader in `get_shards` and materialize on disk.
30
32
  self._shards = shards
33
+ self.shard_row_block_size = shard_row_block_size
31
34
 
32
35
  if cache_dir is not None:
33
36
  if not os.path.exists(cache_dir):
@@ -92,7 +95,12 @@ class SpiralStream:
92
95
  return 0
93
96
 
94
97
  # This method exists but it's hidden.
95
- self._scan._prepare_shard(shard_path, shard.shard.key_range, expected_cardinality=shard.shard.cardinality)
98
+ self._scan._prepare_shard(
99
+ shard_path,
100
+ shard.shard.key_range,
101
+ expected_cardinality=shard.shard.cardinality,
102
+ shard_row_block_size=self.shard_row_block_size,
103
+ )
96
104
 
97
105
  # Get the size of the file on disk.
98
106
  stat = os.stat(shard_path)
spiral/table.py CHANGED
@@ -109,6 +109,17 @@ class Table(Expr):
109
109
  partition_size_bytes=partition_size_bytes,
110
110
  )
111
111
 
112
+ def drop_columns(self, column_paths: list[str]) -> None:
113
+ """
114
+ Drops the specified columns from the table.
115
+
116
+
117
+ :param column_paths: Fully qualified column names. (e.g., "column_name" or "nested.field").
118
+ All columns must exist, if a a column doesn't exist the function will return an error.
119
+ """
120
+ with self.txn() as txn:
121
+ txn.drop_columns(column_paths)
122
+
112
123
  def snapshot(self, asof: datetime | int | None = None) -> Snapshot:
113
124
  """Returns a snapshot of the table at the given timestamp."""
114
125
  if isinstance(asof, datetime):
@@ -185,6 +196,7 @@ class Table(Expr):
185
196
  batch_size: int | None = None,
186
197
  cache_dir: str | None = None,
187
198
  cache_limit: int | str | None = None,
199
+ predownload: int | None = None,
188
200
  sampling_method: str = "balanced",
189
201
  sampling_granularity: int = 1,
190
202
  partition_algo: str = "relaxed",
@@ -215,6 +227,7 @@ class Table(Expr):
215
227
  streams=[stream],
216
228
  batch_size=batch_size,
217
229
  cache_limit=cache_limit,
230
+ predownload=predownload,
218
231
  sampling_method=sampling_method,
219
232
  sampling_granularity=sampling_granularity,
220
233
  partition_algo=partition_algo,
spiral/transaction.py CHANGED
@@ -39,6 +39,16 @@ class Transaction:
39
39
 
40
40
  self._core.write(expr.__expr__, partition_size_bytes=partition_size_bytes)
41
41
 
42
+ def drop_columns(self, column_paths: list[str]):
43
+ """
44
+ Drops the specified columns from the table.
45
+
46
+
47
+ :param column_paths: Fully qualified column names. (e.g., "column_name" or "nested.field").
48
+ All columns must exist, if a a column doesn't exist the function will return an error.
49
+ """
50
+ self._core.drop_columns(column_paths)
51
+
42
52
  def commit(self):
43
53
  """Commit the transaction."""
44
54
  self._core.commit()