pyspiral 0.6.0__cp310-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl → 0.6.2__cp310-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pyspiral
3
- Version: 0.6.0
3
+ Version: 0.6.2
4
4
  Classifier: Intended Audience :: Science/Research
5
5
  Classifier: Operating System :: OS Independent
6
6
  Classifier: Programming Language :: Python
@@ -1,8 +1,8 @@
1
- pyspiral-0.6.0.dist-info/METADATA,sha256=ETkF1eW1JfgWeoRKxgF7eDX0K9LWDOOixTO8mWW7GlA,1836
2
- pyspiral-0.6.0.dist-info/WHEEL,sha256=PxcKzGLVtZeSnGJDErQ-Emkn2AvBXbmzIogfnaf7-q8,130
3
- pyspiral-0.6.0.dist-info/entry_points.txt,sha256=uft7u-a6g40NLt4Q6BleWbK4NY0M8nZuYPpP8DV0EOk,45
4
- spiral/__init__.py,sha256=iAicRWWphlRNKjIS_BFTSqIweCLwJTJTbyWF0BvqMLY,667
5
- spiral/_lib.abi3.so,sha256=SbX-oo3QcW2lvfAkklKzK8dqE_iy2cL7uiyP_2y5m4E,54872904
1
+ pyspiral-0.6.2.dist-info/METADATA,sha256=n0a5SuYMybj-eUwB9No9IFxSk9Cn_pDZfjpx0HuDxRw,1836
2
+ pyspiral-0.6.2.dist-info/WHEEL,sha256=PxcKzGLVtZeSnGJDErQ-Emkn2AvBXbmzIogfnaf7-q8,130
3
+ pyspiral-0.6.2.dist-info/entry_points.txt,sha256=uft7u-a6g40NLt4Q6BleWbK4NY0M8nZuYPpP8DV0EOk,45
4
+ spiral/__init__.py,sha256=5c0faqg-kHZBDwriQ7LzLAMcFolIucp-IA1EzNvCZ3k,711
5
+ spiral/_lib.abi3.so,sha256=oOsM8f904rj2leVKR8cCOkLQ7uxcf36tKVIN0bCVOok,55184120
6
6
  spiral/adbc.py,sha256=7IxfWIeQN-fh0W5OdN_PP2x3pzQYg6ZUOLsHg3jktqw,14842
7
7
  spiral/api/__init__.py,sha256=ULBlVq3PnfNOO6T5naE_ULmmii-83--qTuN2PpAUQN0,2241
8
8
  spiral/api/admin.py,sha256=A1iVR1XYJSObZivPAD5UzmPuMgupXc9kaHNYYa_kwfs,585
@@ -24,13 +24,13 @@ spiral/cli/app.py,sha256=HWCjMJLzSz_JaiLF046jzC9A4-yvzS6506D3cOR2Vgc,1773
24
24
  spiral/cli/console.py,sha256=6JHbAQV6MFWz3P-VzqPOjhHpkIQagsCdzTMvmuDKMkU,2580
25
25
  spiral/cli/fs.py,sha256=UREIJhjr6MfIdcKK7pjUKICd0wsQULhQiWRVWUnQ0dc,4376
26
26
  spiral/cli/iceberg.py,sha256=Q14tcGcn1LixbFCYP0GhfYwFFXTmmi8tqBPYwalJEyE,3248
27
- spiral/cli/key_spaces.py,sha256=EEgn7Zjc16CkeQO-4vWdwEqCTddTMiUAdLh4vG4AoYk,2218
27
+ spiral/cli/key_spaces.py,sha256=x3IFRP5d47pKiAHeWExYMOBaT2TwxbWjVM01SUqKrwI,2943
28
28
  spiral/cli/login.py,sha256=TgTr37ImgG1NKN8VbtqkxVAYaZFpMXMwPAb23gVldEw,649
29
29
  spiral/cli/orgs.py,sha256=fmOuLxpeIFfKqePRi292Gv9k-EF5pPn_tbKd2BLl2Ig,2869
30
30
  spiral/cli/printer.py,sha256=HcvSUpaMItzmhBUfIHROK1Z3SL8J8wDopS3Qo8H00uw,1781
31
31
  spiral/cli/projects.py,sha256=UYrBlLcFacuXExdLX1sZByfvkz9MRtk_0oRAZvqHa0w,5105
32
32
  spiral/cli/state.py,sha256=10wTIVQ0SJkY67Z6-KQ1LFlt3aVIPmZhoHFdTwp4kNA,130
33
- spiral/cli/tables.py,sha256=8-9ay0mXS1Ew7DMoYFfHqC-Ro0TWsOTTinusS7M1slE,4639
33
+ spiral/cli/tables.py,sha256=48lZ0wPQSCTul1vn-Qx6Be5eGnw75Abtw2zxMK9dCPg,4613
34
34
  spiral/cli/telemetry.py,sha256=Uxo1Q1FkKJ6n6QNGOUmL3j_pRRWRx0qWIhoP-U9BuR0,589
35
35
  spiral/cli/text.py,sha256=DlWGe4JrkdERAiqyITNpk91Wqb63Re99rNYlIFsIamc,4031
36
36
  spiral/cli/types.py,sha256=XYzo1GgX7dBBItoBSrHI4vO5C2lLmS2sktb-2GnGH3E,1362
@@ -38,15 +38,15 @@ spiral/cli/workloads.py,sha256=2_SLfQTFN6y73R9H0i9dk8VIOVagKxSxOpHXC56yptY,2015
38
38
  spiral/client.py,sha256=Po9xgCH3NwVsCeRZMm3eJUPV77Rknyj-9MfCS1TbdTg,6623
39
39
  spiral/core/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
40
40
  spiral/core/authn/__init__.pyi,sha256=Jw_8ywTMDTwgAtGxMtFED63rU0jOgrv-eZtaZ5sR5t4,757
41
- spiral/core/client/__init__.pyi,sha256=uONPrQbKvlNjnIDLT7c0wG9GMWwNveRd6aHJu6NuQ74,5228
42
- spiral/core/table/__init__.pyi,sha256=uZHXdm160fNAAoz3jnFPtbZl8EFEyLwS3wo0r7jEMOo,3807
43
- spiral/core/table/manifests/__init__.pyi,sha256=3V59-K1qr1z2dGfgRKXaHSVheK8NNw8Q8PFhfbeQd_4,1065
41
+ spiral/core/client/__init__.pyi,sha256=iEhZgbySG5LScfrtkiiHW1iHghgehsrVmPP-v5Pv_vk,5740
42
+ spiral/core/table/__init__.pyi,sha256=sjjShdgM_Uh8Roou1k02MnrqYpdAX4QuyRlIRlnyp1M,3073
43
+ spiral/core/table/manifests/__init__.pyi,sha256=eVfDpmhYSjafIvvALqAkZe5baN3Y1HpKpxYEbjwd4gQ,1043
44
44
  spiral/core/table/metastore/__init__.pyi,sha256=rc3u9MwEKRvL2kxOc8lBorddFRnM8o_o1frqtae86a4,1697
45
45
  spiral/core/table/spec/__init__.pyi,sha256=0NyGeyEhV_ebwKWVU3sqSvdF2D9v8kEVwo6wYAHF99M,5579
46
46
  spiral/dataset.py,sha256=NNqG-oOrhbmNC2OMZ9AYAm4YkwwBozeRI6zXtz4cspA,8008
47
47
  spiral/datetime_.py,sha256=1TA1RYIRU22qcUuipIjVhAtGnPDVn2z9WttuhkmfkwY,964
48
48
  spiral/debug/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
49
- spiral/debug/manifests.py,sha256=CGC5C1HG4XCMhlkNZI-woBAIO-EQSVSqXuDUsiV-d7g,2935
49
+ spiral/debug/manifests.py,sha256=oaPB4534pQdqvPXCZetVNSvvhpdXTrv_1pN-_bAkeAo,2893
50
50
  spiral/debug/metrics.py,sha256=XdRDcjggtsLNGCAjam6IxG9072pz_d2C8iLApNRFUtk,2044
51
51
  spiral/debug/scan.py,sha256=9bMmVQFs5M6Rldm0fmrmmvn9LbSSTKBV5tIu37mEn78,8938
52
52
  spiral/expressions/__init__.py,sha256=T8PIb0_UB9kynK0dpWbUD4No5lKRTG-wKnao8xOcXjY,6381
@@ -84,16 +84,16 @@ spiral/protogen/_/substrait/extensions/__init__.py,sha256=sCMvwWCXWu2cSGiTEH0hRj
84
84
  spiral/protogen/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
85
85
  spiral/protogen/util.py,sha256=smnvVo6nYH3FfDm9jqhNLaXz4bbTBaQezHQDCTvZyiQ,1486
86
86
  spiral/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
87
- spiral/scan.py,sha256=nItu2SNqp5-f2LPMl4EXrEUxV5tJJGEVQEPMSko3STY,7044
87
+ spiral/scan.py,sha256=20-NSGsoXYf6uKQ7yEdbbwT8ijIK7KxKTctycsl0AIk,7073
88
88
  spiral/server.py,sha256=ztBmB5lBnUz-smQxR_tC8AI5SOhz17wH0MI3GuzDUdM,600
89
89
  spiral/settings.py,sha256=Nap68xM-1ZvF3yDhkyRnNDIAVMIgxmIksglg_1iT0-0,3069
90
90
  spiral/snapshot.py,sha256=_l2wrqUXz2RARjIDxOWw4aQpegJohvggIoWuCllzStA,1506
91
91
  spiral/streaming_/__init__.py,sha256=s7MlW2ERsuZmZGExLFL6RcZon2e0tNBocBg5ANgki7k,61
92
- spiral/streaming_/reader.py,sha256=CahNNeJznRuUUTtWNexoEBZtKh9bikfaI6UCnER3Jhw,3451
93
- spiral/streaming_/stream.py,sha256=xFTtGB6CspEKstzBeyyaOeOR3KDiJc21m07ZpD1AXZQ,5669
92
+ spiral/streaming_/reader.py,sha256=Kpqknv2jn12jUhHOEEDArj0JZwrWb8XjoOGs9HrdVyA,4047
93
+ spiral/streaming_/stream.py,sha256=nxJEisPfZ2-Ebkm83hz_3v8NH27FxBku-1jw7UDlQuM,5881
94
94
  spiral/substrait_.py,sha256=AKeOD4KIXvz2J4TYxnIneOiHddtBIyOhuNxVO_uH0eg,12592
95
- spiral/table.py,sha256=Y5_FqZGjXwt7LT_SYzUA-M-zOBcOdJs7d_t919TAc1k,9605
95
+ spiral/table.py,sha256=ZQFq5tuovDjQcpi38b5FUMuHNGI5XV0MnZbC6vbza1o,10312
96
96
  spiral/text_index.py,sha256=FQ9rgIEGLSJryS9lFdMhKtPFey18BXoWbPXyvZPJJ04,442
97
- spiral/transaction.py,sha256=O3vSaTc7zpeC5qbqnj-VWKwK6rrp_mYV2JuPHp2ZJ80,1464
97
+ spiral/transaction.py,sha256=nSykH4UGs9hGtWuSWK9YyT9jfEuvzfkKoUgMM5Xt4zU,1841
98
98
  spiral/types_.py,sha256=W_jyO7F6rpPiH69jhgSgV7OxQZbOlb1Ho3InpKUP6Eo,155
99
- pyspiral-0.6.0.dist-info/RECORD,,
99
+ pyspiral-0.6.2.dist-info/RECORD,,
spiral/__init__.py CHANGED
@@ -7,9 +7,10 @@ from spiral import _lib
7
7
  assert _lib, "Spiral library"
8
8
 
9
9
  from spiral.client import Spiral # noqa: E402
10
+ from spiral.core.client import ShuffleStrategy # noqa: E402
10
11
  from spiral.key_space_index import KeySpaceIndex # noqa: E402
11
12
  from spiral.project import Project # noqa: E402
12
- from spiral.scan import Scan, ShuffleStrategy # noqa: E402
13
+ from spiral.scan import Scan # noqa: E402
13
14
  from spiral.snapshot import Snapshot # noqa: E402
14
15
  from spiral.table import Table # noqa: E402
15
16
  from spiral.text_index import TextIndex # noqa: E402
spiral/_lib.abi3.so CHANGED
Binary file
spiral/cli/key_spaces.py CHANGED
@@ -56,6 +56,27 @@ def ls(
56
56
  CONSOLE.print(rich_table)
57
57
 
58
58
 
59
+ @app.command(help="Show index partitions.")
60
+ def show(
61
+ project: ProjectArg,
62
+ name: Annotated[str | None, Option(help="Index name.")] = None,
63
+ ):
64
+ """Show index partitions."""
65
+ index_id = get_index_id(project, name)
66
+ index = state.spiral.key_space_index(index_id)
67
+ shards = state.spiral._ops().compute_shards(index.core)
68
+
69
+ rich_table = rich.table.Table("Begin", "End", "Cardinality", title=f"Index {index.name} Partitions")
70
+ for partition in shards:
71
+ rich_table.add_row(
72
+ # TODO(marko): This isn't really pretty...
73
+ repr(partition.key_range.begin),
74
+ repr(partition.key_range.end),
75
+ str(partition.cardinality),
76
+ )
77
+ CONSOLE.print(rich_table)
78
+
79
+
59
80
  @app.command(help="Trigger a sync job for an index.")
60
81
  def sync(
61
82
  project: ProjectArg,
spiral/cli/tables.py CHANGED
@@ -111,12 +111,12 @@ def manifests(
111
111
  _, t = get_table(project, table, dataset)
112
112
  s = t.snapshot()
113
113
 
114
- key_space_state = state.spiral._ops().key_space_state(t.core, asof=s.asof) # pyright: ignore[reportPrivateUsage]
114
+ key_space_state = state.spiral._ops().key_space_state(s.core) # pyright: ignore[reportPrivateUsage]
115
115
  key_space_manifest = key_space_state.manifest
116
116
 
117
- column_groups_states = state.spiral._ops().column_groups_states(t.core, key_space_state, asof=s.asof) # pyright: ignore[reportPrivateUsage]
117
+ column_groups_states = state.spiral._ops().column_groups_states(s.core, key_space_state) # pyright: ignore[reportPrivateUsage]
118
118
 
119
- display_manifests(key_space_manifest, ((x.column_group, x.manifest) for x in column_groups_states))
119
+ display_manifests(key_space_manifest, [(x.column_group, x.manifest) for x in column_groups_states])
120
120
 
121
121
 
122
122
  @app.command(help="Display the manifests which would be read by a scan of the given column group.")
@@ -124,6 +124,25 @@ class Shard:
124
124
  key_range: KeyRange
125
125
  cardinality: int
126
126
 
127
+ def __init__(self, key_range: KeyRange, cardinality: int): ...
128
+
129
+ class ShuffleStrategy:
130
+ # Results are buffered in a pool of `shuffle_buffer_size` rows and shuffled again.
131
+ shuffle_buffer_size: int
132
+
133
+ # All randomness is derived from this seed. If None, a random seed is generated from the OS.
134
+ seed: int | None
135
+
136
+ # Externally provided shards to shuffle before reading rows.
137
+ shards: list[Shard] | None
138
+
139
+ def __init__(
140
+ self,
141
+ seed: int | None = None,
142
+ shuffle_buffer_size: int | None = None,
143
+ shards: list[Shard] | None = None,
144
+ ): ...
145
+
127
146
  class Operations:
128
147
  def flush_wal(self, table: Table) -> None:
129
148
  """
@@ -163,21 +182,19 @@ class Operations:
163
182
  Index table changes up to the given snapshot.
164
183
  """
165
184
  ...
166
- def key_space_state(self, table: Table, *, asof: int | None = None) -> KeySpaceState:
185
+ def key_space_state(self, snapshot: Snapshot) -> KeySpaceState:
167
186
  """
168
187
  The key space state for the table.
169
188
  """
170
189
  ...
171
190
  def column_group_state(
172
- self, table: Table, key_space_state: KeySpaceState, column_group: ColumnGroup, *, asof: int | None = None
191
+ self, snapshot: Snapshot, key_space_state: KeySpaceState, column_group: ColumnGroup
173
192
  ) -> ColumnGroupState:
174
193
  """
175
194
  The state of the column group of the table.
176
195
  """
177
196
  ...
178
- def column_groups_states(
179
- self, table: Table, key_space_state: KeySpaceState, *, asof: int | None = None
180
- ) -> list[ColumnGroupState]:
197
+ def column_groups_states(self, snapshot: Snapshot, key_space_state: KeySpaceState) -> list[ColumnGroupState]:
181
198
  """
182
199
  The state of each column group of the table.
183
200
  """
@@ -1,6 +1,7 @@
1
1
  from typing import Any
2
2
 
3
3
  import pyarrow as pa
4
+ from spiral.core.client import ShuffleStrategy
4
5
  from spiral.expressions import Expr
5
6
 
6
7
  from .manifests import FragmentManifest
@@ -76,6 +77,7 @@ class Scan:
76
77
  output_path: str,
77
78
  key_range: KeyRange,
78
79
  expected_cardinality: int | None = None,
80
+ shard_row_block_size: int = 8192,
79
81
  ) -> None: ...
80
82
 
81
83
  class KeySpaceState:
@@ -93,27 +95,7 @@ class Transaction:
93
95
  status: str
94
96
 
95
97
  def write(self, expr: Expr, *, partition_size_bytes: int | None = None): ...
98
+ def drop_columns(self, column_paths: list[str]): ...
96
99
  def commit(self): ...
97
100
  def abort(self): ...
98
101
  def metrics(self) -> dict[str, Any]: ...
99
-
100
- class ShuffleStrategy:
101
- # Results are buffered in a pool of `buffer_size` rows and shuffled again.
102
- shuffle_buffer_size: int
103
-
104
- # All randomness is derived from this seed. If None, a random seed is generated from the OS.
105
- seed: int | None
106
-
107
- # `approximate_batch_size` controls the maximum approximate size of each shard. Shards that
108
- # are larger than this size are further split assuming uniform distribution of keys. Note
109
- # that this is a best-effort and can be widely off. The purpose of this is to improve
110
- # shuffling, rather than to support sharding. If not present, splits derived from the table
111
- # are used in the attempt to minimize wasted reads.
112
- approximate_buffer_size: int | None
113
-
114
- def __init__(
115
- self,
116
- seed: int | None = None,
117
- shard_size: int | None = None,
118
- buffer_size: int | None = None,
119
- ): ...
@@ -8,7 +8,7 @@ class FragmentManifest:
8
8
  def __getitem__(self, idx: int): ...
9
9
  def to_arrow(self) -> pa.RecordBatchReader: ...
10
10
  @staticmethod
11
- def compute_schema(data_schema: pa.Schema) -> pa.Schema: ...
11
+ def compute_schema() -> pa.Schema: ...
12
12
  @staticmethod
13
13
  def from_fragment(fragment_file: FragmentFile) -> FragmentManifest: ...
14
14
  @staticmethod
spiral/debug/manifests.py CHANGED
@@ -1,5 +1,3 @@
1
- from collections.abc import Iterable
2
-
3
1
  from spiral import datetime_
4
2
  from spiral.core.table import Scan
5
3
  from spiral.core.table.manifests import FragmentManifest
@@ -13,15 +11,15 @@ def display_scan_manifests(scan: Scan):
13
11
  raise NotImplementedError("Multiple table scans are not supported.")
14
12
  table_id = scan.table_ids()[0]
15
13
  key_space_manifest = scan.key_space_state(table_id).manifest
16
- column_group_manifests = (
14
+ column_group_manifests = [
17
15
  (column_group, scan.column_group_state(column_group).manifest) for column_group in scan.column_groups()
18
- )
16
+ ]
19
17
 
20
18
  display_manifests(key_space_manifest, column_group_manifests)
21
19
 
22
20
 
23
21
  def display_manifests(
24
- key_space_manifest: FragmentManifest, column_group_manifests: Iterable[tuple[ColumnGroup, FragmentManifest]]
22
+ key_space_manifest: FragmentManifest, column_group_manifests: list[tuple[ColumnGroup, FragmentManifest]]
25
23
  ):
26
24
  _table_of_fragments(
27
25
  key_space_manifest,
spiral/scan.py CHANGED
@@ -3,7 +3,8 @@ from typing import TYPE_CHECKING, Any
3
3
 
4
4
  import pyarrow as pa
5
5
 
6
- from spiral.core.table import KeyRange, ShuffleStrategy
6
+ from spiral.core.client import ShuffleStrategy
7
+ from spiral.core.table import KeyRange
7
8
  from spiral.core.table import Scan as CoreScan
8
9
  from spiral.core.table.spec import Schema
9
10
  from spiral.settings import CI, DEV
@@ -177,7 +178,7 @@ class Scan:
177
178
  # Print manifests in a human-readable format.
178
179
  from spiral.debug.manifests import display_scan_manifests
179
180
 
180
- display_scan_manifests(self._core)
181
+ display_scan_manifests(self.core)
181
182
 
182
183
  def _dump_metrics(self):
183
184
  # Print metrics in a human-readable format.
@@ -70,6 +70,20 @@ class SpiralReader:
70
70
  """
71
71
  return [(FileInfo(basename=self.filename), None)]
72
72
 
73
+ def get_max_size(self) -> int:
74
+ """Get the full size of this shard.
75
+
76
+ "Max" in this case means both the raw (decompressed) and zip (compressed) versions are
77
+ resident (assuming it has a zip form). This is the maximum disk usage the shard can reach.
78
+ When compressed was used, even if keep_zip is ``False``, the zip form must still be
79
+ resident at the same time as the raw form during shard decompression.
80
+
81
+ Returns:
82
+ int: Size in bytes.
83
+ """
84
+ # TODO(marko): This is used to check cache limit is possible...
85
+ return 0
86
+
73
87
  @functools.cached_property
74
88
  def filename(self) -> str:
75
89
  """Used by SpiralStream to identify shard's file-on-disk, if it exists."""
@@ -24,10 +24,13 @@ class SpiralStream:
24
24
  Stream can be passed to MDS's StreamingDataset in `streams` argument.
25
25
  """
26
26
 
27
- def __init__(self, scan: CoreScan, shards: list[Shard], cache_dir: str | None = None):
27
+ def __init__(
28
+ self, scan: CoreScan, shards: list[Shard], cache_dir: str | None = None, shard_row_block_size: int = 8192
29
+ ):
28
30
  self._scan = scan
29
31
  # TODO(marko): Read shards only on world.is_local_leader in `get_shards` and materialize on disk.
30
32
  self._shards = shards
33
+ self.shard_row_block_size = shard_row_block_size
31
34
 
32
35
  if cache_dir is not None:
33
36
  if not os.path.exists(cache_dir):
@@ -92,7 +95,12 @@ class SpiralStream:
92
95
  return 0
93
96
 
94
97
  # This method exists but it's hidden.
95
- self._scan._prepare_shard(shard_path, shard.shard.key_range, expected_cardinality=shard.shard.cardinality)
98
+ self._scan._prepare_shard(
99
+ shard_path,
100
+ shard.shard.key_range,
101
+ expected_cardinality=shard.shard.cardinality,
102
+ shard_row_block_size=self.shard_row_block_size,
103
+ )
96
104
 
97
105
  # Get the size of the file on disk.
98
106
  stat = os.stat(shard_path)
spiral/table.py CHANGED
@@ -109,6 +109,17 @@ class Table(Expr):
109
109
  partition_size_bytes=partition_size_bytes,
110
110
  )
111
111
 
112
+ def drop_columns(self, column_paths: list[str]) -> None:
113
+ """
114
+ Drops the specified columns from the table.
115
+
116
+
117
+ :param column_paths: Fully qualified column names. (e.g., "column_name" or "nested.field").
118
+ All columns must exist; if a column doesn't exist, the function will return an error.
119
+ """
120
+ with self.txn() as txn:
121
+ txn.drop_columns(column_paths)
122
+
112
123
  def snapshot(self, asof: datetime | int | None = None) -> Snapshot:
113
124
  """Returns a snapshot of the table at the given timestamp."""
114
125
  if isinstance(asof, datetime):
@@ -183,7 +194,9 @@ class Table(Expr):
183
194
  *,
184
195
  index: "KeySpaceIndex",
185
196
  batch_size: int | None = None,
197
+ cache_dir: str | None = None,
186
198
  cache_limit: int | str | None = None,
199
+ predownload: int | None = None,
187
200
  sampling_method: str = "balanced",
188
201
  sampling_granularity: int = 1,
189
202
  partition_algo: str = "relaxed",
@@ -208,12 +221,13 @@ class Table(Expr):
208
221
  """
209
222
  from streaming import StreamingDataset
210
223
 
211
- stream = self.to_streaming(index=index)
224
+ stream = self.to_streaming(index=index, cache_dir=cache_dir)
212
225
 
213
226
  return StreamingDataset(
214
227
  streams=[stream],
215
228
  batch_size=batch_size,
216
229
  cache_limit=cache_limit,
230
+ predownload=predownload,
217
231
  sampling_method=sampling_method,
218
232
  sampling_granularity=sampling_granularity,
219
233
  partition_algo=partition_algo,
@@ -226,13 +240,14 @@ class Table(Expr):
226
240
  replication=replication,
227
241
  )
228
242
 
229
- def to_streaming(self, index: "KeySpaceIndex") -> "streaming.Stream":
243
+ def to_streaming(self, index: "KeySpaceIndex", *, cache_dir: str | None = None) -> "streaming.Stream":
230
244
  """Returns a stream to be used with MosaicML's StreamingDataset.
231
245
 
232
246
  Requires `streaming` package to be installed.
233
247
 
234
248
  Args:
235
249
  index: Prebuilt KeysIndex to use when creating the stream. The index's `asof` will be used when scanning.
250
+ cache_dir: Directory to use for caching data. If None, a temporary directory will be used.
236
251
  """
237
252
  from spiral.streaming_ import SpiralStream
238
253
 
@@ -254,4 +269,4 @@ class Table(Expr):
254
269
  # We have a world there and can compute shards only on leader.
255
270
  shards = self.spiral._core._ops().compute_shards(index=index.core)
256
271
 
257
- return SpiralStream(scan=scan.core, shards=shards) # type: ignore[return-value]
272
+ return SpiralStream(scan=scan.core, shards=shards, cache_dir=cache_dir) # type: ignore[return-value]
spiral/transaction.py CHANGED
@@ -39,6 +39,16 @@ class Transaction:
39
39
 
40
40
  self._core.write(expr.__expr__, partition_size_bytes=partition_size_bytes)
41
41
 
42
+ def drop_columns(self, column_paths: list[str]):
43
+ """
44
+ Drops the specified columns from the table.
45
+
46
+
47
+ :param column_paths: Fully qualified column names. (e.g., "column_name" or "nested.field").
48
+ All columns must exist; if a column doesn't exist, the function will return an error.
49
+ """
50
+ self._core.drop_columns(column_paths)
51
+
42
52
  def commit(self):
43
53
  """Commit the transaction."""
44
54
  self._core.commit()