pyspiral 0.4.4__cp310-abi3-macosx_11_0_arm64.whl → 0.6.0__cp310-abi3-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. {pyspiral-0.4.4.dist-info → pyspiral-0.6.0.dist-info}/METADATA +10 -5
  2. pyspiral-0.6.0.dist-info/RECORD +99 -0
  3. {pyspiral-0.4.4.dist-info → pyspiral-0.6.0.dist-info}/WHEEL +1 -1
  4. spiral/__init__.py +10 -3
  5. spiral/_lib.abi3.so +0 -0
  6. spiral/adbc.py +29 -11
  7. spiral/api/__init__.py +14 -0
  8. spiral/api/client.py +5 -1
  9. spiral/api/key_space_indexes.py +23 -0
  10. spiral/api/projects.py +17 -2
  11. spiral/api/text_indexes.py +56 -0
  12. spiral/api/types.py +2 -0
  13. spiral/api/workers.py +40 -0
  14. spiral/cli/__init__.py +15 -6
  15. spiral/cli/admin.py +2 -4
  16. spiral/cli/app.py +4 -2
  17. spiral/cli/fs.py +5 -6
  18. spiral/cli/iceberg.py +97 -0
  19. spiral/cli/key_spaces.py +68 -0
  20. spiral/cli/login.py +6 -7
  21. spiral/cli/orgs.py +7 -8
  22. spiral/cli/printer.py +3 -3
  23. spiral/cli/projects.py +5 -6
  24. spiral/cli/tables.py +131 -0
  25. spiral/cli/telemetry.py +3 -4
  26. spiral/cli/text.py +115 -0
  27. spiral/cli/types.py +3 -4
  28. spiral/cli/workloads.py +7 -8
  29. spiral/client.py +111 -8
  30. spiral/core/authn/__init__.pyi +27 -0
  31. spiral/core/client/__init__.pyi +135 -63
  32. spiral/core/table/__init__.pyi +36 -26
  33. spiral/core/table/metastore/__init__.pyi +0 -4
  34. spiral/core/table/spec/__init__.pyi +0 -2
  35. spiral/{tables/dataset.py → dataset.py} +13 -7
  36. spiral/{tables/debug → debug}/manifests.py +17 -6
  37. spiral/{tables/debug → debug}/scan.py +7 -7
  38. spiral/expressions/base.py +3 -3
  39. spiral/expressions/udf.py +1 -1
  40. spiral/{iceberg/client.py → iceberg.py} +1 -3
  41. spiral/key_space_index.py +44 -0
  42. spiral/project.py +171 -18
  43. spiral/protogen/_/arrow/flight/protocol/sql/__init__.py +1668 -1110
  44. spiral/protogen/_/google/protobuf/__init__.py +2190 -0
  45. spiral/protogen/_/message_pool.py +3 -0
  46. spiral/protogen/_/py.typed +0 -0
  47. spiral/protogen/_/scandal/__init__.py +138 -126
  48. spiral/protogen/_/spfs/__init__.py +72 -0
  49. spiral/protogen/_/spql/__init__.py +61 -0
  50. spiral/protogen/_/substrait/__init__.py +5256 -2459
  51. spiral/protogen/_/substrait/extensions/__init__.py +103 -49
  52. spiral/{tables/scan.py → scan.py} +37 -44
  53. spiral/settings.py +14 -3
  54. spiral/snapshot.py +55 -0
  55. spiral/streaming_/__init__.py +3 -0
  56. spiral/streaming_/reader.py +117 -0
  57. spiral/streaming_/stream.py +146 -0
  58. spiral/substrait_.py +9 -9
  59. spiral/table.py +257 -0
  60. spiral/text_index.py +17 -0
  61. spiral/{tables/transaction.py → transaction.py} +11 -15
  62. pyspiral-0.4.4.dist-info/RECORD +0 -98
  63. spiral/cli/iceberg/__init__.py +0 -7
  64. spiral/cli/iceberg/namespaces.py +0 -47
  65. spiral/cli/iceberg/tables.py +0 -60
  66. spiral/cli/indexes/__init__.py +0 -19
  67. spiral/cli/tables/__init__.py +0 -121
  68. spiral/core/index/__init__.pyi +0 -15
  69. spiral/iceberg/__init__.py +0 -3
  70. spiral/indexes/__init__.py +0 -5
  71. spiral/indexes/client.py +0 -137
  72. spiral/indexes/index.py +0 -34
  73. spiral/indexes/scan.py +0 -22
  74. spiral/protogen/_/spiral/table/__init__.py +0 -22
  75. spiral/protogen/substrait/__init__.py +0 -3399
  76. spiral/protogen/substrait/extensions/__init__.py +0 -115
  77. spiral/tables/__init__.py +0 -12
  78. spiral/tables/client.py +0 -130
  79. spiral/tables/maintenance.py +0 -12
  80. spiral/tables/snapshot.py +0 -78
  81. spiral/tables/table.py +0 -145
  82. {pyspiral-0.4.4.dist-info → pyspiral-0.6.0.dist-info}/entry_points.txt +0 -0
  83. /spiral/{protogen/_/spiral → debug}/__init__.py +0 -0
  84. /spiral/{tables/debug → debug}/metrics.py +0 -0
  85. /spiral/{tables/debug → protogen/_/google}/__init__.py +0 -0
@@ -0,0 +1,146 @@
1
+ import os
2
+ import tempfile
3
+ from typing import TYPE_CHECKING
4
+
5
+ import numpy as np
6
+
7
+ from spiral.core.client import Shard
8
+ from spiral.core.table import Scan as CoreScan
9
+ from spiral.streaming_.reader import SpiralReader
10
+
11
+ if TYPE_CHECKING:
12
+ from streaming.base.array import NDArray
13
+ from streaming.base.format import Reader
14
+ from streaming.base.world import World
15
+
16
+
17
class SpiralStream:
    """
    An MDS (streaming) compatible Stream.

    The stream does not extend the default Stream class, but it is compatible with its API.

    The stream is not registered with MDS, as the only way to construct the stream is through Spiral client.
    Stream can be passed to MDS's StreamingDataset in `streams` argument.
    """

    def __init__(self, scan: CoreScan, shards: list[Shard], cache_dir: str | None = None):
        """Create a stream over the given scan and its precomputed shards.

        Args:
            scan: Core scan used to materialize shard files on demand.
            shards: Shards exposed by this stream.
            cache_dir: Directory where shard files are written. When omitted,
                the system temporary directory is used.

        Raises:
            ValueError: If ``cache_dir`` exists but is not a directory.
        """
        self._scan = scan
        # TODO(marko): Read shards only on world.is_local_leader in `get_shards` and materialize on disk.
        self._shards = shards

        if cache_dir is None:
            cache_dir = tempfile.gettempdir()
        else:
            if not os.path.exists(cache_dir):
                os.makedirs(cache_dir, exist_ok=True)
            if not os.path.isdir(cache_dir):
                raise ValueError(f"Cache dir {cache_dir} is not a directory.")
        self._cache_dir = cache_dir

        # Ensure the split directory exists.
        os.makedirs(self._split_dir, exist_ok=True)

    @property
    def _split_dir(self) -> str:
        """Directory under the cache dir that holds this stream's shard files."""
        return os.path.join(self._cache_dir, self.split)

    @property
    def local(self) -> str:
        """Local directory of the stream (the cache directory)."""
        # Dataset: Register/lookup our shared memory prefix and filelock root directory.
        return self._cache_dir

    @property
    def remote(self) -> str | None:
        """Remote location of the stream; always ``None``."""
        # Dataset: Register/lookup our shared memory prefix and filelock root directory.
        return None

    @property
    def split(self) -> str:
        """Name of the split; always ``"default"``."""
        # Dataset: Register/lookup our shared memory prefix and filelock root directory.
        return "default"

    @classmethod
    def validate_weights(cls, streams) -> tuple[bool, bool]:
        """Delegate weight validation to MDS's ``Stream.validate_weights``."""
        # Imported lazily so `streaming` is only required when the stream is actually used.
        from streaming.base.stream import Stream

        return Stream.validate_weights(streams)

    @classmethod
    def apply_weights(cls, streams, samples_per_stream, choose_per_epoch, seed) -> int:
        """Delegate weight application to MDS's ``Stream.apply_weights``."""
        # Imported lazily so `streaming` is only required when the stream is actually used.
        from streaming.base.stream import Stream

        return Stream.apply_weights(streams, samples_per_stream, choose_per_epoch, seed)

    def apply_default(self, default: dict):
        """Apply defaults coming from the StreamingDataset.

        The defaults carry: 'remote', 'local', 'split', 'download_retry',
        'download_timeout', 'validate_hash', 'keep_zip'. Only 'split' is inspected.

        Raises:
            ValueError: If a split is provided.
        """
        split = default["split"]
        if split is not None:
            raise ValueError("SpiralStream does not support split, as the split is defined in the Scan.")

    def prepare_shard(self, shard: "Reader") -> int:
        """Ensure (download, validate, extract, etc.) that we have the given shard.

        Args:
            shard (Reader): Which shard.

        Returns:
            int: Change in cache usage.

        Raises:
            ValueError: If ``shard`` is not a SpiralReader.
        """
        if not isinstance(shard, SpiralReader):
            raise ValueError("Only SpiralReader is supported in SpiralStream")

        target = os.path.join(self._split_dir, shard.filename)
        if not os.path.exists(target):
            # This method exists but it's hidden.
            self._scan._prepare_shard(target, shard.shard.key_range, expected_cardinality=shard.shard.cardinality)
            # The change in cache usage is the size of the freshly written file.
            return os.stat(target).st_size

        # Already on disk, nothing was added.
        return 0

    def get_shards(self, world: "World", allow_unsafe_types: bool) -> list["Reader"]:
        """Load this Stream's index, retrieving its shard readers.

        Args:
            world (World): Distributed context.
            allow_unsafe_types (bool): If a shard contains Pickle, which allows arbitrary code
                execution during deserialization, whether to keep going if ``True`` or raise an error.
                This argument is ignored as SpiralStream does not support Pickle.

        Returns:
            List[Reader]: Shard readers.
        """
        split_dir = self._split_dir
        readers = [SpiralReader(shard, split_dir) for shard in self._shards]
        return readers  # type: ignore[return-value]

    def set_up_local(self, shards: list["Reader"], cache_usage_per_shard: "NDArray[np.int64]") -> None:
        """Bring a local directory into a consistent state, getting which shards are present.

        Args:
            shards (List[Reader]): List of this stream's shards.
            cache_usage_per_shard (NDArray[np.int64]): Cache usage per shard of this stream.

        Raises:
            ValueError: If any shard is not a SpiralReader.
        """
        split_dir = self._split_dir
        # Names of regular `.vortex` files currently present in the split directory.
        present = {
            name
            for name in os.listdir(split_dir)
            if os.path.isfile(os.path.join(split_dir, name)) and name.endswith(".vortex")
        }

        # Determine which shards are present, making local dir consistent.
        for position, shard in enumerate(shards):
            if not isinstance(shard, SpiralReader):
                raise ValueError("Only SpiralReader is supported in SpiralStream")
            if shard.filename not in present:
                cache_usage_per_shard[position] = 0
                continue
            # Record the size of the file on disk.
            cache_usage_per_shard[position] = os.stat(os.path.join(split_dir, shard.filename)).st_size

    def get_index_size(self) -> int:
        """Get the size of the index file in bytes.

        Returns:
            int: Size in bytes.
        """
        # There is no index file stored on disk.
        return 0
spiral/substrait_.py CHANGED
@@ -1,9 +1,9 @@
1
- import betterproto
1
+ import betterproto2
2
2
  import pyarrow as pa
3
3
 
4
4
  import spiral.expressions as se
5
5
  from spiral.expressions.base import Expr
6
- from spiral.protogen.substrait import (
6
+ from spiral.protogen._.substrait import (
7
7
  Expression,
8
8
  ExpressionFieldReference,
9
9
  ExpressionLiteral,
@@ -17,7 +17,7 @@ from spiral.protogen.substrait import (
17
17
  ExpressionScalarFunction,
18
18
  ExtendedExpression,
19
19
  )
20
- from spiral.protogen.substrait.extensions import (
20
+ from spiral.protogen._.substrait.extensions import (
21
21
  SimpleExtensionDeclaration,
22
22
  SimpleExtensionDeclarationExtensionFunction,
23
23
  SimpleExtensionDeclarationExtensionType,
@@ -58,7 +58,7 @@ class SubstraitConverter:
58
58
  return self._expr(expr.referred_expr[0].expression)
59
59
 
60
60
  def _extension_declaration(self, ext: SimpleExtensionDeclaration):
61
- match betterproto.which_one_of(ext, "mapping_type"):
61
+ match betterproto2.which_one_of(ext, "mapping_type"):
62
62
  case "extension_function", ext_func:
63
63
  self._extension_function(ext_func)
64
64
  case "extension_type", ext_type:
@@ -152,7 +152,7 @@ class SubstraitConverter:
152
152
  raise NotImplementedError()
153
153
 
154
154
  def _expr(self, expr: Expression) -> Expr:
155
- match betterproto.which_one_of(expr, "rex_type"):
155
+ match betterproto2.which_one_of(expr, "rex_type"):
156
156
  case "literal", e:
157
157
  return self._expr_literal(e)
158
158
  case "selection", e:
@@ -196,7 +196,7 @@ class SubstraitConverter:
196
196
  "fixed_binary",
197
197
  }
198
198
 
199
- match betterproto.which_one_of(expr, "literal_type"):
199
+ match betterproto2.which_one_of(expr, "literal_type"):
200
200
  case type_, v if type_ in simple:
201
201
  return se.scalar(pa.scalar(v))
202
202
  case "timestamp", v:
@@ -225,7 +225,7 @@ class SubstraitConverter:
225
225
  raise NotImplementedError(f"Literal type not supported: {literal_type}")
226
226
 
227
227
  def _expr_selection(self, expr: ExpressionFieldReference):
228
- match betterproto.which_one_of(expr, "root_type"):
228
+ match betterproto2.which_one_of(expr, "root_type"):
229
229
  case "root_reference", _:
230
230
  # The reference is relative to the root
231
231
  base_expr = self.scope
@@ -233,7 +233,7 @@ class SubstraitConverter:
233
233
  case _:
234
234
  raise NotImplementedError("Only root_reference expressions are supported")
235
235
 
236
- match betterproto.which_one_of(expr, "reference_type"):
236
+ match betterproto2.which_one_of(expr, "reference_type"):
237
237
  case "direct_reference", direct_ref:
238
238
  return self._expr_direct_reference(base_expr, base_type, direct_ref)
239
239
  case "masked_reference", masked_ref:
@@ -242,7 +242,7 @@ class SubstraitConverter:
242
242
  raise NotImplementedError()
243
243
 
244
244
  def _expr_direct_reference(self, scope: Expr, scope_type: pa.StructType, expr: ExpressionReferenceSegment):
245
- match betterproto.which_one_of(expr, "reference_type"):
245
+ match betterproto2.which_one_of(expr, "reference_type"):
246
246
  case "map_key", ref:
247
247
  raise NotImplementedError("Map types not yet supported in Spiral")
248
248
  case "struct_field", ref:
spiral/table.py ADDED
@@ -0,0 +1,257 @@
1
+ from datetime import datetime
2
+ from typing import TYPE_CHECKING
3
+
4
+ from spiral.core.table import Table as CoreTable
5
+ from spiral.core.table.spec import Schema
6
+ from spiral.expressions.base import Expr, ExprLike
7
+ from spiral.settings import settings
8
+ from spiral.snapshot import Snapshot
9
+ from spiral.transaction import Transaction
10
+
11
+ if TYPE_CHECKING:
12
+ import duckdb
13
+ import polars as pl
14
+ import pyarrow.dataset as ds
15
+ import streaming
16
+ import torch.utils.data as torchdata # noqa
17
+
18
+ from spiral.client import Spiral
19
+ from spiral.key_space_index import KeySpaceIndex
20
+
21
+
22
class Table(Expr):
    """API for interacting with a SpiralDB's Table.

    Spiral Table is a powerful and flexible way for storing, analyzing,
    and querying massive and/or multimodal datasets. The data model will feel familiar
    to users of SQL- or DataFrame-style systems, yet is designed to be more flexible, more powerful,
    and more useful in the context of modern data processing.

    Tables are stored and queried directly from object storage.
    """

    # Keyword arguments of `to_data_loader` that are moved out of the DataLoader
    # kwargs and forwarded to `to_streaming_dataset` instead.
    _STREAMING_DATASET_KWARGS = (
        "cache_limit",
        "sampling_method",
        "sampling_granularity",
        "partition_algo",
        "num_canonical_nodes",
        "shuffle",
        "shuffle_algo",
        "shuffle_seed",
        "shuffle_block_size",
        "batching_method",
        "replication",
    )

    def __init__(self, spiral: "Spiral", core: CoreTable, *, identifier: str | None = None):
        """Wrap a core table.

        Args:
            spiral: Client the table belongs to; used for transactions and scans.
            core: The underlying core table.
            identifier: Optional dot-separated identifier of the table.
        """
        super().__init__(core.__expr__)

        self.spiral = spiral
        self.core = core

        self._key_schema = core.key_schema
        self._key_columns = set(self._key_schema.names)
        self._identifier = identifier

    @property
    def table_id(self) -> str:
        """Returns the ID of the underlying core table."""
        return self.core.id

    @property
    def identifier(self) -> str:
        """Returns the fully qualified identifier of the table.

        Falls back to the table ID when no identifier was provided.
        """
        return self._identifier or self.table_id

    @property
    def dataset(self) -> str | None:
        """Returns the dataset of the table, or None when the table has no identifier.

        The identifier must consist of exactly three dot-separated parts;
        the dataset is the middle one.
        """
        if self._identifier is None:
            return None
        _, dataset, _ = self._identifier.split(".")
        return dataset

    @property
    def name(self) -> str | None:
        """Returns the name of the table, or None when the table has no identifier.

        The identifier must consist of exactly three dot-separated parts;
        the name is the last one.
        """
        if self._identifier is None:
            return None
        _, _, name = self._identifier.split(".")
        return name

    def last_modified_at(self) -> int:
        """Returns `last_modified_at` of the table's latest WAL (`asof=None`)."""
        return self.core.get_wal(asof=None).last_modified_at

    def __str__(self):
        return self.identifier

    def __repr__(self):
        return f'Table("{self.identifier}")'

    def __getitem__(self, item: str) -> Expr:
        # Overridden only to narrow the signature; behavior comes from Expr.
        return super().__getitem__(item)

    def select(self, *paths: str, exclude: list[str] | None = None) -> "Expr":
        """Select the given paths from the table; delegates to `Expr.select`."""
        return super().select(*paths, exclude=exclude)

    @property
    def key_schema(self) -> Schema:
        """Returns the key schema of the table."""
        return self._key_schema

    def schema(self) -> Schema:
        """Returns the FULL schema of the table.

        NOTE: This can be expensive for large tables.
        """
        return self.core.get_schema(asof=None)

    def write(
        self,
        expr: ExprLike,
        *,
        partition_size_bytes: int | None = None,
    ) -> None:
        """Write an item to the table inside a single transaction.

        :param expr: The expression to write. Must evaluate to a struct array.
        :param partition_size_bytes: The maximum partition size in bytes.
        """
        with self.txn() as txn:
            txn.write(
                expr,
                partition_size_bytes=partition_size_bytes,
            )

    def snapshot(self, asof: datetime | int | None = None) -> Snapshot:
        """Returns a snapshot of the table at the given timestamp.

        Args:
            asof: A `datetime`, which is converted to integer microseconds since
                the Unix epoch (naive datetimes are interpreted as local time,
                as `datetime.timestamp()` does), or an integer that is passed
                through unchanged, or None.
        """
        if isinstance(asof, datetime):
            from datetime import timedelta, timezone

            # Use exact integer arithmetic: going through the float returned by
            # `timestamp()` can truncate to one microsecond before the requested instant.
            epoch = datetime(1970, 1, 1, tzinfo=timezone.utc)
            asof = (asof.astimezone(timezone.utc) - epoch) // timedelta(microseconds=1)
        return Snapshot(self, self.core.get_snapshot(asof=asof))

    def txn(self) -> Transaction:
        """Begins a new transaction. Transaction must be committed for writes to become visible.

        IMPORTANT: While transaction can be used to atomically write data to the table,
             it is important that the primary key columns are unique within the transaction.
        """
        return Transaction(self.spiral._core.transaction(self.core, settings().file_format))

    def to_dataset(self) -> "ds.Dataset":
        """Returns a PyArrow Dataset representing the table."""
        return self.snapshot().to_dataset()

    def to_polars(self) -> "pl.LazyFrame":
        """Returns a Polars LazyFrame for the Spiral table."""
        return self.snapshot().to_polars()

    def to_duckdb(self) -> "duckdb.DuckDBPyRelation":
        """Returns a DuckDB relation for the Spiral table."""
        return self.snapshot().to_duckdb()

    def to_data_loader(self, *, index: "KeySpaceIndex", **kwargs) -> "torchdata.DataLoader":
        """Returns a PyTorch DataLoader.

        Requires `torch` and `streaming` package to be installed.

        Args:
            index: See `to_streaming` method.
            **kwargs: Additional arguments passed to the PyTorch DataLoader constructor.
                Arguments that belong to the StreamingDataset (e.g. `shuffle`,
                `cache_limit`) are forwarded to `to_streaming_dataset` instead;
                `batch_size` is passed to both.

        """
        from streaming import StreamingDataLoader

        dataset_kwargs = {}
        if "batch_size" in kwargs:
            # Keep it in kwargs for DataLoader
            dataset_kwargs["batch_size"] = kwargs["batch_size"]
        for key in self._STREAMING_DATASET_KWARGS:
            if key in kwargs:
                dataset_kwargs[key] = kwargs.pop(key)

        dataset = self.to_streaming_dataset(index=index, **dataset_kwargs)

        return StreamingDataLoader(dataset=dataset, **kwargs)

    def to_streaming_dataset(
        self,
        *,
        index: "KeySpaceIndex",
        batch_size: int | None = None,
        cache_limit: int | str | None = None,
        sampling_method: str = "balanced",
        sampling_granularity: int = 1,
        partition_algo: str = "relaxed",
        num_canonical_nodes: int | None = None,
        shuffle: bool = False,
        shuffle_algo: str = "py1e",
        shuffle_seed: int = 9176,
        shuffle_block_size: int | None = None,
        batching_method: str = "random",
        replication: int | None = None,
    ) -> "streaming.StreamingDataset":
        """Returns a MosaicML's StreamingDataset that can be used for distributed training.

        Requires `streaming` package to be installed.

        Args:
            See `to_streaming` method for `index` arg.
            See MosaicML's `StreamingDataset` for other args.

        This is a helper method to construct a single stream dataset from the scan. When multiple streams are combined,
        use `to_streaming` to get the SpiralStream and construct the StreamingDataset manually using a `streams` arg.
        """
        from streaming import StreamingDataset

        stream = self.to_streaming(index=index)

        return StreamingDataset(
            streams=[stream],
            batch_size=batch_size,
            cache_limit=cache_limit,
            sampling_method=sampling_method,
            sampling_granularity=sampling_granularity,
            partition_algo=partition_algo,
            num_canonical_nodes=num_canonical_nodes,
            shuffle=shuffle,
            shuffle_algo=shuffle_algo,
            shuffle_seed=shuffle_seed,
            shuffle_block_size=shuffle_block_size,
            batching_method=batching_method,
            replication=replication,
        )

    def to_streaming(self, index: "KeySpaceIndex") -> "streaming.Stream":
        """Returns a stream to be used with MosaicML's StreamingDataset.

        Requires `streaming` package to be installed.

        Args:
            index: Prebuilt KeySpaceIndex to use when creating the stream. The index's `asof` will be used when scanning.

        Raises:
            ValueError: If the index was built on a different table, or its `asof` is 0.
        """
        from spiral.streaming_ import SpiralStream

        if index.table_id != self.table_id:
            raise ValueError("Index must be built on the same table as the scan.")
        if index.asof == 0:
            raise ValueError("Index have to be synced before it can be used in a stream.")

        # We know table from projection is in the session cause this method is on it.
        scan = self.spiral.scan(
            index.projection,
            where=index.filter,
            asof=index.asof,
            # TODO(marko): This should be configurable?
            exclude_keys=True,
        )

        # TODO(marko): This should happen in prepare_shards in Stream?
        #   We have a world there and can compute shards only on leader.
        shards = self.spiral._core._ops().compute_shards(index=index.core)

        return SpiralStream(scan=scan.core, shards=shards)  # type: ignore[return-value]
spiral/text_index.py ADDED
@@ -0,0 +1,17 @@
1
+ from spiral.core.client import TextIndex as CoreTextIndex
2
+ from spiral.expressions import Expr
3
+
4
+
5
class TextIndex(Expr):
    """Expression wrapper around a core text index."""

    def __init__(self, core: CoreTextIndex, *, name: str | None = None):
        """Wrap a core text index.

        Args:
            core: The underlying core text index.
            name: Optional name of the index.
        """
        super().__init__(core.__expr__)
        self.core = core
        self._name = name

    @property
    def index_id(self) -> str:
        """Returns the ID of the underlying core index."""
        core_index = self.core
        return core_index.id

    @property
    def name(self) -> str:
        """Returns the name of the index, falling back to its ID when no name is set."""
        if self._name:
            return self._name
        return self.index_id
@@ -1,9 +1,5 @@
1
- from typing import TYPE_CHECKING
2
-
3
- from spiral.core.table import TableTransaction
4
-
5
- if TYPE_CHECKING:
6
- from spiral.expressions.base import ExprLike
1
+ from spiral.core.table import Transaction as CoreTransaction
2
+ from spiral.expressions.base import ExprLike
7
3
 
8
4
 
9
5
  class Transaction:
@@ -13,24 +9,24 @@ class Transaction:
13
9
  it is important that the primary key columns are unique within the transaction.
14
10
  """
15
11
 
16
- def __init__(self, transaction: TableTransaction):
17
- self._transaction = transaction
12
+ def __init__(self, core: CoreTransaction):
13
+ self._core = core
18
14
 
19
15
  @property
20
16
  def status(self) -> str:
21
17
  """The status of the transaction."""
22
- return self._transaction.status
18
+ return self._core.status
23
19
 
24
20
  def __enter__(self):
25
21
  return self
26
22
 
27
23
  def __exit__(self, exc_type, exc_value, traceback):
28
24
  if exc_type is None:
29
- self._transaction.commit()
25
+ self._core.commit()
30
26
  else:
31
- self._transaction.abort()
27
+ self._core.abort()
32
28
 
33
- def write(self, expr: "ExprLike", *, partition_size_bytes: int | None = None):
29
+ def write(self, expr: ExprLike, *, partition_size_bytes: int | None = None):
34
30
  """Write an item to the table inside a single transaction.
35
31
 
36
32
  :param expr: The expression to write. Must evaluate to a struct array.
@@ -41,12 +37,12 @@ class Transaction:
41
37
 
42
38
  expr = se.lift(expr)
43
39
 
44
- self._transaction.write(expr.__expr__, partition_size_bytes=partition_size_bytes)
40
+ self._core.write(expr.__expr__, partition_size_bytes=partition_size_bytes)
45
41
 
46
42
  def commit(self):
47
43
  """Commit the transaction."""
48
- self._transaction.commit()
44
+ self._core.commit()
49
45
 
50
46
  def abort(self):
51
47
  """Abort the transaction."""
52
- self._transaction.abort()
48
+ self._core.abort()
@@ -1,98 +0,0 @@
1
- pyspiral-0.4.4.dist-info/METADATA,sha256=lVusDQ4LmIf3tRllkl2_hUNn1y0P_yMfNKhkRbpNLW0,1610
2
- pyspiral-0.4.4.dist-info/WHEEL,sha256=Mdosfxua6Dx1zYgObRH97e3wyiELqBbLtoRJj4RUSQE,103
3
- pyspiral-0.4.4.dist-info/entry_points.txt,sha256=uft7u-a6g40NLt4Q6BleWbK4NY0M8nZuYPpP8DV0EOk,45
4
- spiral/__init__.py,sha256=Jv1vbcnnmcTsBLN5mSNjnX3ae4C_mgojXDSBFaqIhN0,208
5
- spiral/_lib.abi3.so,sha256=5-2gLMM7XOEgX6lXbIBu-_ucQsC5zUZWqZaH7oXkdSE,59883120
6
- spiral/adbc.py,sha256=HcvR60uQeEK2oggSAK6y5VYtIrACIiCQ-85MEf18EZc,14199
7
- spiral/api/__init__.py,sha256=_7BS1RhqEFjnt3XwFWZNCHVEQeSKpezPevAiGCsvDbE,1776
8
- spiral/api/admin.py,sha256=A1iVR1XYJSObZivPAD5UzmPuMgupXc9kaHNYYa_kwfs,585
9
- spiral/api/client.py,sha256=9-L6T8niQAXo90jRxllJD4hXXmcGfHj7CW9X3XTYa5Q,4551
10
- spiral/api/filesystems.py,sha256=EA4iqhTeaIlvObvEUxHmZl0pQ24IOxUVWM3GPhFLw8o,4969
11
- spiral/api/organizations.py,sha256=B-8zZ7lFJANGK7dUNbo_aU-cgI959JBP9VcWb6wdgi0,1895
12
- spiral/api/projects.py,sha256=JBGof9A2Ivasu2jrULMjHBwlna0M8WRrTNqU-Es4GJ8,5673
13
- spiral/api/telemetry.py,sha256=tfdA3E_EWJwFVxkQfkm8tiYGRubnx2LuE5nbfsk1oG4,474
14
- spiral/api/types.py,sha256=zx-BRKsi1GHg9aL9gMUaVQWYYMXJcP0A8OQUc7jSIAc,653
15
- spiral/api/workloads.py,sha256=XAyXV7vgZcoyyoPoGvOT4jTpyFKFMvrrAfhL6d1h1kE,1748
16
- spiral/arrow_.py,sha256=T1LZ7bh9aMDbXfpUsf0dR0E1roTQyAYSgZ2mL4s8J_4,7681
17
- spiral/cli/__init__.py,sha256=ooAFz_iCpVCKHE0TiVElIynbP2PtTgD9cUw46Vh1lcw,2145
18
- spiral/cli/__main__.py,sha256=kNaKM2xgJo7GRogf83nYldLM-RGUR6vymdGwZxywQu0,71
19
- spiral/cli/admin.py,sha256=7WbU_tr05clUjmZ-RkKTlvcf1pbXIElRfHRJlCItFGk,326
20
- spiral/cli/app.py,sha256=-k0rrLbfJRLay_2_MOCt57PLcx0VnNMCkrnKV7j7nos,1725
21
- spiral/cli/console.py,sha256=6JHbAQV6MFWz3P-VzqPOjhHpkIQagsCdzTMvmuDKMkU,2580
22
- spiral/cli/fs.py,sha256=dVPoAoAbuQ9yJlfI-JiFgS9VdnPmeBMygVHgehJRj34,4367
23
- spiral/cli/iceberg/__init__.py,sha256=IQV_gwCFSj6Ubxs58VM9Pal1ymgG2bxdDgOPuk9E5bs,214
24
- spiral/cli/iceberg/namespaces.py,sha256=x9pvHlcXtcATYYjqimHa6CtkyL3taQUJ--ni_Bfoemc,1510
25
- spiral/cli/iceberg/tables.py,sha256=nSR4-t54otJfCmubB6vXnbOkbqPVGV0sHBlc-t9cIVg,1930
26
- spiral/cli/indexes/__init__.py,sha256=-USfxCIdckzZKBNQ-DXqe3V5ttWVo_Fsa1Mfcx5hdIw,467
27
- spiral/cli/login.py,sha256=InKMnpV8NATW5RPgB3ZL-DSVPzUuUByyK4Fx7pZEgfg,607
28
- spiral/cli/orgs.py,sha256=V-4ZTT3FwFQLcs1-BenC8uCgvWOJcxkZPSdCPfsexhc,2848
29
- spiral/cli/printer.py,sha256=W83KAE-7meoDD1yRltLQrZqrA2olGapBGy_2USWkY08,1778
30
- spiral/cli/projects.py,sha256=TKXu_VzkIUccwXzdlg-wQMkrB-Py33g052NrbuJx-D4,5096
31
- spiral/cli/state.py,sha256=10wTIVQ0SJkY67Z6-KQ1LFlt3aVIPmZhoHFdTwp4kNA,130
32
- spiral/cli/tables/__init__.py,sha256=lkGLDeU28IVnuxJdlYSUh6QSB9fQ4_1MeZJL73iXcHo,3660
33
- spiral/cli/telemetry.py,sha256=ABDCyV5QJGOIJp4AxvK0LG5xNPIysP37K5haL38T7P4,586
34
- spiral/cli/types.py,sha256=YG1eHhRLaqlVU_18DQBuF_YMsabhMZLBY0V9CvbSxjY,1369
35
- spiral/cli/workloads.py,sha256=SbxgwiBlX1AuqpOLV3gs7DFkH-Tbeend7qJTwq0Je84,1994
36
- spiral/client.py,sha256=K-OuMOTgYxOA9vef5jSANjmPRBfGrzQ65fg6Fd-rHMY,2683
37
- spiral/core/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
38
- spiral/core/client/__init__.pyi,sha256=Tn1OJmkO1rQUsPE9BtfEyxIjoife6s16qOd8XiyHi2c,3475
39
- spiral/core/index/__init__.pyi,sha256=NPOG1ztFO6siBGpmJU3boRzX26xfxw--2TiCydosGvo,314
40
- spiral/core/table/__init__.pyi,sha256=agrxN1dYx--dte_edQOKgAXT8yPDeh_cHA8dYAOodbE,3290
41
- spiral/core/table/manifests/__init__.pyi,sha256=3V59-K1qr1z2dGfgRKXaHSVheK8NNw8Q8PFhfbeQd_4,1065
42
- spiral/core/table/metastore/__init__.pyi,sha256=dMqySDnsjPUTBuFU2MaQGyocKEoGkWpeTQmUP2iIKbc,1880
43
- spiral/core/table/spec/__init__.pyi,sha256=D4GQp9RWwyLKTlRW7eDXcQE-xA5rF2iBcXZ8y7b48EE,5595
44
- spiral/datetime_.py,sha256=1TA1RYIRU22qcUuipIjVhAtGnPDVn2z9WttuhkmfkwY,964
45
- spiral/expressions/__init__.py,sha256=T8PIb0_UB9kynK0dpWbUD4No5lKRTG-wKnao8xOcXjY,6381
46
- spiral/expressions/base.py,sha256=q_W9XslcdFQtOIE_d1VkEmLickaXKOAoIcFeMoh-nqQ,4751
47
- spiral/expressions/http.py,sha256=begUydWoFHEqjeLkATvI_v66Ez6_rR-OQBWO5cHbb9c,2742
48
- spiral/expressions/io.py,sha256=gJ2a0FKMmdxarWKENulPRwH7KDvSJTIh_OUxX306xAM,3045
49
- spiral/expressions/list_.py,sha256=MMt5lf5H1M3O-x6N_PvqOLGq9NOk6Ukv0fPWwPC_uy4,1809
50
- spiral/expressions/mp4.py,sha256=_xGVnkygddzxP9a8OACJ8_KXnejuVbYCVKBCXBQ798Y,2151
51
- spiral/expressions/png.py,sha256=KO8X0OmMzUFwpg2I_j0JTyldPzVXDWIMzjWMWDV9vIY,506
52
- spiral/expressions/qoi.py,sha256=gvIbb6fXb_Bb080sn9wkpbGGrPs2UEcTXCfuv4-kcYQ,506
53
- spiral/expressions/refs.py,sha256=omeHBQ5o6N4xgZ3x5Xz7IRrWwYBBtQY8DYK0NNAxeGo,2109
54
- spiral/expressions/str_.py,sha256=tY8RXW3JWvr1-bEfCZtk5FAf11wKJnXPuA9EoeJ9tA4,1265
55
- spiral/expressions/struct.py,sha256=pGAnCDh6AK0BK1XfZ1qG4ce4ranIQEE1HQsgmzBcfwQ,2038
56
- spiral/expressions/text.py,sha256=-02gBWYoyNQ3qQ1--9HTa8IryUDojYQVIp8C7rgnOWQ,1893
57
- spiral/expressions/tiff.py,sha256=fQwIn0kLFBM2Y3YYIHmTgb_EIRHKT2fNc77nioDQQw4,8044
58
- spiral/expressions/udf.py,sha256=r6398z2Aj7KnXtwEvCiGNbgOXa6xsb_bnnG-FEvFxV4,1370
59
- spiral/grpc_.py,sha256=f3czdP1Mxme42Y5--a5ogYq1TTiWn-J_MlGjwJ2mWwM,1015
60
- spiral/iceberg/__init__.py,sha256=jSIlTxWauAbJV5gsWglZisFbnfNNzLYN90scoYcdWzc,65
61
- spiral/iceberg/client.py,sha256=E6FyE_h2HLgDW1cAFg1XgglJr6rbVOCWjRtRmqoMVkM,1003
62
- spiral/indexes/__init__.py,sha256=TXLQ-_3xso3lFIp2lM58_ip9OPNwPKFv1FdsWiUF-d8,178
63
- spiral/indexes/client.py,sha256=NsFBILEHMjyCUruFrUEKucRQRrN4OvqgbL4pmzWs07g,5600
64
- spiral/indexes/index.py,sha256=4CmSFlZYp46B2CjqtiyZ7VF5EH3duiutz3nWFnyApLA,973
65
- spiral/indexes/scan.py,sha256=B2m-UgNuawNB90HXK33GTQfMy2WLdNNxiiB6cIjFW2Y,697
66
- spiral/project.py,sha256=0uJ1Jb88Ie-cCNnSdX3QfFtCUqrjLka4zCm_TxCpVak,1189
67
- spiral/protogen/_/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
68
- spiral/protogen/_/arrow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
69
- spiral/protogen/_/arrow/flight/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
70
- spiral/protogen/_/arrow/flight/protocol/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
71
- spiral/protogen/_/arrow/flight/protocol/sql/__init__.py,sha256=_xhj9QkWEW1qZ-iVxcQ8k4EjYr7KJ5ofitJGqVUGQi4,79921
72
- spiral/protogen/_/scandal/__init__.py,sha256=X5YJqErZDIXxTESw8fLqJp3P2wZlqAglBzPs3LpTd-w,5145
73
- spiral/protogen/_/spiral/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
74
- spiral/protogen/_/spiral/table/__init__.py,sha256=o_aNyTuJBIRY6MlAWceMsjbfaSUuZphRxiG_IXmC0mU,629
75
- spiral/protogen/_/substrait/__init__.py,sha256=pV4-T-lwAHKkfFrNYSUGY4IkbIvuKjSo_imzF7BLj_s,126526
76
- spiral/protogen/_/substrait/extensions/__init__.py,sha256=yD7dg0TBqn-GK_L0qeVof1GKnwSLg_kPyQSV3kcSljs,3655
77
- spiral/protogen/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
78
- spiral/protogen/substrait/__init__.py,sha256=pV4-T-lwAHKkfFrNYSUGY4IkbIvuKjSo_imzF7BLj_s,126526
79
- spiral/protogen/substrait/extensions/__init__.py,sha256=yD7dg0TBqn-GK_L0qeVof1GKnwSLg_kPyQSV3kcSljs,3655
80
- spiral/protogen/util.py,sha256=smnvVo6nYH3FfDm9jqhNLaXz4bbTBaQezHQDCTvZyiQ,1486
81
- spiral/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
82
- spiral/server.py,sha256=ztBmB5lBnUz-smQxR_tC8AI5SOhz17wH0MI3GuzDUdM,600
83
- spiral/settings.py,sha256=PIQV2ljtB3pEOWoMRVSRzSGJNrXviO2JBgZ5ZY_Nq2E,2794
84
- spiral/substrait_.py,sha256=2BYvwFGcCwJ0JXNhXOLdPuhM1PqFyaeSqFpQCtv-M4E,12581
85
- spiral/tables/__init__.py,sha256=iiP7BkHA117em37_e75jtdvoZC10xCXtld18gRnPbTw,430
86
- spiral/tables/client.py,sha256=l_wJJRf3BPD5lg4Q1Ll2lAqQIuBCnKwC6JtsAui91Tc,4915
87
- spiral/tables/dataset.py,sha256=DuHeKVCJfXLsbxmde9QW6yvesW5uhswG6qAxV5X0ZgA,7890
88
- spiral/tables/debug/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
89
- spiral/tables/debug/manifests.py,sha256=E_-DiMBg2EPL97cl9hLWhiqEsFtjEBgh_C7jZy8EWYc,2594
90
- spiral/tables/debug/metrics.py,sha256=XdRDcjggtsLNGCAjam6IxG9072pz_d2C8iLApNRFUtk,2044
91
- spiral/tables/debug/scan.py,sha256=-IWX_UjO4QP9Hj7PtZ1rLlbswJcryOin56GT-exqFm4,8942
92
- spiral/tables/maintenance.py,sha256=7Xa2Jdu_OY1Qu6iN1sPVdywVZtk_Mv3EaC3G93cmQvI,305
93
- spiral/tables/scan.py,sha256=3lPf5fSyF1fHGdGJ-pvu5HxPWoonf_XL7neWTqzB-0I,7582
94
- spiral/tables/snapshot.py,sha256=2NTuVEp2uJ1pV3Q5tLj7FOzPSc9axlfb6uOITwHnj0g,2229
95
- spiral/tables/table.py,sha256=VM93Rsm67sJFendI1_VhlkFORIdBGfhCMBUBK4dve9I,4910
96
- spiral/tables/transaction.py,sha256=3a64R-mf_cmR54BNn8U-05jmWonp6Ivxhe6u01Dyjzo,1573
97
- spiral/types_.py,sha256=W_jyO7F6rpPiH69jhgSgV7OxQZbOlb1Ho3InpKUP6Eo,155
98
- pyspiral-0.4.4.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- from spiral.cli import AsyncTyper
2
-
3
- from . import namespaces, tables
4
-
5
- app = AsyncTyper(short_help="Apache Iceberg Catalog.")
6
- app.add_typer(tables.app, name="tables")
7
- app.add_typer(namespaces.app, name="namespaces")