pyspiral 0.6.8__cp312-abi3-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102)
  1. pyspiral-0.6.8.dist-info/METADATA +51 -0
  2. pyspiral-0.6.8.dist-info/RECORD +102 -0
  3. pyspiral-0.6.8.dist-info/WHEEL +4 -0
  4. pyspiral-0.6.8.dist-info/entry_points.txt +2 -0
  5. spiral/__init__.py +35 -0
  6. spiral/_lib.abi3.so +0 -0
  7. spiral/adbc.py +411 -0
  8. spiral/api/__init__.py +78 -0
  9. spiral/api/admin.py +15 -0
  10. spiral/api/client.py +164 -0
  11. spiral/api/filesystems.py +134 -0
  12. spiral/api/key_space_indexes.py +23 -0
  13. spiral/api/organizations.py +77 -0
  14. spiral/api/projects.py +219 -0
  15. spiral/api/telemetry.py +19 -0
  16. spiral/api/text_indexes.py +56 -0
  17. spiral/api/types.py +22 -0
  18. spiral/api/workers.py +40 -0
  19. spiral/api/workloads.py +52 -0
  20. spiral/arrow_.py +216 -0
  21. spiral/cli/__init__.py +88 -0
  22. spiral/cli/__main__.py +4 -0
  23. spiral/cli/admin.py +14 -0
  24. spiral/cli/app.py +104 -0
  25. spiral/cli/console.py +95 -0
  26. spiral/cli/fs.py +76 -0
  27. spiral/cli/iceberg.py +97 -0
  28. spiral/cli/key_spaces.py +89 -0
  29. spiral/cli/login.py +24 -0
  30. spiral/cli/orgs.py +89 -0
  31. spiral/cli/printer.py +53 -0
  32. spiral/cli/projects.py +147 -0
  33. spiral/cli/state.py +5 -0
  34. spiral/cli/tables.py +174 -0
  35. spiral/cli/telemetry.py +17 -0
  36. spiral/cli/text.py +115 -0
  37. spiral/cli/types.py +50 -0
  38. spiral/cli/workloads.py +58 -0
  39. spiral/client.py +178 -0
  40. spiral/core/__init__.pyi +0 -0
  41. spiral/core/_tools/__init__.pyi +5 -0
  42. spiral/core/authn/__init__.pyi +27 -0
  43. spiral/core/client/__init__.pyi +237 -0
  44. spiral/core/table/__init__.pyi +101 -0
  45. spiral/core/table/manifests/__init__.pyi +35 -0
  46. spiral/core/table/metastore/__init__.pyi +58 -0
  47. spiral/core/table/spec/__init__.pyi +213 -0
  48. spiral/dataloader.py +285 -0
  49. spiral/dataset.py +255 -0
  50. spiral/datetime_.py +27 -0
  51. spiral/debug/__init__.py +0 -0
  52. spiral/debug/manifests.py +87 -0
  53. spiral/debug/metrics.py +56 -0
  54. spiral/debug/scan.py +266 -0
  55. spiral/expressions/__init__.py +276 -0
  56. spiral/expressions/base.py +157 -0
  57. spiral/expressions/http.py +86 -0
  58. spiral/expressions/io.py +100 -0
  59. spiral/expressions/list_.py +68 -0
  60. spiral/expressions/mp4.py +62 -0
  61. spiral/expressions/png.py +18 -0
  62. spiral/expressions/qoi.py +18 -0
  63. spiral/expressions/refs.py +58 -0
  64. spiral/expressions/str_.py +39 -0
  65. spiral/expressions/struct.py +59 -0
  66. spiral/expressions/text.py +62 -0
  67. spiral/expressions/tiff.py +223 -0
  68. spiral/expressions/udf.py +46 -0
  69. spiral/grpc_.py +32 -0
  70. spiral/iceberg.py +31 -0
  71. spiral/iterable_dataset.py +106 -0
  72. spiral/key_space_index.py +44 -0
  73. spiral/project.py +199 -0
  74. spiral/protogen/_/__init__.py +0 -0
  75. spiral/protogen/_/arrow/__init__.py +0 -0
  76. spiral/protogen/_/arrow/flight/__init__.py +0 -0
  77. spiral/protogen/_/arrow/flight/protocol/__init__.py +0 -0
  78. spiral/protogen/_/arrow/flight/protocol/sql/__init__.py +2548 -0
  79. spiral/protogen/_/google/__init__.py +0 -0
  80. spiral/protogen/_/google/protobuf/__init__.py +2310 -0
  81. spiral/protogen/_/message_pool.py +3 -0
  82. spiral/protogen/_/py.typed +0 -0
  83. spiral/protogen/_/scandal/__init__.py +190 -0
  84. spiral/protogen/_/spfs/__init__.py +72 -0
  85. spiral/protogen/_/spql/__init__.py +61 -0
  86. spiral/protogen/_/substrait/__init__.py +6196 -0
  87. spiral/protogen/_/substrait/extensions/__init__.py +169 -0
  88. spiral/protogen/__init__.py +0 -0
  89. spiral/protogen/util.py +41 -0
  90. spiral/py.typed +0 -0
  91. spiral/scan.py +285 -0
  92. spiral/server.py +17 -0
  93. spiral/settings.py +114 -0
  94. spiral/snapshot.py +56 -0
  95. spiral/streaming_/__init__.py +3 -0
  96. spiral/streaming_/reader.py +133 -0
  97. spiral/streaming_/stream.py +157 -0
  98. spiral/substrait_.py +274 -0
  99. spiral/table.py +293 -0
  100. spiral/text_index.py +17 -0
  101. spiral/transaction.py +58 -0
  102. spiral/types_.py +6 -0
spiral/client.py ADDED
@@ -0,0 +1,178 @@
+ from datetime import datetime, timedelta
+ from typing import TYPE_CHECKING
+
+ import jwt
+ import pyarrow as pa
+
+ from spiral.api import SpiralAPI
+ from spiral.api.projects import CreateProjectRequest, CreateProjectResponse
+ from spiral.core.client import Operations
+ from spiral.core.client import Spiral as CoreSpiral
+ from spiral.datetime_ import timestamp_micros
+ from spiral.expressions import ExprLike
+ from spiral.scan import Scan
+ from spiral.settings import Settings, settings
+
+ if TYPE_CHECKING:
+     from spiral.iceberg import Iceberg
+     from spiral.key_space_index import KeySpaceIndex
+     from spiral.project import Project
+     from spiral.table import Table
+     from spiral.text_index import TextIndex
+
+
+ class Spiral:
+     def __init__(self, config: Settings | None = None):
+         self._config = config or settings()
+         self._org = None
+
+     @property
+     def config(self) -> Settings:
+         return self._config
+
+     @property
+     def api(self) -> SpiralAPI:
+         return self._config.api
+
+     @property
+     def _core(self) -> CoreSpiral:
+         return self._config.core
+
+     @property
+     def organization(self) -> str:
+         if self._org is None:
+             token = self._config.authn.token()
+             if token is None:
+                 raise ValueError("Authentication failed.")
+             token_payload = jwt.decode(token.expose_secret(), options={"verify_signature": False})
+             if "org_id" not in token_payload:
+                 raise ValueError("Please create an organization.")
+             self._org = token_payload["org_id"]
+         return self._org
+
+     def list_projects(self) -> list["Project"]:
+         """List projects."""
+         from .project import Project
+
+         return [Project(self, project_id=p.id, name=p.name) for p in self.api.project.list()]
+
+     def create_project(
+         self,
+         id_prefix: str | None = None,
+         *,
+         name: str | None = None,
+     ) -> "Project":
+         """Create a project in the current, or given, organization."""
+         from .project import Project
+
+         res: CreateProjectResponse = self.api.project.create(CreateProjectRequest(id_prefix=id_prefix, name=name))
+         return Project(self, res.project.id, name=res.project.name)
+
+     def project(self, project_id: str) -> "Project":
+         """Open an existing project."""
+         from spiral.project import Project
+
+         # We avoid an API call since we'd just be fetching a human-readable name. Seems a waste in most cases.
+         return Project(self, project_id=project_id, name=project_id)
+
+     def table(self, table_id: str) -> "Table":
+         """Open a table using an ID."""
+         from spiral.table import Table
+
+         return Table(self, self._core.table(table_id))
+
+     def text_index(self, index_id: str) -> "TextIndex":
+         """Open a text index using an ID."""
+         from spiral.text_index import TextIndex
+
+         return TextIndex(self._core.text_index(index_id))
+
+     def key_space_index(self, index_id: str) -> "KeySpaceIndex":
+         """Open a key space index using an ID."""
+         from spiral.key_space_index import KeySpaceIndex
+
+         return KeySpaceIndex(self._core.key_space_index(index_id))
+
+     def scan(
+         self,
+         *projections: ExprLike,
+         where: ExprLike | None = None,
+         asof: datetime | int | None = None,
+     ) -> Scan:
+         """Starts a read transaction on the Spiral.
+
+         Args:
+             projections: a set of expressions that return struct arrays.
+             where: a query expression to apply to the data.
+             asof: only data written before the given timestamp will be returned (with caveats around compaction).
+         """
+         from spiral import expressions as se
+
+         if isinstance(asof, datetime):
+             asof = timestamp_micros(asof)
+
+         # Combine all projections into a single struct.
+         projection = se.merge(*projections)
+         if where is not None:
+             where = se.lift(where)
+
+         return Scan(
+             self._core.scan(
+                 projection.__expr__,
+                 filter=where.__expr__ if where else None,
+                 asof=asof,
+             ),
+         )
+
+     # TODO(marko): This should be query, and search should be query + scan.
+     def search(
+         self,
+         top_k: int,
+         *rank_by: ExprLike,
+         filters: ExprLike | None = None,
+         freshness_window: timedelta | None = None,
+     ) -> pa.RecordBatchReader:
+         """Queries the index with the given rank-by and filter clauses. Returns a stream of scored keys.
+
+         Args:
+             top_k: The number of top results to return.
+             rank_by: Rank-by expressions are combined for scoring.
+                 See `se.text.find` and `se.text.boost` for scoring expressions.
+             filters: The `filters` expression is used to filter the results.
+                 It must return a boolean value and use only conjunctions (ANDs). Expressions in the filters
+                 statement are considered either a `must` or `must_not` clause in search terminology.
+             freshness_window: If provided, the index will not be refreshed unless its staleness exceeds this window.
+         """
+         from spiral import expressions as se
+
+         if not rank_by:
+             raise ValueError("At least one rank by expression is required.")
+         rank_by = se.or_(*rank_by)
+         if filters is not None:
+             filters = se.lift(filters)
+
+         if freshness_window is None:
+             freshness_window = timedelta(seconds=0)
+         freshness_window_s = int(freshness_window.total_seconds())
+
+         return self._core.search(
+             top_k=top_k,
+             rank_by=rank_by.__expr__,
+             filters=filters.__expr__ if filters else None,
+             freshness_window_s=freshness_window_s,
+         )
+
+     def _ops(self) -> Operations:
+         """Access maintenance operations."""
+         return self._core._ops(format=settings().file_format)
+
+     @property
+     def iceberg(self) -> "Iceberg":
+         """
+         Apache Iceberg is a powerful open-source table format designed for high-performance data lakes.
+         Iceberg brings reliability, scalability, and advanced features like time travel, schema evolution,
+         and ACID transactions to your warehouse.
+         """
+         from spiral.iceberg import Iceberg
+
+         return Iceberg(self)
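For orientation, a minimal usage sketch of the client above. It sticks to methods visible in this file; the table ID is a placeholder, and how a struct projection expression is built from a Table lives elsewhere in the package, so `projection` is left abstract here.

    from datetime import datetime, timedelta

    from spiral.client import Spiral

    sp = Spiral()                              # settings() supplies API URLs and auth
    print(sp.organization)                     # org ID decoded from the auth token

    project = sp.create_project(name="demo")   # or sp.project("<project-id>") to open one
    table = sp.table("<table-id>")             # open an existing table by ID (placeholder)

    # `projection` must be an ExprLike producing a struct array; building it from
    # `table` happens via spiral.expressions / spiral.table, not shown in this file.
    projection = ...
    scan = sp.scan(projection, asof=datetime.now())   # datetime asof is converted to micros

    # search() returns a pyarrow.RecordBatchReader of scored keys; the rank-by
    # expression is likewise left abstract here.
    reader = sp.search(10, ..., freshness_window=timedelta(seconds=30))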
spiral/core/__init__.pyi ADDED
File without changes
spiral/core/_tools/__init__.pyi ADDED
@@ -0,0 +1,5 @@
+ from ..table.spec import Schema
+
+ def pretty_key(key: bytes, schema: Schema) -> str:
+     """Represent a key in a human-readable way."""
+     ...
spiral/core/authn/__init__.pyi ADDED
@@ -0,0 +1,27 @@
+ from spiral.api.types import OrgId
+
+ class Token:
+     def __init__(self, value: str): ...
+     def expose_secret(self) -> str: ...
+
+ class Authn:
+     @staticmethod
+     def from_token(token: Token) -> Authn: ...
+     @staticmethod
+     def from_fallback(api_url: str) -> Authn: ...
+     @staticmethod
+     def from_device() -> Authn: ...
+     def token(self) -> Token | None: ...
+
+ class DeviceCodeAuth:
+     @staticmethod
+     def default() -> DeviceCodeAuth:
+         """Return the static device code instance."""
+         ...
+     def authenticate(self, force: bool = False, org_id: OrgId | None = None) -> Token:
+         """Authenticate using device code flow."""
+         ...
+
+     def logout(self) -> None:
+         """Logout from the device authentication session."""
+         ...
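A hedged sketch of how these stubs compose for device-code login. It uses only the signatures declared above; the interactive details of the flow (where the code is displayed, which browser prompt appears) are assumptions.

    from spiral.core.authn import Authn, DeviceCodeAuth

    # Run the device-code flow (interactive) and obtain a Token.
    auth_flow = DeviceCodeAuth.default()
    token = auth_flow.authenticate(force=False)

    # Wrap the token in an Authn context; token() returns None if unauthenticated.
    authn = Authn.from_token(token)
    assert authn.token() is not None

    # End the device session when done.
    auth_flow.logout()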
spiral/core/client/__init__.pyi ADDED
@@ -0,0 +1,237 @@
+ from typing import Any, Literal
+
+ import pyarrow as pa
+ from spiral.api.types import DatasetName, IndexName, ProjectId, RootUri, TableName
+ from spiral.core.authn import Authn
+ from spiral.core.table import ColumnGroupState, KeyRange, KeySpaceState, Scan, Snapshot, Table, Transaction
+ from spiral.core.table.spec import ColumnGroup, Schema
+ from spiral.expressions import Expr
+
+ class Spiral:
+     """A client for the Spiral database."""
+     def __init__(
+         self,
+         api_url: str | None = None,
+         spfs_url: str | None = None,
+         authn: Authn | None = None,
+     ):
+         """Initialize the Spiral client."""
+         ...
+     def authn(self) -> Authn:
+         """Get the current authentication context."""
+         ...
+
+     def scan(
+         self,
+         projection: Expr,
+         filter: Expr | None = None,
+         asof: int | None = None,
+     ) -> Scan:
+         """Construct a table scan."""
+         ...
+
+     def transaction(self, table: Table, format: str | None = None, retries: int | None = 3) -> Transaction:
+         """Begin a table transaction."""
+         ...
+
+     def search(
+         self,
+         top_k: int,
+         rank_by: Expr,
+         *,
+         filters: Expr | None = None,
+         freshness_window_s: int | None = None,
+     ) -> pa.RecordBatchReader:
+         """Search an index.
+
+         Searching an index returns a stream of record batches that match the table's key schema plus a float score column.
+         """
+         ...
+
+     def table(self, table_id: str) -> Table:
+         """Get a table."""
+         ...
+
+     def create_table(
+         self,
+         project_id: ProjectId,
+         dataset: DatasetName,
+         table: TableName,
+         key_schema: Schema,
+         *,
+         root_uri: RootUri | None = None,
+         exist_ok: bool = False,
+     ) -> Table:
+         """Create a new table in the specified project."""
+         ...
+
+     def text_index(self, index_id: str) -> TextIndex:
+         """Get a text index."""
+         ...
+
+     def create_text_index(
+         self,
+         project_id: ProjectId,
+         name: IndexName,
+         projection: Expr,
+         filter: Expr | None = None,
+         *,
+         root_uri: RootUri | None = None,
+         exist_ok: bool = False,
+     ) -> TextIndex:
+         """Create a new index in the specified project."""
+         ...
+
+     def key_space_index(self, index_id: str) -> KeySpaceIndex:
+         """Get a key space index."""
+         ...
+
+     def create_key_space_index(
+         self,
+         project_id: ProjectId,
+         name: IndexName,
+         granularity: int,
+         projection: Expr,
+         filter: Expr | None = None,
+         *,
+         root_uri: RootUri | None = None,
+         exist_ok: bool = False,
+     ) -> KeySpaceIndex:
+         """Create a new key space index in the specified project."""
+         ...
+
+     def _ops(self, *, format: str | None = None) -> Operations:
+         """Access maintenance operations.
+
+         IMPORTANT: This API is internal and is currently exposed for development & testing.
+         Maintenance operations are run by SpiralDB.
+         """
+         ...
+
+ class TextIndex:
+     id: str
+
+ class KeySpaceIndex:
+     id: str
+     table_id: str
+     granularity: int
+     projection: Expr
+     filter: Expr
+     asof: int
+
+ class Shard:
+     """A shard representing a partition of data.
+
+     Attributes:
+         key_range: The key range for this shard.
+         cardinality: The number of rows in this shard, if known.
+     """
+
+     key_range: KeyRange
+     cardinality: int | None
+
+     def __init__(self, key_range: KeyRange, cardinality: int | None): ...
+     def __getnewargs__(self) -> tuple[KeyRange, int | None]: ...
+
+ class ShuffleConfig:
+     """Configuration for within-shard sample shuffling.
+
+     This controls how samples are shuffled within a buffer, separate from
+     which shards to read (which is specified as a parameter to the scan).
+
+     Attributes:
+         buffer_size: Size of the buffer pool for shuffling samples.
+         seed: Random seed for reproducibility. If None, uses OS randomness.
+         max_batch_size: Maximum batch size for output chunks. If None,
+             defaults to max(1, buffer_size / 16).
+     """
+
+     buffer_size: int
+     seed: int | None
+     max_batch_size: int | None
+
+     def __init__(
+         self,
+         buffer_size: int,
+         *,
+         seed: int | None = None,
+         max_batch_size: int | None = None,
+     ): ...
+
+ class Operations:
+     def flush_wal(self, table: Table, *, keep_latest_s: int | None = None) -> None:
+         """
+         Flush the write-ahead log of the table.
+         """
+         ...
+     def compact_key_space(
+         self,
+         *,
+         table: Table,
+         mode: Literal["plan", "read", "write"] | None = None,
+         partition_bytes_min: int | None = None,
+     ):
+         """
+         Compact the key space of the table.
+         """
+         ...
+     def compact_column_group(
+         self,
+         table: Table,
+         column_group: ColumnGroup,
+         *,
+         mode: Literal["plan", "read", "write"] | None = None,
+         partition_bytes_min: int | None = None,
+     ):
+         """
+         Compact a column group in the table.
+         """
+         ...
+     def update_text_index(self, index: TextIndex, snapshot: Snapshot) -> None:
+         """
+         Index table changes up to the given snapshot.
+         """
+         ...
+     def update_key_space_index(self, index: KeySpaceIndex, snapshot: Snapshot) -> None:
+         """
+         Index table changes up to the given snapshot.
+         """
+         ...
+     def key_space_state(self, snapshot: Snapshot) -> KeySpaceState:
+         """
+         The key space state for the table.
+         """
+         ...
+     def column_group_state(
+         self, snapshot: Snapshot, key_space_state: KeySpaceState, column_group: ColumnGroup
+     ) -> ColumnGroupState:
+         """
+         The state of the column group of the table.
+         """
+         ...
+     def column_groups_states(self, snapshot: Snapshot, key_space_state: KeySpaceState) -> list[ColumnGroupState]:
+         """
+         The state of each column group of the table.
+         """
+         ...
+     def compute_shards(self, index: KeySpaceIndex) -> list[Shard]:
+         """
+         Compute the scan shards from a key space index.
+         """
+         ...
+     def prepare_shard(
+         self,
+         output_path: str,
+         scan: Scan,
+         shard: Shard,
+         row_block_size: int = 8192,
+     ) -> None:
+         """
+         Prepare a shard locally. Used for `SpiralStream` integration with `streaming`, which requires on-disk shards.
+         """
+         ...
+     def metrics(self) -> dict[str, Any]: ...
+
+ def flush_telemetry() -> None:
+     """Flush telemetry data to the configured exporter."""
+     ...
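A small sketch of constructing the ShuffleConfig declared above. The values are arbitrary; the derived default in the comment simply restates the documented rule max(1, buffer_size / 16) rather than being computed by this snippet.

    from spiral.core.client import ShuffleConfig

    # Buffer 4096 samples; with no explicit max_batch_size the documented default
    # would be max(1, 4096 / 16) == 256 rows per output chunk.
    cfg = ShuffleConfig(4096, seed=42)

    # A fixed seed makes within-shard shuffling reproducible across runs; omitting
    # it falls back to OS randomness per the docstring. This config is consumed by
    # Scan.to_shuffled_record_batches in spiral/core/table below.
    cfg_explicit = ShuffleConfig(4096, seed=42, max_batch_size=128)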
spiral/core/table/__init__.pyi ADDED
@@ -0,0 +1,101 @@
+ from typing import Any
+
+ import pyarrow as pa
+ from spiral.core.client import Shard, ShuffleConfig
+
+ from .manifests import FragmentManifest
+ from .metastore import PyMetastore
+ from .spec import ColumnGroup, Key, Schema, WriteAheadLog
+
+ class KeyRange:
+     """A right-exclusive range of keys."""
+
+     def __init__(self, *, begin: Key, end: Key): ...
+
+     begin: Key
+     end: Key
+
+     def union(self, other: KeyRange) -> KeyRange: ...
+     def __or__(self, other: KeyRange) -> KeyRange: ...
+     def intersection(self, key_extent: KeyRange) -> KeyRange | None: ...
+     def __and__(self, other: KeyRange) -> KeyRange | None: ...
+     def contains(self, item: Key) -> bool: ...
+     def __contains__(self, item: Key) -> bool: ...
+     def is_disjoint(self, key_range: KeyRange) -> bool:
+         return self.end <= key_range.begin or self.begin >= key_range.end
+
+     @staticmethod
+     def beginning_with(begin: Key) -> KeyRange: ...
+     @staticmethod
+     def ending_with(end: Key) -> KeyRange: ...
+     @staticmethod
+     def full() -> KeyRange: ...
+     def __reduce__(self) -> tuple[type[KeyRange], tuple[Key, Key]]: ...
+
+ class Table:
+     def __init__(self, metastore: PyMetastore): ...
+
+     id: str
+     root_uri: str
+     mount_id: str | None
+     key_schema: Schema
+     metastore: PyMetastore
+
+     def get_wal(self, *, asof: int | None) -> WriteAheadLog: ...
+     def get_schema(self, *, asof: int | None) -> Schema: ...
+     def get_snapshot(self, *, asof: int | None) -> Snapshot: ...
+
+ class Snapshot:
+     """A snapshot of a table at a specific point in time."""
+
+     asof: int
+     table: Table
+     wal: WriteAheadLog
+
+ class Scan:
+     def key_schema(self) -> Schema: ...
+     def schema(self) -> Schema: ...
+     def is_empty(self) -> bool: ...
+     def splits(self) -> list[KeyRange]: ...
+     def shards(self) -> list[Shard]: ...
+     def table_ids(self) -> list[str]: ...
+     def column_groups(self) -> list[ColumnGroup]: ...
+     def column_group_state(self, column_group: ColumnGroup) -> ColumnGroupState: ...
+     def key_space_state(self, table_id: str) -> KeySpaceState: ...
+     def to_record_batches(
+         self,
+         key_table: pa.Table | pa.RecordBatch | None = None,
+         batch_readahead: int | None = None,
+     ) -> pa.RecordBatchReader: ...
+     def to_shuffled_record_batches(
+         self,
+         shards: list[Shard] | None = None,
+         shuffle: ShuffleConfig | None = None,
+         max_batch_size: int | None = None,
+         batch_readahead: int | None = None,
+         infinite: bool = False,
+     ) -> pa.RecordBatchReader:
+         # If `infinite` is True, shards are reshuffled once exhausted, but not before the first pass.
+         # Otherwise, shards are not shuffled and the shuffle config is only used for the shuffle buffer.
+         ...
+     def metrics(self) -> dict[str, Any]: ...
+
+ class KeySpaceState:
+     manifest: FragmentManifest
+
+     def key_schema(self) -> Schema: ...
+
+ class ColumnGroupState:
+     manifest: FragmentManifest
+     column_group: ColumnGroup
+
+     def schema(self) -> Schema: ...
+
+ class Transaction:
+     status: str
+
+     def write(self, table: pa.RecordBatchReader, *, partition_size_bytes: int | None = None): ...
+     def drop_columns(self, column_paths: list[str]): ...
+     def commit(self): ...
+     def abort(self): ...
+     def metrics(self) -> dict[str, Any]: ...
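A hedged sketch of the KeyRange semantics above. The `Key` values are hypothetical placeholders (Key construction lives in spiral.core.table.spec, which is not shown in this diff), so this is illustrative rather than runnable as-is.

    from spiral.core.table import KeyRange

    # Hypothetical keys k1 < k3 < k5, built via spiral.core.table.spec.Key (not shown here).
    a = KeyRange(begin=k1, end=k3)   # right-exclusive: covers k1 <= key < k3
    b = KeyRange(begin=k3, end=k5)

    a.is_disjoint(b)                 # True: a.end <= b.begin, per the default body shown above
    merged = a | b                   # union spanning k1 .. k5
    k1 in a                          # __contains__: membership test for a single Key
    overlap = a & b                  # intersection; presumably None here since the ranges are disjoint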
spiral/core/table/manifests/__init__.pyi ADDED
@@ -0,0 +1,35 @@
+ import pyarrow as pa
+ from spiral.core.table import KeyRange
+ from spiral.core.table.spec import FileFormat, FragmentLevel, KeyExtent, KeySpan
+ from spiral.types_ import Timestamp
+
+ class FragmentManifest:
+     def __len__(self): ...
+     def __getitem__(self, idx: int): ...
+     def to_arrow(self) -> pa.RecordBatchReader: ...
+     @staticmethod
+     def compute_schema() -> pa.Schema: ...
+     @staticmethod
+     def from_fragment(fragment_file: FragmentFile) -> FragmentManifest: ...
+     @staticmethod
+     def from_arrow(reader: pa.RecordBatchReader) -> FragmentManifest: ...
+     @staticmethod
+     def empty() -> FragmentManifest: ...
+
+ class FragmentFile:
+     id: str
+     committed_at: Timestamp | None
+     compacted_at: Timestamp | None
+     format: FileFormat
+     format_metadata: bytes | None
+     size_bytes: int
+     level: FragmentLevel
+     # NOTE: Empty for key space files.
+     column_ids: list[str]
+     ks_id: str
+     key_span: KeySpan
+     key_extent: KeyExtent
+
+     @property
+     def key_range(self) -> KeyRange: ...
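A tiny sketch using only the constructors declared above; it assumes the compiled extension is importable but nothing beyond the signatures shown.

    from spiral.core.table.manifests import FragmentManifest

    manifest = FragmentManifest.empty()          # a manifest with no fragments
    print(len(manifest))                         # __len__; presumably 0 for an empty manifest
    print(FragmentManifest.compute_schema())     # the Arrow schema used by to_arrow()

    # Round-trip through Arrow: to_arrow() yields a RecordBatchReader that
    # from_arrow() can consume again.
    rebuilt = FragmentManifest.from_arrow(manifest.to_arrow())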
spiral/core/table/metastore/__init__.pyi ADDED
@@ -0,0 +1,58 @@
+ """The SpiralDB metastore API."""
+
+ from spiral.core.client import Authn
+ from spiral.core.table.spec import ColumnGroup, ColumnGroupMetadata, FileFormat, KeySpaceMetadata, Schema, WriteAheadLog
+ from spiral.types_ import Uri
+
+ class FileHandle:
+     def __init__(self, *, uri: str, format: FileFormat, spfs_token: str | None): ...
+
+     uri: str
+     format: FileFormat
+     spfs_token: str | None
+
+ class FileRef:
+     def __init__(self, *, id: str, file_type: FileType, file_format: FileFormat): ...
+
+     id: str
+     file_type: FileType
+     file_format: FileFormat
+
+     def resolve_uri(self, root_uri: str) -> str:
+         """Resolves the file reference URI given the root URI."""
+
+ class FileType:
+     FragmentFile: FileType
+     FragmentManifest: FileType
+     ReferenceFile: FileType
+
+     def __int__(self) -> int:
+         """Returns the protobuf enum int value."""
+
+ class PyMetastore:
+     """Rust implementation of the metastore API."""
+
+     @property
+     def table_id(self) -> str: ...
+     @property
+     def root_uri(self) -> Uri: ...
+     @property
+     def key_schema(self) -> Schema: ...
+     def get_wal(self) -> WriteAheadLog:
+         """Return the log for the table."""
+     def get_key_space_metadata(self) -> KeySpaceMetadata:
+         """Return the metadata for the key space."""
+         ...
+     def get_column_group_metadata(self, column_group: ColumnGroup) -> ColumnGroupMetadata:
+         """Return the metadata for a column group."""
+         ...
+
+     @staticmethod
+     def http(
+         table_id: str,
+         root_uri: str,
+         key_schema: Schema,
+         base_url: str,
+         authn: Authn,
+     ) -> PyMetastore:
+         """Construct a PyMetastore backed by an HTTP metastore service."""