pyspiral 0.7.18__cp312-abi3-manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyspiral-0.7.18.dist-info/METADATA +52 -0
- pyspiral-0.7.18.dist-info/RECORD +110 -0
- pyspiral-0.7.18.dist-info/WHEEL +4 -0
- pyspiral-0.7.18.dist-info/entry_points.txt +3 -0
- spiral/__init__.py +55 -0
- spiral/_lib.abi3.so +0 -0
- spiral/adbc.py +411 -0
- spiral/api/__init__.py +78 -0
- spiral/api/admin.py +15 -0
- spiral/api/client.py +164 -0
- spiral/api/filesystems.py +134 -0
- spiral/api/key_space_indexes.py +23 -0
- spiral/api/organizations.py +77 -0
- spiral/api/projects.py +219 -0
- spiral/api/telemetry.py +19 -0
- spiral/api/text_indexes.py +56 -0
- spiral/api/types.py +23 -0
- spiral/api/workers.py +40 -0
- spiral/api/workloads.py +52 -0
- spiral/arrow_.py +216 -0
- spiral/cli/__init__.py +88 -0
- spiral/cli/__main__.py +4 -0
- spiral/cli/admin.py +14 -0
- spiral/cli/app.py +108 -0
- spiral/cli/console.py +95 -0
- spiral/cli/fs.py +76 -0
- spiral/cli/iceberg.py +97 -0
- spiral/cli/key_spaces.py +103 -0
- spiral/cli/login.py +25 -0
- spiral/cli/orgs.py +90 -0
- spiral/cli/printer.py +53 -0
- spiral/cli/projects.py +147 -0
- spiral/cli/state.py +7 -0
- spiral/cli/tables.py +197 -0
- spiral/cli/telemetry.py +17 -0
- spiral/cli/text.py +115 -0
- spiral/cli/types.py +50 -0
- spiral/cli/workloads.py +58 -0
- spiral/client.py +256 -0
- spiral/core/__init__.pyi +0 -0
- spiral/core/_tools/__init__.pyi +5 -0
- spiral/core/authn/__init__.pyi +21 -0
- spiral/core/client/__init__.pyi +285 -0
- spiral/core/config/__init__.pyi +35 -0
- spiral/core/expr/__init__.pyi +15 -0
- spiral/core/expr/images/__init__.pyi +3 -0
- spiral/core/expr/list_/__init__.pyi +4 -0
- spiral/core/expr/refs/__init__.pyi +4 -0
- spiral/core/expr/str_/__init__.pyi +3 -0
- spiral/core/expr/struct_/__init__.pyi +6 -0
- spiral/core/expr/text/__init__.pyi +5 -0
- spiral/core/expr/udf/__init__.pyi +14 -0
- spiral/core/expr/video/__init__.pyi +3 -0
- spiral/core/table/__init__.pyi +141 -0
- spiral/core/table/manifests/__init__.pyi +35 -0
- spiral/core/table/metastore/__init__.pyi +58 -0
- spiral/core/table/spec/__init__.pyi +215 -0
- spiral/dataloader.py +299 -0
- spiral/dataset.py +264 -0
- spiral/datetime_.py +27 -0
- spiral/debug/__init__.py +0 -0
- spiral/debug/manifests.py +87 -0
- spiral/debug/metrics.py +56 -0
- spiral/debug/scan.py +266 -0
- spiral/enrichment.py +306 -0
- spiral/expressions/__init__.py +274 -0
- spiral/expressions/base.py +167 -0
- spiral/expressions/file.py +17 -0
- spiral/expressions/http.py +17 -0
- spiral/expressions/list_.py +68 -0
- spiral/expressions/s3.py +16 -0
- spiral/expressions/str_.py +39 -0
- spiral/expressions/struct.py +59 -0
- spiral/expressions/text.py +62 -0
- spiral/expressions/tiff.py +222 -0
- spiral/expressions/udf.py +60 -0
- spiral/grpc_.py +32 -0
- spiral/iceberg.py +31 -0
- spiral/iterable_dataset.py +106 -0
- spiral/key_space_index.py +44 -0
- spiral/project.py +227 -0
- spiral/protogen/_/__init__.py +0 -0
- spiral/protogen/_/arrow/__init__.py +0 -0
- spiral/protogen/_/arrow/flight/__init__.py +0 -0
- spiral/protogen/_/arrow/flight/protocol/__init__.py +0 -0
- spiral/protogen/_/arrow/flight/protocol/sql/__init__.py +2548 -0
- spiral/protogen/_/google/__init__.py +0 -0
- spiral/protogen/_/google/protobuf/__init__.py +2310 -0
- spiral/protogen/_/message_pool.py +3 -0
- spiral/protogen/_/py.typed +0 -0
- spiral/protogen/_/scandal/__init__.py +190 -0
- spiral/protogen/_/spfs/__init__.py +72 -0
- spiral/protogen/_/spql/__init__.py +61 -0
- spiral/protogen/_/substrait/__init__.py +6196 -0
- spiral/protogen/_/substrait/extensions/__init__.py +169 -0
- spiral/protogen/__init__.py +0 -0
- spiral/protogen/util.py +41 -0
- spiral/py.typed +0 -0
- spiral/scan.py +363 -0
- spiral/server.py +17 -0
- spiral/settings.py +36 -0
- spiral/snapshot.py +56 -0
- spiral/streaming_/__init__.py +3 -0
- spiral/streaming_/reader.py +133 -0
- spiral/streaming_/stream.py +157 -0
- spiral/substrait_.py +274 -0
- spiral/table.py +224 -0
- spiral/text_index.py +17 -0
- spiral/transaction.py +155 -0
- spiral/types_.py +6 -0
spiral/cli/workloads.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
from typing import Annotated
|
|
2
|
+
|
|
3
|
+
import pyperclip
|
|
4
|
+
import questionary
|
|
5
|
+
from questionary import Choice
|
|
6
|
+
from typer import Argument, Option
|
|
7
|
+
|
|
8
|
+
from spiral.api.workloads import CreateWorkloadRequest, IssueWorkloadCredentialsResponse, Workload
|
|
9
|
+
from spiral.cli import CONSOLE, ERR_CONSOLE, AsyncTyper, printer, state
|
|
10
|
+
from spiral.cli.types import ProjectArg
|
|
11
|
+
|
|
12
|
+
app = AsyncTyper()
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@app.command(help="Create a new workload.")
def create(
    project: ProjectArg,
    name: Annotated[str | None, Option(help="Friendly name for the workload.")] = None,
):
    # Resolve the workload API first, then build the request carrying the optional name.
    workload_api = state.spiral.api.workload
    request = CreateWorkloadRequest(name=name)

    response = workload_api.create(project, request)

    # Report the ID of the workload that was just created.
    created_id = response.workload.id
    CONSOLE.print(f"Created workload {created_id}")
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@app.command(help="List workloads.")
def ls(
    project: ProjectArg,
):
    # Materialise the listing before rendering so the printer receives a concrete list.
    rows = [*state.spiral.api.workload.list(project)]

    # Render only the identifying columns of each workload.
    rendered = printer.table_of_models(Workload, rows, fields=["id", "project_id", "name"])
    CONSOLE.print(rendered)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@app.command(help="Issue new workload credentials.")
def issue_credentials(workload_id: Annotated[str, Argument(help="Workload ID.")]):
    # The prompt below tells the user the secret cannot be shown again, so nothing
    # after this call may abort the command before the user has handled the secret.
    res: IssueWorkloadCredentialsResponse = state.spiral.api.workload.issue_credentials(workload_id)

    while True:
        choice = questionary.select(
            "What would you like to do with the secret? You will not be able to see this secret again!",
            choices=[
                Choice(title="Copy to clipboard", value=1),
                Choice(title="Print to console", value=2),
                Choice(title="Exit", value=3),
            ],
        ).ask()

        if choice == 1:
            try:
                pyperclip.copy(res.client_secret)
            except pyperclip.PyperclipException:
                # No usable clipboard backend (e.g. a headless machine). Re-prompt instead
                # of crashing, otherwise the freshly issued secret would be lost.
                ERR_CONSOLE.print("Could not copy to clipboard. Please choose another option.")
                continue
            CONSOLE.print("[green]Secret copied to clipboard![/green]")
            break
        elif choice == 2:
            CONSOLE.print(f"[green]Token Secret:[/green] {res.client_secret}")
            break
        elif choice == 3:
            break
        else:
            # Reached when the prompt yields none of the values above.
            # NOTE(review): presumably this includes `None` from a cancelled prompt - confirm.
            ERR_CONSOLE.print("Invalid choice. Please try again.")

    # The client ID is always printed, whichever way the secret was handled.
    CONSOLE.print(f"[green]Token ID:[/green] {res.client_id}")
|
spiral/client.py
ADDED
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
from datetime import datetime, timedelta
|
|
2
|
+
from typing import TYPE_CHECKING
|
|
3
|
+
|
|
4
|
+
import jwt
|
|
5
|
+
import pyarrow as pa
|
|
6
|
+
|
|
7
|
+
from spiral.api import SpiralAPI
|
|
8
|
+
from spiral.api.projects import CreateProjectRequest, CreateProjectResponse
|
|
9
|
+
from spiral.core.authn import Authn
|
|
10
|
+
from spiral.core.client import Internal, KeyColumns, Shard
|
|
11
|
+
from spiral.core.client import Spiral as CoreSpiral
|
|
12
|
+
from spiral.core.config import ClientSettings
|
|
13
|
+
from spiral.datetime_ import timestamp_micros
|
|
14
|
+
from spiral.expressions import ExprLike
|
|
15
|
+
from spiral.scan import Scan
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from spiral.iceberg import Iceberg
|
|
19
|
+
from spiral.key_space_index import KeySpaceIndex
|
|
20
|
+
from spiral.project import Project
|
|
21
|
+
from spiral.table import Table
|
|
22
|
+
from spiral.text_index import TextIndex
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class Spiral:
    """Main client for interacting with the Spiral data platform.

    Configuration is loaded with the following priority (highest to lowest):
    1. Explicit parameters.
    2. Environment variables (`SPIRAL__*`)
    3. Config file (`~/.spiral.toml`)
    4. Default values (production URLs)

    Constructing a `Spiral` only stores its arguments; the core client and the
    API wrapper are created lazily on first access of `core` / `api`.

    Examples:
        ```python
        # Default configuration
        client = Spiral()

        # With config overrides
        client = Spiral(overrides={"limits.concurrency": "16"})
        ```

    Args:
        config: Custom ClientSettings object. Defaults to global settings.
        overrides: Configuration overrides using dot notation,
            see the [Client Configuration](/python-client.md) page for a full list.
    """

    def __init__(
        self,
        config: ClientSettings | None = None,
        overrides: dict[str, str] | None = None,
    ):
        self._overrides = overrides
        self._config = config
        # Lazily populated caches, filled in by the corresponding properties.
        self._org: str | None = None
        self._core: CoreSpiral | None = None
        self._api: SpiralAPI | None = None

    @property
    def config(self) -> ClientSettings:
        """Returns the client's configuration, as reported by the core client."""
        return self.core.config()

    @property
    def authn(self) -> Authn:
        """Get the authentication handler for this client."""
        return self.core.authn()

    @property
    def api(self) -> SpiralAPI:
        """The API wrapper, created on first access from `authn` and the configured server URL."""
        if self._api is None:
            self._api = SpiralAPI(self.authn, base_url=self.config.server_url)
        return self._api

    @property
    def core(self) -> CoreSpiral:
        """The core client, created on first access from the stored config and overrides."""
        if self._core is None:
            self._core = CoreSpiral(
                config=self._config,
                overrides=self._overrides,
            )

        return self._core

    @property
    def internal(self) -> Internal:
        """Internal client APIs, bound to the configured file format."""
        return self.core.internal(format=self.config.file_format)

    @property
    def organization(self) -> str:
        """The organization ID read from the `org_id` claim of the current token.

        The value is cached after the first successful lookup.

        Raises:
            ValueError: if no token is available, or the token carries no `org_id` claim.
        """
        if self._org is None:
            token = self.authn.token()
            if token is None:
                raise ValueError("Authentication failed.")
            # The signature is deliberately not verified: the payload is only read to
            # discover the organization ID, not to make a trust decision.
            token_payload = jwt.decode(token.expose_secret(), options={"verify_signature": False})
            if "org_id" not in token_payload:
                raise ValueError("Please create an organization.")
            self._org = token_payload["org_id"]
        return self._org

    def list_projects(self) -> list["Project"]:
        """Return a `Project` handle for every project in the API listing."""
        from spiral.project import Project

        return [Project(self, project_id=p.id, name=p.name) for p in self.api.project.list()]

    def create_project(
        self,
        id_prefix: str | None = None,
        *,
        name: str | None = None,
    ) -> "Project":
        """Create a new project and return a handle to it.

        Args:
            id_prefix: Forwarded as the `id_prefix` of the create request.
            name: Forwarded as the `name` of the create request.
        """
        from spiral.project import Project

        res: CreateProjectResponse = self.api.project.create(CreateProjectRequest(id_prefix=id_prefix, name=name))
        return Project(self, res.project.id, name=res.project.name)

    def project(self, project_id: str) -> "Project":
        """Open an existing project.

        No API call is made; the project ID doubles as the handle's name.
        """
        from spiral.project import Project

        # We avoid an API call since we'd just be fetching a human-readable name. Seems a waste in most cases.
        return Project(self, project_id=project_id, name=project_id)

    def table(self, table_id: str) -> "Table":
        """Open a table using an ID."""
        from spiral.table import Table

        return Table(self, self.core.table(table_id))

    def text_index(self, index_id: str) -> "TextIndex":
        """Open a text index using an ID."""
        from spiral.text_index import TextIndex

        return TextIndex(self.core.text_index(index_id))

    def key_space_index(self, index_id: str) -> "KeySpaceIndex":
        """Open a key space index using an ID."""
        from spiral.key_space_index import KeySpaceIndex

        return KeySpaceIndex(self.core.key_space_index(index_id))

    def scan(
        self,
        *projections: ExprLike,
        where: ExprLike | None = None,
        asof: datetime | int | None = None,
        _key_columns: KeyColumns | None = None,
    ) -> Scan:
        """Starts a read transaction on the Spiral.

        Args:
            projections: a set of expressions that return struct arrays.
            where: a query expression to apply to the data.
            asof: execute the scan on the version of the table as of the given timestamp.
                A `datetime` is converted with `timestamp_micros`; an `int` is passed through.

        Raises:
            ValueError: if no projection is given.
        """
        # Validate before doing any other work so bad calls fail fast.
        if not projections:
            raise ValueError("At least one projection is required.")

        from spiral import expressions as se

        if isinstance(asof, datetime):
            asof = timestamp_micros(asof)

        # Combine all projections into a single struct.
        projection = se.merge(*projections)

        # Test presence with `is not None`: the lifted value is an expression object,
        # and its truth value is not a reliable "was a filter supplied" check.
        filter_expr = se.lift(where).__expr__ if where is not None else None

        return Scan(
            self,
            self.core.scan(projection.__expr__, filter=filter_expr, asof=asof, key_columns=_key_columns),
        )

    # TODO(marko): This should be query, and search should be query + scan.
    def search(
        self,
        top_k: int,
        *rank_by: ExprLike,
        filters: ExprLike | None = None,
        freshness_window: timedelta | None = None,
    ) -> pa.RecordBatchReader:
        """Queries the index with the given rank by and filters clauses. Returns a stream of scored keys.

        Args:
            top_k: The number of top results to return.
            rank_by: Rank by expressions are combined for scoring.
                See `se.text.find` and `se.text.boost` for scoring expressions.
            filters: The `filters` expression is used to filter the results.
                It must return a boolean value and use only conjunctions (ANDs). Expressions in filters
                statement are considered either a `must` or `must_not` clause in search terminology.
            freshness_window: If provided, the index will not be refreshed if its freshness does not exceed this window.
                Defaults to a zero-second window; the value is passed on as whole seconds.

        Raises:
            ValueError: if no rank-by expression is given.
        """
        # Validate before doing any other work so bad calls fail fast.
        if not rank_by:
            raise ValueError("At least one rank by expression is required.")

        from spiral import expressions as se

        ranking = se.or_(*rank_by)

        # Explicit `is not None`: see the note in `scan` about expression truthiness.
        filters_expr = se.lift(filters).__expr__ if filters is not None else None

        window = timedelta(seconds=0) if freshness_window is None else freshness_window
        # int() drops any sub-second part of the window.
        freshness_window_s = int(window.total_seconds())

        return self.core.search(
            top_k=top_k,
            rank_by=ranking.__expr__,
            filters=filters_expr,
            freshness_window_s=freshness_window_s,
        )

    def compute_shards(
        self,
        max_batch_size: int,
        *projections: ExprLike,
        where: ExprLike | None = None,
        asof: datetime | int | None = None,
    ) -> list[Shard]:
        """Computes shards over the given projections and filter.

        Args:
            max_batch_size: The maximum number of rows per shard.
            projections: a set of expressions that return struct arrays.
            where: a query expression to apply to the data.
            asof: execute the scan on the version of the table as of the given timestamp.
                A `datetime` is converted with `timestamp_micros`; an `int` is passed through.

        Raises:
            ValueError: if no projection is given.
        """
        # Validate before doing any other work so bad calls fail fast.
        if not projections:
            raise ValueError("At least one projection is required.")

        from spiral import expressions as se

        if isinstance(asof, datetime):
            asof = timestamp_micros(asof)

        # Combine all projections into a single struct.
        projection = se.merge(*projections)

        # Explicit `is not None`: see the note in `scan` about expression truthiness.
        filter_expr = se.lift(where).__expr__ if where is not None else None

        return self.core.compute_shards(max_batch_size, projection.__expr__, filter_expr, asof=asof)

    @property
    def iceberg(self) -> "Iceberg":
        """
        Apache Iceberg is a powerful open-source table format designed for high-performance data lakes.
        Iceberg brings reliability, scalability, and advanced features like time travel, schema evolution,
        and ACID transactions to your warehouse.
        """
        from spiral.iceberg import Iceberg

        return Iceberg(self)
|
spiral/core/__init__.pyi
ADDED
|
File without changes
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
from spiral.api.types import OrgId
|
|
2
|
+
|
|
3
|
+
class Token:
    """Wrapper type around an authentication token string."""

    def __init__(self, value: str):
        """Create a token from the string ``value``."""
        ...

    def expose_secret(self) -> str:
        """Return the token as a plain ``str``."""
        ...
|
|
6
|
+
|
|
7
|
+
class Authn:
    """Authentication handle that can produce a token."""

    def token(self) -> Token | None:
        """Return a `Token`, or ``None`` when there is none to return."""
        ...
|
|
9
|
+
|
|
10
|
+
class DeviceCodeAuth:
    """Device-code authentication flow."""

    @staticmethod
    def default() -> DeviceCodeAuth:
        """Return the static device code instance."""
        ...

    def authenticate(self, force: bool = False, org_id: OrgId | None = None) -> Token:
        """Authenticate using device code flow.

        Args:
            force: Optional flag, defaults to ``False``.
            org_id: Optional organization ID, defaults to ``None``.

        Returns:
            The resulting `Token`.
        """
        ...

    def logout(self) -> None:
        """Logout from the device authentication session."""
        ...
|
|
@@ -0,0 +1,285 @@
|
|
|
1
|
+
from enum import Enum
|
|
2
|
+
from typing import Any, Literal
|
|
3
|
+
|
|
4
|
+
import pyarrow as pa
|
|
5
|
+
from spiral.api.types import DatasetName, IndexName, ProjectId, RootUri, TableId, TableName
|
|
6
|
+
from spiral.core.authn import Authn
|
|
7
|
+
from spiral.core.config import ClientSettings
|
|
8
|
+
from spiral.core.table import ColumnGroupState, KeyRange, KeySpaceState, Scan, ScanState, Snapshot, Table, Transaction
|
|
9
|
+
from spiral.core.table.spec import ColumnGroup, Schema
|
|
10
|
+
from spiral.expressions import Expr
|
|
11
|
+
|
|
12
|
+
# Only for typing, the actual definition is in Rust.
|
|
13
|
+
class KeyColumns(Enum):
    """Typing-only declaration of the key-column modes accepted by `Spiral.scan`."""

    # Member values are listed explicitly; they are part of the declared interface.
    IfProjected = 0
    Included = 1
    Only = 2
|
|
17
|
+
|
|
18
|
+
class Spiral:
    """A client for Spiral database"""

    def __init__(self, config: ClientSettings | None = None, overrides: dict[str, str] | None = None):
        """Initialize the Spiral client.

        Args:
            config: Client configuration, defaults to the global config.
            overrides: Configuration overrides using dot notation,
                see the [Client Configuration](/python-client) page for a full list.
        """
        ...

    def authn(self) -> Authn:
        """Get the current authentication context."""
        ...

    def scan(
        self,
        projection: Expr,
        filter: Expr | None = None,
        asof: int | None = None,
        key_columns: KeyColumns | None = None,
    ) -> Scan:
        """Construct a table scan.

        Args:
            projection: Expression to project.
            filter: Optional filter expression.
            asof: Optional integer timestamp.
            key_columns: Optional `KeyColumns` mode.
        """
        ...

    def load_scan(self, plan_state: ScanState) -> Scan:
        """Load a scan from a serialized scan state."""
        ...

    def transaction(self, table: Table, format: str | None = None, retries: int | None = 3) -> Transaction:
        """Begin a table transaction.

        Args:
            table: Table the transaction applies to.
            format: Optional format name.
            retries: Optional retry count, defaults to 3.
        """
        ...

    def search(
        self,
        top_k: int,
        rank_by: Expr,
        *,
        filters: Expr | None = None,
        freshness_window_s: int | None = None,
    ) -> pa.RecordBatchReader:
        """Search an index.

        Searching an index returns a stream of record batches that match table's key schema + float score column.
        """
        ...

    def table(self, table_id: str) -> Table:
        """Get a table."""
        ...

    def create_table(
        self,
        project_id: ProjectId,
        dataset: DatasetName,
        table: TableName,
        key_schema: Schema,
        *,
        root_uri: RootUri | None = None,
        exist_ok: bool = False,
    ) -> Table:
        """Create a new table in the specified project."""
        ...

    def move_table(self, table_id: TableId, new_dataset: DatasetName):
        """Move a table to a dataset in the same project."""
        ...

    def rename_table(self, table_id: TableId, new_table: TableName):
        """Rename a table."""
        ...

    def text_index(self, index_id: str) -> TextIndex:
        """Get a text index."""
        ...

    def create_text_index(
        self,
        project_id: ProjectId,
        name: IndexName,
        projection: Expr,
        filter: Expr | None = None,
        *,
        root_uri: RootUri | None = None,
        exist_ok: bool = False,
    ) -> TextIndex:
        """Create a new index in the specified project."""
        ...

    def key_space_index(self, index_id: str) -> KeySpaceIndex:
        """Get a key space index."""
        ...

    def create_key_space_index(
        self,
        project_id: ProjectId,
        name: IndexName,
        granularity: int,
        projection: Expr,
        filter: Expr | None = None,
        *,
        root_uri: RootUri | None = None,
        exist_ok: bool = False,
    ) -> KeySpaceIndex:
        """Create a new key space index in the specified project."""
        ...

    def compute_shards(
        self,
        max_batch_size: int,
        projection: Expr,
        filter: Expr | None = None,
        asof: int | None = None,
    ) -> list[Shard]:
        """Constructs shards for a given projection (and filter).

        Useful for distributing work.
        """
        ...

    def internal(self, *, format: str | None = None) -> Internal:
        """Internal client APIs. It can change without notice."""
        ...

    def config(self) -> ClientSettings:
        """Client-side configuration."""
        ...
|
|
158
|
+
|
|
159
|
+
class TextIndex:
    """Handle to a text index."""

    # Identifier of the index.
    id: str
|
|
161
|
+
|
|
162
|
+
class KeySpaceIndex:
    """Handle to a key space index and the parameters it is declared with."""

    # Identifier of the index.
    id: str
    # Identifier of a table. NOTE(review): presumably the indexed table - confirm.
    table_id: str
    granularity: int
    projection: Expr
    filter: Expr
    asof: int
|
|
169
|
+
|
|
170
|
+
class Shard:
    """One partition of data, described by a key range.

    Attributes:
        key_range: The key range for this shard.
        cardinality: The number of rows in this shard, if known.
    """

    key_range: KeyRange
    cardinality: int | None

    def __init__(self, key_range: KeyRange, cardinality: int | None):
        """Create a shard from its key range and (possibly unknown) row count."""
        ...

    def __getnewargs__(self) -> tuple[KeyRange, int | None]:
        """Return a ``(key_range, cardinality)``-typed tuple (pickle protocol hook)."""
        ...
|
|
183
|
+
|
|
184
|
+
class ShuffleConfig:
|
|
185
|
+
"""Configuration for within-shard sample shuffling.
|
|
186
|
+
|
|
187
|
+
This controls how samples are shuffled within a buffer, separate from
|
|
188
|
+
which shards to read (which is specified as a parameter to the scan).
|
|
189
|
+
|
|
190
|
+
Attributes:
|
|
191
|
+
buffer_size: Size of the buffer pool for shuffling samples.
|
|
192
|
+
seed: Random seed for reproducibility. If None, uses OS randomness.
|
|
193
|
+
max_batch_size: Maximum batch size for output chunks. If None,
|
|
194
|
+
defaults to max(1, buffer_size / 16).
|
|
195
|
+
"""
|
|
196
|
+
|
|
197
|
+
buffer_size: int
|
|
198
|
+
seed: int | None
|
|
199
|
+
max_batch_size: int | None
|
|
200
|
+
|
|
201
|
+
def __init__(
|
|
202
|
+
self,
|
|
203
|
+
buffer_size: int,
|
|
204
|
+
*,
|
|
205
|
+
seed: int | None = None,
|
|
206
|
+
max_batch_size: int | None = None,
|
|
207
|
+
): ...
|
|
208
|
+
|
|
209
|
+
class Internal:
    """Internal client operations."""

    def flush_wal(self, table: Table) -> None:
        """Flush the write-ahead log of the table."""
        ...

    def compact_key_space(
        self,
        *,
        table: Table,
        mode: Literal["plan", "read", "write"] | None = None,
        partition_bytes_min: int | None = None,
    ):
        """Compact the key space of the table.

        All arguments are keyword-only.
        """
        ...

    def compact_column_group(
        self,
        table: Table,
        column_group: ColumnGroup,
        *,
        mode: Literal["plan", "read", "write"] | None = None,
        partition_bytes_min: int | None = None,
    ):
        """Compact a column group in the table."""
        ...

    def update_text_index(self, index: TextIndex, snapshot: Snapshot) -> None:
        """Index table changes up to the given snapshot (text index)."""
        ...

    def update_key_space_index(self, index: KeySpaceIndex, snapshot: Snapshot) -> None:
        """Index table changes up to the given snapshot (key space index)."""
        ...

    def key_space_state(self, snapshot: Snapshot) -> KeySpaceState:
        """The key space state for the table."""
        ...

    def column_group_state(
        self,
        snapshot: Snapshot,
        key_space_state: KeySpaceState,
        column_group: ColumnGroup,
    ) -> ColumnGroupState:
        """The state of the given column group of the table."""
        ...

    def column_groups_states(self, snapshot: Snapshot, key_space_state: KeySpaceState) -> list[ColumnGroupState]:
        """The state of each column group of the table."""
        ...

    def key_space_index_shards(self, index: KeySpaceIndex) -> list[Shard]:
        """Compute the scan shards from a key space index."""
        ...

    def prepare_shard(
        self,
        output_path: str,
        scan: Scan,
        shard: Shard,
        row_block_size: int = 8192,
    ) -> None:
        """Prepare a shard locally.

        Used for `SpiralStream` integration with `streaming` which requires on-disk shards.
        """
        ...

    def metrics(self) -> dict[str, Any]:
        """Return metrics as a string-keyed dictionary."""
        ...
|
|
282
|
+
|
|
283
|
+
def flush_telemetry() -> None:
    """Flush telemetry data to the configured exporter.

    Takes no arguments and returns nothing.
    """
    ...
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
class ClientSettings:
    """Client configuration loaded from ~/.spiral.toml and environment variables."""

    @staticmethod
    def load() -> ClientSettings:
        """Load ClientSettings from ~/.spiral.toml and environment variables.

        Sources, from highest to lowest priority:
        1. Environment variables (SPIRAL__*)
        2. Config file (~/.spiral.toml)
        3. Default values
        """
        ...

    @property
    def server_url(self) -> str:
        """The Spiral API endpoint URL."""
        ...

    @property
    def spfs_url(self) -> str:
        """The SpFS endpoint URL."""
        ...

    @property
    def file_format(self) -> str:
        """File format for table storage (vortex or parquet)."""
        ...

    def to_json(self) -> str:
        """Serialize to a JSON string"""
        ...

    @staticmethod
    def from_json(json: str) -> ClientSettings:
        """Deserialize from a JSON-formatted string"""
        ...
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from pyarrow import Array, DataType, Scalar
|
|
2
|
+
|
|
3
|
+
class Expr:
    """Low-level expression type; the module's functions accept and return it."""
|
|
5
|
+
|
|
6
|
+
def aux(name: str, data_type: DataType) -> Expr:
    """Build an `Expr` from a name and a pyarrow data type."""
    ...
|
|
7
|
+
|
|
8
|
+
# Array is correct (there is no ArrayData), see the table here:
|
|
9
|
+
# https://arrow.apache.org/rust/arrow_pyarrow/index.html
|
|
10
|
+
def scalar(array: Array[Scalar[DataType]]) -> Expr:
    """Build an `Expr` from a pyarrow `Array`."""
    ...
|
|
11
|
+
def not_(expr: Expr) -> Expr:
    """Build the `not` expression over ``expr``."""
    ...
|
|
12
|
+
def is_null(expr: Expr) -> Expr:
    """Build the `is_null` expression over ``expr``."""
    ...
|
|
13
|
+
# NOTE(review): the third parameter is named `Expr`, shadowing the class inside the
# signature. It is kept because parameter names are part of the interface; confirm
# the intended name against the native implementation.
def binary(op: str, expr: Expr, Expr: Expr) -> Expr:
    """Build a binary expression from an operator name and two operand expressions."""
    ...
|
|
14
|
+
def cast(_expr: Expr, _data_type: DataType) -> Expr:
    """Build a cast expression from an expression and a pyarrow data type."""
    ...
|
|
15
|
+
def array_lit(array: Array[Scalar[DataType]]) -> Expr:
    """Build an array-literal `Expr` from a pyarrow `Array`."""
    ...
|