pyspiral 0.7.18__cp312-abi3-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. pyspiral-0.7.18.dist-info/METADATA +52 -0
  2. pyspiral-0.7.18.dist-info/RECORD +110 -0
  3. pyspiral-0.7.18.dist-info/WHEEL +4 -0
  4. pyspiral-0.7.18.dist-info/entry_points.txt +3 -0
  5. spiral/__init__.py +55 -0
  6. spiral/_lib.abi3.so +0 -0
  7. spiral/adbc.py +411 -0
  8. spiral/api/__init__.py +78 -0
  9. spiral/api/admin.py +15 -0
  10. spiral/api/client.py +164 -0
  11. spiral/api/filesystems.py +134 -0
  12. spiral/api/key_space_indexes.py +23 -0
  13. spiral/api/organizations.py +77 -0
  14. spiral/api/projects.py +219 -0
  15. spiral/api/telemetry.py +19 -0
  16. spiral/api/text_indexes.py +56 -0
  17. spiral/api/types.py +23 -0
  18. spiral/api/workers.py +40 -0
  19. spiral/api/workloads.py +52 -0
  20. spiral/arrow_.py +216 -0
  21. spiral/cli/__init__.py +88 -0
  22. spiral/cli/__main__.py +4 -0
  23. spiral/cli/admin.py +14 -0
  24. spiral/cli/app.py +108 -0
  25. spiral/cli/console.py +95 -0
  26. spiral/cli/fs.py +76 -0
  27. spiral/cli/iceberg.py +97 -0
  28. spiral/cli/key_spaces.py +103 -0
  29. spiral/cli/login.py +25 -0
  30. spiral/cli/orgs.py +90 -0
  31. spiral/cli/printer.py +53 -0
  32. spiral/cli/projects.py +147 -0
  33. spiral/cli/state.py +7 -0
  34. spiral/cli/tables.py +197 -0
  35. spiral/cli/telemetry.py +17 -0
  36. spiral/cli/text.py +115 -0
  37. spiral/cli/types.py +50 -0
  38. spiral/cli/workloads.py +58 -0
  39. spiral/client.py +256 -0
  40. spiral/core/__init__.pyi +0 -0
  41. spiral/core/_tools/__init__.pyi +5 -0
  42. spiral/core/authn/__init__.pyi +21 -0
  43. spiral/core/client/__init__.pyi +285 -0
  44. spiral/core/config/__init__.pyi +35 -0
  45. spiral/core/expr/__init__.pyi +15 -0
  46. spiral/core/expr/images/__init__.pyi +3 -0
  47. spiral/core/expr/list_/__init__.pyi +4 -0
  48. spiral/core/expr/refs/__init__.pyi +4 -0
  49. spiral/core/expr/str_/__init__.pyi +3 -0
  50. spiral/core/expr/struct_/__init__.pyi +6 -0
  51. spiral/core/expr/text/__init__.pyi +5 -0
  52. spiral/core/expr/udf/__init__.pyi +14 -0
  53. spiral/core/expr/video/__init__.pyi +3 -0
  54. spiral/core/table/__init__.pyi +141 -0
  55. spiral/core/table/manifests/__init__.pyi +35 -0
  56. spiral/core/table/metastore/__init__.pyi +58 -0
  57. spiral/core/table/spec/__init__.pyi +215 -0
  58. spiral/dataloader.py +299 -0
  59. spiral/dataset.py +264 -0
  60. spiral/datetime_.py +27 -0
  61. spiral/debug/__init__.py +0 -0
  62. spiral/debug/manifests.py +87 -0
  63. spiral/debug/metrics.py +56 -0
  64. spiral/debug/scan.py +266 -0
  65. spiral/enrichment.py +306 -0
  66. spiral/expressions/__init__.py +274 -0
  67. spiral/expressions/base.py +167 -0
  68. spiral/expressions/file.py +17 -0
  69. spiral/expressions/http.py +17 -0
  70. spiral/expressions/list_.py +68 -0
  71. spiral/expressions/s3.py +16 -0
  72. spiral/expressions/str_.py +39 -0
  73. spiral/expressions/struct.py +59 -0
  74. spiral/expressions/text.py +62 -0
  75. spiral/expressions/tiff.py +222 -0
  76. spiral/expressions/udf.py +60 -0
  77. spiral/grpc_.py +32 -0
  78. spiral/iceberg.py +31 -0
  79. spiral/iterable_dataset.py +106 -0
  80. spiral/key_space_index.py +44 -0
  81. spiral/project.py +227 -0
  82. spiral/protogen/_/__init__.py +0 -0
  83. spiral/protogen/_/arrow/__init__.py +0 -0
  84. spiral/protogen/_/arrow/flight/__init__.py +0 -0
  85. spiral/protogen/_/arrow/flight/protocol/__init__.py +0 -0
  86. spiral/protogen/_/arrow/flight/protocol/sql/__init__.py +2548 -0
  87. spiral/protogen/_/google/__init__.py +0 -0
  88. spiral/protogen/_/google/protobuf/__init__.py +2310 -0
  89. spiral/protogen/_/message_pool.py +3 -0
  90. spiral/protogen/_/py.typed +0 -0
  91. spiral/protogen/_/scandal/__init__.py +190 -0
  92. spiral/protogen/_/spfs/__init__.py +72 -0
  93. spiral/protogen/_/spql/__init__.py +61 -0
  94. spiral/protogen/_/substrait/__init__.py +6196 -0
  95. spiral/protogen/_/substrait/extensions/__init__.py +169 -0
  96. spiral/protogen/__init__.py +0 -0
  97. spiral/protogen/util.py +41 -0
  98. spiral/py.typed +0 -0
  99. spiral/scan.py +363 -0
  100. spiral/server.py +17 -0
  101. spiral/settings.py +36 -0
  102. spiral/snapshot.py +56 -0
  103. spiral/streaming_/__init__.py +3 -0
  104. spiral/streaming_/reader.py +133 -0
  105. spiral/streaming_/stream.py +157 -0
  106. spiral/substrait_.py +274 -0
  107. spiral/table.py +224 -0
  108. spiral/text_index.py +17 -0
  109. spiral/transaction.py +155 -0
  110. spiral/types_.py +6 -0
@@ -0,0 +1,58 @@
1
+ from typing import Annotated
2
+
3
+ import pyperclip
4
+ import questionary
5
+ from questionary import Choice
6
+ from typer import Argument, Option
7
+
8
+ from spiral.api.workloads import CreateWorkloadRequest, IssueWorkloadCredentialsResponse, Workload
9
+ from spiral.cli import CONSOLE, ERR_CONSOLE, AsyncTyper, printer, state
10
+ from spiral.cli.types import ProjectArg
11
+
12
+ app = AsyncTyper()
13
+
14
+
15
@app.command(help="Create a new workload.")
def create(
    project: ProjectArg,
    name: Annotated[str | None, Option(help="Friendly name for the workload.")] = None,
):
    """Create a workload in ``project`` and print the ID of the new workload.

    Args:
        project: Project the workload is created in.
        name: Optional friendly name, forwarded in the create request.
    """
    request = CreateWorkloadRequest(name=name)
    response = state.spiral.api.workload.create(project, request)
    workload_id = response.workload.id
    CONSOLE.print(f"Created workload {workload_id}")
22
+
23
+
24
@app.command(help="List workloads.")
def ls(
    project: ProjectArg,
):
    """Print a table (id, project_id, name) of the workloads in ``project``."""
    # Materialize the listing up front so the whole table is rendered at once.
    rows = [*state.spiral.api.workload.list(project)]
    columns = ["id", "project_id", "name"]
    table = printer.table_of_models(Workload, rows, fields=columns)
    CONSOLE.print(table)
30
+
31
+
32
@app.command(help="Issue new workload credentials.")
def issue_credentials(workload_id: Annotated[str, Argument(help="Workload ID.")]):
    """Issue credentials for a workload and let the user decide how to receive the secret.

    The secret is only available in the response of this call, so the user is
    prompted until they pick one of the offered actions. The token ID is always
    printed once the prompt loop finishes.

    Args:
        workload_id: ID of the workload to issue credentials for.
    """
    res: IssueWorkloadCredentialsResponse = state.spiral.api.workload.issue_credentials(workload_id)

    while True:
        choice = questionary.select(
            "What would you like to do with the secret? You will not be able to see this secret again!",
            choices=[
                Choice(title="Copy to clipboard", value=1),
                Choice(title="Print to console", value=2),
                Choice(title="Exit", value=3),
            ],
        ).ask()

        if choice == 1:
            try:
                pyperclip.copy(res.client_secret)
            except pyperclip.PyperclipException as exc:
                # No clipboard backend (e.g. headless or SSH session). Crashing here
                # would lose the one-time secret, so report and ask again instead.
                ERR_CONSOLE.print(f"Could not copy to clipboard: {exc}")
                continue
            CONSOLE.print("[green]Secret copied to clipboard![/green]")
            break
        elif choice == 2:
            CONSOLE.print(f"[green]Token Secret:[/green] {res.client_secret}")
            break
        elif choice == 3:
            break
        else:
            # Any other answer (including no selection) re-opens the prompt.
            ERR_CONSOLE.print("Invalid choice. Please try again.")

    CONSOLE.print(f"[green]Token ID:[/green] {res.client_id}")
spiral/client.py ADDED
@@ -0,0 +1,256 @@
1
+ from datetime import datetime, timedelta
2
+ from typing import TYPE_CHECKING
3
+
4
+ import jwt
5
+ import pyarrow as pa
6
+
7
+ from spiral.api import SpiralAPI
8
+ from spiral.api.projects import CreateProjectRequest, CreateProjectResponse
9
+ from spiral.core.authn import Authn
10
+ from spiral.core.client import Internal, KeyColumns, Shard
11
+ from spiral.core.client import Spiral as CoreSpiral
12
+ from spiral.core.config import ClientSettings
13
+ from spiral.datetime_ import timestamp_micros
14
+ from spiral.expressions import ExprLike
15
+ from spiral.scan import Scan
16
+
17
+ if TYPE_CHECKING:
18
+ from spiral.iceberg import Iceberg
19
+ from spiral.key_space_index import KeySpaceIndex
20
+ from spiral.project import Project
21
+ from spiral.table import Table
22
+ from spiral.text_index import TextIndex
23
+
24
+
25
class Spiral:
    """Main client for interacting with the Spiral data platform.

    Configuration is loaded with the following priority (highest to lowest):
    1. Explicit parameters.
    2. Environment variables (`SPIRAL__*`)
    3. Config file (`~/.spiral.toml`)
    4. Default values (production URLs)

    The core client and the API client are created lazily, on first access of
    the `core` and `api` properties respectively.

    Examples:
        ```python
        # Default configuration
        client = Spiral()

        # With config overrides
        client = Spiral(overrides={"limits.concurrency": "16"})
        ```

    Args:
        config: Custom ClientSettings object. Defaults to global settings.
        overrides: Configuration overrides using dot notation,
            see the [Client Configuration](/python-client.md) page for a full list.
    """

    def __init__(
        self,
        config: ClientSettings | None = None,
        overrides: dict[str, str] | None = None,
    ):
        self._overrides = overrides
        self._config = config
        # Lazily populated caches; see the `organization`, `core` and `api` properties.
        self._org = None
        self._core = None
        self._api = None

    @property
    def config(self) -> ClientSettings:
        """Returns the client's configuration, as reported by the core client."""
        return self.core.config()

    @property
    def authn(self) -> Authn:
        """Get the authentication handler for this client."""
        return self.core.authn()

    @property
    def api(self) -> SpiralAPI:
        """The API client, created on first access against the configured server URL."""
        if self._api is None:
            self._api = SpiralAPI(self.authn, base_url=self.config.server_url)
        return self._api

    @property
    def core(self) -> CoreSpiral:
        """The core client, created on first access from the given config and overrides."""
        if self._core is None:
            self._core = CoreSpiral(
                config=self._config,
                overrides=self._overrides,
            )

        return self._core

    @property
    def internal(self) -> Internal:
        """Internal client APIs, requested for the configured file format."""
        return self.core.internal(format=self.config.file_format)

    @property
    def organization(self) -> str:
        """The organization ID taken from the `org_id` claim of the current token.

        The value is cached after the first successful lookup.

        Raises:
            ValueError: If no token is available, or the token has no `org_id` claim.
        """
        if self._org is None:
            token = self.authn.token()
            if token is None:
                raise ValueError("Authentication failed.")
            # The token is only read locally for its claims, so the signature is not verified here.
            token_payload = jwt.decode(token.expose_secret(), options={"verify_signature": False})
            if "org_id" not in token_payload:
                raise ValueError("Please create an organization.")
            self._org = token_payload["org_id"]
        return self._org

    def list_projects(self) -> list["Project"]:
        """List the projects returned by the API, one `Project` per entry."""
        from .project import Project

        return [Project(self, project_id=p.id, name=p.name) for p in self.api.project.list()]

    def create_project(
        self,
        id_prefix: str | None = None,
        *,
        name: str | None = None,
    ) -> "Project":
        """Create a project.

        Args:
            id_prefix: Optional value forwarded to the API as the request's `id_prefix`.
            name: Optional name forwarded to the API as the request's `name`.

        Returns:
            A `Project` built from the ID and name in the API response.
        """
        from .project import Project

        res: CreateProjectResponse = self.api.project.create(CreateProjectRequest(id_prefix=id_prefix, name=name))
        return Project(self, res.project.id, name=res.project.name)

    def project(self, project_id: str) -> "Project":
        """Open an existing project."""
        from spiral.project import Project

        # We avoid an API call since we'd just be fetching a human-readable name. Seems a waste in most cases.
        return Project(self, project_id=project_id, name=project_id)

    def table(self, table_id: str) -> "Table":
        """Open a table using an ID."""
        from spiral.table import Table

        return Table(self, self.core.table(table_id))

    def text_index(self, index_id: str) -> "TextIndex":
        """Open a text index using an ID."""
        from spiral.text_index import TextIndex

        return TextIndex(self.core.text_index(index_id))

    def key_space_index(self, index_id: str) -> "KeySpaceIndex":
        """Open a key space index using an ID."""
        from spiral.key_space_index import KeySpaceIndex

        return KeySpaceIndex(self.core.key_space_index(index_id))

    @staticmethod
    def _prepare_scan_args(
        projections: tuple[ExprLike, ...],
        where: ExprLike | None,
        asof: datetime | int | None,
    ) -> tuple:
        """Normalize the arguments shared by `scan` and `compute_shards`.

        Returns:
            A `(projection, filter, asof)` tuple: the merged projection's core
            expression, the lifted filter's core expression (or None), and `asof`
            with a `datetime` converted via `timestamp_micros`.

        Raises:
            ValueError: If `projections` is empty.
        """
        from spiral import expressions as se

        if isinstance(asof, datetime):
            asof = timestamp_micros(asof)

        # Combine all projections into a single struct.
        if not projections:
            raise ValueError("At least one projection is required.")
        projection = se.merge(*projections)

        # Explicit None check: the lifted filter is an expression object, and its
        # truthiness should not decide whether the filter is applied.
        filter_expr = se.lift(where).__expr__ if where is not None else None

        return projection.__expr__, filter_expr, asof

    def scan(
        self,
        *projections: ExprLike,
        where: ExprLike | None = None,
        asof: datetime | int | None = None,
        _key_columns: KeyColumns | None = None,
    ) -> Scan:
        """Starts a read transaction on the Spiral.

        Args:
            projections: a set of expressions that return struct arrays.
            where: a query expression to apply to the data.
            asof: execute the scan on the version of the table as of the given timestamp.

        Raises:
            ValueError: If no projection is given.
        """
        projection, filter_expr, asof = self._prepare_scan_args(projections, where, asof)

        return Scan(
            self,
            self.core.scan(projection, filter=filter_expr, asof=asof, key_columns=_key_columns),
        )

    # TODO(marko): This should be query, and search should be query + scan.
    def search(
        self,
        top_k: int,
        *rank_by: ExprLike,
        filters: ExprLike | None = None,
        freshness_window: timedelta | None = None,
    ) -> pa.RecordBatchReader:
        """Queries the index with the given rank by and filters clauses. Returns a stream of scored keys.

        Args:
            top_k: The number of top results to return.
            rank_by: Rank by expressions are combined for scoring.
                See `se.text.find` and `se.text.boost` for scoring expressions.
            filters: The `filters` expression is used to filter the results.
                It must return a boolean value and use only conjunctions (ANDs). Expressions in filters
                statement are considered either a `must` or `must_not` clause in search terminology.
            freshness_window: If provided, the index will not be refreshed if its freshness does not exceed this window.

        Raises:
            ValueError: If no rank by expression is given.
        """
        from spiral import expressions as se

        if not rank_by:
            raise ValueError("At least one rank by expression is required.")
        # Keep the combined expression in its own name rather than rebinding the `rank_by` tuple.
        rank_expr = se.or_(*rank_by)
        filter_expr = se.lift(filters).__expr__ if filters is not None else None

        # No window is passed on as a zero-second window; the core API takes whole seconds.
        if freshness_window is None:
            freshness_window = timedelta(seconds=0)
        freshness_window_s = int(freshness_window.total_seconds())

        return self.core.search(
            top_k=top_k,
            rank_by=rank_expr.__expr__,
            filters=filter_expr,
            freshness_window_s=freshness_window_s,
        )

    def compute_shards(
        self,
        max_batch_size: int,
        *projections: ExprLike,
        where: ExprLike | None = None,
        asof: datetime | int | None = None,
    ) -> list[Shard]:
        """Computes shards over the given projections and filter.

        Args:
            max_batch_size: The maximum number of rows per shard.
            projections: a set of expressions that return struct arrays.
            where: a query expression to apply to the data.
            asof: execute the scan on the version of the table as of the given timestamp.

        Raises:
            ValueError: If no projection is given.
        """
        projection, filter_expr, asof = self._prepare_scan_args(projections, where, asof)

        return self.core.compute_shards(max_batch_size, projection, filter_expr, asof=asof)

    @property
    def iceberg(self) -> "Iceberg":
        """
        Apache Iceberg is a powerful open-source table format designed for high-performance data lakes.
        Iceberg brings reliability, scalability, and advanced features like time travel, schema evolution,
        and ACID transactions to your warehouse.
        """
        from spiral.iceberg import Iceberg

        return Iceberg(self)
File without changes
@@ -0,0 +1,5 @@
1
+ from ..table.spec import Schema
2
+
3
+ def pretty_key(key: bytes, schema: Schema) -> str:
4
+ """Represent a key in a human-readable way."""
5
+ ...
@@ -0,0 +1,21 @@
1
+ from spiral.api.types import OrgId
2
+
3
+ class Token:
4
+ def __init__(self, value: str): ...
5
+ def expose_secret(self) -> str: ...
6
+
7
+ class Authn:
8
+ def token(self) -> Token | None: ...
9
+
10
+ class DeviceCodeAuth:
11
+ @staticmethod
12
+ def default() -> DeviceCodeAuth:
13
+ """Return the static device code instance."""
14
+ ...
15
+ def authenticate(self, force: bool = False, org_id: OrgId | None = None) -> Token:
16
+ """Authenticate using device code flow."""
17
+ ...
18
+
19
+ def logout(self) -> None:
20
+ """Logout from the device authentication session."""
21
+ ...
@@ -0,0 +1,285 @@
1
+ from enum import Enum
2
+ from typing import Any, Literal
3
+
4
+ import pyarrow as pa
5
+ from spiral.api.types import DatasetName, IndexName, ProjectId, RootUri, TableId, TableName
6
+ from spiral.core.authn import Authn
7
+ from spiral.core.config import ClientSettings
8
+ from spiral.core.table import ColumnGroupState, KeyRange, KeySpaceState, Scan, ScanState, Snapshot, Table, Transaction
9
+ from spiral.core.table.spec import ColumnGroup, Schema
10
+ from spiral.expressions import Expr
11
+
12
+ # Only for typing, the actual definition is in Rust.
13
+ class KeyColumns(Enum):
14
+ IfProjected = 0
15
+ Included = 1
16
+ Only = 2
17
+
18
+ class Spiral:
19
+ """A client for Spiral database"""
20
+ def __init__(
21
+ self,
22
+ config: ClientSettings | None = None,
23
+ overrides: dict[str, str] | None = None,
24
+ ):
25
+ """Initialize the Spiral client.
26
+
27
+ Args:
28
+ config: Client configuration, defaults to the global config.
29
+ overrides: Configuration overrides using dot notation,
30
+ see the [Client Configuration](/python-client) page for a full list.
31
+ """
32
+ ...
33
+
34
+ def authn(self) -> Authn:
35
+ """Get the current authentication context."""
36
+ ...
37
+
38
+ def scan(
39
+ self,
40
+ projection: Expr,
41
+ filter: Expr | None = None,
42
+ asof: int | None = None,
43
+ key_columns: KeyColumns | None = None,
44
+ ) -> Scan:
45
+ """Construct a table scan."""
46
+ ...
47
+
48
+ def load_scan(self, plan_state: ScanState) -> Scan:
49
+ """Load a scan from a serialized scan state."""
50
+ ...
51
+
52
+ def transaction(self, table: Table, format: str | None = None, retries: int | None = 3) -> Transaction:
53
+ """Begin a table transaction."""
54
+ ...
55
+
56
+ def search(
57
+ self,
58
+ top_k: int,
59
+ rank_by: Expr,
60
+ *,
61
+ filters: Expr | None = None,
62
+ freshness_window_s: int | None = None,
63
+ ) -> pa.RecordBatchReader:
64
+ """Search an index.
65
+
66
+ Searching an index returns a stream of record batches that match table's key schema + float score column.
67
+ """
68
+ ...
69
+
70
+ def table(self, table_id: str) -> Table:
71
+ """Get a table."""
72
+ ...
73
+
74
+ def create_table(
75
+ self,
76
+ project_id: ProjectId,
77
+ dataset: DatasetName,
78
+ table: TableName,
79
+ key_schema: Schema,
80
+ *,
81
+ root_uri: RootUri | None = None,
82
+ exist_ok: bool = False,
83
+ ) -> Table:
84
+ """Create a new table in the specified project."""
85
+ ...
86
+
87
+ def move_table(
88
+ self,
89
+ table_id: TableId,
90
+ new_dataset: DatasetName,
91
+ ):
92
+ """Move a table to a dataset in the same project."""
93
+ ...
94
+
95
+ def rename_table(
96
+ self,
97
+ table_id: TableId,
98
+ new_table: TableName,
99
+ ):
100
+ """Rename a table."""
101
+ ...
102
+
103
+ def text_index(self, index_id: str) -> TextIndex:
104
+ """Get a text index."""
105
+ ...
106
+
107
+ def create_text_index(
108
+ self,
109
+ project_id: ProjectId,
110
+ name: IndexName,
111
+ projection: Expr,
112
+ filter: Expr | None = None,
113
+ *,
114
+ root_uri: RootUri | None = None,
115
+ exist_ok: bool = False,
116
+ ) -> TextIndex:
117
+ """Create a new index in the specified project."""
118
+ ...
119
+
120
+ def key_space_index(self, index_id: str) -> KeySpaceIndex:
121
+ """Get a key space index."""
122
+ ...
123
+
124
+ def create_key_space_index(
125
+ self,
126
+ project_id: ProjectId,
127
+ name: IndexName,
128
+ granularity: int,
129
+ projection: Expr,
130
+ filter: Expr | None = None,
131
+ *,
132
+ root_uri: RootUri | None = None,
133
+ exist_ok: bool = False,
134
+ ) -> KeySpaceIndex:
135
+ """Create a new key space index in the specified project."""
136
+ ...
137
+
138
+ def compute_shards(
139
+ self,
140
+ max_batch_size: int,
141
+ projection: Expr,
142
+ filter: Expr | None = None,
143
+ asof: int | None = None,
144
+ ) -> list[Shard]:
145
+ """Constructs shards for a given projection (and filter).
146
+
147
+ Useful for distributing work.
148
+ """
149
+ ...
150
+
151
+ def internal(self, *, format: str | None = None) -> Internal:
152
+ """Internal client APIs. It can change without notice."""
153
+ ...
154
+
155
+ def config(self) -> ClientSettings:
156
+ """Client-side configuration."""
157
+ ...
158
+
159
+ class TextIndex:
160
+ id: str
161
+
162
+ class KeySpaceIndex:
163
+ id: str
164
+ table_id: str
165
+ granularity: int
166
+ projection: Expr
167
+ filter: Expr
168
+ asof: int
169
+
170
+ class Shard:
171
+ """A shard representing a partition of data.
172
+
173
+ Attributes:
174
+ key_range: The key range for this shard.
175
+ cardinality: The number of rows in this shard, if known.
176
+ """
177
+
178
+ key_range: KeyRange
179
+ cardinality: int | None
180
+
181
+ def __init__(self, key_range: KeyRange, cardinality: int | None): ...
182
+ def __getnewargs__(self) -> tuple[KeyRange, int | None]: ...
183
+
184
+ class ShuffleConfig:
185
+ """Configuration for within-shard sample shuffling.
186
+
187
+ This controls how samples are shuffled within a buffer, separate from
188
+ which shards to read (which is specified as a parameter to the scan).
189
+
190
+ Attributes:
191
+ buffer_size: Size of the buffer pool for shuffling samples.
192
+ seed: Random seed for reproducibility. If None, uses OS randomness.
193
+ max_batch_size: Maximum batch size for output chunks. If None,
194
+ defaults to max(1, buffer_size / 16).
195
+ """
196
+
197
+ buffer_size: int
198
+ seed: int | None
199
+ max_batch_size: int | None
200
+
201
+ def __init__(
202
+ self,
203
+ buffer_size: int,
204
+ *,
205
+ seed: int | None = None,
206
+ max_batch_size: int | None = None,
207
+ ): ...
208
+
209
+ class Internal:
210
+ def flush_wal(self, table: Table) -> None:
211
+ """
212
+ Flush the write-ahead log of the table.
213
+ """
214
+ ...
215
+ def compact_key_space(
216
+ self,
217
+ *,
218
+ table: Table,
219
+ mode: Literal["plan", "read", "write"] | None = None,
220
+ partition_bytes_min: int | None = None,
221
+ ):
222
+ """
223
+ Compact the key space of the table.
224
+ """
225
+ ...
226
+ def compact_column_group(
227
+ self,
228
+ table: Table,
229
+ column_group: ColumnGroup,
230
+ *,
231
+ mode: Literal["plan", "read", "write"] | None = None,
232
+ partition_bytes_min: int | None = None,
233
+ ):
234
+ """
235
+ Compact a column group in the table.
236
+ """
237
+ ...
238
+ def update_text_index(self, index: TextIndex, snapshot: Snapshot) -> None:
239
+ """
240
+ Index table changes up to the given snapshot.
241
+ """
242
+ ...
243
+ def update_key_space_index(self, index: KeySpaceIndex, snapshot: Snapshot) -> None:
244
+ """
245
+ Index table changes up to the given snapshot.
246
+ """
247
+ ...
248
+ def key_space_state(self, snapshot: Snapshot) -> KeySpaceState:
249
+ """
250
+ The key space state for the table.
251
+ """
252
+ ...
253
+ def column_group_state(
254
+ self, snapshot: Snapshot, key_space_state: KeySpaceState, column_group: ColumnGroup
255
+ ) -> ColumnGroupState:
256
+ """
257
+ The state of the column group of the table.
258
+ """
259
+ ...
260
+ def column_groups_states(self, snapshot: Snapshot, key_space_state: KeySpaceState) -> list[ColumnGroupState]:
261
+ """
262
+ The state of each column group of the table.
263
+ """
264
+ ...
265
+ def key_space_index_shards(self, index: KeySpaceIndex) -> list[Shard]:
266
+ """
267
+ Compute the scan shards from a key space index.
268
+ """
269
+ ...
270
+ def prepare_shard(
271
+ self,
272
+ output_path: str,
273
+ scan: Scan,
274
+ shard: Shard,
275
+ row_block_size: int = 8192,
276
+ ) -> None:
277
+ """
278
+ Prepare a shard locally. Used for `SpiralStream` integration with `streaming` which requires on-disk shards.
279
+ """
280
+ ...
281
+ def metrics(self) -> dict[str, Any]: ...
282
+
283
+ def flush_telemetry() -> None:
284
+ """Flush telemetry data to the configured exporter."""
285
+ ...
@@ -0,0 +1,35 @@
1
+ class ClientSettings:
2
+ """Client configuration loaded from ~/.spiral.toml and environment variables."""
3
+
4
+ @staticmethod
5
+ def load() -> ClientSettings:
6
+ """Load ClientSettings from ~/.spiral.toml and environment variables.
7
+
8
+ Configuration priority (highest to lowest):
9
+ 1. Environment variables (SPIRAL__*)
10
+ 2. Config file (~/.spiral.toml)
11
+ 3. Default values
12
+ """
13
+ ...
14
+
15
+ @property
16
+ def server_url(self) -> str:
17
+ """The Spiral API endpoint URL."""
18
+ ...
19
+
20
+ @property
21
+ def spfs_url(self) -> str:
22
+ """The SpFS endpoint URL."""
23
+ ...
24
+
25
+ @property
26
+ def file_format(self) -> str:
27
+ """File format for table storage (vortex or parquet)."""
28
+ ...
29
+
30
+ def to_json(self) -> str:
31
+ """Serialize to a JSON string"""
32
+ ...
33
+ @staticmethod
34
+ def from_json(json: str) -> ClientSettings:
35
+ """Deserialize from a JSON-formatted string"""
@@ -0,0 +1,15 @@
1
+ from pyarrow import Array, DataType, Scalar
2
+
3
+ class Expr:
4
+ """Low level expression class."""
5
+
6
+ def aux(name: str, data_type: DataType) -> Expr: ...
7
+
8
+ # Array is correct (there is no ArrayData), see the table here:
9
+ # https://arrow.apache.org/rust/arrow_pyarrow/index.html
10
+ def scalar(array: Array[Scalar[DataType]]) -> Expr: ...
11
+ def not_(expr: Expr) -> Expr: ...
12
+ def is_null(expr: Expr) -> Expr: ...
13
+ def binary(op: str, expr: Expr, Expr: Expr) -> Expr: ...
14
+ def cast(_expr: Expr, _data_type: DataType) -> Expr: ...
15
+ def array_lit(array: Array[Scalar[DataType]]) -> Expr: ...
@@ -0,0 +1,3 @@
1
+ from .. import Expr
2
+
3
+ def encode_(images: Expr, format: str) -> Expr: ...
@@ -0,0 +1,4 @@
1
+ from .. import Expr
2
+
3
+ def contains(list: Expr, expr: Expr) -> Expr: ...
4
+ def element_at(list: Expr, element: Expr) -> Expr: ...
@@ -0,0 +1,4 @@
1
+ from .. import Expr
2
+
3
+ def ref(expr: Expr, field: str | None) -> Expr: ...
4
+ def deref(expr: Expr, field: str | None) -> Expr: ...