pyspiral 0.8.2__cp311-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. pyspiral-0.8.2.dist-info/METADATA +53 -0
  2. pyspiral-0.8.2.dist-info/RECORD +114 -0
  3. pyspiral-0.8.2.dist-info/WHEEL +5 -0
  4. pyspiral-0.8.2.dist-info/entry_points.txt +3 -0
  5. spiral/__init__.py +55 -0
  6. spiral/_lib.abi3.so +0 -0
  7. spiral/adbc.py +411 -0
  8. spiral/api/__init__.py +78 -0
  9. spiral/api/admin.py +15 -0
  10. spiral/api/client.py +166 -0
  11. spiral/api/filesystems.py +134 -0
  12. spiral/api/key_space_indexes.py +23 -0
  13. spiral/api/organizations.py +77 -0
  14. spiral/api/projects.py +219 -0
  15. spiral/api/telemetry.py +19 -0
  16. spiral/api/text_indexes.py +56 -0
  17. spiral/api/types.py +23 -0
  18. spiral/api/workers.py +40 -0
  19. spiral/api/workloads.py +52 -0
  20. spiral/arrow_.py +202 -0
  21. spiral/cli/__init__.py +89 -0
  22. spiral/cli/__main__.py +4 -0
  23. spiral/cli/admin.py +14 -0
  24. spiral/cli/app.py +108 -0
  25. spiral/cli/console.py +95 -0
  26. spiral/cli/fs.py +76 -0
  27. spiral/cli/iceberg.py +97 -0
  28. spiral/cli/key_spaces.py +103 -0
  29. spiral/cli/login.py +25 -0
  30. spiral/cli/orgs.py +90 -0
  31. spiral/cli/printer.py +53 -0
  32. spiral/cli/projects.py +147 -0
  33. spiral/cli/state.py +7 -0
  34. spiral/cli/tables.py +203 -0
  35. spiral/cli/telemetry.py +17 -0
  36. spiral/cli/text.py +115 -0
  37. spiral/cli/types.py +50 -0
  38. spiral/cli/workloads.py +58 -0
  39. spiral/client.py +269 -0
  40. spiral/core/__init__.pyi +0 -0
  41. spiral/core/_tools/__init__.pyi +5 -0
  42. spiral/core/authn/__init__.pyi +21 -0
  43. spiral/core/client/__init__.pyi +258 -0
  44. spiral/core/config/__init__.pyi +35 -0
  45. spiral/core/expr/__init__.pyi +15 -0
  46. spiral/core/expr/images/__init__.pyi +3 -0
  47. spiral/core/expr/list_/__init__.pyi +4 -0
  48. spiral/core/expr/pushdown/__init__.pyi +3 -0
  49. spiral/core/expr/refs/__init__.pyi +4 -0
  50. spiral/core/expr/s3/__init__.pyi +3 -0
  51. spiral/core/expr/str_/__init__.pyi +3 -0
  52. spiral/core/expr/struct_/__init__.pyi +6 -0
  53. spiral/core/expr/text/__init__.pyi +5 -0
  54. spiral/core/expr/udf/__init__.pyi +14 -0
  55. spiral/core/expr/video/__init__.pyi +3 -0
  56. spiral/core/table/__init__.pyi +142 -0
  57. spiral/core/table/manifests/__init__.pyi +35 -0
  58. spiral/core/table/metastore/__init__.pyi +58 -0
  59. spiral/core/table/spec/__init__.pyi +214 -0
  60. spiral/dataloader.py +310 -0
  61. spiral/dataset.py +264 -0
  62. spiral/datetime_.py +27 -0
  63. spiral/debug/__init__.py +0 -0
  64. spiral/debug/manifests.py +87 -0
  65. spiral/debug/metrics.py +56 -0
  66. spiral/debug/scan.py +266 -0
  67. spiral/demo.py +84 -0
  68. spiral/enrichment.py +290 -0
  69. spiral/expressions/__init__.py +274 -0
  70. spiral/expressions/base.py +186 -0
  71. spiral/expressions/file.py +17 -0
  72. spiral/expressions/http.py +17 -0
  73. spiral/expressions/list_.py +77 -0
  74. spiral/expressions/pushdown.py +12 -0
  75. spiral/expressions/s3.py +16 -0
  76. spiral/expressions/str_.py +39 -0
  77. spiral/expressions/struct.py +59 -0
  78. spiral/expressions/text.py +62 -0
  79. spiral/expressions/tiff.py +225 -0
  80. spiral/expressions/udf.py +66 -0
  81. spiral/grpc_.py +32 -0
  82. spiral/iceberg.py +31 -0
  83. spiral/iterable_dataset.py +106 -0
  84. spiral/key_space_index.py +44 -0
  85. spiral/project.py +235 -0
  86. spiral/protogen/_/__init__.py +0 -0
  87. spiral/protogen/_/arrow/__init__.py +0 -0
  88. spiral/protogen/_/arrow/flight/__init__.py +0 -0
  89. spiral/protogen/_/arrow/flight/protocol/__init__.py +0 -0
  90. spiral/protogen/_/arrow/flight/protocol/sql/__init__.py +2548 -0
  91. spiral/protogen/_/google/__init__.py +0 -0
  92. spiral/protogen/_/google/protobuf/__init__.py +2310 -0
  93. spiral/protogen/_/message_pool.py +3 -0
  94. spiral/protogen/_/py.typed +0 -0
  95. spiral/protogen/_/scandal/__init__.py +190 -0
  96. spiral/protogen/_/spfs/__init__.py +72 -0
  97. spiral/protogen/_/spql/__init__.py +61 -0
  98. spiral/protogen/_/substrait/__init__.py +6196 -0
  99. spiral/protogen/_/substrait/extensions/__init__.py +169 -0
  100. spiral/protogen/__init__.py +0 -0
  101. spiral/protogen/util.py +41 -0
  102. spiral/py.typed +0 -0
  103. spiral/scan.py +383 -0
  104. spiral/server.py +37 -0
  105. spiral/settings.py +36 -0
  106. spiral/snapshot.py +56 -0
  107. spiral/streaming_/__init__.py +3 -0
  108. spiral/streaming_/reader.py +133 -0
  109. spiral/streaming_/stream.py +156 -0
  110. spiral/substrait_.py +274 -0
  111. spiral/table.py +227 -0
  112. spiral/text_index.py +17 -0
  113. spiral/transaction.py +167 -0
  114. spiral/types_.py +6 -0
spiral/client.py ADDED
@@ -0,0 +1,269 @@
1
+ from datetime import datetime, timedelta
2
+ from typing import TYPE_CHECKING
3
+
4
+ import jwt
5
+ import pyarrow as pa
6
+
7
+ from spiral.api import SpiralAPI
8
+ from spiral.api.projects import CreateProjectRequest, CreateProjectResponse
9
+ from spiral.core.authn import Authn
10
+ from spiral.core.client import Internal, Shard
11
+ from spiral.core.client import Spiral as CoreSpiral
12
+ from spiral.core.config import ClientSettings
13
+ from spiral.datetime_ import timestamp_micros
14
+ from spiral.expressions import ExprLike
15
+ from spiral.scan import Scan
16
+
17
+ if TYPE_CHECKING:
18
+ from spiral.iceberg import Iceberg
19
+ from spiral.key_space_index import KeySpaceIndex
20
+ from spiral.project import Project
21
+ from spiral.table import Table
22
+ from spiral.text_index import TextIndex
23
+
24
+
25
class Spiral:
    """Main client for interacting with the Spiral data platform.

    Configuration is loaded with the following priority (highest to lowest):
    1. Explicit parameters.
    2. Environment variables (`SPIRAL__*`)
    3. Config file (`~/.spiral.toml`)
    4. Default values (production URLs)

    The core client and the API client are created lazily, on first access of
    `core` / `api`; the constructor only records its arguments.

    Examples:

    ```python
    import spiral
    # Default configuration
    sp = spiral.Spiral()

    # With config overrides
    sp = spiral.Spiral(overrides={"limits.concurrency": "16"})
    ```

    Args:
        config: Custom ClientSettings object. Defaults to global settings.
        overrides: Configuration overrides using dot notation,
            see the [Client Configuration](https://docs.spiraldb.com/config) page for a full list.
    """

    def __init__(
        self,
        *,
        config: ClientSettings | None = None,
        overrides: dict[str, str] | None = None,
    ):
        self._overrides = overrides
        self._config = config
        # Caches filled in on first use by the properties below.
        self._org = None
        self._core = None
        self._api = None

    @property
    def config(self) -> ClientSettings:
        """Returns the client's configuration, as reported by the core client."""
        return self.core.config()

    @property
    def authn(self) -> Authn:
        """Get the authentication handler for this client."""
        return self.core.authn()

    @property
    def api(self) -> SpiralAPI:
        """The Spiral HTTP API client, created on first access and then reused."""
        if self._api is None:
            self._api = SpiralAPI(self.authn, base_url=self.config.server_url)
        return self._api

    @property
    def core(self) -> CoreSpiral:
        """The core client, created on first access from the constructor arguments."""
        if self._core is None:
            self._core = CoreSpiral(
                config=self._config,
                overrides=self._overrides,
            )
        return self._core

    @property
    def internal(self) -> Internal:
        """Internal client APIs, requested with the configured file format."""
        return self.core.internal(format=self.config.file_format)

    @property
    def organization(self) -> str:
        """ID of the organization found in the current access token.

        The value is read from the token's `org_id` claim and cached after the first lookup.

        Raises:
            ValueError: If no token is available, or the token carries no `org_id` claim.
        """
        if self._org is None:
            token = self.authn.token()
            if token is None:
                raise ValueError("Authentication failed.")
            # The token is decoded only to read its claims; its signature is not verified here.
            token_payload = jwt.decode(token.expose_secret(), options={"verify_signature": False})
            if "org_id" not in token_payload:
                raise ValueError("Please create an organization.")
            self._org = token_payload["org_id"]
        return self._org

    def list_projects(self) -> list["Project"]:
        """List the projects returned by the API as `Project` objects."""
        from spiral.project import Project

        return [Project(self, project_id=p.id, name=p.name) for p in self.api.project.list()]

    def create_project(
        self,
        id_prefix: str | None = None,
        *,
        name: str | None = None,
    ) -> "Project":
        """Create a project.

        Args:
            id_prefix: Optional prefix forwarded to the API in the create request.
            name: Optional project name forwarded to the API in the create request.
        """
        from spiral.project import Project

        res: CreateProjectResponse = self.api.project.create(CreateProjectRequest(id_prefix=id_prefix, name=name))
        return Project(self, project_id=res.project.id, name=res.project.name)

    def project(self, project_id: str) -> "Project":
        """Open an existing project."""
        from spiral.project import Project

        # We avoid an API call since we'd just be fetching a human-readable name. Seems a waste in most cases.
        return Project(self, project_id=project_id, name=project_id)

    def table(self, table_id: str) -> "Table":
        """Open a table using an ID."""
        from spiral.table import Table

        return Table(self, self.core.table(table_id))

    def text_index(self, index_id: str) -> "TextIndex":
        """Open a text index using an ID."""
        from spiral.text_index import TextIndex

        return TextIndex(self.core.text_index(index_id))

    def key_space_index(self, index_id: str) -> "KeySpaceIndex":
        """Open a key space index using an ID."""
        from spiral.key_space_index import KeySpaceIndex

        return KeySpaceIndex(self.core.key_space_index(index_id))

    def _prepare_query(
        self,
        projections: tuple[ExprLike, ...],
        where: ExprLike | None,
        asof: datetime | int | None,
    ) -> tuple:
        """Normalise scan arguments into `(projection_expr, filter_expr_or_None, asof)`.

        Shared by `scan` and `compute_shards` so both lower their arguments identically.

        Raises:
            ValueError: If no projection was given.
        """
        from spiral import expressions as se

        if isinstance(asof, datetime):
            asof = timestamp_micros(asof)

        if not projections:
            raise ValueError("At least one projection is required.")
        # Combine all projections into a single struct.
        projection = se.merge(*projections)

        # Compare against None explicitly: whether a filter was supplied must not depend on
        # the truth value of the lifted expression object.
        filter_expr = se.lift(where).__expr__ if where is not None else None
        return projection.__expr__, filter_expr, asof

    def scan(
        self,
        *projections: ExprLike,
        where: ExprLike | None = None,
        asof: datetime | int | None = None,
    ) -> Scan:
        """Starts a read transaction on the Spiral.

        Args:
            projections: a set of expressions that return struct arrays.
            where: a query expression to apply to the data.
            asof: execute the scan on the version of the table as of the given timestamp.

        Raises:
            ValueError: If no projection is given.
        """
        projection_expr, filter_expr, asof = self._prepare_query(projections, where, asof)
        return Scan(
            self,
            self.core.scan(projection_expr, filter=filter_expr, asof=asof),
        )

    # TODO(marko): This should be query, and search should be query + scan.
    def search(
        self,
        top_k: int,
        *rank_by: ExprLike,
        filters: ExprLike | None = None,
        freshness_window: timedelta | None = None,
    ) -> pa.RecordBatchReader:
        """Queries the index with the given rank by and filters clauses. Returns a stream of scored keys.

        Args:
            top_k: The number of top results to return.
            rank_by: Rank by expressions are combined for scoring.
                See `se.text.find` and `se.text.boost` for scoring expressions.
            filters: The `filters` expression is used to filter the results.
                It must return a boolean value and use only conjunctions (ANDs). Expressions in filters
                statement are considered either a `must` or `must_not` clause in search terminology.
            freshness_window: If provided, the index will not be refreshed if its freshness does not exceed this window.
                Defaults to a zero-length window; the value is passed on as whole seconds.

        Raises:
            ValueError: If no rank-by expression is given.
        """
        from spiral import expressions as se

        if not rank_by:
            raise ValueError("At least one rank by expression is required.")
        # Keep the variadic tuple and the combined expression under separate names.
        ranking = se.or_(*rank_by)
        filter_expr = se.lift(filters).__expr__ if filters is not None else None

        if freshness_window is None:
            freshness_window = timedelta(seconds=0)
        freshness_window_s = int(freshness_window.total_seconds())

        return self.core.search(
            top_k=top_k,
            rank_by=ranking.__expr__,
            filters=filter_expr,
            freshness_window_s=freshness_window_s,
        )

    def resume_scan(self, state_json: str) -> Scan:
        """Resumes a previously started scan using its scan state.

        Args:
            state_json: The scan state returned by a previous scan.
        """
        from spiral.core.table import ScanState

        state = ScanState.from_json(state_json)
        return Scan(self, self.core.load_scan(state))

    def compute_shards(
        self,
        max_batch_size: int,
        *projections: ExprLike,
        where: ExprLike | None = None,
        asof: datetime | int | None = None,
        stream: bool = False,
    ) -> list[Shard]:
        """Computes shards over the given projections and filter.

        Args:
            max_batch_size: The maximum number of rows per shard.
            projections: a set of expressions that return struct arrays.
            where: a query expression to apply to the data.
            asof: execute the scan on the version of the table as of the given timestamp.
            stream: if true, builds shards in a streaming fashion, suitable for very large tables.

        Raises:
            ValueError: If no projection is given.
        """
        projection_expr, filter_expr, asof = self._prepare_query(projections, where, asof)
        return self.core.compute_shards(max_batch_size, projection_expr, filter_expr, asof=asof, stream=stream)

    @property
    def iceberg(self) -> "Iceberg":
        """
        Apache Iceberg is a powerful open-source table format designed for high-performance data lakes.
        Iceberg brings reliability, scalability, and advanced features like time travel, schema evolution,
        and ACID transactions to your warehouse.
        """
        from spiral.iceberg import Iceberg

        return Iceberg(self)
File without changes
@@ -0,0 +1,5 @@
1
+ from ..table.spec import Schema
2
+
3
def pretty_key(
    key: bytes,
    schema: Schema,
) -> str:
    """Render `key` as a human-readable string, given the `schema` it belongs to."""
    ...
@@ -0,0 +1,21 @@
1
+ from spiral.api.types import OrgId
2
+
3
class Token:
    """Wrapper around a token string; the raw value is only available via `expose_secret`."""

    def __init__(self, value: str):
        ...

    def expose_secret(self) -> str:
        """Return the wrapped token string."""
        ...
6
+
7
class Authn:
    """Authentication handler that hands out the current token."""

    def token(self) -> Token | None:
        """Return the current token, or None when there is none."""
        ...
9
+
10
class DeviceCodeAuth:
    """Device-code authentication flow."""

    @staticmethod
    def default() -> DeviceCodeAuth:
        """Return the static device code instance."""
        ...

    def authenticate(
        self,
        force: bool = False,
        org_id: OrgId | None = None,
    ) -> Token:
        """Run the device code flow and return the resulting token."""
        ...

    def logout(self) -> None:
        """End the device authentication session."""
        ...
@@ -0,0 +1,258 @@
1
+ from enum import Enum
2
+ from typing import Any
3
+
4
+ import pyarrow as pa
5
+ from spiral.api.types import DatasetName, IndexName, ProjectId, RootUri, TableId, TableName
6
+ from spiral.core.authn import Authn
7
+ from spiral.core.config import ClientSettings
8
+ from spiral.core.table import ColumnGroupState, KeyRange, KeySpaceState, Scan, ScanState, Snapshot, Table, Transaction
9
+ from spiral.core.table.spec import ColumnGroup, Schema
10
+ from spiral.expressions import Expr
11
+
12
class Spiral:
    """Client for the Spiral database."""

    def __init__(self, config: ClientSettings | None = None, overrides: dict[str, str] | None = None):
        """Create the Spiral client.

        Args:
            config: Client configuration; the global config is used when omitted.
            overrides: Dot-notation configuration overrides. The
                [Client Configuration](/python-client) page has the full list.
        """
        ...

    def authn(self) -> Authn:
        """Return the current authentication context."""
        ...

    def scan(
        self,
        projection: Expr,
        filter: Expr | None = None,
        asof: int | None = None,
        key_columns: KeyColumns | None = None,
    ) -> Scan:
        """Build a table scan."""
        ...

    def load_scan(self, plan_state: ScanState) -> Scan:
        """Rebuild a scan from a serialized scan state."""
        ...

    def transaction(self, table: Table, *, partition_max_bytes: int | None = None) -> Transaction:
        """Begin a table transaction."""
        ...

    def search(
        self,
        top_k: int,
        rank_by: Expr,
        *,
        filters: Expr | None = None,
        freshness_window_s: int | None = None,
    ) -> pa.RecordBatchReader:
        """Search an index.

        The result is a stream of record batches matching the table's key schema plus a float score column.
        """
        ...

    def table(self, table_id: str) -> Table:
        """Look up a table by ID."""
        ...

    def create_table(
        self,
        project_id: ProjectId,
        dataset: DatasetName,
        table: TableName,
        key_schema: Schema,
        *,
        root_uri: RootUri | None = None,
        exist_ok: bool = False,
    ) -> Table:
        """Create a new table in the given project."""
        ...

    def move_table(self, table_id: TableId, new_dataset: DatasetName):
        """Move a table to another dataset of the same project."""
        ...

    def rename_table(self, table_id: TableId, new_table: TableName):
        """Rename a table."""
        ...

    def text_index(self, index_id: str) -> TextIndex:
        """Look up a text index by ID."""
        ...

    def create_text_index(
        self,
        project_id: ProjectId,
        name: IndexName,
        projection: Expr,
        filter: Expr | None = None,
        *,
        root_uri: RootUri | None = None,
        exist_ok: bool = False,
    ) -> TextIndex:
        """Create a new text index in the given project."""
        ...

    def key_space_index(self, index_id: str) -> KeySpaceIndex:
        """Look up a key space index by ID."""
        ...

    def create_key_space_index(
        self,
        project_id: ProjectId,
        name: IndexName,
        granularity: int,
        projection: Expr,
        filter: Expr | None = None,
        *,
        root_uri: RootUri | None = None,
        exist_ok: bool = False,
    ) -> KeySpaceIndex:
        """Create a new key space index in the given project."""
        ...

    def compute_shards(
        self,
        max_batch_size: int,
        projection: Expr,
        filter: Expr | None = None,
        asof: int | None = None,
        stream: bool = False,
    ) -> list[Shard]:
        """Build shards for a projection (and optional filter).

        Useful for distributing work.
        """
        ...

    def internal(self, *, format: str | None = None) -> Internal:
        """Internal client APIs. These may change without notice."""
        ...

    def config(self) -> ClientSettings:
        """Return the client-side configuration."""
        ...
153
+
154
class KeyColumns(Enum):
    """Values accepted by the `key_columns` argument of `Spiral.scan`."""

    IfProjected = 0
    Included = 1
    Only = 2
158
+
159
class TextIndex:
    """Handle to a text index."""

    # Identifier of the index.
    id: str
161
+
162
class KeySpaceIndex:
    """Handle to a key space index."""

    # Identifier of the index.
    id: str
    # Identifier of the table the index refers to.
    table_id: str
    granularity: int
    projection: Expr
    filter: Expr
    asof: int
169
+
170
class Shard:
    """One partition of data, described by a key range.

    Attributes:
        key_range: The key range covered by this shard.
        cardinality: Number of rows in this shard, or None when unknown.
    """

    key_range: KeyRange
    cardinality: int | None

    def __init__(
        self,
        key_range: KeyRange,
        cardinality: int | None,
    ): ...

    def __getnewargs__(self) -> tuple[KeyRange, int | None]: ...
183
+
184
+ class ShuffleConfig:
185
+ """Configuration for within-shard sample shuffling.
186
+
187
+ This controls how samples are shuffled within a buffer, separate from
188
+ which shards to read (which is specified as a parameter to the scan).
189
+
190
+ Attributes:
191
+ buffer_size: Size of the buffer pool for shuffling samples.
192
+ seed: Random seed for reproducibility. If None, uses OS randomness.
193
+ """
194
+
195
+ buffer_size: int
196
+ seed: int | None
197
+
198
+ def __init__(
199
+ self,
200
+ buffer_size: int,
201
+ *,
202
+ seed: int | None = None,
203
+ ): ...
204
+
205
class Internal:
    """Internal client APIs, obtained from `Spiral.internal`."""

    def flush_wal(self, table: Table) -> None:
        """Flush the table's write-ahead log."""
        ...

    def update_text_index(self, index: TextIndex, snapshot: Snapshot) -> None:
        """Index table changes up to the given snapshot."""
        ...

    def update_key_space_index(self, index: KeySpaceIndex, snapshot: Snapshot) -> None:
        """Index table changes up to the given snapshot."""
        ...

    def key_space_state(self, snapshot: Snapshot) -> KeySpaceState:
        """Return the key space state of the table."""
        ...

    def column_group_state(
        self,
        snapshot: Snapshot,
        key_space_state: KeySpaceState,
        column_group: ColumnGroup,
    ) -> ColumnGroupState:
        """Return the state of one column group of the table."""
        ...

    def column_groups_states(
        self,
        snapshot: Snapshot,
        key_space_state: KeySpaceState,
    ) -> list[ColumnGroupState]:
        """Return the state of each column group of the table."""
        ...

    def key_space_index_shards(self, index: KeySpaceIndex) -> list[Shard]:
        """Compute the scan shards from a key space index."""
        ...

    def prepare_shard(
        self,
        output_path: str,
        scan: Scan,
        shard: Shard,
        row_block_size: int = 8192,
    ) -> None:
        """Prepare a shard locally.

        Used for the `SpiralStream` integration with `streaming`, which requires on-disk shards.
        """
        ...

    def metrics(self) -> dict[str, Any]: ...
255
+
256
def flush_telemetry() -> None:
    """Send any pending telemetry data to the configured exporter."""
    ...
@@ -0,0 +1,35 @@
1
class ClientSettings:
    """Client configuration, read from ~/.spiral.toml and environment variables."""

    @staticmethod
    def load() -> ClientSettings:
        """Build ClientSettings from ~/.spiral.toml and environment variables.

        Sources, from highest to lowest priority:
        1. Environment variables (SPIRAL__*)
        2. Config file (~/.spiral.toml)
        3. Default values
        """
        ...

    @property
    def server_url(self) -> str:
        """URL of the Spiral API endpoint."""
        ...

    @property
    def spfs_url(self) -> str:
        """URL of the SpFS endpoint."""
        ...

    @property
    def file_format(self) -> str:
        """File format used for table storage (vortex or parquet)."""
        ...

    def to_json(self) -> str:
        """Serialize the settings to a JSON string."""
        ...

    @staticmethod
    def from_json(json: str) -> ClientSettings:
        """Deserialize settings from a JSON-formatted string."""
        ...
@@ -0,0 +1,15 @@
1
+ from pyarrow import Array, DataType, Scalar
2
+
3
class Expr:
    """Low-level expression class."""
5
+
6
def aux(name: str, data_type: DataType) -> Expr: ...


# `Array` is the correct annotation (there is no `ArrayData`); see the table at
# https://arrow.apache.org/rust/arrow_pyarrow/index.html
def scalar(array: Array[Scalar[DataType]]) -> Expr: ...


def not_(expr: Expr) -> Expr: ...


def is_null(expr: Expr) -> Expr: ...


# NOTE(review): the third parameter is literally named `Expr`, shadowing the class inside the
# signature. It looks like a left/right operand pair was intended; confirm the real parameter
# names against the native implementation before renaming.
def binary(op: str, expr: Expr, Expr: Expr) -> Expr: ...


def cast(_expr: Expr, _data_type: DataType) -> Expr: ...


def array_lit(array: Array[Scalar[DataType]]) -> Expr: ...
@@ -0,0 +1,3 @@
1
+ from .. import Expr
2
+
3
def encode_(
    images: Expr,
    format: str,
) -> Expr:
    """Build an expression from an images expression and a format name."""
    ...
@@ -0,0 +1,4 @@
1
+ from .. import Expr
2
+
3
def contains(
    list: Expr,
    expr: Expr,
) -> Expr:
    """Build a `contains` expression over a list expression and another expression."""
    ...


def element_at(
    list: Expr,
    element: Expr,
) -> Expr:
    """Build an `element_at` expression over a list expression and an element expression."""
    ...
@@ -0,0 +1,3 @@
1
+ from .. import Expr
2
+
3
def expensive(
    expr: Expr,
) -> Expr:
    """Wrap `expr` in the pushdown module's `expensive` expression."""
    ...
@@ -0,0 +1,4 @@
1
+ from .. import Expr
2
+
3
def ref(
    expr: Expr,
    field: str | None,
) -> Expr:
    """Build a `ref` expression from `expr` and an optional field name."""
    ...


def deref(
    expr: Expr,
    field: str | None,
) -> Expr:
    """Build a `deref` expression from `expr` and an optional field name."""
    ...
@@ -0,0 +1,3 @@
1
+ from .. import Expr
2
+
3
def get(
    expr: Expr,
    abort_on_error: bool,
) -> Expr:
    """Build the s3 module's `get` expression from `expr` and the `abort_on_error` flag."""
    ...
@@ -0,0 +1,3 @@
1
+ from .. import Expr
2
+
3
def substr(
    expr: Expr,
    begin: int,
    end: int | None,
) -> Expr:
    """Build a substring expression over `expr` from `begin` and an optional `end`."""
    ...
@@ -0,0 +1,6 @@
1
+ from .. import Expr
2
+
3
def getitem(expr: Expr, item: str) -> Expr: ...


def select(
    expr: Expr,
    including: list[str] | None = None,
    excluding: list[str] | None = None,
) -> Expr: ...


# NOTE(review): `children` is annotated as list[str]; other struct helpers take expressions,
# so list[Expr] may have been intended. Confirm against the native implementation.
def pack(
    names: list[str],
    children: list[str],
    nullable: bool,
) -> Expr: ...


# NOTE(review): the parameter is called `names` but is typed as a list of expressions;
# presumably these are the expressions to merge. Verify before renaming.
def merge(names: list[Expr]) -> Expr: ...
@@ -0,0 +1,5 @@
1
+ from .. import Expr
2
+
3
# NOTE(review): `tokeneizer` looks like a misspelling of "tokenizer". It is part of the
# keyword interface, so confirm the native parameter name before changing it.
def field(
    expr: Expr,
    tokeneizer: str | None,
) -> Expr: ...


def find(
    expr: Expr,
    term: str,
) -> Expr: ...


def boost(
    expr: Expr,
    factor: float,
) -> Expr: ...
@@ -0,0 +1,14 @@
1
+ from collections.abc import Callable
2
+
3
+ from pyarrow import Array, DataType, Scalar
4
+
5
+ from .. import Expr
6
+
7
class UDF:
    """User-defined function handle; calling it with argument expressions yields an expression."""

    def __call__(
        self,
        args: list[Expr],
    ) -> Expr: ...
9
+
10
def create(
    name: str,
    return_type: Callable[[tuple[DataType, ...]], DataType],
    invoke: Callable[[tuple[Array[Scalar[DataType]], ...]], Array[Scalar[DataType]]],
) -> UDF:
    """Create a UDF.

    Args:
        name: Name of the function.
        return_type: Callable taking a tuple of data types and returning a data type.
        invoke: Callable taking a tuple of arrays and returning an array.
    """
    ...
@@ -0,0 +1,3 @@
1
+ from .. import Expr
2
+
3
def read(
    expr: Expr,
    ranges: Expr,
    crops: Expr,
    format: str,
) -> Expr:
    """Build the video module's `read` expression from its input, ranges, crops and format."""
    ...