pyspiral 0.4.4__cp310-abi3-macosx_11_0_arm64.whl → 0.5.0__cp310-abi3-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pyspiral
3
- Version: 0.4.4
3
+ Version: 0.5.0
4
4
  Classifier: Intended Audience :: Science/Research
5
5
  Classifier: Operating System :: OS Independent
6
6
  Classifier: Programming Language :: Python
@@ -13,22 +13,23 @@ Classifier: Programming Language :: Python :: 3.13
13
13
  Classifier: Programming Language :: Rust
14
14
  Classifier: License :: Other/Proprietary License
15
15
  Requires-Dist: betterproto==2.0.0b7
16
+ Requires-Dist: datasets>=4.0.0
16
17
  Requires-Dist: google-re2>=1.1.20240702
17
18
  Requires-Dist: grpclib>=0.4.7
18
19
  Requires-Dist: hishel>=0.0.30
19
20
  Requires-Dist: httpx>=0.27.0
21
+ Requires-Dist: nanoid>=2.0.0
20
22
  Requires-Dist: numpy>=2
21
23
  Requires-Dist: pyarrow>=21.0.0
22
24
  Requires-Dist: pydantic-settings>=2.3.4
23
25
  Requires-Dist: pydantic[email]>=2.5.3
24
26
  Requires-Dist: pyjwt[crypto]>=2.9.0
27
+ Requires-Dist: pyperclip>=1.9.0
25
28
  Requires-Dist: questionary>=2.0.1
29
+ Requires-Dist: sqlglot[rs]>=25.25.1
26
30
  Requires-Dist: tqdm>=4.66.5
27
31
  Requires-Dist: typer>=0.16
28
32
  Requires-Dist: xxhash>=3.4.1
29
- Requires-Dist: nanoid>=2.0.0
30
- Requires-Dist: sqlglot[rs]>=25.25.1
31
- Requires-Dist: pyperclip>=1.9.0
32
33
  Requires-Dist: polars>=1.31.0 ; extra == 'polars'
33
34
  Requires-Dist: duckdb>=1.3.2 ; extra == 'duckdb'
34
35
  Requires-Dist: pyiceberg>=0.9.1 ; extra == 'pyiceberg'
@@ -1,17 +1,19 @@
1
- pyspiral-0.4.4.dist-info/METADATA,sha256=lVusDQ4LmIf3tRllkl2_hUNn1y0P_yMfNKhkRbpNLW0,1610
2
- pyspiral-0.4.4.dist-info/WHEEL,sha256=Mdosfxua6Dx1zYgObRH97e3wyiELqBbLtoRJj4RUSQE,103
3
- pyspiral-0.4.4.dist-info/entry_points.txt,sha256=uft7u-a6g40NLt4Q6BleWbK4NY0M8nZuYPpP8DV0EOk,45
1
+ pyspiral-0.5.0.dist-info/METADATA,sha256=J_bR2LwG0i6M8wB0-60AXJm8MutIfqs9QUbaINmBeTI,1641
2
+ pyspiral-0.5.0.dist-info/WHEEL,sha256=Mdosfxua6Dx1zYgObRH97e3wyiELqBbLtoRJj4RUSQE,103
3
+ pyspiral-0.5.0.dist-info/entry_points.txt,sha256=uft7u-a6g40NLt4Q6BleWbK4NY0M8nZuYPpP8DV0EOk,45
4
4
  spiral/__init__.py,sha256=Jv1vbcnnmcTsBLN5mSNjnX3ae4C_mgojXDSBFaqIhN0,208
5
- spiral/_lib.abi3.so,sha256=5-2gLMM7XOEgX6lXbIBu-_ucQsC5zUZWqZaH7oXkdSE,59883120
6
- spiral/adbc.py,sha256=HcvR60uQeEK2oggSAK6y5VYtIrACIiCQ-85MEf18EZc,14199
7
- spiral/api/__init__.py,sha256=_7BS1RhqEFjnt3XwFWZNCHVEQeSKpezPevAiGCsvDbE,1776
5
+ spiral/_lib.abi3.so,sha256=4J6RSb9LxUWqUnRZeyccSneyjqEFfIrJ6lMCnsNUqbc,60398416
6
+ spiral/adbc.py,sha256=RIIWBew7zPoQa_h3I-A-nX9cUMDM3D3Je0mqE9aDX9k,14885
7
+ spiral/api/__init__.py,sha256=nzZK3r1K2GSaqaLoIba4WuyUyG7ApjLZ4fyJovT1unA,2000
8
8
  spiral/api/admin.py,sha256=A1iVR1XYJSObZivPAD5UzmPuMgupXc9kaHNYYa_kwfs,585
9
- spiral/api/client.py,sha256=9-L6T8niQAXo90jRxllJD4hXXmcGfHj7CW9X3XTYa5Q,4551
9
+ spiral/api/client.py,sha256=XQaOd1DgLTDOxW_uZUeTX5UCnUpPV1unjxbzWLlPJaM,4650
10
10
  spiral/api/filesystems.py,sha256=EA4iqhTeaIlvObvEUxHmZl0pQ24IOxUVWM3GPhFLw8o,4969
11
11
  spiral/api/organizations.py,sha256=B-8zZ7lFJANGK7dUNbo_aU-cgI959JBP9VcWb6wdgi0,1895
12
- spiral/api/projects.py,sha256=JBGof9A2Ivasu2jrULMjHBwlna0M8WRrTNqU-Es4GJ8,5673
12
+ spiral/api/projects.py,sha256=mvyp4tnUhItNnPnyWsj0WrSf2ca3mggL7KjXs5Fllco,5671
13
13
  spiral/api/telemetry.py,sha256=tfdA3E_EWJwFVxkQfkm8tiYGRubnx2LuE5nbfsk1oG4,474
14
- spiral/api/types.py,sha256=zx-BRKsi1GHg9aL9gMUaVQWYYMXJcP0A8OQUc7jSIAc,653
14
+ spiral/api/text_indexes.py,sha256=f8AcrYaBKsaka5A4okuExl96A-2rBbsgru98l_xogwo,1826
15
+ spiral/api/types.py,sha256=lGdiKViRgIEJXD2ubwnyEIEwHkfRumlZjVEaHMV3Tm8,682
16
+ spiral/api/workers.py,sha256=0wZNUHMioDT53P1OBJfpjyDfIodHwwT6858z2IlRIM4,636
15
17
  spiral/api/workloads.py,sha256=XAyXV7vgZcoyyoPoGvOT4jTpyFKFMvrrAfhL6d1h1kE,1748
16
18
  spiral/arrow_.py,sha256=T1LZ7bh9aMDbXfpUsf0dR0E1roTQyAYSgZ2mL4s8J_4,7681
17
19
  spiral/cli/__init__.py,sha256=ooAFz_iCpVCKHE0TiVElIynbP2PtTgD9cUw46Vh1lcw,2145
@@ -23,21 +25,24 @@ spiral/cli/fs.py,sha256=dVPoAoAbuQ9yJlfI-JiFgS9VdnPmeBMygVHgehJRj34,4367
23
25
  spiral/cli/iceberg/__init__.py,sha256=IQV_gwCFSj6Ubxs58VM9Pal1ymgG2bxdDgOPuk9E5bs,214
24
26
  spiral/cli/iceberg/namespaces.py,sha256=x9pvHlcXtcATYYjqimHa6CtkyL3taQUJ--ni_Bfoemc,1510
25
27
  spiral/cli/iceberg/tables.py,sha256=nSR4-t54otJfCmubB6vXnbOkbqPVGV0sHBlc-t9cIVg,1930
26
- spiral/cli/indexes/__init__.py,sha256=-USfxCIdckzZKBNQ-DXqe3V5ttWVo_Fsa1Mfcx5hdIw,467
28
+ spiral/cli/indexes/__init__.py,sha256=yNMBZh3kAz1NXsridvbY4-4jNIJAgntBhDnu1EfaZTI,1291
29
+ spiral/cli/indexes/args.py,sha256=B4zAFnVZKPHq-_z8qnHYwgs1v4c-w_iypbA5MdXL67s,1139
30
+ spiral/cli/indexes/workers.py,sha256=mu-7-Asz1txNtCkoei6q-m-lE5YhLUjCDwstKJcew40,2169
27
31
  spiral/cli/login.py,sha256=InKMnpV8NATW5RPgB3ZL-DSVPzUuUByyK4Fx7pZEgfg,607
28
32
  spiral/cli/orgs.py,sha256=V-4ZTT3FwFQLcs1-BenC8uCgvWOJcxkZPSdCPfsexhc,2848
29
33
  spiral/cli/printer.py,sha256=W83KAE-7meoDD1yRltLQrZqrA2olGapBGy_2USWkY08,1778
30
34
  spiral/cli/projects.py,sha256=TKXu_VzkIUccwXzdlg-wQMkrB-Py33g052NrbuJx-D4,5096
31
35
  spiral/cli/state.py,sha256=10wTIVQ0SJkY67Z6-KQ1LFlt3aVIPmZhoHFdTwp4kNA,130
32
- spiral/cli/tables/__init__.py,sha256=lkGLDeU28IVnuxJdlYSUh6QSB9fQ4_1MeZJL73iXcHo,3660
36
+ spiral/cli/tables/__init__.py,sha256=DTxviiflWZkUDmPhuGzXmhW2mNRgPNkJcaP7olFtXpc,2678
37
+ spiral/cli/tables/args.py,sha256=bxKQoJuWhCTHlDYz_WRQ6_Kp9XgRBH1UIMgUMzlxliA,1262
33
38
  spiral/cli/telemetry.py,sha256=ABDCyV5QJGOIJp4AxvK0LG5xNPIysP37K5haL38T7P4,586
34
39
  spiral/cli/types.py,sha256=YG1eHhRLaqlVU_18DQBuF_YMsabhMZLBY0V9CvbSxjY,1369
35
40
  spiral/cli/workloads.py,sha256=SbxgwiBlX1AuqpOLV3gs7DFkH-Tbeend7qJTwq0Je84,1994
36
41
  spiral/client.py,sha256=K-OuMOTgYxOA9vef5jSANjmPRBfGrzQ65fg6Fd-rHMY,2683
37
42
  spiral/core/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
38
43
  spiral/core/client/__init__.pyi,sha256=Tn1OJmkO1rQUsPE9BtfEyxIjoife6s16qOd8XiyHi2c,3475
39
- spiral/core/index/__init__.pyi,sha256=NPOG1ztFO6siBGpmJU3boRzX26xfxw--2TiCydosGvo,314
40
- spiral/core/table/__init__.pyi,sha256=agrxN1dYx--dte_edQOKgAXT8yPDeh_cHA8dYAOodbE,3290
44
+ spiral/core/index/__init__.pyi,sha256=MBq-jBuTmBreYMJ4AJFAe4e-ByRMM7JVssuEJMLVfQs,131
45
+ spiral/core/table/__init__.pyi,sha256=XhN9xpDdBnugRhtiP9ThC3DSADoLMDL9lxSjjY7fJ28,3296
41
46
  spiral/core/table/manifests/__init__.pyi,sha256=3V59-K1qr1z2dGfgRKXaHSVheK8NNw8Q8PFhfbeQd_4,1065
42
47
  spiral/core/table/metastore/__init__.pyi,sha256=dMqySDnsjPUTBuFU2MaQGyocKEoGkWpeTQmUP2iIKbc,1880
43
48
  spiral/core/table/spec/__init__.pyi,sha256=D4GQp9RWwyLKTlRW7eDXcQE-xA5rF2iBcXZ8y7b48EE,5595
@@ -61,7 +66,7 @@ spiral/iceberg/__init__.py,sha256=jSIlTxWauAbJV5gsWglZisFbnfNNzLYN90scoYcdWzc,65
61
66
  spiral/iceberg/client.py,sha256=E6FyE_h2HLgDW1cAFg1XgglJr6rbVOCWjRtRmqoMVkM,1003
62
67
  spiral/indexes/__init__.py,sha256=TXLQ-_3xso3lFIp2lM58_ip9OPNwPKFv1FdsWiUF-d8,178
63
68
  spiral/indexes/client.py,sha256=NsFBILEHMjyCUruFrUEKucRQRrN4OvqgbL4pmzWs07g,5600
64
- spiral/indexes/index.py,sha256=4CmSFlZYp46B2CjqtiyZ7VF5EH3duiutz3nWFnyApLA,973
69
+ spiral/indexes/index.py,sha256=TJB1-hSiPNrzJ_VYFTZGHvIQ_vmoILPK7tyi_9oS1nA,638
65
70
  spiral/indexes/scan.py,sha256=B2m-UgNuawNB90HXK33GTQfMy2WLdNNxiiB6cIjFW2Y,697
66
71
  spiral/project.py,sha256=0uJ1Jb88Ie-cCNnSdX3QfFtCUqrjLka4zCm_TxCpVak,1189
67
72
  spiral/protogen/_/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -83,16 +88,16 @@ spiral/server.py,sha256=ztBmB5lBnUz-smQxR_tC8AI5SOhz17wH0MI3GuzDUdM,600
83
88
  spiral/settings.py,sha256=PIQV2ljtB3pEOWoMRVSRzSGJNrXviO2JBgZ5ZY_Nq2E,2794
84
89
  spiral/substrait_.py,sha256=2BYvwFGcCwJ0JXNhXOLdPuhM1PqFyaeSqFpQCtv-M4E,12581
85
90
  spiral/tables/__init__.py,sha256=iiP7BkHA117em37_e75jtdvoZC10xCXtld18gRnPbTw,430
86
- spiral/tables/client.py,sha256=l_wJJRf3BPD5lg4Q1Ll2lAqQIuBCnKwC6JtsAui91Tc,4915
91
+ spiral/tables/client.py,sha256=MVgfeVF7P4kXnjOTQExp0VmywyuuRQ2IZVxdwVY3xgk,5015
87
92
  spiral/tables/dataset.py,sha256=DuHeKVCJfXLsbxmde9QW6yvesW5uhswG6qAxV5X0ZgA,7890
88
93
  spiral/tables/debug/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
89
- spiral/tables/debug/manifests.py,sha256=E_-DiMBg2EPL97cl9hLWhiqEsFtjEBgh_C7jZy8EWYc,2594
94
+ spiral/tables/debug/manifests.py,sha256=t7E0AchHrzOv9vAQpE77Qp3rLc5VTzRRxlByt5OWnUM,2596
90
95
  spiral/tables/debug/metrics.py,sha256=XdRDcjggtsLNGCAjam6IxG9072pz_d2C8iLApNRFUtk,2044
91
- spiral/tables/debug/scan.py,sha256=-IWX_UjO4QP9Hj7PtZ1rLlbswJcryOin56GT-exqFm4,8942
96
+ spiral/tables/debug/scan.py,sha256=EEG2gzbBpUyrtk4jQXh-ENk8aySrKf0CZMym_wHuWm4,8947
92
97
  spiral/tables/maintenance.py,sha256=7Xa2Jdu_OY1Qu6iN1sPVdywVZtk_Mv3EaC3G93cmQvI,305
93
98
  spiral/tables/scan.py,sha256=3lPf5fSyF1fHGdGJ-pvu5HxPWoonf_XL7neWTqzB-0I,7582
94
99
  spiral/tables/snapshot.py,sha256=2NTuVEp2uJ1pV3Q5tLj7FOzPSc9axlfb6uOITwHnj0g,2229
95
100
  spiral/tables/table.py,sha256=VM93Rsm67sJFendI1_VhlkFORIdBGfhCMBUBK4dve9I,4910
96
101
  spiral/tables/transaction.py,sha256=3a64R-mf_cmR54BNn8U-05jmWonp6Ivxhe6u01Dyjzo,1573
97
102
  spiral/types_.py,sha256=W_jyO7F6rpPiH69jhgSgV7OxQZbOlb1Ho3InpKUP6Eo,155
98
- pyspiral-0.4.4.dist-info/RECORD,,
103
+ pyspiral-0.5.0.dist-info/RECORD,,
spiral/_lib.abi3.so CHANGED
Binary file
spiral/adbc.py CHANGED
@@ -35,6 +35,7 @@ from spiral.protogen._.arrow.flight.protocol.sql import (
35
35
  SqlInfo,
36
36
  SqlSupportedTransaction,
37
37
  )
38
+ from spiral.tables import Snapshot
38
39
 
39
40
  log = logging.getLogger(__name__)
40
41
  logging.getLogger("sqlx").setLevel(logging.WARNING)
@@ -64,7 +65,6 @@ def debuggable(func):
64
65
  return wrapper_decorator
65
66
 
66
67
 
67
- # TODO(marko): This should work for Iceberg tables.
68
68
  class ADBCServerBase:
69
69
  def get_sql_info(self, _req: CommandGetSqlInfo) -> pa.RecordBatchReader:
70
70
  """Default implementation that reports no support for any complex features."""
@@ -143,6 +143,17 @@ class SpiralADBCServer(ADBCServerBase):
143
143
 
144
144
  self.pool = ThreadPoolExecutor()
145
145
 
146
+ def open_snapshot(self, tbl) -> Snapshot:
147
+ """Open a table in the Spiral project and return it as a PyArrow Dataset."""
148
+ if tbl.catalog is None or tbl.catalog == "":
149
+ raise FlightError("Project (Data Catalog) must be specified to open a table.")
150
+
151
+ project = tbl.catalog
152
+ dataset = tbl.db or "default"
153
+ table = tbl.name
154
+
155
+ return self.sp.project(project).tables.table(f"{dataset}.{table}").snapshot()
156
+
146
157
  def get_catalogs(self, req: CommandGetCatalogs) -> pa.RecordBatchReader:
147
158
  schema = pa.schema([pa.field("catalog_name", pa.string(), nullable=False)])
148
159
 
@@ -170,15 +181,16 @@ class SpiralADBCServer(ADBCServerBase):
170
181
  if req.catalog == "":
171
182
  # Empty string means databases _without_ a catalog, which we don't support
172
183
  return
184
+ catalog = req.catalog
173
185
 
174
186
  # Otherwise, catalog is either the project ID, or None.
175
- if req.catalog is None:
187
+ if catalog is None:
176
188
  projects = self.sp.list_projects()
177
189
  else:
178
190
  projects = [self.sp.project(req.catalog)]
179
191
 
180
192
  for project in projects:
181
- datasets = {dt.dataset for dt in project.tables.list_tables()}
193
+ datasets = {tbl.dataset for tbl in project.tables.list_tables()}
182
194
 
183
195
  batch = pa.RecordBatch.from_arrays(
184
196
  [
@@ -219,6 +231,7 @@ class SpiralADBCServer(ADBCServerBase):
219
231
  projects = list(self.sp.list_projects())
220
232
  else:
221
233
  projects = [self.sp.project(req.catalog)]
234
+ projects = sorted(projects, key=lambda p: p.id)
222
235
 
223
236
  def _process_project(project):
224
237
  tables: list[TableResource] = project.tables.list_tables()
@@ -248,12 +261,13 @@ class SpiralADBCServer(ADBCServerBase):
248
261
  def statement_query(self, req: CommandStatementQuery, limit: int | None = None) -> pa.RecordBatchReader:
249
262
  # Extract the tables from the query, and bring them into the Python locals scope.
250
263
  expr = sqlglot.parse_one(req.query, dialect="duckdb")
264
+ datasets = {}
251
265
  for tbl in expr.find_all(exp.Table):
252
266
  # We swap the three-part identifier out for a single identifier
253
- # This lets us insert a PyArrow Dataset into Python locals such that
254
- # DuckDB will pick up on it for the query.
255
- name = exp.table_name(tbl)
256
- locals()[name] = self.sp.tables.table(f"{tbl.catalog}.{tbl.db}.{tbl.name}").snapshot().to_dataset()
267
+ # This lets us register a PyArrow Dataset with DuckDB for the query.
268
+ snapshot = self.open_snapshot(tbl)
269
+ name = snapshot.table.table_id
270
+ datasets[name] = snapshot.to_dataset()
257
271
  tbl.replace(exp.table_(table=name))
258
272
 
259
273
  try:
@@ -262,7 +276,11 @@ class SpiralADBCServer(ADBCServerBase):
262
276
  raise FlightError("DuckDB is required for SQL queries.")
263
277
 
264
278
  try:
265
- sql = duckdb.sql(expr.sql(dialect="duckdb"))
279
+ # Create a DuckDB connection and register the datasets
280
+ conn = duckdb.connect()
281
+ for name, dataset in datasets.items():
282
+ conn.register(name, dataset)
283
+ sql = conn.sql(expr.sql(dialect="duckdb"))
266
284
  except Exception as e:
267
285
  raise FlightError(str(e))
268
286
 
spiral/api/__init__.py CHANGED
@@ -3,6 +3,8 @@ from typing import TYPE_CHECKING
3
3
 
4
4
  import httpx
5
5
 
6
+ from spiral.api.text_indexes import TextIndexesService
7
+
6
8
  from .client import _Client
7
9
 
8
10
  if TYPE_CHECKING:
@@ -57,6 +59,12 @@ class SpiralAPI:
57
59
 
58
60
  return WorkloadService(self.client)
59
61
 
62
+ @property
63
+ def text_indexes(self) -> "TextIndexesService":
64
+ from .text_indexes import TextIndexesService
65
+
66
+ return TextIndexesService(self.client)
67
+
60
68
  @property
61
69
  def telemetry(self) -> "TelemetryService":
62
70
  from .telemetry import TelemetryService
spiral/api/client.py CHANGED
@@ -146,6 +146,10 @@ class _Client:
146
146
  # Enrich the exception with the response body
147
147
  raise SpiralHTTPError(body=resp.text, code=resp.status_code) from e
148
148
 
149
+ if response_cls == type[None]:
150
+ assert resp.text == ""
151
+ return None
152
+
149
153
  return TypeAdapter(response_cls).validate_python(resp.json())
150
154
 
151
155
  def paged(
spiral/api/projects.py CHANGED
@@ -192,6 +192,6 @@ class ProjectService(ServiceBase):
192
192
  """Get a grant."""
193
193
  return self.client.get(f"/v1/grants/{grant_id}", Grant)
194
194
 
195
- def revoke_grant(self, grant_id: str) -> None:
195
+ def revoke_grant(self, grant_id: str):
196
196
  """Revoke a grant."""
197
- return self.client.delete(f"/v1/grants/{grant_id}", None)
197
+ return self.client.delete(f"/v1/grants/{grant_id}", type[None])
@@ -0,0 +1,56 @@
1
+ from pydantic import BaseModel
2
+
3
+ from .client import Paged, PagedResponse, ServiceBase
4
+ from .types import IndexId, ProjectId, WorkerId
5
+ from .workers import CPU, GcpRegion, Memory, ResourceClass
6
+
7
+
8
+ class TextSearchWorker(BaseModel):
9
+ worker_id: WorkerId
10
+ project_id: ProjectId
11
+ index_id: IndexId
12
+ url: str | None
13
+
14
+
15
+ class CreateWorkerRequest(BaseModel):
16
+ cpu: CPU
17
+ memory: Memory
18
+ region: GcpRegion
19
+
20
+
21
+ class CreateWorkerResponse(BaseModel):
22
+ worker_id: WorkerId
23
+
24
+
25
+ class SyncIndexRequest(BaseModel):
26
+ """Request to sync a text index."""
27
+
28
+ resources: ResourceClass
29
+
30
+
31
+ class SyncIndexResponse(BaseModel):
32
+ worker_id: WorkerId
33
+
34
+
35
+ class TextIndexesService(ServiceBase):
36
+ """Service for workload operations."""
37
+
38
+ def create_worker(self, index_id: IndexId, request: CreateWorkerRequest) -> CreateWorkerResponse:
39
+ """Create a new search worker."""
40
+ return self.client.post(f"/v1/text-indexes/{index_id}/workers", request, CreateWorkerResponse)
41
+
42
+ def list_workers(self, index_id: IndexId) -> Paged[WorkerId]:
43
+ """List text index workers for the given index."""
44
+ return self.client.paged(f"/v1/text-indexes/{index_id}/workers", PagedResponse[WorkerId])
45
+
46
+ def get_worker(self, worker_id: WorkerId) -> TextSearchWorker:
47
+ """Get a text index worker."""
48
+ return self.client.get(f"/v1/text-index-workers/{worker_id}", TextSearchWorker)
49
+
50
+ def shutdown_worker(self, worker_id: WorkerId) -> None:
51
+ """Shutdown a text index worker."""
52
+ return self.client.delete(f"/v1/text-index-workers/{worker_id}", type[None])
53
+
54
+ def sync_index(self, index_id: IndexId, request: SyncIndexRequest) -> SyncIndexResponse:
55
+ """Start a job to sync an index."""
56
+ return self.client.post(f"/v1/text-indexes/{index_id}/sync", request, SyncIndexResponse)
spiral/api/types.py CHANGED
@@ -13,6 +13,8 @@ UserId = str
13
13
  OrgId = str
14
14
  ProjectId = str
15
15
  RoleId = str
16
+ IndexId = str
17
+ WorkerId = str
16
18
 
17
19
  RootUri = Annotated[str, AfterValidator(_validate_root_uri)]
18
20
  DatasetName = Annotated[str, StringConstraints(max_length=128, pattern=r"^[a-zA-Z_][a-zA-Z0-9_-]+$")]
spiral/api/workers.py ADDED
@@ -0,0 +1,40 @@
1
+ from enum import Enum, IntEnum
2
+
3
+
4
+ class CPU(IntEnum):
5
+ ONE = 1
6
+ TWO = 2
7
+ FOUR = 4
8
+ EIGHT = 8
9
+
10
+ def __str__(self):
11
+ return str(self.value)
12
+
13
+
14
+ class Memory(str, Enum):
15
+ MB_512 = "512Mi"
16
+ GB_1 = "1Gi"
17
+ GB_2 = "2Gi"
18
+ GB_4 = "4Gi"
19
+ GB_8 = "8Gi"
20
+
21
+ def __str__(self):
22
+ return self.value
23
+
24
+
25
+ class GcpRegion(str, Enum):
26
+ US_EAST4 = "us-east4"
27
+ EUROPE_WEST4 = "europe-west4"
28
+
29
+ def __str__(self):
30
+ return self.value
31
+
32
+
33
+ class ResourceClass(str, Enum):
34
+ """Resource class for text index sync."""
35
+
36
+ SMALL = "small"
37
+ LARGE = "large"
38
+
39
+ def __str__(self):
40
+ return self.value
@@ -1,9 +1,18 @@
1
+ from typing import Annotated
2
+
1
3
  import rich
4
+ from typer import Option
2
5
 
6
+ from spiral.api.text_indexes import SyncIndexRequest
3
7
  from spiral.cli import AsyncTyper, state
8
+ from spiral.cli.indexes.args import get_text_index_id
4
9
  from spiral.cli.types import ProjectArg
5
10
 
11
+ from ...api.workers import ResourceClass
12
+ from . import workers
13
+
6
14
  app = AsyncTyper(short_help="Indexes.")
15
+ app.add_typer(workers.app, name="workers")
7
16
 
8
17
 
9
18
  @app.command(help="List indexes.")
@@ -17,3 +26,15 @@ def ls(
17
26
  for index in indexes:
18
27
  rich_table.add_row(index.id, index.name)
19
28
  rich.print(rich_table)
29
+
30
+
31
+ @app.command(help="Trigger a sync job for the index.")
32
+ def sync(
33
+ project: ProjectArg,
34
+ name: Annotated[str | None, Option(help="Index name.")] = None,
35
+ resources: Annotated[ResourceClass, Option(help="Resources to use for the sync job.")] = ResourceClass.SMALL,
36
+ ):
37
+ """Trigger a sync job for the index."""
38
+ index_id = get_text_index_id(project, name)
39
+ response = state.spiral.api.text_indexes.sync_index(index_id, SyncIndexRequest(resources=resources))
40
+ rich.print(f"Triggered sync job {response.worker_id} for index {index_id}.")
@@ -0,0 +1,39 @@
1
+ from typing import Annotated
2
+
3
+ import questionary
4
+ import rich
5
+ import typer
6
+ from questionary import Choice
7
+ from typer import Option
8
+
9
+ from spiral.api.projects import TextIndexResource
10
+ from spiral.api.types import IndexId
11
+ from spiral.cli import state
12
+ from spiral.cli.types import ProjectArg
13
+
14
+
15
+ def ask_index(project_id, title="Select an index"):
16
+ indexes: list[TextIndexResource] = list(state.spiral.project(project_id).indexes.list_indexes())
17
+
18
+ if not indexes:
19
+ rich.print("[red]No indexes found[/red]")
20
+ raise typer.Exit(1)
21
+
22
+ return questionary.select(
23
+ title,
24
+ choices=[Choice(title=index.name, value=index.id) for index in sorted(indexes, key=lambda t: (t.name, t.id))],
25
+ ).ask()
26
+
27
+
28
+ def get_text_index_id(
29
+ project: ProjectArg,
30
+ name: Annotated[str | None, Option(help="Index name.")] = None,
31
+ ) -> IndexId:
32
+ if name is None:
33
+ return ask_index(project)
34
+
35
+ indexes: list[TextIndexResource] = list(state.spiral.project(project).indexes.list_indexes())
36
+ for index in indexes:
37
+ if index.name == name:
38
+ return index.id
39
+ raise ValueError(f"Index not found: {name}")
@@ -0,0 +1,59 @@
1
+ from typing import Annotated
2
+
3
+ import rich
4
+ from typer import Option
5
+
6
+ from spiral.api.text_indexes import CreateWorkerRequest
7
+ from spiral.api.workers import CPU, GcpRegion, Memory
8
+ from spiral.cli import AsyncTyper, state
9
+ from spiral.cli.indexes.args import get_text_index_id
10
+ from spiral.cli.types import ProjectArg
11
+
12
+ app = AsyncTyper(short_help="Text Search Workers.")
13
+
14
+
15
+ @app.command(name="serve", help="Create a search worker.")
16
+ def serve(
17
+ project: ProjectArg,
18
+ index: Annotated[str | None, Option(help="Index name.")] = None,
19
+ region: Annotated[GcpRegion, Option(help="GCP region for the worker.")] = GcpRegion.US_EAST4,
20
+ cpu: Annotated[CPU, Option(help="CPU resources for the worker.")] = CPU.ONE,
21
+ memory: Annotated[Memory, Option(help="Memory resources for the worker in MB.")] = Memory.MB_512,
22
+ ):
23
+ """Create a new text search worker."""
24
+ index_id = get_text_index_id(project, index)
25
+ request = CreateWorkerRequest(cpu=cpu, memory=memory, region=region)
26
+ response = state.spiral.api.text_indexes.create_worker(index_id, request)
27
+ rich.print(f"Created worker {response.worker_id} for {index_id}.")
28
+
29
+
30
+ @app.command(name="shutdown", help="Shutdown a search worker.")
31
+ def shutdown(worker_id: str):
32
+ """Shutdown a worker."""
33
+ state.spiral.api.text_indexes.shutdown_worker(worker_id)
34
+ rich.print(f"Requested worker {worker_id} to shutdown.")
35
+
36
+
37
+ @app.command(name="ls", help="List search workers.")
38
+ def ls(
39
+ project: ProjectArg,
40
+ index: Annotated[str | None, Option(help="Index name.")] = None,
41
+ ):
42
+ """List text search workers."""
43
+ index_id = get_text_index_id(project, index)
44
+ worker_ids = state.spiral.api.text_indexes.list_workers(index_id)
45
+
46
+ rich_table = rich.table.Table("Worker ID", "URL", title=f"Text Search Workers for {index_id}")
47
+ for worker_id in worker_ids:
48
+ try:
49
+ worker = state.spiral.api.text_indexes.get_worker(worker_id)
50
+ rich_table.add_row(
51
+ worker_id,
52
+ worker.url,
53
+ )
54
+ except Exception:
55
+ rich_table.add_row(
56
+ worker_id,
57
+ "Unavailable",
58
+ )
59
+ rich.print(rich_table)
@@ -1,35 +1,16 @@
1
1
  from typing import Annotated
2
2
 
3
- import questionary
4
3
  import rich
5
- import typer
6
- from questionary import Choice
7
4
  from typer import Argument, Option
8
5
 
9
6
  from spiral import Spiral
10
- from spiral.cli import AsyncTyper, state
7
+ from spiral.cli import AsyncTyper
8
+ from spiral.cli.tables.args import get_table
11
9
  from spiral.cli.types import ProjectArg
12
- from spiral.tables import Table
13
10
 
14
11
  app = AsyncTyper(short_help="Spiral Tables.")
15
12
 
16
13
 
17
- def ask_table(project_id, title="Select a table"):
18
- tables = list(state.spiral.project(project_id).tables.list_tables())
19
-
20
- if not tables:
21
- rich.print("[red]No tables found[/red]")
22
- raise typer.Exit(1)
23
-
24
- return questionary.select(
25
- title,
26
- choices=[
27
- Choice(title=f"{table.dataset}.{table.table}", value=f"{table.project_id}.{table.dataset}.{table.table}")
28
- for table in tables
29
- ],
30
- ).ask()
31
-
32
-
33
14
  @app.command(help="List tables.")
34
15
  def ls(
35
16
  project: ProjectArg,
@@ -48,7 +29,7 @@ def key_schema(
48
29
  table: Annotated[str | None, Option(help="Table name.")] = None,
49
30
  dataset: Annotated[str | None, Option(help="Dataset name.")] = None,
50
31
  ):
51
- _, table = _get_table(project, table, dataset)
32
+ _, table = get_table(project, table, dataset)
52
33
  rich.print(table.key_schema)
53
34
 
54
35
 
@@ -58,7 +39,7 @@ def schema(
58
39
  table: Annotated[str | None, Option(help="Table name.")] = None,
59
40
  dataset: Annotated[str | None, Option(help="Dataset name.")] = None,
60
41
  ):
61
- _, table = _get_table(project, table, dataset)
42
+ _, table = get_table(project, table, dataset)
62
43
  rich.print(table.schema)
63
44
 
64
45
 
@@ -68,7 +49,7 @@ def flush(
68
49
  table: Annotated[str | None, Option(help="Table name.")] = None,
69
50
  dataset: Annotated[str | None, Option(help="Dataset name.")] = None,
70
51
  ):
71
- identifier, table = _get_table(project, table, dataset)
52
+ identifier, table = get_table(project, table, dataset)
72
53
  table.maintenance().flush_wal()
73
54
  print(f"Flushed WAL for table {identifier} in project {project}.")
74
55
 
@@ -80,7 +61,7 @@ def debug(
80
61
  dataset: Annotated[str | None, Option(help="Dataset name.")] = None,
81
62
  column_group: Annotated[str, Argument(help="Dot-separated column group path.")] = ".",
82
63
  ):
83
- _, table = _get_table(project, table, dataset)
64
+ _, table = get_table(project, table, dataset)
84
65
  if column_group != ".":
85
66
  projection = table[column_group]
86
67
  else:
@@ -97,7 +78,7 @@ def manifests(
97
78
  dataset: Annotated[str | None, Option(help="Dataset name.")] = None,
98
79
  column_group: Annotated[str, Argument(help="Dot-separated column group path.")] = ".",
99
80
  ):
100
- _, table = _get_table(project, table, dataset)
81
+ _, table = get_table(project, table, dataset)
101
82
  if column_group != ".":
102
83
  projection = table[column_group]
103
84
  else:
@@ -105,17 +86,3 @@ def manifests(
105
86
  scan = projection.scan()
106
87
 
107
88
  scan._dump_manifests()
108
-
109
-
110
- def _get_table(
111
- project: ProjectArg,
112
- table: Annotated[str | None, Option(help="Table name.")] = None,
113
- dataset: Annotated[str | None, Option(help="Dataset name.")] = None,
114
- ) -> (str, Table):
115
- if table is None:
116
- identifier = ask_table(project)
117
- else:
118
- identifier = table
119
- if dataset is not None:
120
- identifier = f"{dataset}.{table}"
121
- return identifier, state.spiral.project(project).tables.table(identifier)
@@ -0,0 +1,42 @@
1
+ from typing import Annotated
2
+
3
+ import questionary
4
+ import rich
5
+ import typer
6
+ from questionary import Choice
7
+ from typer import Option
8
+
9
+ from spiral.api.projects import TableResource
10
+ from spiral.cli import state
11
+ from spiral.cli.types import ProjectArg
12
+ from spiral.tables import Table
13
+
14
+
15
+ def ask_table(project_id, title="Select a table"):
16
+ tables: list[TableResource] = list(state.spiral.project(project_id).tables.list_tables())
17
+
18
+ if not tables:
19
+ rich.print("[red]No tables found[/red]")
20
+ raise typer.Exit(1)
21
+
22
+ return questionary.select(
23
+ title,
24
+ choices=[
25
+ Choice(title=f"{table.dataset}.{table.table}", value=f"{table.project_id}.{table.dataset}.{table.table}")
26
+ for table in sorted(tables, key=lambda t: (t.dataset, t.table))
27
+ ],
28
+ ).ask()
29
+
30
+
31
+ def get_table(
32
+ project: ProjectArg,
33
+ table: Annotated[str | None, Option(help="Table name.")] = None,
34
+ dataset: Annotated[str | None, Option(help="Dataset name.")] = None,
35
+ ) -> (str, Table):
36
+ if table is None:
37
+ identifier = ask_table(project)
38
+ else:
39
+ identifier = table
40
+ if dataset is not None:
41
+ identifier = f"{dataset}.{table}"
42
+ return identifier, state.spiral.project(project).tables.table(identifier)
@@ -1,15 +1,7 @@
1
1
  import pyarrow as pa
2
2
 
3
- class IndexStatus:
4
- status: str
5
- staleness_s: int | None
6
- # An extent of keys that are indexed.
7
- # key_extent: KeyExtent | None
8
-
9
3
  class TextIndex:
10
4
  id: str
11
5
 
12
- def status(self) -> IndexStatus: ...
13
-
14
6
  class SearchScan:
15
7
  def to_record_batches(self) -> pa.RecordBatchReader: ...
@@ -69,16 +69,16 @@ class TableScan:
69
69
  shuffle_buffer_size: int | None = None,
70
70
  shuffle_pool_num_rows: int | None = None,
71
71
  ) -> pa.RecordBatchReader: ...
72
- def column_group_scan(self, column_group: ColumnGroup) -> ColumnGroupScan: ...
73
- def key_space_scan(self, table_id: str) -> KeySpaceScan: ...
72
+ def column_group_state(self, column_group: ColumnGroup) -> ColumnGroupState: ...
73
+ def key_space_state(self, table_id: str) -> KeySpaceState: ...
74
74
  def metrics(self) -> dict[str, Any]: ...
75
75
 
76
- class KeySpaceScan:
76
+ class KeySpaceState:
77
77
  manifest: FragmentManifest
78
78
 
79
79
  def key_schema(self) -> Schema: ...
80
80
 
81
- class ColumnGroupScan:
81
+ class ColumnGroupState:
82
82
  manifest: FragmentManifest
83
83
 
84
84
  def schema(self) -> Schema: ...
spiral/indexes/index.py CHANGED
@@ -1,4 +1,3 @@
1
- import datetime
2
1
  from typing import TYPE_CHECKING
3
2
 
4
3
  from spiral.core.index import TextIndex as CoreTextIndex
@@ -27,8 +26,3 @@ class TextIndex(Expr):
27
26
  @property
28
27
  def name(self) -> str:
29
28
  return self._name
30
-
31
- def status(self) -> (str, datetime.timedelta | None):
32
- """Fetch the status of the index. If status is ready, returns the staleness of the index."""
33
- status = self._index.status()
34
- return status.status, datetime.timedelta(seconds=status.staleness_s) if status.staleness_s is not None else None
spiral/tables/client.py CHANGED
@@ -25,6 +25,9 @@ class Tables:
25
25
  """
26
26
 
27
27
  def __init__(self, api: SpiralAPI, spiral: CoreSpiral, *, project_id: str | None = None):
28
+ if project_id == "":
29
+ raise ValueError("Project ID cannot be an empty string.")
30
+
28
31
  self._api = api
29
32
  self._spiral = spiral
30
33
  self._project_id = project_id
@@ -10,14 +10,14 @@ def display_manifests(scan: TableScan):
10
10
  raise NotImplementedError("Multiple table scans are not supported.")
11
11
  table_id = scan.table_ids()[0]
12
12
 
13
- key_space_manifest: FragmentManifest = scan.key_space_scan(table_id).manifest
13
+ key_space_manifest: FragmentManifest = scan.key_space_state(table_id).manifest
14
14
  _table_of_fragments(
15
15
  key_space_manifest,
16
16
  title="Key Space manifest",
17
17
  )
18
18
 
19
19
  for column_group in scan.column_groups():
20
- column_group_manifest: FragmentManifest = scan.column_group_scan(column_group).manifest
20
+ column_group_manifest: FragmentManifest = scan.column_group_state(column_group).manifest
21
21
  _table_of_fragments(
22
22
  column_group_manifest,
23
23
  title=f"Column Group manifest for {str(column_group)}",
@@ -15,17 +15,17 @@ def show_scan(scan: TableScan):
15
15
  column_groups = scan.column_groups()
16
16
 
17
17
  splits = scan.split()
18
- key_space_scan = scan.key_space_scan(table_id)
18
+ key_space_state = scan.key_space_state(table_id)
19
19
 
20
20
  # Collect all key bounds from all manifests. This makes sure all visualizations are aligned.
21
21
  key_points = set()
22
- key_space_manifest = key_space_scan.manifest
22
+ key_space_manifest = key_space_state.manifest
23
23
  for i in range(len(key_space_manifest)):
24
24
  fragment_file = key_space_manifest[i]
25
25
  key_points.add(fragment_file.key_extent.min)
26
26
  key_points.add(fragment_file.key_extent.max)
27
27
  for cg in column_groups:
28
- cg_scan = scan.column_group_scan(cg)
28
+ cg_scan = scan.column_group_state(cg)
29
29
  cg_manifest = cg_scan.manifest
30
30
  for i in range(len(cg_manifest)):
31
31
  fragment_file = cg_manifest[i]
@@ -39,7 +39,7 @@ def show_scan(scan: TableScan):
39
39
 
40
40
  show_manifest(key_space_manifest, scope="Key space", key_points=key_points, splits=splits)
41
41
  for cg in scan.column_groups():
42
- cg_scan = scan.column_group_scan(cg)
42
+ cg_scan = scan.column_group_state(cg)
43
43
  # Skip table id from the start of the column group.
44
44
  show_manifest(cg_scan.manifest, scope=".".join(cg.path[1:]), key_points=key_points, splits=splits)
45
45