pyspiral 0.4.3__cp310-abi3-macosx_11_0_arm64.whl → 0.5.0__cp310-abi3-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pyspiral
3
- Version: 0.4.3
3
+ Version: 0.5.0
4
4
  Classifier: Intended Audience :: Science/Research
5
5
  Classifier: Operating System :: OS Independent
6
6
  Classifier: Programming Language :: Python
@@ -13,22 +13,23 @@ Classifier: Programming Language :: Python :: 3.13
13
13
  Classifier: Programming Language :: Rust
14
14
  Classifier: License :: Other/Proprietary License
15
15
  Requires-Dist: betterproto==2.0.0b7
16
+ Requires-Dist: datasets>=4.0.0
16
17
  Requires-Dist: google-re2>=1.1.20240702
17
18
  Requires-Dist: grpclib>=0.4.7
18
19
  Requires-Dist: hishel>=0.0.30
19
20
  Requires-Dist: httpx>=0.27.0
21
+ Requires-Dist: nanoid>=2.0.0
20
22
  Requires-Dist: numpy>=2
21
23
  Requires-Dist: pyarrow>=21.0.0
22
24
  Requires-Dist: pydantic-settings>=2.3.4
23
25
  Requires-Dist: pydantic[email]>=2.5.3
24
26
  Requires-Dist: pyjwt[crypto]>=2.9.0
27
+ Requires-Dist: pyperclip>=1.9.0
25
28
  Requires-Dist: questionary>=2.0.1
29
+ Requires-Dist: sqlglot[rs]>=25.25.1
26
30
  Requires-Dist: tqdm>=4.66.5
27
31
  Requires-Dist: typer>=0.16
28
32
  Requires-Dist: xxhash>=3.4.1
29
- Requires-Dist: nanoid>=2.0.0
30
- Requires-Dist: sqlglot[rs]>=25.25.1
31
- Requires-Dist: pyperclip>=1.9.0
32
33
  Requires-Dist: polars>=1.31.0 ; extra == 'polars'
33
34
  Requires-Dist: duckdb>=1.3.2 ; extra == 'duckdb'
34
35
  Requires-Dist: pyiceberg>=0.9.1 ; extra == 'pyiceberg'
@@ -1,17 +1,19 @@
1
- pyspiral-0.4.3.dist-info/METADATA,sha256=vSc4ZGaqT4FiTed4rIQdo6sMQBXs690OKn-z6o1gfLQ,1610
2
- pyspiral-0.4.3.dist-info/WHEEL,sha256=Mdosfxua6Dx1zYgObRH97e3wyiELqBbLtoRJj4RUSQE,103
3
- pyspiral-0.4.3.dist-info/entry_points.txt,sha256=uft7u-a6g40NLt4Q6BleWbK4NY0M8nZuYPpP8DV0EOk,45
1
+ pyspiral-0.5.0.dist-info/METADATA,sha256=J_bR2LwG0i6M8wB0-60AXJm8MutIfqs9QUbaINmBeTI,1641
2
+ pyspiral-0.5.0.dist-info/WHEEL,sha256=Mdosfxua6Dx1zYgObRH97e3wyiELqBbLtoRJj4RUSQE,103
3
+ pyspiral-0.5.0.dist-info/entry_points.txt,sha256=uft7u-a6g40NLt4Q6BleWbK4NY0M8nZuYPpP8DV0EOk,45
4
4
  spiral/__init__.py,sha256=Jv1vbcnnmcTsBLN5mSNjnX3ae4C_mgojXDSBFaqIhN0,208
5
- spiral/_lib.abi3.so,sha256=NUSuOVRTgkgwtCLTg5IaYwewnboJsyVJEX4L95NeINE,59953376
6
- spiral/adbc.py,sha256=HcvR60uQeEK2oggSAK6y5VYtIrACIiCQ-85MEf18EZc,14199
7
- spiral/api/__init__.py,sha256=_7BS1RhqEFjnt3XwFWZNCHVEQeSKpezPevAiGCsvDbE,1776
5
+ spiral/_lib.abi3.so,sha256=4J6RSb9LxUWqUnRZeyccSneyjqEFfIrJ6lMCnsNUqbc,60398416
6
+ spiral/adbc.py,sha256=RIIWBew7zPoQa_h3I-A-nX9cUMDM3D3Je0mqE9aDX9k,14885
7
+ spiral/api/__init__.py,sha256=nzZK3r1K2GSaqaLoIba4WuyUyG7ApjLZ4fyJovT1unA,2000
8
8
  spiral/api/admin.py,sha256=A1iVR1XYJSObZivPAD5UzmPuMgupXc9kaHNYYa_kwfs,585
9
- spiral/api/client.py,sha256=9-L6T8niQAXo90jRxllJD4hXXmcGfHj7CW9X3XTYa5Q,4551
10
- spiral/api/filesystems.py,sha256=5Ky_otnresGj7WdsR8Xi7DDM3lkB8UES6Lru_xWAGDM,4559
9
+ spiral/api/client.py,sha256=XQaOd1DgLTDOxW_uZUeTX5UCnUpPV1unjxbzWLlPJaM,4650
10
+ spiral/api/filesystems.py,sha256=EA4iqhTeaIlvObvEUxHmZl0pQ24IOxUVWM3GPhFLw8o,4969
11
11
  spiral/api/organizations.py,sha256=B-8zZ7lFJANGK7dUNbo_aU-cgI959JBP9VcWb6wdgi0,1895
12
- spiral/api/projects.py,sha256=JBGof9A2Ivasu2jrULMjHBwlna0M8WRrTNqU-Es4GJ8,5673
12
+ spiral/api/projects.py,sha256=mvyp4tnUhItNnPnyWsj0WrSf2ca3mggL7KjXs5Fllco,5671
13
13
  spiral/api/telemetry.py,sha256=tfdA3E_EWJwFVxkQfkm8tiYGRubnx2LuE5nbfsk1oG4,474
14
- spiral/api/types.py,sha256=zx-BRKsi1GHg9aL9gMUaVQWYYMXJcP0A8OQUc7jSIAc,653
14
+ spiral/api/text_indexes.py,sha256=f8AcrYaBKsaka5A4okuExl96A-2rBbsgru98l_xogwo,1826
15
+ spiral/api/types.py,sha256=lGdiKViRgIEJXD2ubwnyEIEwHkfRumlZjVEaHMV3Tm8,682
16
+ spiral/api/workers.py,sha256=0wZNUHMioDT53P1OBJfpjyDfIodHwwT6858z2IlRIM4,636
15
17
  spiral/api/workloads.py,sha256=XAyXV7vgZcoyyoPoGvOT4jTpyFKFMvrrAfhL6d1h1kE,1748
16
18
  spiral/arrow_.py,sha256=T1LZ7bh9aMDbXfpUsf0dR0E1roTQyAYSgZ2mL4s8J_4,7681
17
19
  spiral/cli/__init__.py,sha256=ooAFz_iCpVCKHE0TiVElIynbP2PtTgD9cUw46Vh1lcw,2145
@@ -23,34 +25,37 @@ spiral/cli/fs.py,sha256=dVPoAoAbuQ9yJlfI-JiFgS9VdnPmeBMygVHgehJRj34,4367
23
25
  spiral/cli/iceberg/__init__.py,sha256=IQV_gwCFSj6Ubxs58VM9Pal1ymgG2bxdDgOPuk9E5bs,214
24
26
  spiral/cli/iceberg/namespaces.py,sha256=x9pvHlcXtcATYYjqimHa6CtkyL3taQUJ--ni_Bfoemc,1510
25
27
  spiral/cli/iceberg/tables.py,sha256=nSR4-t54otJfCmubB6vXnbOkbqPVGV0sHBlc-t9cIVg,1930
26
- spiral/cli/indexes/__init__.py,sha256=-USfxCIdckzZKBNQ-DXqe3V5ttWVo_Fsa1Mfcx5hdIw,467
28
+ spiral/cli/indexes/__init__.py,sha256=yNMBZh3kAz1NXsridvbY4-4jNIJAgntBhDnu1EfaZTI,1291
29
+ spiral/cli/indexes/args.py,sha256=B4zAFnVZKPHq-_z8qnHYwgs1v4c-w_iypbA5MdXL67s,1139
30
+ spiral/cli/indexes/workers.py,sha256=mu-7-Asz1txNtCkoei6q-m-lE5YhLUjCDwstKJcew40,2169
27
31
  spiral/cli/login.py,sha256=InKMnpV8NATW5RPgB3ZL-DSVPzUuUByyK4Fx7pZEgfg,607
28
32
  spiral/cli/orgs.py,sha256=V-4ZTT3FwFQLcs1-BenC8uCgvWOJcxkZPSdCPfsexhc,2848
29
33
  spiral/cli/printer.py,sha256=W83KAE-7meoDD1yRltLQrZqrA2olGapBGy_2USWkY08,1778
30
34
  spiral/cli/projects.py,sha256=TKXu_VzkIUccwXzdlg-wQMkrB-Py33g052NrbuJx-D4,5096
31
35
  spiral/cli/state.py,sha256=10wTIVQ0SJkY67Z6-KQ1LFlt3aVIPmZhoHFdTwp4kNA,130
32
- spiral/cli/tables/__init__.py,sha256=lkGLDeU28IVnuxJdlYSUh6QSB9fQ4_1MeZJL73iXcHo,3660
36
+ spiral/cli/tables/__init__.py,sha256=DTxviiflWZkUDmPhuGzXmhW2mNRgPNkJcaP7olFtXpc,2678
37
+ spiral/cli/tables/args.py,sha256=bxKQoJuWhCTHlDYz_WRQ6_Kp9XgRBH1UIMgUMzlxliA,1262
33
38
  spiral/cli/telemetry.py,sha256=ABDCyV5QJGOIJp4AxvK0LG5xNPIysP37K5haL38T7P4,586
34
39
  spiral/cli/types.py,sha256=YG1eHhRLaqlVU_18DQBuF_YMsabhMZLBY0V9CvbSxjY,1369
35
40
  spiral/cli/workloads.py,sha256=SbxgwiBlX1AuqpOLV3gs7DFkH-Tbeend7qJTwq0Je84,1994
36
41
  spiral/client.py,sha256=K-OuMOTgYxOA9vef5jSANjmPRBfGrzQ65fg6Fd-rHMY,2683
37
42
  spiral/core/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
38
43
  spiral/core/client/__init__.pyi,sha256=Tn1OJmkO1rQUsPE9BtfEyxIjoife6s16qOd8XiyHi2c,3475
39
- spiral/core/index/__init__.pyi,sha256=NPOG1ztFO6siBGpmJU3boRzX26xfxw--2TiCydosGvo,314
40
- spiral/core/table/__init__.pyi,sha256=dwOaxcOl6ZIlxoLjOnC3CNUgGetWfnEV1Jx06aCH8M8,3265
44
+ spiral/core/index/__init__.pyi,sha256=MBq-jBuTmBreYMJ4AJFAe4e-ByRMM7JVssuEJMLVfQs,131
45
+ spiral/core/table/__init__.pyi,sha256=XhN9xpDdBnugRhtiP9ThC3DSADoLMDL9lxSjjY7fJ28,3296
41
46
  spiral/core/table/manifests/__init__.pyi,sha256=3V59-K1qr1z2dGfgRKXaHSVheK8NNw8Q8PFhfbeQd_4,1065
42
47
  spiral/core/table/metastore/__init__.pyi,sha256=dMqySDnsjPUTBuFU2MaQGyocKEoGkWpeTQmUP2iIKbc,1880
43
48
  spiral/core/table/spec/__init__.pyi,sha256=D4GQp9RWwyLKTlRW7eDXcQE-xA5rF2iBcXZ8y7b48EE,5595
44
49
  spiral/datetime_.py,sha256=1TA1RYIRU22qcUuipIjVhAtGnPDVn2z9WttuhkmfkwY,964
45
- spiral/expressions/__init__.py,sha256=hLh2qfHxM7hJg6GidRApqczwE80vatizkKN6YDUAAUA,6570
50
+ spiral/expressions/__init__.py,sha256=T8PIb0_UB9kynK0dpWbUD4No5lKRTG-wKnao8xOcXjY,6381
46
51
  spiral/expressions/base.py,sha256=q_W9XslcdFQtOIE_d1VkEmLickaXKOAoIcFeMoh-nqQ,4751
47
52
  spiral/expressions/http.py,sha256=begUydWoFHEqjeLkATvI_v66Ez6_rR-OQBWO5cHbb9c,2742
48
53
  spiral/expressions/io.py,sha256=gJ2a0FKMmdxarWKENulPRwH7KDvSJTIh_OUxX306xAM,3045
49
54
  spiral/expressions/list_.py,sha256=MMt5lf5H1M3O-x6N_PvqOLGq9NOk6Ukv0fPWwPC_uy4,1809
50
- spiral/expressions/mp4.py,sha256=R-fcVYRI6KaH1Nwpmqnsc1VYd9wA7Nuiy2UDcNxEzpw,2165
55
+ spiral/expressions/mp4.py,sha256=_xGVnkygddzxP9a8OACJ8_KXnejuVbYCVKBCXBQ798Y,2151
51
56
  spiral/expressions/png.py,sha256=KO8X0OmMzUFwpg2I_j0JTyldPzVXDWIMzjWMWDV9vIY,506
52
57
  spiral/expressions/qoi.py,sha256=gvIbb6fXb_Bb080sn9wkpbGGrPs2UEcTXCfuv4-kcYQ,506
53
- spiral/expressions/refs.py,sha256=ISMtJtUL--BjHF6rsvgN3Um4QcvVqQE9URngOxjQrhw,2115
58
+ spiral/expressions/refs.py,sha256=omeHBQ5o6N4xgZ3x5Xz7IRrWwYBBtQY8DYK0NNAxeGo,2109
54
59
  spiral/expressions/str_.py,sha256=tY8RXW3JWvr1-bEfCZtk5FAf11wKJnXPuA9EoeJ9tA4,1265
55
60
  spiral/expressions/struct.py,sha256=pGAnCDh6AK0BK1XfZ1qG4ce4ranIQEE1HQsgmzBcfwQ,2038
56
61
  spiral/expressions/text.py,sha256=-02gBWYoyNQ3qQ1--9HTa8IryUDojYQVIp8C7rgnOWQ,1893
@@ -61,7 +66,7 @@ spiral/iceberg/__init__.py,sha256=jSIlTxWauAbJV5gsWglZisFbnfNNzLYN90scoYcdWzc,65
61
66
  spiral/iceberg/client.py,sha256=E6FyE_h2HLgDW1cAFg1XgglJr6rbVOCWjRtRmqoMVkM,1003
62
67
  spiral/indexes/__init__.py,sha256=TXLQ-_3xso3lFIp2lM58_ip9OPNwPKFv1FdsWiUF-d8,178
63
68
  spiral/indexes/client.py,sha256=NsFBILEHMjyCUruFrUEKucRQRrN4OvqgbL4pmzWs07g,5600
64
- spiral/indexes/index.py,sha256=4CmSFlZYp46B2CjqtiyZ7VF5EH3duiutz3nWFnyApLA,973
69
+ spiral/indexes/index.py,sha256=TJB1-hSiPNrzJ_VYFTZGHvIQ_vmoILPK7tyi_9oS1nA,638
65
70
  spiral/indexes/scan.py,sha256=B2m-UgNuawNB90HXK33GTQfMy2WLdNNxiiB6cIjFW2Y,697
66
71
  spiral/project.py,sha256=0uJ1Jb88Ie-cCNnSdX3QfFtCUqrjLka4zCm_TxCpVak,1189
67
72
  spiral/protogen/_/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -81,18 +86,18 @@ spiral/protogen/util.py,sha256=smnvVo6nYH3FfDm9jqhNLaXz4bbTBaQezHQDCTvZyiQ,1486
81
86
  spiral/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
82
87
  spiral/server.py,sha256=ztBmB5lBnUz-smQxR_tC8AI5SOhz17wH0MI3GuzDUdM,600
83
88
  spiral/settings.py,sha256=PIQV2ljtB3pEOWoMRVSRzSGJNrXviO2JBgZ5ZY_Nq2E,2794
84
- spiral/substrait_.py,sha256=RNSmfbGFT_5uyo8AFtzS9A7IHW3DkacMTw2vKnj0Das,12762
89
+ spiral/substrait_.py,sha256=2BYvwFGcCwJ0JXNhXOLdPuhM1PqFyaeSqFpQCtv-M4E,12581
85
90
  spiral/tables/__init__.py,sha256=iiP7BkHA117em37_e75jtdvoZC10xCXtld18gRnPbTw,430
86
- spiral/tables/client.py,sha256=l_wJJRf3BPD5lg4Q1Ll2lAqQIuBCnKwC6JtsAui91Tc,4915
91
+ spiral/tables/client.py,sha256=MVgfeVF7P4kXnjOTQExp0VmywyuuRQ2IZVxdwVY3xgk,5015
87
92
  spiral/tables/dataset.py,sha256=DuHeKVCJfXLsbxmde9QW6yvesW5uhswG6qAxV5X0ZgA,7890
88
93
  spiral/tables/debug/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
89
- spiral/tables/debug/manifests.py,sha256=E_-DiMBg2EPL97cl9hLWhiqEsFtjEBgh_C7jZy8EWYc,2594
94
+ spiral/tables/debug/manifests.py,sha256=t7E0AchHrzOv9vAQpE77Qp3rLc5VTzRRxlByt5OWnUM,2596
90
95
  spiral/tables/debug/metrics.py,sha256=XdRDcjggtsLNGCAjam6IxG9072pz_d2C8iLApNRFUtk,2044
91
- spiral/tables/debug/scan.py,sha256=-IWX_UjO4QP9Hj7PtZ1rLlbswJcryOin56GT-exqFm4,8942
96
+ spiral/tables/debug/scan.py,sha256=EEG2gzbBpUyrtk4jQXh-ENk8aySrKf0CZMym_wHuWm4,8947
92
97
  spiral/tables/maintenance.py,sha256=7Xa2Jdu_OY1Qu6iN1sPVdywVZtk_Mv3EaC3G93cmQvI,305
93
98
  spiral/tables/scan.py,sha256=3lPf5fSyF1fHGdGJ-pvu5HxPWoonf_XL7neWTqzB-0I,7582
94
99
  spiral/tables/snapshot.py,sha256=2NTuVEp2uJ1pV3Q5tLj7FOzPSc9axlfb6uOITwHnj0g,2229
95
- spiral/tables/table.py,sha256=4B2drwwfaoL6aIJ-5Ll-Bqza-EBeDIfMkuszSOZqSpk,5326
100
+ spiral/tables/table.py,sha256=VM93Rsm67sJFendI1_VhlkFORIdBGfhCMBUBK4dve9I,4910
96
101
  spiral/tables/transaction.py,sha256=3a64R-mf_cmR54BNn8U-05jmWonp6Ivxhe6u01Dyjzo,1573
97
102
  spiral/types_.py,sha256=W_jyO7F6rpPiH69jhgSgV7OxQZbOlb1Ho3InpKUP6Eo,155
98
- pyspiral-0.4.3.dist-info/RECORD,,
103
+ pyspiral-0.5.0.dist-info/RECORD,,
spiral/_lib.abi3.so CHANGED
Binary file
spiral/adbc.py CHANGED
@@ -35,6 +35,7 @@ from spiral.protogen._.arrow.flight.protocol.sql import (
35
35
  SqlInfo,
36
36
  SqlSupportedTransaction,
37
37
  )
38
+ from spiral.tables import Snapshot
38
39
 
39
40
  log = logging.getLogger(__name__)
40
41
  logging.getLogger("sqlx").setLevel(logging.WARNING)
@@ -64,7 +65,6 @@ def debuggable(func):
64
65
  return wrapper_decorator
65
66
 
66
67
 
67
- # TODO(marko): This should work for Iceberg tables.
68
68
  class ADBCServerBase:
69
69
  def get_sql_info(self, _req: CommandGetSqlInfo) -> pa.RecordBatchReader:
70
70
  """Default implementation that reports no support for any complex features."""
@@ -143,6 +143,17 @@ class SpiralADBCServer(ADBCServerBase):
143
143
 
144
144
  self.pool = ThreadPoolExecutor()
145
145
 
146
+ def open_snapshot(self, tbl) -> Snapshot:
147
+ """Open a table in the Spiral project and return it as a PyArrow Dataset."""
148
+ if tbl.catalog is None or tbl.catalog == "":
149
+ raise FlightError("Project (Data Catalog) must be specified to open a table.")
150
+
151
+ project = tbl.catalog
152
+ dataset = tbl.db or "default"
153
+ table = tbl.name
154
+
155
+ return self.sp.project(project).tables.table(f"{dataset}.{table}").snapshot()
156
+
146
157
  def get_catalogs(self, req: CommandGetCatalogs) -> pa.RecordBatchReader:
147
158
  schema = pa.schema([pa.field("catalog_name", pa.string(), nullable=False)])
148
159
 
@@ -170,15 +181,16 @@ class SpiralADBCServer(ADBCServerBase):
170
181
  if req.catalog == "":
171
182
  # Empty string means databases _without_ a catalog, which we don't support
172
183
  return
184
+ catalog = req.catalog
173
185
 
174
186
  # Otherwise, catalog is either the project ID, or None.
175
- if req.catalog is None:
187
+ if catalog is None:
176
188
  projects = self.sp.list_projects()
177
189
  else:
178
190
  projects = [self.sp.project(req.catalog)]
179
191
 
180
192
  for project in projects:
181
- datasets = {dt.dataset for dt in project.tables.list_tables()}
193
+ datasets = {tbl.dataset for tbl in project.tables.list_tables()}
182
194
 
183
195
  batch = pa.RecordBatch.from_arrays(
184
196
  [
@@ -219,6 +231,7 @@ class SpiralADBCServer(ADBCServerBase):
219
231
  projects = list(self.sp.list_projects())
220
232
  else:
221
233
  projects = [self.sp.project(req.catalog)]
234
+ projects = sorted(projects, key=lambda p: p.id)
222
235
 
223
236
  def _process_project(project):
224
237
  tables: list[TableResource] = project.tables.list_tables()
@@ -248,12 +261,13 @@ class SpiralADBCServer(ADBCServerBase):
248
261
  def statement_query(self, req: CommandStatementQuery, limit: int | None = None) -> pa.RecordBatchReader:
249
262
  # Extract the tables from the query, and bring them into the Python locals scope.
250
263
  expr = sqlglot.parse_one(req.query, dialect="duckdb")
264
+ datasets = {}
251
265
  for tbl in expr.find_all(exp.Table):
252
266
  # We swap the three-part identifier out for a single identifier
253
- # This lets us insert a PyArrow Dataset into Python locals such that
254
- # DuckDB will pick up on it for the query.
255
- name = exp.table_name(tbl)
256
- locals()[name] = self.sp.tables.table(f"{tbl.catalog}.{tbl.db}.{tbl.name}").snapshot().to_dataset()
267
+ # This lets us register a PyArrow Dataset with DuckDB for the query.
268
+ snapshot = self.open_snapshot(tbl)
269
+ name = snapshot.table.table_id
270
+ datasets[name] = snapshot.to_dataset()
257
271
  tbl.replace(exp.table_(table=name))
258
272
 
259
273
  try:
@@ -262,7 +276,11 @@ class SpiralADBCServer(ADBCServerBase):
262
276
  raise FlightError("DuckDB is required for SQL queries.")
263
277
 
264
278
  try:
265
- sql = duckdb.sql(expr.sql(dialect="duckdb"))
279
+ # Create a DuckDB connection and register the datasets
280
+ conn = duckdb.connect()
281
+ for name, dataset in datasets.items():
282
+ conn.register(name, dataset)
283
+ sql = conn.sql(expr.sql(dialect="duckdb"))
266
284
  except Exception as e:
267
285
  raise FlightError(str(e))
268
286
 
spiral/api/__init__.py CHANGED
@@ -3,6 +3,8 @@ from typing import TYPE_CHECKING
3
3
 
4
4
  import httpx
5
5
 
6
+ from spiral.api.text_indexes import TextIndexesService
7
+
6
8
  from .client import _Client
7
9
 
8
10
  if TYPE_CHECKING:
@@ -57,6 +59,12 @@ class SpiralAPI:
57
59
 
58
60
  return WorkloadService(self.client)
59
61
 
62
+ @property
63
+ def text_indexes(self) -> "TextIndexesService":
64
+ from .text_indexes import TextIndexesService
65
+
66
+ return TextIndexesService(self.client)
67
+
60
68
  @property
61
69
  def telemetry(self) -> "TelemetryService":
62
70
  from .telemetry import TelemetryService
spiral/api/client.py CHANGED
@@ -146,6 +146,10 @@ class _Client:
146
146
  # Enrich the exception with the response body
147
147
  raise SpiralHTTPError(body=resp.text, code=resp.status_code) from e
148
148
 
149
+ if response_cls == type[None]:
150
+ assert resp.text == ""
151
+ return None
152
+
149
153
  return TypeAdapter(response_cls).validate_python(resp.json())
150
154
 
151
155
  def paged(
spiral/api/filesystems.py CHANGED
@@ -1,4 +1,5 @@
1
1
  from enum import Enum
2
+ from types import NoneType
2
3
  from typing import Annotated, Literal
3
4
 
4
5
  from pydantic import AfterValidator, BaseModel, Field
@@ -17,6 +18,7 @@ def _validate_directory_path(path: str) -> str:
17
18
 
18
19
  DirectoryPath = Annotated[str, AfterValidator(_validate_directory_path)]
19
20
  FilePath = str # Path or directory
21
+ FsLoc = str
20
22
 
21
23
 
22
24
  class BuiltinFileSystem(BaseModel):
@@ -120,6 +122,12 @@ class CreateMountResponse(BaseModel):
120
122
  mount: Mount
121
123
 
122
124
 
125
+ class GetMountAndFileSystemResponse(BaseModel):
126
+ mount: Mount
127
+ file_system: FileSystem
128
+ fs_loc: FsLoc
129
+
130
+
123
131
  class FileSystemService(ServiceBase):
124
132
  """Service for file system operations."""
125
133
 
@@ -148,6 +156,10 @@ class FileSystemService(ServiceBase):
148
156
  """Get a mount."""
149
157
  return self.client.get(f"/v1/mounts/{mount_id}", Mount)
150
158
 
159
+ def get_mount_and_file_system(self, mount_id: str) -> GetMountAndFileSystemResponse:
160
+ """Get the mount and its associated file system."""
161
+ return self.client.get(f"/v1/mounts/{mount_id}/with-filesystem", GetMountAndFileSystemResponse)
162
+
151
163
  def remove_mount(self, mount_id: str) -> None:
152
164
  """Remove mount."""
153
- return self.client.delete(f"/v1/mounts/{mount_id}", None)
165
+ return self.client.delete(f"/v1/mounts/{mount_id}", NoneType)
spiral/api/projects.py CHANGED
@@ -192,6 +192,6 @@ class ProjectService(ServiceBase):
192
192
  """Get a grant."""
193
193
  return self.client.get(f"/v1/grants/{grant_id}", Grant)
194
194
 
195
- def revoke_grant(self, grant_id: str) -> None:
195
+ def revoke_grant(self, grant_id: str):
196
196
  """Revoke a grant."""
197
- return self.client.delete(f"/v1/grants/{grant_id}", None)
197
+ return self.client.delete(f"/v1/grants/{grant_id}", type[None])
@@ -0,0 +1,56 @@
1
+ from pydantic import BaseModel
2
+
3
+ from .client import Paged, PagedResponse, ServiceBase
4
+ from .types import IndexId, ProjectId, WorkerId
5
+ from .workers import CPU, GcpRegion, Memory, ResourceClass
6
+
7
+
8
+ class TextSearchWorker(BaseModel):
9
+ worker_id: WorkerId
10
+ project_id: ProjectId
11
+ index_id: IndexId
12
+ url: str | None
13
+
14
+
15
+ class CreateWorkerRequest(BaseModel):
16
+ cpu: CPU
17
+ memory: Memory
18
+ region: GcpRegion
19
+
20
+
21
+ class CreateWorkerResponse(BaseModel):
22
+ worker_id: WorkerId
23
+
24
+
25
+ class SyncIndexRequest(BaseModel):
26
+ """Request to sync a text index."""
27
+
28
+ resources: ResourceClass
29
+
30
+
31
+ class SyncIndexResponse(BaseModel):
32
+ worker_id: WorkerId
33
+
34
+
35
+ class TextIndexesService(ServiceBase):
36
+ """Service for workload operations."""
37
+
38
+ def create_worker(self, index_id: IndexId, request: CreateWorkerRequest) -> CreateWorkerResponse:
39
+ """Create a new search worker."""
40
+ return self.client.post(f"/v1/text-indexes/{index_id}/workers", request, CreateWorkerResponse)
41
+
42
+ def list_workers(self, index_id: IndexId) -> Paged[WorkerId]:
43
+ """List text index workers for the given index."""
44
+ return self.client.paged(f"/v1/text-indexes/{index_id}/workers", PagedResponse[WorkerId])
45
+
46
+ def get_worker(self, worker_id: WorkerId) -> TextSearchWorker:
47
+ """Get a text index worker."""
48
+ return self.client.get(f"/v1/text-index-workers/{worker_id}", TextSearchWorker)
49
+
50
+ def shutdown_worker(self, worker_id: WorkerId) -> None:
51
+ """Shutdown a text index worker."""
52
+ return self.client.delete(f"/v1/text-index-workers/{worker_id}", type[None])
53
+
54
+ def sync_index(self, index_id: IndexId, request: SyncIndexRequest) -> SyncIndexResponse:
55
+ """Start a job to sync an index."""
56
+ return self.client.post(f"/v1/text-indexes/{index_id}/sync", request, SyncIndexResponse)
spiral/api/types.py CHANGED
@@ -13,6 +13,8 @@ UserId = str
13
13
  OrgId = str
14
14
  ProjectId = str
15
15
  RoleId = str
16
+ IndexId = str
17
+ WorkerId = str
16
18
 
17
19
  RootUri = Annotated[str, AfterValidator(_validate_root_uri)]
18
20
  DatasetName = Annotated[str, StringConstraints(max_length=128, pattern=r"^[a-zA-Z_][a-zA-Z0-9_-]+$")]
spiral/api/workers.py ADDED
@@ -0,0 +1,40 @@
1
+ from enum import Enum, IntEnum
2
+
3
+
4
+ class CPU(IntEnum):
5
+ ONE = 1
6
+ TWO = 2
7
+ FOUR = 4
8
+ EIGHT = 8
9
+
10
+ def __str__(self):
11
+ return str(self.value)
12
+
13
+
14
+ class Memory(str, Enum):
15
+ MB_512 = "512Mi"
16
+ GB_1 = "1Gi"
17
+ GB_2 = "2Gi"
18
+ GB_4 = "4Gi"
19
+ GB_8 = "8Gi"
20
+
21
+ def __str__(self):
22
+ return self.value
23
+
24
+
25
+ class GcpRegion(str, Enum):
26
+ US_EAST4 = "us-east4"
27
+ EUROPE_WEST4 = "europe-west4"
28
+
29
+ def __str__(self):
30
+ return self.value
31
+
32
+
33
+ class ResourceClass(str, Enum):
34
+ """Resource class for text index sync."""
35
+
36
+ SMALL = "small"
37
+ LARGE = "large"
38
+
39
+ def __str__(self):
40
+ return self.value
@@ -1,9 +1,18 @@
1
+ from typing import Annotated
2
+
1
3
  import rich
4
+ from typer import Option
2
5
 
6
+ from spiral.api.text_indexes import SyncIndexRequest
3
7
  from spiral.cli import AsyncTyper, state
8
+ from spiral.cli.indexes.args import get_text_index_id
4
9
  from spiral.cli.types import ProjectArg
5
10
 
11
+ from ...api.workers import ResourceClass
12
+ from . import workers
13
+
6
14
  app = AsyncTyper(short_help="Indexes.")
15
+ app.add_typer(workers.app, name="workers")
7
16
 
8
17
 
9
18
  @app.command(help="List indexes.")
@@ -17,3 +26,15 @@ def ls(
17
26
  for index in indexes:
18
27
  rich_table.add_row(index.id, index.name)
19
28
  rich.print(rich_table)
29
+
30
+
31
+ @app.command(help="Trigger a sync job for the index.")
32
+ def sync(
33
+ project: ProjectArg,
34
+ name: Annotated[str | None, Option(help="Index name.")] = None,
35
+ resources: Annotated[ResourceClass, Option(help="Resources to use for the sync job.")] = ResourceClass.SMALL,
36
+ ):
37
+ """Trigger a sync job for the index."""
38
+ index_id = get_text_index_id(project, name)
39
+ response = state.spiral.api.text_indexes.sync_index(index_id, SyncIndexRequest(resources=resources))
40
+ rich.print(f"Triggered sync job {response.worker_id} for index {index_id}.")
@@ -0,0 +1,39 @@
1
+ from typing import Annotated
2
+
3
+ import questionary
4
+ import rich
5
+ import typer
6
+ from questionary import Choice
7
+ from typer import Option
8
+
9
+ from spiral.api.projects import TextIndexResource
10
+ from spiral.api.types import IndexId
11
+ from spiral.cli import state
12
+ from spiral.cli.types import ProjectArg
13
+
14
+
15
+ def ask_index(project_id, title="Select an index"):
16
+ indexes: list[TextIndexResource] = list(state.spiral.project(project_id).indexes.list_indexes())
17
+
18
+ if not indexes:
19
+ rich.print("[red]No indexes found[/red]")
20
+ raise typer.Exit(1)
21
+
22
+ return questionary.select(
23
+ title,
24
+ choices=[Choice(title=index.name, value=index.id) for index in sorted(indexes, key=lambda t: (t.name, t.id))],
25
+ ).ask()
26
+
27
+
28
+ def get_text_index_id(
29
+ project: ProjectArg,
30
+ name: Annotated[str | None, Option(help="Index name.")] = None,
31
+ ) -> IndexId:
32
+ if name is None:
33
+ return ask_index(project)
34
+
35
+ indexes: list[TextIndexResource] = list(state.spiral.project(project).indexes.list_indexes())
36
+ for index in indexes:
37
+ if index.name == name:
38
+ return index.id
39
+ raise ValueError(f"Index not found: {name}")
@@ -0,0 +1,59 @@
1
+ from typing import Annotated
2
+
3
+ import rich
4
+ from typer import Option
5
+
6
+ from spiral.api.text_indexes import CreateWorkerRequest
7
+ from spiral.api.workers import CPU, GcpRegion, Memory
8
+ from spiral.cli import AsyncTyper, state
9
+ from spiral.cli.indexes.args import get_text_index_id
10
+ from spiral.cli.types import ProjectArg
11
+
12
+ app = AsyncTyper(short_help="Text Search Workers.")
13
+
14
+
15
+ @app.command(name="serve", help="Create a search worker.")
16
+ def serve(
17
+ project: ProjectArg,
18
+ index: Annotated[str | None, Option(help="Index name.")] = None,
19
+ region: Annotated[GcpRegion, Option(help="GCP region for the worker.")] = GcpRegion.US_EAST4,
20
+ cpu: Annotated[CPU, Option(help="CPU resources for the worker.")] = CPU.ONE,
21
+ memory: Annotated[Memory, Option(help="Memory resources for the worker in MB.")] = Memory.MB_512,
22
+ ):
23
+ """Create a new text search worker."""
24
+ index_id = get_text_index_id(project, index)
25
+ request = CreateWorkerRequest(cpu=cpu, memory=memory, region=region)
26
+ response = state.spiral.api.text_indexes.create_worker(index_id, request)
27
+ rich.print(f"Created worker {response.worker_id} for {index_id}.")
28
+
29
+
30
+ @app.command(name="shutdown", help="Shutdown a search worker.")
31
+ def shutdown(worker_id: str):
32
+ """Shutdown a worker."""
33
+ state.spiral.api.text_indexes.shutdown_worker(worker_id)
34
+ rich.print(f"Requested worker {worker_id} to shutdown.")
35
+
36
+
37
+ @app.command(name="ls", help="List search workers.")
38
+ def ls(
39
+ project: ProjectArg,
40
+ index: Annotated[str | None, Option(help="Index name.")] = None,
41
+ ):
42
+ """List text search workers."""
43
+ index_id = get_text_index_id(project, index)
44
+ worker_ids = state.spiral.api.text_indexes.list_workers(index_id)
45
+
46
+ rich_table = rich.table.Table("Worker ID", "URL", title=f"Text Search Workers for {index_id}")
47
+ for worker_id in worker_ids:
48
+ try:
49
+ worker = state.spiral.api.text_indexes.get_worker(worker_id)
50
+ rich_table.add_row(
51
+ worker_id,
52
+ worker.url,
53
+ )
54
+ except Exception:
55
+ rich_table.add_row(
56
+ worker_id,
57
+ "Unavailable",
58
+ )
59
+ rich.print(rich_table)
@@ -1,35 +1,16 @@
1
1
  from typing import Annotated
2
2
 
3
- import questionary
4
3
  import rich
5
- import typer
6
- from questionary import Choice
7
4
  from typer import Argument, Option
8
5
 
9
6
  from spiral import Spiral
10
- from spiral.cli import AsyncTyper, state
7
+ from spiral.cli import AsyncTyper
8
+ from spiral.cli.tables.args import get_table
11
9
  from spiral.cli.types import ProjectArg
12
- from spiral.tables import Table
13
10
 
14
11
  app = AsyncTyper(short_help="Spiral Tables.")
15
12
 
16
13
 
17
- def ask_table(project_id, title="Select a table"):
18
- tables = list(state.spiral.project(project_id).tables.list_tables())
19
-
20
- if not tables:
21
- rich.print("[red]No tables found[/red]")
22
- raise typer.Exit(1)
23
-
24
- return questionary.select(
25
- title,
26
- choices=[
27
- Choice(title=f"{table.dataset}.{table.table}", value=f"{table.project_id}.{table.dataset}.{table.table}")
28
- for table in tables
29
- ],
30
- ).ask()
31
-
32
-
33
14
  @app.command(help="List tables.")
34
15
  def ls(
35
16
  project: ProjectArg,
@@ -48,7 +29,7 @@ def key_schema(
48
29
  table: Annotated[str | None, Option(help="Table name.")] = None,
49
30
  dataset: Annotated[str | None, Option(help="Dataset name.")] = None,
50
31
  ):
51
- _, table = _get_table(project, table, dataset)
32
+ _, table = get_table(project, table, dataset)
52
33
  rich.print(table.key_schema)
53
34
 
54
35
 
@@ -58,7 +39,7 @@ def schema(
58
39
  table: Annotated[str | None, Option(help="Table name.")] = None,
59
40
  dataset: Annotated[str | None, Option(help="Dataset name.")] = None,
60
41
  ):
61
- _, table = _get_table(project, table, dataset)
42
+ _, table = get_table(project, table, dataset)
62
43
  rich.print(table.schema)
63
44
 
64
45
 
@@ -68,7 +49,7 @@ def flush(
68
49
  table: Annotated[str | None, Option(help="Table name.")] = None,
69
50
  dataset: Annotated[str | None, Option(help="Dataset name.")] = None,
70
51
  ):
71
- identifier, table = _get_table(project, table, dataset)
52
+ identifier, table = get_table(project, table, dataset)
72
53
  table.maintenance().flush_wal()
73
54
  print(f"Flushed WAL for table {identifier} in project {project}.")
74
55
 
@@ -80,7 +61,7 @@ def debug(
80
61
  dataset: Annotated[str | None, Option(help="Dataset name.")] = None,
81
62
  column_group: Annotated[str, Argument(help="Dot-separated column group path.")] = ".",
82
63
  ):
83
- _, table = _get_table(project, table, dataset)
64
+ _, table = get_table(project, table, dataset)
84
65
  if column_group != ".":
85
66
  projection = table[column_group]
86
67
  else:
@@ -97,7 +78,7 @@ def manifests(
97
78
  dataset: Annotated[str | None, Option(help="Dataset name.")] = None,
98
79
  column_group: Annotated[str, Argument(help="Dot-separated column group path.")] = ".",
99
80
  ):
100
- _, table = _get_table(project, table, dataset)
81
+ _, table = get_table(project, table, dataset)
101
82
  if column_group != ".":
102
83
  projection = table[column_group]
103
84
  else:
@@ -105,17 +86,3 @@ def manifests(
105
86
  scan = projection.scan()
106
87
 
107
88
  scan._dump_manifests()
108
-
109
-
110
- def _get_table(
111
- project: ProjectArg,
112
- table: Annotated[str | None, Option(help="Table name.")] = None,
113
- dataset: Annotated[str | None, Option(help="Dataset name.")] = None,
114
- ) -> (str, Table):
115
- if table is None:
116
- identifier = ask_table(project)
117
- else:
118
- identifier = table
119
- if dataset is not None:
120
- identifier = f"{dataset}.{table}"
121
- return identifier, state.spiral.project(project).tables.table(identifier)
@@ -0,0 +1,42 @@
1
+ from typing import Annotated
2
+
3
+ import questionary
4
+ import rich
5
+ import typer
6
+ from questionary import Choice
7
+ from typer import Option
8
+
9
+ from spiral.api.projects import TableResource
10
+ from spiral.cli import state
11
+ from spiral.cli.types import ProjectArg
12
+ from spiral.tables import Table
13
+
14
+
15
+ def ask_table(project_id, title="Select a table"):
16
+ tables: list[TableResource] = list(state.spiral.project(project_id).tables.list_tables())
17
+
18
+ if not tables:
19
+ rich.print("[red]No tables found[/red]")
20
+ raise typer.Exit(1)
21
+
22
+ return questionary.select(
23
+ title,
24
+ choices=[
25
+ Choice(title=f"{table.dataset}.{table.table}", value=f"{table.project_id}.{table.dataset}.{table.table}")
26
+ for table in sorted(tables, key=lambda t: (t.dataset, t.table))
27
+ ],
28
+ ).ask()
29
+
30
+
31
+ def get_table(
32
+ project: ProjectArg,
33
+ table: Annotated[str | None, Option(help="Table name.")] = None,
34
+ dataset: Annotated[str | None, Option(help="Dataset name.")] = None,
35
+ ) -> (str, Table):
36
+ if table is None:
37
+ identifier = ask_table(project)
38
+ else:
39
+ identifier = table
40
+ if dataset is not None:
41
+ identifier = f"{dataset}.{table}"
42
+ return identifier, state.spiral.project(project).tables.table(identifier)
@@ -1,15 +1,7 @@
1
1
  import pyarrow as pa
2
2
 
3
- class IndexStatus:
4
- status: str
5
- staleness_s: int | None
6
- # An extent of keys that are indexed.
7
- # key_extent: KeyExtent | None
8
-
9
3
  class TextIndex:
10
4
  id: str
11
5
 
12
- def status(self) -> IndexStatus: ...
13
-
14
6
  class SearchScan:
15
7
  def to_record_batches(self) -> pa.RecordBatchReader: ...
@@ -36,6 +36,7 @@ class Table:
36
36
 
37
37
  id: str
38
38
  root_uri: str
39
+ mount_id: str | None
39
40
  key_schema: Schema
40
41
  metastore: PyMetastore
41
42
 
@@ -68,16 +69,16 @@ class TableScan:
68
69
  shuffle_buffer_size: int | None = None,
69
70
  shuffle_pool_num_rows: int | None = None,
70
71
  ) -> pa.RecordBatchReader: ...
71
- def column_group_scan(self, column_group: ColumnGroup) -> ColumnGroupScan: ...
72
- def key_space_scan(self, table_id: str) -> KeySpaceScan: ...
72
+ def column_group_state(self, column_group: ColumnGroup) -> ColumnGroupState: ...
73
+ def key_space_state(self, table_id: str) -> KeySpaceState: ...
73
74
  def metrics(self) -> dict[str, Any]: ...
74
75
 
75
- class KeySpaceScan:
76
+ class KeySpaceState:
76
77
  manifest: FragmentManifest
77
78
 
78
79
  def key_schema(self) -> Schema: ...
79
80
 
80
- class ColumnGroupScan:
81
+ class ColumnGroupState:
81
82
  manifest: FragmentManifest
82
83
 
83
84
  def schema(self) -> Schema: ...
@@ -47,7 +47,7 @@ __all__ = [
47
47
  "not_",
48
48
  "or_",
49
49
  "pack",
50
- "keyed",
50
+ "aux",
51
51
  "ref",
52
52
  "refs",
53
53
  "scalar",
@@ -116,26 +116,17 @@ def lift(expr: ExprLike) -> Expr:
116
116
  return scalar(expr)
117
117
 
118
118
 
119
- def key(name: builtins.str) -> Expr:
120
- """Create a variable expression referencing a key column.
119
+ def aux(name: builtins.str, dtype: pa.DataType) -> Expr:
120
+ """Create a variable expression referencing a column in the auxiliary table.
121
121
 
122
- Args:
123
- name: variable name
124
- """
125
- return Expr(_lib.expr.keyed(name))
126
-
127
-
128
- def keyed(name: builtins.str, dtype: pa.DataType | None = None) -> Expr:
129
- """Create a variable expression referencing a column in the key table.
130
-
131
- Key table is optionally given to `Scan#to_record_batches` function when reading only specific keys
122
+ Auxiliary table is optionally given to `Scan#to_record_batches` function when reading only specific keys
132
123
  or doing cell pushdown.
133
124
 
134
125
  Args:
135
126
  name: variable name
136
- dtype: must match dtype of the column in the key table.
127
+ dtype: must match dtype of the column in the auxiliary table.
137
128
  """
138
- return Expr(_lib.expr.keyed(name, dtype))
129
+ return Expr(_lib.expr.aux(name, dtype))
139
130
 
140
131
 
141
132
  def scalar(value: Any) -> Expr:
spiral/expressions/mp4.py CHANGED
@@ -20,13 +20,13 @@ def read(expr: ExprLike | str, frames: ExprLike | str, crop: ExprLike | str):
20
20
 
21
21
  Args:
22
22
  expr: The referenced `Mp4` bytes.
23
- A str is assumed to be the `se.keyed` expression.
23
+ A str is assumed to be the `se.aux` expression.
24
24
  frames: The range of frames to read. Each element must be a list of two uint32,
25
25
  frame start and frame end, or null / empty list to read all frames.
26
- A str is assumed to be the `se.keyed` expression.
26
+ A str is assumed to be the `se.aux` expression.
27
27
  crop: The crop of the frames to read. Each element must be a list of four uint32,
28
28
  x, y, width, height or null / empty list to read full frames.
29
- A str is assumed to be the `se.keyed` expression.
29
+ A str is assumed to be the `se.aux` expression.
30
30
 
31
31
  Returns:
32
32
  An array where each element is a decoded cropped video with fields:
@@ -36,17 +36,17 @@ def read(expr: ExprLike | str, frames: ExprLike | str, crop: ExprLike | str):
36
36
  frames: Number of frames with type `pa.uint32()`.
37
37
  """
38
38
  from spiral import _lib
39
- from spiral.expressions import keyed, lift
39
+ from spiral.expressions import aux, lift
40
40
 
41
41
  if isinstance(expr, str):
42
- expr = keyed(
42
+ expr = aux(
43
43
  expr,
44
44
  pa.struct([("__ref__", pa.struct([("id", pa.string()), ("begin", pa.uint64()), ("end", pa.uint64())]))]),
45
45
  )
46
46
  if isinstance(frames, str):
47
- frames = keyed(frames, pa.list_(pa.uint32()))
47
+ frames = aux(frames, pa.list_(pa.uint32()))
48
48
  if isinstance(crop, str):
49
- crop = keyed(crop, pa.list_(pa.uint32()))
49
+ crop = aux(crop, pa.list_(pa.uint32()))
50
50
 
51
51
  expr = lift(expr)
52
52
  frames = lift(frames)
@@ -29,15 +29,15 @@ def deref(expr: ExprLike | str, field: str | None = None) -> Expr:
29
29
  column back into their original form, e.g. binary.
30
30
 
31
31
  Args:
32
- expr: The expression to de-reference. A str is assumed to be the `se.keyed` expression.
32
+ expr: The expression to de-reference. A str is assumed to be the `se.aux` expression.
33
33
  field: If the expr evaluates into struct, the field name of that struct that should be de-referenced.
34
34
  If `None`, the expr must evaluate into a reference type.
35
35
  """
36
36
  from spiral import _lib
37
- from spiral.expressions import keyed, lift
37
+ from spiral.expressions import aux, lift
38
38
 
39
39
  if isinstance(expr, str):
40
- expr = keyed(
40
+ expr = aux(
41
41
  expr,
42
42
  pa.struct([("__ref__", pa.struct([("id", pa.string()), ("begin", pa.uint64()), ("end", pa.uint64())]))]),
43
43
  )
spiral/indexes/index.py CHANGED
@@ -1,4 +1,3 @@
1
- import datetime
2
1
  from typing import TYPE_CHECKING
3
2
 
4
3
  from spiral.core.index import TextIndex as CoreTextIndex
@@ -27,8 +26,3 @@ class TextIndex(Expr):
27
26
  @property
28
27
  def name(self) -> str:
29
28
  return self._name
30
-
31
- def status(self) -> (str, datetime.timedelta | None):
32
- """Fetch the status of the index. If status is ready, returns the staleness of the index."""
33
- status = self._index.status()
34
- return status.status, datetime.timedelta(seconds=status.staleness_s) if status.staleness_s is not None else None
spiral/substrait_.py CHANGED
@@ -248,11 +248,6 @@ class SubstraitConverter:
248
248
  case "struct_field", ref:
249
249
  ref: ExpressionReferenceSegmentStructField
250
250
  field_name = scope_type.field(ref.field).name
251
-
252
- if field_name in self.key_names:
253
- # This is a key column, so we need to select it from the scope.
254
- return se.key(field_name)
255
-
256
251
  scope = se.getitem(scope, field_name)
257
252
  scope_type = scope_type.field(ref.field).type
258
253
  if ref.is_set("child"):
spiral/tables/client.py CHANGED
@@ -25,6 +25,9 @@ class Tables:
25
25
  """
26
26
 
27
27
  def __init__(self, api: SpiralAPI, spiral: CoreSpiral, *, project_id: str | None = None):
28
+ if project_id == "":
29
+ raise ValueError("Project ID cannot be an empty string.")
30
+
28
31
  self._api = api
29
32
  self._spiral = spiral
30
33
  self._project_id = project_id
@@ -10,14 +10,14 @@ def display_manifests(scan: TableScan):
10
10
  raise NotImplementedError("Multiple table scans are not supported.")
11
11
  table_id = scan.table_ids()[0]
12
12
 
13
- key_space_manifest: FragmentManifest = scan.key_space_scan(table_id).manifest
13
+ key_space_manifest: FragmentManifest = scan.key_space_state(table_id).manifest
14
14
  _table_of_fragments(
15
15
  key_space_manifest,
16
16
  title="Key Space manifest",
17
17
  )
18
18
 
19
19
  for column_group in scan.column_groups():
20
- column_group_manifest: FragmentManifest = scan.column_group_scan(column_group).manifest
20
+ column_group_manifest: FragmentManifest = scan.column_group_state(column_group).manifest
21
21
  _table_of_fragments(
22
22
  column_group_manifest,
23
23
  title=f"Column Group manifest for {str(column_group)}",
@@ -15,17 +15,17 @@ def show_scan(scan: TableScan):
15
15
  column_groups = scan.column_groups()
16
16
 
17
17
  splits = scan.split()
18
- key_space_scan = scan.key_space_scan(table_id)
18
+ key_space_state = scan.key_space_state(table_id)
19
19
 
20
20
  # Collect all key bounds from all manifests. This makes sure all visualizations are aligned.
21
21
  key_points = set()
22
- key_space_manifest = key_space_scan.manifest
22
+ key_space_manifest = key_space_state.manifest
23
23
  for i in range(len(key_space_manifest)):
24
24
  fragment_file = key_space_manifest[i]
25
25
  key_points.add(fragment_file.key_extent.min)
26
26
  key_points.add(fragment_file.key_extent.max)
27
27
  for cg in column_groups:
28
- cg_scan = scan.column_group_scan(cg)
28
+ cg_scan = scan.column_group_state(cg)
29
29
  cg_manifest = cg_scan.manifest
30
30
  for i in range(len(cg_manifest)):
31
31
  fragment_file = cg_manifest[i]
@@ -39,7 +39,7 @@ def show_scan(scan: TableScan):
39
39
 
40
40
  show_manifest(key_space_manifest, scope="Key space", key_points=key_points, splits=splits)
41
41
  for cg in scan.column_groups():
42
- cg_scan = scan.column_group_scan(cg)
42
+ cg_scan = scan.column_group_state(cg)
43
43
  # Skip table id from the start of the column group.
44
44
  show_manifest(cg_scan.manifest, scope=".".join(cg.path[1:]), key_points=key_points, splits=splits)
45
45
 
spiral/tables/table.py CHANGED
@@ -71,11 +71,6 @@ class Table(Expr):
71
71
  return f'Table("{self.identifier}")'
72
72
 
73
73
  def __getitem__(self, item: str) -> Expr:
74
- from spiral import expressions as se
75
-
76
- if item in self._key_columns:
77
- return se.key(name=item)
78
-
79
74
  return super().__getitem__(item)
80
75
 
81
76
  def select(self, *paths: str, exclude: list[str] = None) -> "Expr":
@@ -86,14 +81,7 @@ class Table(Expr):
86
81
  "Cannot use 'exclude' arg with key columns. Use 'exclude_keys' and an explicit select of keys."
87
82
  )
88
83
 
89
- key_paths = set(paths) & self._key_columns
90
- other_paths = set(paths) - key_paths
91
- if not key_paths:
92
- return super().select(*paths, exclude=exclude)
93
-
94
- from spiral import expressions as se
95
-
96
- return se.merge(se.pack({key: se.key(key) for key in key_paths}), super().select(*other_paths, exclude=exclude))
84
+ return super().select(*paths, exclude=exclude)
97
85
 
98
86
  @property
99
87
  def key_schema(self) -> Schema: