pyspiral 0.4.3__cp310-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl → 0.5.0__cp310-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pyspiral-0.4.3.dist-info → pyspiral-0.5.0.dist-info}/METADATA +5 -4
- {pyspiral-0.4.3.dist-info → pyspiral-0.5.0.dist-info}/RECORD +29 -24
- spiral/_lib.abi3.so +0 -0
- spiral/adbc.py +26 -8
- spiral/api/__init__.py +8 -0
- spiral/api/client.py +4 -0
- spiral/api/filesystems.py +13 -1
- spiral/api/projects.py +2 -2
- spiral/api/text_indexes.py +56 -0
- spiral/api/types.py +2 -0
- spiral/api/workers.py +40 -0
- spiral/cli/indexes/__init__.py +21 -0
- spiral/cli/indexes/args.py +39 -0
- spiral/cli/indexes/workers.py +59 -0
- spiral/cli/tables/__init__.py +7 -40
- spiral/cli/tables/args.py +42 -0
- spiral/core/index/__init__.pyi +0 -8
- spiral/core/table/__init__.pyi +5 -4
- spiral/expressions/__init__.py +6 -15
- spiral/expressions/mp4.py +7 -7
- spiral/expressions/refs.py +3 -3
- spiral/indexes/index.py +0 -6
- spiral/substrait_.py +0 -5
- spiral/tables/client.py +3 -0
- spiral/tables/debug/manifests.py +2 -2
- spiral/tables/debug/scan.py +4 -4
- spiral/tables/table.py +1 -13
- {pyspiral-0.4.3.dist-info → pyspiral-0.5.0.dist-info}/WHEEL +0 -0
- {pyspiral-0.4.3.dist-info → pyspiral-0.5.0.dist-info}/entry_points.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: pyspiral
|
3
|
-
Version: 0.4.3
|
3
|
+
Version: 0.5.0
|
4
4
|
Classifier: Intended Audience :: Science/Research
|
5
5
|
Classifier: Operating System :: OS Independent
|
6
6
|
Classifier: Programming Language :: Python
|
@@ -13,22 +13,23 @@ Classifier: Programming Language :: Python :: 3.13
|
|
13
13
|
Classifier: Programming Language :: Rust
|
14
14
|
Classifier: License :: Other/Proprietary License
|
15
15
|
Requires-Dist: betterproto==2.0.0b7
|
16
|
+
Requires-Dist: datasets>=4.0.0
|
16
17
|
Requires-Dist: google-re2>=1.1.20240702
|
17
18
|
Requires-Dist: grpclib>=0.4.7
|
18
19
|
Requires-Dist: hishel>=0.0.30
|
19
20
|
Requires-Dist: httpx>=0.27.0
|
21
|
+
Requires-Dist: nanoid>=2.0.0
|
20
22
|
Requires-Dist: numpy>=2
|
21
23
|
Requires-Dist: pyarrow>=21.0.0
|
22
24
|
Requires-Dist: pydantic-settings>=2.3.4
|
23
25
|
Requires-Dist: pydantic[email]>=2.5.3
|
24
26
|
Requires-Dist: pyjwt[crypto]>=2.9.0
|
27
|
+
Requires-Dist: pyperclip>=1.9.0
|
25
28
|
Requires-Dist: questionary>=2.0.1
|
29
|
+
Requires-Dist: sqlglot[rs]>=25.25.1
|
26
30
|
Requires-Dist: tqdm>=4.66.5
|
27
31
|
Requires-Dist: typer>=0.16
|
28
32
|
Requires-Dist: xxhash>=3.4.1
|
29
|
-
Requires-Dist: nanoid>=2.0.0
|
30
|
-
Requires-Dist: sqlglot[rs]>=25.25.1
|
31
|
-
Requires-Dist: pyperclip>=1.9.0
|
32
33
|
Requires-Dist: polars>=1.31.0 ; extra == 'polars'
|
33
34
|
Requires-Dist: duckdb>=1.3.2 ; extra == 'duckdb'
|
34
35
|
Requires-Dist: pyiceberg>=0.9.1 ; extra == 'pyiceberg'
|
@@ -1,17 +1,19 @@
|
|
1
|
-
pyspiral-0.
|
2
|
-
pyspiral-0.
|
3
|
-
pyspiral-0.
|
1
|
+
pyspiral-0.5.0.dist-info/METADATA,sha256=J_bR2LwG0i6M8wB0-60AXJm8MutIfqs9QUbaINmBeTI,1641
|
2
|
+
pyspiral-0.5.0.dist-info/WHEEL,sha256=PjJMzJpvi5UGP0cjK5Ftx4x5YgThyrYgwpOwFQxcwHw,130
|
3
|
+
pyspiral-0.5.0.dist-info/entry_points.txt,sha256=uft7u-a6g40NLt4Q6BleWbK4NY0M8nZuYPpP8DV0EOk,45
|
4
4
|
spiral/__init__.py,sha256=Jv1vbcnnmcTsBLN5mSNjnX3ae4C_mgojXDSBFaqIhN0,208
|
5
|
-
spiral/_lib.abi3.so,sha256=
|
6
|
-
spiral/adbc.py,sha256=
|
7
|
-
spiral/api/__init__.py,sha256=
|
5
|
+
spiral/_lib.abi3.so,sha256=a9iyut5zpuRwxDDgYMLtJVJDzOfLBHbFqU7RcW6v3-E,52647448
|
6
|
+
spiral/adbc.py,sha256=RIIWBew7zPoQa_h3I-A-nX9cUMDM3D3Je0mqE9aDX9k,14885
|
7
|
+
spiral/api/__init__.py,sha256=nzZK3r1K2GSaqaLoIba4WuyUyG7ApjLZ4fyJovT1unA,2000
|
8
8
|
spiral/api/admin.py,sha256=A1iVR1XYJSObZivPAD5UzmPuMgupXc9kaHNYYa_kwfs,585
|
9
|
-
spiral/api/client.py,sha256=
|
10
|
-
spiral/api/filesystems.py,sha256=
|
9
|
+
spiral/api/client.py,sha256=XQaOd1DgLTDOxW_uZUeTX5UCnUpPV1unjxbzWLlPJaM,4650
|
10
|
+
spiral/api/filesystems.py,sha256=EA4iqhTeaIlvObvEUxHmZl0pQ24IOxUVWM3GPhFLw8o,4969
|
11
11
|
spiral/api/organizations.py,sha256=B-8zZ7lFJANGK7dUNbo_aU-cgI959JBP9VcWb6wdgi0,1895
|
12
|
-
spiral/api/projects.py,sha256=
|
12
|
+
spiral/api/projects.py,sha256=mvyp4tnUhItNnPnyWsj0WrSf2ca3mggL7KjXs5Fllco,5671
|
13
13
|
spiral/api/telemetry.py,sha256=tfdA3E_EWJwFVxkQfkm8tiYGRubnx2LuE5nbfsk1oG4,474
|
14
|
-
spiral/api/
|
14
|
+
spiral/api/text_indexes.py,sha256=f8AcrYaBKsaka5A4okuExl96A-2rBbsgru98l_xogwo,1826
|
15
|
+
spiral/api/types.py,sha256=lGdiKViRgIEJXD2ubwnyEIEwHkfRumlZjVEaHMV3Tm8,682
|
16
|
+
spiral/api/workers.py,sha256=0wZNUHMioDT53P1OBJfpjyDfIodHwwT6858z2IlRIM4,636
|
15
17
|
spiral/api/workloads.py,sha256=XAyXV7vgZcoyyoPoGvOT4jTpyFKFMvrrAfhL6d1h1kE,1748
|
16
18
|
spiral/arrow_.py,sha256=T1LZ7bh9aMDbXfpUsf0dR0E1roTQyAYSgZ2mL4s8J_4,7681
|
17
19
|
spiral/cli/__init__.py,sha256=ooAFz_iCpVCKHE0TiVElIynbP2PtTgD9cUw46Vh1lcw,2145
|
@@ -23,34 +25,37 @@ spiral/cli/fs.py,sha256=dVPoAoAbuQ9yJlfI-JiFgS9VdnPmeBMygVHgehJRj34,4367
|
|
23
25
|
spiral/cli/iceberg/__init__.py,sha256=IQV_gwCFSj6Ubxs58VM9Pal1ymgG2bxdDgOPuk9E5bs,214
|
24
26
|
spiral/cli/iceberg/namespaces.py,sha256=x9pvHlcXtcATYYjqimHa6CtkyL3taQUJ--ni_Bfoemc,1510
|
25
27
|
spiral/cli/iceberg/tables.py,sha256=nSR4-t54otJfCmubB6vXnbOkbqPVGV0sHBlc-t9cIVg,1930
|
26
|
-
spiral/cli/indexes/__init__.py,sha256
|
28
|
+
spiral/cli/indexes/__init__.py,sha256=yNMBZh3kAz1NXsridvbY4-4jNIJAgntBhDnu1EfaZTI,1291
|
29
|
+
spiral/cli/indexes/args.py,sha256=B4zAFnVZKPHq-_z8qnHYwgs1v4c-w_iypbA5MdXL67s,1139
|
30
|
+
spiral/cli/indexes/workers.py,sha256=mu-7-Asz1txNtCkoei6q-m-lE5YhLUjCDwstKJcew40,2169
|
27
31
|
spiral/cli/login.py,sha256=InKMnpV8NATW5RPgB3ZL-DSVPzUuUByyK4Fx7pZEgfg,607
|
28
32
|
spiral/cli/orgs.py,sha256=V-4ZTT3FwFQLcs1-BenC8uCgvWOJcxkZPSdCPfsexhc,2848
|
29
33
|
spiral/cli/printer.py,sha256=W83KAE-7meoDD1yRltLQrZqrA2olGapBGy_2USWkY08,1778
|
30
34
|
spiral/cli/projects.py,sha256=TKXu_VzkIUccwXzdlg-wQMkrB-Py33g052NrbuJx-D4,5096
|
31
35
|
spiral/cli/state.py,sha256=10wTIVQ0SJkY67Z6-KQ1LFlt3aVIPmZhoHFdTwp4kNA,130
|
32
|
-
spiral/cli/tables/__init__.py,sha256=
|
36
|
+
spiral/cli/tables/__init__.py,sha256=DTxviiflWZkUDmPhuGzXmhW2mNRgPNkJcaP7olFtXpc,2678
|
37
|
+
spiral/cli/tables/args.py,sha256=bxKQoJuWhCTHlDYz_WRQ6_Kp9XgRBH1UIMgUMzlxliA,1262
|
33
38
|
spiral/cli/telemetry.py,sha256=ABDCyV5QJGOIJp4AxvK0LG5xNPIysP37K5haL38T7P4,586
|
34
39
|
spiral/cli/types.py,sha256=YG1eHhRLaqlVU_18DQBuF_YMsabhMZLBY0V9CvbSxjY,1369
|
35
40
|
spiral/cli/workloads.py,sha256=SbxgwiBlX1AuqpOLV3gs7DFkH-Tbeend7qJTwq0Je84,1994
|
36
41
|
spiral/client.py,sha256=K-OuMOTgYxOA9vef5jSANjmPRBfGrzQ65fg6Fd-rHMY,2683
|
37
42
|
spiral/core/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
38
43
|
spiral/core/client/__init__.pyi,sha256=Tn1OJmkO1rQUsPE9BtfEyxIjoife6s16qOd8XiyHi2c,3475
|
39
|
-
spiral/core/index/__init__.pyi,sha256=
|
40
|
-
spiral/core/table/__init__.pyi,sha256=
|
44
|
+
spiral/core/index/__init__.pyi,sha256=MBq-jBuTmBreYMJ4AJFAe4e-ByRMM7JVssuEJMLVfQs,131
|
45
|
+
spiral/core/table/__init__.pyi,sha256=XhN9xpDdBnugRhtiP9ThC3DSADoLMDL9lxSjjY7fJ28,3296
|
41
46
|
spiral/core/table/manifests/__init__.pyi,sha256=3V59-K1qr1z2dGfgRKXaHSVheK8NNw8Q8PFhfbeQd_4,1065
|
42
47
|
spiral/core/table/metastore/__init__.pyi,sha256=dMqySDnsjPUTBuFU2MaQGyocKEoGkWpeTQmUP2iIKbc,1880
|
43
48
|
spiral/core/table/spec/__init__.pyi,sha256=D4GQp9RWwyLKTlRW7eDXcQE-xA5rF2iBcXZ8y7b48EE,5595
|
44
49
|
spiral/datetime_.py,sha256=1TA1RYIRU22qcUuipIjVhAtGnPDVn2z9WttuhkmfkwY,964
|
45
|
-
spiral/expressions/__init__.py,sha256=
|
50
|
+
spiral/expressions/__init__.py,sha256=T8PIb0_UB9kynK0dpWbUD4No5lKRTG-wKnao8xOcXjY,6381
|
46
51
|
spiral/expressions/base.py,sha256=q_W9XslcdFQtOIE_d1VkEmLickaXKOAoIcFeMoh-nqQ,4751
|
47
52
|
spiral/expressions/http.py,sha256=begUydWoFHEqjeLkATvI_v66Ez6_rR-OQBWO5cHbb9c,2742
|
48
53
|
spiral/expressions/io.py,sha256=gJ2a0FKMmdxarWKENulPRwH7KDvSJTIh_OUxX306xAM,3045
|
49
54
|
spiral/expressions/list_.py,sha256=MMt5lf5H1M3O-x6N_PvqOLGq9NOk6Ukv0fPWwPC_uy4,1809
|
50
|
-
spiral/expressions/mp4.py,sha256=
|
55
|
+
spiral/expressions/mp4.py,sha256=_xGVnkygddzxP9a8OACJ8_KXnejuVbYCVKBCXBQ798Y,2151
|
51
56
|
spiral/expressions/png.py,sha256=KO8X0OmMzUFwpg2I_j0JTyldPzVXDWIMzjWMWDV9vIY,506
|
52
57
|
spiral/expressions/qoi.py,sha256=gvIbb6fXb_Bb080sn9wkpbGGrPs2UEcTXCfuv4-kcYQ,506
|
53
|
-
spiral/expressions/refs.py,sha256=
|
58
|
+
spiral/expressions/refs.py,sha256=omeHBQ5o6N4xgZ3x5Xz7IRrWwYBBtQY8DYK0NNAxeGo,2109
|
54
59
|
spiral/expressions/str_.py,sha256=tY8RXW3JWvr1-bEfCZtk5FAf11wKJnXPuA9EoeJ9tA4,1265
|
55
60
|
spiral/expressions/struct.py,sha256=pGAnCDh6AK0BK1XfZ1qG4ce4ranIQEE1HQsgmzBcfwQ,2038
|
56
61
|
spiral/expressions/text.py,sha256=-02gBWYoyNQ3qQ1--9HTa8IryUDojYQVIp8C7rgnOWQ,1893
|
@@ -61,7 +66,7 @@ spiral/iceberg/__init__.py,sha256=jSIlTxWauAbJV5gsWglZisFbnfNNzLYN90scoYcdWzc,65
|
|
61
66
|
spiral/iceberg/client.py,sha256=E6FyE_h2HLgDW1cAFg1XgglJr6rbVOCWjRtRmqoMVkM,1003
|
62
67
|
spiral/indexes/__init__.py,sha256=TXLQ-_3xso3lFIp2lM58_ip9OPNwPKFv1FdsWiUF-d8,178
|
63
68
|
spiral/indexes/client.py,sha256=NsFBILEHMjyCUruFrUEKucRQRrN4OvqgbL4pmzWs07g,5600
|
64
|
-
spiral/indexes/index.py,sha256=
|
69
|
+
spiral/indexes/index.py,sha256=TJB1-hSiPNrzJ_VYFTZGHvIQ_vmoILPK7tyi_9oS1nA,638
|
65
70
|
spiral/indexes/scan.py,sha256=B2m-UgNuawNB90HXK33GTQfMy2WLdNNxiiB6cIjFW2Y,697
|
66
71
|
spiral/project.py,sha256=0uJ1Jb88Ie-cCNnSdX3QfFtCUqrjLka4zCm_TxCpVak,1189
|
67
72
|
spiral/protogen/_/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -81,18 +86,18 @@ spiral/protogen/util.py,sha256=smnvVo6nYH3FfDm9jqhNLaXz4bbTBaQezHQDCTvZyiQ,1486
|
|
81
86
|
spiral/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
82
87
|
spiral/server.py,sha256=ztBmB5lBnUz-smQxR_tC8AI5SOhz17wH0MI3GuzDUdM,600
|
83
88
|
spiral/settings.py,sha256=PIQV2ljtB3pEOWoMRVSRzSGJNrXviO2JBgZ5ZY_Nq2E,2794
|
84
|
-
spiral/substrait_.py,sha256=
|
89
|
+
spiral/substrait_.py,sha256=2BYvwFGcCwJ0JXNhXOLdPuhM1PqFyaeSqFpQCtv-M4E,12581
|
85
90
|
spiral/tables/__init__.py,sha256=iiP7BkHA117em37_e75jtdvoZC10xCXtld18gRnPbTw,430
|
86
|
-
spiral/tables/client.py,sha256=
|
91
|
+
spiral/tables/client.py,sha256=MVgfeVF7P4kXnjOTQExp0VmywyuuRQ2IZVxdwVY3xgk,5015
|
87
92
|
spiral/tables/dataset.py,sha256=DuHeKVCJfXLsbxmde9QW6yvesW5uhswG6qAxV5X0ZgA,7890
|
88
93
|
spiral/tables/debug/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
89
|
-
spiral/tables/debug/manifests.py,sha256=
|
94
|
+
spiral/tables/debug/manifests.py,sha256=t7E0AchHrzOv9vAQpE77Qp3rLc5VTzRRxlByt5OWnUM,2596
|
90
95
|
spiral/tables/debug/metrics.py,sha256=XdRDcjggtsLNGCAjam6IxG9072pz_d2C8iLApNRFUtk,2044
|
91
|
-
spiral/tables/debug/scan.py,sha256
|
96
|
+
spiral/tables/debug/scan.py,sha256=EEG2gzbBpUyrtk4jQXh-ENk8aySrKf0CZMym_wHuWm4,8947
|
92
97
|
spiral/tables/maintenance.py,sha256=7Xa2Jdu_OY1Qu6iN1sPVdywVZtk_Mv3EaC3G93cmQvI,305
|
93
98
|
spiral/tables/scan.py,sha256=3lPf5fSyF1fHGdGJ-pvu5HxPWoonf_XL7neWTqzB-0I,7582
|
94
99
|
spiral/tables/snapshot.py,sha256=2NTuVEp2uJ1pV3Q5tLj7FOzPSc9axlfb6uOITwHnj0g,2229
|
95
|
-
spiral/tables/table.py,sha256=
|
100
|
+
spiral/tables/table.py,sha256=VM93Rsm67sJFendI1_VhlkFORIdBGfhCMBUBK4dve9I,4910
|
96
101
|
spiral/tables/transaction.py,sha256=3a64R-mf_cmR54BNn8U-05jmWonp6Ivxhe6u01Dyjzo,1573
|
97
102
|
spiral/types_.py,sha256=W_jyO7F6rpPiH69jhgSgV7OxQZbOlb1Ho3InpKUP6Eo,155
|
98
|
-
pyspiral-0.4.3.dist-info/RECORD,,
|
103
|
+
pyspiral-0.5.0.dist-info/RECORD,,
|
spiral/_lib.abi3.so
CHANGED
Binary file
|
spiral/adbc.py
CHANGED
@@ -35,6 +35,7 @@ from spiral.protogen._.arrow.flight.protocol.sql import (
|
|
35
35
|
SqlInfo,
|
36
36
|
SqlSupportedTransaction,
|
37
37
|
)
|
38
|
+
from spiral.tables import Snapshot
|
38
39
|
|
39
40
|
log = logging.getLogger(__name__)
|
40
41
|
logging.getLogger("sqlx").setLevel(logging.WARNING)
|
@@ -64,7 +65,6 @@ def debuggable(func):
|
|
64
65
|
return wrapper_decorator
|
65
66
|
|
66
67
|
|
67
|
-
# TODO(marko): This should work for Iceberg tables.
|
68
68
|
class ADBCServerBase:
|
69
69
|
def get_sql_info(self, _req: CommandGetSqlInfo) -> pa.RecordBatchReader:
|
70
70
|
"""Default implementation that reports no support for any complex features."""
|
@@ -143,6 +143,17 @@ class SpiralADBCServer(ADBCServerBase):
|
|
143
143
|
|
144
144
|
self.pool = ThreadPoolExecutor()
|
145
145
|
|
146
|
+
def open_snapshot(self, tbl) -> Snapshot:
|
147
|
+
"""Open a table in the Spiral project and return it as a PyArrow Dataset."""
|
148
|
+
if tbl.catalog is None or tbl.catalog == "":
|
149
|
+
raise FlightError("Project (Data Catalog) must be specified to open a table.")
|
150
|
+
|
151
|
+
project = tbl.catalog
|
152
|
+
dataset = tbl.db or "default"
|
153
|
+
table = tbl.name
|
154
|
+
|
155
|
+
return self.sp.project(project).tables.table(f"{dataset}.{table}").snapshot()
|
156
|
+
|
146
157
|
def get_catalogs(self, req: CommandGetCatalogs) -> pa.RecordBatchReader:
|
147
158
|
schema = pa.schema([pa.field("catalog_name", pa.string(), nullable=False)])
|
148
159
|
|
@@ -170,15 +181,16 @@ class SpiralADBCServer(ADBCServerBase):
|
|
170
181
|
if req.catalog == "":
|
171
182
|
# Empty string means databases _without_ a catalog, which we don't support
|
172
183
|
return
|
184
|
+
catalog = req.catalog
|
173
185
|
|
174
186
|
# Otherwise, catalog is either the project ID, or None.
|
175
|
-
if
|
187
|
+
if catalog is None:
|
176
188
|
projects = self.sp.list_projects()
|
177
189
|
else:
|
178
190
|
projects = [self.sp.project(req.catalog)]
|
179
191
|
|
180
192
|
for project in projects:
|
181
|
-
datasets = {
|
193
|
+
datasets = {tbl.dataset for tbl in project.tables.list_tables()}
|
182
194
|
|
183
195
|
batch = pa.RecordBatch.from_arrays(
|
184
196
|
[
|
@@ -219,6 +231,7 @@ class SpiralADBCServer(ADBCServerBase):
|
|
219
231
|
projects = list(self.sp.list_projects())
|
220
232
|
else:
|
221
233
|
projects = [self.sp.project(req.catalog)]
|
234
|
+
projects = sorted(projects, key=lambda p: p.id)
|
222
235
|
|
223
236
|
def _process_project(project):
|
224
237
|
tables: list[TableResource] = project.tables.list_tables()
|
@@ -248,12 +261,13 @@ class SpiralADBCServer(ADBCServerBase):
|
|
248
261
|
def statement_query(self, req: CommandStatementQuery, limit: int | None = None) -> pa.RecordBatchReader:
|
249
262
|
# Extract the tables from the query, and bring them into the Python locals scope.
|
250
263
|
expr = sqlglot.parse_one(req.query, dialect="duckdb")
|
264
|
+
datasets = {}
|
251
265
|
for tbl in expr.find_all(exp.Table):
|
252
266
|
# We swap the three-part identifier out for a single identifier
|
253
|
-
# This lets us
|
254
|
-
|
255
|
-
name =
|
256
|
-
|
267
|
+
# This lets us register a PyArrow Dataset with DuckDB for the query.
|
268
|
+
snapshot = self.open_snapshot(tbl)
|
269
|
+
name = snapshot.table.table_id
|
270
|
+
datasets[name] = snapshot.to_dataset()
|
257
271
|
tbl.replace(exp.table_(table=name))
|
258
272
|
|
259
273
|
try:
|
@@ -262,7 +276,11 @@ class SpiralADBCServer(ADBCServerBase):
|
|
262
276
|
raise FlightError("DuckDB is required for SQL queries.")
|
263
277
|
|
264
278
|
try:
|
265
|
-
|
279
|
+
# Create a DuckDB connection and register the datasets
|
280
|
+
conn = duckdb.connect()
|
281
|
+
for name, dataset in datasets.items():
|
282
|
+
conn.register(name, dataset)
|
283
|
+
sql = conn.sql(expr.sql(dialect="duckdb"))
|
266
284
|
except Exception as e:
|
267
285
|
raise FlightError(str(e))
|
268
286
|
|
spiral/api/__init__.py
CHANGED
@@ -3,6 +3,8 @@ from typing import TYPE_CHECKING
|
|
3
3
|
|
4
4
|
import httpx
|
5
5
|
|
6
|
+
from spiral.api.text_indexes import TextIndexesService
|
7
|
+
|
6
8
|
from .client import _Client
|
7
9
|
|
8
10
|
if TYPE_CHECKING:
|
@@ -57,6 +59,12 @@ class SpiralAPI:
|
|
57
59
|
|
58
60
|
return WorkloadService(self.client)
|
59
61
|
|
62
|
+
@property
|
63
|
+
def text_indexes(self) -> "TextIndexesService":
|
64
|
+
from .text_indexes import TextIndexesService
|
65
|
+
|
66
|
+
return TextIndexesService(self.client)
|
67
|
+
|
60
68
|
@property
|
61
69
|
def telemetry(self) -> "TelemetryService":
|
62
70
|
from .telemetry import TelemetryService
|
spiral/api/client.py
CHANGED
@@ -146,6 +146,10 @@ class _Client:
|
|
146
146
|
# Enrich the exception with the response body
|
147
147
|
raise SpiralHTTPError(body=resp.text, code=resp.status_code) from e
|
148
148
|
|
149
|
+
if response_cls == type[None]:
|
150
|
+
assert resp.text == ""
|
151
|
+
return None
|
152
|
+
|
149
153
|
return TypeAdapter(response_cls).validate_python(resp.json())
|
150
154
|
|
151
155
|
def paged(
|
spiral/api/filesystems.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
from enum import Enum
|
2
|
+
from types import NoneType
|
2
3
|
from typing import Annotated, Literal
|
3
4
|
|
4
5
|
from pydantic import AfterValidator, BaseModel, Field
|
@@ -17,6 +18,7 @@ def _validate_directory_path(path: str) -> str:
|
|
17
18
|
|
18
19
|
DirectoryPath = Annotated[str, AfterValidator(_validate_directory_path)]
|
19
20
|
FilePath = str # Path or directory
|
21
|
+
FsLoc = str
|
20
22
|
|
21
23
|
|
22
24
|
class BuiltinFileSystem(BaseModel):
|
@@ -120,6 +122,12 @@ class CreateMountResponse(BaseModel):
|
|
120
122
|
mount: Mount
|
121
123
|
|
122
124
|
|
125
|
+
class GetMountAndFileSystemResponse(BaseModel):
|
126
|
+
mount: Mount
|
127
|
+
file_system: FileSystem
|
128
|
+
fs_loc: FsLoc
|
129
|
+
|
130
|
+
|
123
131
|
class FileSystemService(ServiceBase):
|
124
132
|
"""Service for file system operations."""
|
125
133
|
|
@@ -148,6 +156,10 @@ class FileSystemService(ServiceBase):
|
|
148
156
|
"""Get a mount."""
|
149
157
|
return self.client.get(f"/v1/mounts/{mount_id}", Mount)
|
150
158
|
|
159
|
+
def get_mount_and_file_system(self, mount_id: str) -> GetMountAndFileSystemResponse:
|
160
|
+
"""Get the mount and its associated file system."""
|
161
|
+
return self.client.get(f"/v1/mounts/{mount_id}/with-filesystem", GetMountAndFileSystemResponse)
|
162
|
+
|
151
163
|
def remove_mount(self, mount_id: str) -> None:
|
152
164
|
"""Remove mount."""
|
153
|
-
return self.client.delete(f"/v1/mounts/{mount_id}",
|
165
|
+
return self.client.delete(f"/v1/mounts/{mount_id}", NoneType)
|
spiral/api/projects.py
CHANGED
@@ -192,6 +192,6 @@ class ProjectService(ServiceBase):
|
|
192
192
|
"""Get a grant."""
|
193
193
|
return self.client.get(f"/v1/grants/{grant_id}", Grant)
|
194
194
|
|
195
|
-
def revoke_grant(self, grant_id: str)
|
195
|
+
def revoke_grant(self, grant_id: str):
|
196
196
|
"""Revoke a grant."""
|
197
|
-
return self.client.delete(f"/v1/grants/{grant_id}", None)
|
197
|
+
return self.client.delete(f"/v1/grants/{grant_id}", type[None])
|
spiral/api/text_indexes.py
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
from pydantic import BaseModel
|
2
|
+
|
3
|
+
from .client import Paged, PagedResponse, ServiceBase
|
4
|
+
from .types import IndexId, ProjectId, WorkerId
|
5
|
+
from .workers import CPU, GcpRegion, Memory, ResourceClass
|
6
|
+
|
7
|
+
|
8
|
+
class TextSearchWorker(BaseModel):
|
9
|
+
worker_id: WorkerId
|
10
|
+
project_id: ProjectId
|
11
|
+
index_id: IndexId
|
12
|
+
url: str | None
|
13
|
+
|
14
|
+
|
15
|
+
class CreateWorkerRequest(BaseModel):
|
16
|
+
cpu: CPU
|
17
|
+
memory: Memory
|
18
|
+
region: GcpRegion
|
19
|
+
|
20
|
+
|
21
|
+
class CreateWorkerResponse(BaseModel):
|
22
|
+
worker_id: WorkerId
|
23
|
+
|
24
|
+
|
25
|
+
class SyncIndexRequest(BaseModel):
|
26
|
+
"""Request to sync a text index."""
|
27
|
+
|
28
|
+
resources: ResourceClass
|
29
|
+
|
30
|
+
|
31
|
+
class SyncIndexResponse(BaseModel):
|
32
|
+
worker_id: WorkerId
|
33
|
+
|
34
|
+
|
35
|
+
class TextIndexesService(ServiceBase):
|
36
|
+
"""Service for workload operations."""
|
37
|
+
|
38
|
+
def create_worker(self, index_id: IndexId, request: CreateWorkerRequest) -> CreateWorkerResponse:
|
39
|
+
"""Create a new search worker."""
|
40
|
+
return self.client.post(f"/v1/text-indexes/{index_id}/workers", request, CreateWorkerResponse)
|
41
|
+
|
42
|
+
def list_workers(self, index_id: IndexId) -> Paged[WorkerId]:
|
43
|
+
"""List text index workers for the given index."""
|
44
|
+
return self.client.paged(f"/v1/text-indexes/{index_id}/workers", PagedResponse[WorkerId])
|
45
|
+
|
46
|
+
def get_worker(self, worker_id: WorkerId) -> TextSearchWorker:
|
47
|
+
"""Get a text index worker."""
|
48
|
+
return self.client.get(f"/v1/text-index-workers/{worker_id}", TextSearchWorker)
|
49
|
+
|
50
|
+
def shutdown_worker(self, worker_id: WorkerId) -> None:
|
51
|
+
"""Shutdown a text index worker."""
|
52
|
+
return self.client.delete(f"/v1/text-index-workers/{worker_id}", type[None])
|
53
|
+
|
54
|
+
def sync_index(self, index_id: IndexId, request: SyncIndexRequest) -> SyncIndexResponse:
|
55
|
+
"""Start a job to sync an index."""
|
56
|
+
return self.client.post(f"/v1/text-indexes/{index_id}/sync", request, SyncIndexResponse)
|
spiral/api/types.py
CHANGED
@@ -13,6 +13,8 @@ UserId = str
|
|
13
13
|
OrgId = str
|
14
14
|
ProjectId = str
|
15
15
|
RoleId = str
|
16
|
+
IndexId = str
|
17
|
+
WorkerId = str
|
16
18
|
|
17
19
|
RootUri = Annotated[str, AfterValidator(_validate_root_uri)]
|
18
20
|
DatasetName = Annotated[str, StringConstraints(max_length=128, pattern=r"^[a-zA-Z_][a-zA-Z0-9_-]+$")]
|
spiral/api/workers.py
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
from enum import Enum, IntEnum
|
2
|
+
|
3
|
+
|
4
|
+
class CPU(IntEnum):
|
5
|
+
ONE = 1
|
6
|
+
TWO = 2
|
7
|
+
FOUR = 4
|
8
|
+
EIGHT = 8
|
9
|
+
|
10
|
+
def __str__(self):
|
11
|
+
return str(self.value)
|
12
|
+
|
13
|
+
|
14
|
+
class Memory(str, Enum):
|
15
|
+
MB_512 = "512Mi"
|
16
|
+
GB_1 = "1Gi"
|
17
|
+
GB_2 = "2Gi"
|
18
|
+
GB_4 = "4Gi"
|
19
|
+
GB_8 = "8Gi"
|
20
|
+
|
21
|
+
def __str__(self):
|
22
|
+
return self.value
|
23
|
+
|
24
|
+
|
25
|
+
class GcpRegion(str, Enum):
|
26
|
+
US_EAST4 = "us-east4"
|
27
|
+
EUROPE_WEST4 = "europe-west4"
|
28
|
+
|
29
|
+
def __str__(self):
|
30
|
+
return self.value
|
31
|
+
|
32
|
+
|
33
|
+
class ResourceClass(str, Enum):
|
34
|
+
"""Resource class for text index sync."""
|
35
|
+
|
36
|
+
SMALL = "small"
|
37
|
+
LARGE = "large"
|
38
|
+
|
39
|
+
def __str__(self):
|
40
|
+
return self.value
|
spiral/cli/indexes/__init__.py
CHANGED
@@ -1,9 +1,18 @@
|
|
1
|
+
from typing import Annotated
|
2
|
+
|
1
3
|
import rich
|
4
|
+
from typer import Option
|
2
5
|
|
6
|
+
from spiral.api.text_indexes import SyncIndexRequest
|
3
7
|
from spiral.cli import AsyncTyper, state
|
8
|
+
from spiral.cli.indexes.args import get_text_index_id
|
4
9
|
from spiral.cli.types import ProjectArg
|
5
10
|
|
11
|
+
from ...api.workers import ResourceClass
|
12
|
+
from . import workers
|
13
|
+
|
6
14
|
app = AsyncTyper(short_help="Indexes.")
|
15
|
+
app.add_typer(workers.app, name="workers")
|
7
16
|
|
8
17
|
|
9
18
|
@app.command(help="List indexes.")
|
@@ -17,3 +26,15 @@ def ls(
|
|
17
26
|
for index in indexes:
|
18
27
|
rich_table.add_row(index.id, index.name)
|
19
28
|
rich.print(rich_table)
|
29
|
+
|
30
|
+
|
31
|
+
@app.command(help="Trigger a sync job for the index.")
|
32
|
+
def sync(
|
33
|
+
project: ProjectArg,
|
34
|
+
name: Annotated[str | None, Option(help="Index name.")] = None,
|
35
|
+
resources: Annotated[ResourceClass, Option(help="Resources to use for the sync job.")] = ResourceClass.SMALL,
|
36
|
+
):
|
37
|
+
"""Trigger a sync job for the index."""
|
38
|
+
index_id = get_text_index_id(project, name)
|
39
|
+
response = state.spiral.api.text_indexes.sync_index(index_id, SyncIndexRequest(resources=resources))
|
40
|
+
rich.print(f"Triggered sync job {response.worker_id} for index {index_id}.")
|
spiral/cli/indexes/args.py
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
from typing import Annotated
|
2
|
+
|
3
|
+
import questionary
|
4
|
+
import rich
|
5
|
+
import typer
|
6
|
+
from questionary import Choice
|
7
|
+
from typer import Option
|
8
|
+
|
9
|
+
from spiral.api.projects import TextIndexResource
|
10
|
+
from spiral.api.types import IndexId
|
11
|
+
from spiral.cli import state
|
12
|
+
from spiral.cli.types import ProjectArg
|
13
|
+
|
14
|
+
|
15
|
+
def ask_index(project_id, title="Select an index"):
|
16
|
+
indexes: list[TextIndexResource] = list(state.spiral.project(project_id).indexes.list_indexes())
|
17
|
+
|
18
|
+
if not indexes:
|
19
|
+
rich.print("[red]No indexes found[/red]")
|
20
|
+
raise typer.Exit(1)
|
21
|
+
|
22
|
+
return questionary.select(
|
23
|
+
title,
|
24
|
+
choices=[Choice(title=index.name, value=index.id) for index in sorted(indexes, key=lambda t: (t.name, t.id))],
|
25
|
+
).ask()
|
26
|
+
|
27
|
+
|
28
|
+
def get_text_index_id(
|
29
|
+
project: ProjectArg,
|
30
|
+
name: Annotated[str | None, Option(help="Index name.")] = None,
|
31
|
+
) -> IndexId:
|
32
|
+
if name is None:
|
33
|
+
return ask_index(project)
|
34
|
+
|
35
|
+
indexes: list[TextIndexResource] = list(state.spiral.project(project).indexes.list_indexes())
|
36
|
+
for index in indexes:
|
37
|
+
if index.name == name:
|
38
|
+
return index.id
|
39
|
+
raise ValueError(f"Index not found: {name}")
|
spiral/cli/indexes/workers.py
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
from typing import Annotated
|
2
|
+
|
3
|
+
import rich
|
4
|
+
from typer import Option
|
5
|
+
|
6
|
+
from spiral.api.text_indexes import CreateWorkerRequest
|
7
|
+
from spiral.api.workers import CPU, GcpRegion, Memory
|
8
|
+
from spiral.cli import AsyncTyper, state
|
9
|
+
from spiral.cli.indexes.args import get_text_index_id
|
10
|
+
from spiral.cli.types import ProjectArg
|
11
|
+
|
12
|
+
app = AsyncTyper(short_help="Text Search Workers.")
|
13
|
+
|
14
|
+
|
15
|
+
@app.command(name="serve", help="Create a search worker.")
|
16
|
+
def serve(
|
17
|
+
project: ProjectArg,
|
18
|
+
index: Annotated[str | None, Option(help="Index name.")] = None,
|
19
|
+
region: Annotated[GcpRegion, Option(help="GCP region for the worker.")] = GcpRegion.US_EAST4,
|
20
|
+
cpu: Annotated[CPU, Option(help="CPU resources for the worker.")] = CPU.ONE,
|
21
|
+
memory: Annotated[Memory, Option(help="Memory resources for the worker in MB.")] = Memory.MB_512,
|
22
|
+
):
|
23
|
+
"""Create a new text search worker."""
|
24
|
+
index_id = get_text_index_id(project, index)
|
25
|
+
request = CreateWorkerRequest(cpu=cpu, memory=memory, region=region)
|
26
|
+
response = state.spiral.api.text_indexes.create_worker(index_id, request)
|
27
|
+
rich.print(f"Created worker {response.worker_id} for {index_id}.")
|
28
|
+
|
29
|
+
|
30
|
+
@app.command(name="shutdown", help="Shutdown a search worker.")
|
31
|
+
def shutdown(worker_id: str):
|
32
|
+
"""Shutdown a worker."""
|
33
|
+
state.spiral.api.text_indexes.shutdown_worker(worker_id)
|
34
|
+
rich.print(f"Requested worker {worker_id} to shutdown.")
|
35
|
+
|
36
|
+
|
37
|
+
@app.command(name="ls", help="List search workers.")
|
38
|
+
def ls(
|
39
|
+
project: ProjectArg,
|
40
|
+
index: Annotated[str | None, Option(help="Index name.")] = None,
|
41
|
+
):
|
42
|
+
"""List text search workers."""
|
43
|
+
index_id = get_text_index_id(project, index)
|
44
|
+
worker_ids = state.spiral.api.text_indexes.list_workers(index_id)
|
45
|
+
|
46
|
+
rich_table = rich.table.Table("Worker ID", "URL", title=f"Text Search Workers for {index_id}")
|
47
|
+
for worker_id in worker_ids:
|
48
|
+
try:
|
49
|
+
worker = state.spiral.api.text_indexes.get_worker(worker_id)
|
50
|
+
rich_table.add_row(
|
51
|
+
worker_id,
|
52
|
+
worker.url,
|
53
|
+
)
|
54
|
+
except Exception:
|
55
|
+
rich_table.add_row(
|
56
|
+
worker_id,
|
57
|
+
"Unavailable",
|
58
|
+
)
|
59
|
+
rich.print(rich_table)
|
spiral/cli/tables/__init__.py
CHANGED
@@ -1,35 +1,16 @@
|
|
1
1
|
from typing import Annotated
|
2
2
|
|
3
|
-
import questionary
|
4
3
|
import rich
|
5
|
-
import typer
|
6
|
-
from questionary import Choice
|
7
4
|
from typer import Argument, Option
|
8
5
|
|
9
6
|
from spiral import Spiral
|
10
|
-
from spiral.cli import AsyncTyper, state
|
7
|
+
from spiral.cli import AsyncTyper
|
8
|
+
from spiral.cli.tables.args import get_table
|
11
9
|
from spiral.cli.types import ProjectArg
|
12
|
-
from spiral.tables import Table
|
13
10
|
|
14
11
|
app = AsyncTyper(short_help="Spiral Tables.")
|
15
12
|
|
16
13
|
|
17
|
-
def ask_table(project_id, title="Select a table"):
|
18
|
-
tables = list(state.spiral.project(project_id).tables.list_tables())
|
19
|
-
|
20
|
-
if not tables:
|
21
|
-
rich.print("[red]No tables found[/red]")
|
22
|
-
raise typer.Exit(1)
|
23
|
-
|
24
|
-
return questionary.select(
|
25
|
-
title,
|
26
|
-
choices=[
|
27
|
-
Choice(title=f"{table.dataset}.{table.table}", value=f"{table.project_id}.{table.dataset}.{table.table}")
|
28
|
-
for table in tables
|
29
|
-
],
|
30
|
-
).ask()
|
31
|
-
|
32
|
-
|
33
14
|
@app.command(help="List tables.")
|
34
15
|
def ls(
|
35
16
|
project: ProjectArg,
|
@@ -48,7 +29,7 @@ def key_schema(
|
|
48
29
|
table: Annotated[str | None, Option(help="Table name.")] = None,
|
49
30
|
dataset: Annotated[str | None, Option(help="Dataset name.")] = None,
|
50
31
|
):
|
51
|
-
_, table = _get_table(project, table, dataset)
|
32
|
+
_, table = get_table(project, table, dataset)
|
52
33
|
rich.print(table.key_schema)
|
53
34
|
|
54
35
|
|
@@ -58,7 +39,7 @@ def schema(
|
|
58
39
|
table: Annotated[str | None, Option(help="Table name.")] = None,
|
59
40
|
dataset: Annotated[str | None, Option(help="Dataset name.")] = None,
|
60
41
|
):
|
61
|
-
_, table = _get_table(project, table, dataset)
|
42
|
+
_, table = get_table(project, table, dataset)
|
62
43
|
rich.print(table.schema)
|
63
44
|
|
64
45
|
|
@@ -68,7 +49,7 @@ def flush(
|
|
68
49
|
table: Annotated[str | None, Option(help="Table name.")] = None,
|
69
50
|
dataset: Annotated[str | None, Option(help="Dataset name.")] = None,
|
70
51
|
):
|
71
|
-
identifier, table = _get_table(project, table, dataset)
|
52
|
+
identifier, table = get_table(project, table, dataset)
|
72
53
|
table.maintenance().flush_wal()
|
73
54
|
print(f"Flushed WAL for table {identifier} in project {project}.")
|
74
55
|
|
@@ -80,7 +61,7 @@ def debug(
|
|
80
61
|
dataset: Annotated[str | None, Option(help="Dataset name.")] = None,
|
81
62
|
column_group: Annotated[str, Argument(help="Dot-separated column group path.")] = ".",
|
82
63
|
):
|
83
|
-
_, table = _get_table(project, table, dataset)
|
64
|
+
_, table = get_table(project, table, dataset)
|
84
65
|
if column_group != ".":
|
85
66
|
projection = table[column_group]
|
86
67
|
else:
|
@@ -97,7 +78,7 @@ def manifests(
|
|
97
78
|
dataset: Annotated[str | None, Option(help="Dataset name.")] = None,
|
98
79
|
column_group: Annotated[str, Argument(help="Dot-separated column group path.")] = ".",
|
99
80
|
):
|
100
|
-
_, table = _get_table(project, table, dataset)
|
81
|
+
_, table = get_table(project, table, dataset)
|
101
82
|
if column_group != ".":
|
102
83
|
projection = table[column_group]
|
103
84
|
else:
|
@@ -105,17 +86,3 @@ def manifests(
|
|
105
86
|
scan = projection.scan()
|
106
87
|
|
107
88
|
scan._dump_manifests()
|
108
|
-
|
109
|
-
|
110
|
-
def _get_table(
|
111
|
-
project: ProjectArg,
|
112
|
-
table: Annotated[str | None, Option(help="Table name.")] = None,
|
113
|
-
dataset: Annotated[str | None, Option(help="Dataset name.")] = None,
|
114
|
-
) -> (str, Table):
|
115
|
-
if table is None:
|
116
|
-
identifier = ask_table(project)
|
117
|
-
else:
|
118
|
-
identifier = table
|
119
|
-
if dataset is not None:
|
120
|
-
identifier = f"{dataset}.{table}"
|
121
|
-
return identifier, state.spiral.project(project).tables.table(identifier)
|
spiral/cli/tables/args.py
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
from typing import Annotated
|
2
|
+
|
3
|
+
import questionary
|
4
|
+
import rich
|
5
|
+
import typer
|
6
|
+
from questionary import Choice
|
7
|
+
from typer import Option
|
8
|
+
|
9
|
+
from spiral.api.projects import TableResource
|
10
|
+
from spiral.cli import state
|
11
|
+
from spiral.cli.types import ProjectArg
|
12
|
+
from spiral.tables import Table
|
13
|
+
|
14
|
+
|
15
|
+
def ask_table(project_id, title="Select a table"):
|
16
|
+
tables: list[TableResource] = list(state.spiral.project(project_id).tables.list_tables())
|
17
|
+
|
18
|
+
if not tables:
|
19
|
+
rich.print("[red]No tables found[/red]")
|
20
|
+
raise typer.Exit(1)
|
21
|
+
|
22
|
+
return questionary.select(
|
23
|
+
title,
|
24
|
+
choices=[
|
25
|
+
Choice(title=f"{table.dataset}.{table.table}", value=f"{table.project_id}.{table.dataset}.{table.table}")
|
26
|
+
for table in sorted(tables, key=lambda t: (t.dataset, t.table))
|
27
|
+
],
|
28
|
+
).ask()
|
29
|
+
|
30
|
+
|
31
|
+
def get_table(
|
32
|
+
project: ProjectArg,
|
33
|
+
table: Annotated[str | None, Option(help="Table name.")] = None,
|
34
|
+
dataset: Annotated[str | None, Option(help="Dataset name.")] = None,
|
35
|
+
) -> (str, Table):
|
36
|
+
if table is None:
|
37
|
+
identifier = ask_table(project)
|
38
|
+
else:
|
39
|
+
identifier = table
|
40
|
+
if dataset is not None:
|
41
|
+
identifier = f"{dataset}.{table}"
|
42
|
+
return identifier, state.spiral.project(project).tables.table(identifier)
|
spiral/core/index/__init__.pyi
CHANGED
@@ -1,15 +1,7 @@
|
|
1
1
|
import pyarrow as pa
|
2
2
|
|
3
|
-
class IndexStatus:
|
4
|
-
status: str
|
5
|
-
staleness_s: int | None
|
6
|
-
# An extent of keys that are indexed.
|
7
|
-
# key_extent: KeyExtent | None
|
8
|
-
|
9
3
|
class TextIndex:
|
10
4
|
id: str
|
11
5
|
|
12
|
-
def status(self) -> IndexStatus: ...
|
13
|
-
|
14
6
|
class SearchScan:
|
15
7
|
def to_record_batches(self) -> pa.RecordBatchReader: ...
|
spiral/core/table/__init__.pyi
CHANGED
@@ -36,6 +36,7 @@ class Table:
|
|
36
36
|
|
37
37
|
id: str
|
38
38
|
root_uri: str
|
39
|
+
mount_id: str | None
|
39
40
|
key_schema: Schema
|
40
41
|
metastore: PyMetastore
|
41
42
|
|
@@ -68,16 +69,16 @@ class TableScan:
|
|
68
69
|
shuffle_buffer_size: int | None = None,
|
69
70
|
shuffle_pool_num_rows: int | None = None,
|
70
71
|
) -> pa.RecordBatchReader: ...
|
71
|
-
def
|
72
|
-
def
|
72
|
+
def column_group_state(self, column_group: ColumnGroup) -> ColumnGroupState: ...
|
73
|
+
def key_space_state(self, table_id: str) -> KeySpaceState: ...
|
73
74
|
def metrics(self) -> dict[str, Any]: ...
|
74
75
|
|
75
|
-
class
|
76
|
+
class KeySpaceState:
|
76
77
|
manifest: FragmentManifest
|
77
78
|
|
78
79
|
def key_schema(self) -> Schema: ...
|
79
80
|
|
80
|
-
class
|
81
|
+
class ColumnGroupState:
|
81
82
|
manifest: FragmentManifest
|
82
83
|
|
83
84
|
def schema(self) -> Schema: ...
|
spiral/expressions/__init__.py
CHANGED
@@ -47,7 +47,7 @@ __all__ = [
|
|
47
47
|
"not_",
|
48
48
|
"or_",
|
49
49
|
"pack",
|
50
|
-
"
|
50
|
+
"aux",
|
51
51
|
"ref",
|
52
52
|
"refs",
|
53
53
|
"scalar",
|
@@ -116,26 +116,17 @@ def lift(expr: ExprLike) -> Expr:
|
|
116
116
|
return scalar(expr)
|
117
117
|
|
118
118
|
|
119
|
-
def
|
120
|
-
"""Create a variable expression referencing a
|
119
|
+
def aux(name: builtins.str, dtype: pa.DataType) -> Expr:
|
120
|
+
"""Create a variable expression referencing a column in the auxiliary table.
|
121
121
|
|
122
|
-
|
123
|
-
name: variable name
|
124
|
-
"""
|
125
|
-
return Expr(_lib.expr.keyed(name))
|
126
|
-
|
127
|
-
|
128
|
-
def keyed(name: builtins.str, dtype: pa.DataType | None = None) -> Expr:
|
129
|
-
"""Create a variable expression referencing a column in the key table.
|
130
|
-
|
131
|
-
Key table is optionally given to `Scan#to_record_batches` function when reading only specific keys
|
122
|
+
Auxiliary table is optionally given to `Scan#to_record_batches` function when reading only specific keys
|
132
123
|
or doing cell pushdown.
|
133
124
|
|
134
125
|
Args:
|
135
126
|
name: variable name
|
136
|
-
dtype: must match dtype of the column in the
|
127
|
+
dtype: must match dtype of the column in the auxiliary table.
|
137
128
|
"""
|
138
|
-
return Expr(_lib.expr.
|
129
|
+
return Expr(_lib.expr.aux(name, dtype))
|
139
130
|
|
140
131
|
|
141
132
|
def scalar(value: Any) -> Expr:
|
spiral/expressions/mp4.py
CHANGED
@@ -20,13 +20,13 @@ def read(expr: ExprLike | str, frames: ExprLike | str, crop: ExprLike | str):
|
|
20
20
|
|
21
21
|
Args:
|
22
22
|
expr: The referenced `Mp4` bytes.
|
23
|
-
A str is assumed to be the `se.
|
23
|
+
A str is assumed to be the `se.aux` expression.
|
24
24
|
frames: The range of frames to read. Each element must be a list of two uint32,
|
25
25
|
frame start and frame end, or null / empty list to read all frames.
|
26
|
-
A str is assumed to be the `se.
|
26
|
+
A str is assumed to be the `se.aux` expression.
|
27
27
|
crop: The crop of the frames to read. Each element must be a list of four uint32,
|
28
28
|
x, y, width, height or null / empty list to read full frames.
|
29
|
-
A str is assumed to be the `se.
|
29
|
+
A str is assumed to be the `se.aux` expression.
|
30
30
|
|
31
31
|
Returns:
|
32
32
|
An array where each element is a decoded cropped video with fields:
|
@@ -36,17 +36,17 @@ def read(expr: ExprLike | str, frames: ExprLike | str, crop: ExprLike | str):
|
|
36
36
|
frames: Number of frames with type `pa.uint32()`.
|
37
37
|
"""
|
38
38
|
from spiral import _lib
|
39
|
-
from spiral.expressions import
|
39
|
+
from spiral.expressions import aux, lift
|
40
40
|
|
41
41
|
if isinstance(expr, str):
|
42
|
-
expr =
|
42
|
+
expr = aux(
|
43
43
|
expr,
|
44
44
|
pa.struct([("__ref__", pa.struct([("id", pa.string()), ("begin", pa.uint64()), ("end", pa.uint64())]))]),
|
45
45
|
)
|
46
46
|
if isinstance(frames, str):
|
47
|
-
frames =
|
47
|
+
frames = aux(frames, pa.list_(pa.uint32()))
|
48
48
|
if isinstance(crop, str):
|
49
|
-
crop =
|
49
|
+
crop = aux(crop, pa.list_(pa.uint32()))
|
50
50
|
|
51
51
|
expr = lift(expr)
|
52
52
|
frames = lift(frames)
|
spiral/expressions/refs.py
CHANGED
@@ -29,15 +29,15 @@ def deref(expr: ExprLike | str, field: str | None = None) -> Expr:
|
|
29
29
|
column back into their original form, e.g. binary.
|
30
30
|
|
31
31
|
Args:
|
32
|
-
expr: The expression to de-reference. A str is assumed to be the `se.
|
32
|
+
expr: The expression to de-reference. A str is assumed to be the `se.aux` expression.
|
33
33
|
field: If the expr evaluates into struct, the field name of that struct that should be de-referenced.
|
34
34
|
If `None`, the expr must evaluate into a reference type.
|
35
35
|
"""
|
36
36
|
from spiral import _lib
|
37
|
-
from spiral.expressions import
|
37
|
+
from spiral.expressions import aux, lift
|
38
38
|
|
39
39
|
if isinstance(expr, str):
|
40
|
-
expr =
|
40
|
+
expr = aux(
|
41
41
|
expr,
|
42
42
|
pa.struct([("__ref__", pa.struct([("id", pa.string()), ("begin", pa.uint64()), ("end", pa.uint64())]))]),
|
43
43
|
)
|
spiral/indexes/index.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
import datetime
|
2
1
|
from typing import TYPE_CHECKING
|
3
2
|
|
4
3
|
from spiral.core.index import TextIndex as CoreTextIndex
|
@@ -27,8 +26,3 @@ class TextIndex(Expr):
|
|
27
26
|
@property
|
28
27
|
def name(self) -> str:
|
29
28
|
return self._name
|
30
|
-
|
31
|
-
def status(self) -> (str, datetime.timedelta | None):
|
32
|
-
"""Fetch the status of the index. If status is ready, returns the staleness of the index."""
|
33
|
-
status = self._index.status()
|
34
|
-
return status.status, datetime.timedelta(seconds=status.staleness_s) if status.staleness_s is not None else None
|
spiral/substrait_.py
CHANGED
@@ -248,11 +248,6 @@ class SubstraitConverter:
|
|
248
248
|
case "struct_field", ref:
|
249
249
|
ref: ExpressionReferenceSegmentStructField
|
250
250
|
field_name = scope_type.field(ref.field).name
|
251
|
-
|
252
|
-
if field_name in self.key_names:
|
253
|
-
# This is a key column, so we need to select it from the scope.
|
254
|
-
return se.key(field_name)
|
255
|
-
|
256
251
|
scope = se.getitem(scope, field_name)
|
257
252
|
scope_type = scope_type.field(ref.field).type
|
258
253
|
if ref.is_set("child"):
|
spiral/tables/client.py
CHANGED
@@ -25,6 +25,9 @@ class Tables:
|
|
25
25
|
"""
|
26
26
|
|
27
27
|
def __init__(self, api: SpiralAPI, spiral: CoreSpiral, *, project_id: str | None = None):
|
28
|
+
if project_id == "":
|
29
|
+
raise ValueError("Project ID cannot be an empty string.")
|
30
|
+
|
28
31
|
self._api = api
|
29
32
|
self._spiral = spiral
|
30
33
|
self._project_id = project_id
|
spiral/tables/debug/manifests.py
CHANGED
@@ -10,14 +10,14 @@ def display_manifests(scan: TableScan):
|
|
10
10
|
raise NotImplementedError("Multiple table scans are not supported.")
|
11
11
|
table_id = scan.table_ids()[0]
|
12
12
|
|
13
|
-
key_space_manifest: FragmentManifest = scan.
|
13
|
+
key_space_manifest: FragmentManifest = scan.key_space_state(table_id).manifest
|
14
14
|
_table_of_fragments(
|
15
15
|
key_space_manifest,
|
16
16
|
title="Key Space manifest",
|
17
17
|
)
|
18
18
|
|
19
19
|
for column_group in scan.column_groups():
|
20
|
-
column_group_manifest: FragmentManifest = scan.
|
20
|
+
column_group_manifest: FragmentManifest = scan.column_group_state(column_group).manifest
|
21
21
|
_table_of_fragments(
|
22
22
|
column_group_manifest,
|
23
23
|
title=f"Column Group manifest for {str(column_group)}",
|
spiral/tables/debug/scan.py
CHANGED
@@ -15,17 +15,17 @@ def show_scan(scan: TableScan):
|
|
15
15
|
column_groups = scan.column_groups()
|
16
16
|
|
17
17
|
splits = scan.split()
|
18
|
-
|
18
|
+
key_space_state = scan.key_space_state(table_id)
|
19
19
|
|
20
20
|
# Collect all key bounds from all manifests. This makes sure all visualizations are aligned.
|
21
21
|
key_points = set()
|
22
|
-
key_space_manifest =
|
22
|
+
key_space_manifest = key_space_state.manifest
|
23
23
|
for i in range(len(key_space_manifest)):
|
24
24
|
fragment_file = key_space_manifest[i]
|
25
25
|
key_points.add(fragment_file.key_extent.min)
|
26
26
|
key_points.add(fragment_file.key_extent.max)
|
27
27
|
for cg in column_groups:
|
28
|
-
cg_scan = scan.
|
28
|
+
cg_scan = scan.column_group_state(cg)
|
29
29
|
cg_manifest = cg_scan.manifest
|
30
30
|
for i in range(len(cg_manifest)):
|
31
31
|
fragment_file = cg_manifest[i]
|
@@ -39,7 +39,7 @@ def show_scan(scan: TableScan):
|
|
39
39
|
|
40
40
|
show_manifest(key_space_manifest, scope="Key space", key_points=key_points, splits=splits)
|
41
41
|
for cg in scan.column_groups():
|
42
|
-
cg_scan = scan.
|
42
|
+
cg_scan = scan.column_group_state(cg)
|
43
43
|
# Skip table id from the start of the column group.
|
44
44
|
show_manifest(cg_scan.manifest, scope=".".join(cg.path[1:]), key_points=key_points, splits=splits)
|
45
45
|
|
spiral/tables/table.py
CHANGED
@@ -71,11 +71,6 @@ class Table(Expr):
|
|
71
71
|
return f'Table("{self.identifier}")'
|
72
72
|
|
73
73
|
def __getitem__(self, item: str) -> Expr:
|
74
|
-
from spiral import expressions as se
|
75
|
-
|
76
|
-
if item in self._key_columns:
|
77
|
-
return se.key(name=item)
|
78
|
-
|
79
74
|
return super().__getitem__(item)
|
80
75
|
|
81
76
|
def select(self, *paths: str, exclude: list[str] = None) -> "Expr":
|
@@ -86,14 +81,7 @@ class Table(Expr):
|
|
86
81
|
"Cannot use 'exclude' arg with key columns. Use 'exclude_keys' and an explicit select of keys."
|
87
82
|
)
|
88
83
|
|
89
|
-
|
90
|
-
other_paths = set(paths) - key_paths
|
91
|
-
if not key_paths:
|
92
|
-
return super().select(*paths, exclude=exclude)
|
93
|
-
|
94
|
-
from spiral import expressions as se
|
95
|
-
|
96
|
-
return se.merge(se.pack({key: se.key(key) for key in key_paths}), super().select(*other_paths, exclude=exclude))
|
84
|
+
return super().select(*paths, exclude=exclude)
|
97
85
|
|
98
86
|
@property
|
99
87
|
def key_schema(self) -> Schema:
|
File without changes
|
File without changes
|