pyspiral 0.1.0__cp310-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
Sign up to get free protection for your applications and to get access to all the features.
- pyspiral-0.1.0.dist-info/METADATA +48 -0
- pyspiral-0.1.0.dist-info/RECORD +81 -0
- pyspiral-0.1.0.dist-info/WHEEL +4 -0
- pyspiral-0.1.0.dist-info/entry_points.txt +2 -0
- spiral/__init__.py +11 -0
- spiral/_lib.abi3.so +0 -0
- spiral/adbc.py +386 -0
- spiral/api/__init__.py +221 -0
- spiral/api/admin.py +29 -0
- spiral/api/filesystems.py +125 -0
- spiral/api/organizations.py +90 -0
- spiral/api/projects.py +160 -0
- spiral/api/tables.py +94 -0
- spiral/api/tokens.py +56 -0
- spiral/api/workloads.py +45 -0
- spiral/arrow.py +209 -0
- spiral/authn/__init__.py +0 -0
- spiral/authn/authn.py +89 -0
- spiral/authn/device.py +206 -0
- spiral/authn/github_.py +33 -0
- spiral/authn/modal_.py +18 -0
- spiral/catalog.py +78 -0
- spiral/cli/__init__.py +82 -0
- spiral/cli/__main__.py +4 -0
- spiral/cli/admin.py +21 -0
- spiral/cli/app.py +48 -0
- spiral/cli/console.py +95 -0
- spiral/cli/fs.py +47 -0
- spiral/cli/login.py +13 -0
- spiral/cli/org.py +90 -0
- spiral/cli/printer.py +45 -0
- spiral/cli/project.py +107 -0
- spiral/cli/state.py +3 -0
- spiral/cli/table.py +20 -0
- spiral/cli/token.py +27 -0
- spiral/cli/types.py +53 -0
- spiral/cli/workload.py +59 -0
- spiral/config.py +26 -0
- spiral/core/__init__.py +0 -0
- spiral/core/core/__init__.pyi +53 -0
- spiral/core/manifests/__init__.pyi +53 -0
- spiral/core/metastore/__init__.pyi +91 -0
- spiral/core/spec/__init__.pyi +257 -0
- spiral/dataset.py +239 -0
- spiral/debug.py +251 -0
- spiral/expressions/__init__.py +222 -0
- spiral/expressions/base.py +149 -0
- spiral/expressions/http.py +86 -0
- spiral/expressions/io.py +100 -0
- spiral/expressions/list_.py +68 -0
- spiral/expressions/refs.py +44 -0
- spiral/expressions/str_.py +39 -0
- spiral/expressions/struct.py +57 -0
- spiral/expressions/tiff.py +223 -0
- spiral/expressions/udf.py +46 -0
- spiral/grpc_.py +32 -0
- spiral/project.py +137 -0
- spiral/proto/_/__init__.py +0 -0
- spiral/proto/_/arrow/__init__.py +0 -0
- spiral/proto/_/arrow/flight/__init__.py +0 -0
- spiral/proto/_/arrow/flight/protocol/__init__.py +0 -0
- spiral/proto/_/arrow/flight/protocol/sql/__init__.py +1990 -0
- spiral/proto/_/scandal/__init__.py +223 -0
- spiral/proto/_/spfs/__init__.py +36 -0
- spiral/proto/_/spiral/__init__.py +0 -0
- spiral/proto/_/spiral/table/__init__.py +225 -0
- spiral/proto/_/spiraldb/__init__.py +0 -0
- spiral/proto/_/spiraldb/metastore/__init__.py +499 -0
- spiral/proto/__init__.py +0 -0
- spiral/proto/scandal/__init__.py +45 -0
- spiral/proto/spiral/__init__.py +0 -0
- spiral/proto/spiral/table/__init__.py +96 -0
- spiral/proto/substrait/__init__.py +3399 -0
- spiral/proto/substrait/extensions/__init__.py +115 -0
- spiral/proto/util.py +41 -0
- spiral/py.typed +0 -0
- spiral/scan_.py +168 -0
- spiral/settings.py +157 -0
- spiral/substrait_.py +275 -0
- spiral/table.py +157 -0
- spiral/types_.py +6 -0
@@ -0,0 +1,48 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: pyspiral
|
3
|
+
Version: 0.1.0
|
4
|
+
Classifier: Intended Audience :: Science/Research
|
5
|
+
Classifier: Operating System :: OS Independent
|
6
|
+
Classifier: Programming Language :: Python
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
8
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
9
|
+
Classifier: Programming Language :: Python :: 3.10
|
10
|
+
Classifier: Programming Language :: Python :: 3.11
|
11
|
+
Classifier: Programming Language :: Python :: 3.12
|
12
|
+
Classifier: Programming Language :: Python :: 3.13
|
13
|
+
Classifier: Programming Language :: Rust
|
14
|
+
Classifier: License :: Other/Proprietary License
|
15
|
+
Requires-Dist: betterproto==2.0.0b7
|
16
|
+
Requires-Dist: google-re2>=1.1.20240702
|
17
|
+
Requires-Dist: grpclib>=0.4.7
|
18
|
+
Requires-Dist: hishel>=0.0.30
|
19
|
+
Requires-Dist: httpx>=0.27.0
|
20
|
+
Requires-Dist: numpy>=1.26.3
|
21
|
+
Requires-Dist: opentelemetry-api>=1.27.0
|
22
|
+
Requires-Dist: opentelemetry-sdk>=1.27.0
|
23
|
+
Requires-Dist: polars>=1.6.0
|
24
|
+
Requires-Dist: pyarrow>=17.0.0
|
25
|
+
Requires-Dist: pydantic-settings>=2.3.4
|
26
|
+
Requires-Dist: pydantic[email]>=2.5.3
|
27
|
+
Requires-Dist: pyjwt[crypto]>=2.9.0
|
28
|
+
Requires-Dist: pyroaring>=0.4.4
|
29
|
+
Requires-Dist: questionary>=2.0.1
|
30
|
+
Requires-Dist: tqdm>=4.66.5
|
31
|
+
Requires-Dist: typer>=0.12.3
|
32
|
+
Requires-Dist: xxhash>=3.4.1
|
33
|
+
Requires-Dist: nanoid>=2.0.0
|
34
|
+
Requires-Dist: sqlglot[rs]>=25.25.1
|
35
|
+
Requires-Dist: duckdb>=1.1.1
|
36
|
+
Requires-Dist: pyperclip>=1.9.0
|
37
|
+
Summary: Python implementation of Spiral table format.
|
38
|
+
Keywords: vortex,spiraldb
|
39
|
+
Home-Page: https://spiraldb.com
|
40
|
+
Author: Spiral<hello@spiraldb.com>
|
41
|
+
Author-email: SpiralDB <hello@spiraldb.com>
|
42
|
+
License: Proprietary
|
43
|
+
Requires-Python: >=3.10
|
44
|
+
Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
|
45
|
+
Project-URL: Source Code, https://github.com/spiraldb/spiraldb
|
46
|
+
|
47
|
+
# PySpiral
|
48
|
+
|
@@ -0,0 +1,81 @@
|
|
1
|
+
pyspiral-0.1.0.dist-info/METADATA,sha256=rPBsHTqcVDRoUPx9y63fXXLZ-7K1D_p6jQBjeRTM-t8,1699
|
2
|
+
pyspiral-0.1.0.dist-info/WHEEL,sha256=1L8UL2wrWiNkziZv-3hQSZMxy7RYSqejqwbCb6Xe2FM,128
|
3
|
+
pyspiral-0.1.0.dist-info/entry_points.txt,sha256=uft7u-a6g40NLt4Q6BleWbK4NY0M8nZuYPpP8DV0EOk,45
|
4
|
+
spiral/catalog.py,sha256=BtthmRApU1RSb6KbUfVTM2aYeLsnlO0nKDYHBYhdr9M,2496
|
5
|
+
spiral/grpc_.py,sha256=f3czdP1Mxme42Y5--a5ogYq1TTiWn-J_MlGjwJ2mWwM,1015
|
6
|
+
spiral/substrait_.py,sha256=5ZXnYcsXEdrBogECnoL6IMlsjsseYHEnVARgRpy2vt8,12671
|
7
|
+
spiral/table.py,sha256=iJ-Lhu9ieSNg728cvUNRERcxKz7pj8hMCXPGBFHg7tI,4845
|
8
|
+
spiral/core/spec/__init__.pyi,sha256=zwOPhpBS_iOrPkOdc4ySpgzICZNbteMZf4c2wdkWw1Y,7251
|
9
|
+
spiral/core/core/__init__.pyi,sha256=-OCtTgqjUN7sACAmgrAA_TrqmuP7epNSyL9XjqnNTa4,1914
|
10
|
+
spiral/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
11
|
+
spiral/core/metastore/__init__.pyi,sha256=pdKED91GVJ9XWxTWc9gwkHvVHV_RKxvL43Ofs5ndmew,3145
|
12
|
+
spiral/core/manifests/__init__.pyi,sha256=DXr4Ab_Xo11AzrJlSj7FTSJc8qoO-SkL-_ik4kB855U,1516
|
13
|
+
spiral/__init__.py,sha256=4EiQkY17qHT9dpxu41fdOV1kqGl_b-HXNRQg--ZwJbo,286
|
14
|
+
spiral/api/__init__.py,sha256=Ub3IYeQUJ_z0-Y_SXmNasxb6uefKynQkuUZgRM1enyc,6660
|
15
|
+
spiral/api/admin.py,sha256=HJBrRJScbcdDuFhF_06E0EyE-_Y0osfYPxVoRAyEoTc,837
|
16
|
+
spiral/api/workloads.py,sha256=4MWs2pp9AWvx6cZhgyW-ehyCRxpHc_NAQgHaoYOEBfg,1266
|
17
|
+
spiral/api/organizations.py,sha256=-PO93HTX02IxhXM6SJpAhnAXpVW1WjthFO4-AOzZAC4,2670
|
18
|
+
spiral/api/filesystems.py,sha256=NPf5PCyCQ7eEU723fOdKcuGPgn83pthkb_jJ9aB0fwM,3431
|
19
|
+
spiral/api/tables.py,sha256=uPoWkkcW7lJUxU0fUgDK5Gy_DT8y7gJFXdgxQpqcc5w,2548
|
20
|
+
spiral/api/projects.py,sha256=-VGlu5V3TJ3XLCGu85bPHRFiktIADnAxfLWIH8Rmxug,4986
|
21
|
+
spiral/api/tokens.py,sha256=WaSRr_l3i81t5u2qi3kWW-fySbyKFm-PQK1ZlmoSByc,1464
|
22
|
+
spiral/adbc.py,sha256=H_bzevPy5teyZKzjczh1gQ_zPcfk5sNASiJKQvyab9E,13830
|
23
|
+
spiral/config.py,sha256=ovtE5D3r6_g90ZRDJhlJyWhOBlxLjagvomOyA-VZmdc,911
|
24
|
+
spiral/settings.py,sha256=e_F6GQea6ljXzCRgFxMBMecoGhFpEhePhMRQkYOoVO4,4555
|
25
|
+
spiral/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
26
|
+
spiral/scan_.py,sha256=rbLl85yeOkRuLsbk6QKng0_U4gtGGsW6C-aJiJSxuv8,5946
|
27
|
+
spiral/dataset.py,sha256=jUeXvE4B5nKh9VNmHxvROQbEkTUxqyYS8oS6EQyRbng,7555
|
28
|
+
spiral/expressions/__init__.py,sha256=T2POn0Z-mQj9PY8ukYWYn5A832Fz7Y6F8RRASo6Xl3A,5638
|
29
|
+
spiral/expressions/struct.py,sha256=MuxoBP6ESpwmjzusG-_HxHGYKvQQz6AZWzvw7vNUHJM,2007
|
30
|
+
spiral/expressions/refs.py,sha256=F3xr7wmrbAawMkLTWcvVwczbOMNOnIttSMRDuzjZHbo,1774
|
31
|
+
spiral/expressions/str_.py,sha256=tY8RXW3JWvr1-bEfCZtk5FAf11wKJnXPuA9EoeJ9tA4,1265
|
32
|
+
spiral/expressions/base.py,sha256=mTBwS6CwdDaV8uotjZUiKi7GHQzX2TRPpnseJJUDrR0,4776
|
33
|
+
spiral/expressions/list_.py,sha256=nbo4xQAuqBsQGajq_JgORaJl8_CDvOAv14zMbqmtZh4,1814
|
34
|
+
spiral/expressions/tiff.py,sha256=k5GMzm4FmGBJMyja-D6u_kt_-vHYIdj4bnYYZut5lK4,7579
|
35
|
+
spiral/expressions/udf.py,sha256=vOlrdxiVpt7vdSgiTKX_XR86YKyu02Fdwb9xlINCby4,1363
|
36
|
+
spiral/expressions/io.py,sha256=gJ2a0FKMmdxarWKENulPRwH7KDvSJTIh_OUxX306xAM,3045
|
37
|
+
spiral/expressions/http.py,sha256=begUydWoFHEqjeLkATvI_v66Ez6_rR-OQBWO5cHbb9c,2742
|
38
|
+
spiral/authn/modal_.py,sha256=agcnR3dYTslkH2K_a2Eis_2JWn9Ps11FVrGG_jkOdGk,472
|
39
|
+
spiral/authn/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
40
|
+
spiral/authn/authn.py,sha256=OCGJAUfoKLiXw9xAcAnX6i6mBRlvsl6qEFcimqQOu7g,2555
|
41
|
+
spiral/authn/github_.py,sha256=K-0RUHDreINjnCDHyT9aeVDRk6WtNP7noBYEcwdz2W4,1313
|
42
|
+
spiral/authn/device.py,sha256=ohHSVLW3a-qLNYQGN-3kXxV_836xOe0UYBN8i63cqAQ,6796
|
43
|
+
spiral/arrow.py,sha256=LBPwZcGkP4kXb42_kl5IUwWW3DO84CV3QJDwCHjG5Dg,7225
|
44
|
+
spiral/project.py,sha256=q9jHql7hz5OQzPfDfvE_hN3K9cZotD0cxkdug9EUTwM,4889
|
45
|
+
spiral/debug.py,sha256=t590eAUtNWwMTsSdkjVNN7J1iMqY2p4PRJ3BWR_ozho,8999
|
46
|
+
spiral/cli/table.py,sha256=eh2NAk0GlfvthwRNeIbcZTsRWU3ypFx_uu9OaOLHPUo,628
|
47
|
+
spiral/cli/__init__.py,sha256=CoiAJ7FDgqjG_TrU-6SpP1hyZloIlEP4wcwnrF8flHM,2237
|
48
|
+
spiral/cli/state.py,sha256=1quvei8TnDTT6mDRo58P8FUfy4w16Z9sggBz7cFgllY,70
|
49
|
+
spiral/cli/types.py,sha256=4cphJs-i0vfq_CcnHxT9FpHiZdGwxME5GnOmIGB6Thw,1436
|
50
|
+
spiral/cli/admin.py,sha256=3pIs6PxDugMtdgzfRpn_HfBDv-cwAYtF0cJP2INB01A,579
|
51
|
+
spiral/cli/fs.py,sha256=8sQAgMahAq0gtXJqnuiKUkx4sO6vEEF4Iaq_-AF3wro,1524
|
52
|
+
spiral/cli/token.py,sha256=dv30aa745bbS-c3tyzQUTSxGG_N0kCt8G4bip9fP_EM,968
|
53
|
+
spiral/cli/workload.py,sha256=-XreFPJcX7kZvcYE3oQMeGkkYoXi25R5nuktJPg-PuY,2000
|
54
|
+
spiral/cli/__main__.py,sha256=kNaKM2xgJo7GRogf83nYldLM-RGUR6vymdGwZxywQu0,71
|
55
|
+
spiral/cli/project.py,sha256=aCxvw8UVwSmh_anArizIQ3_pLVT6QH9jYwMsGfpJUgM,4486
|
56
|
+
spiral/cli/app.py,sha256=2oZfDTgj_gZ-lFMMzzJJTnvVzQhp_iedvH-FJnaaMW0,1487
|
57
|
+
spiral/cli/org.py,sha256=ezWhoGUkJQQAwI1jKvDP8uZPNlnou_hXtRDa1us5cSE,2935
|
58
|
+
spiral/cli/login.py,sha256=C7VpqVyYO2daUeIWHoelWnSGN7cju8YEuqOy12ImH4c,381
|
59
|
+
spiral/cli/console.py,sha256=-OP0bB_efxhWh4lZ95KRdu-SRgSUMJ47Rbi9FHv1TlY,2577
|
60
|
+
spiral/cli/printer.py,sha256=5HD3UcszFfPk-dK8U5akuvtXqMB7PMgOB1DFYMqspG8,1625
|
61
|
+
spiral/types_.py,sha256=W_jyO7F6rpPiH69jhgSgV7OxQZbOlb1Ho3InpKUP6Eo,155
|
62
|
+
spiral/proto/scandal/__init__.py,sha256=wAAEkPN4S4XDpGQtw1MV5zFUeHM01XSAa5tgUgcALvg,777
|
63
|
+
spiral/proto/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
64
|
+
spiral/proto/util.py,sha256=smnvVo6nYH3FfDm9jqhNLaXz4bbTBaQezHQDCTvZyiQ,1486
|
65
|
+
spiral/proto/substrait/__init__.py,sha256=pV4-T-lwAHKkfFrNYSUGY4IkbIvuKjSo_imzF7BLj_s,126526
|
66
|
+
spiral/proto/substrait/extensions/__init__.py,sha256=yD7dg0TBqn-GK_L0qeVof1GKnwSLg_kPyQSV3kcSljs,3655
|
67
|
+
spiral/proto/spiral/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
68
|
+
spiral/proto/spiral/table/__init__.py,sha256=_F1f52RMkZsXofPXpJb2KE8KR5l6zxCtrGrabR1uDxo,2816
|
69
|
+
spiral/proto/_/arrow/flight/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
70
|
+
spiral/proto/_/arrow/flight/protocol/sql/__init__.py,sha256=_xhj9QkWEW1qZ-iVxcQ8k4EjYr7KJ5ofitJGqVUGQi4,79921
|
71
|
+
spiral/proto/_/arrow/flight/protocol/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
72
|
+
spiral/proto/_/arrow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
73
|
+
spiral/proto/_/scandal/__init__.py,sha256=rQJdbN3UKDJ8vOJ5V7l3KumNHlRyY8iw25HCLsIDB4I,6582
|
74
|
+
spiral/proto/_/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
75
|
+
spiral/proto/_/spiraldb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
76
|
+
spiral/proto/_/spiraldb/metastore/__init__.py,sha256=40Egtg8MRYTaTTYRKOHkwuiyXEkw3Yg7ETCQskIzpIg,16873
|
77
|
+
spiral/proto/_/spiral/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
78
|
+
spiral/proto/_/spiral/table/__init__.py,sha256=sjK2dmvB09PqV3lxKMEk5QoHjC37HMW0MnxR1QDuBg0,7387
|
79
|
+
spiral/proto/_/spfs/__init__.py,sha256=9WtIXr7HGslKWRHHieFDo8N_qnGL4QQyLOCWEkOKRvk,1017
|
80
|
+
spiral/_lib.abi3.so,sha256=3gQK4_PPR_H0f3Ve5fg3NsCnIjx2UBuGvUBnYUYn8AA,58598496
|
81
|
+
pyspiral-0.1.0.dist-info/RECORD,,
|
spiral/__init__.py
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
"""Python client for the Spiral warehouse."""
|
2
|
+
|
3
|
+
from spiral import _lib
|
4
|
+
from spiral.catalog import Spiral
|
5
|
+
from spiral.scan_ import Scan, scan
|
6
|
+
from spiral.table import Table
|
7
|
+
|
8
|
+
# Eagerly import the Spiral library
|
9
|
+
assert _lib, "Spiral library"
|
10
|
+
|
11
|
+
__all__ = ["scan", "Scan", "Table", "Spiral"]
|
spiral/_lib.abi3.so
ADDED
Binary file
|
spiral/adbc.py
ADDED
@@ -0,0 +1,386 @@
|
|
1
|
+
import abc
|
2
|
+
import functools
|
3
|
+
import logging
|
4
|
+
from concurrent.futures import ThreadPoolExecutor
|
5
|
+
from urllib.parse import urlparse
|
6
|
+
|
7
|
+
import duckdb
|
8
|
+
import pyarrow as pa
|
9
|
+
import pyarrow.compute as pc
|
10
|
+
import sqlglot
|
11
|
+
import sqlglot.expressions as exp
|
12
|
+
from betterproto.lib.google.protobuf import Any
|
13
|
+
from pyarrow.flight import (
|
14
|
+
Action,
|
15
|
+
FlightDescriptor,
|
16
|
+
FlightEndpoint,
|
17
|
+
FlightError,
|
18
|
+
FlightInfo,
|
19
|
+
FlightMetadataWriter,
|
20
|
+
FlightServerBase,
|
21
|
+
MetadataRecordBatchReader,
|
22
|
+
RecordBatchStream,
|
23
|
+
ServerCallContext,
|
24
|
+
Ticket,
|
25
|
+
)
|
26
|
+
|
27
|
+
from spiral import Spiral
|
28
|
+
from spiral.proto._.arrow.flight.protocol import sql as rpc
|
29
|
+
from spiral.proto._.arrow.flight.protocol.sql import (
|
30
|
+
CommandGetCatalogs,
|
31
|
+
CommandGetDbSchemas,
|
32
|
+
CommandGetSqlInfo,
|
33
|
+
CommandGetTables,
|
34
|
+
CommandStatementQuery,
|
35
|
+
SqlInfo,
|
36
|
+
SqlSupportedTransaction,
|
37
|
+
)
|
38
|
+
|
39
|
+
log = logging.getLogger(__name__)
|
40
|
+
|
41
|
+
|
42
|
+
def debuggable(func):
    """Decorate an Arrow Flight RPC server function so GUI debuggers can trace it.

    Before each call the wrapper tries to import ``pydevd`` (the debugger
    backend used by e.g. PyCharm) and attach it without suspending. When
    ``pydevd`` cannot be imported the call simply goes ahead.

    See https://github.com/apache/arrow/issues/36844 for more details.
    """

    @functools.wraps(func)
    def _traced(*call_args, **call_kwargs):
        try:
            import pydevd

            pydevd.connected = True
            pydevd.settrace(suspend=False)
        except ImportError:
            # pydevd is unavailable, so we are not running under the debugger.
            pass
        return func(*call_args, **call_kwargs)

    return _traced
|
64
|
+
|
65
|
+
|
66
|
+
class ADBCServerBase:
    """Backend interface used by the Flight SQL server in this module.

    ``get_sql_info`` ships with a default implementation; the remaining
    methods are marked abstract and are expected to be provided by a
    concrete backend.

    NOTE(review): the class does not derive from ``abc.ABC``, so the
    ``abstractmethod`` markers are not enforced at instantiation time.
    """

    def get_sql_info(self, _req: CommandGetSqlInfo) -> pa.RecordBatchReader:
        """Default implementation that reports no support for any complex features.

        Args:
            _req: The SqlInfo request; it is not inspected.

        Returns:
            A reader over a two-column table: ``info_name`` (uint32) and
            ``info_value`` (a dense union holding the value).
        """
        info = {
            SqlInfo.FLIGHT_SQL_SERVER_NAME: "Spiral ADBC Server",
            SqlInfo.FLIGHT_SQL_SERVER_VERSION: "0.0.1",
            SqlInfo.FLIGHT_SQL_SERVER_ARROW_VERSION: pa.__version__,
            SqlInfo.FLIGHT_SQL_SERVER_READ_ONLY: True,
            SqlInfo.FLIGHT_SQL_SERVER_TRANSACTION: SqlSupportedTransaction.SQL_SUPPORTED_TRANSACTION_NONE.value,
        }

        # See https://github.com/apache/arrow-adbc/blob/38c21c2311a59803559cb0091b3f34180c28b25f/rust/core/src/schemas.rs#L35
        union_fields = [
            pa.field("string_value", pa.string()),
            pa.field("bool_value", pa.bool_()),
            pa.field("int64_value", pa.int64()),
            pa.field("int32_bitmask", pa.int32()),
            pa.field("string_list", pa.list_(pa.string())),
            pa.field(
                "int32_to_int32_list_map",
                pa.map_(pa.int32(), pa.list_(pa.int32()), keys_sorted=False),
            ),
        ]
        schema = pa.schema(
            [
                pa.field("info_name", pa.uint32(), nullable=False),
                pa.field("info_value", pa.dense_union(union_fields), nullable=False),
            ]
        )

        # PyArrow doesn't support creating a dense union for us :(
        # Each value gets a type code (the index of its child in
        # `union_fields`) and an offset into that child's array.
        types = []
        offsets = []
        strs = []
        bools = []
        ints = []
        for value in info.values():
            if isinstance(value, str):
                code, child = 0, strs
            elif isinstance(value, bool):
                # Tested before the integer fallback because bool is an int subclass.
                code, child = 1, bools
            else:
                # int64_value is the third union child, so its type code is 2
                # (code 1 would make readers look the value up among the bools).
                code, child = 2, ints
            types.append(code)
            offsets.append(len(child))
            child.append(value)

        values = pa.UnionArray.from_dense(
            pa.array(types, type=pa.int8()),
            pa.array(offsets, type=pa.int32()),
            [pa.array(data, type=f.type) for data, f in zip([strs, bools, ints, [], [], []], union_fields)],
            [f.name for f in union_fields],
        )

        return pa.table(data=[pa.array(list(info.keys()), type=pa.uint32()), values], schema=schema).to_reader()

    @abc.abstractmethod
    def get_catalogs(self, req: CommandGetCatalogs) -> pa.RecordBatchReader:
        """Return a reader listing the available catalogs."""
        ...

    @abc.abstractmethod
    def get_db_schemas(self, req: CommandGetDbSchemas) -> pa.RecordBatchReader:
        """Return a reader listing the database schemas matching ``req``."""
        ...

    @abc.abstractmethod
    def get_tables(self, req: CommandGetTables) -> pa.RecordBatchReader:
        """Return a reader listing the tables matching ``req``."""
        ...

    @abc.abstractmethod
    def statement_query(self, req: CommandStatementQuery, limit: int | None = None) -> pa.RecordBatchReader:
        """Run the query in ``req`` and return a reader over its result, optionally limited to ``limit`` rows."""
        ...
|
136
|
+
|
137
|
+
|
138
|
+
class SpiralADBCServer(ADBCServerBase):
    """ADBC backend that exposes Spiral projects as catalogs.

    Project IDs are reported as catalog names, the first component of each
    table name as the database schema, and queries are executed by DuckDB
    over the tables' PyArrow datasets.
    """

    def __init__(self, spiral: Spiral):
        """Store the Spiral client and create the thread pool used by ``get_tables``."""
        self.sp = spiral

        self.pool = ThreadPoolExecutor()

    def get_catalogs(self, req: CommandGetCatalogs) -> pa.RecordBatchReader:
        """Return a single-batch reader whose ``catalog_name`` column lists every project ID."""
        schema = pa.schema([pa.field("catalog_name", pa.string(), nullable=False)])

        @debuggable
        def batches():
            yield pa.RecordBatch.from_arrays(
                [list(self.sp.list_project_ids())],
                schema=schema,
            )

        return pa.RecordBatchReader.from_batches(schema, batches())

    def get_db_schemas(self, req: CommandGetDbSchemas) -> pa.RecordBatchReader:
        """Get the schemas from the database.

        One batch is produced per project, with one row per distinct dataset
        name; rows are filtered by ``req.db_schema_filter_pattern`` when set.
        """

        schema = pa.schema(
            [
                pa.field("catalog_name", pa.string()),
                pa.field("db_schema_name", pa.string(), nullable=False),
            ]
        )

        @debuggable
        def batches():
            if req.catalog == "":
                # Empty string means databases _without_ a catalog, which we don't support
                return

            # Otherwise, catalog is either the project ID, or None.
            if req.catalog is None:
                projects = list(self.sp.list_projects())
            else:
                projects = [self.sp.project(req.catalog)]

            for project in projects:
                # The first element of each listed table name is the dataset.
                datasets = {dt[0] for dt in project.list_table_names()}
                batch = pa.RecordBatch.from_arrays(
                    [
                        [project.id] * len(datasets),
                        list(datasets),
                    ],
                    schema=schema,
                )

                if req.db_schema_filter_pattern:
                    mask = pc.match_like(batch["db_schema_name"], req.db_schema_filter_pattern)
                    batch = batch.filter(mask)

                yield batch

        return pa.RecordBatchReader.from_batches(schema, batches())

    def get_tables(self, req: CommandGetTables) -> pa.RecordBatchReader:
        """List tables, one batch per project.

        The serialized Arrow schema of each table is included as a
        ``table_schema`` column only when ``req.include_schema`` is set.
        """
        fields = [
            pa.field("catalog_name", pa.string()),
            pa.field("db_schema_name", pa.string()),
            pa.field("table_name", pa.string(), nullable=False),
            pa.field("table_type", pa.string(), nullable=False),
        ]
        # Append conditionally as a statement: writing this as
        # `[...] + [...] if cond else []` binds the conditional around the whole
        # sum and yields an empty schema when the schema column is not requested.
        if req.include_schema:
            fields.append(pa.field("table_schema", pa.binary(), nullable=False))
        schema = pa.schema(fields)

        @debuggable
        def batches():
            if req.catalog == "":
                # Empty string means databases _without_ a catalog, which we don't support
                return

            if req.catalog is None:
                projects = list(self.sp.list_projects())
            else:
                projects = [self.sp.project(req.catalog)]

            def _process_project(project):
                rows = []
                for table in project.list_tables():
                    # str(table) is expected to be "<project>.<dataset>.<name>".
                    _project_id, dataset, name = str(table).split(".")

                    row = {
                        "catalog_name": project.id,
                        "db_schema_name": dataset,
                        "table_name": name,
                        "table_type": "TABLE",
                    }

                    if req.include_schema:
                        row["table_schema"] = table.to_dataset().schema.serialize().to_pybytes()

                    rows.append(row)

                return pa.RecordBatch.from_pylist(rows, schema=schema)

            # Projects are processed concurrently on the pool; results keep input order.
            yield from self.pool.map(_process_project, projects)

        return pa.RecordBatchReader.from_batches(schema, batches())

    @debuggable
    def statement_query(self, req: CommandStatementQuery, limit: int | None = None) -> pa.RecordBatchReader:
        """Execute ``req.query`` with DuckDB over the referenced Spiral tables.

        Args:
            req: The statement to run; table references are three-part
                ``catalog.db.name`` identifiers.
            limit: Optional maximum number of rows to return.

        Returns:
            A reader streaming the result in batches of 1,000 rows.

        Raises:
            FlightError: If DuckDB rejects the rewritten query.
        """
        expr = sqlglot.parse_one(req.query, dialect="duckdb")

        # Materialize the table references first so the tree is not mutated
        # while it is being traversed.
        for tbl in list(expr.find_all(exp.Table)):
            # We swap the three-part identifier out for a single identifier and
            # register the table's PyArrow Dataset with DuckDB under that name.
            # Explicit registration replaces writing into `locals()`, which
            # no longer reaches the frame that DuckDB inspects on Python 3.13+
            # (PEP 667).
            name = exp.table_name(tbl)
            dataset = self.sp.project(tbl.catalog).table(f"{tbl.db}.{tbl.name}").to_dataset()
            duckdb.register(name, dataset)
            tbl.replace(exp.table_(table=name))

        try:
            sql = duckdb.sql(expr.sql(dialect="duckdb"))
        except Exception as e:
            # Surface the failure to the Flight client, keeping the cause chained.
            raise FlightError(str(e)) from e

        if limit is not None:
            sql = sql.limit(limit)

        return sql.fetch_arrow_reader(batch_size=1_000)
|
266
|
+
|
267
|
+
|
268
|
+
class ADBCFlightServer(FlightServerBase):
|
269
|
+
"""An implementation of a FlightSQL ADBC server."""
|
270
|
+
|
271
|
+
def __init__(self, abdc: ADBCServerBase, *, location=None, **kwargs):
|
272
|
+
super().__init__(location=location, **kwargs)
|
273
|
+
self.location = location
|
274
|
+
self.adbc = abdc
|
275
|
+
|
276
|
+
self.host = "localhost"
|
277
|
+
self.tls = False
|
278
|
+
if location:
|
279
|
+
parts = urlparse(location)
|
280
|
+
self.host = parts.hostname
|
281
|
+
self.tls = parts.scheme.endswith("s")
|
282
|
+
|
283
|
+
@debuggable
|
284
|
+
def do_action(self, context: ServerCallContext, action: Action):
|
285
|
+
log.info("DoAction %s: %s", context.peer(), action)
|
286
|
+
super().do_action(context, action)
|
287
|
+
|
288
|
+
@debuggable
|
289
|
+
def do_exchange(self, context: ServerCallContext, descriptor: FlightDescriptor, reader, writer):
|
290
|
+
log.info("DoExchange %s: %s", context.peer(), descriptor)
|
291
|
+
super().do_exchange(context, descriptor, reader, writer)
|
292
|
+
|
293
|
+
@debuggable
|
294
|
+
def do_get(self, context: ServerCallContext, ticket: Ticket):
|
295
|
+
log.info("DoGet %s: %s", context.peer(), ticket)
|
296
|
+
req = self.parse_command(ticket.ticket)
|
297
|
+
match req:
|
298
|
+
case CommandGetSqlInfo():
|
299
|
+
return RecordBatchStream(self.adbc.get_sql_info(req))
|
300
|
+
case CommandGetCatalogs():
|
301
|
+
return RecordBatchStream(self.adbc.get_catalogs(req))
|
302
|
+
case CommandGetDbSchemas():
|
303
|
+
return RecordBatchStream(self.adbc.get_db_schemas(req))
|
304
|
+
case CommandGetTables():
|
305
|
+
return RecordBatchStream(self.adbc.get_tables(req))
|
306
|
+
case CommandStatementQuery():
|
307
|
+
return RecordBatchStream(self.adbc.statement_query(req))
|
308
|
+
case _:
|
309
|
+
raise NotImplementedError(f"Unsupported do_Get: {req}")
|
310
|
+
|
311
|
+
@debuggable
|
312
|
+
def do_put(
|
313
|
+
self,
|
314
|
+
context: ServerCallContext,
|
315
|
+
descriptor: FlightDescriptor,
|
316
|
+
reader: MetadataRecordBatchReader,
|
317
|
+
writer: FlightMetadataWriter,
|
318
|
+
):
|
319
|
+
log.info("DoPut %s: %s", context.peer(), descriptor)
|
320
|
+
super().do_put(context, descriptor, reader, writer)
|
321
|
+
|
322
|
+
@debuggable
|
323
|
+
def get_flight_info(self, context: ServerCallContext, descriptor: FlightDescriptor) -> FlightInfo:
|
324
|
+
log.info("GetFlightInfo %s: %s", context.peer(), descriptor)
|
325
|
+
req = self.parse_command(descriptor.command)
|
326
|
+
match req:
|
327
|
+
case CommandGetSqlInfo():
|
328
|
+
# Each metadata type contributes to the schema.
|
329
|
+
schema = self.adbc.get_sql_info(req).schema
|
330
|
+
case CommandGetCatalogs():
|
331
|
+
schema = self.adbc.get_catalogs(req).schema
|
332
|
+
case CommandGetDbSchemas():
|
333
|
+
schema = self.adbc.get_db_schemas(req).schema
|
334
|
+
case CommandGetTables():
|
335
|
+
schema = self.adbc.get_tables(req).schema
|
336
|
+
case CommandStatementQuery():
|
337
|
+
schema = self.adbc.statement_query(req, limit=0).schema
|
338
|
+
case _:
|
339
|
+
raise NotImplementedError(f"Unsupported command: {req}")
|
340
|
+
|
341
|
+
return self._make_flight_info(self.descriptor_to_key(descriptor), descriptor, schema)
|
342
|
+
|
343
|
+
@staticmethod
|
344
|
+
def parse_command(command: bytes):
|
345
|
+
command = Any().parse(command)
|
346
|
+
|
347
|
+
if not command.type_url.startswith("type.googleapis.com/arrow.flight.protocol.sql."):
|
348
|
+
raise NotImplementedError(f"Unsupported command: {command.type_url}")
|
349
|
+
|
350
|
+
proto_cls_name = command.type_url[len("type.googleapis.com/arrow.flight.protocol.sql.") :]
|
351
|
+
proto_cls = getattr(rpc, proto_cls_name)
|
352
|
+
return proto_cls().parse(command.value)
|
353
|
+
|
354
|
+
@staticmethod
|
355
|
+
def descriptor_to_key(descriptor):
|
356
|
+
return descriptor.command
|
357
|
+
|
358
|
+
@debuggable
|
359
|
+
def get_schema(self, context: ServerCallContext, descriptor: FlightDescriptor):
|
360
|
+
log.info("GetSchema %s: %s", context.peer(), descriptor)
|
361
|
+
return super().get_schema(context, descriptor)
|
362
|
+
|
363
|
+
@debuggable
|
364
|
+
def list_actions(self, context: ServerCallContext):
|
365
|
+
log.info("ListActions %s", context.peer())
|
366
|
+
super().list_actions(context)
|
367
|
+
|
368
|
+
@debuggable
|
369
|
+
def list_flights(self, context: ServerCallContext, criteria):
|
370
|
+
log.info("ListFlights %s: %s", context.peer(), criteria)
|
371
|
+
super().list_flights(context, criteria)
|
372
|
+
|
373
|
+
def _make_flight_info(self, key, descriptor, schema: pa.Schema):
|
374
|
+
# If we pass zero locations, the FlightSQL client should attempt to use the original connection.
|
375
|
+
endpoints = [FlightEndpoint(key, [])]
|
376
|
+
return FlightInfo(schema, descriptor, endpoints, -1, -1)
|
377
|
+
|
378
|
+
|
379
|
+
if __name__ == "__main__":
    # Manual entry point: serve the default Spiral catalog over Flight SQL
    # on localhost:5005 with debug logging for the "spiral" loggers.
    logging.basicConfig()
    spiral_logger = logging.getLogger("spiral")
    spiral_logger.setLevel(logging.DEBUG)

    backend = SpiralADBCServer(Spiral())
    server = ADBCFlightServer(backend, location="grpc://localhost:5005")
    server.serve()
|