pyspiral 0.1.0__cp310-abi3-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyspiral-0.1.0.dist-info/METADATA +48 -0
- pyspiral-0.1.0.dist-info/RECORD +81 -0
- pyspiral-0.1.0.dist-info/WHEEL +4 -0
- pyspiral-0.1.0.dist-info/entry_points.txt +2 -0
- spiral/__init__.py +11 -0
- spiral/_lib.abi3.so +0 -0
- spiral/adbc.py +386 -0
- spiral/api/__init__.py +221 -0
- spiral/api/admin.py +29 -0
- spiral/api/filesystems.py +125 -0
- spiral/api/organizations.py +90 -0
- spiral/api/projects.py +160 -0
- spiral/api/tables.py +94 -0
- spiral/api/tokens.py +56 -0
- spiral/api/workloads.py +45 -0
- spiral/arrow.py +209 -0
- spiral/authn/__init__.py +0 -0
- spiral/authn/authn.py +89 -0
- spiral/authn/device.py +206 -0
- spiral/authn/github_.py +33 -0
- spiral/authn/modal_.py +18 -0
- spiral/catalog.py +78 -0
- spiral/cli/__init__.py +82 -0
- spiral/cli/__main__.py +4 -0
- spiral/cli/admin.py +21 -0
- spiral/cli/app.py +48 -0
- spiral/cli/console.py +95 -0
- spiral/cli/fs.py +47 -0
- spiral/cli/login.py +13 -0
- spiral/cli/org.py +90 -0
- spiral/cli/printer.py +45 -0
- spiral/cli/project.py +107 -0
- spiral/cli/state.py +3 -0
- spiral/cli/table.py +20 -0
- spiral/cli/token.py +27 -0
- spiral/cli/types.py +53 -0
- spiral/cli/workload.py +59 -0
- spiral/config.py +26 -0
- spiral/core/__init__.py +0 -0
- spiral/core/core/__init__.pyi +53 -0
- spiral/core/manifests/__init__.pyi +53 -0
- spiral/core/metastore/__init__.pyi +91 -0
- spiral/core/spec/__init__.pyi +257 -0
- spiral/dataset.py +239 -0
- spiral/debug.py +251 -0
- spiral/expressions/__init__.py +222 -0
- spiral/expressions/base.py +149 -0
- spiral/expressions/http.py +86 -0
- spiral/expressions/io.py +100 -0
- spiral/expressions/list_.py +68 -0
- spiral/expressions/refs.py +44 -0
- spiral/expressions/str_.py +39 -0
- spiral/expressions/struct.py +57 -0
- spiral/expressions/tiff.py +223 -0
- spiral/expressions/udf.py +46 -0
- spiral/grpc_.py +32 -0
- spiral/project.py +137 -0
- spiral/proto/_/__init__.py +0 -0
- spiral/proto/_/arrow/__init__.py +0 -0
- spiral/proto/_/arrow/flight/__init__.py +0 -0
- spiral/proto/_/arrow/flight/protocol/__init__.py +0 -0
- spiral/proto/_/arrow/flight/protocol/sql/__init__.py +1990 -0
- spiral/proto/_/scandal/__init__.py +223 -0
- spiral/proto/_/spfs/__init__.py +36 -0
- spiral/proto/_/spiral/__init__.py +0 -0
- spiral/proto/_/spiral/table/__init__.py +225 -0
- spiral/proto/_/spiraldb/__init__.py +0 -0
- spiral/proto/_/spiraldb/metastore/__init__.py +499 -0
- spiral/proto/__init__.py +0 -0
- spiral/proto/scandal/__init__.py +45 -0
- spiral/proto/spiral/__init__.py +0 -0
- spiral/proto/spiral/table/__init__.py +96 -0
- spiral/proto/substrait/__init__.py +3399 -0
- spiral/proto/substrait/extensions/__init__.py +115 -0
- spiral/proto/util.py +41 -0
- spiral/py.typed +0 -0
- spiral/scan_.py +168 -0
- spiral/settings.py +157 -0
- spiral/substrait_.py +275 -0
- spiral/table.py +157 -0
- spiral/types_.py +6 -0
@@ -0,0 +1,48 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: pyspiral
|
3
|
+
Version: 0.1.0
|
4
|
+
Classifier: Intended Audience :: Science/Research
|
5
|
+
Classifier: Operating System :: OS Independent
|
6
|
+
Classifier: Programming Language :: Python
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
8
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
9
|
+
Classifier: Programming Language :: Python :: 3.10
|
10
|
+
Classifier: Programming Language :: Python :: 3.11
|
11
|
+
Classifier: Programming Language :: Python :: 3.12
|
12
|
+
Classifier: Programming Language :: Python :: 3.13
|
13
|
+
Classifier: Programming Language :: Rust
|
14
|
+
Classifier: License :: Other/Proprietary License
|
15
|
+
Requires-Dist: betterproto==2.0.0b7
|
16
|
+
Requires-Dist: google-re2>=1.1.20240702
|
17
|
+
Requires-Dist: grpclib>=0.4.7
|
18
|
+
Requires-Dist: hishel>=0.0.30
|
19
|
+
Requires-Dist: httpx>=0.27.0
|
20
|
+
Requires-Dist: numpy>=1.26.3
|
21
|
+
Requires-Dist: opentelemetry-api>=1.27.0
|
22
|
+
Requires-Dist: opentelemetry-sdk>=1.27.0
|
23
|
+
Requires-Dist: polars>=1.6.0
|
24
|
+
Requires-Dist: pyarrow>=17.0.0
|
25
|
+
Requires-Dist: pydantic-settings>=2.3.4
|
26
|
+
Requires-Dist: pydantic[email]>=2.5.3
|
27
|
+
Requires-Dist: pyjwt[crypto]>=2.9.0
|
28
|
+
Requires-Dist: pyroaring>=0.4.4
|
29
|
+
Requires-Dist: questionary>=2.0.1
|
30
|
+
Requires-Dist: tqdm>=4.66.5
|
31
|
+
Requires-Dist: typer>=0.12.3
|
32
|
+
Requires-Dist: xxhash>=3.4.1
|
33
|
+
Requires-Dist: nanoid>=2.0.0
|
34
|
+
Requires-Dist: sqlglot[rs]>=25.25.1
|
35
|
+
Requires-Dist: duckdb>=1.1.1
|
36
|
+
Requires-Dist: pyperclip>=1.9.0
|
37
|
+
Summary: Python implementation of Spiral table format.
|
38
|
+
Keywords: vortex,spiraldb
|
39
|
+
Home-Page: https://spiraldb.com
|
40
|
+
Author: Spiral<hello@spiraldb.com>
|
41
|
+
Author-email: SpiralDB <hello@spiraldb.com>
|
42
|
+
License: Proprietary
|
43
|
+
Requires-Python: >=3.10
|
44
|
+
Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
|
45
|
+
Project-URL: Source Code, https://github.com/spiraldb/spiraldb
|
46
|
+
|
47
|
+
# PySpiral
|
48
|
+
|
@@ -0,0 +1,81 @@
|
|
1
|
+
pyspiral-0.1.0.dist-info/METADATA,sha256=rPBsHTqcVDRoUPx9y63fXXLZ-7K1D_p6jQBjeRTM-t8,1699
|
2
|
+
pyspiral-0.1.0.dist-info/WHEEL,sha256=j3ku1HwtRttgdyoybPiqmsz03FP6lDUkPQNFM63xZJo,103
|
3
|
+
pyspiral-0.1.0.dist-info/entry_points.txt,sha256=uft7u-a6g40NLt4Q6BleWbK4NY0M8nZuYPpP8DV0EOk,45
|
4
|
+
spiral/catalog.py,sha256=BtthmRApU1RSb6KbUfVTM2aYeLsnlO0nKDYHBYhdr9M,2496
|
5
|
+
spiral/scan_.py,sha256=rbLl85yeOkRuLsbk6QKng0_U4gtGGsW6C-aJiJSxuv8,5946
|
6
|
+
spiral/substrait_.py,sha256=5ZXnYcsXEdrBogECnoL6IMlsjsseYHEnVARgRpy2vt8,12671
|
7
|
+
spiral/config.py,sha256=ovtE5D3r6_g90ZRDJhlJyWhOBlxLjagvomOyA-VZmdc,911
|
8
|
+
spiral/core/core/__init__.pyi,sha256=-OCtTgqjUN7sACAmgrAA_TrqmuP7epNSyL9XjqnNTa4,1914
|
9
|
+
spiral/core/spec/__init__.pyi,sha256=zwOPhpBS_iOrPkOdc4ySpgzICZNbteMZf4c2wdkWw1Y,7251
|
10
|
+
spiral/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
11
|
+
spiral/core/manifests/__init__.pyi,sha256=DXr4Ab_Xo11AzrJlSj7FTSJc8qoO-SkL-_ik4kB855U,1516
|
12
|
+
spiral/core/metastore/__init__.pyi,sha256=pdKED91GVJ9XWxTWc9gwkHvVHV_RKxvL43Ofs5ndmew,3145
|
13
|
+
spiral/types_.py,sha256=W_jyO7F6rpPiH69jhgSgV7OxQZbOlb1Ho3InpKUP6Eo,155
|
14
|
+
spiral/proto/scandal/__init__.py,sha256=wAAEkPN4S4XDpGQtw1MV5zFUeHM01XSAa5tgUgcALvg,777
|
15
|
+
spiral/proto/util.py,sha256=smnvVo6nYH3FfDm9jqhNLaXz4bbTBaQezHQDCTvZyiQ,1486
|
16
|
+
spiral/proto/spiral/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
17
|
+
spiral/proto/spiral/table/__init__.py,sha256=_F1f52RMkZsXofPXpJb2KE8KR5l6zxCtrGrabR1uDxo,2816
|
18
|
+
spiral/proto/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
19
|
+
spiral/proto/_/scandal/__init__.py,sha256=rQJdbN3UKDJ8vOJ5V7l3KumNHlRyY8iw25HCLsIDB4I,6582
|
20
|
+
spiral/proto/_/arrow/flight/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
21
|
+
spiral/proto/_/arrow/flight/protocol/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
22
|
+
spiral/proto/_/arrow/flight/protocol/sql/__init__.py,sha256=_xhj9QkWEW1qZ-iVxcQ8k4EjYr7KJ5ofitJGqVUGQi4,79921
|
23
|
+
spiral/proto/_/arrow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
24
|
+
spiral/proto/_/spiral/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
25
|
+
spiral/proto/_/spiral/table/__init__.py,sha256=sjK2dmvB09PqV3lxKMEk5QoHjC37HMW0MnxR1QDuBg0,7387
|
26
|
+
spiral/proto/_/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
27
|
+
spiral/proto/_/spiraldb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
28
|
+
spiral/proto/_/spiraldb/metastore/__init__.py,sha256=40Egtg8MRYTaTTYRKOHkwuiyXEkw3Yg7ETCQskIzpIg,16873
|
29
|
+
spiral/proto/_/spfs/__init__.py,sha256=9WtIXr7HGslKWRHHieFDo8N_qnGL4QQyLOCWEkOKRvk,1017
|
30
|
+
spiral/proto/substrait/__init__.py,sha256=pV4-T-lwAHKkfFrNYSUGY4IkbIvuKjSo_imzF7BLj_s,126526
|
31
|
+
spiral/proto/substrait/extensions/__init__.py,sha256=yD7dg0TBqn-GK_L0qeVof1GKnwSLg_kPyQSV3kcSljs,3655
|
32
|
+
spiral/arrow.py,sha256=LBPwZcGkP4kXb42_kl5IUwWW3DO84CV3QJDwCHjG5Dg,7225
|
33
|
+
spiral/__init__.py,sha256=4EiQkY17qHT9dpxu41fdOV1kqGl_b-HXNRQg--ZwJbo,286
|
34
|
+
spiral/cli/console.py,sha256=-OP0bB_efxhWh4lZ95KRdu-SRgSUMJ47Rbi9FHv1TlY,2577
|
35
|
+
spiral/cli/org.py,sha256=ezWhoGUkJQQAwI1jKvDP8uZPNlnou_hXtRDa1us5cSE,2935
|
36
|
+
spiral/cli/token.py,sha256=dv30aa745bbS-c3tyzQUTSxGG_N0kCt8G4bip9fP_EM,968
|
37
|
+
spiral/cli/__init__.py,sha256=CoiAJ7FDgqjG_TrU-6SpP1hyZloIlEP4wcwnrF8flHM,2237
|
38
|
+
spiral/cli/types.py,sha256=4cphJs-i0vfq_CcnHxT9FpHiZdGwxME5GnOmIGB6Thw,1436
|
39
|
+
spiral/cli/workload.py,sha256=-XreFPJcX7kZvcYE3oQMeGkkYoXi25R5nuktJPg-PuY,2000
|
40
|
+
spiral/cli/admin.py,sha256=3pIs6PxDugMtdgzfRpn_HfBDv-cwAYtF0cJP2INB01A,579
|
41
|
+
spiral/cli/fs.py,sha256=8sQAgMahAq0gtXJqnuiKUkx4sO6vEEF4Iaq_-AF3wro,1524
|
42
|
+
spiral/cli/app.py,sha256=2oZfDTgj_gZ-lFMMzzJJTnvVzQhp_iedvH-FJnaaMW0,1487
|
43
|
+
spiral/cli/table.py,sha256=eh2NAk0GlfvthwRNeIbcZTsRWU3ypFx_uu9OaOLHPUo,628
|
44
|
+
spiral/cli/login.py,sha256=C7VpqVyYO2daUeIWHoelWnSGN7cju8YEuqOy12ImH4c,381
|
45
|
+
spiral/cli/printer.py,sha256=5HD3UcszFfPk-dK8U5akuvtXqMB7PMgOB1DFYMqspG8,1625
|
46
|
+
spiral/cli/__main__.py,sha256=kNaKM2xgJo7GRogf83nYldLM-RGUR6vymdGwZxywQu0,71
|
47
|
+
spiral/cli/project.py,sha256=aCxvw8UVwSmh_anArizIQ3_pLVT6QH9jYwMsGfpJUgM,4486
|
48
|
+
spiral/cli/state.py,sha256=1quvei8TnDTT6mDRo58P8FUfy4w16Z9sggBz7cFgllY,70
|
49
|
+
spiral/dataset.py,sha256=jUeXvE4B5nKh9VNmHxvROQbEkTUxqyYS8oS6EQyRbng,7555
|
50
|
+
spiral/grpc_.py,sha256=f3czdP1Mxme42Y5--a5ogYq1TTiWn-J_MlGjwJ2mWwM,1015
|
51
|
+
spiral/debug.py,sha256=t590eAUtNWwMTsSdkjVNN7J1iMqY2p4PRJ3BWR_ozho,8999
|
52
|
+
spiral/expressions/tiff.py,sha256=k5GMzm4FmGBJMyja-D6u_kt_-vHYIdj4bnYYZut5lK4,7579
|
53
|
+
spiral/expressions/io.py,sha256=gJ2a0FKMmdxarWKENulPRwH7KDvSJTIh_OUxX306xAM,3045
|
54
|
+
spiral/expressions/__init__.py,sha256=T2POn0Z-mQj9PY8ukYWYn5A832Fz7Y6F8RRASo6Xl3A,5638
|
55
|
+
spiral/expressions/list_.py,sha256=nbo4xQAuqBsQGajq_JgORaJl8_CDvOAv14zMbqmtZh4,1814
|
56
|
+
spiral/expressions/http.py,sha256=begUydWoFHEqjeLkATvI_v66Ez6_rR-OQBWO5cHbb9c,2742
|
57
|
+
spiral/expressions/refs.py,sha256=F3xr7wmrbAawMkLTWcvVwczbOMNOnIttSMRDuzjZHbo,1774
|
58
|
+
spiral/expressions/udf.py,sha256=vOlrdxiVpt7vdSgiTKX_XR86YKyu02Fdwb9xlINCby4,1363
|
59
|
+
spiral/expressions/str_.py,sha256=tY8RXW3JWvr1-bEfCZtk5FAf11wKJnXPuA9EoeJ9tA4,1265
|
60
|
+
spiral/expressions/base.py,sha256=mTBwS6CwdDaV8uotjZUiKi7GHQzX2TRPpnseJJUDrR0,4776
|
61
|
+
spiral/expressions/struct.py,sha256=MuxoBP6ESpwmjzusG-_HxHGYKvQQz6AZWzvw7vNUHJM,2007
|
62
|
+
spiral/settings.py,sha256=e_F6GQea6ljXzCRgFxMBMecoGhFpEhePhMRQkYOoVO4,4555
|
63
|
+
spiral/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
64
|
+
spiral/api/filesystems.py,sha256=NPf5PCyCQ7eEU723fOdKcuGPgn83pthkb_jJ9aB0fwM,3431
|
65
|
+
spiral/api/workloads.py,sha256=4MWs2pp9AWvx6cZhgyW-ehyCRxpHc_NAQgHaoYOEBfg,1266
|
66
|
+
spiral/api/__init__.py,sha256=Ub3IYeQUJ_z0-Y_SXmNasxb6uefKynQkuUZgRM1enyc,6660
|
67
|
+
spiral/api/tokens.py,sha256=WaSRr_l3i81t5u2qi3kWW-fySbyKFm-PQK1ZlmoSByc,1464
|
68
|
+
spiral/api/admin.py,sha256=HJBrRJScbcdDuFhF_06E0EyE-_Y0osfYPxVoRAyEoTc,837
|
69
|
+
spiral/api/organizations.py,sha256=-PO93HTX02IxhXM6SJpAhnAXpVW1WjthFO4-AOzZAC4,2670
|
70
|
+
spiral/api/tables.py,sha256=uPoWkkcW7lJUxU0fUgDK5Gy_DT8y7gJFXdgxQpqcc5w,2548
|
71
|
+
spiral/api/projects.py,sha256=-VGlu5V3TJ3XLCGu85bPHRFiktIADnAxfLWIH8Rmxug,4986
|
72
|
+
spiral/table.py,sha256=iJ-Lhu9ieSNg728cvUNRERcxKz7pj8hMCXPGBFHg7tI,4845
|
73
|
+
spiral/authn/modal_.py,sha256=agcnR3dYTslkH2K_a2Eis_2JWn9Ps11FVrGG_jkOdGk,472
|
74
|
+
spiral/authn/device.py,sha256=ohHSVLW3a-qLNYQGN-3kXxV_836xOe0UYBN8i63cqAQ,6796
|
75
|
+
spiral/authn/authn.py,sha256=OCGJAUfoKLiXw9xAcAnX6i6mBRlvsl6qEFcimqQOu7g,2555
|
76
|
+
spiral/authn/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
77
|
+
spiral/authn/github_.py,sha256=K-0RUHDreINjnCDHyT9aeVDRk6WtNP7noBYEcwdz2W4,1313
|
78
|
+
spiral/adbc.py,sha256=H_bzevPy5teyZKzjczh1gQ_zPcfk5sNASiJKQvyab9E,13830
|
79
|
+
spiral/project.py,sha256=q9jHql7hz5OQzPfDfvE_hN3K9cZotD0cxkdug9EUTwM,4889
|
80
|
+
spiral/_lib.abi3.so,sha256=x_LC6gshT81y-rCvIaOo1Tmznr6gFwgSguFISORTF4E,61021472
|
81
|
+
pyspiral-0.1.0.dist-info/RECORD,,
|
spiral/__init__.py
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
"""Python client for the Spiral warehouse."""
|
2
|
+
|
3
|
+
from spiral import _lib
|
4
|
+
from spiral.catalog import Spiral
|
5
|
+
from spiral.scan_ import Scan, scan
|
6
|
+
from spiral.table import Table
|
7
|
+
|
8
|
+
# Eagerly import the Spiral library
|
9
|
+
assert _lib, "Spiral library"
|
10
|
+
|
11
|
+
__all__ = ["scan", "Scan", "Table", "Spiral"]
|
spiral/_lib.abi3.so
ADDED
Binary file
|
spiral/adbc.py
ADDED
@@ -0,0 +1,386 @@
|
|
1
|
+
import abc
|
2
|
+
import functools
|
3
|
+
import logging
|
4
|
+
from concurrent.futures import ThreadPoolExecutor
|
5
|
+
from urllib.parse import urlparse
|
6
|
+
|
7
|
+
import duckdb
|
8
|
+
import pyarrow as pa
|
9
|
+
import pyarrow.compute as pc
|
10
|
+
import sqlglot
|
11
|
+
import sqlglot.expressions as exp
|
12
|
+
from betterproto.lib.google.protobuf import Any
|
13
|
+
from pyarrow.flight import (
|
14
|
+
Action,
|
15
|
+
FlightDescriptor,
|
16
|
+
FlightEndpoint,
|
17
|
+
FlightError,
|
18
|
+
FlightInfo,
|
19
|
+
FlightMetadataWriter,
|
20
|
+
FlightServerBase,
|
21
|
+
MetadataRecordBatchReader,
|
22
|
+
RecordBatchStream,
|
23
|
+
ServerCallContext,
|
24
|
+
Ticket,
|
25
|
+
)
|
26
|
+
|
27
|
+
from spiral import Spiral
|
28
|
+
from spiral.proto._.arrow.flight.protocol import sql as rpc
|
29
|
+
from spiral.proto._.arrow.flight.protocol.sql import (
|
30
|
+
CommandGetCatalogs,
|
31
|
+
CommandGetDbSchemas,
|
32
|
+
CommandGetSqlInfo,
|
33
|
+
CommandGetTables,
|
34
|
+
CommandStatementQuery,
|
35
|
+
SqlInfo,
|
36
|
+
SqlSupportedTransaction,
|
37
|
+
)
|
38
|
+
|
39
|
+
log = logging.getLogger(__name__)
|
40
|
+
|
41
|
+
|
42
|
+
def debuggable(func):
    """Decorate an Arrow Flight RPC server function so a GUI debugger
    (i.e. PyCharm) can be used inside it.

    On every call the wrapper tries to import ``pydevd``; when the import
    succeeds it sets ``pydevd.connected = True`` and calls
    ``pydevd.settrace(suspend=False)`` before delegating to ``func``.
    When the import fails the call is forwarded unchanged.

    See: https://github.com/apache/arrow/issues/36844
    for more details...
    """

    @functools.wraps(func)
    def traced_call(*args, **kwargs):
        try:
            import pydevd

            pydevd.connected = True
            pydevd.settrace(suspend=False)
        except ImportError:
            # Not running in debugger
            pass
        return func(*args, **kwargs)

    return traced_call
|
64
|
+
|
65
|
+
|
66
|
+
class ADBCServerBase:
    """Backend interface used by the Flight SQL server to answer ADBC commands.

    ``get_sql_info`` has a default implementation; the remaining methods are
    marked abstract and must be provided by a concrete backend.

    NOTE(review): this class does not derive from ``abc.ABC``, so the
    ``@abc.abstractmethod`` markers below are not enforced at instantiation.
    """

    def get_sql_info(self, _req: CommandGetSqlInfo) -> pa.RecordBatchReader:
        """Default implementation that reports no support for any complex features.

        Args:
            _req: The incoming request; it is not inspected, every known info
                entry is always returned.

        Returns:
            A reader over a single table with an ``info_name`` (uint32) column
            and an ``info_value`` dense-union column.
        """
        info = {
            SqlInfo.FLIGHT_SQL_SERVER_NAME: "Spiral ADBC Server",
            SqlInfo.FLIGHT_SQL_SERVER_VERSION: "0.0.1",
            SqlInfo.FLIGHT_SQL_SERVER_ARROW_VERSION: pa.__version__,
            SqlInfo.FLIGHT_SQL_SERVER_READ_ONLY: True,
            SqlInfo.FLIGHT_SQL_SERVER_TRANSACTION: SqlSupportedTransaction.SQL_SUPPORTED_TRANSACTION_NONE.value,
        }

        # See https://github.com/apache/arrow-adbc/blob/38c21c2311a59803559cb0091b3f34180c28b25f/rust/core/src/schemas.rs#L35
        union_fields = [
            pa.field("string_value", pa.string()),
            pa.field("bool_value", pa.bool_()),
            pa.field("int64_value", pa.int64()),
            pa.field("int32_bitmask", pa.int32()),
            pa.field("string_list", pa.list_(pa.string())),
            pa.field(
                "int32_to_int32_list_map",
                pa.map_(pa.int32(), pa.list_(pa.int32()), keys_sorted=False),
            ),
        ]
        schema = pa.schema(
            [
                pa.field("info_name", pa.uint32(), nullable=False),
                pa.field("info_value", pa.dense_union(union_fields), nullable=False),
            ]
        )

        # PyArrow doesn't support creating a dense union for us :(
        # A dense union is described by one type id per row (the index of the
        # child in ``union_fields``) plus the row's offset into that child.
        string_id, bool_id, int64_id = 0, 1, 2
        type_ids = []
        offsets = []
        children = [[] for _ in union_fields]
        for value in info.values():
            if isinstance(value, str):
                type_id = string_id
            elif isinstance(value, bool):
                # Must be tested before the integer fallback: bool is an int subclass.
                type_id = bool_id
            else:
                # Integers live in the "int64_value" child (index 2). Tagging
                # them with the bool id would make readers resolve the offset
                # against the wrong child array.
                type_id = int64_id
            child = children[type_id]
            type_ids.append(type_id)
            offsets.append(len(child))
            child.append(value)

        values = pa.UnionArray.from_dense(
            pa.array(type_ids, type=pa.int8()),
            pa.array(offsets, type=pa.int32()),
            [pa.array(data, type=f.type) for data, f in zip(children, union_fields)],
            [f.name for f in union_fields],
        )

        return pa.table(data=[pa.array(list(info.keys()), type=pa.uint32()), values], schema=schema).to_reader()

    @abc.abstractmethod
    def get_catalogs(self, req: CommandGetCatalogs) -> pa.RecordBatchReader:
        """Return a reader over the catalogs visible to the server."""
        ...

    @abc.abstractmethod
    def get_db_schemas(self, req: CommandGetDbSchemas) -> pa.RecordBatchReader:
        """Return a reader over the DB schemas selected by ``req``."""
        ...

    @abc.abstractmethod
    def get_tables(self, req: CommandGetTables) -> pa.RecordBatchReader:
        """Return a reader over the tables selected by ``req``."""
        ...

    @abc.abstractmethod
    def statement_query(self, req: CommandStatementQuery, limit: int | None = None) -> pa.RecordBatchReader:
        """Execute the query in ``req`` and return a reader over its result.

        Args:
            req: The statement to execute.
            limit: Optional cap on the number of returned rows.
        """
        ...
|
136
|
+
|
137
|
+
|
138
|
+
class SpiralADBCServer(ADBCServerBase):
    """ADBC backend that serves metadata and queries from a ``Spiral`` client.

    Project ids are reported as catalog names, and table identifiers are split
    into a DB schema ("dataset") and a table name.
    """

    def __init__(self, spiral: Spiral):
        """Store the Spiral client and create the worker pool used by ``get_tables``."""
        self.sp = spiral

        self.pool = ThreadPoolExecutor()

    def get_catalogs(self, req: CommandGetCatalogs) -> pa.RecordBatchReader:
        """Return one ``catalog_name`` row per project id known to the client."""
        schema = pa.schema([pa.field("catalog_name", pa.string(), nullable=False)])

        @debuggable
        def batches():
            yield pa.RecordBatch.from_arrays(
                [list(self.sp.list_project_ids())],
                schema=schema,
            )

        return pa.RecordBatchReader.from_batches(schema, batches())

    def get_db_schemas(self, req: CommandGetDbSchemas) -> pa.RecordBatchReader:
        """Get the schemas from the database.

        One batch is produced per project. When ``req.db_schema_filter_pattern``
        is set, each batch is filtered with a SQL ``LIKE`` match on the schema name.
        """

        schema = pa.schema(
            [
                pa.field("catalog_name", pa.string()),
                pa.field("db_schema_name", pa.string(), nullable=False),
            ]
        )

        @debuggable
        def batches():
            if req.catalog == "":
                # Empty string means databases _without_ a catalog, which we don't support
                return

            # Otherwise, catalog is either the project ID, or None.
            if req.catalog is None:
                projects = list(self.sp.list_projects())
            else:
                projects = [self.sp.project(req.catalog)]

            for project in projects:
                # The first element of each listed table name is used as the
                # schema name; the set removes duplicates.
                datasets = {dt[0] for dt in project.list_table_names()}
                batch = pa.RecordBatch.from_arrays(
                    [
                        [project.id] * len(datasets),
                        list(datasets),
                    ],
                    schema=schema,
                )

                if req.db_schema_filter_pattern:
                    mask = pc.match_like(batch["db_schema_name"], req.db_schema_filter_pattern)
                    batch = batch.filter(mask)

                yield batch

        return pa.RecordBatchReader.from_batches(schema, batches())

    def get_tables(self, req: CommandGetTables) -> pa.RecordBatchReader:
        """Return one row per table, optionally including its serialized Arrow schema.

        Only ``req.catalog`` and ``req.include_schema`` are consulted here; no
        other field of ``req`` is applied as a filter.
        """
        fields = [
            pa.field("catalog_name", pa.string()),
            pa.field("db_schema_name", pa.string()),
            pa.field("table_name", pa.string(), nullable=False),
            pa.field("table_type", pa.string(), nullable=False),
        ]
        # Built step by step on purpose: `a + b if cond else []` parses as
        # `(a + b) if cond else []`, which would yield an empty schema whenever
        # the table schema column is not requested.
        if req.include_schema:
            fields.append(pa.field("table_schema", pa.binary(), nullable=False))
        schema = pa.schema(fields)

        @debuggable
        def batches():
            if req.catalog == "":
                # Empty string means databases _without_ a catalog, which we don't support
                return

            if req.catalog is None:
                projects = list(self.sp.list_projects())
            else:
                projects = [self.sp.project(req.catalog)]

            def _process_project(project):
                """Build the record batch describing every table of one project."""
                tables = project.list_tables()

                rows = []
                for table in tables:
                    # str(table) is expected to be "<project>.<dataset>.<name>".
                    _project_id, dataset, name = str(table).split(".")

                    row = {
                        "catalog_name": project.id,
                        "db_schema_name": dataset,
                        "table_name": name,
                        "table_type": "TABLE",
                    }

                    if req.include_schema:
                        row["table_schema"] = table.to_dataset().schema.serialize().to_pybytes()

                    rows.append(row)

                return pa.RecordBatch.from_pylist(rows, schema=schema)

            # Projects are processed on the thread pool; results keep input order.
            yield from self.pool.map(_process_project, projects)

        return pa.RecordBatchReader.from_batches(schema, batches())

    @debuggable
    def statement_query(self, req: CommandStatementQuery, limit: int | None = None) -> pa.RecordBatchReader:
        """Run ``req.query`` through DuckDB and return the result as an Arrow reader.

        Args:
            req: The statement to execute; the query is parsed with the DuckDB dialect.
            limit: When not ``None``, the result is limited to this many rows.

        Raises:
            FlightError: If DuckDB rejects the rewritten query.
        """
        # Extract the tables from the query, and bring them into the Python locals scope.
        expr = sqlglot.parse_one(req.query, dialect="duckdb")
        for tbl in expr.find_all(exp.Table):
            # We swap the three-part identifier out for a single identifier
            # This lets us insert a PyArrow Dataset into Python locals such that
            # DuckDB will pick up on it for the query.
            # NOTE(review): this relies on writes to locals() being visible to
            # DuckDB's lookup of Python variables. From Python 3.13 (PEP 667),
            # locals() inside a function returns an independent snapshot -
            # confirm this still resolves the tables on 3.13.
            name = exp.table_name(tbl)
            locals()[name] = self.sp.project(tbl.catalog).table(f"{tbl.db}.{tbl.name}").to_dataset()
            tbl.replace(exp.table_(table=name))

        try:
            sql = duckdb.sql(expr.sql(dialect="duckdb"))
        except Exception as e:
            # Surface the failure to the Flight client, keeping the original
            # exception as the cause for server-side tracebacks.
            raise FlightError(str(e)) from e

        if limit is not None:
            sql = sql.limit(limit)

        return sql.fetch_arrow_reader(batch_size=1_000)
|
266
|
+
|
267
|
+
|
268
|
+
class ADBCFlightServer(FlightServerBase):
|
269
|
+
"""An implementation of a FlightSQL ADBC server."""
|
270
|
+
|
271
|
+
def __init__(self, abdc: ADBCServerBase, *, location=None, **kwargs):
|
272
|
+
super().__init__(location=location, **kwargs)
|
273
|
+
self.location = location
|
274
|
+
self.adbc = abdc
|
275
|
+
|
276
|
+
self.host = "localhost"
|
277
|
+
self.tls = False
|
278
|
+
if location:
|
279
|
+
parts = urlparse(location)
|
280
|
+
self.host = parts.hostname
|
281
|
+
self.tls = parts.scheme.endswith("s")
|
282
|
+
|
283
|
+
@debuggable
|
284
|
+
def do_action(self, context: ServerCallContext, action: Action):
|
285
|
+
log.info("DoAction %s: %s", context.peer(), action)
|
286
|
+
super().do_action(context, action)
|
287
|
+
|
288
|
+
@debuggable
|
289
|
+
def do_exchange(self, context: ServerCallContext, descriptor: FlightDescriptor, reader, writer):
|
290
|
+
log.info("DoExchange %s: %s", context.peer(), descriptor)
|
291
|
+
super().do_exchange(context, descriptor, reader, writer)
|
292
|
+
|
293
|
+
@debuggable
|
294
|
+
def do_get(self, context: ServerCallContext, ticket: Ticket):
|
295
|
+
log.info("DoGet %s: %s", context.peer(), ticket)
|
296
|
+
req = self.parse_command(ticket.ticket)
|
297
|
+
match req:
|
298
|
+
case CommandGetSqlInfo():
|
299
|
+
return RecordBatchStream(self.adbc.get_sql_info(req))
|
300
|
+
case CommandGetCatalogs():
|
301
|
+
return RecordBatchStream(self.adbc.get_catalogs(req))
|
302
|
+
case CommandGetDbSchemas():
|
303
|
+
return RecordBatchStream(self.adbc.get_db_schemas(req))
|
304
|
+
case CommandGetTables():
|
305
|
+
return RecordBatchStream(self.adbc.get_tables(req))
|
306
|
+
case CommandStatementQuery():
|
307
|
+
return RecordBatchStream(self.adbc.statement_query(req))
|
308
|
+
case _:
|
309
|
+
raise NotImplementedError(f"Unsupported do_Get: {req}")
|
310
|
+
|
311
|
+
@debuggable
|
312
|
+
def do_put(
|
313
|
+
self,
|
314
|
+
context: ServerCallContext,
|
315
|
+
descriptor: FlightDescriptor,
|
316
|
+
reader: MetadataRecordBatchReader,
|
317
|
+
writer: FlightMetadataWriter,
|
318
|
+
):
|
319
|
+
log.info("DoPut %s: %s", context.peer(), descriptor)
|
320
|
+
super().do_put(context, descriptor, reader, writer)
|
321
|
+
|
322
|
+
@debuggable
|
323
|
+
def get_flight_info(self, context: ServerCallContext, descriptor: FlightDescriptor) -> FlightInfo:
|
324
|
+
log.info("GetFlightInfo %s: %s", context.peer(), descriptor)
|
325
|
+
req = self.parse_command(descriptor.command)
|
326
|
+
match req:
|
327
|
+
case CommandGetSqlInfo():
|
328
|
+
# Each metadata type contributes to the schema.
|
329
|
+
schema = self.adbc.get_sql_info(req).schema
|
330
|
+
case CommandGetCatalogs():
|
331
|
+
schema = self.adbc.get_catalogs(req).schema
|
332
|
+
case CommandGetDbSchemas():
|
333
|
+
schema = self.adbc.get_db_schemas(req).schema
|
334
|
+
case CommandGetTables():
|
335
|
+
schema = self.adbc.get_tables(req).schema
|
336
|
+
case CommandStatementQuery():
|
337
|
+
schema = self.adbc.statement_query(req, limit=0).schema
|
338
|
+
case _:
|
339
|
+
raise NotImplementedError(f"Unsupported command: {req}")
|
340
|
+
|
341
|
+
return self._make_flight_info(self.descriptor_to_key(descriptor), descriptor, schema)
|
342
|
+
|
343
|
+
@staticmethod
|
344
|
+
def parse_command(command: bytes):
|
345
|
+
command = Any().parse(command)
|
346
|
+
|
347
|
+
if not command.type_url.startswith("type.googleapis.com/arrow.flight.protocol.sql."):
|
348
|
+
raise NotImplementedError(f"Unsupported command: {command.type_url}")
|
349
|
+
|
350
|
+
proto_cls_name = command.type_url[len("type.googleapis.com/arrow.flight.protocol.sql.") :]
|
351
|
+
proto_cls = getattr(rpc, proto_cls_name)
|
352
|
+
return proto_cls().parse(command.value)
|
353
|
+
|
354
|
+
@staticmethod
|
355
|
+
def descriptor_to_key(descriptor):
|
356
|
+
return descriptor.command
|
357
|
+
|
358
|
+
@debuggable
|
359
|
+
def get_schema(self, context: ServerCallContext, descriptor: FlightDescriptor):
|
360
|
+
log.info("GetSchema %s: %s", context.peer(), descriptor)
|
361
|
+
return super().get_schema(context, descriptor)
|
362
|
+
|
363
|
+
@debuggable
|
364
|
+
def list_actions(self, context: ServerCallContext):
|
365
|
+
log.info("ListActions %s", context.peer())
|
366
|
+
super().list_actions(context)
|
367
|
+
|
368
|
+
@debuggable
|
369
|
+
def list_flights(self, context: ServerCallContext, criteria):
|
370
|
+
log.info("ListFlights %s: %s", context.peer(), criteria)
|
371
|
+
super().list_flights(context, criteria)
|
372
|
+
|
373
|
+
def _make_flight_info(self, key, descriptor, schema: pa.Schema):
|
374
|
+
# If we pass zero locations, the FlightSQL client should attempt to use the original connection.
|
375
|
+
endpoints = [FlightEndpoint(key, [])]
|
376
|
+
return FlightInfo(schema, descriptor, endpoints, -1, -1)
|
377
|
+
|
378
|
+
|
379
|
+
if __name__ == "__main__":
    # Script entry point: enable debug logging for the "spiral" logger and
    # serve the Spiral-backed Flight SQL endpoint on localhost:5005.
    logging.basicConfig()
    spiral_logger = logging.getLogger("spiral")
    spiral_logger.setLevel(logging.DEBUG)

    backend = SpiralADBCServer(Spiral())
    server = ADBCFlightServer(backend, location="grpc://localhost:5005")
    server.serve()
|