pyspiral 0.1.0__cp310-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (81) hide show
  1. pyspiral-0.1.0.dist-info/METADATA +48 -0
  2. pyspiral-0.1.0.dist-info/RECORD +81 -0
  3. pyspiral-0.1.0.dist-info/WHEEL +4 -0
  4. pyspiral-0.1.0.dist-info/entry_points.txt +2 -0
  5. spiral/__init__.py +11 -0
  6. spiral/_lib.abi3.so +0 -0
  7. spiral/adbc.py +386 -0
  8. spiral/api/__init__.py +221 -0
  9. spiral/api/admin.py +29 -0
  10. spiral/api/filesystems.py +125 -0
  11. spiral/api/organizations.py +90 -0
  12. spiral/api/projects.py +160 -0
  13. spiral/api/tables.py +94 -0
  14. spiral/api/tokens.py +56 -0
  15. spiral/api/workloads.py +45 -0
  16. spiral/arrow.py +209 -0
  17. spiral/authn/__init__.py +0 -0
  18. spiral/authn/authn.py +89 -0
  19. spiral/authn/device.py +206 -0
  20. spiral/authn/github_.py +33 -0
  21. spiral/authn/modal_.py +18 -0
  22. spiral/catalog.py +78 -0
  23. spiral/cli/__init__.py +82 -0
  24. spiral/cli/__main__.py +4 -0
  25. spiral/cli/admin.py +21 -0
  26. spiral/cli/app.py +48 -0
  27. spiral/cli/console.py +95 -0
  28. spiral/cli/fs.py +47 -0
  29. spiral/cli/login.py +13 -0
  30. spiral/cli/org.py +90 -0
  31. spiral/cli/printer.py +45 -0
  32. spiral/cli/project.py +107 -0
  33. spiral/cli/state.py +3 -0
  34. spiral/cli/table.py +20 -0
  35. spiral/cli/token.py +27 -0
  36. spiral/cli/types.py +53 -0
  37. spiral/cli/workload.py +59 -0
  38. spiral/config.py +26 -0
  39. spiral/core/__init__.py +0 -0
  40. spiral/core/core/__init__.pyi +53 -0
  41. spiral/core/manifests/__init__.pyi +53 -0
  42. spiral/core/metastore/__init__.pyi +91 -0
  43. spiral/core/spec/__init__.pyi +257 -0
  44. spiral/dataset.py +239 -0
  45. spiral/debug.py +251 -0
  46. spiral/expressions/__init__.py +222 -0
  47. spiral/expressions/base.py +149 -0
  48. spiral/expressions/http.py +86 -0
  49. spiral/expressions/io.py +100 -0
  50. spiral/expressions/list_.py +68 -0
  51. spiral/expressions/refs.py +44 -0
  52. spiral/expressions/str_.py +39 -0
  53. spiral/expressions/struct.py +57 -0
  54. spiral/expressions/tiff.py +223 -0
  55. spiral/expressions/udf.py +46 -0
  56. spiral/grpc_.py +32 -0
  57. spiral/project.py +137 -0
  58. spiral/proto/_/__init__.py +0 -0
  59. spiral/proto/_/arrow/__init__.py +0 -0
  60. spiral/proto/_/arrow/flight/__init__.py +0 -0
  61. spiral/proto/_/arrow/flight/protocol/__init__.py +0 -0
  62. spiral/proto/_/arrow/flight/protocol/sql/__init__.py +1990 -0
  63. spiral/proto/_/scandal/__init__.py +223 -0
  64. spiral/proto/_/spfs/__init__.py +36 -0
  65. spiral/proto/_/spiral/__init__.py +0 -0
  66. spiral/proto/_/spiral/table/__init__.py +225 -0
  67. spiral/proto/_/spiraldb/__init__.py +0 -0
  68. spiral/proto/_/spiraldb/metastore/__init__.py +499 -0
  69. spiral/proto/__init__.py +0 -0
  70. spiral/proto/scandal/__init__.py +45 -0
  71. spiral/proto/spiral/__init__.py +0 -0
  72. spiral/proto/spiral/table/__init__.py +96 -0
  73. spiral/proto/substrait/__init__.py +3399 -0
  74. spiral/proto/substrait/extensions/__init__.py +115 -0
  75. spiral/proto/util.py +41 -0
  76. spiral/py.typed +0 -0
  77. spiral/scan_.py +168 -0
  78. spiral/settings.py +157 -0
  79. spiral/substrait_.py +275 -0
  80. spiral/table.py +157 -0
  81. spiral/types_.py +6 -0
@@ -0,0 +1,48 @@
1
+ Metadata-Version: 2.4
2
+ Name: pyspiral
3
+ Version: 0.1.0
4
+ Classifier: Intended Audience :: Science/Research
5
+ Classifier: Operating System :: OS Independent
6
+ Classifier: Programming Language :: Python
7
+ Classifier: Programming Language :: Python :: 3
8
+ Classifier: Programming Language :: Python :: 3 :: Only
9
+ Classifier: Programming Language :: Python :: 3.10
10
+ Classifier: Programming Language :: Python :: 3.11
11
+ Classifier: Programming Language :: Python :: 3.12
12
+ Classifier: Programming Language :: Python :: 3.13
13
+ Classifier: Programming Language :: Rust
14
+ Classifier: License :: Other/Proprietary License
15
+ Requires-Dist: betterproto==2.0.0b7
16
+ Requires-Dist: google-re2>=1.1.20240702
17
+ Requires-Dist: grpclib>=0.4.7
18
+ Requires-Dist: hishel>=0.0.30
19
+ Requires-Dist: httpx>=0.27.0
20
+ Requires-Dist: numpy>=1.26.3
21
+ Requires-Dist: opentelemetry-api>=1.27.0
22
+ Requires-Dist: opentelemetry-sdk>=1.27.0
23
+ Requires-Dist: polars>=1.6.0
24
+ Requires-Dist: pyarrow>=17.0.0
25
+ Requires-Dist: pydantic-settings>=2.3.4
26
+ Requires-Dist: pydantic[email]>=2.5.3
27
+ Requires-Dist: pyjwt[crypto]>=2.9.0
28
+ Requires-Dist: pyroaring>=0.4.4
29
+ Requires-Dist: questionary>=2.0.1
30
+ Requires-Dist: tqdm>=4.66.5
31
+ Requires-Dist: typer>=0.12.3
32
+ Requires-Dist: xxhash>=3.4.1
33
+ Requires-Dist: nanoid>=2.0.0
34
+ Requires-Dist: sqlglot[rs]>=25.25.1
35
+ Requires-Dist: duckdb>=1.1.1
36
+ Requires-Dist: pyperclip>=1.9.0
37
+ Summary: Python implementation of Spiral table format.
38
+ Keywords: vortex,spiraldb
39
+ Home-Page: https://spiraldb.com
40
+ Author: Spiral<hello@spiraldb.com>
41
+ Author-email: SpiralDB <hello@spiraldb.com>
42
+ License: Proprietary
43
+ Requires-Python: >=3.10
44
+ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
45
+ Project-URL: Source Code, https://github.com/spiraldb/spiraldb
46
+
47
+ # PySpiral
48
+
@@ -0,0 +1,81 @@
1
+ pyspiral-0.1.0.dist-info/METADATA,sha256=rPBsHTqcVDRoUPx9y63fXXLZ-7K1D_p6jQBjeRTM-t8,1699
2
+ pyspiral-0.1.0.dist-info/WHEEL,sha256=1L8UL2wrWiNkziZv-3hQSZMxy7RYSqejqwbCb6Xe2FM,128
3
+ pyspiral-0.1.0.dist-info/entry_points.txt,sha256=uft7u-a6g40NLt4Q6BleWbK4NY0M8nZuYPpP8DV0EOk,45
4
+ spiral/catalog.py,sha256=BtthmRApU1RSb6KbUfVTM2aYeLsnlO0nKDYHBYhdr9M,2496
5
+ spiral/grpc_.py,sha256=f3czdP1Mxme42Y5--a5ogYq1TTiWn-J_MlGjwJ2mWwM,1015
6
+ spiral/substrait_.py,sha256=5ZXnYcsXEdrBogECnoL6IMlsjsseYHEnVARgRpy2vt8,12671
7
+ spiral/table.py,sha256=iJ-Lhu9ieSNg728cvUNRERcxKz7pj8hMCXPGBFHg7tI,4845
8
+ spiral/core/spec/__init__.pyi,sha256=zwOPhpBS_iOrPkOdc4ySpgzICZNbteMZf4c2wdkWw1Y,7251
9
+ spiral/core/core/__init__.pyi,sha256=-OCtTgqjUN7sACAmgrAA_TrqmuP7epNSyL9XjqnNTa4,1914
10
+ spiral/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
+ spiral/core/metastore/__init__.pyi,sha256=pdKED91GVJ9XWxTWc9gwkHvVHV_RKxvL43Ofs5ndmew,3145
12
+ spiral/core/manifests/__init__.pyi,sha256=DXr4Ab_Xo11AzrJlSj7FTSJc8qoO-SkL-_ik4kB855U,1516
13
+ spiral/__init__.py,sha256=4EiQkY17qHT9dpxu41fdOV1kqGl_b-HXNRQg--ZwJbo,286
14
+ spiral/api/__init__.py,sha256=Ub3IYeQUJ_z0-Y_SXmNasxb6uefKynQkuUZgRM1enyc,6660
15
+ spiral/api/admin.py,sha256=HJBrRJScbcdDuFhF_06E0EyE-_Y0osfYPxVoRAyEoTc,837
16
+ spiral/api/workloads.py,sha256=4MWs2pp9AWvx6cZhgyW-ehyCRxpHc_NAQgHaoYOEBfg,1266
17
+ spiral/api/organizations.py,sha256=-PO93HTX02IxhXM6SJpAhnAXpVW1WjthFO4-AOzZAC4,2670
18
+ spiral/api/filesystems.py,sha256=NPf5PCyCQ7eEU723fOdKcuGPgn83pthkb_jJ9aB0fwM,3431
19
+ spiral/api/tables.py,sha256=uPoWkkcW7lJUxU0fUgDK5Gy_DT8y7gJFXdgxQpqcc5w,2548
20
+ spiral/api/projects.py,sha256=-VGlu5V3TJ3XLCGu85bPHRFiktIADnAxfLWIH8Rmxug,4986
21
+ spiral/api/tokens.py,sha256=WaSRr_l3i81t5u2qi3kWW-fySbyKFm-PQK1ZlmoSByc,1464
22
+ spiral/adbc.py,sha256=H_bzevPy5teyZKzjczh1gQ_zPcfk5sNASiJKQvyab9E,13830
23
+ spiral/config.py,sha256=ovtE5D3r6_g90ZRDJhlJyWhOBlxLjagvomOyA-VZmdc,911
24
+ spiral/settings.py,sha256=e_F6GQea6ljXzCRgFxMBMecoGhFpEhePhMRQkYOoVO4,4555
25
+ spiral/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
26
+ spiral/scan_.py,sha256=rbLl85yeOkRuLsbk6QKng0_U4gtGGsW6C-aJiJSxuv8,5946
27
+ spiral/dataset.py,sha256=jUeXvE4B5nKh9VNmHxvROQbEkTUxqyYS8oS6EQyRbng,7555
28
+ spiral/expressions/__init__.py,sha256=T2POn0Z-mQj9PY8ukYWYn5A832Fz7Y6F8RRASo6Xl3A,5638
29
+ spiral/expressions/struct.py,sha256=MuxoBP6ESpwmjzusG-_HxHGYKvQQz6AZWzvw7vNUHJM,2007
30
+ spiral/expressions/refs.py,sha256=F3xr7wmrbAawMkLTWcvVwczbOMNOnIttSMRDuzjZHbo,1774
31
+ spiral/expressions/str_.py,sha256=tY8RXW3JWvr1-bEfCZtk5FAf11wKJnXPuA9EoeJ9tA4,1265
32
+ spiral/expressions/base.py,sha256=mTBwS6CwdDaV8uotjZUiKi7GHQzX2TRPpnseJJUDrR0,4776
33
+ spiral/expressions/list_.py,sha256=nbo4xQAuqBsQGajq_JgORaJl8_CDvOAv14zMbqmtZh4,1814
34
+ spiral/expressions/tiff.py,sha256=k5GMzm4FmGBJMyja-D6u_kt_-vHYIdj4bnYYZut5lK4,7579
35
+ spiral/expressions/udf.py,sha256=vOlrdxiVpt7vdSgiTKX_XR86YKyu02Fdwb9xlINCby4,1363
36
+ spiral/expressions/io.py,sha256=gJ2a0FKMmdxarWKENulPRwH7KDvSJTIh_OUxX306xAM,3045
37
+ spiral/expressions/http.py,sha256=begUydWoFHEqjeLkATvI_v66Ez6_rR-OQBWO5cHbb9c,2742
38
+ spiral/authn/modal_.py,sha256=agcnR3dYTslkH2K_a2Eis_2JWn9Ps11FVrGG_jkOdGk,472
39
+ spiral/authn/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
40
+ spiral/authn/authn.py,sha256=OCGJAUfoKLiXw9xAcAnX6i6mBRlvsl6qEFcimqQOu7g,2555
41
+ spiral/authn/github_.py,sha256=K-0RUHDreINjnCDHyT9aeVDRk6WtNP7noBYEcwdz2W4,1313
42
+ spiral/authn/device.py,sha256=ohHSVLW3a-qLNYQGN-3kXxV_836xOe0UYBN8i63cqAQ,6796
43
+ spiral/arrow.py,sha256=LBPwZcGkP4kXb42_kl5IUwWW3DO84CV3QJDwCHjG5Dg,7225
44
+ spiral/project.py,sha256=q9jHql7hz5OQzPfDfvE_hN3K9cZotD0cxkdug9EUTwM,4889
45
+ spiral/debug.py,sha256=t590eAUtNWwMTsSdkjVNN7J1iMqY2p4PRJ3BWR_ozho,8999
46
+ spiral/cli/table.py,sha256=eh2NAk0GlfvthwRNeIbcZTsRWU3ypFx_uu9OaOLHPUo,628
47
+ spiral/cli/__init__.py,sha256=CoiAJ7FDgqjG_TrU-6SpP1hyZloIlEP4wcwnrF8flHM,2237
48
+ spiral/cli/state.py,sha256=1quvei8TnDTT6mDRo58P8FUfy4w16Z9sggBz7cFgllY,70
49
+ spiral/cli/types.py,sha256=4cphJs-i0vfq_CcnHxT9FpHiZdGwxME5GnOmIGB6Thw,1436
50
+ spiral/cli/admin.py,sha256=3pIs6PxDugMtdgzfRpn_HfBDv-cwAYtF0cJP2INB01A,579
51
+ spiral/cli/fs.py,sha256=8sQAgMahAq0gtXJqnuiKUkx4sO6vEEF4Iaq_-AF3wro,1524
52
+ spiral/cli/token.py,sha256=dv30aa745bbS-c3tyzQUTSxGG_N0kCt8G4bip9fP_EM,968
53
+ spiral/cli/workload.py,sha256=-XreFPJcX7kZvcYE3oQMeGkkYoXi25R5nuktJPg-PuY,2000
54
+ spiral/cli/__main__.py,sha256=kNaKM2xgJo7GRogf83nYldLM-RGUR6vymdGwZxywQu0,71
55
+ spiral/cli/project.py,sha256=aCxvw8UVwSmh_anArizIQ3_pLVT6QH9jYwMsGfpJUgM,4486
56
+ spiral/cli/app.py,sha256=2oZfDTgj_gZ-lFMMzzJJTnvVzQhp_iedvH-FJnaaMW0,1487
57
+ spiral/cli/org.py,sha256=ezWhoGUkJQQAwI1jKvDP8uZPNlnou_hXtRDa1us5cSE,2935
58
+ spiral/cli/login.py,sha256=C7VpqVyYO2daUeIWHoelWnSGN7cju8YEuqOy12ImH4c,381
59
+ spiral/cli/console.py,sha256=-OP0bB_efxhWh4lZ95KRdu-SRgSUMJ47Rbi9FHv1TlY,2577
60
+ spiral/cli/printer.py,sha256=5HD3UcszFfPk-dK8U5akuvtXqMB7PMgOB1DFYMqspG8,1625
61
+ spiral/types_.py,sha256=W_jyO7F6rpPiH69jhgSgV7OxQZbOlb1Ho3InpKUP6Eo,155
62
+ spiral/proto/scandal/__init__.py,sha256=wAAEkPN4S4XDpGQtw1MV5zFUeHM01XSAa5tgUgcALvg,777
63
+ spiral/proto/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
64
+ spiral/proto/util.py,sha256=smnvVo6nYH3FfDm9jqhNLaXz4bbTBaQezHQDCTvZyiQ,1486
65
+ spiral/proto/substrait/__init__.py,sha256=pV4-T-lwAHKkfFrNYSUGY4IkbIvuKjSo_imzF7BLj_s,126526
66
+ spiral/proto/substrait/extensions/__init__.py,sha256=yD7dg0TBqn-GK_L0qeVof1GKnwSLg_kPyQSV3kcSljs,3655
67
+ spiral/proto/spiral/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
68
+ spiral/proto/spiral/table/__init__.py,sha256=_F1f52RMkZsXofPXpJb2KE8KR5l6zxCtrGrabR1uDxo,2816
69
+ spiral/proto/_/arrow/flight/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
70
+ spiral/proto/_/arrow/flight/protocol/sql/__init__.py,sha256=_xhj9QkWEW1qZ-iVxcQ8k4EjYr7KJ5ofitJGqVUGQi4,79921
71
+ spiral/proto/_/arrow/flight/protocol/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
72
+ spiral/proto/_/arrow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
73
+ spiral/proto/_/scandal/__init__.py,sha256=rQJdbN3UKDJ8vOJ5V7l3KumNHlRyY8iw25HCLsIDB4I,6582
74
+ spiral/proto/_/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
75
+ spiral/proto/_/spiraldb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
76
+ spiral/proto/_/spiraldb/metastore/__init__.py,sha256=40Egtg8MRYTaTTYRKOHkwuiyXEkw3Yg7ETCQskIzpIg,16873
77
+ spiral/proto/_/spiral/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
78
+ spiral/proto/_/spiral/table/__init__.py,sha256=sjK2dmvB09PqV3lxKMEk5QoHjC37HMW0MnxR1QDuBg0,7387
79
+ spiral/proto/_/spfs/__init__.py,sha256=9WtIXr7HGslKWRHHieFDo8N_qnGL4QQyLOCWEkOKRvk,1017
80
+ spiral/_lib.abi3.so,sha256=3gQK4_PPR_H0f3Ve5fg3NsCnIjx2UBuGvUBnYUYn8AA,58598496
81
+ pyspiral-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: maturin (1.8.2)
3
+ Root-Is-Purelib: false
4
+ Tag: cp310-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ spiral=spiral.cli.app:main
spiral/__init__.py ADDED
@@ -0,0 +1,11 @@
1
+ """Python client for the Spiral warehouse."""
2
+
3
+ from spiral import _lib
4
+ from spiral.catalog import Spiral
5
+ from spiral.scan_ import Scan, scan
6
+ from spiral.table import Table
7
+
8
+ # Eagerly import the Spiral library
9
+ assert _lib, "Spiral library"
10
+
11
+ __all__ = ["scan", "Scan", "Table", "Spiral"]
spiral/_lib.abi3.so ADDED
Binary file
spiral/adbc.py ADDED
@@ -0,0 +1,386 @@
1
+ import abc
2
+ import functools
3
+ import logging
4
+ from concurrent.futures import ThreadPoolExecutor
5
+ from urllib.parse import urlparse
6
+
7
+ import duckdb
8
+ import pyarrow as pa
9
+ import pyarrow.compute as pc
10
+ import sqlglot
11
+ import sqlglot.expressions as exp
12
+ from betterproto.lib.google.protobuf import Any
13
+ from pyarrow.flight import (
14
+ Action,
15
+ FlightDescriptor,
16
+ FlightEndpoint,
17
+ FlightError,
18
+ FlightInfo,
19
+ FlightMetadataWriter,
20
+ FlightServerBase,
21
+ MetadataRecordBatchReader,
22
+ RecordBatchStream,
23
+ ServerCallContext,
24
+ Ticket,
25
+ )
26
+
27
+ from spiral import Spiral
28
+ from spiral.proto._.arrow.flight.protocol import sql as rpc
29
+ from spiral.proto._.arrow.flight.protocol.sql import (
30
+ CommandGetCatalogs,
31
+ CommandGetDbSchemas,
32
+ CommandGetSqlInfo,
33
+ CommandGetTables,
34
+ CommandStatementQuery,
35
+ SqlInfo,
36
+ SqlSupportedTransaction,
37
+ )
38
+
39
+ log = logging.getLogger(__name__)
40
+
41
+
42
def debuggable(func):
    """Decorate an Arrow Flight RPC server function so a GUI debugger can trace it.

    Before every call the wrapper tries to import ``pydevd`` (the debugger
    backend used by e.g. PyCharm), marks it as connected and enables tracing
    without suspending. When ``pydevd`` cannot be imported the call goes
    straight through to ``func``.

    See: https://github.com/apache/arrow/issues/36844
    for more details...
    """

    @functools.wraps(func)
    def _traced(*args, **kwargs):
        try:
            import pydevd

            pydevd.connected = True
            pydevd.settrace(suspend=False)
        except ImportError:
            # No debugger backend available: run the function untraced.
            pass
        return func(*args, **kwargs)

    return _traced
64
+
65
+
66
class ADBCServerBase(abc.ABC):
    """Interface for the Flight SQL commands an ADBC Flight server dispatches to.

    Subclasses must implement the catalog, schema, table and statement
    commands. ``get_sql_info`` ships with a default implementation.

    Deriving from ``abc.ABC`` makes the ``@abc.abstractmethod`` markers below
    effective: a subclass that misses one of them fails at instantiation
    instead of silently returning ``None`` from the stub.
    """

    def get_sql_info(self, _req: CommandGetSqlInfo) -> pa.RecordBatchReader:
        """Default implementation that reports no support for any complex features.

        Args:
            _req: The incoming request; it is not inspected.

        Returns:
            A reader over one table with an ``info_name`` (uint32) column and an
            ``info_value`` dense-union column.
        """
        info = {
            SqlInfo.FLIGHT_SQL_SERVER_NAME: "Spiral ADBC Server",
            SqlInfo.FLIGHT_SQL_SERVER_VERSION: "0.0.1",
            SqlInfo.FLIGHT_SQL_SERVER_ARROW_VERSION: pa.__version__,
            SqlInfo.FLIGHT_SQL_SERVER_READ_ONLY: True,
            SqlInfo.FLIGHT_SQL_SERVER_TRANSACTION: SqlSupportedTransaction.SQL_SUPPORTED_TRANSACTION_NONE.value,
        }

        # See https://github.com/apache/arrow-adbc/blob/38c21c2311a59803559cb0091b3f34180c28b25f/rust/core/src/schemas.rs#L35
        union_fields = [
            pa.field("string_value", pa.string()),
            pa.field("bool_value", pa.bool_()),
            pa.field("int64_value", pa.int64()),
            pa.field("int32_bitmask", pa.int32()),
            pa.field("string_list", pa.list_(pa.string())),
            pa.field(
                "int32_to_int32_list_map",
                pa.map_(pa.int32(), pa.list_(pa.int32()), keys_sorted=False),
            ),
        ]
        schema = pa.schema(
            [
                pa.field("info_name", pa.uint32(), nullable=False),
                pa.field("info_value", pa.dense_union(union_fields), nullable=False),
            ]
        )

        # PyArrow doesn't support creating a dense union for us :(
        # Each value gets a type code (the index of its child in union_fields)
        # and an offset into that child's value list.
        types = []
        offsets = []
        strs = []
        bools = []
        ints = []
        for value in info.values():
            if isinstance(value, str):
                type_code, child = 0, strs
            elif isinstance(value, bool):
                # bool is a subclass of int, so it has to be matched before
                # falling through to the integer child.
                type_code, child = 1, bools
            else:
                # Integers live in the "int64_value" child, which is index 2.
                type_code, child = 2, ints
            types.append(type_code)
            offsets.append(len(child))
            child.append(value)

        values = pa.UnionArray.from_dense(
            pa.array(types, type=pa.int8()),
            pa.array(offsets, type=pa.int32()),
            [pa.array(data, type=f.type) for data, f in zip([strs, bools, ints, [], [], []], union_fields)],
            [f.name for f in union_fields],
        )

        return pa.table(data=[pa.array(list(info.keys()), type=pa.uint32()), values], schema=schema).to_reader()

    @abc.abstractmethod
    def get_catalogs(self, req: CommandGetCatalogs) -> pa.RecordBatchReader:
        """Return a reader for the ``CommandGetCatalogs`` request."""
        ...

    @abc.abstractmethod
    def get_db_schemas(self, req: CommandGetDbSchemas) -> pa.RecordBatchReader:
        """Return a reader for the ``CommandGetDbSchemas`` request."""
        ...

    @abc.abstractmethod
    def get_tables(self, req: CommandGetTables) -> pa.RecordBatchReader:
        """Return a reader for the ``CommandGetTables`` request."""
        ...

    @abc.abstractmethod
    def statement_query(self, req: CommandStatementQuery, limit: int | None = None) -> pa.RecordBatchReader:
        """Return a reader over the result of ``req``, optionally limited to ``limit`` rows."""
        ...
136
+
137
+
138
class SpiralADBCServer(ADBCServerBase):
    """ADBC command implementation backed by a ``Spiral`` client.

    Projects are exposed as catalogs, the first component of each table name
    as the DB schema, and statements are executed through DuckDB.
    """

    def __init__(self, spiral: Spiral):
        self.sp = spiral

        # Used to process projects concurrently in get_tables.
        self.pool = ThreadPoolExecutor()

    def get_catalogs(self, req: CommandGetCatalogs) -> pa.RecordBatchReader:
        """Return a single batch with one ``catalog_name`` row per project ID."""
        schema = pa.schema([pa.field("catalog_name", pa.string(), nullable=False)])

        @debuggable
        def batches():
            yield pa.RecordBatch.from_arrays(
                [list(self.sp.list_project_ids())],
                schema=schema,
            )

        return pa.RecordBatchReader.from_batches(schema, batches())

    def get_db_schemas(self, req: CommandGetDbSchemas) -> pa.RecordBatchReader:
        """Get the schemas from the database.

        Yields one batch per project, optionally filtered by
        ``req.db_schema_filter_pattern`` (a SQL LIKE pattern).
        """

        schema = pa.schema(
            [
                pa.field("catalog_name", pa.string()),
                pa.field("db_schema_name", pa.string(), nullable=False),
            ]
        )

        @debuggable
        def batches():
            if req.catalog == "":
                # Empty string means databases _without_ a catalog, which we don't support
                return

            # Otherwise, catalog is either the project ID, or None.
            if req.catalog is None:
                projects = list(self.sp.list_projects())
            else:
                projects = [self.sp.project(req.catalog)]

            for project in projects:
                # A set de-duplicates the first element of each table-name entry.
                datasets = {dt[0] for dt in project.list_table_names()}
                batch = pa.RecordBatch.from_arrays(
                    [
                        [project.id] * len(datasets),
                        list(datasets),
                    ],
                    schema=schema,
                )

                if req.db_schema_filter_pattern:
                    mask = pc.match_like(batch["db_schema_name"], req.db_schema_filter_pattern)
                    batch = batch.filter(mask)

                yield batch

        return pa.RecordBatchReader.from_batches(schema, batches())

    def get_tables(self, req: CommandGetTables) -> pa.RecordBatchReader:
        """Return one batch of table descriptions per project.

        The ``table_schema`` column (serialized Arrow schema) is only present
        when ``req.include_schema`` is set.
        """
        fields = [
            pa.field("catalog_name", pa.string()),
            pa.field("db_schema_name", pa.string()),
            pa.field("table_name", pa.string(), nullable=False),
            pa.field("table_type", pa.string(), nullable=False),
        ]
        # Built step by step on purpose: writing
        # `base + extra if cond else []` binds as `(base + extra) if cond else []`
        # and would drop every column when the schema is not requested.
        if req.include_schema:
            fields.append(pa.field("table_schema", pa.binary(), nullable=False))
        schema = pa.schema(fields)

        @debuggable
        def batches():
            if req.catalog == "":
                # Empty string means databases _without_ a catalog, which we don't support
                return

            if req.catalog is None:
                projects = list(self.sp.list_projects())
            else:
                projects = [self.sp.project(req.catalog)]

            def _process_project(project):
                tables = project.list_tables()

                rows = []
                for table in tables:
                    # str(table) is expected to be "<project>.<dataset>.<name>".
                    _project_id, dataset, name = str(table).split(".")

                    row = {
                        "catalog_name": project.id,
                        "db_schema_name": dataset,
                        "table_name": name,
                        "table_type": "TABLE",
                    }

                    if req.include_schema:
                        row["table_schema"] = table.to_dataset().schema.serialize().to_pybytes()

                    rows.append(row)

                return pa.RecordBatch.from_pylist(rows, schema=schema)

            yield from self.pool.map(_process_project, projects)

        return pa.RecordBatchReader.from_batches(schema, batches())

    @debuggable
    def statement_query(self, req: CommandStatementQuery, limit: int | None = None) -> pa.RecordBatchReader:
        """Execute ``req.query`` with DuckDB and return an Arrow reader over the result.

        Args:
            req: The statement to run; parsed using the DuckDB SQL dialect.
            limit: Optional maximum number of rows to return.

        Raises:
            FlightError: If DuckDB rejects the rewritten query.
        """
        # Extract the tables from the query, and bring them into the Python locals scope.
        expr = sqlglot.parse_one(req.query, dialect="duckdb")
        for tbl in expr.find_all(exp.Table):
            # We swap the three-part identifier out for a single identifier
            # This lets us insert a PyArrow Dataset into Python locals such that
            # DuckDB will pick up on it for the query.
            name = exp.table_name(tbl)
            # NOTE(review): under PEP 667 (Python 3.13+) locals() inside a function
            # returns an independent snapshot, so this binding may not be visible
            # to DuckDB there -- confirm on 3.13.
            locals()[name] = self.sp.project(tbl.catalog).table(f"{tbl.db}.{tbl.name}").to_dataset()
            tbl.replace(exp.table_(table=name))

        try:
            sql = duckdb.sql(expr.sql(dialect="duckdb"))
        except Exception as e:
            # Chain the cause so the original DuckDB traceback is preserved.
            raise FlightError(str(e)) from e

        if limit is not None:
            sql = sql.limit(limit)

        return sql.fetch_arrow_reader(batch_size=1_000)
266
+
267
+
268
+ class ADBCFlightServer(FlightServerBase):
269
+ """An implementation of a FlightSQL ADBC server."""
270
+
271
+ def __init__(self, abdc: ADBCServerBase, *, location=None, **kwargs):
272
+ super().__init__(location=location, **kwargs)
273
+ self.location = location
274
+ self.adbc = abdc
275
+
276
+ self.host = "localhost"
277
+ self.tls = False
278
+ if location:
279
+ parts = urlparse(location)
280
+ self.host = parts.hostname
281
+ self.tls = parts.scheme.endswith("s")
282
+
283
+ @debuggable
284
+ def do_action(self, context: ServerCallContext, action: Action):
285
+ log.info("DoAction %s: %s", context.peer(), action)
286
+ super().do_action(context, action)
287
+
288
+ @debuggable
289
+ def do_exchange(self, context: ServerCallContext, descriptor: FlightDescriptor, reader, writer):
290
+ log.info("DoExchange %s: %s", context.peer(), descriptor)
291
+ super().do_exchange(context, descriptor, reader, writer)
292
+
293
+ @debuggable
294
+ def do_get(self, context: ServerCallContext, ticket: Ticket):
295
+ log.info("DoGet %s: %s", context.peer(), ticket)
296
+ req = self.parse_command(ticket.ticket)
297
+ match req:
298
+ case CommandGetSqlInfo():
299
+ return RecordBatchStream(self.adbc.get_sql_info(req))
300
+ case CommandGetCatalogs():
301
+ return RecordBatchStream(self.adbc.get_catalogs(req))
302
+ case CommandGetDbSchemas():
303
+ return RecordBatchStream(self.adbc.get_db_schemas(req))
304
+ case CommandGetTables():
305
+ return RecordBatchStream(self.adbc.get_tables(req))
306
+ case CommandStatementQuery():
307
+ return RecordBatchStream(self.adbc.statement_query(req))
308
+ case _:
309
+ raise NotImplementedError(f"Unsupported do_Get: {req}")
310
+
311
+ @debuggable
312
+ def do_put(
313
+ self,
314
+ context: ServerCallContext,
315
+ descriptor: FlightDescriptor,
316
+ reader: MetadataRecordBatchReader,
317
+ writer: FlightMetadataWriter,
318
+ ):
319
+ log.info("DoPut %s: %s", context.peer(), descriptor)
320
+ super().do_put(context, descriptor, reader, writer)
321
+
322
+ @debuggable
323
+ def get_flight_info(self, context: ServerCallContext, descriptor: FlightDescriptor) -> FlightInfo:
324
+ log.info("GetFlightInfo %s: %s", context.peer(), descriptor)
325
+ req = self.parse_command(descriptor.command)
326
+ match req:
327
+ case CommandGetSqlInfo():
328
+ # Each metadata type contributes to the schema.
329
+ schema = self.adbc.get_sql_info(req).schema
330
+ case CommandGetCatalogs():
331
+ schema = self.adbc.get_catalogs(req).schema
332
+ case CommandGetDbSchemas():
333
+ schema = self.adbc.get_db_schemas(req).schema
334
+ case CommandGetTables():
335
+ schema = self.adbc.get_tables(req).schema
336
+ case CommandStatementQuery():
337
+ schema = self.adbc.statement_query(req, limit=0).schema
338
+ case _:
339
+ raise NotImplementedError(f"Unsupported command: {req}")
340
+
341
+ return self._make_flight_info(self.descriptor_to_key(descriptor), descriptor, schema)
342
+
343
+ @staticmethod
344
+ def parse_command(command: bytes):
345
+ command = Any().parse(command)
346
+
347
+ if not command.type_url.startswith("type.googleapis.com/arrow.flight.protocol.sql."):
348
+ raise NotImplementedError(f"Unsupported command: {command.type_url}")
349
+
350
+ proto_cls_name = command.type_url[len("type.googleapis.com/arrow.flight.protocol.sql.") :]
351
+ proto_cls = getattr(rpc, proto_cls_name)
352
+ return proto_cls().parse(command.value)
353
+
354
+ @staticmethod
355
+ def descriptor_to_key(descriptor):
356
+ return descriptor.command
357
+
358
+ @debuggable
359
+ def get_schema(self, context: ServerCallContext, descriptor: FlightDescriptor):
360
+ log.info("GetSchema %s: %s", context.peer(), descriptor)
361
+ return super().get_schema(context, descriptor)
362
+
363
+ @debuggable
364
+ def list_actions(self, context: ServerCallContext):
365
+ log.info("ListActions %s", context.peer())
366
+ super().list_actions(context)
367
+
368
+ @debuggable
369
+ def list_flights(self, context: ServerCallContext, criteria):
370
+ log.info("ListFlights %s: %s", context.peer(), criteria)
371
+ super().list_flights(context, criteria)
372
+
373
+ def _make_flight_info(self, key, descriptor, schema: pa.Schema):
374
+ # If we pass zero locations, the FlightSQL client should attempt to use the original connection.
375
+ endpoints = [FlightEndpoint(key, [])]
376
+ return FlightInfo(schema, descriptor, endpoints, -1, -1)
377
+
378
+
379
if __name__ == "__main__":
    # Script entry point: enable debug logging for the "spiral" loggers and
    # serve the ADBC Flight server on localhost:5005.
    # `logging` is already imported at module level.
    logging.basicConfig()
    spiral_logger = logging.getLogger("spiral")
    spiral_logger.setLevel(logging.DEBUG)

    backend = SpiralADBCServer(Spiral())
    server = ADBCFlightServer(backend, location="grpc://localhost:5005")
    server.serve()