pyspiral 0.4.0__pp310-pypy310_pp73-macosx_10_12_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyspiral-0.4.0.dist-info/METADATA +46 -0
- pyspiral-0.4.0.dist-info/RECORD +98 -0
- pyspiral-0.4.0.dist-info/WHEEL +4 -0
- pyspiral-0.4.0.dist-info/entry_points.txt +2 -0
- spiral/__init__.py +10 -0
- spiral/_lib.pypy310-pp73-darwin.so +0 -0
- spiral/adbc.py +393 -0
- spiral/api/__init__.py +64 -0
- spiral/api/admin.py +15 -0
- spiral/api/client.py +160 -0
- spiral/api/filesystems.py +153 -0
- spiral/api/organizations.py +77 -0
- spiral/api/projects.py +197 -0
- spiral/api/telemetry.py +19 -0
- spiral/api/types.py +20 -0
- spiral/api/workloads.py +52 -0
- spiral/arrow_.py +221 -0
- spiral/cli/__init__.py +79 -0
- spiral/cli/__main__.py +4 -0
- spiral/cli/admin.py +16 -0
- spiral/cli/app.py +65 -0
- spiral/cli/console.py +95 -0
- spiral/cli/fs.py +112 -0
- spiral/cli/iceberg/__init__.py +7 -0
- spiral/cli/iceberg/namespaces.py +47 -0
- spiral/cli/iceberg/tables.py +60 -0
- spiral/cli/indexes/__init__.py +19 -0
- spiral/cli/login.py +22 -0
- spiral/cli/orgs.py +90 -0
- spiral/cli/printer.py +53 -0
- spiral/cli/projects.py +136 -0
- spiral/cli/state.py +5 -0
- spiral/cli/tables/__init__.py +121 -0
- spiral/cli/telemetry.py +18 -0
- spiral/cli/types.py +51 -0
- spiral/cli/workloads.py +59 -0
- spiral/client.py +79 -0
- spiral/core/__init__.pyi +0 -0
- spiral/core/client/__init__.pyi +117 -0
- spiral/core/index/__init__.pyi +15 -0
- spiral/core/table/__init__.pyi +108 -0
- spiral/core/table/manifests/__init__.pyi +35 -0
- spiral/core/table/metastore/__init__.pyi +62 -0
- spiral/core/table/spec/__init__.pyi +214 -0
- spiral/datetime_.py +27 -0
- spiral/expressions/__init__.py +245 -0
- spiral/expressions/base.py +149 -0
- spiral/expressions/http.py +86 -0
- spiral/expressions/io.py +100 -0
- spiral/expressions/list_.py +68 -0
- spiral/expressions/mp4.py +62 -0
- spiral/expressions/png.py +18 -0
- spiral/expressions/qoi.py +18 -0
- spiral/expressions/refs.py +58 -0
- spiral/expressions/str_.py +39 -0
- spiral/expressions/struct.py +59 -0
- spiral/expressions/text.py +62 -0
- spiral/expressions/tiff.py +223 -0
- spiral/expressions/udf.py +46 -0
- spiral/grpc_.py +32 -0
- spiral/iceberg/__init__.py +3 -0
- spiral/iceberg/client.py +33 -0
- spiral/indexes/__init__.py +5 -0
- spiral/indexes/client.py +137 -0
- spiral/indexes/index.py +34 -0
- spiral/indexes/scan.py +22 -0
- spiral/project.py +46 -0
- spiral/protogen/_/__init__.py +0 -0
- spiral/protogen/_/arrow/__init__.py +0 -0
- spiral/protogen/_/arrow/flight/__init__.py +0 -0
- spiral/protogen/_/arrow/flight/protocol/__init__.py +0 -0
- spiral/protogen/_/arrow/flight/protocol/sql/__init__.py +1990 -0
- spiral/protogen/_/scandal/__init__.py +178 -0
- spiral/protogen/_/spiral/__init__.py +0 -0
- spiral/protogen/_/spiral/table/__init__.py +22 -0
- spiral/protogen/_/substrait/__init__.py +3399 -0
- spiral/protogen/_/substrait/extensions/__init__.py +115 -0
- spiral/protogen/__init__.py +0 -0
- spiral/protogen/substrait/__init__.py +3399 -0
- spiral/protogen/substrait/extensions/__init__.py +115 -0
- spiral/protogen/util.py +41 -0
- spiral/py.typed +0 -0
- spiral/server.py +17 -0
- spiral/settings.py +101 -0
- spiral/substrait_.py +279 -0
- spiral/tables/__init__.py +12 -0
- spiral/tables/client.py +130 -0
- spiral/tables/dataset.py +250 -0
- spiral/tables/debug/__init__.py +0 -0
- spiral/tables/debug/manifests.py +70 -0
- spiral/tables/debug/metrics.py +56 -0
- spiral/tables/debug/scan.py +248 -0
- spiral/tables/maintenance.py +12 -0
- spiral/tables/scan.py +193 -0
- spiral/tables/snapshot.py +78 -0
- spiral/tables/table.py +157 -0
- spiral/tables/transaction.py +52 -0
- spiral/types_.py +6 -0
@@ -0,0 +1,115 @@
|
|
1
|
+
# Generated by the protocol buffer compiler. DO NOT EDIT!
|
2
|
+
# sources: substrait/extensions/extensions.proto
|
3
|
+
# plugin: python-betterproto
|
4
|
+
# This file has been @generated
|
5
|
+
|
6
|
+
from dataclasses import dataclass
|
7
|
+
from typing import List
|
8
|
+
|
9
|
+
import betterproto
|
10
|
+
import betterproto.lib.google.protobuf as betterproto_lib_google_protobuf
|
11
|
+
|
12
|
+
|
13
|
+
@dataclass(eq=False, repr=False)
class SimpleExtensionUri(betterproto.Message):
    """Pairs a plan-local anchor with the URI of an extension YAML file."""

    # Surrogate key, scoped to a single plan, used to reference the URI
    # associated with an extension.
    extension_uri_anchor: int = betterproto.uint32_field(1)

    # The URI where this extension YAML can be retrieved. This is the
    # "namespace" of this extension.
    uri: str = betterproto.string_field(2)
|
26
|
+
|
27
|
+
|
28
|
+
@dataclass(eq=False, repr=False)
class SimpleExtensionDeclaration(betterproto.Message):
    """
    Maps one specific extension entity to the URI where that extension can be
    found. The three fields below all belong to the "mapping_type" group.
    """

    extension_type: "SimpleExtensionDeclarationExtensionType" = betterproto.message_field(
        1, group="mapping_type"
    )
    extension_type_variation: "SimpleExtensionDeclarationExtensionTypeVariation" = betterproto.message_field(
        2, group="mapping_type"
    )
    extension_function: "SimpleExtensionDeclarationExtensionFunction" = betterproto.message_field(
        3, group="mapping_type"
    )
|
44
|
+
|
45
|
+
|
46
|
+
@dataclass(eq=False, repr=False)
class SimpleExtensionDeclarationExtensionType(betterproto.Message):
    """Describes a Type"""

    # References the extension_uri_anchor defined for a specific extension URI.
    extension_uri_reference: int = betterproto.uint32_field(1)

    # Surrogate key, scoped to a single plan, used to reference a specific
    # extension type.
    type_anchor: int = betterproto.uint32_field(2)

    # The name of the type in the defined extension YAML.
    name: str = betterproto.string_field(3)
|
63
|
+
|
64
|
+
|
65
|
+
@dataclass(eq=False, repr=False)
class SimpleExtensionDeclarationExtensionTypeVariation(betterproto.Message):
    """Declares a type variation provided by an extension."""

    # References the extension_uri_anchor defined for a specific extension URI.
    extension_uri_reference: int = betterproto.uint32_field(1)

    # Surrogate key, scoped to a single plan, used to reference a specific
    # type variation.
    type_variation_anchor: int = betterproto.uint32_field(2)

    # The name of the type in the defined extension YAML.
    name: str = betterproto.string_field(3)
|
80
|
+
|
81
|
+
|
82
|
+
@dataclass(eq=False, repr=False)
class SimpleExtensionDeclarationExtensionFunction(betterproto.Message):
    """Declares a function provided by an extension."""

    # References the extension_uri_anchor defined for a specific extension URI.
    extension_uri_reference: int = betterproto.uint32_field(1)

    # Surrogate key, scoped to a single plan, used to reference a specific
    # function.
    function_anchor: int = betterproto.uint32_field(2)

    # A function signature compound name.
    name: str = betterproto.string_field(3)
|
97
|
+
|
98
|
+
|
99
|
+
@dataclass(eq=False, repr=False)
class AdvancedExtension(betterproto.Message):
    """
    Generic container for embedding additional extension information into the
    serialized substrait plan.
    """

    # An optimization is helpful information that does not influence
    # semantics. May be ignored by a consumer.
    optimization: List["betterproto_lib_google_protobuf.Any"] = betterproto.message_field(1)

    # An enhancement alters semantics. Cannot be ignored by a consumer.
    enhancement: "betterproto_lib_google_protobuf.Any" = betterproto.message_field(2)
|
spiral/protogen/util.py
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
import betterproto
|
2
|
+
from betterproto.grpc.grpclib_server import ServiceBase
|
3
|
+
|
4
|
+
|
5
|
+
def patch_protos(proto_module, our_module_globals):
    """Check that a wrapper module re-exports every proto type, then patch overrides.

    Every betterproto class found in ``proto_module`` must also be present in
    ``our_module_globals``. If any are absent, a ready-to-paste import statement
    and ``__all__`` list are printed and ``ValueError`` is raised. Otherwise,
    each name whose local object differs from the generated one is written back
    onto ``proto_module``.
    """
    proto_bases = (betterproto.Message, betterproto.Enum, betterproto.ServiceStub, ServiceBase)

    # name -> True when our module supplies a different object than the generated one
    overrides = {}
    missing = set()
    for name in dir(proto_module):
        candidate = getattr(proto_module, name)
        if not (isinstance(candidate, type) and issubclass(candidate, proto_bases)):
            continue
        if name not in our_module_globals:
            missing.add(name)
            overrides[name] = False
            continue
        overrides[name] = our_module_globals.get(name) is not candidate

    if missing:
        # Emit the boilerplate the developer needs to add to the wrapper module.
        print(f"from {proto_module.__name__} import (")
        for name, shadowed in overrides.items():
            print(f" {name} as {name}_," if shadowed else f" {name},")
        print(")")
        print("\n")
        print("__all__ = [")
        for name in overrides:
            print(f' "{name}",')
        print("]")

        raise ValueError(f"Missing types that need to be re-exported: {missing}")

    # Patch any local subclasses back into the original module so the gRPC client will use them
    for name, shadowed in overrides.items():
        if shadowed:
            setattr(proto_module, name, our_module_globals[name])
|
spiral/py.typed
ADDED
File without changes
|
spiral/server.py
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
import socket
|
2
|
+
import time
|
3
|
+
|
4
|
+
|
5
|
+
def wait_for_port(port: int, host: str = "localhost", timeout: float = 5.0):
    """Wait until a port starts accepting TCP connections.

    Args:
        port: TCP port to probe.
        host: Host name or address to connect to.
        timeout: Maximum total number of seconds to keep retrying.

    Raises:
        TimeoutError: If no connection could be established before the
            deadline. The last connection error is attached as the cause.
    """
    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
    deadline = time.monotonic() + timeout
    while True:
        # Bound each attempt by the time left so that a hanging connect cannot
        # stretch the overall wait well beyond `timeout`. The small floor keeps
        # the socket in blocking-with-timeout mode (0 would mean non-blocking).
        remaining = max(deadline - time.monotonic(), 0.01)
        try:
            with socket.create_connection((host, port), timeout=remaining):
                return
        except OSError as ex:
            if time.monotonic() >= deadline:
                raise TimeoutError(
                    f"Waited too long for the port {port} on host {host} to start accepting connections."
                ) from ex
            # Brief pause between attempts to avoid a busy loop.
            time.sleep(0.01)
|
spiral/settings.py
ADDED
@@ -0,0 +1,101 @@
|
|
1
|
+
import functools
|
2
|
+
import os
|
3
|
+
from pathlib import Path
|
4
|
+
from typing import Annotated
|
5
|
+
|
6
|
+
import typer
|
7
|
+
from pydantic import Field, ValidatorFunctionWrapHandler, WrapValidator
|
8
|
+
from pydantic_settings import (
|
9
|
+
BaseSettings,
|
10
|
+
InitSettingsSource,
|
11
|
+
PydanticBaseSettingsSource,
|
12
|
+
SettingsConfigDict,
|
13
|
+
)
|
14
|
+
|
15
|
+
from spiral.api import SpiralAPI
|
16
|
+
from spiral.core.client import Authn, DeviceCodeAuth, Token
|
17
|
+
|
18
|
+
# Development mode: on when PYTEST_VERSION is present in the environment
# (presumably set by pytest — confirm) or when SPIRAL_DEV is set to any
# non-empty value. The value itself is not parsed, so "0" also enables it.
DEV = "PYTEST_VERSION" in os.environ or bool(os.environ.get("SPIRAL_DEV", None))
# True when the GITHUB_ACTIONS environment variable is present.
CI = "GITHUB_ACTIONS" in os.environ

# Application directory that typer resolves for the "pyspiral" app name.
APP_DIR = Path(typer.get_app_dir("pyspiral"))
# Log files live in a "logs" sub-directory of the application directory.
LOG_DIR = APP_DIR / "logs"
|
23
|
+
|
24
|
+
|
25
|
+
def validate_token(v, handler: ValidatorFunctionWrapHandler):
    """Build a ``Token`` from a raw string value.

    The wrapped ``handler`` is never invoked: strings are converted directly
    and every other input is rejected.

    Raises:
        ValueError: If ``v`` is not a string.
    """
    if not isinstance(v, str):
        raise ValueError("Token value must be a string")
    return Token(v)
|
30
|
+
|
31
|
+
|
32
|
+
# Token annotated with a wrap validator so that validation goes through validate_token.
TokenType = Annotated[Token, WrapValidator(validate_token)]
|
33
|
+
|
34
|
+
|
35
|
+
class SpiralDBSettings(BaseSettings):
    """Connection settings for the SpiralDB API endpoint.

    Defaults point at a local, non-SSL server when DEV is true and at the
    hosted endpoint over SSL otherwise.
    """

    model_config = SettingsConfigDict(frozen=True)

    host: str = "localhost" if DEV else "api.spiraldb.com"
    port: int = 4279 if DEV else 443
    ssl: bool = not DEV
    token: TokenType | None = None

    @property
    def uri(self) -> str:
        """Base URL assembled from the scheme (per ``ssl``), host and port."""
        scheme = "https" if self.ssl else "http"
        return f"{scheme}://{self.host}:{self.port}"
|
46
|
+
|
47
|
+
|
48
|
+
class SpfsSettings(BaseSettings):
    """Connection settings for the SpFS endpoint.

    Defaults point at a local, non-SSL server when DEV is true and at the
    hosted endpoint over SSL otherwise.
    """

    model_config = SettingsConfigDict(frozen=True)

    host: str = "localhost" if DEV else "spfs.spiraldb.dev"
    port: int = 4295 if DEV else 443
    ssl: bool = not DEV

    @property
    def uri(self) -> str:
        """Base URL assembled from the scheme (per ``ssl``), host and port."""
        scheme = "https" if self.ssl else "http"
        return f"{scheme}://{self.host}:{self.port}"
|
58
|
+
|
59
|
+
|
60
|
+
class Settings(BaseSettings):
    """Top-level pyspiral settings.

    Environment variables use the ``SPIRAL__`` prefix, with ``__`` separating
    nested field names.
    """

    model_config = SettingsConfigDict(env_nested_delimiter="__", env_prefix="SPIRAL__", frozen=True)

    spiraldb: SpiralDBSettings = Field(default_factory=SpiralDBSettings)
    spfs: SpfsSettings = Field(default_factory=SpfsSettings)
    file_format: str = Field(default="vortex")

    @functools.cached_property
    def api(self) -> "SpiralAPI":
        """API client bound to the configured SpiralDB URI (created once, then cached)."""
        from spiral.api import SpiralAPI

        client = SpiralAPI(self.authn, base_url=self.spiraldb.uri)
        return client

    @functools.cached_property
    def authn(self):
        """Authentication built from the configured token, or the fallback when no token is set."""
        token = self.spiraldb.token
        return Authn.from_token(token) if token else Authn.from_fallback()

    @functools.cached_property
    def device_code_auth(self) -> DeviceCodeAuth:
        """The default device-code authenticator (created once, then cached)."""
        return DeviceCodeAuth.default()

    @classmethod
    def settings_customise_sources(
        cls,
        settings_cls: type[BaseSettings],
        env_settings: PydanticBaseSettingsSource,
        dotenv_settings: PydanticBaseSettingsSource,
        init_settings: InitSettingsSource,
        **kwargs,
    ) -> tuple[PydanticBaseSettingsSource, ...]:
        """Return the settings sources in the order env, dotenv, init.

        Any other sources passed in ``kwargs`` are dropped.
        NOTE(review): pydantic-settings is understood to give earlier sources
        higher priority, which would let environment variables override
        constructor arguments — confirm against the pydantic-settings docs.
        """
        ordered_sources = (env_settings, dotenv_settings, init_settings)
        return ordered_sources
|
97
|
+
|
98
|
+
|
99
|
+
@functools.cache
def settings() -> Settings:
    """Return the Settings instance, constructing it on the first call.

    The result is memoized by ``functools.cache``, so every later call returns
    the same object.
    """
    return Settings()
|
spiral/substrait_.py
ADDED
@@ -0,0 +1,279 @@
|
|
1
|
+
import betterproto
|
2
|
+
import pyarrow as pa
|
3
|
+
|
4
|
+
import spiral.expressions as se
|
5
|
+
from spiral.expressions.base import Expr
|
6
|
+
from spiral.protogen.substrait import (
|
7
|
+
Expression,
|
8
|
+
ExpressionFieldReference,
|
9
|
+
ExpressionLiteral,
|
10
|
+
ExpressionLiteralList,
|
11
|
+
ExpressionLiteralStruct,
|
12
|
+
ExpressionLiteralUserDefined,
|
13
|
+
ExpressionMaskExpression,
|
14
|
+
ExpressionReferenceSegment,
|
15
|
+
ExpressionReferenceSegmentListElement,
|
16
|
+
ExpressionReferenceSegmentStructField,
|
17
|
+
ExpressionScalarFunction,
|
18
|
+
ExtendedExpression,
|
19
|
+
)
|
20
|
+
from spiral.protogen.substrait.extensions import (
|
21
|
+
SimpleExtensionDeclaration,
|
22
|
+
SimpleExtensionDeclarationExtensionFunction,
|
23
|
+
SimpleExtensionDeclarationExtensionType,
|
24
|
+
SimpleExtensionDeclarationExtensionTypeVariation,
|
25
|
+
)
|
26
|
+
|
27
|
+
|
28
|
+
class SubstraitConverter:
|
29
|
+
def __init__(self, scope: Expr, schema: pa.Schema, key_schema: pa.Schema):
|
30
|
+
self.scope = scope
|
31
|
+
self.schema = schema
|
32
|
+
self.key_names = set(key_schema.names)
|
33
|
+
|
34
|
+
# Extension URIs, keyed by extension URI anchor
|
35
|
+
self.extension_uris = {}
|
36
|
+
|
37
|
+
# Functions, keyed by function_anchor
|
38
|
+
self.functions = {}
|
39
|
+
|
40
|
+
# Types, keyed by type anchor
|
41
|
+
self.type_factories = {}
|
42
|
+
|
43
|
+
def convert(self, buffer: pa.Buffer) -> Expr:
|
44
|
+
"""Convert a Substrait Extended Expression into a Spiral expression."""
|
45
|
+
|
46
|
+
expr: ExtendedExpression = ExtendedExpression().parse(buffer)
|
47
|
+
assert len(expr.referred_expr) == 1, "Only one expression is supported"
|
48
|
+
|
49
|
+
# Parse the extension URIs from the plan.
|
50
|
+
for ext_uri in expr.extension_uris:
|
51
|
+
self.extension_uris[ext_uri.extension_uri_anchor] = ext_uri.uri
|
52
|
+
|
53
|
+
# Parse the extensions from the plan.
|
54
|
+
for ext in expr.extensions:
|
55
|
+
self._extension_declaration(ext)
|
56
|
+
|
57
|
+
# Convert the expression
|
58
|
+
return self._expr(expr.referred_expr[0].expression)
|
59
|
+
|
60
|
+
def _extension_declaration(self, ext: SimpleExtensionDeclaration):
|
61
|
+
match betterproto.which_one_of(ext, "mapping_type"):
|
62
|
+
case "extension_function", ext_func:
|
63
|
+
self._extension_function(ext_func)
|
64
|
+
case "extension_type", ext_type:
|
65
|
+
self._extension_type(ext_type)
|
66
|
+
case "extension_type_variation", ext_type_variation:
|
67
|
+
self._extension_type_variation(ext_type_variation)
|
68
|
+
case _:
|
69
|
+
raise AssertionError("Invalid substrait plan")
|
70
|
+
|
71
|
+
def _extension_function(self, ext: SimpleExtensionDeclarationExtensionFunction):
|
72
|
+
ext_uri: str = self.extension_uris[ext.extension_uri_reference]
|
73
|
+
match ext_uri:
|
74
|
+
case "https://github.com/substrait-io/substrait/blob/main/extensions/functions_boolean.yaml":
|
75
|
+
match ext.name:
|
76
|
+
case "or":
|
77
|
+
self.functions[ext.function_anchor] = se.or_
|
78
|
+
case "and":
|
79
|
+
self.functions[ext.function_anchor] = se.and_
|
80
|
+
case "xor":
|
81
|
+
self.functions[ext.function_anchor] = se.xor
|
82
|
+
case "not":
|
83
|
+
self.functions[ext.function_anchor] = se.not_
|
84
|
+
case _:
|
85
|
+
raise NotImplementedError(f"Function name {ext.name} not supported")
|
86
|
+
case "https://github.com/substrait-io/substrait/blob/main/extensions/functions_comparison.yaml":
|
87
|
+
match ext.name:
|
88
|
+
case "equal":
|
89
|
+
self.functions[ext.function_anchor] = se.eq
|
90
|
+
case "not_equal":
|
91
|
+
self.functions[ext.function_anchor] = se.neq
|
92
|
+
case "lt":
|
93
|
+
self.functions[ext.function_anchor] = se.lt
|
94
|
+
case "lte":
|
95
|
+
self.functions[ext.function_anchor] = se.lte
|
96
|
+
case "gt":
|
97
|
+
self.functions[ext.function_anchor] = se.gt
|
98
|
+
case "gte":
|
99
|
+
self.functions[ext.function_anchor] = se.gte
|
100
|
+
case "is_null":
|
101
|
+
self.functions[ext.function_anchor] = se.is_null
|
102
|
+
case "is_not_null":
|
103
|
+
self.functions[ext.function_anchor] = se.is_not_null
|
104
|
+
case _:
|
105
|
+
raise NotImplementedError(f"Function name {ext.name} not supported")
|
106
|
+
case uri:
|
107
|
+
raise NotImplementedError(f"Function extension URI {uri} not supported")
|
108
|
+
|
109
|
+
def _extension_type(self, ext: SimpleExtensionDeclarationExtensionType):
|
110
|
+
ext_uri: str = self.extension_uris[ext.extension_uri_reference]
|
111
|
+
match ext_uri:
|
112
|
+
case "https://github.com/apache/arrow/blob/main/format/substrait/extension_types.yaml":
|
113
|
+
match ext.name:
|
114
|
+
case "null":
|
115
|
+
self.type_factories[ext.type_anchor] = pa.null
|
116
|
+
case "interval_month_day_nano":
|
117
|
+
self.type_factories[ext.type_anchor] = pa.month_day_nano_interval
|
118
|
+
case "u8":
|
119
|
+
self.type_factories[ext.type_anchor] = pa.uint8
|
120
|
+
case "u16":
|
121
|
+
self.type_factories[ext.type_anchor] = pa.uint16
|
122
|
+
case "u32":
|
123
|
+
self.type_factories[ext.type_anchor] = pa.uint32
|
124
|
+
case "u64":
|
125
|
+
self.type_factories[ext.type_anchor] = pa.uint64
|
126
|
+
case "fp16":
|
127
|
+
self.type_factories[ext.type_anchor] = pa.float16
|
128
|
+
case "date_millis":
|
129
|
+
self.type_factories[ext.type_anchor] = pa.date64
|
130
|
+
case "time_seconds":
|
131
|
+
self.type_factories[ext.type_anchor] = lambda: pa.time32("s")
|
132
|
+
case "time_millis":
|
133
|
+
self.type_factories[ext.type_anchor] = lambda: pa.time32("ms")
|
134
|
+
case "time_nanos":
|
135
|
+
self.type_factories[ext.type_anchor] = lambda: pa.time64("ns")
|
136
|
+
case "large_string":
|
137
|
+
self.type_factories[ext.type_anchor] = pa.large_string
|
138
|
+
case "large_binary":
|
139
|
+
self.type_factories[ext.type_anchor] = pa.large_binary
|
140
|
+
case "decimal256":
|
141
|
+
self.type_factories[ext.type_anchor] = pa.decimal256
|
142
|
+
case "large_list":
|
143
|
+
self.type_factories[ext.type_anchor] = pa.large_list
|
144
|
+
case "fixed_size_list":
|
145
|
+
self.type_factories[ext.type_anchor] = pa.list_
|
146
|
+
case "duration":
|
147
|
+
self.type_factories[ext.type_anchor] = pa.duration
|
148
|
+
case uri:
|
149
|
+
raise NotImplementedError(f"Type extension URI {uri} not support")
|
150
|
+
|
151
|
+
def _extension_type_variation(self, ext: SimpleExtensionDeclarationExtensionTypeVariation):
|
152
|
+
raise NotImplementedError()
|
153
|
+
|
154
|
+
def _expr(self, expr: Expression) -> Expr:
|
155
|
+
match betterproto.which_one_of(expr, "rex_type"):
|
156
|
+
case "literal", e:
|
157
|
+
return self._expr_literal(e)
|
158
|
+
case "selection", e:
|
159
|
+
return self._expr_selection(e)
|
160
|
+
case "scalar_function", e:
|
161
|
+
return self._expr_scalar_function(e)
|
162
|
+
case "window_function", _:
|
163
|
+
raise ValueError("Window functions are not supported in Spiral push-down")
|
164
|
+
case "if_then", e:
|
165
|
+
return self._expr_if_then(e)
|
166
|
+
case "switch", e:
|
167
|
+
return self._expr_switch(e)
|
168
|
+
case "singular_or_list", _:
|
169
|
+
raise ValueError("singular_or_list is not supported in Spiral push-down")
|
170
|
+
case "multi_or_list", _:
|
171
|
+
raise ValueError("multi_or_list is not supported in Spiral push-down")
|
172
|
+
case "cast", e:
|
173
|
+
return self._expr_cast(e)
|
174
|
+
case "subquery", _:
|
175
|
+
raise ValueError("Subqueries are not supported in Spiral push-down")
|
176
|
+
case "nested", e:
|
177
|
+
return self._expr_nested(e)
|
178
|
+
case _:
|
179
|
+
raise NotImplementedError(f"Expression type {expr.rex_type} not implemented")
|
180
|
+
|
181
|
+
def _expr_literal(self, expr: ExpressionLiteral):
|
182
|
+
# TODO(ngates): the Spiral literal expression is quite weakly typed...
|
183
|
+
# Maybe we can switch to Vortex?
|
184
|
+
simple = {
|
185
|
+
"boolean",
|
186
|
+
"i8",
|
187
|
+
"i16",
|
188
|
+
"i32",
|
189
|
+
"i64",
|
190
|
+
"fp32",
|
191
|
+
"fp64",
|
192
|
+
"string",
|
193
|
+
"binary",
|
194
|
+
"fixed_char",
|
195
|
+
"var_char",
|
196
|
+
"fixed_binary",
|
197
|
+
}
|
198
|
+
|
199
|
+
match betterproto.which_one_of(expr, "literal_type"):
|
200
|
+
case type_, v if type_ in simple:
|
201
|
+
return se.scalar(pa.scalar(v))
|
202
|
+
case "timestamp", v:
|
203
|
+
return se.scalar(pa.scalar(v, type=pa.timestamp("us")))
|
204
|
+
case "date", v:
|
205
|
+
return se.scalar(pa.scalar(v, type=pa.date32()))
|
206
|
+
case "time", v:
|
207
|
+
# Substrait time is us since midnight. PyArrow only supports ms.
|
208
|
+
v: int
|
209
|
+
v = int(v / 1000)
|
210
|
+
return se.scalar(pa.scalar(v, type=pa.time32("ms")))
|
211
|
+
case "null", _null_type:
|
212
|
+
# We need a typed null value
|
213
|
+
raise NotImplementedError()
|
214
|
+
case "struct", v:
|
215
|
+
v: ExpressionLiteralStruct
|
216
|
+
# Hmm, v has fields, but no field names. I guess we return a list and the type is applied later?
|
217
|
+
raise NotImplementedError()
|
218
|
+
case "list", v:
|
219
|
+
v: ExpressionLiteralList
|
220
|
+
return pa.scalar([self._expr_literal(e) for e in v.values])
|
221
|
+
case "user_defined", v:
|
222
|
+
v: ExpressionLiteralUserDefined
|
223
|
+
raise NotImplementedError()
|
224
|
+
case literal_type, _:
|
225
|
+
raise NotImplementedError(f"Literal type not supported: {literal_type}")
|
226
|
+
|
227
|
+
def _expr_selection(self, expr: ExpressionFieldReference):
|
228
|
+
match betterproto.which_one_of(expr, "root_type"):
|
229
|
+
case "root_reference", _:
|
230
|
+
# The reference is relative to the root
|
231
|
+
base_expr = self.scope
|
232
|
+
base_type = pa.struct(self.schema)
|
233
|
+
case _:
|
234
|
+
raise NotImplementedError("Only root_reference expressions are supported")
|
235
|
+
|
236
|
+
match betterproto.which_one_of(expr, "reference_type"):
|
237
|
+
case "direct_reference", direct_ref:
|
238
|
+
return self._expr_direct_reference(base_expr, base_type, direct_ref)
|
239
|
+
case "masked_reference", masked_ref:
|
240
|
+
return self._expr_masked_reference(base_expr, base_type, masked_ref)
|
241
|
+
case _:
|
242
|
+
raise NotImplementedError()
|
243
|
+
|
244
|
+
def _expr_direct_reference(self, scope: Expr, scope_type: pa.StructType, expr: ExpressionReferenceSegment):
|
245
|
+
match betterproto.which_one_of(expr, "reference_type"):
|
246
|
+
case "map_key", ref:
|
247
|
+
raise NotImplementedError("Map types not yet supported in Spiral")
|
248
|
+
case "struct_field", ref:
|
249
|
+
ref: ExpressionReferenceSegmentStructField
|
250
|
+
field_name = scope_type.field(ref.field).name
|
251
|
+
|
252
|
+
if field_name in self.key_names:
|
253
|
+
# This is a key column, so we need to select it from the scope.
|
254
|
+
return se.key(field_name)
|
255
|
+
|
256
|
+
scope = se.getitem(scope, field_name)
|
257
|
+
scope_type = scope_type.field(ref.field).type
|
258
|
+
if ref.is_set("child"):
|
259
|
+
return self._expr_direct_reference(scope, scope_type, ref.child)
|
260
|
+
return scope
|
261
|
+
case "list_element", ref:
|
262
|
+
ref: ExpressionReferenceSegmentListElement
|
263
|
+
scope = se.getitem(scope, ref.offset)
|
264
|
+
scope_type = scope_type.field(ref.field).type
|
265
|
+
if ref.is_set("child"):
|
266
|
+
return self._expr_direct_reference(scope, scope_type, ref.child)
|
267
|
+
return scope
|
268
|
+
case "", ref:
|
269
|
+
# Because Proto... we hit this case when we recurse into a child node and it's actually "None".
|
270
|
+
return scope
|
271
|
+
case _:
|
272
|
+
raise NotImplementedError()
|
273
|
+
|
274
|
+
def _expr_masked_reference(self, scope: Expr, scope_type: pa.StructType, expr: ExpressionMaskExpression):
|
275
|
+
raise NotImplementedError("Masked references are not yet supported in Spiral push-down")
|
276
|
+
|
277
|
+
def _expr_scalar_function(self, expr: ExpressionScalarFunction):
|
278
|
+
args = [self._expr(arg.value) for arg in expr.arguments]
|
279
|
+
return self.functions[expr.function_reference](*args)
|
@@ -0,0 +1,12 @@
|
|
1
|
+
from spiral import _lib
|
2
|
+
from spiral.tables.client import Tables
|
3
|
+
from spiral.tables.maintenance import Maintenance
|
4
|
+
from spiral.tables.scan import Scan
|
5
|
+
from spiral.tables.snapshot import Snapshot
|
6
|
+
from spiral.tables.table import Table
|
7
|
+
from spiral.tables.transaction import Transaction
|
8
|
+
|
9
|
+
# Eagerly import the Spiral library. The import at the top of this module is
# what loads it; the assert below only references `_lib` after that import.
assert _lib, "Spiral library"

# Public names re-exported from the spiral.tables package.
__all__ = ["Tables", "Table", "Snapshot", "Scan", "Transaction", "Maintenance"]
|