pyspiral 0.3.1__cp310-abi3-macosx_11_0_arm64.whl → 0.4.1__cp310-abi3-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pyspiral-0.3.1.dist-info → pyspiral-0.4.1.dist-info}/METADATA +9 -13
- pyspiral-0.4.1.dist-info/RECORD +98 -0
- {pyspiral-0.3.1.dist-info → pyspiral-0.4.1.dist-info}/WHEEL +1 -1
- spiral/__init__.py +6 -9
- spiral/_lib.abi3.so +0 -0
- spiral/adbc.py +21 -14
- spiral/api/__init__.py +14 -175
- spiral/api/admin.py +12 -26
- spiral/api/client.py +160 -0
- spiral/api/filesystems.py +100 -72
- spiral/api/organizations.py +45 -58
- spiral/api/projects.py +171 -134
- spiral/api/telemetry.py +19 -0
- spiral/api/types.py +20 -0
- spiral/api/workloads.py +32 -25
- spiral/{arrow.py → arrow_.py} +12 -0
- spiral/cli/__init__.py +2 -5
- spiral/cli/admin.py +7 -12
- spiral/cli/app.py +23 -6
- spiral/cli/console.py +1 -1
- spiral/cli/fs.py +82 -17
- spiral/cli/iceberg/__init__.py +7 -0
- spiral/cli/iceberg/namespaces.py +47 -0
- spiral/cli/iceberg/tables.py +60 -0
- spiral/cli/indexes/__init__.py +19 -0
- spiral/cli/login.py +14 -5
- spiral/cli/orgs.py +90 -0
- spiral/cli/printer.py +9 -1
- spiral/cli/projects.py +136 -0
- spiral/cli/state.py +2 -0
- spiral/cli/tables/__init__.py +121 -0
- spiral/cli/telemetry.py +18 -0
- spiral/cli/types.py +8 -10
- spiral/cli/{workload.py → workloads.py} +11 -11
- spiral/{catalog.py → client.py} +23 -37
- spiral/core/client/__init__.pyi +117 -0
- spiral/core/index/__init__.pyi +15 -0
- spiral/core/{core → table}/__init__.pyi +44 -17
- spiral/core/{manifests → table/manifests}/__init__.pyi +5 -23
- spiral/core/table/metastore/__init__.pyi +62 -0
- spiral/core/{spec → table/spec}/__init__.pyi +41 -66
- spiral/datetime_.py +27 -0
- spiral/expressions/__init__.py +26 -18
- spiral/expressions/base.py +5 -5
- spiral/expressions/list_.py +1 -1
- spiral/expressions/mp4.py +2 -9
- spiral/expressions/png.py +1 -1
- spiral/expressions/qoi.py +1 -1
- spiral/expressions/refs.py +3 -9
- spiral/expressions/struct.py +7 -5
- spiral/expressions/text.py +62 -0
- spiral/expressions/udf.py +3 -3
- spiral/iceberg/__init__.py +3 -0
- spiral/iceberg/client.py +33 -0
- spiral/indexes/__init__.py +5 -0
- spiral/indexes/client.py +137 -0
- spiral/indexes/index.py +34 -0
- spiral/indexes/scan.py +22 -0
- spiral/project.py +19 -110
- spiral/{proto → protogen}/_/scandal/__init__.py +23 -135
- spiral/protogen/_/spiral/table/__init__.py +22 -0
- spiral/protogen/substrait/__init__.py +3399 -0
- spiral/protogen/substrait/extensions/__init__.py +115 -0
- spiral/server.py +17 -0
- spiral/settings.py +29 -91
- spiral/substrait_.py +9 -5
- spiral/tables/__init__.py +12 -0
- spiral/tables/client.py +130 -0
- spiral/{dataset.py → tables/dataset.py} +9 -199
- spiral/tables/debug/manifests.py +70 -0
- spiral/tables/debug/metrics.py +56 -0
- spiral/{debug.py → tables/debug/scan.py} +6 -9
- spiral/{maintenance.py → tables/maintenance.py} +1 -1
- spiral/{scan_.py → tables/scan.py} +63 -89
- spiral/tables/snapshot.py +78 -0
- spiral/{table.py → tables/table.py} +59 -73
- spiral/{txn.py → tables/transaction.py} +7 -3
- pyspiral-0.3.1.dist-info/RECORD +0 -85
- spiral/api/tables.py +0 -91
- spiral/api/tokens.py +0 -56
- spiral/authn/authn.py +0 -89
- spiral/authn/device.py +0 -206
- spiral/authn/github_.py +0 -33
- spiral/authn/modal_.py +0 -18
- spiral/cli/org.py +0 -90
- spiral/cli/project.py +0 -109
- spiral/cli/table.py +0 -20
- spiral/cli/token.py +0 -27
- spiral/core/metastore/__init__.pyi +0 -91
- spiral/proto/_/spfs/__init__.py +0 -36
- spiral/proto/_/spiral/table/__init__.py +0 -276
- spiral/proto/_/spiraldb/metastore/__init__.py +0 -499
- spiral/proto/__init__.py +0 -0
- spiral/proto/scandal/__init__.py +0 -45
- spiral/proto/spiral/__init__.py +0 -0
- spiral/proto/spiral/table/__init__.py +0 -96
- {pyspiral-0.3.1.dist-info → pyspiral-0.4.1.dist-info}/entry_points.txt +0 -0
- /spiral/{authn/__init__.py → core/__init__.pyi} +0 -0
- /spiral/{core → protogen/_}/__init__.py +0 -0
- /spiral/{proto/_ → protogen/_/arrow}/__init__.py +0 -0
- /spiral/{proto/_/arrow → protogen/_/arrow/flight}/__init__.py +0 -0
- /spiral/{proto/_/arrow/flight → protogen/_/arrow/flight/protocol}/__init__.py +0 -0
- /spiral/{proto → protogen}/_/arrow/flight/protocol/sql/__init__.py +0 -0
- /spiral/{proto/_/arrow/flight/protocol → protogen/_/spiral}/__init__.py +0 -0
- /spiral/{proto → protogen/_}/substrait/__init__.py +0 -0
- /spiral/{proto → protogen/_}/substrait/extensions/__init__.py +0 -0
- /spiral/{proto/_/spiral → protogen}/__init__.py +0 -0
- /spiral/{proto → protogen}/util.py +0 -0
- /spiral/{proto/_/spiraldb → tables/debug}/__init__.py +0 -0
@@ -0,0 +1,115 @@
|
|
1
|
+
# Generated by the protocol buffer compiler. DO NOT EDIT!
|
2
|
+
# sources: substrait/extensions/extensions.proto
|
3
|
+
# plugin: python-betterproto
|
4
|
+
# This file has been @generated
|
5
|
+
|
6
|
+
from dataclasses import dataclass
|
7
|
+
from typing import List
|
8
|
+
|
9
|
+
import betterproto
|
10
|
+
import betterproto.lib.google.protobuf as betterproto_lib_google_protobuf
|
11
|
+
|
12
|
+
|
13
|
+
@dataclass(eq=False, repr=False)
class SimpleExtensionUri(betterproto.Message):
    extension_uri_anchor: int = betterproto.uint32_field(1)
    """
    Surrogate key, scoped to a single plan, that is used to reference the URI
    associated with an extension.
    """

    uri: str = betterproto.string_field(2)
    """
    URI from which this extension's YAML can be retrieved. It serves as the
    "namespace" of the extension.
    """
|
26
|
+
|
27
|
+
|
28
|
+
@dataclass(eq=False, repr=False)
class SimpleExtensionDeclaration(betterproto.Message):
    """
    Maps a specific extension entity to the URI where that extension can be
    found.
    """

    # The three fields below all belong to the "mapping_type" group.
    extension_type: "SimpleExtensionDeclarationExtensionType" = betterproto.message_field(
        1, group="mapping_type"
    )
    extension_type_variation: "SimpleExtensionDeclarationExtensionTypeVariation" = betterproto.message_field(
        2, group="mapping_type"
    )
    extension_function: "SimpleExtensionDeclarationExtensionFunction" = betterproto.message_field(
        3, group="mapping_type"
    )
|
44
|
+
|
45
|
+
|
46
|
+
@dataclass(eq=False, repr=False)
class SimpleExtensionDeclarationExtensionType(betterproto.Message):
    """Describes a type."""

    extension_uri_reference: int = betterproto.uint32_field(1)
    """
    References the extension_uri_anchor defined for a specific extension URI.
    """

    type_anchor: int = betterproto.uint32_field(2)
    """
    Surrogate key, scoped to a single plan, used to reference a specific
    extension type.
    """

    name: str = betterproto.string_field(3)
    """Name of the type in the defined extension YAML."""
|
63
|
+
|
64
|
+
|
65
|
+
@dataclass(eq=False, repr=False)
class SimpleExtensionDeclarationExtensionTypeVariation(betterproto.Message):
    extension_uri_reference: int = betterproto.uint32_field(1)
    """
    References the extension_uri_anchor defined for a specific extension URI.
    """

    type_variation_anchor: int = betterproto.uint32_field(2)
    """
    Surrogate key, scoped to a single plan, used to reference a specific
    type variation.
    """

    name: str = betterproto.string_field(3)
    """Name of the type in the defined extension YAML."""
|
80
|
+
|
81
|
+
|
82
|
+
@dataclass(eq=False, repr=False)
class SimpleExtensionDeclarationExtensionFunction(betterproto.Message):
    extension_uri_reference: int = betterproto.uint32_field(1)
    """
    References the extension_uri_anchor defined for a specific extension URI.
    """

    function_anchor: int = betterproto.uint32_field(2)
    """
    Surrogate key, scoped to a single plan, used to reference a specific
    function.
    """

    name: str = betterproto.string_field(3)
    """Compound name of a function signature."""
|
97
|
+
|
98
|
+
|
99
|
+
@dataclass(eq=False, repr=False)
class AdvancedExtension(betterproto.Message):
    """
    Generic container for embedding additional extension information in the
    serialized Substrait plan.
    """

    optimization: List["betterproto_lib_google_protobuf.Any"] = betterproto.message_field(1)
    """
    An optimization is helpful information that does not influence semantics.
    A consumer may ignore it.
    """

    enhancement: "betterproto_lib_google_protobuf.Any" = betterproto.message_field(
        2
    )
    """An enhancement alters semantics. A consumer cannot ignore it."""
|
spiral/server.py
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
import socket
|
2
|
+
import time
|
3
|
+
|
4
|
+
|
5
|
+
def wait_for_port(port: int, host: str = "localhost", timeout: float = 5.0):
    """Wait until a port starts accepting TCP connections.

    The port is polled repeatedly; the function returns as soon as one
    connection attempt succeeds (the probe connection is closed immediately).

    Args:
        port: TCP port to probe.
        host: Host name or address to connect to.
        timeout: Overall time budget in seconds, covering all attempts.

    Raises:
        TimeoutError: If no connection could be established within ``timeout``
            seconds. The last connection error is attached as the cause.
    """
    poll_interval = 0.01
    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
    deadline = time.monotonic() + timeout
    while True:
        # Give each attempt only the time that is left, so that a hanging
        # connect cannot stretch the total wait far beyond ``timeout``. The
        # floor keeps the socket in blocking-with-timeout mode (0 would make
        # it non-blocking) and guarantees at least one real attempt.
        remaining = max(deadline - time.monotonic(), poll_interval)
        try:
            with socket.create_connection((host, port), timeout=remaining):
                return
        except OSError as ex:
            if time.monotonic() >= deadline:
                raise TimeoutError(
                    f"Waited too long for the port {port} on host {host} to start accepting connections."
                ) from ex
            time.sleep(poll_interval)
|
spiral/settings.py
CHANGED
@@ -1,54 +1,35 @@
|
|
1
1
|
import functools
|
2
|
-
import hashlib
|
3
2
|
import os
|
4
|
-
from collections.abc import Callable, Mapping
|
5
3
|
from pathlib import Path
|
6
|
-
from typing import
|
4
|
+
from typing import Annotated
|
7
5
|
|
8
|
-
import jwt
|
9
6
|
import typer
|
10
|
-
from pydantic import
|
7
|
+
from pydantic import Field, ValidatorFunctionWrapHandler, WrapValidator
|
11
8
|
from pydantic_settings import (
|
12
9
|
BaseSettings,
|
13
10
|
InitSettingsSource,
|
14
11
|
PydanticBaseSettingsSource,
|
15
12
|
SettingsConfigDict,
|
16
|
-
TomlConfigSettingsSource,
|
17
13
|
)
|
18
14
|
|
19
15
|
from spiral.api import SpiralAPI
|
20
|
-
from spiral.
|
21
|
-
DeviceAuthProvider,
|
22
|
-
EnvironmentAuthn,
|
23
|
-
FallbackAuthn,
|
24
|
-
TokenAuthn,
|
25
|
-
TokenExchangeProvider,
|
26
|
-
)
|
27
|
-
from spiral.authn.device import DeviceAuth
|
28
|
-
from spiral.authn.github_ import GitHubActionsProvider
|
29
|
-
from spiral.authn.modal_ import ModalProvider
|
16
|
+
from spiral.core.client import Authn, DeviceCodeAuth, Token
|
30
17
|
|
31
18
|
DEV = "PYTEST_VERSION" in os.environ or bool(os.environ.get("SPIRAL_DEV", None))
|
32
|
-
|
19
|
+
CI = "GITHUB_ACTIONS" in os.environ
|
33
20
|
|
34
21
|
APP_DIR = Path(typer.get_app_dir("pyspiral"))
|
35
22
|
LOG_DIR = APP_DIR / "logs"
|
36
|
-
CONFIG_FILE = APP_DIR / "config.toml"
|
37
|
-
|
38
23
|
|
39
|
-
class AuthSettings(BaseSettings):
|
40
|
-
model_config = SettingsConfigDict(frozen=True)
|
41
24
|
|
42
|
-
|
43
|
-
|
25
|
+
def validate_token(v, handler: ValidatorFunctionWrapHandler):
    """Wrap a raw string in a ``Token``.

    ``handler`` is accepted as part of the validator signature but is never
    invoked here.

    Raises:
        ValueError: If ``v`` is not a ``str``.
    """
    if not isinstance(v, str):
        raise ValueError("Token value must be a string")
    return Token(v)
|
44
30
|
|
45
31
|
|
46
|
-
|
47
|
-
token: str
|
48
|
-
|
49
|
-
@functools.cached_property
|
50
|
-
def payload(self) -> dict:
|
51
|
-
return jwt.decode(self.token, options={"verify_signature": False})
|
32
|
+
TokenType = Annotated[Token, WrapValidator(validate_token)]
|
52
33
|
|
53
34
|
|
54
35
|
class SpiralDBSettings(BaseSettings):
|
@@ -57,38 +38,35 @@ class SpiralDBSettings(BaseSettings):
|
|
57
38
|
host: str = "localhost" if DEV else "api.spiraldb.com"
|
58
39
|
port: int = 4279 if DEV else 443
|
59
40
|
ssl: bool = not DEV
|
60
|
-
|
61
|
-
token: str | None = None
|
41
|
+
token: TokenType | None = None
|
62
42
|
|
63
43
|
@property
|
64
44
|
def uri(self) -> str:
|
65
45
|
return f"{'https' if self.ssl else 'http'}://{self.host}:{self.port}"
|
66
46
|
|
67
|
-
@property
|
68
|
-
def uri_scandal(self) -> str:
|
69
|
-
# TODO(marko): Scandal will be a different service. For now, gRPC API is hosted on the SpiralDB service.
|
70
|
-
return f"{'grpc+tls' if self.ssl else 'grpc'}://{self.host}:{self.port}"
|
71
47
|
|
72
|
-
|
73
|
-
|
74
|
-
return self.uri + "/iceberg"
|
48
|
+
class SpfsSettings(BaseSettings):
    """Immutable connection settings for the SPFS endpoint."""

    model_config = SettingsConfigDict(frozen=True)

    # Defaults switch to a local, non-TLS endpoint when DEV is set.
    host: str = "localhost" if DEV else "spfs.spiraldb.dev"
    port: int = 4295 if DEV else 443
    ssl: bool = not DEV

    @property
    def uri(self) -> str:
        """Base URL built from the scheme (per ``ssl``), host and port."""
        scheme = "https" if self.ssl else "http"
        return f"{scheme}://{self.host}:{self.port}"
|
81
58
|
|
82
59
|
|
83
60
|
class Settings(BaseSettings):
|
84
61
|
model_config = SettingsConfigDict(
|
85
|
-
toml_file=CONFIG_FILE,
|
86
62
|
env_nested_delimiter="__",
|
87
63
|
env_prefix="SPIRAL__",
|
88
64
|
frozen=True,
|
89
65
|
)
|
90
66
|
|
91
67
|
spiraldb: SpiralDBSettings = Field(default_factory=SpiralDBSettings)
|
68
|
+
spfs: SpfsSettings = Field(default_factory=SpfsSettings)
|
69
|
+
file_format: str = Field(default="vortex")
|
92
70
|
|
93
71
|
@functools.cached_property
|
94
72
|
def api(self) -> "SpiralAPI":
|
@@ -99,17 +77,12 @@ class Settings(BaseSettings):
|
|
99
77
|
@functools.cached_property
def authn(self):
    """Return the authenticator, computed once and cached on the instance.

    Uses ``Authn.from_token`` when a SpiralDB token is configured and
    ``Authn.from_fallback`` otherwise.
    """
    token = self.spiraldb.token
    return Authn.from_token(token) if token else Authn.from_fallback()
|
82
|
+
|
83
|
+
@functools.cached_property
def device_code_auth(self) -> DeviceCodeAuth:
    """Return ``DeviceCodeAuth.default()``, cached on the instance after first use."""
    default_auth = DeviceCodeAuth.default()
    return default_auth
|
113
86
|
|
114
87
|
@classmethod
|
115
88
|
def settings_customise_sources(
|
@@ -120,42 +93,7 @@ class Settings(BaseSettings):
|
|
120
93
|
init_settings: InitSettingsSource,
|
121
94
|
**kwargs,
|
122
95
|
) -> tuple[PydanticBaseSettingsSource, ...]:
|
123
|
-
return env_settings, dotenv_settings,
|
124
|
-
|
125
|
-
|
126
|
-
class _LazyDict(Mapping):
|
127
|
-
def __init__(self, lazy_values: dict[str, Any | Callable[[], Any]]):
|
128
|
-
self._lazy_values = lazy_values
|
129
|
-
|
130
|
-
def _dict(self):
|
131
|
-
return {k: v() if callable(v) else v for k, v in self._lazy_values.items()}
|
132
|
-
|
133
|
-
def __getitem__(self, key, /):
|
134
|
-
return self._lazy_values[key]()
|
135
|
-
|
136
|
-
def get(self, key, /, default=None):
|
137
|
-
return self._lazy_values.get(key, lambda: default)()
|
138
|
-
|
139
|
-
def items(self):
|
140
|
-
return self._dict().items()
|
141
|
-
|
142
|
-
def keys(self):
|
143
|
-
return self._lazy_values.keys()
|
144
|
-
|
145
|
-
def values(self):
|
146
|
-
return self._dict().values()
|
147
|
-
|
148
|
-
def __contains__(self, key, /):
|
149
|
-
return key in self._lazy_values
|
150
|
-
|
151
|
-
def __eq__(self, other, /):
|
152
|
-
return False
|
153
|
-
|
154
|
-
def __iter__(self):
|
155
|
-
return iter(self._dict())
|
156
|
-
|
157
|
-
def __len__(self):
|
158
|
-
return len(self._lazy_values)
|
96
|
+
return env_settings, dotenv_settings, init_settings
|
159
97
|
|
160
98
|
|
161
99
|
@functools.cache
|
spiral/substrait_.py
CHANGED
@@ -3,7 +3,7 @@ import pyarrow as pa
|
|
3
3
|
|
4
4
|
import spiral.expressions as se
|
5
5
|
from spiral.expressions.base import Expr
|
6
|
-
from spiral.
|
6
|
+
from spiral.protogen.substrait import (
|
7
7
|
Expression,
|
8
8
|
ExpressionFieldReference,
|
9
9
|
ExpressionLiteral,
|
@@ -17,7 +17,7 @@ from spiral.proto.substrait import (
|
|
17
17
|
ExpressionScalarFunction,
|
18
18
|
ExtendedExpression,
|
19
19
|
)
|
20
|
-
from spiral.
|
20
|
+
from spiral.protogen.substrait.extensions import (
|
21
21
|
SimpleExtensionDeclaration,
|
22
22
|
SimpleExtensionDeclarationExtensionFunction,
|
23
23
|
SimpleExtensionDeclarationExtensionType,
|
@@ -43,7 +43,7 @@ class SubstraitConverter:
|
|
43
43
|
def convert(self, buffer: pa.Buffer) -> Expr:
|
44
44
|
"""Convert a Substrait Extended Expression into a Spiral expression."""
|
45
45
|
|
46
|
-
expr: ExtendedExpression = ExtendedExpression().parse(buffer
|
46
|
+
expr: ExtendedExpression = ExtendedExpression().parse(buffer)
|
47
47
|
assert len(expr.referred_expr) == 1, "Only one expression is supported"
|
48
48
|
|
49
49
|
# Parse the extension URIs from the plan.
|
@@ -255,12 +255,16 @@ class SubstraitConverter:
|
|
255
255
|
|
256
256
|
scope = se.getitem(scope, field_name)
|
257
257
|
scope_type = scope_type.field(ref.field).type
|
258
|
-
|
258
|
+
if ref.is_set("child"):
|
259
|
+
return self._expr_direct_reference(scope, scope_type, ref.child)
|
260
|
+
return scope
|
259
261
|
case "list_element", ref:
|
260
262
|
ref: ExpressionReferenceSegmentListElement
|
261
263
|
scope = se.getitem(scope, ref.offset)
|
262
264
|
scope_type = scope_type.field(ref.field).type
|
263
|
-
|
265
|
+
if ref.is_set("child"):
|
266
|
+
return self._expr_direct_reference(scope, scope_type, ref.child)
|
267
|
+
return scope
|
264
268
|
case "", ref:
|
265
269
|
# Because Proto... we hit this case when we recurse into a child node and it's actually "None".
|
266
270
|
return scope
|
@@ -0,0 +1,12 @@
|
|
1
|
+
from spiral import _lib
|
2
|
+
from spiral.tables.client import Tables
|
3
|
+
from spiral.tables.maintenance import Maintenance
|
4
|
+
from spiral.tables.scan import Scan
|
5
|
+
from spiral.tables.snapshot import Snapshot
|
6
|
+
from spiral.tables.table import Table
|
7
|
+
from spiral.tables.transaction import Transaction
|
8
|
+
|
9
|
+
# Eagerly import the Spiral library
|
10
|
+
assert _lib, "Spiral library"
|
11
|
+
|
12
|
+
__all__ = ["Tables", "Table", "Snapshot", "Scan", "Transaction", "Maintenance"]
|
spiral/tables/client.py
ADDED
@@ -0,0 +1,130 @@
|
|
1
|
+
from datetime import datetime
|
2
|
+
from typing import Any
|
3
|
+
|
4
|
+
import pyarrow as pa
|
5
|
+
|
6
|
+
from spiral.api import SpiralAPI
|
7
|
+
from spiral.api.projects import TableResource
|
8
|
+
from spiral.core.client import Spiral as CoreSpiral
|
9
|
+
from spiral.core.table.spec import Schema
|
10
|
+
from spiral.datetime_ import timestamp_micros
|
11
|
+
from spiral.expressions import ExprLike
|
12
|
+
from spiral.tables.scan import Scan
|
13
|
+
from spiral.tables.table import Table
|
14
|
+
from spiral.types_ import Uri
|
15
|
+
|
16
|
+
|
17
|
+
class Tables:
|
18
|
+
"""
|
19
|
+
Spiral Tables a powerful and flexible way for storing, analyzing,
|
20
|
+
and querying massive and/or multimodal datasets.
|
21
|
+
|
22
|
+
The data model will feel familiar to users of SQL- or DataFrame-style systems,
|
23
|
+
yet is designed to be more flexible, more powerful, and more useful in the context
|
24
|
+
of modern data processing. Tables are stored and queried directly from object storage.
|
25
|
+
"""
|
26
|
+
|
27
|
+
def __init__(self, api: SpiralAPI, spiral: CoreSpiral, *, project_id: str | None = None):
|
28
|
+
self._api = api
|
29
|
+
self._spiral = spiral
|
30
|
+
self._project_id = project_id
|
31
|
+
|
32
|
+
def table(self, identifier: str) -> Table:
|
33
|
+
"""Open a table with a `dataset.table` identifier, or `table` name using the `default` dataset."""
|
34
|
+
project_id, dataset, table = self._parse_identifier(identifier)
|
35
|
+
if project_id is None:
|
36
|
+
raise ValueError("Must provide a fully qualified table identifier.")
|
37
|
+
|
38
|
+
res = list(self._api.project.list_tables(project_id, dataset=dataset, table=table))
|
39
|
+
if len(res) == 0:
|
40
|
+
raise ValueError(f"Table not found: {project_id}.{dataset}.{table}")
|
41
|
+
|
42
|
+
res = res[0]
|
43
|
+
return Table(self, self._spiral.get_table(res.id), identifier=f"{res.project_id}.{res.dataset}.{res.table}")
|
44
|
+
|
45
|
+
def list_tables(self) -> list[TableResource]:
|
46
|
+
project_id = self._project_id
|
47
|
+
if project_id is None:
|
48
|
+
raise ValueError("Must provide a project ID to list tables.")
|
49
|
+
return list(self._api.project.list_tables(project_id))
|
50
|
+
|
51
|
+
def create_table(
|
52
|
+
self,
|
53
|
+
identifier: str,
|
54
|
+
*,
|
55
|
+
key_schema: pa.Schema | Any,
|
56
|
+
root_uri: Uri | None = None,
|
57
|
+
exist_ok: bool = False,
|
58
|
+
) -> Table:
|
59
|
+
"""Create a new table in the project.
|
60
|
+
|
61
|
+
Args:
|
62
|
+
identifier: The table identifier, in the form `project.dataset.table`, `dataset.table` or `table`.
|
63
|
+
key_schema: The schema of the table's keys.
|
64
|
+
root_uri: The root URI for the table.
|
65
|
+
exist_ok: If True, do not raise an error if the table already exists.
|
66
|
+
"""
|
67
|
+
project_id, dataset, table = self._parse_identifier(identifier)
|
68
|
+
if project_id is None:
|
69
|
+
raise ValueError("Must provide a fully qualified table identifier.")
|
70
|
+
|
71
|
+
if not isinstance(key_schema, pa.Schema):
|
72
|
+
key_schema = pa.schema(key_schema)
|
73
|
+
key_schema = Schema.from_arrow(key_schema)
|
74
|
+
|
75
|
+
core_table = self._spiral.create_table(
|
76
|
+
project_id,
|
77
|
+
dataset=dataset,
|
78
|
+
table=table,
|
79
|
+
key_schema=key_schema,
|
80
|
+
root_uri=root_uri,
|
81
|
+
exist_ok=exist_ok,
|
82
|
+
)
|
83
|
+
|
84
|
+
return Table(self, core_table, identifier=f"{project_id}.{dataset}.{table}")
|
85
|
+
|
86
|
+
def _parse_identifier(self, identifier: str) -> tuple[str | None, str, str]:
|
87
|
+
parts = identifier.split(".")
|
88
|
+
if len(parts) == 1:
|
89
|
+
return self._project_id, "default", parts[0]
|
90
|
+
elif len(parts) == 2:
|
91
|
+
return self._project_id, parts[0], parts[1]
|
92
|
+
elif len(parts) == 3:
|
93
|
+
return parts[0], parts[1], parts[2]
|
94
|
+
else:
|
95
|
+
raise ValueError(f"Invalid table identifier: {identifier}")
|
96
|
+
|
97
|
+
def scan(
|
98
|
+
self,
|
99
|
+
*projections: ExprLike,
|
100
|
+
where: ExprLike | None = None,
|
101
|
+
asof: datetime | int | None = None,
|
102
|
+
exclude_keys: bool = False,
|
103
|
+
) -> Scan:
|
104
|
+
"""Starts a read transaction on the Spiral.
|
105
|
+
|
106
|
+
Args:
|
107
|
+
projections: a set of expressions that return struct arrays.
|
108
|
+
where: a query expression to apply to the data.
|
109
|
+
asof: only data written before the given timestamp will be returned, caveats around compaction.
|
110
|
+
exclude_keys: whether to exclude the key columns in the scan result, defaults to False.
|
111
|
+
Note that if a projection includes a key column, it will be included in the result.
|
112
|
+
"""
|
113
|
+
from spiral import expressions as se
|
114
|
+
|
115
|
+
if isinstance(asof, datetime):
|
116
|
+
asof = timestamp_micros(asof)
|
117
|
+
|
118
|
+
# Combine all projections into a single struct.
|
119
|
+
projection = se.merge(*projections)
|
120
|
+
if where is not None:
|
121
|
+
where = se.lift(where)
|
122
|
+
|
123
|
+
return Scan(
|
124
|
+
self._spiral.open_table_scan(
|
125
|
+
projection.__expr__,
|
126
|
+
filter=where.__expr__ if where else None,
|
127
|
+
asof=asof,
|
128
|
+
exclude_keys=exclude_keys,
|
129
|
+
),
|
130
|
+
)
|