pyspiral 0.3.1__cp310-abi3-macosx_11_0_arm64.whl → 0.4.0__cp310-abi3-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. {pyspiral-0.3.1.dist-info → pyspiral-0.4.0.dist-info}/METADATA +9 -13
  2. pyspiral-0.4.0.dist-info/RECORD +98 -0
  3. {pyspiral-0.3.1.dist-info → pyspiral-0.4.0.dist-info}/WHEEL +1 -1
  4. spiral/__init__.py +6 -9
  5. spiral/_lib.abi3.so +0 -0
  6. spiral/adbc.py +21 -14
  7. spiral/api/__init__.py +14 -175
  8. spiral/api/admin.py +12 -26
  9. spiral/api/client.py +160 -0
  10. spiral/api/filesystems.py +100 -72
  11. spiral/api/organizations.py +45 -58
  12. spiral/api/projects.py +171 -134
  13. spiral/api/telemetry.py +19 -0
  14. spiral/api/types.py +20 -0
  15. spiral/api/workloads.py +32 -25
  16. spiral/{arrow.py → arrow_.py} +12 -0
  17. spiral/cli/__init__.py +2 -5
  18. spiral/cli/admin.py +7 -12
  19. spiral/cli/app.py +23 -6
  20. spiral/cli/console.py +1 -1
  21. spiral/cli/fs.py +82 -17
  22. spiral/cli/iceberg/__init__.py +7 -0
  23. spiral/cli/iceberg/namespaces.py +47 -0
  24. spiral/cli/iceberg/tables.py +60 -0
  25. spiral/cli/indexes/__init__.py +19 -0
  26. spiral/cli/login.py +14 -5
  27. spiral/cli/orgs.py +90 -0
  28. spiral/cli/printer.py +9 -1
  29. spiral/cli/projects.py +136 -0
  30. spiral/cli/state.py +2 -0
  31. spiral/cli/tables/__init__.py +121 -0
  32. spiral/cli/telemetry.py +18 -0
  33. spiral/cli/types.py +8 -10
  34. spiral/cli/{workload.py → workloads.py} +11 -11
  35. spiral/{catalog.py → client.py} +23 -37
  36. spiral/core/client/__init__.pyi +117 -0
  37. spiral/core/index/__init__.pyi +15 -0
  38. spiral/core/{core → table}/__init__.pyi +44 -17
  39. spiral/core/{manifests → table/manifests}/__init__.pyi +5 -23
  40. spiral/core/table/metastore/__init__.pyi +62 -0
  41. spiral/core/{spec → table/spec}/__init__.pyi +41 -66
  42. spiral/datetime_.py +27 -0
  43. spiral/expressions/__init__.py +26 -18
  44. spiral/expressions/base.py +5 -5
  45. spiral/expressions/list_.py +1 -1
  46. spiral/expressions/mp4.py +2 -9
  47. spiral/expressions/png.py +1 -1
  48. spiral/expressions/qoi.py +1 -1
  49. spiral/expressions/refs.py +3 -9
  50. spiral/expressions/struct.py +7 -5
  51. spiral/expressions/text.py +62 -0
  52. spiral/expressions/udf.py +3 -3
  53. spiral/iceberg/__init__.py +3 -0
  54. spiral/iceberg/client.py +33 -0
  55. spiral/indexes/__init__.py +5 -0
  56. spiral/indexes/client.py +137 -0
  57. spiral/indexes/index.py +34 -0
  58. spiral/indexes/scan.py +22 -0
  59. spiral/project.py +19 -110
  60. spiral/{proto → protogen}/_/scandal/__init__.py +23 -135
  61. spiral/protogen/_/spiral/table/__init__.py +22 -0
  62. spiral/protogen/substrait/__init__.py +3399 -0
  63. spiral/protogen/substrait/extensions/__init__.py +115 -0
  64. spiral/server.py +17 -0
  65. spiral/settings.py +29 -91
  66. spiral/substrait_.py +9 -5
  67. spiral/tables/__init__.py +12 -0
  68. spiral/tables/client.py +130 -0
  69. spiral/{dataset.py → tables/dataset.py} +9 -199
  70. spiral/tables/debug/manifests.py +70 -0
  71. spiral/tables/debug/metrics.py +56 -0
  72. spiral/{debug.py → tables/debug/scan.py} +6 -9
  73. spiral/{maintenance.py → tables/maintenance.py} +1 -1
  74. spiral/{scan_.py → tables/scan.py} +63 -89
  75. spiral/tables/snapshot.py +78 -0
  76. spiral/{table.py → tables/table.py} +59 -73
  77. spiral/{txn.py → tables/transaction.py} +7 -3
  78. pyspiral-0.3.1.dist-info/RECORD +0 -85
  79. spiral/api/tables.py +0 -91
  80. spiral/api/tokens.py +0 -56
  81. spiral/authn/authn.py +0 -89
  82. spiral/authn/device.py +0 -206
  83. spiral/authn/github_.py +0 -33
  84. spiral/authn/modal_.py +0 -18
  85. spiral/cli/org.py +0 -90
  86. spiral/cli/project.py +0 -109
  87. spiral/cli/table.py +0 -20
  88. spiral/cli/token.py +0 -27
  89. spiral/core/metastore/__init__.pyi +0 -91
  90. spiral/proto/_/spfs/__init__.py +0 -36
  91. spiral/proto/_/spiral/table/__init__.py +0 -276
  92. spiral/proto/_/spiraldb/metastore/__init__.py +0 -499
  93. spiral/proto/__init__.py +0 -0
  94. spiral/proto/scandal/__init__.py +0 -45
  95. spiral/proto/spiral/__init__.py +0 -0
  96. spiral/proto/spiral/table/__init__.py +0 -96
  97. {pyspiral-0.3.1.dist-info → pyspiral-0.4.0.dist-info}/entry_points.txt +0 -0
  98. /spiral/{authn/__init__.py → core/__init__.pyi} +0 -0
  99. /spiral/{core → protogen/_}/__init__.py +0 -0
  100. /spiral/{proto/_ → protogen/_/arrow}/__init__.py +0 -0
  101. /spiral/{proto/_/arrow → protogen/_/arrow/flight}/__init__.py +0 -0
  102. /spiral/{proto/_/arrow/flight → protogen/_/arrow/flight/protocol}/__init__.py +0 -0
  103. /spiral/{proto → protogen}/_/arrow/flight/protocol/sql/__init__.py +0 -0
  104. /spiral/{proto/_/arrow/flight/protocol → protogen/_/spiral}/__init__.py +0 -0
  105. /spiral/{proto → protogen/_}/substrait/__init__.py +0 -0
  106. /spiral/{proto → protogen/_}/substrait/extensions/__init__.py +0 -0
  107. /spiral/{proto/_/spiral → protogen}/__init__.py +0 -0
  108. /spiral/{proto → protogen}/util.py +0 -0
  109. /spiral/{proto/_/spiraldb → tables/debug}/__init__.py +0 -0
@@ -0,0 +1,115 @@
1
+ # Generated by the protocol buffer compiler. DO NOT EDIT!
2
+ # sources: substrait/extensions/extensions.proto
3
+ # plugin: python-betterproto
4
+ # This file has been @generated
5
+
6
+ from dataclasses import dataclass
7
+ from typing import List
8
+
9
+ import betterproto
10
+ import betterproto.lib.google.protobuf as betterproto_lib_google_protobuf
11
+
12
+
13
@dataclass(eq=False, repr=False)
class SimpleExtensionUri(betterproto.Message):
    """Pairs an extension URI with the plan-local anchor used to refer to it."""

    # Field 1: plan-scoped surrogate key.
    extension_uri_anchor: int = betterproto.uint32_field(1)
    """
    Surrogate key, valid within a single plan, that is used to reference the
    URI associated with an extension.
    """

    # Field 2: location of the extension definition.
    uri: str = betterproto.string_field(2)
    """
    URI from which this extension's YAML can be retrieved. It acts as the
    "namespace" of the extension.
    """
26
+
27
+
28
@dataclass(eq=False, repr=False)
class SimpleExtensionDeclaration(betterproto.Message):
    """
    Maps one specific extension entity to the URI where that extension can be
    found.

    All three fields belong to the same ``mapping_type`` group.
    """

    extension_type: "SimpleExtensionDeclarationExtensionType" = betterproto.message_field(
        1, group="mapping_type"
    )
    extension_type_variation: "SimpleExtensionDeclarationExtensionTypeVariation" = betterproto.message_field(
        2, group="mapping_type"
    )
    extension_function: "SimpleExtensionDeclarationExtensionFunction" = betterproto.message_field(
        3, group="mapping_type"
    )
44
+
45
+
46
@dataclass(eq=False, repr=False)
class SimpleExtensionDeclarationExtensionType(betterproto.Message):
    """Declaration of an extension type."""

    extension_uri_reference: int = betterproto.uint32_field(1)
    """
    Refers to the extension_uri_anchor defined for a specific extension URI.
    """

    type_anchor: int = betterproto.uint32_field(2)
    """
    Surrogate key, valid within a single plan, used to reference a specific
    extension type.
    """

    name: str = betterproto.string_field(3)
    """Name of the type as given in the extension YAML."""
63
+
64
+
65
@dataclass(eq=False, repr=False)
class SimpleExtensionDeclarationExtensionTypeVariation(betterproto.Message):
    """Declaration of an extension type variation."""

    extension_uri_reference: int = betterproto.uint32_field(1)
    """
    Refers to the extension_uri_anchor defined for a specific extension URI.
    """

    type_variation_anchor: int = betterproto.uint32_field(2)
    """
    Surrogate key, valid within a single plan, used to reference a specific
    type variation.
    """

    name: str = betterproto.string_field(3)
    """Name of the type as given in the extension YAML."""
80
+
81
+
82
@dataclass(eq=False, repr=False)
class SimpleExtensionDeclarationExtensionFunction(betterproto.Message):
    """Declaration of an extension function."""

    extension_uri_reference: int = betterproto.uint32_field(1)
    """
    Refers to the extension_uri_anchor defined for a specific extension URI.
    """

    function_anchor: int = betterproto.uint32_field(2)
    """
    Surrogate key, valid within a single plan, used to reference a specific
    function.
    """

    name: str = betterproto.string_field(3)
    """Compound name of the function signature."""
97
+
98
+
99
@dataclass(eq=False, repr=False)
class AdvancedExtension(betterproto.Message):
    """
    Generic container for embedding additional extension information in a
    serialized substrait plan.
    """

    optimization: List["betterproto_lib_google_protobuf.Any"] = betterproto.message_field(1)
    """
    Optimizations are helpful information that does not influence semantics.
    A consumer may ignore them.
    """

    enhancement: "betterproto_lib_google_protobuf.Any" = betterproto.message_field(2)
    """An enhancement alters semantics. A consumer cannot ignore it."""
spiral/server.py ADDED
@@ -0,0 +1,17 @@
1
+ import socket
2
+ import time
3
+
4
+
5
def wait_for_port(port: int, host: str = "localhost", timeout: float = 5.0) -> None:
    """Wait until a port starts accepting TCP connections.

    Repeatedly attempts a TCP connection to ``(host, port)`` and returns as soon
    as one succeeds. The connection is closed immediately; it is only a probe.

    Args:
        port: TCP port to probe.
        host: Host name or address to connect to.
        timeout: Maximum total time to wait, in seconds.

    Raises:
        TimeoutError: If no connection could be established before the deadline.
            The last connection error is chained as the cause.
    """
    # Use the monotonic clock so wall-clock adjustments cannot stretch or
    # shrink the waiting period.
    deadline = time.monotonic() + timeout
    while True:
        # Cap each attempt by the time left, so one blocking connect cannot
        # push the total wait far beyond `timeout`. Keep a small positive floor
        # because a zero timeout would put the socket in non-blocking mode.
        remaining = max(deadline - time.monotonic(), 0.01)
        try:
            with socket.create_connection((host, port), timeout=remaining):
                return
        except OSError as ex:
            if time.monotonic() >= deadline:
                raise TimeoutError(
                    f"Waited too long for the port {port} on host {host} to start accepting connections."
                ) from ex
            # Brief pause between attempts to avoid a busy loop.
            time.sleep(0.01)
spiral/settings.py CHANGED
@@ -1,54 +1,35 @@
1
1
  import functools
2
- import hashlib
3
2
  import os
4
- from collections.abc import Callable, Mapping
5
3
  from pathlib import Path
6
- from typing import Any
4
+ from typing import Annotated
7
5
 
8
- import jwt
9
6
  import typer
10
- from pydantic import BaseModel, Field
7
+ from pydantic import Field, ValidatorFunctionWrapHandler, WrapValidator
11
8
  from pydantic_settings import (
12
9
  BaseSettings,
13
10
  InitSettingsSource,
14
11
  PydanticBaseSettingsSource,
15
12
  SettingsConfigDict,
16
- TomlConfigSettingsSource,
17
13
  )
18
14
 
19
15
  from spiral.api import SpiralAPI
20
- from spiral.authn.authn import (
21
- DeviceAuthProvider,
22
- EnvironmentAuthn,
23
- FallbackAuthn,
24
- TokenAuthn,
25
- TokenExchangeProvider,
26
- )
27
- from spiral.authn.device import DeviceAuth
28
- from spiral.authn.github_ import GitHubActionsProvider
29
- from spiral.authn.modal_ import ModalProvider
16
+ from spiral.core.client import Authn, DeviceCodeAuth, Token
30
17
 
31
18
  DEV = "PYTEST_VERSION" in os.environ or bool(os.environ.get("SPIRAL_DEV", None))
32
- FILE_FORMAT = os.environ.get("SPIRAL_FILE_FORMAT", "parquet")
19
+ CI = "GITHUB_ACTIONS" in os.environ
33
20
 
34
21
  APP_DIR = Path(typer.get_app_dir("pyspiral"))
35
22
  LOG_DIR = APP_DIR / "logs"
36
- CONFIG_FILE = APP_DIR / "config.toml"
37
-
38
23
 
39
- class AuthSettings(BaseSettings):
40
- model_config = SettingsConfigDict(frozen=True)
41
24
 
42
- domain: str = "https://device.spiraldb.com"
43
- client_id: str = "client_01J1CRS967RFQY7JSE8XZ44ATR" if DEV else "client_01J1CRS9MGF103FKTKZDCNJVM3"
25
def validate_token(v, handler: ValidatorFunctionWrapHandler):
    """Convert a raw settings value into a `Token`.

    Args:
        v: The raw value supplied for the token field.
        handler: The wrapped pydantic validator. It is not called here; the
            value is converted directly.

    Returns:
        `v` itself when it is already a `Token`, otherwise a `Token` built from
        the string.

    Raises:
        ValueError: If `v` is neither a `Token` nor a string.
    """
    # A token built programmatically needs no conversion; rejecting it with
    # "must be a string" would be misleading.
    if isinstance(v, Token):
        return v
    if isinstance(v, str):
        return Token(v)
    raise ValueError("Token value must be a string")
44
30
 
45
31
 
46
- class AccessToken(BaseModel):
47
- token: str
48
-
49
- @functools.cached_property
50
- def payload(self) -> dict:
51
- return jwt.decode(self.token, options={"verify_signature": False})
32
+ TokenType = Annotated[Token, WrapValidator(validate_token)]
52
33
 
53
34
 
54
35
  class SpiralDBSettings(BaseSettings):
@@ -57,38 +38,35 @@ class SpiralDBSettings(BaseSettings):
57
38
  host: str = "localhost" if DEV else "api.spiraldb.com"
58
39
  port: int = 4279 if DEV else 443
59
40
  ssl: bool = not DEV
60
- auth: AuthSettings = Field(default_factory=AuthSettings)
61
- token: str | None = None
41
+ token: TokenType | None = None
62
42
 
63
43
  @property
64
44
  def uri(self) -> str:
65
45
  return f"{'https' if self.ssl else 'http'}://{self.host}:{self.port}"
66
46
 
67
- @property
68
- def uri_scandal(self) -> str:
69
- # TODO(marko): Scandal will be a different service. For now, gRPC API is hosted on the SpiralDB service.
70
- return f"{'grpc+tls' if self.ssl else 'grpc'}://{self.host}:{self.port}"
71
47
 
72
- @property
73
- def uri_iceberg(self) -> str:
74
- return self.uri + "/iceberg"
48
+ class SpfsSettings(BaseSettings):
49
+ model_config = SettingsConfigDict(frozen=True)
75
50
 
76
- def device_auth(self) -> DeviceAuth:
77
- auth_file = (
78
- APP_DIR / hashlib.md5(f"{self.auth.domain}/{self.auth.client_id}".encode()).hexdigest() / "auth.json"
79
- )
80
- return DeviceAuth(auth_file=auth_file, domain=self.auth.domain, client_id=self.auth.client_id)
51
+ host: str = "localhost" if DEV else "spfs.spiraldb.dev"
52
+ port: int = 4295 if DEV else 443
53
+ ssl: bool = not DEV
54
+
55
+ @property
56
+ def uri(self) -> str:
57
+ return f"{'https' if self.ssl else 'http'}://{self.host}:{self.port}"
81
58
 
82
59
 
83
60
  class Settings(BaseSettings):
84
61
  model_config = SettingsConfigDict(
85
- toml_file=CONFIG_FILE,
86
62
  env_nested_delimiter="__",
87
63
  env_prefix="SPIRAL__",
88
64
  frozen=True,
89
65
  )
90
66
 
91
67
  spiraldb: SpiralDBSettings = Field(default_factory=SpiralDBSettings)
68
+ spfs: SpfsSettings = Field(default_factory=SpfsSettings)
69
+ file_format: str = Field(default="vortex")
92
70
 
93
71
  @functools.cached_property
94
72
  def api(self) -> "SpiralAPI":
@@ -99,17 +77,12 @@ class Settings(BaseSettings):
99
77
  @functools.cached_property
100
78
  def authn(self):
101
79
  if self.spiraldb.token:
102
- return TokenAuthn(self.spiraldb.token)
103
-
104
- return FallbackAuthn(
105
- [
106
- GitHubActionsProvider(),
107
- ModalProvider(),
108
- # TODO(marko): Github and Modal should also be behind token exchanged.
109
- TokenExchangeProvider(EnvironmentAuthn(), self.spiraldb.uri),
110
- DeviceAuthProvider(self.spiraldb.device_auth()),
111
- ]
112
- )
80
+ return Authn.from_token(self.spiraldb.token)
81
+ return Authn.from_fallback()
82
+
83
+ @functools.cached_property
84
+ def device_code_auth(self) -> DeviceCodeAuth:
85
+ return DeviceCodeAuth.default()
113
86
 
114
87
  @classmethod
115
88
  def settings_customise_sources(
@@ -120,42 +93,7 @@ class Settings(BaseSettings):
120
93
  init_settings: InitSettingsSource,
121
94
  **kwargs,
122
95
  ) -> tuple[PydanticBaseSettingsSource, ...]:
123
- return env_settings, dotenv_settings, TomlConfigSettingsSource(settings_cls), init_settings
124
-
125
-
126
- class _LazyDict(Mapping):
127
- def __init__(self, lazy_values: dict[str, Any | Callable[[], Any]]):
128
- self._lazy_values = lazy_values
129
-
130
- def _dict(self):
131
- return {k: v() if callable(v) else v for k, v in self._lazy_values.items()}
132
-
133
- def __getitem__(self, key, /):
134
- return self._lazy_values[key]()
135
-
136
- def get(self, key, /, default=None):
137
- return self._lazy_values.get(key, lambda: default)()
138
-
139
- def items(self):
140
- return self._dict().items()
141
-
142
- def keys(self):
143
- return self._lazy_values.keys()
144
-
145
- def values(self):
146
- return self._dict().values()
147
-
148
- def __contains__(self, key, /):
149
- return key in self._lazy_values
150
-
151
- def __eq__(self, other, /):
152
- return False
153
-
154
- def __iter__(self):
155
- return iter(self._dict())
156
-
157
- def __len__(self):
158
- return len(self._lazy_values)
96
+ return env_settings, dotenv_settings, init_settings
159
97
 
160
98
 
161
99
  @functools.cache
spiral/substrait_.py CHANGED
@@ -3,7 +3,7 @@ import pyarrow as pa
3
3
 
4
4
  import spiral.expressions as se
5
5
  from spiral.expressions.base import Expr
6
- from spiral.proto.substrait import (
6
+ from spiral.protogen.substrait import (
7
7
  Expression,
8
8
  ExpressionFieldReference,
9
9
  ExpressionLiteral,
@@ -17,7 +17,7 @@ from spiral.proto.substrait import (
17
17
  ExpressionScalarFunction,
18
18
  ExtendedExpression,
19
19
  )
20
- from spiral.proto.substrait.extensions import (
20
+ from spiral.protogen.substrait.extensions import (
21
21
  SimpleExtensionDeclaration,
22
22
  SimpleExtensionDeclarationExtensionFunction,
23
23
  SimpleExtensionDeclarationExtensionType,
@@ -43,7 +43,7 @@ class SubstraitConverter:
43
43
  def convert(self, buffer: pa.Buffer) -> Expr:
44
44
  """Convert a Substrait Extended Expression into a Spiral expression."""
45
45
 
46
- expr: ExtendedExpression = ExtendedExpression().parse(buffer.to_pybytes())
46
+ expr: ExtendedExpression = ExtendedExpression().parse(buffer)
47
47
  assert len(expr.referred_expr) == 1, "Only one expression is supported"
48
48
 
49
49
  # Parse the extension URIs from the plan.
@@ -255,12 +255,16 @@ class SubstraitConverter:
255
255
 
256
256
  scope = se.getitem(scope, field_name)
257
257
  scope_type = scope_type.field(ref.field).type
258
- return self._expr_direct_reference(scope, scope_type, ref.child) if ref.child else scope
258
+ if ref.is_set("child"):
259
+ return self._expr_direct_reference(scope, scope_type, ref.child)
260
+ return scope
259
261
  case "list_element", ref:
260
262
  ref: ExpressionReferenceSegmentListElement
261
263
  scope = se.getitem(scope, ref.offset)
262
264
  scope_type = scope_type.field(ref.field).type
263
- return self._expr_direct_reference(scope, scope_type, ref.child) if ref.child else scope
265
+ if ref.is_set("child"):
266
+ return self._expr_direct_reference(scope, scope_type, ref.child)
267
+ return scope
264
268
  case "", ref:
265
269
  # Because Proto... we hit this case when we recurse into a child node and it's actually "None".
266
270
  return scope
@@ -0,0 +1,12 @@
1
+ from spiral import _lib
2
+ from spiral.tables.client import Tables
3
+ from spiral.tables.maintenance import Maintenance
4
+ from spiral.tables.scan import Scan
5
+ from spiral.tables.snapshot import Snapshot
6
+ from spiral.tables.table import Table
7
+ from spiral.tables.transaction import Transaction
8
+
9
+ # Eagerly import the Spiral library
10
+ assert _lib, "Spiral library"
11
+
12
+ __all__ = ["Tables", "Table", "Snapshot", "Scan", "Transaction", "Maintenance"]
@@ -0,0 +1,130 @@
1
+ from datetime import datetime
2
+ from typing import Any
3
+
4
+ import pyarrow as pa
5
+
6
+ from spiral.api import SpiralAPI
7
+ from spiral.api.projects import TableResource
8
+ from spiral.core.client import Spiral as CoreSpiral
9
+ from spiral.core.table.spec import Schema
10
+ from spiral.datetime_ import timestamp_micros
11
+ from spiral.expressions import ExprLike
12
+ from spiral.tables.scan import Scan
13
+ from spiral.tables.table import Table
14
+ from spiral.types_ import Uri
15
+
16
+
17
class Tables:
    """
    Spiral Tables are a powerful and flexible way of storing, analyzing,
    and querying massive and/or multimodal datasets.

    The data model will feel familiar to users of SQL- or DataFrame-style systems,
    yet is designed to be more flexible, more powerful, and more useful in the context
    of modern data processing. Tables are stored and queried directly from object storage.
    """

    def __init__(self, api: SpiralAPI, spiral: CoreSpiral, *, project_id: str | None = None):
        """Create a tables client.

        Args:
            api: API client used to look up table resources.
            spiral: Core client used to open, create and scan tables.
            project_id: Default project for identifiers that do not name one.
        """
        self._api = api
        self._spiral = spiral
        self._project_id = project_id

    def table(self, identifier: str) -> Table:
        """Open an existing table.

        Args:
            identifier: The table identifier, in the form `project.dataset.table`,
                `dataset.table`, or `table` (which uses the `default` dataset).

        Raises:
            ValueError: If the identifier is malformed, if no project can be
                determined, or if no matching table exists.
        """
        project_id, dataset, table = self._parse_identifier(identifier)
        if project_id is None:
            raise ValueError("Must provide a fully qualified table identifier.")

        matches = list(self._api.project.list_tables(project_id, dataset=dataset, table=table))
        if not matches:
            raise ValueError(f"Table not found: {project_id}.{dataset}.{table}")

        # The first match is used, as before.
        res = matches[0]
        return Table(self, self._spiral.get_table(res.id), identifier=f"{res.project_id}.{res.dataset}.{res.table}")

    def list_tables(self) -> list[TableResource]:
        """List the tables of the client's default project.

        Raises:
            ValueError: If the client was created without a project ID.
        """
        project_id = self._project_id
        if project_id is None:
            raise ValueError("Must provide a project ID to list tables.")
        return list(self._api.project.list_tables(project_id))

    def create_table(
        self,
        identifier: str,
        *,
        key_schema: pa.Schema | Any,
        root_uri: Uri | None = None,
        exist_ok: bool = False,
    ) -> Table:
        """Create a new table in the project.

        Args:
            identifier: The table identifier, in the form `project.dataset.table`, `dataset.table` or `table`.
            key_schema: The schema of the table's keys. Anything that is not already a
                `pa.Schema` is passed to `pa.schema(...)` first.
            root_uri: The root URI for the table.
            exist_ok: If True, do not raise an error if the table already exists.

        Raises:
            ValueError: If the identifier is malformed or no project can be determined.
        """
        project_id, dataset, table = self._parse_identifier(identifier)
        if project_id is None:
            raise ValueError("Must provide a fully qualified table identifier.")

        if not isinstance(key_schema, pa.Schema):
            key_schema = pa.schema(key_schema)
        key_schema = Schema.from_arrow(key_schema)

        core_table = self._spiral.create_table(
            project_id,
            dataset=dataset,
            table=table,
            key_schema=key_schema,
            root_uri=root_uri,
            exist_ok=exist_ok,
        )

        return Table(self, core_table, identifier=f"{project_id}.{dataset}.{table}")

    def _parse_identifier(self, identifier: str) -> tuple[str | None, str, str]:
        """Split an identifier into `(project_id, dataset, table)`.

        Missing leading components are filled in from the client's default
        project and the `default` dataset. The project may be None when the
        client has no default project.

        Raises:
            ValueError: If the identifier has more than three components or
                any component is empty.
        """
        parts = identifier.split(".")
        # Reject empty components such as "", "a..b" or ".t" up front instead of
        # passing empty names on to the service.
        if not all(parts):
            raise ValueError(f"Invalid table identifier: {identifier}")

        if len(parts) == 1:
            return self._project_id, "default", parts[0]
        elif len(parts) == 2:
            return self._project_id, parts[0], parts[1]
        elif len(parts) == 3:
            return parts[0], parts[1], parts[2]
        else:
            raise ValueError(f"Invalid table identifier: {identifier}")

    def scan(
        self,
        *projections: ExprLike,
        where: ExprLike | None = None,
        asof: datetime | int | None = None,
        exclude_keys: bool = False,
    ) -> Scan:
        """Starts a read transaction on the Spiral.

        Args:
            projections: a set of expressions that return struct arrays.
            where: a query expression to apply to the data.
            asof: only data written before the given timestamp will be returned, caveats around compaction.
                A `datetime` is converted with `timestamp_micros`; an integer is passed through unchanged.
            exclude_keys: whether to exclude the key columns in the scan result, defaults to False.
                Note that if a projection includes a key column, it will be included in the result.
        """
        from spiral import expressions as se

        if isinstance(asof, datetime):
            asof = timestamp_micros(asof)

        # Combine all projections into a single struct.
        projection = se.merge(*projections)
        if where is not None:
            where = se.lift(where)

        return Scan(
            self._spiral.open_table_scan(
                projection.__expr__,
                # Test against None explicitly: the truthiness of an expression
                # object is not a reliable signal that a filter was given.
                filter=where.__expr__ if where is not None else None,
                asof=asof,
                exclude_keys=exclude_keys,
            ),
        )