pyspiral 0.1.0__cp310-abi3-macosx_11_0_arm64.whl
Sign up to get free protection for your applications and to get access to all the features.
- pyspiral-0.1.0.dist-info/METADATA +48 -0
- pyspiral-0.1.0.dist-info/RECORD +81 -0
- pyspiral-0.1.0.dist-info/WHEEL +4 -0
- pyspiral-0.1.0.dist-info/entry_points.txt +2 -0
- spiral/__init__.py +11 -0
- spiral/_lib.abi3.so +0 -0
- spiral/adbc.py +386 -0
- spiral/api/__init__.py +221 -0
- spiral/api/admin.py +29 -0
- spiral/api/filesystems.py +125 -0
- spiral/api/organizations.py +90 -0
- spiral/api/projects.py +160 -0
- spiral/api/tables.py +94 -0
- spiral/api/tokens.py +56 -0
- spiral/api/workloads.py +45 -0
- spiral/arrow.py +209 -0
- spiral/authn/__init__.py +0 -0
- spiral/authn/authn.py +89 -0
- spiral/authn/device.py +206 -0
- spiral/authn/github_.py +33 -0
- spiral/authn/modal_.py +18 -0
- spiral/catalog.py +78 -0
- spiral/cli/__init__.py +82 -0
- spiral/cli/__main__.py +4 -0
- spiral/cli/admin.py +21 -0
- spiral/cli/app.py +48 -0
- spiral/cli/console.py +95 -0
- spiral/cli/fs.py +47 -0
- spiral/cli/login.py +13 -0
- spiral/cli/org.py +90 -0
- spiral/cli/printer.py +45 -0
- spiral/cli/project.py +107 -0
- spiral/cli/state.py +3 -0
- spiral/cli/table.py +20 -0
- spiral/cli/token.py +27 -0
- spiral/cli/types.py +53 -0
- spiral/cli/workload.py +59 -0
- spiral/config.py +26 -0
- spiral/core/__init__.py +0 -0
- spiral/core/core/__init__.pyi +53 -0
- spiral/core/manifests/__init__.pyi +53 -0
- spiral/core/metastore/__init__.pyi +91 -0
- spiral/core/spec/__init__.pyi +257 -0
- spiral/dataset.py +239 -0
- spiral/debug.py +251 -0
- spiral/expressions/__init__.py +222 -0
- spiral/expressions/base.py +149 -0
- spiral/expressions/http.py +86 -0
- spiral/expressions/io.py +100 -0
- spiral/expressions/list_.py +68 -0
- spiral/expressions/refs.py +44 -0
- spiral/expressions/str_.py +39 -0
- spiral/expressions/struct.py +57 -0
- spiral/expressions/tiff.py +223 -0
- spiral/expressions/udf.py +46 -0
- spiral/grpc_.py +32 -0
- spiral/project.py +137 -0
- spiral/proto/_/__init__.py +0 -0
- spiral/proto/_/arrow/__init__.py +0 -0
- spiral/proto/_/arrow/flight/__init__.py +0 -0
- spiral/proto/_/arrow/flight/protocol/__init__.py +0 -0
- spiral/proto/_/arrow/flight/protocol/sql/__init__.py +1990 -0
- spiral/proto/_/scandal/__init__.py +223 -0
- spiral/proto/_/spfs/__init__.py +36 -0
- spiral/proto/_/spiral/__init__.py +0 -0
- spiral/proto/_/spiral/table/__init__.py +225 -0
- spiral/proto/_/spiraldb/__init__.py +0 -0
- spiral/proto/_/spiraldb/metastore/__init__.py +499 -0
- spiral/proto/__init__.py +0 -0
- spiral/proto/scandal/__init__.py +45 -0
- spiral/proto/spiral/__init__.py +0 -0
- spiral/proto/spiral/table/__init__.py +96 -0
- spiral/proto/substrait/__init__.py +3399 -0
- spiral/proto/substrait/extensions/__init__.py +115 -0
- spiral/proto/util.py +41 -0
- spiral/py.typed +0 -0
- spiral/scan_.py +168 -0
- spiral/settings.py +157 -0
- spiral/substrait_.py +275 -0
- spiral/table.py +157 -0
- spiral/types_.py +6 -0
spiral/cli/state.py
ADDED
spiral/cli/table.py
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
from typing import Annotated
|
2
|
+
|
3
|
+
import rich
|
4
|
+
from typer import Option
|
5
|
+
|
6
|
+
from spiral.api.tables import ListTables, Table
|
7
|
+
from spiral.cli import AsyncTyper, OptionalStr, printer, state
|
8
|
+
from spiral.cli.types import ProjectArg
|
9
|
+
|
10
|
+
# Typer application for the table commands; functions decorated with @app.command register on it.
app = AsyncTyper()
|
11
|
+
|
12
|
+
|
13
|
+
@app.command(help="List tables.")
def ls(
    project: ProjectArg,
    dataset: Annotated[OptionalStr, Option(help="Filter by dataset name.")] = None,
):
    """Print the tables of a project as a rich table.

    Args:
        project: Project whose tables are listed; sent as ``project_id``.
        dataset: Optional dataset name forwarded to the API with the request.
    """
    request = ListTables.Request(project_id=project, dataset=dataset)
    # Materialise the listing before handing it to the printer.
    rows = list(state.settings.api.table.list(request))
    columns = ["id", "project_id", "dataset", "table"]
    rich.print(printer.table_of_models(Table, rows, fields=columns))
|
spiral/cli/token.py
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
from typing import Annotated
|
2
|
+
|
3
|
+
import rich
|
4
|
+
from typer import Argument, Option
|
5
|
+
|
6
|
+
from spiral.api.tokens import ListTokens, RevokeToken, Token
|
7
|
+
from spiral.cli import AsyncTyper, OptionalStr, printer, state
|
8
|
+
from spiral.cli.types import ProjectArg
|
9
|
+
|
10
|
+
# Typer application for the token commands; functions decorated with @app.command register on it.
app = AsyncTyper()
|
11
|
+
|
12
|
+
|
13
|
+
@app.command(help="List tokens.")
def ls(
    project: ProjectArg,
    on_behalf_of: Annotated[OptionalStr, Option(help="Filter by on behalf of.")] = None,
):
    """Print the tokens of a project as a rich table.

    Args:
        project: Project whose tokens are listed; sent as ``project_id``.
        on_behalf_of: Optional value forwarded to the API with the request.
    """
    request = ListTokens.Request(project_id=project, on_behalf_of=on_behalf_of)
    # Materialise the listing before handing it to the printer.
    rows = list(state.settings.api.token.list(request))
    columns = ["id", "project_id", "on_behalf_of"]
    rich.print(printer.table_of_models(Token, rows, fields=columns))
|
20
|
+
|
21
|
+
|
22
|
+
@app.command(help="Revoke a token.")
def revoke(token_id: Annotated[str, Argument(help="Token ID.")]):
    """Revoke the token with the given ID and print a one-line confirmation.

    Args:
        token_id: ID of the token to revoke.
    """
    response = state.settings.api.token.revoke(RevokeToken.Request(token_id=token_id))
    # The confirmation is built from the token carried in the API response.
    revoked = response.token
    rich.print(
        f"Revoked token {revoked.id} for project {revoked.project_id} acting on behalf of {revoked.on_behalf_of}"
    )
|
spiral/cli/types.py
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
from typing import Annotated
|
2
|
+
|
3
|
+
import questionary
|
4
|
+
import rich
|
5
|
+
import typer
|
6
|
+
from questionary import Choice
|
7
|
+
from typer import Argument
|
8
|
+
|
9
|
+
from spiral.api import OrganizationId, ProjectId
|
10
|
+
from spiral.cli import state
|
11
|
+
|
12
|
+
|
13
|
+
def _project_default():
    """Interactively pick a project when none was given on the command line.

    Lists the projects available through the API and shows a selection prompt.
    Each choice is titled ``"<id> - <name>"``, or just the id when the project
    has no name.

    Returns:
        The ``id`` of the project chosen in the prompt.

    Raises:
        typer.Exit: With code 1 when no projects are listed, or when the prompt
            returns ``None`` (questionary's result for a cancelled prompt).
    """
    projects = list(state.settings.api.project.list())

    if not projects:
        rich.print("[red]No projects found[/red]")
        raise typer.Exit(1)

    selected = questionary.select(
        "Select a project",
        choices=[
            Choice(title=f"{project.id} - {project.name}" if project.name else project.id, value=project.id)
            for project in projects
        ],
    ).ask()

    # ask() hands back None instead of raising when the user cancels (e.g. Ctrl-C);
    # stop here rather than pass None on as a project ID.
    if selected is None:
        raise typer.Exit(1)

    return selected
|
27
|
+
|
28
|
+
|
29
|
+
# CLI argument type for a project ID; _project_default is registered as the default factory.
ProjectArg = Annotated[ProjectId, Argument(help="Project ID", show_default=False, default_factory=_project_default)]
|
30
|
+
|
31
|
+
|
32
|
+
def _org_default():
    """Interactively pick an organization when none was given on the command line.

    Lists the current user's organization memberships through the API and shows
    a selection prompt. Each choice is titled ``"<id> - <name>"``, or just the id
    when the organization has no name.

    Returns:
        The ``id`` of the organization chosen in the prompt.

    Raises:
        typer.Exit: With code 1 when no memberships are listed, or when the
            prompt returns ``None`` (questionary's result for a cancelled prompt).
    """
    memberships = list(state.settings.api.organization.list_user_memberships())

    if not memberships:
        rich.print("[red]No organizations found[/red]")
        raise typer.Exit(1)

    selected = questionary.select(
        "Select an organization",
        choices=[
            Choice(
                title=f"{m.organization.id} - {m.organization.name}" if m.organization.name else m.organization.id,
                value=m.organization.id,
            )
            for m in memberships
        ],
    ).ask()

    # ask() hands back None instead of raising when the user cancels (e.g. Ctrl-C);
    # stop here rather than pass None on as an organization ID.
    if selected is None:
        raise typer.Exit(1)

    return selected
|
49
|
+
|
50
|
+
|
51
|
+
# CLI argument type for an organization ID; _org_default is registered as the default factory.
OrganizationArg = Annotated[
    OrganizationId,
    Argument(
        help="Organization ID",
        show_default=False,
        default_factory=_org_default,
    ),
]
|
spiral/cli/workload.py
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
from typing import Annotated
|
2
|
+
|
3
|
+
import pyperclip
|
4
|
+
import questionary
|
5
|
+
import rich
|
6
|
+
from questionary import Choice
|
7
|
+
from typer import Argument, Option
|
8
|
+
|
9
|
+
from spiral.api.workloads import CreateWorkload, IssueToken, ListWorkloads, Workload
|
10
|
+
from spiral.cli import AsyncTyper, OptionalStr, printer, state
|
11
|
+
from spiral.cli.types import ProjectArg
|
12
|
+
|
13
|
+
# Typer application for the workload commands; functions decorated with @app.command register on it.
app = AsyncTyper()
|
14
|
+
|
15
|
+
|
16
|
+
@app.command(help="Create a new workload.")
def create(
    project: ProjectArg,
    name: Annotated[OptionalStr, Option(help="Friendly name for the workload.")] = None,
):
    """Create a workload in a project and print the new workload's ID.

    Args:
        project: Project the workload is created in; sent as ``project_id``.
        name: Optional friendly name forwarded to the API.
    """
    request = CreateWorkload.Request(project_id=project, name=name)
    response = state.settings.api.workload.create(request)
    rich.print(f"Created workload {response.workload.id}")
|
23
|
+
|
24
|
+
|
25
|
+
@app.command(help="List workloads.")
def ls(
    project: ProjectArg,
):
    """Print the workloads of a project as a rich table.

    Args:
        project: Project whose workloads are listed; sent as ``project_id``.
    """
    request = ListWorkloads.Request(project_id=project)
    # Materialise the listing before handing it to the printer.
    rows = list(state.settings.api.workload.list(request))
    columns = ["id", "project_id", "name"]
    rich.print(printer.table_of_models(Workload, rows, fields=columns))
|
31
|
+
|
32
|
+
|
33
|
+
@app.command(help="Issue a token.")
def token(workload_id: Annotated[str, Argument(help="Workload ID.")]):
    """Issue a token for a workload and let the user choose how to receive the secret.

    The token is issued first; the user is then prompted to copy the secret to
    the clipboard, print it, or exit without revealing it. The token ID is
    printed last in every case.

    Args:
        workload_id: ID of the workload the token is issued for.
    """
    res = state.settings.api.workload.issue_token(IssueToken.Request(workload_id=workload_id))

    while True:
        choice = questionary.select(
            "What would you like to do with the secret? You will not be able to see this secret again!",
            choices=[
                Choice(title="Copy to clipboard", value=1),
                Choice(title="Print to console", value=2),
                Choice(title="Exit", value=3),
            ],
        ).ask()

        if choice == 1:
            try:
                pyperclip.copy(res.token_secret)
            except pyperclip.PyperclipException:
                # The token has already been issued and the secret cannot be shown again,
                # so an unavailable clipboard must not abort the command: report it and
                # prompt again so another option can be chosen.
                rich.print("[red]Could not copy to clipboard. Please choose another option.[/red]")
                continue
            rich.print("[green]Secret copied to clipboard![/green]")
            break
        elif choice == 2:
            rich.print(f"[green]Token Secret:[/green] {res.token_secret}")
            break
        elif choice == 3:
            break
        else:
            # Any other result (including None from a cancelled prompt) prompts again.
            rich.print("[red]Invalid choice. Please try again.[/red]")

    rich.print(f"[green]Token ID:[/green] {res.token_id}")
|
spiral/config.py
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
import os
|
2
|
+
|
3
|
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
4
|
+
|
5
|
+
# File format name, read once at import time from SPIRAL_FILE_FORMAT; "parquet" when the variable is unset.
FILE_FORMAT = os.getenv("SPIRAL_FILE_FORMAT", "parquet")
|
6
|
+
|
7
|
+
|
8
|
+
class Config(BaseSettings):
    # Settings model configured with the SPIRAL_CORE__ environment prefix, "__" as the
    # nested delimiter, and frozen=True.
    model_config = SettingsConfigDict(
        frozen=True,
        env_prefix="SPIRAL_CORE__",
        env_nested_delimiter="__",
    )

    # 256 * 1024 * 1024 bytes (256MB).
    partition_file_min_size: int = 256 * 1024 * 1024
    flush_wal_on_write: bool = False

    # TODO(marko): Support config. Unused after migration to Rust.
    # #: Defaults to ThreadPoolExecutor's default (based on os.cpu_count()).
    # scan_num_threads: int | None = 61  # 61 is used by Golang and Tokio, for some reason...
    #
    # #: The duration of WAL that is preserved to allow for txn conflict resolution.
    # transaction_window: int = 0 if DEV else timedelta(days=1).total_seconds()
    #
    # #: Truncation length of string statistics.
    # string_truncation_length: int = 1024
|
spiral/core/__init__.py
ADDED
File without changes
|
@@ -0,0 +1,53 @@
|
|
1
|
+
from typing import Any
|
2
|
+
|
3
|
+
import pyarrow as pa
|
4
|
+
from spiral.core.manifests import FragmentManifest
|
5
|
+
from spiral.core.metastore import PyMetastore
|
6
|
+
from spiral.core.spec import ColumnGroup, ColumnGroupMetadata, KeyRange, Schema, WriteAheadLog
|
7
|
+
from spiral.expressions import Expr
|
8
|
+
|
9
|
+
class Table:
    """Type stub for ``Table``, which is constructed from a ``PyMetastore``."""

    id: str
    root_uri: str
    key_schema: Schema
    metastore: PyMetastore

    def __init__(self, metastore: PyMetastore): ...

    # Every accessor below takes ``asof`` as a required keyword-only argument that may be None.
    # NOTE(review): asof is presumably a timestamp selecting a historical view -- confirm.
    def get_wal(self, *, asof: int | None) -> WriteAheadLog: ...
    def get_column_group_metadata(self, column_group: ColumnGroup, *, asof: int | None) -> ColumnGroupMetadata: ...
    def list_column_groups(self, *, asof: int | None) -> list[ColumnGroup] | None: ...
    def get_schema(self, *, asof: int | None) -> Schema: ...
|
21
|
+
|
22
|
+
class TableScan:
    """Type stub for ``TableScan``, built from a projection expression and optional scan settings."""

    def __init__(
        self,
        projection: Expr,
        filter: Expr | None = None,
        asof: int | None = None,
        exclude_keys: bool = False,
        aux_schema: pa.Schema | None = None,
    ) -> None: ...  # __init__ always returns None; the instance type is implied by the class.

    # Schema accessors.
    def key_schema(self) -> Schema: ...
    def schema(self) -> Schema: ...

    # Scan introspection.
    def is_empty(self) -> bool: ...
    def split(self) -> list[KeyRange]: ...
    def table_ids(self) -> list[str]: ...
    def column_groups(self) -> list[ColumnGroup]: ...

    # Reading; ``aux_table`` may be a pyarrow Table, a RecordBatch, or omitted.
    def to_record_batches(self, aux_table: pa.Table | pa.RecordBatch | None = None) -> pa.RecordBatchReader: ...

    # Sub-scans for a single column group or for one table's key space.
    def column_group_scan(self, column_group: ColumnGroup) -> ColumnGroupScan: ...
    def key_space_scan(self, table_id: str) -> KeySpaceScan: ...

    def metrics(self) -> dict[str, Any]: ...
|
41
|
+
|
42
|
+
class KeySpaceScan:
    """Type stub for ``KeySpaceScan``; exposes a fragment manifest and a key schema."""

    manifest: FragmentManifest

    def key_schema(self) -> Schema: ...
|
46
|
+
|
47
|
+
class ColumnGroupScan:
    """Type stub for ``ColumnGroupScan``; exposes a fragment manifest and a schema."""

    manifest: FragmentManifest

    def schema(self) -> Schema: ...
|
51
|
+
|
52
|
+
# Stub for the module-level ``write``; ``format`` defaults to "parquet" and ``partition_size`` is keyword-only.
def write(table: Table, expr: Expr, format: str = "parquet", *, partition_size: int | None = None): ...
|
53
|
+
# Stub for the module-level ``flush_wal``; ``manifest_format`` defaults to "parquet".
def flush_wal(table: Table, manifest_format: str = "parquet"): ...
|
@@ -0,0 +1,53 @@
|
|
1
|
+
import pyarrow as pa
|
2
|
+
from spiral.core.spec import FileFormat, FragmentLevel, KeyExtent, KeyMap, KeyRange, KeySpan
|
3
|
+
from spiral.types_ import Timestamp
|
4
|
+
|
5
|
+
class FragmentManifest:
    """Type stub for ``FragmentManifest``: a sized, indexable object convertible to and from Arrow."""

    # Container protocol; the element type returned by indexing is not annotated.
    def __len__(self): ...
    def __getitem__(self, idx: int): ...

    def to_arrow(self) -> pa.RecordBatchReader: ...

    @staticmethod
    def compute_schema(data_schema: pa.Schema) -> pa.Schema: ...

    # Alternate constructors.
    @staticmethod
    def from_fragment(fragment_file: FragmentFile) -> FragmentManifest: ...
    @staticmethod
    def from_arrow(reader: pa.RecordBatchReader) -> FragmentManifest: ...
    @staticmethod
    def empty() -> FragmentManifest: ...
|
17
|
+
|
18
|
+
class FragmentFile:
    """Type stub for ``FragmentFile``; every constructor argument is keyword-only."""

    def __init__(
        self,
        *,
        id: str,
        committed_at: Timestamp | None,
        compacted_at: Timestamp | None,
        format: FileFormat,
        format_metadata: bytes,
        size_bytes: int,
        column_ids: list[str],
        fs_id: str,
        fs_level: FragmentLevel,
        ks_id: str,
        key_span: KeySpan,
        key_extent: KeyExtent,
        key_map: KeyMap | None,
        stats: pa.StructArray,
    ): ...

    id: str
    committed_at: Timestamp | None
    compacted_at: Timestamp | None
    format: FileFormat
    # NOTE(review): declared optional here but required as ``bytes`` by __init__ -- confirm which is intended.
    format_metadata: bytes | None
    size_bytes: int
    # NOTE: Empty for keyspace file.
    column_ids: list[str]
    fs_id: str
    fs_level: FragmentLevel
    ks_id: str
    key_span: KeySpan
    key_extent: KeyExtent
    key_map: KeyMap | None
    # NOTE(review): __init__ also accepts ``stats``, which has no matching attribute declared here.

    @property
    def key_range(self) -> KeyRange: ...
|
@@ -0,0 +1,91 @@
|
|
1
|
+
"""The SpiralDB metastore API."""
|
2
|
+
|
3
|
+
from collections.abc import Callable
|
4
|
+
|
5
|
+
from spiral.core.spec import ColumnGroup, ColumnGroupMetadata, FileFormat, LogEntry, Schema, WriteAheadLog
|
6
|
+
from spiral.types_ import Timestamp, Uri
|
7
|
+
from spiraldb.proto.spiral.table import ManifestHandle
|
8
|
+
|
9
|
+
class FileHandle:
    """Type stub for ``FileHandle``: a URI, its file format, and an optional SPFS token."""

    uri: str
    format: FileFormat
    spfs_token: str | None

    # All constructor arguments are keyword-only.
    def __init__(self, *, uri: str, format: FileFormat, spfs_token: str | None): ...
|
15
|
+
|
16
|
+
class FileRef:
    """Type stub for ``FileRef``: a file id together with its type and format."""

    id: str
    file_type: FileType
    file_format: FileFormat

    # All constructor arguments are keyword-only.
    def __init__(self, *, id: str, file_type: FileType, file_format: FileFormat): ...

    def resolve_uri(self, root_uri: str) -> str:
        """Resolves the file reference URI given the root URI."""
        ...
|
25
|
+
|
26
|
+
class FileType:
    """Type stub for ``FileType``; its members are exposed as class attributes of the same type."""

    FragmentFile: FileType
    FragmentManifest: FileType
    ReferenceFile: FileType

    def __int__(self) -> int:
        """Returns the protobuf enum int value."""
        ...
|
33
|
+
|
34
|
+
class PyMetastore:
    """Rust implementation of the metastore API."""

    # Alternate constructors.
    @staticmethod
    def http(
        table_id: str, root_uri: str, key_schema: Schema, base_url: str, token_provider: Callable[[], str]
    ) -> PyMetastore:
        """Construct a PyMetastore backed by an HTTP metastore service."""
        ...

    @staticmethod
    def test(table_id: str, root_uri: str, key_schema: Schema) -> PyMetastore:
        """Construct a PyMetastore backed by an in-memory mock metastore service."""
        ...

    # Read-only identity of the table this metastore serves.
    @property
    def table_id(self) -> str: ...
    @property
    def root_uri(self) -> Uri: ...
    @property
    def key_schema(self) -> Schema: ...

    def get_wal(self) -> WriteAheadLog:
        """Return the log for the table."""
        ...

    def append_wal(self, prev_last_modified_at: Timestamp, entries: list[LogEntry]) -> WriteAheadLog:
        """Append additional entries into the write-ahead log given the previous write-ahead log timestamp.

        The given entries should have a timestamp of zero and will be assigned an actual timestamp by the server.

        This API is designed to support both a trivial compare-and-swap on the WAL, and also to support more advanced
        conflict resolution within the metastore.
        """
        ...

    def update_wal(
        self,
        prev_ks_manifest_handle_id: str,
        truncate_ts_max: Timestamp | None = None,
        new_ks_manifest_handle: ManifestHandle | None = None,
    ) -> WriteAheadLog:
        """Update the write-ahead log atomically.

        Supports WAL truncation and manifest handle updates necessary for flushing.
        """
        ...

    def get_column_group_metadata(self, column_group: ColumnGroup) -> ColumnGroupMetadata:
        """Return the metadata for column group."""
        ...

    def update_column_group_metadata(
        self, prev_last_modified_at: Timestamp, column_group_metadata: ColumnGroupMetadata
    ) -> ColumnGroupMetadata:
        """Update the column group metadata to the metastore given the previous metadata timestamp."""
        ...

    def list_column_groups(self) -> tuple[list[ColumnGroup], Timestamp]:
        """List all column groups in the table.

        Annotated as returning a ``(column groups, Timestamp)`` pair.

        NOTE(review): the earlier wording of this docstring said the result is None if no index
        is available, which the return annotation does not allow -- confirm which is correct.
        """
        ...
|
@@ -0,0 +1,257 @@
|
|
1
|
+
"""Type definitions for the spiral.core.spec module shipped as part of the native library."""
|
2
|
+
|
3
|
+
import pyarrow as pa
|
4
|
+
|
5
|
+
class ColumnGroup:
    """Type stub for ``ColumnGroup``, constructed from a path given as a list of strings."""

    def __init__(self, path: list[str]): ...

    @staticmethod
    def from_str(path: str) -> ColumnGroup: ...

    @property
    def table_id(self) -> str: ...
    @property
    def path(self) -> list[str]: ...

    def identifier(self, salt: int) -> str:
        """Return the column group identifier based on the given salt."""
        ...
|
16
|
+
|
17
|
+
class ColumnGroupMetadata:
    """Type stub for ``ColumnGroupMetadata``; every constructor argument is keyword-only."""

    def __init__(
        self,
        *,
        column_group: ColumnGroup,
        manifest_handle: ManifestHandle | None,
        last_modified_at: int,
        schema_versions: list[VersionedSchema] | None,
        immutable_schema: bool,
        schema_salt: int,
    ): ...

    column_group: ColumnGroup
    manifest_handle: ManifestHandle | None
    last_modified_at: int
    # The constructor accepts None for schema_versions, but the attribute is declared as a list.
    schema_versions: list[VersionedSchema]
    immutable_schema: bool
    schema_salt: int

    def latest_schema(self) -> VersionedSchema:
        """Returns the latest schema of the column group."""
        ...

    def asof(self, asof: int) -> ColumnGroupMetadata:
        """Returns the metadata as of a given timestamp. Currently just filtering versioned schemas."""
        ...

    def apply_wal(self, wal: WriteAheadLog) -> ColumnGroupMetadata:
        """Applies the given WAL to the metadata."""
        ...

    def __bytes__(self) -> bytes:
        """Serializes the ColumnGroupMetadata to a protobuf buffer."""
        ...

    @staticmethod
    def from_proto(buffer: bytes) -> ColumnGroupMetadata:
        """Deserializes a ColumnGroupMetadata from a protobuf buffer."""
        ...
|
54
|
+
|
55
|
+
class LogEntry:
    """Type stub for ``LogEntry``: a timestamp plus one operation out of five operation types."""

    ts: int
    operation: KeySpaceWriteOp | FragmentSetWriteOp | ConfigurationOp | SchemaEvolutionOp | SchemaBreakOp

    def column_group(self) -> ColumnGroup | None:
        """Returns the column group of the entry if it is associated with one."""
        ...

    def replace_timestamp(self, ts: int) -> LogEntry:
        """Returns a copy of the entry with the timestamp replaced."""
        ...

    # Factory methods, one per kind of entry; all take keyword-only arguments.
    @staticmethod
    def schema_break(*, column_group: ColumnGroup, removed_column_names: list[str]) -> LogEntry: ...
    @staticmethod
    def schema_evolution(*, column_group: ColumnGroup, new_schema: Schema) -> LogEntry: ...
    @staticmethod
    def ks_write(
        *,
        ks_id: str,
        manifest_handle: ManifestHandle,
    ) -> LogEntry: ...
    @staticmethod
    def fs_write(
        *,
        column_group: ColumnGroup,
        fs_id: str,
        fs_level: FragmentLevel,
        manifest_handle: ManifestHandle,
        key_span: KeySpan,
        key_extent: KeyExtent,
        column_ids: list[str],
    ) -> LogEntry: ...
|
86
|
+
|
87
|
+
class FileFormat:
    """Type stub for ``FileFormat``; its members are exposed as class attributes of the same type."""

    Parquet: FileFormat
    Protobuf: FileFormat
    BinaryArray: FileFormat

    def __init__(self, value: int): ...

    def __int__(self) -> int:
        """Returns the protobuf enum int value."""
        ...
|
97
|
+
|
98
|
+
class FragmentLevel:
    """Type stub for ``FragmentLevel``; its two members are exposed as class attributes of the same type."""

    L0: FragmentLevel
    L1: FragmentLevel

    def __int__(self) -> int:
        """Returns the protobuf enum int value."""
        ...
|
105
|
+
|
106
|
+
class Key:
    """Type stub for ``Key``, a wrapper around a ``bytes`` value."""

    def __init__(self, key: bytes): ...

    key: bytes

    def __add__(self, other: Key) -> Key:
        """Concatenates two keys.

        TODO(ngates): remove this function. It should not be necessary to concatenate keys."""
        ...

    # Annotated so that ``bytes(key)`` type-checks as ``bytes``.
    def __bytes__(self) -> bytes: ...

    def step(self) -> Key:
        """Returns the next key in the key space."""
        ...

    @staticmethod
    def min() -> Key: ...
    @staticmethod
    def max() -> Key: ...

    # NOTE(review): ``tuple[pa.Array]`` describes a tuple of exactly one array; confirm whether
    # variable-length tuples (``tuple[pa.Array, ...]``) are meant to be accepted.
    @staticmethod
    def from_array_tuple(array_tuple: tuple[pa.Array]) -> Key: ...
|
124
|
+
@staticmethod
|
125
|
+
def from_array_tuple(array_tuple: tuple[pa.Array]) -> Key: ...
|
126
|
+
|
127
|
+
class KeyExtent:
    """An inclusive range of keys."""

    min: Key
    max: Key

    # Both bounds are keyword-only.
    def __init__(self, *, min: Key, max: Key): ...

    def to_range(self) -> KeyRange:
        """Turn this inclusive key extent into an exclusive key range."""
        ...

    # Each named operation is declared next to an operator with the same signature shape.
    def union(self, key_extent: KeyExtent) -> KeyExtent: ...
    def __or__(self, other: KeyExtent) -> KeyExtent: ...

    def intersection(self, key_extent: KeyExtent) -> KeyExtent | None: ...
    def __and__(self, other: KeyExtent) -> KeyExtent | None: ...

    def contains(self, item: Key) -> bool: ...
    def __contains__(self, item: Key) -> bool: ...
|
144
|
+
|
145
|
+
class KeyRange:
    """A right-exclusive range of keys."""

    # Both bounds are keyword-only.
    def __init__(self, *, begin: Key, end: Key): ...

    begin: Key
    end: Key

    def union(self, other: KeyRange) -> KeyRange: ...
    def __or__(self, other: KeyRange) -> KeyRange: ...
    def intersection(self, key_extent: KeyRange) -> KeyRange | None: ...
    def __and__(self, other: KeyRange) -> KeyRange | None: ...
    def contains(self, item: Key) -> bool: ...
    def __contains__(self, item: Key) -> bool: ...

    def is_disjoint(self, key_range: KeyRange) -> bool:
        """Return whether this range and ``key_range`` do not overlap.

        Equivalent to ``self.end <= key_range.begin or self.begin >= key_range.end``.
        """
        # Stub bodies are never executed, so the expression is kept as documentation only.
        ...

    @staticmethod
    def beginning_with(begin: Key) -> KeyRange: ...
    @staticmethod
    def ending_with(end: Key) -> KeyRange: ...
    @staticmethod
    def full() -> KeyRange: ...
|
168
|
+
|
169
|
+
class KeySpan:
    """An exclusive range of keys as indexed by their position in a key space."""

    begin: int
    end: int

    # Both bounds are keyword-only integers.
    def __init__(self, *, begin: int, end: int): ...

    def __len__(self) -> int: ...
    def shift(self, offset: int) -> KeySpan: ...

    # ``union`` and ``|`` are declared with the same signature shape.
    def union(self, other: KeySpan) -> KeySpan: ...
    def __or__(self, other: KeySpan) -> KeySpan: ...
|
181
|
+
|
182
|
+
class KeyMap:
    """Displacement map."""

    # No members are declared for this type in the stub.
|
184
|
+
|
185
|
+
class ManifestHandle:
    """Type stub for ``ManifestHandle``: an id, file format, file size, and optional SPFS format metadata."""

    id: str
    format: FileFormat
    file_size: int
    spfs_format_metadata: bytes | None

    # Unlike most constructors in this stub, these arguments may be passed positionally.
    def __init__(self, id: str, format: FileFormat, file_size: int, spfs_format_metadata: bytes | None): ...
|
192
|
+
|
193
|
+
class Schema:
    """Type stub for ``Schema``, convertible to and from a pyarrow schema."""

    @staticmethod
    def from_arrow(arrow: pa.Schema) -> Schema:
        """Creates a Schema from an Arrow schema."""
        ...

    def to_arrow(self) -> pa.Schema:
        """Returns the Arrow schema."""
        ...
|
202
|
+
|
203
|
+
class VersionedSchema:
    """Type stub for ``VersionedSchema``: a schema together with an integer ``ts`` and column ids."""

    ts: int
    schema: Schema
    column_ids: list[str]
|
207
|
+
|
208
|
+
class KeySpaceWriteOp:
    """Type stub for ``KeySpaceWriteOp``; carries a key space id and a manifest handle."""

    ks_id: str
    manifest_handle: ManifestHandle
|
211
|
+
|
212
|
+
class FragmentSetWriteOp:
    """Type stub for ``FragmentSetWriteOp``; declares the same fields that ``LogEntry.fs_write`` accepts."""

    column_group: ColumnGroup
    fs_id: str
    fs_level: FragmentLevel
    manifest_handle: ManifestHandle
    key_span: KeySpan
    key_extent: KeyExtent
    column_ids: list[str]
|
220
|
+
|
221
|
+
class ConfigurationOp:
    """Type stub for ``ConfigurationOp``; only its column group is declared."""

    column_group: ColumnGroup
|
223
|
+
|
224
|
+
class SchemaEvolutionOp:
    """Type stub for ``SchemaEvolutionOp``; only its column group is declared."""

    column_group: ColumnGroup
|
226
|
+
|
227
|
+
class SchemaBreakOp:
    """Type stub for ``SchemaBreakOp``; only its column group is declared."""

    column_group: ColumnGroup
|
229
|
+
|
230
|
+
class WriteAheadLog:
    """Type stub for ``WriteAheadLog``: a list of log entries plus truncation and manifest state."""

    def __init__(
        self,
        *,
        entries: list[LogEntry] | None = None,
        truncated_up_to: int = 0,
    ): ...

    entries: list[LogEntry]
    truncated_up_to: int
    # Not a constructor argument; only exposed as an attribute.
    ks_manifest_handle: ManifestHandle | None

    @property
    def last_modified_at(self) -> int:
        """Returns the timestamp of the last modification of the log."""
        ...

    def filter(
        self, asof: int | None = None, since: int | None = None, column_group: ColumnGroup | None = None
    ) -> WriteAheadLog:
        """Filters the WAL to entries by the given parameters."""
        ...

    def __bytes__(self) -> bytes:
        """Serializes the WriteAheadLog to a protobuf buffer."""
        ...

    @staticmethod
    def from_proto(buffer: bytes) -> WriteAheadLog:
        """Deserializes a WriteAheadLog from a protobuf buffer."""
        ...
|