pyspiral 0.1.0__cp310-abi3-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyspiral-0.1.0.dist-info/METADATA +48 -0
- pyspiral-0.1.0.dist-info/RECORD +81 -0
- pyspiral-0.1.0.dist-info/WHEEL +4 -0
- pyspiral-0.1.0.dist-info/entry_points.txt +2 -0
- spiral/__init__.py +11 -0
- spiral/_lib.abi3.so +0 -0
- spiral/adbc.py +386 -0
- spiral/api/__init__.py +221 -0
- spiral/api/admin.py +29 -0
- spiral/api/filesystems.py +125 -0
- spiral/api/organizations.py +90 -0
- spiral/api/projects.py +160 -0
- spiral/api/tables.py +94 -0
- spiral/api/tokens.py +56 -0
- spiral/api/workloads.py +45 -0
- spiral/arrow.py +209 -0
- spiral/authn/__init__.py +0 -0
- spiral/authn/authn.py +89 -0
- spiral/authn/device.py +206 -0
- spiral/authn/github_.py +33 -0
- spiral/authn/modal_.py +18 -0
- spiral/catalog.py +78 -0
- spiral/cli/__init__.py +82 -0
- spiral/cli/__main__.py +4 -0
- spiral/cli/admin.py +21 -0
- spiral/cli/app.py +48 -0
- spiral/cli/console.py +95 -0
- spiral/cli/fs.py +47 -0
- spiral/cli/login.py +13 -0
- spiral/cli/org.py +90 -0
- spiral/cli/printer.py +45 -0
- spiral/cli/project.py +107 -0
- spiral/cli/state.py +3 -0
- spiral/cli/table.py +20 -0
- spiral/cli/token.py +27 -0
- spiral/cli/types.py +53 -0
- spiral/cli/workload.py +59 -0
- spiral/config.py +26 -0
- spiral/core/__init__.py +0 -0
- spiral/core/core/__init__.pyi +53 -0
- spiral/core/manifests/__init__.pyi +53 -0
- spiral/core/metastore/__init__.pyi +91 -0
- spiral/core/spec/__init__.pyi +257 -0
- spiral/dataset.py +239 -0
- spiral/debug.py +251 -0
- spiral/expressions/__init__.py +222 -0
- spiral/expressions/base.py +149 -0
- spiral/expressions/http.py +86 -0
- spiral/expressions/io.py +100 -0
- spiral/expressions/list_.py +68 -0
- spiral/expressions/refs.py +44 -0
- spiral/expressions/str_.py +39 -0
- spiral/expressions/struct.py +57 -0
- spiral/expressions/tiff.py +223 -0
- spiral/expressions/udf.py +46 -0
- spiral/grpc_.py +32 -0
- spiral/project.py +137 -0
- spiral/proto/_/__init__.py +0 -0
- spiral/proto/_/arrow/__init__.py +0 -0
- spiral/proto/_/arrow/flight/__init__.py +0 -0
- spiral/proto/_/arrow/flight/protocol/__init__.py +0 -0
- spiral/proto/_/arrow/flight/protocol/sql/__init__.py +1990 -0
- spiral/proto/_/scandal/__init__.py +223 -0
- spiral/proto/_/spfs/__init__.py +36 -0
- spiral/proto/_/spiral/__init__.py +0 -0
- spiral/proto/_/spiral/table/__init__.py +225 -0
- spiral/proto/_/spiraldb/__init__.py +0 -0
- spiral/proto/_/spiraldb/metastore/__init__.py +499 -0
- spiral/proto/__init__.py +0 -0
- spiral/proto/scandal/__init__.py +45 -0
- spiral/proto/spiral/__init__.py +0 -0
- spiral/proto/spiral/table/__init__.py +96 -0
- spiral/proto/substrait/__init__.py +3399 -0
- spiral/proto/substrait/extensions/__init__.py +115 -0
- spiral/proto/util.py +41 -0
- spiral/py.typed +0 -0
- spiral/scan_.py +168 -0
- spiral/settings.py +157 -0
- spiral/substrait_.py +275 -0
- spiral/table.py +157 -0
- spiral/types_.py +6 -0
spiral/cli/state.py
ADDED
spiral/cli/table.py
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
from typing import Annotated
|
2
|
+
|
3
|
+
import rich
|
4
|
+
from typer import Option
|
5
|
+
|
6
|
+
from spiral.api.tables import ListTables, Table
|
7
|
+
from spiral.cli import AsyncTyper, OptionalStr, printer, state
|
8
|
+
from spiral.cli.types import ProjectArg
|
9
|
+
|
10
|
+
app = AsyncTyper()
|
11
|
+
|
12
|
+
|
13
|
+
@app.command(help="List tables.")
def ls(
    project: ProjectArg,
    dataset: Annotated[OptionalStr, Option(help="Filter by dataset name.")] = None,
):
    """List tables.

    Builds a ``ListTables.Request`` for ``project`` (and ``dataset``, when
    given), collects the listing and prints it via ``printer.table_of_models``.
    """
    request = ListTables.Request(project_id=project, dataset=dataset)
    # Collect the whole listing before rendering it.
    rows = list(state.settings.api.table.list(request))
    columns = ["id", "project_id", "dataset", "table"]
    rich.print(printer.table_of_models(Table, rows, fields=columns))
|
spiral/cli/token.py
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
from typing import Annotated
|
2
|
+
|
3
|
+
import rich
|
4
|
+
from typer import Argument, Option
|
5
|
+
|
6
|
+
from spiral.api.tokens import ListTokens, RevokeToken, Token
|
7
|
+
from spiral.cli import AsyncTyper, OptionalStr, printer, state
|
8
|
+
from spiral.cli.types import ProjectArg
|
9
|
+
|
10
|
+
app = AsyncTyper()
|
11
|
+
|
12
|
+
|
13
|
+
@app.command(help="List tokens.")
def ls(
    project: ProjectArg,
    on_behalf_of: Annotated[OptionalStr, Option(help="Filter by on behalf of.")] = None,
):
    """List tokens.

    Builds a ``ListTokens.Request`` for ``project`` (and ``on_behalf_of``,
    when given), collects the listing and prints it via
    ``printer.table_of_models``.
    """
    request = ListTokens.Request(project_id=project, on_behalf_of=on_behalf_of)
    rows = list(state.settings.api.token.list(request))
    columns = ["id", "project_id", "on_behalf_of"]
    rich.print(printer.table_of_models(Token, rows, fields=columns))
|
20
|
+
|
21
|
+
|
22
|
+
@app.command(help="Revoke a token.")
def revoke(token_id: Annotated[str, Argument(help="Token ID.")]):
    """Revoke the token with the given ID and print a confirmation line."""
    request = RevokeToken.Request(token_id=token_id)
    res = state.settings.api.token.revoke(request)
    # The confirmation echoes the fields of the token returned by the API.
    message = f"Revoked token {res.token.id} for project {res.token.project_id} acting on behalf of {res.token.on_behalf_of}"
    rich.print(message)
|
spiral/cli/types.py
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
from typing import Annotated
|
2
|
+
|
3
|
+
import questionary
|
4
|
+
import rich
|
5
|
+
import typer
|
6
|
+
from questionary import Choice
|
7
|
+
from typer import Argument
|
8
|
+
|
9
|
+
from spiral.api import OrganizationId, ProjectId
|
10
|
+
from spiral.cli import state
|
11
|
+
|
12
|
+
|
13
|
+
def _project_default():
    """Interactively pick a project ID when none is given on the command line.

    Lists the projects through the API and shows a ``questionary`` selection
    prompt whose entries are labelled ``"<id> - <name>"`` (or just the ID when
    the project has no name).

    Returns:
        The ``id`` of the project selected in the prompt.

    Raises:
        typer.Exit: With code 1 when no projects are listed, or when the
            prompt is cancelled and therefore yields no selection.
    """
    projects = list(state.settings.api.project.list())

    if not projects:
        rich.print("[red]No projects found[/red]")
        raise typer.Exit(1)

    selected = questionary.select(
        "Select a project",
        choices=[
            Choice(title=f"{project.id} - {project.name}" if project.name else project.id, value=project.id)
            for project in projects
        ],
    ).ask()

    # ``ask()`` returns None when the prompt is cancelled (e.g. Ctrl-C); stop
    # here instead of handing None to the command as the project ID.
    if selected is None:
        raise typer.Exit(1)

    return selected
|
27
|
+
|
28
|
+
|
29
|
+
# CLI argument type for a project ID. When the argument is omitted, Typer
# calls ``_project_default`` to obtain the value; ``show_default=False`` keeps
# the default out of the help output.
ProjectArg = Annotated[ProjectId, Argument(help="Project ID", show_default=False, default_factory=_project_default)]
|
30
|
+
|
31
|
+
|
32
|
+
def _org_default():
    """Interactively pick an organization ID when none is given on the command line.

    Lists the user's organization memberships through the API and shows a
    ``questionary`` selection prompt whose entries are labelled
    ``"<id> - <name>"`` (or just the ID when the organization has no name).

    Returns:
        The ``id`` of the organization selected in the prompt.

    Raises:
        typer.Exit: With code 1 when no memberships are listed, or when the
            prompt is cancelled and therefore yields no selection.
    """
    memberships = list(state.settings.api.organization.list_user_memberships())

    if not memberships:
        rich.print("[red]No organizations found[/red]")
        raise typer.Exit(1)

    selected = questionary.select(
        "Select an organization",
        choices=[
            Choice(
                title=f"{m.organization.id} - {m.organization.name}" if m.organization.name else m.organization.id,
                value=m.organization.id,
            )
            for m in memberships
        ],
    ).ask()

    # ``ask()`` returns None when the prompt is cancelled (e.g. Ctrl-C); stop
    # here instead of handing None to the command as the organization ID.
    if selected is None:
        raise typer.Exit(1)

    return selected
|
49
|
+
|
50
|
+
|
51
|
+
# CLI argument type for an organization ID. When the argument is omitted,
# Typer calls ``_org_default`` to obtain the value; ``show_default=False``
# keeps the default out of the help output.
OrganizationArg = Annotated[
    OrganizationId, Argument(help="Organization ID", show_default=False, default_factory=_org_default)
]
|
spiral/cli/workload.py
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
from typing import Annotated
|
2
|
+
|
3
|
+
import pyperclip
|
4
|
+
import questionary
|
5
|
+
import rich
|
6
|
+
from questionary import Choice
|
7
|
+
from typer import Argument, Option
|
8
|
+
|
9
|
+
from spiral.api.workloads import CreateWorkload, IssueToken, ListWorkloads, Workload
|
10
|
+
from spiral.cli import AsyncTyper, OptionalStr, printer, state
|
11
|
+
from spiral.cli.types import ProjectArg
|
12
|
+
|
13
|
+
app = AsyncTyper()
|
14
|
+
|
15
|
+
|
16
|
+
@app.command(help="Create a new workload.")
def create(
    project: ProjectArg,
    name: Annotated[OptionalStr, Option(help="Friendly name for the workload.")] = None,
):
    """Create a workload in ``project`` and print the ID of the new workload."""
    request = CreateWorkload.Request(project_id=project, name=name)
    res = state.settings.api.workload.create(request)
    message = f"Created workload {res.workload.id}"
    rich.print(message)
|
23
|
+
|
24
|
+
|
25
|
+
@app.command(help="List workloads.")
def ls(
    project: ProjectArg,
):
    """List workloads.

    Builds a ``ListWorkloads.Request`` for ``project``, collects the listing
    and prints it via ``printer.table_of_models``.
    """
    request = ListWorkloads.Request(project_id=project)
    rows = list(state.settings.api.workload.list(request))
    columns = ["id", "project_id", "name"]
    rich.print(printer.table_of_models(Workload, rows, fields=columns))
|
31
|
+
|
32
|
+
|
33
|
+
@app.command(help="Issue a token.")
def token(workload_id: Annotated[str, Argument(help="Workload ID.")]):
    """Issue a token for a workload and let the user decide what to do with the secret.

    The token is issued first; the user is then asked whether to copy the
    secret to the clipboard, print it, or exit without revealing it. The token
    ID is printed once the prompt loop ends.

    Args:
        workload_id: ID of the workload to issue the token for.
    """
    res = state.settings.api.workload.issue_token(IssueToken.Request(workload_id=workload_id))

    while True:
        choice = questionary.select(
            "What would you like to do with the secret? You will not be able to see this secret again!",
            choices=[
                Choice(title="Copy to clipboard", value=1),
                Choice(title="Print to console", value=2),
                Choice(title="Exit", value=3),
            ],
        ).ask()

        if choice == 1:
            try:
                pyperclip.copy(res.token_secret)
            except pyperclip.PyperclipException:
                # The token has already been issued and the secret cannot be
                # shown again, so a missing clipboard backend must not crash
                # the command; show the prompt again so another option can be
                # chosen.
                rich.print("[red]Could not copy to clipboard. Please choose another option.[/red]")
                continue
            rich.print("[green]Secret copied to clipboard![/green]")
            break
        elif choice == 2:
            rich.print(f"[green]Token Secret:[/green] {res.token_secret}")
            break
        elif choice == 3:
            break
        else:
            # Reached when the prompt returns no selection; ask again.
            rich.print("[red]Invalid choice. Please try again.[/red]")

    rich.print(f"[green]Token ID:[/green] {res.token_id}")
|
spiral/config.py
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
import os
|
2
|
+
|
3
|
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
4
|
+
|
5
|
+
# File format name, read once at import time from the SPIRAL_FILE_FORMAT
# environment variable; falls back to "parquet" when the variable is unset.
FILE_FORMAT = os.environ.get("SPIRAL_FILE_FORMAT", "parquet")
|
6
|
+
|
7
|
+
|
8
|
+
class Config(BaseSettings):
    """Settings model built on pydantic-settings ``BaseSettings``.

    ``model_config`` sets the environment prefix to ``SPIRAL_CORE__``, the
    nested delimiter to ``__`` and marks instances as frozen.
    """

    model_config = SettingsConfigDict(
        env_nested_delimiter="__",
        env_prefix="SPIRAL_CORE__",
        frozen=True,
    )

    # Default is 256 * 1024 * 1024, i.e. a byte count.
    partition_file_min_size: int = 256 * 1024 * 1024  # 256MB
    # NOTE(review): presumably controls whether the WAL is flushed as part of
    # each write — confirm against the code that reads this setting.
    flush_wal_on_write: bool = False
|
17
|
+
|
18
|
+
# TODO(marko): Support config. Unused after migration to Rust.
|
19
|
+
# #: Defaults to ThreadPoolExecutor's default (based on os.cpu_count().
|
20
|
+
# scan_num_threads: int | None = 61 # 61 is used by Golang and Tokio, for some reason...
|
21
|
+
#
|
22
|
+
# #: The duration of WAL that is preserved to allow for txn conflict resolution.
|
23
|
+
# transaction_window: int = 0 if DEV else timedelta(days=1).total_seconds()
|
24
|
+
#
|
25
|
+
# #: Truncation length of string statistics.
|
26
|
+
# string_truncation_length: int = 1024
|
spiral/core/__init__.py
ADDED
File without changes
|
@@ -0,0 +1,53 @@
|
|
1
|
+
from typing import Any
|
2
|
+
|
3
|
+
import pyarrow as pa
|
4
|
+
from spiral.core.manifests import FragmentManifest
|
5
|
+
from spiral.core.metastore import PyMetastore
|
6
|
+
from spiral.core.spec import ColumnGroup, ColumnGroupMetadata, KeyRange, Schema, WriteAheadLog
|
7
|
+
from spiral.expressions import Expr
|
8
|
+
|
9
|
+
class Table:
    """Type stub for a table object constructed from a ``PyMetastore``.

    Only signatures are declared here. NOTE(review): the implementation
    presumably lives in the native library — confirm.
    """

    def __init__(self, metastore: PyMetastore): ...

    id: str
    root_uri: str
    key_schema: Schema
    metastore: PyMetastore

    # ``asof`` is keyword-only and has no default on every accessor below.
    # NOTE(review): None presumably means "latest" — confirm.
    def get_wal(self, *, asof: int | None) -> WriteAheadLog: ...
    def get_column_group_metadata(self, column_group: ColumnGroup, *, asof: int | None) -> ColumnGroupMetadata: ...
    def list_column_groups(self, *, asof: int | None) -> list[ColumnGroup] | None: ...
    def get_schema(self, *, asof: int | None) -> Schema: ...
|
21
|
+
|
22
|
+
class TableScan:
    """Type stub for a scan built from a projection and an optional filter expression.

    Only signatures are declared here.
    """

    # ``__init__`` is annotated ``-> None``: an initializer never returns the
    # instance, and type checkers reject any other return type.
    def __init__(
        self,
        projection: Expr,
        filter: Expr | None = None,
        asof: int | None = None,
        exclude_keys: bool = False,
        aux_schema: pa.Schema | None = None,
    ) -> None: ...
    def key_schema(self) -> Schema: ...
    def schema(self) -> Schema: ...
    def is_empty(self) -> bool: ...
    def split(self) -> list[KeyRange]: ...
    def table_ids(self) -> list[str]: ...
    def column_groups(self) -> list[ColumnGroup]: ...
    # NOTE(review): ``aux_table`` presumably has to match the ``aux_schema``
    # given to the constructor — confirm.
    def to_record_batches(self, aux_table: pa.Table | pa.RecordBatch | None = None) -> pa.RecordBatchReader: ...
    def column_group_scan(self, column_group: ColumnGroup) -> ColumnGroupScan: ...
    def key_space_scan(self, table_id: str) -> KeySpaceScan: ...
    def metrics(self) -> dict[str, Any]: ...
|
41
|
+
|
42
|
+
class KeySpaceScan:
    """Type stub for the object returned by ``TableScan.key_space_scan``."""

    manifest: FragmentManifest

    def key_schema(self) -> Schema: ...
|
46
|
+
|
47
|
+
class ColumnGroupScan:
    """Type stub for the object returned by ``TableScan.column_group_scan``."""

    manifest: FragmentManifest

    def schema(self) -> Schema: ...
|
51
|
+
|
52
|
+
# Stub signature only. ``format`` defaults to "parquet" and ``partition_size``
# is keyword-only.
# NOTE(review): presumably writes the result of ``expr`` into ``table`` — confirm.
def write(table: Table, expr: Expr, format: str = "parquet", *, partition_size: int | None = None): ...
|
53
|
+
# Stub signature only. ``manifest_format`` defaults to "parquet".
# NOTE(review): presumably flushes the write-ahead log of ``table`` — confirm.
def flush_wal(table: Table, manifest_format: str = "parquet"): ...
|
@@ -0,0 +1,53 @@
|
|
1
|
+
import pyarrow as pa
|
2
|
+
from spiral.core.spec import FileFormat, FragmentLevel, KeyExtent, KeyMap, KeyRange, KeySpan
|
3
|
+
from spiral.types_ import Timestamp
|
4
|
+
|
5
|
+
class FragmentManifest:
    """Type stub for a fragment manifest.

    Declares length and integer indexing, conversion to and from an Arrow
    ``RecordBatchReader``, and static constructors.
    """

    def __len__(self): ...
    # NOTE(review): the element type is not annotated; presumably a
    # ``FragmentFile`` — confirm.
    def __getitem__(self, idx: int): ...
    def to_arrow(self) -> pa.RecordBatchReader: ...
    @staticmethod
    def compute_schema(data_schema: pa.Schema) -> pa.Schema: ...
    @staticmethod
    def from_fragment(fragment_file: FragmentFile) -> FragmentManifest: ...
    @staticmethod
    def from_arrow(reader: pa.RecordBatchReader) -> FragmentManifest: ...
    @staticmethod
    def empty() -> FragmentManifest: ...
|
17
|
+
|
18
|
+
class FragmentFile:
    """Type stub for a fragment file record.

    All constructor arguments are keyword-only.
    """

    id: str
    committed_at: Timestamp | None
    compacted_at: Timestamp | None
    format: FileFormat
    # NOTE(review): declared optional here, but ``__init__`` annotates
    # ``format_metadata`` as plain ``bytes`` — confirm which is intended.
    format_metadata: bytes | None
    size_bytes: int
    # NOTE: Empty for keyspace file.
    column_ids: list[str]
    fs_id: str
    fs_level: FragmentLevel
    ks_id: str
    key_span: KeySpan
    key_extent: KeyExtent
    key_map: KeyMap | None

    # NOTE(review): ``stats`` is accepted here but no ``stats`` attribute is
    # declared above — confirm whether it is exposed.
    def __init__(
        self,
        *,
        id: str,
        committed_at: Timestamp | None,
        compacted_at: Timestamp | None,
        format: FileFormat,
        format_metadata: bytes,
        size_bytes: int,
        column_ids: list[str],
        fs_id: str,
        fs_level: FragmentLevel,
        ks_id: str,
        key_span: KeySpan,
        key_extent: KeyExtent,
        key_map: KeyMap | None,
        stats: pa.StructArray,
    ): ...
    @property
    def key_range(self) -> KeyRange: ...
|
@@ -0,0 +1,91 @@
|
|
1
|
+
"""The SpiralDB metastore API."""
|
2
|
+
|
3
|
+
from collections.abc import Callable
|
4
|
+
|
5
|
+
from spiral.core.spec import ColumnGroup, ColumnGroupMetadata, FileFormat, LogEntry, Schema, WriteAheadLog
|
6
|
+
from spiral.types_ import Timestamp, Uri
|
7
|
+
from spiraldb.proto.spiral.table import ManifestHandle
|
8
|
+
|
9
|
+
class FileHandle:
    """Type stub for a file handle: a URI, its file format and an optional ``spfs_token``.

    All constructor arguments are keyword-only.
    """

    def __init__(self, *, uri: str, format: FileFormat, spfs_token: str | None): ...

    uri: str
    format: FileFormat
    spfs_token: str | None
|
15
|
+
|
16
|
+
class FileRef:
    """Type stub for a file reference: an ID plus its file type and file format.

    All constructor arguments are keyword-only.
    """

    def __init__(self, *, id: str, file_type: FileType, file_format: FileFormat): ...

    id: str
    file_type: FileType
    file_format: FileFormat

    def resolve_uri(self, root_uri: str) -> str:
        """Resolves the file reference URI given the root URI."""
|
25
|
+
|
26
|
+
class FileType:
    """Type stub for the file-type enumeration; members are declared as class attributes."""

    FragmentFile: FileType
    FragmentManifest: FileType
    ReferenceFile: FileType

    def __int__(self) -> int:
        """Returns the protobuf enum int value."""
|
33
|
+
|
34
|
+
class PyMetastore:
    """Rust implementation of the metastore API."""

    @property
    def table_id(self) -> str: ...
    @property
    def root_uri(self) -> Uri: ...
    @property
    def key_schema(self) -> Schema: ...
    def get_wal(self) -> WriteAheadLog:
        """Return the log for the table."""
        ...

    def append_wal(self, prev_last_modified_at: Timestamp, entries: list[LogEntry]) -> WriteAheadLog:
        """Append additional entries into the write-ahead log given the previous write-ahead log timestamp.

        The given entries should have a timestamp of zero and will be assigned an actual timestamp by the server.

        This API is designed to support both a trivial compare-and-swap on the WAL, and also to support more advanced
        conflict resolution within the metastore.
        """
        ...

    def update_wal(
        self,
        prev_ks_manifest_handle_id: str,
        truncate_ts_max: Timestamp | None = None,
        new_ks_manifest_handle: ManifestHandle | None = None,
    ) -> WriteAheadLog:
        """Update the write-ahead log atomically.

        Supports WAL truncation and manifest handle updates necessary for flushing.
        """
        ...

    def get_column_group_metadata(self, column_group: ColumnGroup) -> ColumnGroupMetadata:
        """Return the metadata for the given column group."""
        ...

    def update_column_group_metadata(
        self, prev_last_modified_at: Timestamp, column_group_metadata: ColumnGroupMetadata
    ) -> ColumnGroupMetadata:
        """Update the column group metadata in the metastore given the previous metadata timestamp."""
        ...

    def list_column_groups(self) -> tuple[list[ColumnGroup], Timestamp]:
        """List all column groups in the table, returned together with a timestamp.

        NOTE(review): this docstring previously said "or None if no index is
        available", but the annotated return type is a non-optional tuple —
        confirm which of the two is correct.
        """
        ...

    @staticmethod
    def http(
        table_id: str, root_uri: str, key_schema: Schema, base_url: str, token_provider: Callable[[], str]
    ) -> PyMetastore:
        """Construct a PyMetastore backed by an HTTP metastore service."""

    @staticmethod
    def test(table_id: str, root_uri: str, key_schema: Schema) -> PyMetastore:
        """Construct a PyMetastore backed by an in-memory mock metastore service."""
|
@@ -0,0 +1,257 @@
|
|
1
|
+
"""Type definitions for the spiral.core.spec module shipped as part of the native library."""
|
2
|
+
|
3
|
+
import pyarrow as pa
|
4
|
+
|
5
|
+
class ColumnGroup:
    """Type stub for a column group, constructed from a path given as a list of strings."""

    def __init__(self, path: list[str]): ...
    @property
    def table_id(self) -> str: ...
    @property
    def path(self) -> list[str]: ...
    def identifier(self, salt: int) -> str:
        """Return the column group identifier based on the given salt."""

    # Alternate constructor taking the path as a single string.
    # NOTE(review): the separator used to split the string is not visible
    # here — confirm.
    @staticmethod
    def from_str(path: str) -> ColumnGroup: ...
|
16
|
+
|
17
|
+
class ColumnGroupMetadata:
    """Type stub for the metadata of a column group.

    All constructor arguments are keyword-only.
    """

    # NOTE(review): ``schema_versions`` may be None here while the attribute
    # below is a non-optional list; presumably None is treated as an empty
    # list — confirm.
    def __init__(
        self,
        *,
        column_group: ColumnGroup,
        manifest_handle: ManifestHandle | None,
        last_modified_at: int,
        schema_versions: list[VersionedSchema] | None,
        immutable_schema: bool,
        schema_salt: int,
    ): ...

    column_group: ColumnGroup
    manifest_handle: ManifestHandle | None
    last_modified_at: int
    schema_versions: list[VersionedSchema]
    immutable_schema: bool
    schema_salt: int

    def latest_schema(self) -> VersionedSchema:
        """Returns the latest schema of the column group."""
        ...

    def asof(self, asof: int) -> ColumnGroupMetadata:
        """Returns the metadata as of a given timestamp. Currently just filtering versioned schemas."""
        ...

    def apply_wal(self, wal: WriteAheadLog) -> ColumnGroupMetadata:
        """Applies the given WAL to the metadata."""

    def __bytes__(self) -> bytes:
        """Serializes the ColumnGroupMetadata to a protobuf buffer."""

    @staticmethod
    def from_proto(buffer: bytes) -> ColumnGroupMetadata:
        """Deserializes a ColumnGroupMetadata from a protobuf buffer."""
        ...
|
54
|
+
|
55
|
+
class LogEntry:
    """Type stub for a log entry: a timestamp plus one operation.

    Entries are created through the static constructors below, all of which
    take keyword-only arguments.
    """

    ts: int
    operation: KeySpaceWriteOp | FragmentSetWriteOp | ConfigurationOp | SchemaEvolutionOp | SchemaBreakOp

    def column_group(self) -> ColumnGroup | None:
        """Returns the column group of the entry if it is associated with one."""

    def replace_timestamp(self, ts: int) -> LogEntry:
        """Returns a copy of the entry with the timestamp replaced."""

    @staticmethod
    def schema_break(*, column_group: ColumnGroup, removed_column_names: list[str]) -> LogEntry: ...
    @staticmethod
    def schema_evolution(*, column_group: ColumnGroup, new_schema: Schema) -> LogEntry: ...
    @staticmethod
    def ks_write(
        *,
        ks_id: str,
        manifest_handle: ManifestHandle,
    ) -> LogEntry: ...
    @staticmethod
    def fs_write(
        *,
        column_group: ColumnGroup,
        fs_id: str,
        fs_level: FragmentLevel,
        manifest_handle: ManifestHandle,
        key_span: KeySpan,
        key_extent: KeyExtent,
        column_ids: list[str],
    ) -> LogEntry: ...
|
86
|
+
|
87
|
+
class FileFormat:
    """Type stub for the file-format enumeration; members are declared as class attributes."""

    def __init__(self, value: int): ...

    Parquet: FileFormat
    Protobuf: FileFormat
    BinaryArray: FileFormat

    def __int__(self) -> int:
        """Returns the protobuf enum int value."""
        ...
|
97
|
+
|
98
|
+
class FragmentLevel:
    """Type stub for the fragment-level enumeration; members are declared as class attributes."""

    L0: FragmentLevel
    L1: FragmentLevel

    def __int__(self) -> int:
        """Returns the protobuf enum int value."""
        ...
|
105
|
+
|
106
|
+
class Key:
    """Type stub for a key wrapping a ``bytes`` value."""

    def __init__(self, key: bytes): ...

    key: bytes

    def __add__(self, other: Key) -> Key:
        """Concatenates two keys.

        TODO(ngates): remove this function. It should not be necessary to concatenate keys."""

    def __bytes__(self) -> bytes: ...
    def step(self) -> Key:
        """Returns the next key in the key space."""

    @staticmethod
    def min() -> Key: ...
    @staticmethod
    def max() -> Key: ...
    # NOTE(review): ``tuple[pa.Array]`` denotes a tuple of exactly one array;
    # if any number of arrays is accepted this should presumably be
    # ``tuple[pa.Array, ...]`` — confirm.
    @staticmethod
    def from_array_tuple(array_tuple: tuple[pa.Array]) -> Key: ...
|
126
|
+
|
127
|
+
class KeyExtent:
    """An inclusive range of keys."""

    def __init__(self, *, min: Key, max: Key): ...

    min: Key
    max: Key

    def to_range(self) -> KeyRange:
        """Turn this inclusive key extent into an exclusive key range."""

    # The operator dunders below have the same signatures as the named
    # methods next to them.
    # NOTE(review): presumably they are aliases (``|`` for union, ``&`` for
    # intersection, ``in`` for contains) — confirm.
    def union(self, key_extent: KeyExtent) -> KeyExtent: ...
    def __or__(self, other: KeyExtent) -> KeyExtent: ...
    def intersection(self, key_extent: KeyExtent) -> KeyExtent | None: ...
    def __and__(self, other: KeyExtent) -> KeyExtent | None: ...
    def contains(self, item: Key) -> bool: ...
    def __contains__(self, item: Key) -> bool: ...
|
144
|
+
|
145
|
+
class KeyRange:
    """A right-exclusive range of keys."""

    def __init__(self, *, begin: Key, end: Key): ...

    begin: Key
    end: Key

    # The operator dunders below have the same signatures as the named
    # methods next to them.
    # NOTE(review): presumably they are aliases (``|`` for union, ``&`` for
    # intersection, ``in`` for contains) — confirm.
    def union(self, other: KeyRange) -> KeyRange: ...
    def __or__(self, other: KeyRange) -> KeyRange: ...
    def intersection(self, key_extent: KeyRange) -> KeyRange | None: ...
    def __and__(self, other: KeyRange) -> KeyRange | None: ...
    def contains(self, item: Key) -> bool: ...
    def __contains__(self, item: Key) -> bool: ...
    def is_disjoint(self, key_range: KeyRange) -> bool:
        """Return True when this range ends at or before ``key_range`` begins, or begins at or after it ends."""
        return self.end <= key_range.begin or self.begin >= key_range.end

    @staticmethod
    def beginning_with(begin: Key) -> KeyRange: ...
    @staticmethod
    def ending_with(end: Key) -> KeyRange: ...
    @staticmethod
    def full() -> KeyRange: ...
|
168
|
+
|
169
|
+
class KeySpan:
    """An exclusive range of keys as indexed by their position in a key space."""

    def __init__(self, *, begin: int, end: int): ...

    begin: int
    end: int

    def __len__(self) -> int: ...
    # NOTE(review): presumably returns a span with ``offset`` added to both
    # bounds — confirm.
    def shift(self, offset: int) -> KeySpan: ...
    def union(self, other: KeySpan) -> KeySpan: ...
    def __or__(self, other: KeySpan) -> KeySpan: ...
|
181
|
+
|
182
|
+
class KeyMap:
    """Displacement map.

    No attributes or methods are declared in this stub.
    """
|
184
|
+
|
185
|
+
class ManifestHandle:
    """Type stub for a manifest handle: an ID, a file format, a file size and optional ``spfs_format_metadata``."""

    def __init__(self, id: str, format: FileFormat, file_size: int, spfs_format_metadata: bytes | None): ...

    id: str
    format: FileFormat
    # NOTE(review): presumably a size in bytes — confirm.
    file_size: int
    spfs_format_metadata: bytes | None
|
192
|
+
|
193
|
+
class Schema:
    """Type stub for a schema convertible to and from a ``pyarrow.Schema``."""

    def to_arrow(self) -> pa.Schema:
        """Returns the Arrow schema."""
        ...

    @staticmethod
    def from_arrow(arrow: pa.Schema) -> Schema:
        """Creates a Schema from an Arrow schema."""
        ...
|
202
|
+
|
203
|
+
class VersionedSchema:
    """Type stub for a schema paired with a timestamp and a list of column IDs."""

    ts: int
    schema: Schema
    column_ids: list[str]
|
207
|
+
|
208
|
+
class KeySpaceWriteOp:
    """Type stub for one of the operation types allowed in ``LogEntry.operation``.

    The fields match the keyword arguments of ``LogEntry.ks_write``.
    """

    ks_id: str
    manifest_handle: ManifestHandle
|
211
|
+
|
212
|
+
class FragmentSetWriteOp:
    """Type stub for one of the operation types allowed in ``LogEntry.operation``.

    The fields match the keyword arguments of ``LogEntry.fs_write``.
    """

    column_group: ColumnGroup
    fs_id: str
    fs_level: FragmentLevel
    manifest_handle: ManifestHandle
    key_span: KeySpan
    key_extent: KeyExtent
    column_ids: list[str]
|
220
|
+
|
221
|
+
class ConfigurationOp:
    """Type stub for one of the operation types allowed in ``LogEntry.operation``."""

    column_group: ColumnGroup
|
223
|
+
|
224
|
+
class SchemaEvolutionOp:
    """Type stub for one of the operation types allowed in ``LogEntry.operation``.

    NOTE(review): ``LogEntry.schema_evolution`` also takes ``new_schema``, which
    is not declared as an attribute here — confirm whether it is exposed.
    """

    column_group: ColumnGroup
|
226
|
+
|
227
|
+
class SchemaBreakOp:
    """Type stub for one of the operation types allowed in ``LogEntry.operation``.

    NOTE(review): ``LogEntry.schema_break`` also takes ``removed_column_names``,
    which is not declared as an attribute here — confirm whether it is exposed.
    """

    column_group: ColumnGroup
|
229
|
+
|
230
|
+
class WriteAheadLog:
    """Type stub for the write-ahead log: a list of ``LogEntry`` plus truncation state.

    All constructor arguments are keyword-only and optional.
    """

    def __init__(
        self,
        *,
        entries: list[LogEntry] | None = None,
        truncated_up_to: int = 0,
    ): ...

    entries: list[LogEntry]
    truncated_up_to: int
    ks_manifest_handle: ManifestHandle | None

    @property
    def last_modified_at(self) -> int:
        """Returns the timestamp of the last modification of the log."""

    def filter(
        self, asof: int | None = None, since: int | None = None, column_group: ColumnGroup | None = None
    ) -> WriteAheadLog:
        """Filters the WAL to entries by the given parameters."""

    def __bytes__(self) -> bytes:
        """Serializes the WriteAheadLog to a protobuf buffer."""

    @staticmethod
    def from_proto(buffer: bytes) -> WriteAheadLog:
        """Deserializes a WriteAheadLog from a protobuf buffer."""
        ...
|