pyspiral 0.2.5__cp310-abi3-macosx_11_0_arm64.whl → 0.4.0__cp310-abi3-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. {pyspiral-0.2.5.dist-info → pyspiral-0.4.0.dist-info}/METADATA +12 -14
  2. pyspiral-0.4.0.dist-info/RECORD +98 -0
  3. {pyspiral-0.2.5.dist-info → pyspiral-0.4.0.dist-info}/WHEEL +1 -1
  4. spiral/__init__.py +6 -7
  5. spiral/_lib.abi3.so +0 -0
  6. spiral/adbc.py +21 -14
  7. spiral/api/__init__.py +15 -172
  8. spiral/api/admin.py +12 -26
  9. spiral/api/client.py +160 -0
  10. spiral/api/filesystems.py +100 -72
  11. spiral/api/organizations.py +45 -58
  12. spiral/api/projects.py +171 -134
  13. spiral/api/telemetry.py +19 -0
  14. spiral/api/types.py +20 -0
  15. spiral/api/workloads.py +32 -25
  16. spiral/{arrow.py → arrow_.py} +12 -0
  17. spiral/cli/__init__.py +2 -5
  18. spiral/cli/admin.py +7 -12
  19. spiral/cli/app.py +23 -6
  20. spiral/cli/console.py +1 -1
  21. spiral/cli/fs.py +83 -18
  22. spiral/cli/iceberg/__init__.py +7 -0
  23. spiral/cli/iceberg/namespaces.py +47 -0
  24. spiral/cli/iceberg/tables.py +60 -0
  25. spiral/cli/indexes/__init__.py +19 -0
  26. spiral/cli/login.py +14 -5
  27. spiral/cli/orgs.py +90 -0
  28. spiral/cli/printer.py +9 -1
  29. spiral/cli/projects.py +136 -0
  30. spiral/cli/state.py +2 -0
  31. spiral/cli/tables/__init__.py +121 -0
  32. spiral/cli/telemetry.py +18 -0
  33. spiral/cli/types.py +8 -10
  34. spiral/cli/{workload.py → workloads.py} +11 -11
  35. spiral/{catalog.py → client.py} +22 -21
  36. spiral/core/client/__init__.pyi +117 -0
  37. spiral/core/index/__init__.pyi +15 -0
  38. spiral/core/table/__init__.pyi +108 -0
  39. spiral/core/{manifests → table/manifests}/__init__.pyi +5 -23
  40. spiral/core/table/metastore/__init__.pyi +62 -0
  41. spiral/core/{spec → table/spec}/__init__.pyi +49 -92
  42. spiral/datetime_.py +27 -0
  43. spiral/expressions/__init__.py +40 -17
  44. spiral/expressions/base.py +5 -5
  45. spiral/expressions/list_.py +1 -1
  46. spiral/expressions/mp4.py +62 -0
  47. spiral/expressions/png.py +18 -0
  48. spiral/expressions/qoi.py +18 -0
  49. spiral/expressions/refs.py +23 -9
  50. spiral/expressions/struct.py +7 -5
  51. spiral/expressions/text.py +62 -0
  52. spiral/expressions/tiff.py +88 -88
  53. spiral/expressions/udf.py +3 -3
  54. spiral/iceberg/__init__.py +3 -0
  55. spiral/iceberg/client.py +33 -0
  56. spiral/indexes/__init__.py +5 -0
  57. spiral/indexes/client.py +137 -0
  58. spiral/indexes/index.py +34 -0
  59. spiral/indexes/scan.py +22 -0
  60. spiral/project.py +19 -110
  61. spiral/{proto → protogen}/_/scandal/__init__.py +32 -77
  62. spiral/protogen/_/spiral/table/__init__.py +22 -0
  63. spiral/protogen/substrait/__init__.py +3399 -0
  64. spiral/protogen/substrait/extensions/__init__.py +115 -0
  65. spiral/server.py +17 -0
  66. spiral/settings.py +31 -87
  67. spiral/substrait_.py +10 -6
  68. spiral/tables/__init__.py +12 -0
  69. spiral/tables/client.py +130 -0
  70. spiral/{dataset.py → tables/dataset.py} +36 -25
  71. spiral/tables/debug/manifests.py +70 -0
  72. spiral/tables/debug/metrics.py +56 -0
  73. spiral/{debug.py → tables/debug/scan.py} +6 -9
  74. spiral/tables/maintenance.py +12 -0
  75. spiral/tables/scan.py +193 -0
  76. spiral/tables/snapshot.py +78 -0
  77. spiral/tables/table.py +157 -0
  78. spiral/tables/transaction.py +52 -0
  79. pyspiral-0.2.5.dist-info/RECORD +0 -81
  80. spiral/api/tables.py +0 -94
  81. spiral/api/tokens.py +0 -56
  82. spiral/authn/authn.py +0 -89
  83. spiral/authn/device.py +0 -206
  84. spiral/authn/github_.py +0 -33
  85. spiral/authn/modal_.py +0 -18
  86. spiral/cli/org.py +0 -90
  87. spiral/cli/project.py +0 -107
  88. spiral/cli/table.py +0 -20
  89. spiral/cli/token.py +0 -27
  90. spiral/config.py +0 -26
  91. spiral/core/core/__init__.pyi +0 -53
  92. spiral/core/metastore/__init__.pyi +0 -91
  93. spiral/proto/_/spfs/__init__.py +0 -36
  94. spiral/proto/_/spiral/table/__init__.py +0 -225
  95. spiral/proto/_/spiraldb/metastore/__init__.py +0 -499
  96. spiral/proto/__init__.py +0 -0
  97. spiral/proto/scandal/__init__.py +0 -45
  98. spiral/proto/spiral/__init__.py +0 -0
  99. spiral/proto/spiral/table/__init__.py +0 -96
  100. spiral/scan_.py +0 -168
  101. spiral/table.py +0 -157
  102. {pyspiral-0.2.5.dist-info → pyspiral-0.4.0.dist-info}/entry_points.txt +0 -0
  103. /spiral/{authn/__init__.py → core/__init__.pyi} +0 -0
  104. /spiral/{core → protogen/_}/__init__.py +0 -0
  105. /spiral/{proto/_ → protogen/_/arrow}/__init__.py +0 -0
  106. /spiral/{proto/_/arrow → protogen/_/arrow/flight}/__init__.py +0 -0
  107. /spiral/{proto/_/arrow/flight → protogen/_/arrow/flight/protocol}/__init__.py +0 -0
  108. /spiral/{proto → protogen}/_/arrow/flight/protocol/sql/__init__.py +0 -0
  109. /spiral/{proto/_/arrow/flight/protocol → protogen/_/spiral}/__init__.py +0 -0
  110. /spiral/{proto → protogen/_}/substrait/__init__.py +0 -0
  111. /spiral/{proto → protogen/_}/substrait/extensions/__init__.py +0 -0
  112. /spiral/{proto/_/spiral → protogen}/__init__.py +0 -0
  113. /spiral/{proto → protogen}/util.py +0 -0
  114. /spiral/{proto/_/spiraldb → tables/debug}/__init__.py +0 -0
spiral/cli/project.py DELETED
@@ -1,107 +0,0 @@
1
- from typing import Annotated
2
-
3
- import rich
4
- import typer
5
- from typer import Option
6
-
7
- from spiral.api.organizations import OrganizationRole
8
- from spiral.api.projects import CreateProject, Grant, GrantRole, ListGrants, Project
9
- from spiral.cli import AsyncTyper, OptionalStr, printer, state
10
- from spiral.cli.org import current_org_id
11
- from spiral.cli.types import ProjectArg
12
-
13
- app = AsyncTyper()
14
-
15
-
16
- @app.command(help="List projects.")
17
- def ls():
18
- projects = list(state.settings.api.project.list())
19
- rich.print(printer.table_of_models(Project, projects))
20
-
21
-
22
- @app.command(help="Create a new project.")
23
- def create(
24
- id_prefix: Annotated[
25
- OptionalStr, Option(help="An optional ID prefix to which a random number will be appended.")
26
- ] = None,
27
- org_id: Annotated[OptionalStr, Option(help="Organization ID in which to create the project.")] = None,
28
- name: Annotated[OptionalStr, Option(help="Friendly name for the project.")] = None,
29
- ):
30
- res = state.settings.api.project.create(
31
- CreateProject.Request(organization_id=org_id or current_org_id(), id_prefix=id_prefix, name=name)
32
- )
33
- rich.print(f"Created project {res.project.id}")
34
-
35
-
36
- @app.command(help="Grant a role on a project.")
37
- def grant(
38
- project: ProjectArg,
39
- role: Annotated[str, Option(help="Role to grant.")],
40
- org_id: Annotated[
41
- OptionalStr, Option(help="Pass an organization ID to grant a role to an organization user(s).")
42
- ] = None,
43
- user_id: Annotated[
44
- OptionalStr, Option(help="Pass a user ID when using --org-id to grant a role to grant a role to a user.")
45
- ] = None,
46
- org_role: Annotated[
47
- OptionalStr,
48
- Option(help="Pass an organization role when using --org-id to grant a role to all users with that role."),
49
- ] = None,
50
- workload_id: Annotated[OptionalStr, Option(help="Pass a workload ID to grant a role to a workload.")] = None,
51
- github: Annotated[
52
- OptionalStr, Option(help="Pass an `<org>/<repo>` string to grant a role to a job running in GitHub Actions.")
53
- ] = None,
54
- modal: Annotated[
55
- OptionalStr,
56
- Option(help="Pass a `<workspace_id>/<env_name>` string to grant a role to a job running in Modal environment."),
57
- ] = None,
58
- conditions: list[str] = Option(
59
- default_factory=list,
60
- help="`<key>=<value>` token conditions to apply to the grant when using --github or --modal.",
61
- ),
62
- ):
63
- # Check mutual exclusion
64
- if sum(int(bool(opt)) for opt in {org_id, workload_id, github, modal}) != 1:
65
- raise typer.BadParameter("Only one of --org-id, --github or --modal may be specified.")
66
-
67
- if github:
68
- org, repo = github.split("/", 1)
69
- conditions = {GrantRole.GitHubClaim(k): v for k, v in dict(c.split("=", 1) for c in conditions).items()}
70
- principal = GrantRole.GitHubPrincipal(org=org, repo=repo, conditions=conditions)
71
- elif modal:
72
- workspace_id, environment_name = modal.split("/", 1)
73
- conditions = {GrantRole.ModalClaim(k): v for k, v in dict(c.split("=", 1) for c in conditions).items()}
74
- principal = GrantRole.ModalPrincipal(
75
- workspace_id=workspace_id, environment_name=environment_name, conditions=conditions
76
- )
77
- elif org_id:
78
- # Check mutual exclusion
79
- if sum(int(bool(opt)) for opt in {user_id, org_role}) != 1:
80
- raise typer.BadParameter("Only one of --user-id or --org-role may be specified.")
81
-
82
- if user_id is not None:
83
- principal = GrantRole.OrgUserPrincipal(org_id=org_id, user_id=user_id)
84
- elif org_role is None:
85
- principal = GrantRole.OrgRolePrincipal(org_id=org_id, role=OrganizationRole(org_role))
86
- else:
87
- raise NotImplementedError("Only user or role principal is supported at this time.")
88
- elif workload_id:
89
- principal = GrantRole.WorkloadPrincipal(workload_id=workload_id)
90
- else:
91
- raise NotImplementedError("Only organization, GitHub or Modal principal is supported at this time.")
92
-
93
- state.settings.api.project.grant_role(
94
- GrantRole.Request(
95
- project_id=project,
96
- role_id=role,
97
- principal=principal,
98
- )
99
- )
100
-
101
- rich.print(f"Granted role {role} on project {project}")
102
-
103
-
104
- @app.command(help="List project grants.")
105
- def grants(project: ProjectArg):
106
- project_grants = list(state.settings.api.project.list_grants(ListGrants.Request(project_id=project)))
107
- rich.print(printer.table_of_models(Grant, project_grants, title="Project Grants"))
spiral/cli/table.py DELETED
@@ -1,20 +0,0 @@
1
- from typing import Annotated
2
-
3
- import rich
4
- from typer import Option
5
-
6
- from spiral.api.tables import ListTables, Table
7
- from spiral.cli import AsyncTyper, OptionalStr, printer, state
8
- from spiral.cli.types import ProjectArg
9
-
10
- app = AsyncTyper()
11
-
12
-
13
- @app.command(help="List tables.")
14
- def ls(
15
- project: ProjectArg,
16
- dataset: Annotated[OptionalStr, Option(help="Filter by dataset name.")] = None,
17
- ):
18
- """List tables."""
19
- tables = list(state.settings.api.table.list(ListTables.Request(project_id=project, dataset=dataset)))
20
- rich.print(printer.table_of_models(Table, tables, fields=["id", "project_id", "dataset", "table"]))
spiral/cli/token.py DELETED
@@ -1,27 +0,0 @@
1
- from typing import Annotated
2
-
3
- import rich
4
- from typer import Argument, Option
5
-
6
- from spiral.api.tokens import ListTokens, RevokeToken, Token
7
- from spiral.cli import AsyncTyper, OptionalStr, printer, state
8
- from spiral.cli.types import ProjectArg
9
-
10
- app = AsyncTyper()
11
-
12
-
13
- @app.command(help="List tokens.")
14
- def ls(
15
- project: ProjectArg,
16
- on_behalf_of: Annotated[OptionalStr, Option(help="Filter by on behalf of.")] = None,
17
- ):
18
- tokens = list(state.settings.api.token.list(ListTokens.Request(project_id=project, on_behalf_of=on_behalf_of)))
19
- rich.print(printer.table_of_models(Token, tokens, fields=["id", "project_id", "on_behalf_of"]))
20
-
21
-
22
- @app.command(help="Revoke a token.")
23
- def revoke(token_id: Annotated[str, Argument(help="Token ID.")]):
24
- res = state.settings.api.token.revoke(RevokeToken.Request(token_id=token_id))
25
- rich.print(
26
- f"Revoked token {res.token.id} for project {res.token.project_id} acting on behalf of {res.token.on_behalf_of}"
27
- )
spiral/config.py DELETED
@@ -1,26 +0,0 @@
1
- import os
2
-
3
- from pydantic_settings import BaseSettings, SettingsConfigDict
4
-
5
- FILE_FORMAT = os.environ.get("SPIRAL_FILE_FORMAT", "parquet")
6
-
7
-
8
- class Config(BaseSettings):
9
- model_config = SettingsConfigDict(
10
- env_nested_delimiter="__",
11
- env_prefix="SPIRAL_CORE__",
12
- frozen=True,
13
- )
14
-
15
- partition_file_min_size: int = 256 * 1024 * 1024 # 256MB
16
- flush_wal_on_write: bool = False
17
-
18
- # TODO(marko): Support config. Unused after migration to Rust.
19
- # #: Defaults to ThreadPoolExecutor's default (based on os.cpu_count().
20
- # scan_num_threads: int | None = 61 # 61 is used by Golang and Tokio, for some reason...
21
- #
22
- # #: The duration of WAL that is preserved to allow for txn conflict resolution.
23
- # transaction_window: int = 0 if DEV else timedelta(days=1).total_seconds()
24
- #
25
- # #: Truncation length of string statistics.
26
- # string_truncation_length: int = 1024
spiral/core/core/__init__.pyi DELETED
@@ -1,53 +0,0 @@
1
- from typing import Any
2
-
3
- import pyarrow as pa
4
- from spiral.core.manifests import FragmentManifest
5
- from spiral.core.metastore import PyMetastore
6
- from spiral.core.spec import ColumnGroup, ColumnGroupMetadata, KeyRange, Schema, WriteAheadLog
7
- from spiral.expressions import Expr
8
-
9
- class Table:
10
- def __init__(self, metastore: PyMetastore): ...
11
-
12
- id: str
13
- root_uri: str
14
- key_schema: Schema
15
- metastore: PyMetastore
16
-
17
- def get_wal(self, *, asof: int | None) -> WriteAheadLog: ...
18
- def get_column_group_metadata(self, column_group: ColumnGroup, *, asof: int | None) -> ColumnGroupMetadata: ...
19
- def list_column_groups(self, *, asof: int | None) -> list[ColumnGroup] | None: ...
20
- def get_schema(self, *, asof: int | None) -> Schema: ...
21
-
22
- class TableScan:
23
- def __init__(
24
- self,
25
- projection: Expr,
26
- filter: Expr | None = None,
27
- asof: int | None = None,
28
- exclude_keys: bool = False,
29
- aux_schema: pa.Schema | None = None,
30
- ) -> TableScan: ...
31
- def key_schema(self) -> Schema: ...
32
- def schema(self) -> Schema: ...
33
- def is_empty(self) -> bool: ...
34
- def split(self) -> list[KeyRange]: ...
35
- def table_ids(self) -> list[str]: ...
36
- def column_groups(self) -> list[ColumnGroup]: ...
37
- def to_record_batches(self, aux_table: pa.Table | pa.RecordBatch | None = None) -> pa.RecordBatchReader: ...
38
- def column_group_scan(self, column_group: ColumnGroup) -> ColumnGroupScan: ...
39
- def key_space_scan(self, table_id: str) -> KeySpaceScan: ...
40
- def metrics(self) -> dict[str, Any]: ...
41
-
42
- class KeySpaceScan:
43
- manifest: FragmentManifest
44
-
45
- def key_schema(self) -> Schema: ...
46
-
47
- class ColumnGroupScan:
48
- manifest: FragmentManifest
49
-
50
- def schema(self) -> Schema: ...
51
-
52
- def write(table: Table, expr: Expr, format: str = "parquet", *, partition_size: int | None = None): ...
53
- def flush_wal(table: Table, manifest_format: str = "parquet"): ...
spiral/core/metastore/__init__.pyi DELETED
@@ -1,91 +0,0 @@
1
- """The SpiralDB metastore API."""
2
-
3
- from collections.abc import Callable
4
-
5
- from spiral.core.spec import ColumnGroup, ColumnGroupMetadata, FileFormat, LogEntry, Schema, WriteAheadLog
6
- from spiral.types_ import Timestamp, Uri
7
- from spiraldb.proto.spiral.table import ManifestHandle
8
-
9
- class FileHandle:
10
- def __init__(self, *, uri: str, format: FileFormat, spfs_token: str | None): ...
11
-
12
- uri: str
13
- format: FileFormat
14
- spfs_token: str | None
15
-
16
- class FileRef:
17
- def __init__(self, *, id: str, file_type: FileType, file_format: FileFormat): ...
18
-
19
- id: str
20
- file_type: FileType
21
- file_format: FileFormat
22
-
23
- def resolve_uri(self, root_uri: str) -> str:
24
- """Resolves the file reference URI given the root URI."""
25
-
26
- class FileType:
27
- FragmentFile: FileType
28
- FragmentManifest: FileType
29
- ReferenceFile: FileType
30
-
31
- def __int__(self) -> int:
32
- """Returns the protobuf enum int value."""
33
-
34
- class PyMetastore:
35
- """Rust implementation of the metastore API."""
36
-
37
- @property
38
- def table_id(self) -> str: ...
39
- @property
40
- def root_uri(self) -> Uri: ...
41
- @property
42
- def key_schema(self) -> Schema: ...
43
- def get_wal(self) -> WriteAheadLog:
44
- """Return the log for the table."""
45
- ...
46
-
47
- def append_wal(self, prev_last_modified_at: Timestamp, entries: list[LogEntry]) -> WriteAheadLog:
48
- """Append additional entries into the write-ahead log given the previous write-ahead log timestamp.
49
-
50
- The given entries should have a timestamp of zero and will be assigned an actual timestamp by the server.
51
-
52
- This API is designed to support both a trivial compare-and-swap on the WAL, and also to support more advanced
53
- conflict resolution within the metastore.
54
- """
55
- ...
56
-
57
- def update_wal(
58
- self,
59
- prev_ks_manifest_handle_id: str,
60
- truncate_ts_max: Timestamp | None = None,
61
- new_ks_manifest_handle: ManifestHandle | None = None,
62
- ) -> WriteAheadLog:
63
- """Update the write-ahead log atomically.
64
-
65
- Supports WAL truncation and manifest handle updates necessary for flushing.
66
- """
67
- ...
68
-
69
- def get_column_group_metadata(self, column_group: ColumnGroup) -> ColumnGroupMetadata:
70
- """Return the metadata for column group."""
71
- ...
72
-
73
- def update_column_group_metadata(
74
- self, prev_last_modified_at: Timestamp, column_group_metadata: ColumnGroupMetadata
75
- ) -> ColumnGroupMetadata:
76
- """Update the column group metadata to the metastore given the previous metadata timestamp."""
77
- ...
78
-
79
- def list_column_groups(self) -> tuple[list[ColumnGroup], Timestamp]:
80
- """List all column groups in the table, or None if no index is available."""
81
- ...
82
-
83
- @staticmethod
84
- def http(
85
- table_id: str, root_uri: str, key_schema: Schema, base_url: str, token_provider: Callable[[], str]
86
- ) -> PyMetastore:
87
- """Construct a PyMetastore backed by an HTTP metastore service."""
88
-
89
- @staticmethod
90
- def test(table_id: str, root_uri: str, key_schema: Schema) -> PyMetastore:
91
- """Construct a PyMetastore backed by an in-memory mock metastore service."""
spiral/proto/_/spfs/__init__.py DELETED
@@ -1,36 +0,0 @@
1
- # Generated by the protocol buffer compiler. DO NOT EDIT!
2
- # sources: spfs/spfs.proto
3
- # plugin: python-betterproto
4
- # This file has been @generated
5
-
6
- from dataclasses import dataclass
7
-
8
- import betterproto
9
-
10
-
11
- @dataclass(eq=False, repr=False)
12
- class FileMetadata(betterproto.Message):
13
- protobuf: "ProtobufFileSpecificMetadata" = betterproto.message_field(
14
- 1, group="format_specific"
15
- )
16
- parquet: "ParquetFileSpecificMetadata" = betterproto.message_field(
17
- 2, group="format_specific"
18
- )
19
- vortex: "VortexFileSpecificMetadata" = betterproto.message_field(
20
- 3, group="format_specific"
21
- )
22
-
23
-
24
- @dataclass(eq=False, repr=False)
25
- class ProtobufFileSpecificMetadata(betterproto.Message):
26
- pass
27
-
28
-
29
- @dataclass(eq=False, repr=False)
30
- class ParquetFileSpecificMetadata(betterproto.Message):
31
- metadata_size_bytes: int = betterproto.uint32_field(1)
32
-
33
-
34
- @dataclass(eq=False, repr=False)
35
- class VortexFileSpecificMetadata(betterproto.Message):
36
- metadata_size_bytes: int = betterproto.uint32_field(1)
spiral/proto/_/spiral/table/__init__.py DELETED
@@ -1,225 +0,0 @@
1
- # Generated by the protocol buffer compiler. DO NOT EDIT!
2
- # sources: spiral/table/common.proto, spiral/table/metadata.proto, spiral/table/statistics.proto, spiral/table/wal.proto
3
- # plugin: python-betterproto
4
- # This file has been @generated
5
-
6
- from dataclasses import dataclass
7
- from typing import (
8
- List,
9
- Optional,
10
- )
11
-
12
- import betterproto
13
-
14
-
15
- class FileFormat(betterproto.Enum):
16
- UNSPECIFIED = 0
17
- PARQUET = 1
18
- PROTOBUF = 2
19
- BINARY_ARRAY = 3
20
- VORTEX = 4
21
-
22
-
23
- class Level(betterproto.Enum):
24
- UNSPECIFIED = 0
25
- L0 = 1
26
- L1 = 2
27
-
28
-
29
- @dataclass(eq=False, repr=False)
30
- class ApproximateSetMembership(betterproto.Message):
31
- bloom_filter: "BloomFilter" = betterproto.message_field(
32
- 2, group="membership_strategy"
33
- )
34
-
35
-
36
- @dataclass(eq=False, repr=False)
37
- class BloomFilter(betterproto.Message):
38
- bit_vec: bytes = betterproto.bytes_field(1)
39
- bitmap_bits: int = betterproto.uint64_field(2)
40
- k_num: int = betterproto.uint32_field(3)
41
-
42
-
43
- @dataclass(eq=False, repr=False)
44
- class Schema(betterproto.Message):
45
- arrow: bytes = betterproto.bytes_field(1)
46
-
47
-
48
- @dataclass(eq=False, repr=False)
49
- class ColumnGroup(betterproto.Message):
50
- parts: List[str] = betterproto.string_field(1)
51
-
52
-
53
- @dataclass(eq=False, repr=False)
54
- class KeySpan(betterproto.Message):
55
- """/ Span of keys defined as indices into a key space (exclusive end)."""
56
-
57
- begin: int = betterproto.uint64_field(1)
58
- end: int = betterproto.uint64_field(2)
59
-
60
-
61
- @dataclass(eq=False, repr=False)
62
- class KeyExtent(betterproto.Message):
63
- """/ Extent of keys in terms of min and max (an inclusive range)."""
64
-
65
- min: bytes = betterproto.bytes_field(1)
66
- max: bytes = betterproto.bytes_field(2)
67
-
68
-
69
- @dataclass(eq=False, repr=False)
70
- class KeyMap(betterproto.Message):
71
- """/ Displacement map."""
72
-
73
- bitmap: bytes = betterproto.bytes_field(1)
74
-
75
-
76
- @dataclass(eq=False, repr=False)
77
- class ManifestHandle(betterproto.Message):
78
- """
79
- / Handle to a manifest file, with additional metadata to accelerate reads.
80
- """
81
-
82
- id: str = betterproto.string_field(1)
83
- format: "FileFormat" = betterproto.enum_field(2)
84
- file_size: int = betterproto.uint64_field(3)
85
- spfs_format_metadata: Optional[bytes] = betterproto.bytes_field(4, optional=True)
86
-
87
-
88
- @dataclass(eq=False, repr=False)
89
- class VersionedSchema(betterproto.Message):
90
- """/ Timestamped schema."""
91
-
92
- ts: int = betterproto.uint64_field(1)
93
- schema: "Schema" = betterproto.message_field(2)
94
- column_ids: List[str] = betterproto.string_field(3)
95
- """
96
- List of column IDs, in the same order as columns in the schema.
97
- Must have the same length as the number of columns in the schema.
98
- """
99
-
100
-
101
- @dataclass(eq=False, repr=False)
102
- class WriteAheadLog(betterproto.Message):
103
- """
104
- / Table's Write Ahead Log stores sequence of operations and table metadata.
105
- """
106
-
107
- entries: List["LogEntry"] = betterproto.message_field(1)
108
- truncated_up_to: int = betterproto.uint64_field(3)
109
- """
110
- Timestamp of the latest entry that has been removed from the log.
111
- Entries with ts <= truncated_up_to are NOT present in the log.
112
- """
113
-
114
- ks_manifest_handle: Optional["ManifestHandle"] = betterproto.message_field(
115
- 2, optional=True
116
- )
117
- """An absent manifest means that one has not yet been written."""
118
-
119
-
120
- @dataclass(eq=False, repr=False)
121
- class LogEntry(betterproto.Message):
122
- ts: int = betterproto.uint64_field(1)
123
- key_space_write: "KeySpaceWriteOp" = betterproto.message_field(2, group="operation")
124
- fragment_set_write: "FragmentSetWriteOp" = betterproto.message_field(
125
- 3, group="operation"
126
- )
127
- configuration: "ConfigurationOp" = betterproto.message_field(4, group="operation")
128
- schema_evolution: "SchemaEvolutionOp" = betterproto.message_field(
129
- 5, group="operation"
130
- )
131
- schema_break: "SchemaBreakOp" = betterproto.message_field(6, group="operation")
132
- compact_key_space: "CompactKeySpaceOp" = betterproto.message_field(
133
- 7, group="operation"
134
- )
135
- compact_column_group: "CompactColumnGroupOp" = betterproto.message_field(
136
- 8, group="operation"
137
- )
138
-
139
-
140
- @dataclass(eq=False, repr=False)
141
- class KeySpaceWriteOp(betterproto.Message):
142
- ks_id: str = betterproto.string_field(1)
143
- manifest_handle: "ManifestHandle" = betterproto.message_field(2)
144
-
145
-
146
- @dataclass(eq=False, repr=False)
147
- class FragmentSetWriteOp(betterproto.Message):
148
- column_group: "ColumnGroup" = betterproto.message_field(1)
149
- fs_id: str = betterproto.string_field(2)
150
- fs_level: "Level" = betterproto.enum_field(3)
151
- manifest_handle: "ManifestHandle" = betterproto.message_field(4)
152
- key_span: "KeySpan" = betterproto.message_field(5)
153
- key_extent: "KeyExtent" = betterproto.message_field(6)
154
- column_ids: List[str] = betterproto.string_field(7)
155
-
156
-
157
- @dataclass(eq=False, repr=False)
158
- class ConfigurationOp(betterproto.Message):
159
- column_group: "ColumnGroup" = betterproto.message_field(1)
160
- immutable_schema: Optional[bool] = betterproto.bool_field(2, optional=True)
161
- """All column group configuration is stored in column group metadata."""
162
-
163
-
164
- @dataclass(eq=False, repr=False)
165
- class SchemaEvolutionOp(betterproto.Message):
166
- column_group: "ColumnGroup" = betterproto.message_field(1)
167
- new_schema: "VersionedSchema" = betterproto.message_field(2)
168
-
169
-
170
- @dataclass(eq=False, repr=False)
171
- class SchemaBreakOp(betterproto.Message):
172
- column_group: "ColumnGroup" = betterproto.message_field(1)
173
- removed_column_names: List[str] = betterproto.string_field(2)
174
-
175
-
176
- @dataclass(eq=False, repr=False)
177
- class CompactKeySpaceOp(betterproto.Message):
178
- from_ks_ids: List[str] = betterproto.string_field(1)
179
- into_ks_ids: List[str] = betterproto.string_field(2)
180
-
181
-
182
- @dataclass(eq=False, repr=False)
183
- class CompactColumnGroupOp(betterproto.Message):
184
- column_group: "ColumnGroup" = betterproto.message_field(1)
185
- from_fragment_ids: List[str] = betterproto.string_field(2)
186
- """
187
- NOTE: While key space compaction always compacts a full key space,
188
- column group compaction can compact only a subset of the fragment set,
189
- therefore the event specifies the fragment ids.
190
- """
191
-
192
- into_fs_ids: List[str] = betterproto.string_field(3)
193
-
194
-
195
- @dataclass(eq=False, repr=False)
196
- class ColumnGroupMetadata(betterproto.Message):
197
- column_group: "ColumnGroup" = betterproto.message_field(1)
198
- manifest_handle: Optional["ManifestHandle"] = betterproto.message_field(
199
- 2, optional=True
200
- )
201
- """An absent manifest means that one has not yet been written."""
202
-
203
- last_modified_at: int = betterproto.uint64_field(3)
204
- """Timestamp of the last WAL entry that modified this metadata."""
205
-
206
- schema_versions: List["VersionedSchema"] = betterproto.message_field(4)
207
- """
208
- TODO(marko): Add config that truncates this list and breaks time travel.
209
- Versions of the schema. Higher index is more recent, last element is latest
210
- schema. Stored to support time travel through the schema, e.g. reading
211
- deleted column.
212
- """
213
-
214
- immutable_schema: bool = betterproto.bool_field(5)
215
- """
216
- True if schema can NOT be evolved on write.
217
- An explicit schema evolution is required.
218
- """
219
-
220
- schema_salt: int = betterproto.int32_field(6)
221
- """
222
- Schema salt is used to compute column IDs. It is modified on breaking
223
- change, e.g. column deletion. This ensures that if the previously existing
224
- column is added again, it will have a different ID.
225
- """