pyspiral 0.3.1__cp310-abi3-macosx_11_0_arm64.whl → 0.4.0__cp310-abi3-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109)
  1. {pyspiral-0.3.1.dist-info → pyspiral-0.4.0.dist-info}/METADATA +9 -13
  2. pyspiral-0.4.0.dist-info/RECORD +98 -0
  3. {pyspiral-0.3.1.dist-info → pyspiral-0.4.0.dist-info}/WHEEL +1 -1
  4. spiral/__init__.py +6 -9
  5. spiral/_lib.abi3.so +0 -0
  6. spiral/adbc.py +21 -14
  7. spiral/api/__init__.py +14 -175
  8. spiral/api/admin.py +12 -26
  9. spiral/api/client.py +160 -0
  10. spiral/api/filesystems.py +100 -72
  11. spiral/api/organizations.py +45 -58
  12. spiral/api/projects.py +171 -134
  13. spiral/api/telemetry.py +19 -0
  14. spiral/api/types.py +20 -0
  15. spiral/api/workloads.py +32 -25
  16. spiral/{arrow.py → arrow_.py} +12 -0
  17. spiral/cli/__init__.py +2 -5
  18. spiral/cli/admin.py +7 -12
  19. spiral/cli/app.py +23 -6
  20. spiral/cli/console.py +1 -1
  21. spiral/cli/fs.py +82 -17
  22. spiral/cli/iceberg/__init__.py +7 -0
  23. spiral/cli/iceberg/namespaces.py +47 -0
  24. spiral/cli/iceberg/tables.py +60 -0
  25. spiral/cli/indexes/__init__.py +19 -0
  26. spiral/cli/login.py +14 -5
  27. spiral/cli/orgs.py +90 -0
  28. spiral/cli/printer.py +9 -1
  29. spiral/cli/projects.py +136 -0
  30. spiral/cli/state.py +2 -0
  31. spiral/cli/tables/__init__.py +121 -0
  32. spiral/cli/telemetry.py +18 -0
  33. spiral/cli/types.py +8 -10
  34. spiral/cli/{workload.py → workloads.py} +11 -11
  35. spiral/{catalog.py → client.py} +23 -37
  36. spiral/core/client/__init__.pyi +117 -0
  37. spiral/core/index/__init__.pyi +15 -0
  38. spiral/core/{core → table}/__init__.pyi +44 -17
  39. spiral/core/{manifests → table/manifests}/__init__.pyi +5 -23
  40. spiral/core/table/metastore/__init__.pyi +62 -0
  41. spiral/core/{spec → table/spec}/__init__.pyi +41 -66
  42. spiral/datetime_.py +27 -0
  43. spiral/expressions/__init__.py +26 -18
  44. spiral/expressions/base.py +5 -5
  45. spiral/expressions/list_.py +1 -1
  46. spiral/expressions/mp4.py +2 -9
  47. spiral/expressions/png.py +1 -1
  48. spiral/expressions/qoi.py +1 -1
  49. spiral/expressions/refs.py +3 -9
  50. spiral/expressions/struct.py +7 -5
  51. spiral/expressions/text.py +62 -0
  52. spiral/expressions/udf.py +3 -3
  53. spiral/iceberg/__init__.py +3 -0
  54. spiral/iceberg/client.py +33 -0
  55. spiral/indexes/__init__.py +5 -0
  56. spiral/indexes/client.py +137 -0
  57. spiral/indexes/index.py +34 -0
  58. spiral/indexes/scan.py +22 -0
  59. spiral/project.py +19 -110
  60. spiral/{proto → protogen}/_/scandal/__init__.py +23 -135
  61. spiral/protogen/_/spiral/table/__init__.py +22 -0
  62. spiral/protogen/substrait/__init__.py +3399 -0
  63. spiral/protogen/substrait/extensions/__init__.py +115 -0
  64. spiral/server.py +17 -0
  65. spiral/settings.py +29 -91
  66. spiral/substrait_.py +9 -5
  67. spiral/tables/__init__.py +12 -0
  68. spiral/tables/client.py +130 -0
  69. spiral/{dataset.py → tables/dataset.py} +9 -199
  70. spiral/tables/debug/manifests.py +70 -0
  71. spiral/tables/debug/metrics.py +56 -0
  72. spiral/{debug.py → tables/debug/scan.py} +6 -9
  73. spiral/{maintenance.py → tables/maintenance.py} +1 -1
  74. spiral/{scan_.py → tables/scan.py} +63 -89
  75. spiral/tables/snapshot.py +78 -0
  76. spiral/{table.py → tables/table.py} +59 -73
  77. spiral/{txn.py → tables/transaction.py} +7 -3
  78. pyspiral-0.3.1.dist-info/RECORD +0 -85
  79. spiral/api/tables.py +0 -91
  80. spiral/api/tokens.py +0 -56
  81. spiral/authn/authn.py +0 -89
  82. spiral/authn/device.py +0 -206
  83. spiral/authn/github_.py +0 -33
  84. spiral/authn/modal_.py +0 -18
  85. spiral/cli/org.py +0 -90
  86. spiral/cli/project.py +0 -109
  87. spiral/cli/table.py +0 -20
  88. spiral/cli/token.py +0 -27
  89. spiral/core/metastore/__init__.pyi +0 -91
  90. spiral/proto/_/spfs/__init__.py +0 -36
  91. spiral/proto/_/spiral/table/__init__.py +0 -276
  92. spiral/proto/_/spiraldb/metastore/__init__.py +0 -499
  93. spiral/proto/__init__.py +0 -0
  94. spiral/proto/scandal/__init__.py +0 -45
  95. spiral/proto/spiral/__init__.py +0 -0
  96. spiral/proto/spiral/table/__init__.py +0 -96
  97. {pyspiral-0.3.1.dist-info → pyspiral-0.4.0.dist-info}/entry_points.txt +0 -0
  98. /spiral/{authn/__init__.py → core/__init__.pyi} +0 -0
  99. /spiral/{core → protogen/_}/__init__.py +0 -0
  100. /spiral/{proto/_ → protogen/_/arrow}/__init__.py +0 -0
  101. /spiral/{proto/_/arrow → protogen/_/arrow/flight}/__init__.py +0 -0
  102. /spiral/{proto/_/arrow/flight → protogen/_/arrow/flight/protocol}/__init__.py +0 -0
  103. /spiral/{proto → protogen}/_/arrow/flight/protocol/sql/__init__.py +0 -0
  104. /spiral/{proto/_/arrow/flight/protocol → protogen/_/spiral}/__init__.py +0 -0
  105. /spiral/{proto → protogen/_}/substrait/__init__.py +0 -0
  106. /spiral/{proto → protogen/_}/substrait/extensions/__init__.py +0 -0
  107. /spiral/{proto/_/spiral → protogen}/__init__.py +0 -0
  108. /spiral/{proto → protogen}/util.py +0 -0
  109. /spiral/{proto/_/spiraldb → tables/debug}/__init__.py +0 -0
@@ -0,0 +1,18 @@
1
+ import pyperclip
2
+ import rich
3
+
4
+ from spiral.api.telemetry import IssueExportTokenResponse
5
+ from spiral.cli import AsyncTyper, state
6
+
7
+ app = AsyncTyper(short_help="Client-side telemetry.")
8
+
9
+
10
+ @app.command(help="Issue new telemetry export token.")
11
+ def export():
12
+ res: IssueExportTokenResponse = state.settings.api.telemetry.issue_export_token()
13
+
14
+ command = f"export SPIRAL_OTEL_TOKEN={res.token}"
15
+ pyperclip.copy(command)
16
+
17
+ rich.print("Export command copied to clipboard! Paste and run to set [green]SPIRAL_OTEL_TOKEN[/green].")
18
+ rich.print("[dim]Token is valid for 1h.[/dim]")
spiral/cli/types.py CHANGED
@@ -6,11 +6,11 @@ import typer
6
6
  from questionary import Choice
7
7
  from typer import Argument
8
8
 
9
- from spiral.api import OrganizationId, ProjectId
9
+ from spiral.api.types import OrgId, ProjectId
10
10
  from spiral.cli import state
11
11
 
12
12
 
13
- def _project_default():
13
+ def ask_project(title="Select a project"):
14
14
  projects = list(state.settings.api.project.list())
15
15
 
16
16
  if not projects:
@@ -18,7 +18,7 @@ def _project_default():
18
18
  raise typer.Exit(1)
19
19
 
20
20
  return questionary.select(
21
- "Select a project",
21
+ title,
22
22
  choices=[
23
23
  Choice(title=f"{project.id} - {project.name}" if project.name else project.id, value=project.id)
24
24
  for project in projects
@@ -26,11 +26,11 @@ def _project_default():
26
26
  ).ask()
27
27
 
28
28
 
29
- ProjectArg = Annotated[ProjectId, Argument(help="Project ID", show_default=False, default_factory=_project_default)]
29
+ ProjectArg = Annotated[ProjectId, Argument(help="Project ID", show_default=False, default_factory=ask_project)]
30
30
 
31
31
 
32
32
  def _org_default():
33
- memberships = list(state.settings.api.organization.list_user_memberships())
33
+ memberships = list(state.settings.api.organization.list_memberships())
34
34
 
35
35
  if not memberships:
36
36
  rich.print("[red]No organizations found[/red]")
@@ -40,14 +40,12 @@ def _org_default():
40
40
  "Select an organization",
41
41
  choices=[
42
42
  Choice(
43
- title=f"{m.organization.id} - {m.organization.name}" if m.organization.name else m.organization.id,
44
- value=m.organization.id,
43
+ title=f"{m.org.id} - {m.org.name}" if m.org.name else m.org.id,
44
+ value=m.org.id,
45
45
  )
46
46
  for m in memberships
47
47
  ],
48
48
  ).ask()
49
49
 
50
50
 
51
- OrganizationArg = Annotated[
52
- OrganizationId, Argument(help="Organization ID", show_default=False, default_factory=_org_default)
53
- ]
51
+ OrganizationArg = Annotated[OrgId, Argument(help="Organization ID", show_default=False, default_factory=_org_default)]
@@ -6,8 +6,8 @@ import rich
6
6
  from questionary import Choice
7
7
  from typer import Argument, Option
8
8
 
9
- from spiral.api.workloads import CreateWorkload, IssueToken, ListWorkloads, Workload
10
- from spiral.cli import AsyncTyper, OptionalStr, printer, state
9
+ from spiral.api.workloads import CreateWorkloadRequest, IssueWorkloadCredentialsResponse, Workload
10
+ from spiral.cli import AsyncTyper, printer, state
11
11
  from spiral.cli.types import ProjectArg
12
12
 
13
13
  app = AsyncTyper()
@@ -16,9 +16,9 @@ app = AsyncTyper()
16
16
  @app.command(help="Create a new workload.")
17
17
  def create(
18
18
  project: ProjectArg,
19
- name: Annotated[OptionalStr, Option(help="Friendly name for the workload.")] = None,
19
+ name: Annotated[str | None, Option(help="Friendly name for the workload.")] = None,
20
20
  ):
21
- res = state.settings.api.workload.create(CreateWorkload.Request(project_id=project, name=name))
21
+ res = state.settings.api.workload.create(project, CreateWorkloadRequest(name=name))
22
22
  rich.print(f"Created workload {res.workload.id}")
23
23
 
24
24
 
@@ -26,13 +26,13 @@ def create(
26
26
  def ls(
27
27
  project: ProjectArg,
28
28
  ):
29
- workloads = list(state.settings.api.workload.list(ListWorkloads.Request(project_id=project)))
29
+ workloads = list(state.settings.api.workload.list(project))
30
30
  rich.print(printer.table_of_models(Workload, workloads, fields=["id", "project_id", "name"]))
31
31
 
32
32
 
33
- @app.command(help="Issue a token.")
34
- def token(workload_id: Annotated[str, Argument(help="Workload ID.")]):
35
- res = state.settings.api.workload.issue_token(IssueToken.Request(workload_id=workload_id))
33
+ @app.command(help="Issue new workflow credentials.")
34
+ def issue_credentials(workload_id: Annotated[str, Argument(help="Workload ID.")]):
35
+ res: IssueWorkloadCredentialsResponse = state.settings.api.workload.issue_credentials(workload_id)
36
36
 
37
37
  while True:
38
38
  choice = questionary.select(
@@ -45,15 +45,15 @@ def token(workload_id: Annotated[str, Argument(help="Workload ID.")]):
45
45
  ).ask()
46
46
 
47
47
  if choice == 1:
48
- pyperclip.copy(res.token_secret)
48
+ pyperclip.copy(res.client_secret)
49
49
  rich.print("[green]Secret copied to clipboard![/green]")
50
50
  break
51
51
  elif choice == 2:
52
- rich.print(f"[green]Token Secret:[/green] {res.token_secret}")
52
+ rich.print(f"[green]Token Secret:[/green] {res.client_secret}")
53
53
  break
54
54
  elif choice == 3:
55
55
  break
56
56
  else:
57
57
  rich.print("[red]Invalid choice. Please try again.[/red]")
58
58
 
59
- rich.print(f"[green]Token ID:[/green] {res.token_id}")
59
+ rich.print(f"[green]Token ID:[/green] {res.client_id}")
@@ -3,23 +3,22 @@ from typing import TYPE_CHECKING
3
3
  import jwt
4
4
 
5
5
  from spiral.api import SpiralAPI
6
- from spiral.api.projects import CreateProject
6
+ from spiral.api.projects import CreateProjectRequest, CreateProjectResponse
7
+ from spiral.core.client import Spiral as CoreSpiral
7
8
  from spiral.settings import Settings, settings
8
9
 
9
10
  if TYPE_CHECKING:
10
- from pyiceberg import catalog
11
-
11
+ from spiral.iceberg import Iceberg
12
12
  from spiral.project import Project
13
- from spiral.table import Table
14
-
15
- _default = object()
16
13
 
17
14
 
18
15
  class Spiral:
19
16
  def __init__(self, config: Settings | None = None):
20
17
  self._config = config or settings()
21
18
  self._api = self._config.api
22
-
19
+ self._core = CoreSpiral(
20
+ api_url=self._config.spiraldb.uri, spfs_url=self._config.spfs.uri, authn=self._config.authn
21
+ )
23
22
  self._org = None
24
23
 
25
24
  @property
@@ -33,7 +32,10 @@ class Spiral:
33
32
  @property
34
33
  def organization(self) -> str:
35
34
  if self._org is None:
36
- token_payload = jwt.decode(self._config.authn.token(), options={"verify_signature": False})
35
+ token = self._config.authn.token()
36
+ if token is None:
37
+ raise ValueError("Authentication failed.")
38
+ token_payload = jwt.decode(token.expose_secret(), options={"verify_signature": False})
37
39
  if "org_id" not in token_payload:
38
40
  raise ValueError("Please create an organization.")
39
41
  self._org = token_payload["org_id"]
@@ -45,49 +47,33 @@ class Spiral:
45
47
 
46
48
  return [Project(self, id=p.id, name=p.name) for p in self.api.project.list()]
47
49
 
48
- def list_project_ids(self) -> list[str]:
49
- """List project IDs."""
50
- return [p.id for p in self.list_projects()]
51
-
52
50
  def create_project(
53
51
  self,
54
52
  id_prefix: str | None = None,
55
53
  *,
56
- org: str | None = None,
57
54
  name: str | None = None,
58
55
  ) -> "Project":
59
56
  """Create a project in the current, or given, organization."""
60
57
  from .project import Project
61
58
 
62
- org = org or self.organization
63
- res = self.api.project.create(CreateProject.Request(organization_id=org, id_prefix=id_prefix, name=name))
59
+ res: CreateProjectResponse = self.api.project.create(CreateProjectRequest(id_prefix=id_prefix, name=name))
64
60
  return Project(self, res.project.id, name=res.project.name)
65
61
 
66
62
  def project(self, project_id: str) -> "Project":
67
63
  """Open an existing project."""
68
- from .project import Project
64
+ from spiral.project import Project
69
65
 
70
66
  # We avoid an API call since we'd just be fetching a human-readable name. Seems a waste in most cases.
71
67
  return Project(self, id=project_id, name=project_id)
72
68
 
73
- def table(self, identifier: str) -> "Table":
74
- """Open a table with a "project.dataset.table" identifier."""
75
- parts = identifier.split(".")
76
- if len(parts) != 3:
77
- raise ValueError(f"Invalid table identifier: {identifier}")
78
- project_id, dataset, table = parts
79
-
80
- return self.project(project_id).table(f"{dataset}.{table}")
81
-
82
- def iceberg_catalog(self) -> "catalog.Catalog":
83
- """Open the Iceberg catalog."""
84
- from pyiceberg.catalog import load_catalog
85
-
86
- return load_catalog(
87
- "default",
88
- **{
89
- "type": "rest",
90
- "uri": self._config.spiraldb.uri_iceberg,
91
- "token": self._config.authn.token(),
92
- },
93
- )
69
+ @property
70
+ def iceberg(self) -> "Iceberg":
71
+ """
72
+ Apache Iceberg is a powerful open-source table format designed for high-performance data lakes.
73
+ Iceberg brings reliability, scalability, and advanced features like time travel, schema evolution,
74
+ and ACID transactions to your warehouse.
75
+
76
+ """
77
+ from spiral.iceberg import Iceberg
78
+
79
+ return Iceberg(self)
@@ -0,0 +1,117 @@
1
+ from spiral.api.types import DatasetName, IndexName, OrgId, ProjectId, RootUri, TableName
2
+ from spiral.core.index import SearchScan, TextIndex
3
+ from spiral.core.table import Table, TableMaintenance, TableScan, TableSnapshot, TableTransaction
4
+ from spiral.core.table.spec import Schema
5
+ from spiral.expressions import Expr
6
+
7
+ class Token:
8
+ def __init__(self, value: str): ...
9
+ def expose_secret(self) -> str: ...
10
+
11
+ class Authn:
12
+ @staticmethod
13
+ def from_token(token: Token) -> Authn: ...
14
+ @staticmethod
15
+ def from_fallback() -> Authn: ...
16
+ @staticmethod
17
+ def from_device() -> Authn: ...
18
+ def token(self) -> Token | None: ...
19
+
20
+ class DeviceCodeAuth:
21
+ @staticmethod
22
+ def default() -> DeviceCodeAuth:
23
+ """Return the static device code instance."""
24
+ ...
25
+ def authenticate(self, force: bool = False, org_id: OrgId | None = None) -> Token:
26
+ """Authenticate using device code flow."""
27
+ ...
28
+
29
+ def logout(self) -> None:
30
+ """Logout from the device authentication session."""
31
+ ...
32
+
33
+ class Spiral:
34
+ """A client for Spiral database"""
35
+ def __init__(
36
+ self,
37
+ api_url: str | None = None,
38
+ spfs_url: str | None = None,
39
+ authn: Authn | None = None,
40
+ ):
41
+ """Initialize the Spiral client."""
42
+ ...
43
+ def authn(self) -> Authn:
44
+ """Get the current authentication context."""
45
+ ...
46
+ def create_table(
47
+ self,
48
+ project_id: ProjectId,
49
+ dataset: DatasetName,
50
+ table: TableName,
51
+ key_schema: Schema,
52
+ *,
53
+ root_uri: RootUri | None = None,
54
+ exist_ok: bool = False,
55
+ ) -> Table:
56
+ """Create a new table in the specified project."""
57
+ ...
58
+
59
+ def get_table(self, table_id: str) -> Table:
60
+ """Get and open table."""
61
+
62
+ def open_table(self, table_id: str, key_schema: Schema, root_uri: RootUri) -> Table:
63
+ """Open a table. This does not make any network calls."""
64
+ ...
65
+
66
+ def open_table_scan(
67
+ self,
68
+ projection: Expr,
69
+ filter: Expr | None = None,
70
+ asof: int | None = None,
71
+ exclude_keys: bool = False,
72
+ ) -> TableScan:
73
+ """Construct a table scan."""
74
+ ...
75
+
76
+ def open_transaction(self, table: Table, format: str | None = None) -> TableTransaction:
77
+ """Begin transaction."""
78
+ ...
79
+
80
+ def open_maintenance(self, table: Table, format: str | None = None) -> TableMaintenance:
81
+ """Access maintenance operations for a table."""
82
+ ...
83
+ def create_text_index(
84
+ self,
85
+ project_id: ProjectId,
86
+ name: IndexName,
87
+ projection: Expr,
88
+ filter: Expr | None = None,
89
+ *,
90
+ root_uri: RootUri | None = None,
91
+ exist_ok: bool = False,
92
+ ) -> TextIndex:
93
+ """Create a new index in the specified project."""
94
+ ...
95
+
96
+ def get_text_index(self, index_id: str) -> TextIndex:
97
+ """Get a text-based index."""
98
+ ...
99
+
100
+ def open_search_scan(
101
+ self,
102
+ rank_by: Expr,
103
+ top_k: int,
104
+ # NOTE(marko): Required for now.
105
+ freshness_window_s: int,
106
+ *,
107
+ filter: Expr | None = None,
108
+ ) -> SearchScan:
109
+ """Query an index."""
110
+ ...
111
+
112
+ def _sync_snapshot(self, index_id: str, snapshot: TableSnapshot) -> None:
113
+ """Synchronize an index with a table snapshot.
114
+
115
+ IMPORTANT: This is only exposed for testing purposes and should not be used.
116
+ """
117
+ ...
@@ -0,0 +1,15 @@
1
+ import pyarrow as pa
2
+
3
+ class IndexStatus:
4
+ status: str
5
+ staleness_s: int | None
6
+ # An extent of keys that are indexed.
7
+ # key_extent: KeyExtent | None
8
+
9
+ class TextIndex:
10
+ id: str
11
+
12
+ def status(self) -> IndexStatus: ...
13
+
14
+ class SearchScan:
15
+ def to_record_batches(self) -> pa.RecordBatchReader: ...
@@ -1,11 +1,36 @@
1
1
  from typing import Any, Literal
2
2
 
3
3
  import pyarrow as pa
4
- from spiral.core.manifests import FragmentManifest
5
- from spiral.core.metastore import PyMetastore
6
- from spiral.core.spec import ColumnGroup, ColumnGroupMetadata, FileFormat, KeyRange, Schema, WriteAheadLog
7
4
  from spiral.expressions import Expr
8
5
 
6
+ from .manifests import FragmentManifest
7
+ from .metastore import PyMetastore
8
+ from .spec import ColumnGroup, Key, Schema, WriteAheadLog
9
+
10
+ class KeyRange:
11
+ """A right-exclusive range of keys."""
12
+
13
+ def __init__(self, *, begin: Key, end: Key): ...
14
+
15
+ begin: Key
16
+ end: Key
17
+
18
+ def union(self, other: KeyRange) -> KeyRange: ...
19
+ def __or__(self, other: KeyRange) -> KeyRange: ...
20
+ def intersection(self, key_extent: KeyRange) -> KeyRange | None: ...
21
+ def __and__(self, other: KeyRange) -> KeyRange | None: ...
22
+ def contains(self, item: Key) -> bool: ...
23
+ def __contains__(self, item: Key) -> bool: ...
24
+ def is_disjoint(self, key_range: KeyRange) -> bool:
25
+ return self.end <= key_range.begin or self.begin >= key_range.end
26
+
27
+ @staticmethod
28
+ def beginning_with(begin: Key) -> KeyRange: ...
29
+ @staticmethod
30
+ def ending_with(end: Key) -> KeyRange: ...
31
+ @staticmethod
32
+ def full() -> KeyRange: ...
33
+
9
34
  class Table:
10
35
  def __init__(self, metastore: PyMetastore): ...
11
36
 
@@ -16,18 +41,16 @@ class Table:
16
41
 
17
42
  def get_wal(self, *, asof: int | None) -> WriteAheadLog: ...
18
43
  def get_schema(self, *, asof: int | None) -> Schema: ...
19
- def get_column_group_metadata(self, column_group: ColumnGroup, *, asof: int | None) -> ColumnGroupMetadata: ...
20
- def list_column_groups(self, *, asof: int | None) -> list[ColumnGroup] | None: ...
44
+ def get_snapshot(self, *, asof: int | None) -> TableSnapshot: ...
45
+
46
+ class TableSnapshot:
47
+ """A snapshot of a table at a specific point in time."""
48
+
49
+ asof: int
50
+ table: Table
51
+ wal: WriteAheadLog
21
52
 
22
53
  class TableScan:
23
- def __init__(
24
- self,
25
- projection: Expr,
26
- filter: Expr | None = None,
27
- asof: int | None = None,
28
- exclude_keys: bool = False,
29
- aux_schema: pa.Schema | None = None,
30
- ) -> TableScan: ...
31
54
  def key_schema(self) -> Schema: ...
32
55
  def schema(self) -> Schema: ...
33
56
  def is_empty(self) -> bool: ...
@@ -39,6 +62,12 @@ class TableScan:
39
62
  key_table: pa.Table | pa.RecordBatch | None = None,
40
63
  batch_readahead: int | None = None,
41
64
  ) -> pa.RecordBatchReader: ...
65
+ def to_shuffled_record_batches(
66
+ self,
67
+ batch_readahead: int | None = None,
68
+ shuffle_buffer_size: int | None = None,
69
+ shuffle_pool_num_rows: int | None = None,
70
+ ) -> pa.RecordBatchReader: ...
42
71
  def column_group_scan(self, column_group: ColumnGroup) -> ColumnGroupScan: ...
43
72
  def key_space_scan(self, table_id: str) -> KeySpaceScan: ...
44
73
  def metrics(self) -> dict[str, Any]: ...
@@ -54,16 +83,14 @@ class ColumnGroupScan:
54
83
  def schema(self) -> Schema: ...
55
84
 
56
85
  class TableTransaction:
57
- def __init__(self, metastore: PyMetastore, format: FileFormat): ...
58
- @property
59
- def status(self) -> str: ...
86
+ status: str
87
+
60
88
  def write(self, expr: Expr, *, partition_size_bytes: int | None = None): ...
61
89
  def commit(self): ...
62
90
  def abort(self): ...
63
91
  def metrics(self) -> dict[str, Any]: ...
64
92
 
65
93
  class TableMaintenance:
66
- def __init__(self, metastore: PyMetastore, format: FileFormat): ...
67
94
  def flush_wal(self): ...
68
95
  def compact_key_space(
69
96
  self,
@@ -1,5 +1,6 @@
1
1
  import pyarrow as pa
2
- from spiral.core.spec import FileFormat, FragmentLevel, KeyExtent, KeyMap, KeyRange, KeySpan
2
+ from spiral.core.table import KeyRange
3
+ from spiral.core.table.spec import FileFormat, FragmentLevel, KeyExtent, KeySpan
3
4
  from spiral.types_ import Timestamp
4
5
 
5
6
  class FragmentManifest:
@@ -22,32 +23,13 @@ class FragmentFile:
22
23
  format: FileFormat
23
24
  format_metadata: bytes | None
24
25
  size_bytes: int
25
- # NOTE: Empty for keyspace file.
26
26
  column_ids: list[str]
27
- fs_id: str
28
- fs_level: FragmentLevel
27
+ level: FragmentLevel
28
+ # NOTE: Empty for key space files.
29
+ column_ids: list[str]
29
30
  ks_id: str
30
31
  key_span: KeySpan
31
32
  key_extent: KeyExtent
32
- key_map: KeyMap | None
33
33
 
34
- def __init__(
35
- self,
36
- *,
37
- id: str,
38
- committed_at: Timestamp | None,
39
- compacted_at: Timestamp | None,
40
- format: FileFormat,
41
- format_metadata: bytes,
42
- size_bytes: int,
43
- column_ids: list[str],
44
- fs_id: str,
45
- fs_level: FragmentLevel,
46
- ks_id: str,
47
- key_span: KeySpan,
48
- key_extent: KeyExtent,
49
- key_map: KeyMap | None,
50
- stats: pa.StructArray,
51
- ): ...
52
34
  @property
53
35
  def key_range(self) -> KeyRange: ...
@@ -0,0 +1,62 @@
1
+ """The SpiralDB metastore API."""
2
+
3
+ from spiral.core.client import Authn
4
+ from spiral.core.table.spec import ColumnGroup, ColumnGroupMetadata, FileFormat, KeySpaceMetadata, Schema, WriteAheadLog
5
+ from spiral.types_ import Uri
6
+
7
+ class FileHandle:
8
+ def __init__(self, *, uri: str, format: FileFormat, spfs_token: str | None): ...
9
+
10
+ uri: str
11
+ format: FileFormat
12
+ spfs_token: str | None
13
+
14
+ class FileRef:
15
+ def __init__(self, *, id: str, file_type: FileType, file_format: FileFormat): ...
16
+
17
+ id: str
18
+ file_type: FileType
19
+ file_format: FileFormat
20
+
21
+ def resolve_uri(self, root_uri: str) -> str:
22
+ """Resolves the file reference URI given the root URI."""
23
+
24
+ class FileType:
25
+ FragmentFile: FileType
26
+ FragmentManifest: FileType
27
+ ReferenceFile: FileType
28
+
29
+ def __int__(self) -> int:
30
+ """Returns the protobuf enum int value."""
31
+
32
+ class PyMetastore:
33
+ """Rust implementation of the metastore API."""
34
+
35
+ @property
36
+ def table_id(self) -> str: ...
37
+ @property
38
+ def root_uri(self) -> Uri: ...
39
+ @property
40
+ def key_schema(self) -> Schema: ...
41
+ def get_wal(self) -> WriteAheadLog:
42
+ """Return the log for the table."""
43
+ def get_key_space_metadata(self) -> KeySpaceMetadata:
44
+ """Return the metadata for the key space."""
45
+ ...
46
+ def get_column_group_metadata(self, column_group: ColumnGroup) -> ColumnGroupMetadata:
47
+ """Return the metadata for a column group."""
48
+ ...
49
+
50
+ @staticmethod
51
+ def http(
52
+ table_id: str,
53
+ root_uri: str,
54
+ key_schema: Schema,
55
+ base_url: str,
56
+ authn: Authn,
57
+ ) -> PyMetastore:
58
+ """Construct a PyMetastore backed by an HTTP metastore service."""
59
+
60
+ @staticmethod
61
+ def test(table_id: str, root_uri: str, key_schema: Schema) -> PyMetastore:
62
+ """Construct a PyMetastore backed by an in-memory mock metastore service."""