pyspiral 0.4.4__cp310-abi3-macosx_11_0_arm64.whl → 0.6.0__cp310-abi3-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. {pyspiral-0.4.4.dist-info → pyspiral-0.6.0.dist-info}/METADATA +10 -5
  2. pyspiral-0.6.0.dist-info/RECORD +99 -0
  3. {pyspiral-0.4.4.dist-info → pyspiral-0.6.0.dist-info}/WHEEL +1 -1
  4. spiral/__init__.py +10 -3
  5. spiral/_lib.abi3.so +0 -0
  6. spiral/adbc.py +29 -11
  7. spiral/api/__init__.py +14 -0
  8. spiral/api/client.py +5 -1
  9. spiral/api/key_space_indexes.py +23 -0
  10. spiral/api/projects.py +17 -2
  11. spiral/api/text_indexes.py +56 -0
  12. spiral/api/types.py +2 -0
  13. spiral/api/workers.py +40 -0
  14. spiral/cli/__init__.py +15 -6
  15. spiral/cli/admin.py +2 -4
  16. spiral/cli/app.py +4 -2
  17. spiral/cli/fs.py +5 -6
  18. spiral/cli/iceberg.py +97 -0
  19. spiral/cli/key_spaces.py +68 -0
  20. spiral/cli/login.py +6 -7
  21. spiral/cli/orgs.py +7 -8
  22. spiral/cli/printer.py +3 -3
  23. spiral/cli/projects.py +5 -6
  24. spiral/cli/tables.py +131 -0
  25. spiral/cli/telemetry.py +3 -4
  26. spiral/cli/text.py +115 -0
  27. spiral/cli/types.py +3 -4
  28. spiral/cli/workloads.py +7 -8
  29. spiral/client.py +111 -8
  30. spiral/core/authn/__init__.pyi +27 -0
  31. spiral/core/client/__init__.pyi +135 -63
  32. spiral/core/table/__init__.pyi +36 -26
  33. spiral/core/table/metastore/__init__.pyi +0 -4
  34. spiral/core/table/spec/__init__.pyi +0 -2
  35. spiral/{tables/dataset.py → dataset.py} +13 -7
  36. spiral/{tables/debug → debug}/manifests.py +17 -6
  37. spiral/{tables/debug → debug}/scan.py +7 -7
  38. spiral/expressions/base.py +3 -3
  39. spiral/expressions/udf.py +1 -1
  40. spiral/{iceberg/client.py → iceberg.py} +1 -3
  41. spiral/key_space_index.py +44 -0
  42. spiral/project.py +171 -18
  43. spiral/protogen/_/arrow/flight/protocol/sql/__init__.py +1668 -1110
  44. spiral/protogen/_/google/protobuf/__init__.py +2190 -0
  45. spiral/protogen/_/message_pool.py +3 -0
  46. spiral/protogen/_/py.typed +0 -0
  47. spiral/protogen/_/scandal/__init__.py +138 -126
  48. spiral/protogen/_/spfs/__init__.py +72 -0
  49. spiral/protogen/_/spql/__init__.py +61 -0
  50. spiral/protogen/_/substrait/__init__.py +5256 -2459
  51. spiral/protogen/_/substrait/extensions/__init__.py +103 -49
  52. spiral/{tables/scan.py → scan.py} +37 -44
  53. spiral/settings.py +14 -3
  54. spiral/snapshot.py +55 -0
  55. spiral/streaming_/__init__.py +3 -0
  56. spiral/streaming_/reader.py +117 -0
  57. spiral/streaming_/stream.py +146 -0
  58. spiral/substrait_.py +9 -9
  59. spiral/table.py +257 -0
  60. spiral/text_index.py +17 -0
  61. spiral/{tables/transaction.py → transaction.py} +11 -15
  62. pyspiral-0.4.4.dist-info/RECORD +0 -98
  63. spiral/cli/iceberg/__init__.py +0 -7
  64. spiral/cli/iceberg/namespaces.py +0 -47
  65. spiral/cli/iceberg/tables.py +0 -60
  66. spiral/cli/indexes/__init__.py +0 -19
  67. spiral/cli/tables/__init__.py +0 -121
  68. spiral/core/index/__init__.pyi +0 -15
  69. spiral/iceberg/__init__.py +0 -3
  70. spiral/indexes/__init__.py +0 -5
  71. spiral/indexes/client.py +0 -137
  72. spiral/indexes/index.py +0 -34
  73. spiral/indexes/scan.py +0 -22
  74. spiral/protogen/_/spiral/table/__init__.py +0 -22
  75. spiral/protogen/substrait/__init__.py +0 -3399
  76. spiral/protogen/substrait/extensions/__init__.py +0 -115
  77. spiral/tables/__init__.py +0 -12
  78. spiral/tables/client.py +0 -130
  79. spiral/tables/maintenance.py +0 -12
  80. spiral/tables/snapshot.py +0 -78
  81. spiral/tables/table.py +0 -145
  82. {pyspiral-0.4.4.dist-info → pyspiral-0.6.0.dist-info}/entry_points.txt +0 -0
  83. /spiral/{protogen/_/spiral → debug}/__init__.py +0 -0
  84. /spiral/{tables/debug → debug}/metrics.py +0 -0
  85. /spiral/{tables/debug → protogen/_/google}/__init__.py +0 -0
@@ -1,115 +0,0 @@
1
- # Generated by the protocol buffer compiler. DO NOT EDIT!
2
- # sources: substrait/extensions/extensions.proto
3
- # plugin: python-betterproto
4
- # This file has been @generated
5
-
6
- from dataclasses import dataclass
7
- from typing import List
8
-
9
- import betterproto
10
- import betterproto.lib.google.protobuf as betterproto_lib_google_protobuf
11
-
12
-
13
@dataclass(eq=False, repr=False)
class SimpleExtensionUri(betterproto.Message):
    """Pairs a plan-local anchor with the URI of an extension YAML."""

    # Surrogate key, scoped to a single plan, through which the URI of an
    # extension is referenced.
    extension_uri_anchor: int = betterproto.uint32_field(1)

    # Location from which the extension YAML can be fetched; it doubles as the
    # "namespace" of the extension.
    uri: str = betterproto.string_field(2)
26
-
27
-
28
@dataclass(eq=False, repr=False)
class SimpleExtensionDeclaration(betterproto.Message):
    """
    Describes a mapping between a specific extension entity and the uri where
    that extension can be found.
    """

    # The three fields below all belong to the "mapping_type" group.
    extension_type: "SimpleExtensionDeclarationExtensionType" = betterproto.message_field(
        1, group="mapping_type"
    )
    extension_type_variation: "SimpleExtensionDeclarationExtensionTypeVariation" = betterproto.message_field(
        2, group="mapping_type"
    )
    extension_function: "SimpleExtensionDeclarationExtensionFunction" = betterproto.message_field(
        3, group="mapping_type"
    )
44
-
45
-
46
@dataclass(eq=False, repr=False)
class SimpleExtensionDeclarationExtensionType(betterproto.Message):
    """Describes a Type"""

    # Points at the extension_uri_anchor declared for a particular extension URI.
    extension_uri_reference: int = betterproto.uint32_field(1)

    # Surrogate key, scoped to a single plan, that identifies one specific
    # extension type.
    type_anchor: int = betterproto.uint32_field(2)

    # Name of the type as written in the defining extension YAML.
    name: str = betterproto.string_field(3)
63
-
64
-
65
@dataclass(eq=False, repr=False)
class SimpleExtensionDeclarationExtensionTypeVariation(betterproto.Message):
    """Declares a type variation provided by an extension."""

    # Points at the extension_uri_anchor declared for a particular extension URI.
    extension_uri_reference: int = betterproto.uint32_field(1)

    # Surrogate key, scoped to a single plan, that identifies one specific
    # type variation.
    type_variation_anchor: int = betterproto.uint32_field(2)

    # Name of the type as written in the defining extension YAML.
    name: str = betterproto.string_field(3)
80
-
81
-
82
@dataclass(eq=False, repr=False)
class SimpleExtensionDeclarationExtensionFunction(betterproto.Message):
    """Declares a function provided by an extension."""

    # Points at the extension_uri_anchor declared for a particular extension URI.
    extension_uri_reference: int = betterproto.uint32_field(1)

    # Surrogate key, scoped to a single plan, that identifies one specific
    # function.
    function_anchor: int = betterproto.uint32_field(2)

    # Compound name of the function signature.
    name: str = betterproto.string_field(3)
97
-
98
-
99
@dataclass(eq=False, repr=False)
class AdvancedExtension(betterproto.Message):
    """
    A generic object that can be used to embed additional extension information
    into the serialized substrait plan.
    """

    # Optimizations are helpful hints that do not influence semantics; a
    # consumer may ignore them.
    optimization: List["betterproto_lib_google_protobuf.Any"] = betterproto.message_field(1)

    # An enhancement alters semantics and therefore cannot be ignored by a
    # consumer.
    enhancement: "betterproto_lib_google_protobuf.Any" = betterproto.message_field(2)
spiral/tables/__init__.py DELETED
@@ -1,12 +0,0 @@
1
- from spiral import _lib
2
- from spiral.tables.client import Tables
3
- from spiral.tables.maintenance import Maintenance
4
- from spiral.tables.scan import Scan
5
- from spiral.tables.snapshot import Snapshot
6
- from spiral.tables.table import Table
7
- from spiral.tables.transaction import Transaction
8
-
9
# Reference the native extension module so that it is imported eagerly,
# together with the package itself.
assert _lib, "Spiral library"

# Public names re-exported by this package.
__all__ = [
    "Tables",
    "Table",
    "Snapshot",
    "Scan",
    "Transaction",
    "Maintenance",
]
spiral/tables/client.py DELETED
@@ -1,130 +0,0 @@
1
- from datetime import datetime
2
- from typing import Any
3
-
4
- import pyarrow as pa
5
-
6
- from spiral.api import SpiralAPI
7
- from spiral.api.projects import TableResource
8
- from spiral.core.client import Spiral as CoreSpiral
9
- from spiral.core.table.spec import Schema
10
- from spiral.datetime_ import timestamp_micros
11
- from spiral.expressions import ExprLike
12
- from spiral.tables.scan import Scan
13
- from spiral.tables.table import Table
14
- from spiral.types_ import Uri
15
-
16
-
17
class Tables:
    """
    Spiral Tables are a powerful and flexible way of storing, analyzing,
    and querying massive and/or multimodal datasets.

    The data model will feel familiar to users of SQL- or DataFrame-style systems,
    yet is designed to be more flexible, more powerful, and more useful in the context
    of modern data processing. Tables are stored and queried directly from object storage.
    """

    def __init__(self, api: SpiralAPI, spiral: CoreSpiral, *, project_id: str | None = None):
        """Create a tables client.

        Args:
            api: Spiral API client; used here to list table resources.
            spiral: Core client used to open, create and scan tables.
            project_id: Default project for identifiers that do not name one.
        """
        self._api = api
        self._spiral = spiral
        self._project_id = project_id

    def table(self, identifier: str) -> Table:
        """Open an existing table.

        Args:
            identifier: `project.dataset.table`, `dataset.table` (default project),
                or a bare `table` name (default project and the `default` dataset).

        Raises:
            ValueError: If the identifier is malformed, no project can be resolved,
                or no matching table is found.
        """
        project_id, dataset, table = self._parse_identifier(identifier)
        if project_id is None:
            raise ValueError("Must provide a fully qualified table identifier.")

        matches = list(self._api.project.list_tables(project_id, dataset=dataset, table=table))
        if not matches:
            raise ValueError(f"Table not found: {project_id}.{dataset}.{table}")

        resource = matches[0]
        return Table(
            self,
            self._spiral.get_table(resource.id),
            identifier=f"{resource.project_id}.{resource.dataset}.{resource.table}",
        )

    def list_tables(self) -> list[TableResource]:
        """List the tables of the client's default project.

        Raises:
            ValueError: If the client was created without a project ID.
        """
        project_id = self._project_id
        if project_id is None:
            raise ValueError("Must provide a project ID to list tables.")
        return list(self._api.project.list_tables(project_id))

    def create_table(
        self,
        identifier: str,
        *,
        key_schema: pa.Schema | Any,
        root_uri: Uri | None = None,
        exist_ok: bool = False,
    ) -> Table:
        """Create a new table in the project.

        Args:
            identifier: The table identifier, in the form `project.dataset.table`, `dataset.table` or `table`.
            key_schema: The schema of the table's keys. Anything that is not already a
                `pa.Schema` is passed through `pa.schema(...)` first.
            root_uri: The root URI for the table.
            exist_ok: If True, do not raise an error if the table already exists.

        Raises:
            ValueError: If the identifier is malformed or no project can be resolved.
        """
        project_id, dataset, table = self._parse_identifier(identifier)
        if project_id is None:
            raise ValueError("Must provide a fully qualified table identifier.")

        if not isinstance(key_schema, pa.Schema):
            key_schema = pa.schema(key_schema)
        key_schema = Schema.from_arrow(key_schema)

        core_table = self._spiral.create_table(
            project_id,
            dataset=dataset,
            table=table,
            key_schema=key_schema,
            root_uri=root_uri,
            exist_ok=exist_ok,
        )

        return Table(self, core_table, identifier=f"{project_id}.{dataset}.{table}")

    def _parse_identifier(self, identifier: str) -> tuple[str | None, str, str]:
        """Split an identifier into `(project_id, dataset, table)`.

        Missing leading components are filled in with the client's default
        project (which may be None) and the `default` dataset.

        Raises:
            ValueError: If the identifier has more than three components or
                any component is empty (e.g. `""` or `"dataset..table"`).
        """
        parts = identifier.split(".")
        # Reject empty components up front instead of passing them on as names.
        if len(parts) > 3 or not all(parts):
            raise ValueError(f"Invalid table identifier: {identifier}")

        if len(parts) == 3:
            return parts[0], parts[1], parts[2]
        if len(parts) == 2:
            return self._project_id, parts[0], parts[1]
        return self._project_id, "default", parts[0]

    def scan(
        self,
        *projections: ExprLike,
        where: ExprLike | None = None,
        asof: datetime | int | None = None,
        exclude_keys: bool = False,
    ) -> Scan:
        """Starts a read transaction on the Spiral.

        Args:
            projections: a set of expressions that return struct arrays.
            where: a query expression to apply to the data.
            asof: only data written before the given timestamp will be returned, caveats around compaction.
            exclude_keys: whether to exclude the key columns in the scan result, defaults to False.
                Note that if a projection includes a key column, it will be included in the result.
        """
        # Imported lazily, as in the original code.
        from spiral import expressions as se

        if isinstance(asof, datetime):
            asof = timestamp_micros(asof)

        # Combine all projections into a single struct.
        projection = se.merge(*projections)

        # Decide on the filter with an explicit None check. Relying on the
        # truthiness of the lifted expression object would drop (or fail on)
        # filters whose expression type defines __bool__ or __len__.
        filter_expr = None
        if where is not None:
            filter_expr = se.lift(where).__expr__

        return Scan(
            self._spiral.open_table_scan(
                projection.__expr__,
                filter=filter_expr,
                asof=asof,
                exclude_keys=exclude_keys,
            ),
        )
@@ -1,12 +0,0 @@
1
- from spiral.core.table import TableMaintenance
2
-
3
-
4
class Maintenance:
    """Spiral table maintenance."""

    def __init__(self, maintenance: TableMaintenance) -> None:
        # Core maintenance handle that every operation is delegated to.
        self._maintenance = maintenance

    def flush_wal(self) -> None:
        """Flush the write-ahead log by delegating to the core maintenance handle."""
        handle = self._maintenance
        handle.flush_wal()
spiral/tables/snapshot.py DELETED
@@ -1,78 +0,0 @@
1
- from typing import TYPE_CHECKING
2
-
3
- from spiral.core.table import TableSnapshot
4
- from spiral.expressions import ExprLike
5
- from spiral.tables.scan import Scan
6
- from spiral.types_ import Timestamp
7
-
8
- if TYPE_CHECKING:
9
- import duckdb
10
- import polars as pl
11
- import pyarrow.dataset
12
-
13
- from spiral.tables import Tables
14
- from spiral.tables.table import Table
15
-
16
-
17
class Snapshot:
    """Spiral table snapshot.

    A snapshot is a view of a table pinned to a single `asof` timestamp.
    """

    def __init__(self, tables: "Tables", snapshot: TableSnapshot):
        self._tables = tables
        self._snapshot = snapshot

    @property
    def asof(self) -> Timestamp:
        """The timestamp this snapshot is pinned to."""
        return self._snapshot.asof

    @property
    def client(self) -> "Tables":
        """The tables client this snapshot was created with."""
        return self._tables

    @property
    def table(self) -> "Table":
        """The table this snapshot was taken from."""
        # Imported here rather than at module level, as in the original code.
        from spiral.tables.table import Table

        core_table = self._snapshot.table
        return Table(self._tables, core_table)

    def to_dataset(self) -> "pyarrow.dataset.Dataset":
        """Expose the snapshot as a PyArrow Dataset."""
        from .dataset import TableDataset

        return TableDataset(self)

    def to_polars(self) -> "pl.LazyFrame":
        """Expose the snapshot as a Polars LazyFrame built on its PyArrow dataset."""
        import polars as pl

        dataset = self.to_dataset()
        return pl.scan_pyarrow_dataset(dataset)

    def to_duckdb(self) -> "duckdb.DuckDBPyRelation":
        """Expose the snapshot as a DuckDB relation built on its PyArrow dataset."""
        import duckdb

        dataset = self.to_dataset()
        return duckdb.from_arrow(dataset)

    def scan(
        self,
        *projections: ExprLike,
        where: ExprLike | None = None,
        exclude_keys: bool = False,
    ) -> Scan:
        """Scan the snapshot at its `asof` timestamp.

        When no projections are given, the table expression itself is used,
        i.e. the entire table is read.
        """
        # Fall back to the table expression as the only projection.
        selected = projections if projections else (self._snapshot.table.__expr__,)
        return self._tables.scan(
            *selected,
            where=where,
            asof=self._snapshot.asof,
            exclude_keys=exclude_keys,
        )
spiral/tables/table.py DELETED
@@ -1,145 +0,0 @@
1
- from datetime import datetime
2
- from typing import TYPE_CHECKING
3
-
4
- from spiral.core.table import Table as CoreTable
5
- from spiral.core.table.spec import Schema
6
- from spiral.expressions.base import Expr, ExprLike
7
- from spiral.settings import settings
8
- from spiral.tables.maintenance import Maintenance
9
- from spiral.tables.scan import Scan
10
- from spiral.tables.snapshot import Snapshot
11
- from spiral.tables.transaction import Transaction
12
-
13
- if TYPE_CHECKING:
14
- from spiral.tables import Tables
15
-
16
-
17
class Table(Expr):
    """API for interacting with a SpiralDB Table.

    Different catalog implementations should ultimately construct a Table object.
    """

    # TODO(marko): Make identifier required.
    def __init__(self, tables: "Tables", table: CoreTable, *, identifier: str | None = None):
        """Wrap a core table handle.

        Args:
            tables: Client this table belongs to; used for scans, transactions
                and maintenance.
            table: Core table handle. Its `__expr__` becomes this expression.
            identifier: Optional identifier; `dataset` and `name` expect the
                three-part `project.dataset.table` form.
        """
        super().__init__(table.__expr__)

        self._tables = tables
        self._table = table
        self._identifier = identifier
        # Cache the key schema and key column names for `select` validation.
        self._key_schema = self._table.key_schema
        self._key_columns = set(self._key_schema.names)

    @property
    def client(self) -> "Tables":
        """Returns the client used by the table."""
        return self._tables

    @property
    def table_id(self) -> str:
        """Returns the ID of the underlying core table."""
        return self._table.id

    @property
    def identifier(self) -> str:
        """Returns the fully qualified identifier of the table, or its ID if none was given."""
        return self._identifier or self._table.id

    @property
    def dataset(self) -> str | None:
        """Returns the dataset of the table, or None when no identifier is known."""
        if self._identifier is None:
            return None
        _, dataset, _ = self._identifier.split(".")
        return dataset

    @property
    def name(self) -> str | None:
        """Returns the name of the table, or None when no identifier is known."""
        if self._identifier is None:
            return None
        _, _, name = self._identifier.split(".")
        return name

    @property
    def last_modified_at(self) -> int:
        """Returns `last_modified_at` of the table's current write-ahead log."""
        return self._table.get_wal(asof=None).last_modified_at

    def __str__(self):
        return self.identifier

    def __repr__(self):
        return f'Table("{self.identifier}")'

    def __getitem__(self, item: str) -> Expr:
        return super().__getitem__(item)

    def select(self, *paths: str, exclude: list[str] | None = None) -> "Expr":
        """Select columns, refusing to exclude key columns.

        Raises:
            ValueError: If `exclude` names any key column.
        """
        # Override an expression select in the root column group to split between keys and columns.
        if exclude is not None and set(exclude) & self._key_columns:
            raise ValueError(
                "Cannot use 'exclude' arg with key columns. Use 'exclude_keys' and an explicit select of keys."
            )

        return super().select(*paths, exclude=exclude)

    @property
    def key_schema(self) -> Schema:
        """Returns the key schema of the table."""
        return self._key_schema

    @property
    def schema(self) -> Schema:
        """Returns the FULL schema of the table.

        NOTE: This can be expensive for large tables.
        """
        return self._table.get_schema(asof=None)

    def scan(
        self,
        *projections: ExprLike,
        where: ExprLike | None = None,
        asof: datetime | int | None = None,
        exclude_keys: bool = False,
    ) -> Scan:
        """Reads the table. If projections are not provided, the entire table is read."""
        if not projections:
            projections = (self,)

        return self._tables.scan(*projections, where=where, asof=asof, exclude_keys=exclude_keys)

    def write(
        self,
        expr: ExprLike,
        *,
        partition_size_bytes: int | None = None,
    ) -> None:
        """Write an item to the table inside a single transaction.

        :param expr: The expression to write. Must evaluate to a struct array.
        :param partition_size_bytes: The maximum partition size in bytes.
        """
        with self.txn() as txn:
            txn.write(
                expr,
                partition_size_bytes=partition_size_bytes,
            )

    @staticmethod
    def _to_micros(moment: datetime) -> int:
        """Convert a datetime to whole microseconds since the Unix epoch.

        Uses integer timedelta arithmetic rather than `timestamp() * 1_000_000`,
        whose float round-trip can be off by one microsecond. Naive datetimes
        are interpreted in the system time zone, as `datetime.timestamp()` does.
        """
        from datetime import timedelta, timezone

        epoch = datetime(1970, 1, 1, tzinfo=timezone.utc)
        return (moment.astimezone(timezone.utc) - epoch) // timedelta(microseconds=1)

    def snapshot(self, asof: datetime | int | None = None) -> Snapshot:
        """Returns a snapshot of the table at the given timestamp.

        Args:
            asof: A datetime (converted to microseconds since the epoch), an
                integer passed through as-is, or None.
        """
        if isinstance(asof, datetime):
            asof = self._to_micros(asof)
        return Snapshot(self._tables, self._table.get_snapshot(asof=asof))

    def txn(self) -> Transaction:
        """Begins a new transaction. Transaction must be committed for writes to become visible.

        IMPORTANT: While transaction can be used to atomically write data to the table,
             it is important that the primary key columns are unique within the transaction.
        """
        return Transaction(self._tables._spiral.open_transaction(self._table, settings().file_format))

    def maintenance(self) -> Maintenance:
        """Access maintenance operations for a table."""
        return Maintenance(self._tables._spiral.open_maintenance(self._table, settings().file_format))
File without changes
File without changes
File without changes