pyspiral 0.4.4__cp310-abi3-macosx_11_0_arm64.whl → 0.6.0__cp310-abi3-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pyspiral-0.4.4.dist-info → pyspiral-0.6.0.dist-info}/METADATA +10 -5
- pyspiral-0.6.0.dist-info/RECORD +99 -0
- {pyspiral-0.4.4.dist-info → pyspiral-0.6.0.dist-info}/WHEEL +1 -1
- spiral/__init__.py +10 -3
- spiral/_lib.abi3.so +0 -0
- spiral/adbc.py +29 -11
- spiral/api/__init__.py +14 -0
- spiral/api/client.py +5 -1
- spiral/api/key_space_indexes.py +23 -0
- spiral/api/projects.py +17 -2
- spiral/api/text_indexes.py +56 -0
- spiral/api/types.py +2 -0
- spiral/api/workers.py +40 -0
- spiral/cli/__init__.py +15 -6
- spiral/cli/admin.py +2 -4
- spiral/cli/app.py +4 -2
- spiral/cli/fs.py +5 -6
- spiral/cli/iceberg.py +97 -0
- spiral/cli/key_spaces.py +68 -0
- spiral/cli/login.py +6 -7
- spiral/cli/orgs.py +7 -8
- spiral/cli/printer.py +3 -3
- spiral/cli/projects.py +5 -6
- spiral/cli/tables.py +131 -0
- spiral/cli/telemetry.py +3 -4
- spiral/cli/text.py +115 -0
- spiral/cli/types.py +3 -4
- spiral/cli/workloads.py +7 -8
- spiral/client.py +111 -8
- spiral/core/authn/__init__.pyi +27 -0
- spiral/core/client/__init__.pyi +135 -63
- spiral/core/table/__init__.pyi +36 -26
- spiral/core/table/metastore/__init__.pyi +0 -4
- spiral/core/table/spec/__init__.pyi +0 -2
- spiral/{tables/dataset.py → dataset.py} +13 -7
- spiral/{tables/debug → debug}/manifests.py +17 -6
- spiral/{tables/debug → debug}/scan.py +7 -7
- spiral/expressions/base.py +3 -3
- spiral/expressions/udf.py +1 -1
- spiral/{iceberg/client.py → iceberg.py} +1 -3
- spiral/key_space_index.py +44 -0
- spiral/project.py +171 -18
- spiral/protogen/_/arrow/flight/protocol/sql/__init__.py +1668 -1110
- spiral/protogen/_/google/protobuf/__init__.py +2190 -0
- spiral/protogen/_/message_pool.py +3 -0
- spiral/protogen/_/py.typed +0 -0
- spiral/protogen/_/scandal/__init__.py +138 -126
- spiral/protogen/_/spfs/__init__.py +72 -0
- spiral/protogen/_/spql/__init__.py +61 -0
- spiral/protogen/_/substrait/__init__.py +5256 -2459
- spiral/protogen/_/substrait/extensions/__init__.py +103 -49
- spiral/{tables/scan.py → scan.py} +37 -44
- spiral/settings.py +14 -3
- spiral/snapshot.py +55 -0
- spiral/streaming_/__init__.py +3 -0
- spiral/streaming_/reader.py +117 -0
- spiral/streaming_/stream.py +146 -0
- spiral/substrait_.py +9 -9
- spiral/table.py +257 -0
- spiral/text_index.py +17 -0
- spiral/{tables/transaction.py → transaction.py} +11 -15
- pyspiral-0.4.4.dist-info/RECORD +0 -98
- spiral/cli/iceberg/__init__.py +0 -7
- spiral/cli/iceberg/namespaces.py +0 -47
- spiral/cli/iceberg/tables.py +0 -60
- spiral/cli/indexes/__init__.py +0 -19
- spiral/cli/tables/__init__.py +0 -121
- spiral/core/index/__init__.pyi +0 -15
- spiral/iceberg/__init__.py +0 -3
- spiral/indexes/__init__.py +0 -5
- spiral/indexes/client.py +0 -137
- spiral/indexes/index.py +0 -34
- spiral/indexes/scan.py +0 -22
- spiral/protogen/_/spiral/table/__init__.py +0 -22
- spiral/protogen/substrait/__init__.py +0 -3399
- spiral/protogen/substrait/extensions/__init__.py +0 -115
- spiral/tables/__init__.py +0 -12
- spiral/tables/client.py +0 -130
- spiral/tables/maintenance.py +0 -12
- spiral/tables/snapshot.py +0 -78
- spiral/tables/table.py +0 -145
- {pyspiral-0.4.4.dist-info → pyspiral-0.6.0.dist-info}/entry_points.txt +0 -0
- /spiral/{protogen/_/spiral → debug}/__init__.py +0 -0
- /spiral/{tables/debug → debug}/metrics.py +0 -0
- /spiral/{tables/debug → protogen/_/google}/__init__.py +0 -0
@@ -1,12 +1,12 @@
|
|
1
1
|
from datetime import datetime
|
2
2
|
|
3
|
-
from spiral.core.table import
|
3
|
+
from spiral.core.table import Scan
|
4
4
|
from spiral.core.table.manifests import FragmentFile, FragmentManifest
|
5
5
|
from spiral.core.table.spec import Key
|
6
6
|
from spiral.types_ import Timestamp
|
7
7
|
|
8
8
|
|
9
|
-
def show_scan(scan:
|
9
|
+
def show_scan(scan: Scan):
|
10
10
|
"""Displays a scan in a way that is useful for debugging."""
|
11
11
|
table_ids = scan.table_ids()
|
12
12
|
if len(table_ids) > 1:
|
@@ -14,18 +14,18 @@ def show_scan(scan: TableScan):
|
|
14
14
|
table_id = table_ids[0]
|
15
15
|
column_groups = scan.column_groups()
|
16
16
|
|
17
|
-
splits = scan.
|
18
|
-
|
17
|
+
splits = scan.splits()
|
18
|
+
key_space_state = scan.key_space_state(table_id)
|
19
19
|
|
20
20
|
# Collect all key bounds from all manifests. This makes sure all visualizations are aligned.
|
21
21
|
key_points = set()
|
22
|
-
key_space_manifest =
|
22
|
+
key_space_manifest = key_space_state.manifest
|
23
23
|
for i in range(len(key_space_manifest)):
|
24
24
|
fragment_file = key_space_manifest[i]
|
25
25
|
key_points.add(fragment_file.key_extent.min)
|
26
26
|
key_points.add(fragment_file.key_extent.max)
|
27
27
|
for cg in column_groups:
|
28
|
-
cg_scan = scan.
|
28
|
+
cg_scan = scan.column_group_state(cg)
|
29
29
|
cg_manifest = cg_scan.manifest
|
30
30
|
for i in range(len(cg_manifest)):
|
31
31
|
fragment_file = cg_manifest[i]
|
@@ -39,7 +39,7 @@ def show_scan(scan: TableScan):
|
|
39
39
|
|
40
40
|
show_manifest(key_space_manifest, scope="Key space", key_points=key_points, splits=splits)
|
41
41
|
for cg in scan.column_groups():
|
42
|
-
cg_scan = scan.
|
42
|
+
cg_scan = scan.column_group_state(cg)
|
43
43
|
# Skip table id from the start of the column group.
|
44
44
|
show_manifest(cg_scan.manifest, scope=".".join(cg.path[1:]), key_points=key_points, splits=splits)
|
45
45
|
|
spiral/expressions/base.py
CHANGED
@@ -132,7 +132,7 @@ class Expr:
|
|
132
132
|
packed = packed.select(exclude=exclude)
|
133
133
|
return packed
|
134
134
|
|
135
|
-
if not paths:
|
135
|
+
if not paths and not exclude:
|
136
136
|
return self
|
137
137
|
|
138
138
|
return se.select(self, names=list(paths), exclude=exclude)
|
@@ -145,5 +145,5 @@ class Expr:
|
|
145
145
|
return Expr(_lib.expr.binary(op, self.__expr__, rhs.__expr__))
|
146
146
|
|
147
147
|
|
148
|
-
ScalarLike: TypeAlias = bool | int | float | str | list | datetime.datetime | None
|
149
|
-
ExprLike: TypeAlias = Expr | dict | ScalarLike
|
148
|
+
ScalarLike: TypeAlias = bool | int | float | str | list["ScalarLike"] | datetime.datetime | None
|
149
|
+
ExprLike: TypeAlias = Expr | dict["ExprLike", "ExprLike"] | ScalarLike
|
spiral/expressions/udf.py
CHANGED
@@ -38,7 +38,7 @@ class RefUDF(BaseUDF):
|
|
38
38
|
super().__init__(_lib.expr.udf.create(name, return_type=self.return_type, invoke=self.invoke, scope="ref"))
|
39
39
|
|
40
40
|
@abc.abstractmethod
|
41
|
-
def invoke(self, fp
|
41
|
+
def invoke(self, fp, *input_args: pa.Array) -> pa.Array:
|
42
42
|
"""Invoke the UDF with the given arguments.
|
43
43
|
|
44
44
|
NOTE: The first argument is always the ref cell. All array input args will be sliced to the appropriate row.
|
@@ -13,10 +13,8 @@ class Iceberg:
|
|
13
13
|
and ACID transactions to your warehouse.
|
14
14
|
"""
|
15
15
|
|
16
|
-
def __init__(self, spiral: "Spiral"
|
16
|
+
def __init__(self, spiral: "Spiral"):
|
17
17
|
self._spiral = spiral
|
18
|
-
self._project_id = project_id
|
19
|
-
|
20
18
|
self._api = self._spiral.config.api
|
21
19
|
|
22
20
|
def catalog(self) -> "Catalog":
|
@@ -0,0 +1,44 @@
|
|
1
|
+
from spiral.core.client import KeySpaceIndex as CoreKeySpaceIndex
|
2
|
+
from spiral.expressions import Expr
|
3
|
+
from spiral.types_ import Timestamp
|
4
|
+
|
5
|
+
|
6
|
+
class KeySpaceIndex:
|
7
|
+
"""
|
8
|
+
KeysIndex represents an optionally materialized key space, defined by a projection and a filter over a table.
|
9
|
+
It can be used to efficiently and precisely shard the table for parallel processing or distributed training.
|
10
|
+
|
11
|
+
An index is defined by:
|
12
|
+
- A granularity that defines the target size of key ranges in the index.
|
13
|
+
IMPORTANT: Actual key ranges may be smaller, but will not exceed twice the granularity.
|
14
|
+
- A projection expression that defines which columns are included in the resulting key space.
|
15
|
+
- An optional filter expression that defines which rows are included in the index.
|
16
|
+
"""
|
17
|
+
|
18
|
+
def __init__(self, core: CoreKeySpaceIndex, *, name: str | None = None):
|
19
|
+
self.core = core
|
20
|
+
self._name = name
|
21
|
+
|
22
|
+
@property
|
23
|
+
def index_id(self) -> str:
|
24
|
+
return self.core.id
|
25
|
+
|
26
|
+
@property
|
27
|
+
def table_id(self) -> str:
|
28
|
+
return self.core.table_id
|
29
|
+
|
30
|
+
@property
|
31
|
+
def name(self) -> str:
|
32
|
+
return self._name or self.index_id
|
33
|
+
|
34
|
+
@property
|
35
|
+
def asof(self) -> Timestamp:
|
36
|
+
return self.core.asof
|
37
|
+
|
38
|
+
@property
|
39
|
+
def projection(self) -> Expr:
|
40
|
+
return Expr(self.core.projection)
|
41
|
+
|
42
|
+
@property
|
43
|
+
def filter(self) -> Expr | None:
|
44
|
+
return Expr(self.core.filter) if self.core.filter is not None else None
|
spiral/project.py
CHANGED
@@ -1,16 +1,23 @@
|
|
1
|
-
from typing import TYPE_CHECKING
|
1
|
+
from typing import TYPE_CHECKING, Any
|
2
|
+
|
3
|
+
import pyarrow as pa
|
4
|
+
|
5
|
+
from spiral.api.projects import KeySpaceIndexResource, TableResource, TextIndexResource
|
6
|
+
from spiral.core.table.spec import Schema
|
7
|
+
from spiral.expressions import ExprLike
|
8
|
+
from spiral.key_space_index import KeySpaceIndex
|
9
|
+
from spiral.table import Table
|
10
|
+
from spiral.text_index import TextIndex
|
11
|
+
from spiral.types_ import Uri
|
2
12
|
|
3
13
|
if TYPE_CHECKING:
|
4
14
|
from spiral.client import Spiral
|
5
|
-
from spiral.iceberg import Iceberg
|
6
|
-
from spiral.indexes import Indexes
|
7
|
-
from spiral.tables import Tables
|
8
15
|
|
9
16
|
|
10
17
|
class Project:
|
11
|
-
def __init__(self, spiral: "Spiral",
|
18
|
+
def __init__(self, spiral: "Spiral", project_id: str, name: str | None = None):
|
12
19
|
self._spiral = spiral
|
13
|
-
self._id =
|
20
|
+
self._id = project_id
|
14
21
|
self._name = name
|
15
22
|
|
16
23
|
def __str__(self):
|
@@ -27,20 +34,166 @@ class Project:
|
|
27
34
|
def name(self) -> str:
|
28
35
|
return self._name or self._id
|
29
36
|
|
30
|
-
|
31
|
-
|
32
|
-
from spiral.tables import Tables
|
37
|
+
def list_tables(self) -> list[TableResource]:
|
38
|
+
return list(self._spiral.api.project.list_tables(self._id))
|
33
39
|
|
34
|
-
|
40
|
+
def list_text_indexes(self) -> list[TextIndexResource]:
|
41
|
+
return list(self._spiral.api.project.list_text_indexes(self._id))
|
35
42
|
|
36
|
-
|
37
|
-
|
38
|
-
from spiral.indexes.client import Indexes
|
43
|
+
def list_key_space_indexes(self) -> list[KeySpaceIndexResource]:
|
44
|
+
return list(self._spiral.api.project.list_key_space_indexes(self._id))
|
39
45
|
|
40
|
-
|
46
|
+
def table(self, identifier: str) -> Table:
|
47
|
+
"""Open a table with a `dataset.table` identifier, or `table` name using the `default` dataset."""
|
48
|
+
dataset, table = self._parse_table_identifier(identifier)
|
41
49
|
|
42
|
-
|
43
|
-
|
44
|
-
|
50
|
+
res = list(self._spiral.api.project.list_tables(project_id=self._id, dataset=dataset, table=table))
|
51
|
+
if len(res) == 0:
|
52
|
+
raise ValueError(f"Table not found: {self._id}.{dataset}.{table}")
|
53
|
+
res = res[0]
|
54
|
+
|
55
|
+
return Table(
|
56
|
+
self._spiral, self._spiral._core.table(res.id), identifier=f"{res.project_id}.{res.dataset}.{res.table}"
|
57
|
+
)
|
58
|
+
|
59
|
+
def create_table(
|
60
|
+
self,
|
61
|
+
identifier: str,
|
62
|
+
*,
|
63
|
+
key_schema: pa.Schema | Any,
|
64
|
+
root_uri: Uri | None = None,
|
65
|
+
exist_ok: bool = False,
|
66
|
+
) -> Table:
|
67
|
+
"""Create a new table in the project.
|
68
|
+
|
69
|
+
Args:
|
70
|
+
identifier: The table identifier, in the form `dataset.table` or `table`.
|
71
|
+
key_schema: The schema of the table's keys.
|
72
|
+
root_uri: The root URI for the table.
|
73
|
+
exist_ok: If True, do not raise an error if the table already exists.
|
74
|
+
"""
|
75
|
+
dataset, table = self._parse_table_identifier(identifier)
|
76
|
+
|
77
|
+
if not isinstance(key_schema, pa.Schema):
|
78
|
+
key_schema = pa.schema(key_schema)
|
79
|
+
key_schema = Schema.from_arrow(key_schema)
|
80
|
+
|
81
|
+
core_table = self._spiral._core.create_table(
|
82
|
+
project_id=self._id,
|
83
|
+
dataset=dataset,
|
84
|
+
table=table,
|
85
|
+
key_schema=key_schema,
|
86
|
+
root_uri=root_uri,
|
87
|
+
exist_ok=exist_ok,
|
88
|
+
)
|
89
|
+
|
90
|
+
return Table(self._spiral, core_table, identifier=f"{self._id}.{dataset}.{table}")
|
91
|
+
|
92
|
+
def _parse_table_identifier(self, identifier: str) -> tuple[str, str]:
|
93
|
+
parts = identifier.split(".")
|
94
|
+
if len(parts) == 1:
|
95
|
+
return "default", parts[0]
|
96
|
+
elif len(parts) == 2:
|
97
|
+
return parts[0], parts[1]
|
98
|
+
else:
|
99
|
+
raise ValueError(f"Invalid table identifier: {self._id}.{identifier}")
|
100
|
+
|
101
|
+
def text_index(self, name: str) -> TextIndex:
|
102
|
+
"""Returns the index with the given name."""
|
103
|
+
res = list(self._spiral.api.project.list_text_indexes(project_id=self._id, name=name))
|
104
|
+
if len(res) == 0:
|
105
|
+
raise ValueError(f"Index not found: {name}")
|
106
|
+
res = res[0]
|
107
|
+
|
108
|
+
return TextIndex(self._spiral._core.text_index(res.id), name=name)
|
109
|
+
|
110
|
+
def create_text_index(
|
111
|
+
self,
|
112
|
+
name: str,
|
113
|
+
*projections: ExprLike,
|
114
|
+
where: ExprLike | None = None,
|
115
|
+
root_uri: Uri | None = None,
|
116
|
+
exist_ok: bool = False,
|
117
|
+
) -> TextIndex:
|
118
|
+
"""Creates a text index over the table projection.
|
119
|
+
|
120
|
+
See `se.text.field` for how to create and configure indexable fields.
|
121
|
+
|
122
|
+
Args:
|
123
|
+
name: The index name. Must be unique within the project.
|
124
|
+
projections: At least one projection expression is required.
|
125
|
+
All projections must reference the same table.
|
126
|
+
where: An optional filter expression to apply to the index.
|
127
|
+
root_uri: The root URI for the index.
|
128
|
+
exist_ok: If True, do not raise an error if the index already exists.
|
129
|
+
"""
|
130
|
+
from spiral import expressions as se
|
131
|
+
|
132
|
+
if not projections:
|
133
|
+
raise ValueError("At least one projection is required.")
|
134
|
+
projection = se.merge(*projections)
|
135
|
+
if where is not None:
|
136
|
+
where = se.lift(where)
|
137
|
+
|
138
|
+
core_index = self._spiral._core.create_text_index(
|
139
|
+
project_id=self._id,
|
140
|
+
name=name,
|
141
|
+
projection=projection.__expr__,
|
142
|
+
filter=where.__expr__ if where else None,
|
143
|
+
root_uri=root_uri,
|
144
|
+
# TODO(marko): Validate that if an index exists, it's the same?
|
145
|
+
exist_ok=exist_ok,
|
146
|
+
)
|
147
|
+
|
148
|
+
return TextIndex(core_index, name=name)
|
149
|
+
|
150
|
+
def key_space_index(self, name: str) -> KeySpaceIndex:
|
151
|
+
"""Returns the index with the given name."""
|
152
|
+
res = list(self._spiral.api.project.list_key_space_indexes(project_id=self._id, name=name))
|
153
|
+
if len(res) == 0:
|
154
|
+
raise ValueError(f"Index not found: {name}")
|
155
|
+
res = res[0]
|
156
|
+
|
157
|
+
return KeySpaceIndex(self._spiral._core.key_space_index(res.id), name=name)
|
158
|
+
|
159
|
+
def create_key_space_index(
|
160
|
+
self,
|
161
|
+
name: str,
|
162
|
+
granularity: int,
|
163
|
+
*projections: ExprLike,
|
164
|
+
where: ExprLike | None = None,
|
165
|
+
root_uri: Uri | None = None,
|
166
|
+
exist_ok: bool = False,
|
167
|
+
) -> KeySpaceIndex:
|
168
|
+
"""Creates a key space index over the table projection.
|
169
|
+
|
170
|
+
Args:
|
171
|
+
name: The index name. Must be unique within the project.
|
172
|
+
granularity: The granularity at which to store keys, i.e. the size of desired key ranges.
|
173
|
+
The key ranges will not be greater than 2x the granularity, but may be smaller.
|
174
|
+
projections: At least one projection expression is required.
|
175
|
+
All projections must reference the same table.
|
176
|
+
where: An optional filter expression to apply to the index.
|
177
|
+
root_uri: The root URI for the index.
|
178
|
+
exist_ok: If True, do not raise an error if the index already exists.
|
179
|
+
"""
|
180
|
+
from spiral import expressions as se
|
181
|
+
|
182
|
+
if not projections:
|
183
|
+
raise ValueError("At least one projection is required.")
|
184
|
+
projection = se.merge(*projections)
|
185
|
+
if where is not None:
|
186
|
+
where = se.lift(where)
|
187
|
+
|
188
|
+
core_index = self._spiral._core.create_key_space_index(
|
189
|
+
project_id=self._id,
|
190
|
+
name=name,
|
191
|
+
granularity=granularity,
|
192
|
+
projection=projection.__expr__,
|
193
|
+
filter=where.__expr__ if where else None,
|
194
|
+
root_uri=root_uri,
|
195
|
+
# TODO(marko): Validate that if an index exists, it's the same?
|
196
|
+
exist_ok=exist_ok,
|
197
|
+
)
|
45
198
|
|
46
|
-
return
|
199
|
+
return KeySpaceIndex(core_index, name=name)
|