pyspiral 0.5.0__cp310-abi3-macosx_11_0_arm64.whl → 0.6.0__cp310-abi3-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pyspiral-0.5.0.dist-info → pyspiral-0.6.0.dist-info}/METADATA +7 -3
- pyspiral-0.6.0.dist-info/RECORD +99 -0
- {pyspiral-0.5.0.dist-info → pyspiral-0.6.0.dist-info}/WHEEL +1 -1
- spiral/__init__.py +10 -3
- spiral/_lib.abi3.so +0 -0
- spiral/adbc.py +6 -6
- spiral/api/__init__.py +8 -2
- spiral/api/client.py +1 -1
- spiral/api/key_space_indexes.py +23 -0
- spiral/api/projects.py +15 -0
- spiral/api/text_indexes.py +1 -1
- spiral/cli/__init__.py +15 -6
- spiral/cli/admin.py +2 -4
- spiral/cli/app.py +4 -2
- spiral/cli/fs.py +5 -6
- spiral/cli/iceberg.py +97 -0
- spiral/cli/key_spaces.py +68 -0
- spiral/cli/login.py +6 -7
- spiral/cli/orgs.py +7 -8
- spiral/cli/printer.py +3 -3
- spiral/cli/projects.py +5 -6
- spiral/cli/tables.py +131 -0
- spiral/cli/telemetry.py +3 -4
- spiral/cli/text.py +115 -0
- spiral/cli/types.py +3 -4
- spiral/cli/workloads.py +7 -8
- spiral/client.py +111 -8
- spiral/core/authn/__init__.pyi +27 -0
- spiral/core/client/__init__.pyi +135 -63
- spiral/core/table/__init__.pyi +34 -24
- spiral/core/table/metastore/__init__.pyi +0 -4
- spiral/core/table/spec/__init__.pyi +0 -2
- spiral/{tables/dataset.py → dataset.py} +13 -7
- spiral/{tables/debug → debug}/manifests.py +17 -6
- spiral/{tables/debug → debug}/scan.py +3 -3
- spiral/expressions/base.py +3 -3
- spiral/expressions/udf.py +1 -1
- spiral/{iceberg/client.py → iceberg.py} +1 -3
- spiral/key_space_index.py +44 -0
- spiral/project.py +171 -18
- spiral/protogen/_/arrow/flight/protocol/sql/__init__.py +1668 -1110
- spiral/protogen/_/google/protobuf/__init__.py +2190 -0
- spiral/protogen/_/message_pool.py +3 -0
- spiral/protogen/_/py.typed +0 -0
- spiral/protogen/_/scandal/__init__.py +138 -126
- spiral/protogen/_/spfs/__init__.py +72 -0
- spiral/protogen/_/spql/__init__.py +61 -0
- spiral/protogen/_/substrait/__init__.py +5256 -2459
- spiral/protogen/_/substrait/extensions/__init__.py +103 -49
- spiral/{tables/scan.py → scan.py} +37 -44
- spiral/settings.py +14 -3
- spiral/snapshot.py +55 -0
- spiral/streaming_/__init__.py +3 -0
- spiral/streaming_/reader.py +117 -0
- spiral/streaming_/stream.py +146 -0
- spiral/substrait_.py +9 -9
- spiral/table.py +257 -0
- spiral/text_index.py +17 -0
- spiral/{tables/transaction.py → transaction.py} +11 -15
- pyspiral-0.5.0.dist-info/RECORD +0 -103
- spiral/cli/iceberg/__init__.py +0 -7
- spiral/cli/iceberg/namespaces.py +0 -47
- spiral/cli/iceberg/tables.py +0 -60
- spiral/cli/indexes/__init__.py +0 -40
- spiral/cli/indexes/args.py +0 -39
- spiral/cli/indexes/workers.py +0 -59
- spiral/cli/tables/__init__.py +0 -88
- spiral/cli/tables/args.py +0 -42
- spiral/core/index/__init__.pyi +0 -7
- spiral/iceberg/__init__.py +0 -3
- spiral/indexes/__init__.py +0 -5
- spiral/indexes/client.py +0 -137
- spiral/indexes/index.py +0 -28
- spiral/indexes/scan.py +0 -22
- spiral/protogen/_/spiral/table/__init__.py +0 -22
- spiral/protogen/substrait/__init__.py +0 -3399
- spiral/protogen/substrait/extensions/__init__.py +0 -115
- spiral/tables/__init__.py +0 -12
- spiral/tables/client.py +0 -133
- spiral/tables/maintenance.py +0 -12
- spiral/tables/snapshot.py +0 -78
- spiral/tables/table.py +0 -145
- {pyspiral-0.5.0.dist-info → pyspiral-0.6.0.dist-info}/entry_points.txt +0 -0
- /spiral/{protogen/_/spiral → debug}/__init__.py +0 -0
- /spiral/{tables/debug → debug}/metrics.py +0 -0
- /spiral/{tables/debug → protogen/_/google}/__init__.py +0 -0
spiral/core/client/__init__.pyi
CHANGED
@@ -1,34 +1,11 @@
|
|
1
|
-
from
|
2
|
-
from spiral.core.index import SearchScan, TextIndex
|
3
|
-
from spiral.core.table import Table, TableMaintenance, TableScan, TableSnapshot, TableTransaction
|
4
|
-
from spiral.core.table.spec import Schema
|
5
|
-
from spiral.expressions import Expr
|
6
|
-
|
7
|
-
class Token:
|
8
|
-
def __init__(self, value: str): ...
|
9
|
-
def expose_secret(self) -> str: ...
|
10
|
-
|
11
|
-
class Authn:
|
12
|
-
@staticmethod
|
13
|
-
def from_token(token: Token) -> Authn: ...
|
14
|
-
@staticmethod
|
15
|
-
def from_fallback() -> Authn: ...
|
16
|
-
@staticmethod
|
17
|
-
def from_device() -> Authn: ...
|
18
|
-
def token(self) -> Token | None: ...
|
19
|
-
|
20
|
-
class DeviceCodeAuth:
|
21
|
-
@staticmethod
|
22
|
-
def default() -> DeviceCodeAuth:
|
23
|
-
"""Return the static device code instance."""
|
24
|
-
...
|
25
|
-
def authenticate(self, force: bool = False, org_id: OrgId | None = None) -> Token:
|
26
|
-
"""Authenticate using device code flow."""
|
27
|
-
...
|
1
|
+
from typing import Any, Literal
|
28
2
|
|
29
|
-
|
30
|
-
|
31
|
-
|
3
|
+
import pyarrow as pa
|
4
|
+
from spiral.api.types import DatasetName, IndexName, ProjectId, RootUri, TableName
|
5
|
+
from spiral.core.authn import Authn
|
6
|
+
from spiral.core.table import ColumnGroupState, KeyRange, KeySpaceState, Scan, Snapshot, Table, Transaction
|
7
|
+
from spiral.core.table.spec import ColumnGroup, Schema
|
8
|
+
from spiral.expressions import Expr
|
32
9
|
|
33
10
|
class Spiral:
|
34
11
|
"""A client for Spiral database"""
|
@@ -43,6 +20,39 @@ class Spiral:
|
|
43
20
|
def authn(self) -> Authn:
|
44
21
|
"""Get the current authentication context."""
|
45
22
|
...
|
23
|
+
|
24
|
+
def scan(
|
25
|
+
self,
|
26
|
+
projection: Expr,
|
27
|
+
filter: Expr | None = None,
|
28
|
+
asof: int | None = None,
|
29
|
+
exclude_keys: bool = False,
|
30
|
+
) -> Scan:
|
31
|
+
"""Construct a table scan."""
|
32
|
+
...
|
33
|
+
|
34
|
+
def transaction(self, table: Table, format: str | None = None) -> Transaction:
|
35
|
+
"""Begin a table transaction."""
|
36
|
+
...
|
37
|
+
|
38
|
+
def search(
|
39
|
+
self,
|
40
|
+
top_k: int,
|
41
|
+
rank_by: Expr,
|
42
|
+
*,
|
43
|
+
filters: Expr | None = None,
|
44
|
+
freshness_window_s: int | None = None,
|
45
|
+
) -> pa.RecordBatchReader:
|
46
|
+
"""Search an index.
|
47
|
+
|
48
|
+
Searching an index returns a stream of record batches that match the table's key schema plus a float score column.
|
49
|
+
"""
|
50
|
+
...
|
51
|
+
|
52
|
+
def table(self, table_id: str) -> Table:
|
53
|
+
"""Get a table."""
|
54
|
+
...
|
55
|
+
|
46
56
|
def create_table(
|
47
57
|
self,
|
48
58
|
project_id: ProjectId,
|
@@ -56,63 +66,125 @@ class Spiral:
|
|
56
66
|
"""Create a new table in the specified project."""
|
57
67
|
...
|
58
68
|
|
59
|
-
def
|
60
|
-
"""Get
|
61
|
-
|
62
|
-
def open_table(self, table_id: str, key_schema: Schema, root_uri: RootUri) -> Table:
|
63
|
-
"""Open a table. This does not make any network calls."""
|
69
|
+
def text_index(self, index_id: str) -> TextIndex:
|
70
|
+
"""Get a text index."""
|
64
71
|
...
|
65
72
|
|
66
|
-
def
|
73
|
+
def create_text_index(
|
67
74
|
self,
|
75
|
+
project_id: ProjectId,
|
76
|
+
name: IndexName,
|
68
77
|
projection: Expr,
|
69
78
|
filter: Expr | None = None,
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
79
|
+
*,
|
80
|
+
root_uri: RootUri | None = None,
|
81
|
+
exist_ok: bool = False,
|
82
|
+
) -> TextIndex:
|
83
|
+
"""Create a new index in the specified project."""
|
74
84
|
...
|
75
85
|
|
76
|
-
def
|
77
|
-
"""
|
86
|
+
def key_space_index(self, index_id: str) -> KeySpaceIndex:
|
87
|
+
"""Get a key space index."""
|
78
88
|
...
|
79
89
|
|
80
|
-
def
|
81
|
-
"""Access maintenance operations for a table."""
|
82
|
-
...
|
83
|
-
def create_text_index(
|
90
|
+
def create_key_space_index(
|
84
91
|
self,
|
85
92
|
project_id: ProjectId,
|
86
93
|
name: IndexName,
|
94
|
+
granularity: int,
|
87
95
|
projection: Expr,
|
88
96
|
filter: Expr | None = None,
|
89
97
|
*,
|
90
98
|
root_uri: RootUri | None = None,
|
91
99
|
exist_ok: bool = False,
|
92
|
-
) ->
|
93
|
-
"""Create a new index in the specified project."""
|
100
|
+
) -> KeySpaceIndex:
|
101
|
+
"""Create a new key space index in the specified project."""
|
94
102
|
...
|
95
103
|
|
96
|
-
def
|
97
|
-
"""
|
104
|
+
def _ops(self, *, format: str | None = None) -> Operations:
|
105
|
+
"""Access maintenance operations.
|
106
|
+
|
107
|
+
IMPORTANT: This API is internal and is currently exposed for development & testing.
|
108
|
+
Maintenance operations are run by SpiralDB.
|
109
|
+
"""
|
98
110
|
...
|
99
111
|
|
100
|
-
|
112
|
+
class TextIndex:
|
113
|
+
id: str
|
114
|
+
|
115
|
+
class KeySpaceIndex:
|
116
|
+
id: str
|
117
|
+
table_id: str
|
118
|
+
granularity: int
|
119
|
+
projection: Expr
|
120
|
+
filter: Expr
|
121
|
+
asof: int
|
122
|
+
|
123
|
+
class Shard:
|
124
|
+
key_range: KeyRange
|
125
|
+
cardinality: int
|
126
|
+
|
127
|
+
class Operations:
|
128
|
+
def flush_wal(self, table: Table) -> None:
|
129
|
+
"""
|
130
|
+
Flush the write-ahead log of the table.
|
131
|
+
"""
|
132
|
+
...
|
133
|
+
def compact_key_space(
|
101
134
|
self,
|
102
|
-
rank_by: Expr,
|
103
|
-
top_k: int,
|
104
|
-
# NOTE(marko): Required for now.
|
105
|
-
freshness_window_s: int,
|
106
135
|
*,
|
107
|
-
|
108
|
-
|
109
|
-
|
136
|
+
table: Table,
|
137
|
+
mode: Literal["plan", "read", "write"] | None = None,
|
138
|
+
partition_bytes_min: int | None = None,
|
139
|
+
):
|
140
|
+
"""
|
141
|
+
Compact the key space of the table.
|
142
|
+
"""
|
110
143
|
...
|
111
|
-
|
112
|
-
|
144
|
+
def compact_column_group(
|
145
|
+
self,
|
146
|
+
table: Table,
|
147
|
+
column_group: ColumnGroup,
|
148
|
+
*,
|
149
|
+
mode: Literal["plan", "read", "write"] | None = None,
|
150
|
+
partition_bytes_min: int | None = None,
|
151
|
+
):
|
113
152
|
"""
|
114
|
-
|
115
|
-
|
116
|
-
|
153
|
+
Compact a column group in the table.
|
154
|
+
"""
|
155
|
+
...
|
156
|
+
def update_text_index(self, index: TextIndex, snapshot: Snapshot) -> None:
|
157
|
+
"""
|
158
|
+
Index table changes up to the given snapshot.
|
159
|
+
"""
|
160
|
+
...
|
161
|
+
def update_key_space_index(self, index: KeySpaceIndex, snapshot: Snapshot) -> None:
|
162
|
+
"""
|
163
|
+
Index table changes up to the given snapshot.
|
164
|
+
"""
|
165
|
+
...
|
166
|
+
def key_space_state(self, table: Table, *, asof: int | None = None) -> KeySpaceState:
|
167
|
+
"""
|
168
|
+
The key space state for the table.
|
169
|
+
"""
|
170
|
+
...
|
171
|
+
def column_group_state(
|
172
|
+
self, table: Table, key_space_state: KeySpaceState, column_group: ColumnGroup, *, asof: int | None = None
|
173
|
+
) -> ColumnGroupState:
|
174
|
+
"""
|
175
|
+
The state of the column group of the table.
|
176
|
+
"""
|
177
|
+
...
|
178
|
+
def column_groups_states(
|
179
|
+
self, table: Table, key_space_state: KeySpaceState, *, asof: int | None = None
|
180
|
+
) -> list[ColumnGroupState]:
|
181
|
+
"""
|
182
|
+
The state of each column group of the table.
|
183
|
+
"""
|
184
|
+
...
|
185
|
+
def compute_shards(self, index: KeySpaceIndex) -> list[Shard]:
|
186
|
+
"""
|
187
|
+
Compute the scan shards from a key space index.
|
117
188
|
"""
|
118
189
|
...
|
190
|
+
def metrics(self) -> dict[str, Any]: ...
|
spiral/core/table/__init__.pyi
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
from typing import Any
|
1
|
+
from typing import Any
|
2
2
|
|
3
3
|
import pyarrow as pa
|
4
4
|
from spiral.expressions import Expr
|
@@ -42,22 +42,24 @@ class Table:
|
|
42
42
|
|
43
43
|
def get_wal(self, *, asof: int | None) -> WriteAheadLog: ...
|
44
44
|
def get_schema(self, *, asof: int | None) -> Schema: ...
|
45
|
-
def get_snapshot(self, *, asof: int | None) ->
|
45
|
+
def get_snapshot(self, *, asof: int | None) -> Snapshot: ...
|
46
46
|
|
47
|
-
class
|
47
|
+
class Snapshot:
|
48
48
|
"""A snapshot of a table at a specific point in time."""
|
49
49
|
|
50
50
|
asof: int
|
51
51
|
table: Table
|
52
52
|
wal: WriteAheadLog
|
53
53
|
|
54
|
-
class
|
54
|
+
class Scan:
|
55
55
|
def key_schema(self) -> Schema: ...
|
56
56
|
def schema(self) -> Schema: ...
|
57
57
|
def is_empty(self) -> bool: ...
|
58
|
-
def
|
58
|
+
def splits(self) -> list[KeyRange]: ...
|
59
59
|
def table_ids(self) -> list[str]: ...
|
60
60
|
def column_groups(self) -> list[ColumnGroup]: ...
|
61
|
+
def column_group_state(self, column_group: ColumnGroup) -> ColumnGroupState: ...
|
62
|
+
def key_space_state(self, table_id: str) -> KeySpaceState: ...
|
61
63
|
def to_record_batches(
|
62
64
|
self,
|
63
65
|
key_table: pa.Table | pa.RecordBatch | None = None,
|
@@ -65,13 +67,16 @@ class TableScan:
|
|
65
67
|
) -> pa.RecordBatchReader: ...
|
66
68
|
def to_shuffled_record_batches(
|
67
69
|
self,
|
70
|
+
strategy: ShuffleStrategy | None = None,
|
68
71
|
batch_readahead: int | None = None,
|
69
|
-
shuffle_buffer_size: int | None = None,
|
70
|
-
shuffle_pool_num_rows: int | None = None,
|
71
72
|
) -> pa.RecordBatchReader: ...
|
72
|
-
def column_group_state(self, column_group: ColumnGroup) -> ColumnGroupState: ...
|
73
|
-
def key_space_state(self, table_id: str) -> KeySpaceState: ...
|
74
73
|
def metrics(self) -> dict[str, Any]: ...
|
74
|
+
def _prepare_shard(
|
75
|
+
self,
|
76
|
+
output_path: str,
|
77
|
+
key_range: KeyRange,
|
78
|
+
expected_cardinality: int | None = None,
|
79
|
+
) -> None: ...
|
75
80
|
|
76
81
|
class KeySpaceState:
|
77
82
|
manifest: FragmentManifest
|
@@ -80,10 +85,11 @@ class KeySpaceState:
|
|
80
85
|
|
81
86
|
class ColumnGroupState:
|
82
87
|
manifest: FragmentManifest
|
88
|
+
column_group: ColumnGroup
|
83
89
|
|
84
90
|
def schema(self) -> Schema: ...
|
85
91
|
|
86
|
-
class
|
92
|
+
class Transaction:
|
87
93
|
status: str
|
88
94
|
|
89
95
|
def write(self, expr: Expr, *, partition_size_bytes: int | None = None): ...
|
@@ -91,19 +97,23 @@ class TableTransaction:
|
|
91
97
|
def abort(self): ...
|
92
98
|
def metrics(self) -> dict[str, Any]: ...
|
93
99
|
|
94
|
-
class
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
100
|
+
class ShuffleStrategy:
|
101
|
+
# Results are buffered in a pool of `buffer_size` rows and shuffled again.
|
102
|
+
shuffle_buffer_size: int
|
103
|
+
|
104
|
+
# All randomness is derived from this seed. If None, a random seed is generated from the OS.
|
105
|
+
seed: int | None
|
106
|
+
|
107
|
+
# `approximate_batch_size` controls the maximum approximate size of each shard. Shards that
|
108
|
+
# are larger than this size are further split assuming uniform distribution of keys. Note
|
109
|
+
# that this is best-effort and can be wildly off. The purpose of this is to improve
|
110
|
+
# shuffling, rather than to support sharding. If not present, splits derived from the table
|
111
|
+
# are used in the attempt to minimize wasted reads.
|
112
|
+
approximate_buffer_size: int | None
|
113
|
+
|
114
|
+
def __init__(
|
103
115
|
self,
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
partition_bytes_min: int | None = None,
|
116
|
+
seed: int | None = None,
|
117
|
+
shard_size: int | None = None,
|
118
|
+
buffer_size: int | None = None,
|
108
119
|
): ...
|
109
|
-
def metrics(self) -> dict[str, Any]: ...
|
@@ -56,7 +56,3 @@ class PyMetastore:
|
|
56
56
|
authn: Authn,
|
57
57
|
) -> PyMetastore:
|
58
58
|
"""Construct a PyMetastore backed by an HTTP metastore service."""
|
59
|
-
|
60
|
-
@staticmethod
|
61
|
-
def test(table_id: str, root_uri: str, key_schema: Schema) -> PyMetastore:
|
62
|
-
"""Construct a PyMetastore backed by an in-memory mock metastore service."""
|
@@ -4,14 +4,15 @@ import pyarrow as pa
|
|
4
4
|
import pyarrow.compute as pc
|
5
5
|
import pyarrow.dataset as ds
|
6
6
|
|
7
|
-
from spiral.
|
7
|
+
from spiral.scan import Scan
|
8
|
+
from spiral.snapshot import Snapshot
|
8
9
|
|
9
10
|
|
10
|
-
class
|
11
|
+
class Dataset(ds.Dataset):
|
11
12
|
def __init__(self, snapshot: Snapshot):
|
12
13
|
self._snapshot = snapshot
|
13
14
|
self._table = snapshot.table
|
14
|
-
self._schema: pa.Schema = self._snapshot.
|
15
|
+
self._schema: pa.Schema = self._snapshot.schema().to_arrow()
|
15
16
|
|
16
17
|
# We don't actually initialize a Dataset, we just implement enough of the API
|
17
18
|
# to fool both DuckDB and Polars.
|
@@ -43,7 +44,7 @@ class TableDataset(ds.Dataset):
|
|
43
44
|
memory_pool,
|
44
45
|
).count_rows()
|
45
46
|
|
46
|
-
def filter(self, expression: pc.Expression) -> "
|
47
|
+
def filter(self, expression: pc.Expression) -> "Dataset":
|
47
48
|
raise NotImplementedError("filter not implemented")
|
48
49
|
|
49
50
|
def get_fragments(self, filter: pc.Expression | None = None):
|
@@ -89,7 +90,7 @@ class TableDataset(ds.Dataset):
|
|
89
90
|
def join_asof(self, right_dataset, on, by, tolerance, right_on=None, right_by=None):
|
90
91
|
raise NotImplementedError("join_asof not implemented")
|
91
92
|
|
92
|
-
def replace_schema(self, schema: pa.Schema) -> "
|
93
|
+
def replace_schema(self, schema: pa.Schema) -> "Dataset":
|
93
94
|
raise NotImplementedError("replace_schema not implemented")
|
94
95
|
|
95
96
|
def scanner(
|
@@ -112,13 +113,18 @@ class TableDataset(ds.Dataset):
|
|
112
113
|
)
|
113
114
|
|
114
115
|
scan = (
|
115
|
-
self.
|
116
|
+
self._table.spiral.scan(
|
116
117
|
{c: self._table[c] for c in columns},
|
117
118
|
where=filter,
|
119
|
+
asof=self._snapshot.asof,
|
118
120
|
exclude_keys=True,
|
119
121
|
)
|
120
122
|
if columns
|
121
|
-
else self.
|
123
|
+
else self._table.spiral.scan(
|
124
|
+
self._table,
|
125
|
+
where=filter,
|
126
|
+
asof=self._snapshot.asof,
|
127
|
+
)
|
122
128
|
)
|
123
129
|
self._last_scan = scan
|
124
130
|
|
@@ -1,23 +1,34 @@
|
|
1
|
+
from collections.abc import Iterable
|
2
|
+
|
1
3
|
from spiral import datetime_
|
2
|
-
from spiral.core.table import
|
4
|
+
from spiral.core.table import Scan
|
3
5
|
from spiral.core.table.manifests import FragmentManifest
|
4
|
-
from spiral.
|
6
|
+
from spiral.core.table.spec import ColumnGroup
|
7
|
+
from spiral.debug.metrics import _format_bytes
|
5
8
|
|
6
9
|
|
7
|
-
def
|
10
|
+
def display_scan_manifests(scan: Scan):
|
8
11
|
"""Display all manifests in a scan."""
|
9
12
|
if len(scan.table_ids()) != 1:
|
10
13
|
raise NotImplementedError("Multiple table scans are not supported.")
|
11
14
|
table_id = scan.table_ids()[0]
|
15
|
+
key_space_manifest = scan.key_space_state(table_id).manifest
|
16
|
+
column_group_manifests = (
|
17
|
+
(column_group, scan.column_group_state(column_group).manifest) for column_group in scan.column_groups()
|
18
|
+
)
|
19
|
+
|
20
|
+
display_manifests(key_space_manifest, column_group_manifests)
|
21
|
+
|
12
22
|
|
13
|
-
|
23
|
+
def display_manifests(
|
24
|
+
key_space_manifest: FragmentManifest, column_group_manifests: Iterable[tuple[ColumnGroup, FragmentManifest]]
|
25
|
+
):
|
14
26
|
_table_of_fragments(
|
15
27
|
key_space_manifest,
|
16
28
|
title="Key Space manifest",
|
17
29
|
)
|
18
30
|
|
19
|
-
for column_group in
|
20
|
-
column_group_manifest: FragmentManifest = scan.column_group_state(column_group).manifest
|
31
|
+
for column_group, column_group_manifest in column_group_manifests:
|
21
32
|
_table_of_fragments(
|
22
33
|
column_group_manifest,
|
23
34
|
title=f"Column Group manifest for {str(column_group)}",
|
@@ -1,12 +1,12 @@
|
|
1
1
|
from datetime import datetime
|
2
2
|
|
3
|
-
from spiral.core.table import
|
3
|
+
from spiral.core.table import Scan
|
4
4
|
from spiral.core.table.manifests import FragmentFile, FragmentManifest
|
5
5
|
from spiral.core.table.spec import Key
|
6
6
|
from spiral.types_ import Timestamp
|
7
7
|
|
8
8
|
|
9
|
-
def show_scan(scan:
|
9
|
+
def show_scan(scan: Scan):
|
10
10
|
"""Displays a scan in a way that is useful for debugging."""
|
11
11
|
table_ids = scan.table_ids()
|
12
12
|
if len(table_ids) > 1:
|
@@ -14,7 +14,7 @@ def show_scan(scan: TableScan):
|
|
14
14
|
table_id = table_ids[0]
|
15
15
|
column_groups = scan.column_groups()
|
16
16
|
|
17
|
-
splits = scan.
|
17
|
+
splits = scan.splits()
|
18
18
|
key_space_state = scan.key_space_state(table_id)
|
19
19
|
|
20
20
|
# Collect all key bounds from all manifests. This makes sure all visualizations are aligned.
|
spiral/expressions/base.py
CHANGED
@@ -132,7 +132,7 @@ class Expr:
|
|
132
132
|
packed = packed.select(exclude=exclude)
|
133
133
|
return packed
|
134
134
|
|
135
|
-
if not paths:
|
135
|
+
if not paths and not exclude:
|
136
136
|
return self
|
137
137
|
|
138
138
|
return se.select(self, names=list(paths), exclude=exclude)
|
@@ -145,5 +145,5 @@ class Expr:
|
|
145
145
|
return Expr(_lib.expr.binary(op, self.__expr__, rhs.__expr__))
|
146
146
|
|
147
147
|
|
148
|
-
ScalarLike: TypeAlias = bool | int | float | str | list | datetime.datetime | None
|
149
|
-
ExprLike: TypeAlias = Expr | dict | ScalarLike
|
148
|
+
ScalarLike: TypeAlias = bool | int | float | str | list["ScalarLike"] | datetime.datetime | None
|
149
|
+
ExprLike: TypeAlias = Expr | dict["ExprLike", "ExprLike"] | ScalarLike
|
spiral/expressions/udf.py
CHANGED
@@ -38,7 +38,7 @@ class RefUDF(BaseUDF):
|
|
38
38
|
super().__init__(_lib.expr.udf.create(name, return_type=self.return_type, invoke=self.invoke, scope="ref"))
|
39
39
|
|
40
40
|
@abc.abstractmethod
|
41
|
-
def invoke(self, fp
|
41
|
+
def invoke(self, fp, *input_args: pa.Array) -> pa.Array:
|
42
42
|
"""Invoke the UDF with the given arguments.
|
43
43
|
|
44
44
|
NOTE: The first argument is always the ref cell. All array input args will be sliced to the appropriate row.
|
@@ -13,10 +13,8 @@ class Iceberg:
|
|
13
13
|
and ACID transactions to your warehouse.
|
14
14
|
"""
|
15
15
|
|
16
|
-
def __init__(self, spiral: "Spiral"
|
16
|
+
def __init__(self, spiral: "Spiral"):
|
17
17
|
self._spiral = spiral
|
18
|
-
self._project_id = project_id
|
19
|
-
|
20
18
|
self._api = self._spiral.config.api
|
21
19
|
|
22
20
|
def catalog(self) -> "Catalog":
|
@@ -0,0 +1,44 @@
|
|
1
|
+
from spiral.core.client import KeySpaceIndex as CoreKeySpaceIndex
|
2
|
+
from spiral.expressions import Expr
|
3
|
+
from spiral.types_ import Timestamp
|
4
|
+
|
5
|
+
|
6
|
+
class KeySpaceIndex:
|
7
|
+
"""
|
8
|
+
KeySpaceIndex represents an optionally materialized key space, defined by a projection and a filter over a table.
|
9
|
+
It can be used to efficiently and precisely shard the table for parallel processing or distributed training.
|
10
|
+
|
11
|
+
An index is defined by:
|
12
|
+
- A granularity that defines the target size of key ranges in the index.
|
13
|
+
IMPORTANT: Actual key ranges may be smaller, but will not exceed twice the granularity.
|
14
|
+
- A projection expression that defines which columns are included in the resulting key space.
|
15
|
+
- An optional filter expression that defines which rows are included in the index.
|
16
|
+
"""
|
17
|
+
|
18
|
+
def __init__(self, core: CoreKeySpaceIndex, *, name: str | None = None):
|
19
|
+
self.core = core
|
20
|
+
self._name = name
|
21
|
+
|
22
|
+
@property
|
23
|
+
def index_id(self) -> str:
|
24
|
+
return self.core.id
|
25
|
+
|
26
|
+
@property
|
27
|
+
def table_id(self) -> str:
|
28
|
+
return self.core.table_id
|
29
|
+
|
30
|
+
@property
|
31
|
+
def name(self) -> str:
|
32
|
+
return self._name or self.index_id
|
33
|
+
|
34
|
+
@property
|
35
|
+
def asof(self) -> Timestamp:
|
36
|
+
return self.core.asof
|
37
|
+
|
38
|
+
@property
|
39
|
+
def projection(self) -> Expr:
|
40
|
+
return Expr(self.core.projection)
|
41
|
+
|
42
|
+
@property
|
43
|
+
def filter(self) -> Expr | None:
|
44
|
+
return Expr(self.core.filter) if self.core.filter is not None else None
|