pyspiral 0.7.18__cp312-abi3-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. pyspiral-0.7.18.dist-info/METADATA +52 -0
  2. pyspiral-0.7.18.dist-info/RECORD +110 -0
  3. pyspiral-0.7.18.dist-info/WHEEL +4 -0
  4. pyspiral-0.7.18.dist-info/entry_points.txt +3 -0
  5. spiral/__init__.py +55 -0
  6. spiral/_lib.abi3.so +0 -0
  7. spiral/adbc.py +411 -0
  8. spiral/api/__init__.py +78 -0
  9. spiral/api/admin.py +15 -0
  10. spiral/api/client.py +164 -0
  11. spiral/api/filesystems.py +134 -0
  12. spiral/api/key_space_indexes.py +23 -0
  13. spiral/api/organizations.py +77 -0
  14. spiral/api/projects.py +219 -0
  15. spiral/api/telemetry.py +19 -0
  16. spiral/api/text_indexes.py +56 -0
  17. spiral/api/types.py +23 -0
  18. spiral/api/workers.py +40 -0
  19. spiral/api/workloads.py +52 -0
  20. spiral/arrow_.py +216 -0
  21. spiral/cli/__init__.py +88 -0
  22. spiral/cli/__main__.py +4 -0
  23. spiral/cli/admin.py +14 -0
  24. spiral/cli/app.py +108 -0
  25. spiral/cli/console.py +95 -0
  26. spiral/cli/fs.py +76 -0
  27. spiral/cli/iceberg.py +97 -0
  28. spiral/cli/key_spaces.py +103 -0
  29. spiral/cli/login.py +25 -0
  30. spiral/cli/orgs.py +90 -0
  31. spiral/cli/printer.py +53 -0
  32. spiral/cli/projects.py +147 -0
  33. spiral/cli/state.py +7 -0
  34. spiral/cli/tables.py +197 -0
  35. spiral/cli/telemetry.py +17 -0
  36. spiral/cli/text.py +115 -0
  37. spiral/cli/types.py +50 -0
  38. spiral/cli/workloads.py +58 -0
  39. spiral/client.py +256 -0
  40. spiral/core/__init__.pyi +0 -0
  41. spiral/core/_tools/__init__.pyi +5 -0
  42. spiral/core/authn/__init__.pyi +21 -0
  43. spiral/core/client/__init__.pyi +285 -0
  44. spiral/core/config/__init__.pyi +35 -0
  45. spiral/core/expr/__init__.pyi +15 -0
  46. spiral/core/expr/images/__init__.pyi +3 -0
  47. spiral/core/expr/list_/__init__.pyi +4 -0
  48. spiral/core/expr/refs/__init__.pyi +4 -0
  49. spiral/core/expr/str_/__init__.pyi +3 -0
  50. spiral/core/expr/struct_/__init__.pyi +6 -0
  51. spiral/core/expr/text/__init__.pyi +5 -0
  52. spiral/core/expr/udf/__init__.pyi +14 -0
  53. spiral/core/expr/video/__init__.pyi +3 -0
  54. spiral/core/table/__init__.pyi +141 -0
  55. spiral/core/table/manifests/__init__.pyi +35 -0
  56. spiral/core/table/metastore/__init__.pyi +58 -0
  57. spiral/core/table/spec/__init__.pyi +215 -0
  58. spiral/dataloader.py +299 -0
  59. spiral/dataset.py +264 -0
  60. spiral/datetime_.py +27 -0
  61. spiral/debug/__init__.py +0 -0
  62. spiral/debug/manifests.py +87 -0
  63. spiral/debug/metrics.py +56 -0
  64. spiral/debug/scan.py +266 -0
  65. spiral/enrichment.py +306 -0
  66. spiral/expressions/__init__.py +274 -0
  67. spiral/expressions/base.py +167 -0
  68. spiral/expressions/file.py +17 -0
  69. spiral/expressions/http.py +17 -0
  70. spiral/expressions/list_.py +68 -0
  71. spiral/expressions/s3.py +16 -0
  72. spiral/expressions/str_.py +39 -0
  73. spiral/expressions/struct.py +59 -0
  74. spiral/expressions/text.py +62 -0
  75. spiral/expressions/tiff.py +222 -0
  76. spiral/expressions/udf.py +60 -0
  77. spiral/grpc_.py +32 -0
  78. spiral/iceberg.py +31 -0
  79. spiral/iterable_dataset.py +106 -0
  80. spiral/key_space_index.py +44 -0
  81. spiral/project.py +227 -0
  82. spiral/protogen/_/__init__.py +0 -0
  83. spiral/protogen/_/arrow/__init__.py +0 -0
  84. spiral/protogen/_/arrow/flight/__init__.py +0 -0
  85. spiral/protogen/_/arrow/flight/protocol/__init__.py +0 -0
  86. spiral/protogen/_/arrow/flight/protocol/sql/__init__.py +2548 -0
  87. spiral/protogen/_/google/__init__.py +0 -0
  88. spiral/protogen/_/google/protobuf/__init__.py +2310 -0
  89. spiral/protogen/_/message_pool.py +3 -0
  90. spiral/protogen/_/py.typed +0 -0
  91. spiral/protogen/_/scandal/__init__.py +190 -0
  92. spiral/protogen/_/spfs/__init__.py +72 -0
  93. spiral/protogen/_/spql/__init__.py +61 -0
  94. spiral/protogen/_/substrait/__init__.py +6196 -0
  95. spiral/protogen/_/substrait/extensions/__init__.py +169 -0
  96. spiral/protogen/__init__.py +0 -0
  97. spiral/protogen/util.py +41 -0
  98. spiral/py.typed +0 -0
  99. spiral/scan.py +363 -0
  100. spiral/server.py +17 -0
  101. spiral/settings.py +36 -0
  102. spiral/snapshot.py +56 -0
  103. spiral/streaming_/__init__.py +3 -0
  104. spiral/streaming_/reader.py +133 -0
  105. spiral/streaming_/stream.py +157 -0
  106. spiral/substrait_.py +274 -0
  107. spiral/table.py +224 -0
  108. spiral/text_index.py +17 -0
  109. spiral/transaction.py +155 -0
  110. spiral/types_.py +6 -0
@@ -0,0 +1,3 @@
1
+ from .. import Expr
2
+
3
def substr(expr: Expr, begin: int, end: int | None) -> Expr:
    """Type stub for the `substr` expression function.

    `end` has no default, so callers must pass it explicitly; `None` is an
    accepted value.

    NOTE(review): presumably `None` means "through the end of the string" and
    `end` is exclusive -- neither is visible in this stub; confirm.
    """
    ...
@@ -0,0 +1,6 @@
1
+ from .. import Expr
2
+
3
def getitem(expr: Expr, item: str) -> Expr:
    """Type stub for `getitem`; `item` is a plain string, not an expression.

    NOTE(review): presumably selects the field named `item` from a struct
    expression -- behavior is not visible in this stub; confirm.
    """
    ...
4
def select(
    expr: Expr,
    including: list[str] | None = None,
    excluding: list[str] | None = None,
) -> Expr:
    """Type stub for `select`; both name lists are optional and default to None.

    NOTE(review): whether `including` and `excluding` may be combined in one
    call is not visible in this stub; confirm.
    """
    ...
5
def pack(names: list[str], children: list[str], nullable: bool) -> Expr:
    """Type stub for `pack`; all three arguments are required.

    NOTE(review): `children` is annotated `list[str]` here while the sibling
    `merge` stub takes `list[Expr]`; this may have been meant as `list[Expr]`
    -- confirm against the native signature before changing.
    """
    ...
6
def merge(names: list[Expr]) -> Expr:
    """Type stub for `merge`; takes a list of expressions.

    NOTE(review): the parameter is called `names` although it is annotated
    as a list of expressions -- confirm the native keyword name before
    renaming.
    """
    ...
@@ -0,0 +1,5 @@
1
+ from .. import Expr
2
+
3
def field(expr: Expr, tokeneizer: str | None) -> Expr:
    """Type stub for the text `field` function.

    The second argument has no default, so it must be passed explicitly;
    `None` is an accepted value.

    NOTE(review): `tokeneizer` looks like a misspelling of `tokenizer`. It is
    kept as-is because it is a keyword name callers may use -- confirm the
    native signature before renaming.
    """
    ...
4
def find(expr: Expr, term: str) -> Expr:
    """Type stub for the text `find` function; `term` is a plain string.

    NOTE(review): matching semantics for `term` are not visible in this
    stub; confirm.
    """
    ...
5
def boost(expr: Expr, factor: float) -> Expr:
    """Type stub for the text `boost` function; `factor` is a float.

    NOTE(review): presumably scales a relevance score by `factor` -- not
    visible in this stub; confirm.
    """
    ...
@@ -0,0 +1,14 @@
1
+ from collections.abc import Callable
2
+
3
+ from pyarrow import Array, DataType, Scalar
4
+
5
+ from .. import Expr
6
+
7
class UDF:
    """Type stub for a callable UDF object (the return type of `create`)."""

    def __call__(self, args: list[Expr]) -> Expr:
        """Call the UDF with a single list of argument expressions."""
        ...
9
+
10
def create(
    name: str,
    return_type: Callable[[tuple[DataType, ...]], DataType],
    invoke: Callable[[tuple[Array[Scalar[DataType]], ...]], Array[Scalar[DataType]]],
) -> UDF:
    """Type stub for constructing a `UDF`.

    Args:
        name: The UDF's name.
        return_type: Callable mapping a tuple of input `DataType`s to the
            output `DataType`.
        invoke: Callable mapping a tuple of input arrays to the output array.
    """
    ...
@@ -0,0 +1,3 @@
1
+ from .. import Expr
2
+
3
def read(expr: Expr, ranges: Expr, crops: Expr, format: str) -> Expr:
    """Type stub for the video `read` function.

    `ranges` and `crops` are expressions; `format` is a plain string.

    NOTE(review): the accepted `format` values and the expected shape of
    `ranges`/`crops` are not visible in this stub; confirm.
    """
    ...
@@ -0,0 +1,141 @@
1
+ from typing import Any
2
+
3
+ import pyarrow as pa
4
+ from spiral.core.client import Shard, ShuffleConfig
5
+
6
+ from .manifests import FragmentManifest
7
+ from .metastore import PyMetastore
8
+ from .spec import ColumnGroup, Key, Operation, Schema, WriteAheadLog
9
+
10
class KeyRange:
    """A right-exclusive range of keys."""

    def __init__(self, *, begin: Key, end: Key) -> None: ...

    begin: Key
    end: Key

    # Named set-style operations and their operator counterparts.
    # NOTE(review): presumably `|`, `&` and `in` behave like `union`,
    # `intersection` and `contains` -- not visible in this stub; confirm.
    def union(self, other: KeyRange) -> KeyRange: ...
    def __or__(self, other: KeyRange) -> KeyRange: ...
    def intersection(self, key_extent: KeyRange) -> KeyRange | None: ...
    def __and__(self, other: KeyRange) -> KeyRange | None: ...
    def contains(self, item: Key) -> bool: ...
    def __contains__(self, item: Key) -> bool: ...

    def is_disjoint(self, key_range: KeyRange) -> bool:
        """Return whether this range and `key_range` do not overlap.

        True when this range ends at or before `key_range` begins, or begins
        at or after `key_range` ends (`end` is exclusive).
        """
        ends_before = self.end <= key_range.begin
        return ends_before or self.begin >= key_range.end

    # Alternate constructors.
    @staticmethod
    def beginning_with(begin: Key) -> KeyRange: ...

    @staticmethod
    def ending_with(end: Key) -> KeyRange: ...

    @staticmethod
    def full() -> KeyRange: ...

    # Pickle support: declared to return the class plus a (Key, Key) tuple.
    def __reduce__(self) -> tuple[type[KeyRange], tuple[Key, Key]]: ...
34
+
35
class Table:
    """Type stub for the `Table` class, constructed from a `PyMetastore`."""

    def __init__(self, metastore: PyMetastore) -> None: ...

    id: str
    root_uri: str
    mount_id: str | None
    key_schema: Schema
    metastore: PyMetastore

    # `asof` is keyword-only and has no default on any of these; `None` is
    # accepted.
    # NOTE(review): presumably `None` means "latest" -- confirm.
    def get_wal(self, *, asof: int | None) -> WriteAheadLog: ...
    def get_schema(self, *, asof: int | None) -> Schema: ...
    def get_snapshot(self, *, asof: int | None) -> Snapshot: ...
47
+
48
class Snapshot:
    """A snapshot of a table at a specific point in time."""

    # Timestamp the snapshot was taken as of, declared as an int.
    # NOTE(review): unit/epoch not visible here -- confirm.
    asof: int
    # The table this snapshot belongs to.
    table: Table
    wal: WriteAheadLog
54
+
55
class ScanState:
    """Type stub for `ScanState`, which converts to and from a JSON string."""

    def to_json(self) -> str: ...

    @staticmethod
    def from_json(json: str) -> ScanState: ...
59
+
60
class MaterializablePlan:
    """Opaque type stub: no attributes or methods are declared."""

    ...
62
+
63
class EvaluatedExecutablePlan:
    """Opaque type stub: no attributes or methods are declared."""

    ...
65
+
66
class EvaluatedPlanStream:
    """Type stub for an iterator that yields `EvaluatedExecutablePlan` items."""

    def __next__(self) -> EvaluatedExecutablePlan: ...

    def __iter__(self) -> EvaluatedPlanStream: ...
69
+
70
class Scan:
    """Type stub for the `Scan` class."""

    # Schema and planning accessors.
    def key_schema(self) -> Schema: ...
    def schema(self) -> Schema: ...
    def is_empty(self) -> bool: ...
    def splits(self) -> list[KeyRange]: ...
    def shards(self) -> list[Shard]: ...
    def table_ids(self) -> list[str]: ...
    def column_groups(self) -> list[ColumnGroup]: ...
    def column_group_state(self, column_group: ColumnGroup) -> ColumnGroupState: ...
    def key_space_state(self, table_id: str) -> KeySpaceState: ...
    def plan_state(self) -> ScanState: ...
    def materializable_plan(self) -> MaterializablePlan: ...

    # Readers. Every argument is optional; `progress` defaults to True.
    def to_record_batches(
        self,
        key_range: KeyRange | None = None,
        key_table: pa.Table | pa.RecordBatch | None = None,
        batch_readahead: int | None = None,
        progress: bool = True,
    ) -> pa.RecordBatchReader: ...

    def to_unordered_record_batches(
        self,
        key_table: pa.Table | pa.RecordBatch | None = None,
        batch_readahead: int | None = None,
        progress: bool = True,
    ) -> pa.RecordBatchReader: ...

    def to_shuffled_record_batches(
        self,
        shards: list[Shard] | None = None,
        shuffle: ShuffleConfig | None = None,
        max_batch_size: int | None = None,
        batch_readahead: int | None = None,
        infinite: bool = False,
    ) -> pa.RecordBatchReader:
        # If `infinite` is True, shards are shuffled after they are exhausted,
        # but not before the first pass. Otherwise, shards are not shuffled
        # and the shuffle config is only used for the shuffle buffer.
        ...

    def evaluate_analyze(
        self,
        key_table: pa.Table | pa.RecordBatch | None = None,
        batch_readahead: int | None = None,
    ) -> EvaluatedPlanStream: ...

    def metrics(self) -> dict[str, Any]: ...
111
+
112
class KeySpaceState:
    """Type stub for `KeySpaceState` (the return type of `Scan.key_space_state`)."""

    manifest: FragmentManifest

    def key_schema(self) -> Schema: ...
116
+
117
class ColumnGroupState:
    """Type stub for `ColumnGroupState` (the return type of `Scan.column_group_state`)."""

    manifest: FragmentManifest
    column_group: ColumnGroup

    def schema(self) -> Schema: ...
122
+
123
class Transaction:
    """Type stub for the `Transaction` class.

    NOTE(review): `write`, `writeback`, `drop_columns`, `include`, `commit`
    and `abort` declare no return type in this stub. They presumably return
    None -- confirm against the native implementation before annotating.
    """

    status: str

    def write(
        self,
        table: pa.RecordBatchReader,
        *,
        partition_size_bytes: int | None = None,
    ): ...

    def writeback(
        self,
        scan: Scan,
        *,
        key_range: KeyRange | None = None,
        partition_size_bytes: int | None = None,
        batch_readahead: int | None = None,
    ): ...

    def drop_columns(self, column_paths: list[str]): ...

    # Operation bookkeeping: `ops` and `take` both return a list of
    # operations; `include` accepts one.
    # NOTE(review): presumably `take` also clears the pending operations,
    # unlike `ops` -- not visible in this stub; confirm.
    def ops(self) -> list[Operation]: ...
    def take(self) -> list[Operation]: ...
    def include(self, ops: list[Operation]): ...

    def commit(self, *, compact: bool = False, manifest_rows: int | None = None): ...
    def abort(self): ...
    def is_empty(self) -> bool: ...
@@ -0,0 +1,35 @@
1
+ import pyarrow as pa
2
+ from spiral.core.table import KeyRange
3
+ from spiral.core.table.spec import FileFormat, FragmentLevel, KeyExtent, KeySpan
4
+ from spiral.types_ import Timestamp
5
+
6
class FragmentManifest:
    """Type stub for `FragmentManifest`, a sized, indexable collection.

    It converts to and from an Arrow `RecordBatchReader` and can be built
    from a single `FragmentFile`.
    """

    # `__len__` must return an int per the Python data model, so the return
    # type is declared explicitly (as `KeySpan.__len__` already does).
    def __len__(self) -> int: ...

    # NOTE(review): the element type is not declared in the original stub;
    # presumably `FragmentFile` -- confirm before annotating.
    def __getitem__(self, idx: int): ...

    def to_arrow(self) -> pa.RecordBatchReader: ...

    @staticmethod
    def compute_schema() -> pa.Schema: ...

    # Alternate constructors.
    @staticmethod
    def from_fragment(fragment_file: FragmentFile) -> FragmentManifest: ...

    @staticmethod
    def from_arrow(reader: pa.RecordBatchReader) -> FragmentManifest: ...

    @staticmethod
    def empty() -> FragmentManifest: ...
18
+
19
class FragmentFile:
    """Type stub for `FragmentFile`, the per-file record of a fragment manifest."""

    id: str
    committed_at: Timestamp | None
    compacted_at: Timestamp | None
    format: FileFormat
    format_metadata: bytes | None
    size_bytes: int
    # NOTE: Empty for key space files.
    # (Declared once; the original stub listed `column_ids` twice with the
    # same type, so removing the duplicate changes nothing for callers.)
    column_ids: list[str]
    level: FragmentLevel
    ks_id: str
    key_span: KeySpan
    key_extent: KeyExtent

    @property
    def key_range(self) -> KeyRange: ...
@@ -0,0 +1,58 @@
1
+ """The SpiralDB metastore API."""
2
+
3
+ from spiral.core.client import Authn
4
+ from spiral.core.table.spec import ColumnGroup, ColumnGroupMetadata, FileFormat, KeySpaceMetadata, Schema, WriteAheadLog
5
+ from spiral.types_ import Uri
6
+
7
class FileHandle:
    """Type stub for `FileHandle`: a URI, its file format, and an optional token."""

    # All constructor arguments are keyword-only and required; `spfs_token`
    # accepts None.
    def __init__(
        self,
        *,
        uri: str,
        format: FileFormat,
        spfs_token: str | None,
    ) -> None: ...

    uri: str
    format: FileFormat
    spfs_token: str | None
13
+
14
class FileRef:
    """Type stub for `FileRef`: a file id together with its type and format."""

    # All constructor arguments are keyword-only and required.
    def __init__(
        self,
        *,
        id: str,
        file_type: FileType,
        file_format: FileFormat,
    ) -> None: ...

    id: str
    file_type: FileType
    file_format: FileFormat

    def resolve_uri(self, root_uri: str) -> str:
        """Resolves the file reference URI given the root URI."""
        ...
23
+
24
class FileType:
    """Type stub for the `FileType` enum-like class.

    Each member is declared as a class attribute of type `FileType`.
    """

    FragmentFile: FileType
    FragmentManifest: FileType
    ReferenceFile: FileType

    def __int__(self) -> int:
        """Returns the protobuf enum int value."""
        ...
31
+
32
class PyMetastore:
    """Rust implementation of the metastore API."""

    # Read-only identity of the table this metastore serves.
    @property
    def table_id(self) -> str: ...

    @property
    def root_uri(self) -> Uri: ...

    @property
    def key_schema(self) -> Schema: ...

    def get_wal(self) -> WriteAheadLog:
        """Return the log for the table."""
        ...

    def get_key_space_metadata(self) -> KeySpaceMetadata:
        """Return the metadata for the key space."""
        ...

    def get_column_group_metadata(self, column_group: ColumnGroup) -> ColumnGroupMetadata:
        """Return the metadata for a column group."""
        ...

    @staticmethod
    def http(
        table_id: str,
        root_uri: str,
        key_schema: Schema,
        base_url: str,
        authn: Authn,
    ) -> PyMetastore:
        """Construct a PyMetastore backed by an HTTP metastore service."""
        ...
@@ -0,0 +1,215 @@
1
+ """Type definitions for the spiral.core.table.spec module shipped as part of the native library."""
2
+
3
+ import pyarrow as pa
4
+
5
class ColumnGroup:
    """Type stub for `ColumnGroup`, identified by a path of string segments."""

    def __init__(self, path: list[str]) -> None: ...

    @property
    def table_id(self) -> str: ...

    @property
    def path(self) -> list[str]: ...

    def identifier(self, salt: int) -> str:
        """Return the column group identifier based on the given salt."""
        ...

    # Alternate constructor from a single string.
    # NOTE(review): the separator used to split `path` is not visible in
    # this stub; confirm.
    @staticmethod
    def from_str(path: str) -> ColumnGroup: ...
16
+
17
class KeySpaceMetadata:
    """Type stub for `KeySpaceMetadata`: an optional manifest handle plus a timestamp."""

    # Both constructor arguments are keyword-only and required;
    # `manifest_handle` accepts None.
    def __init__(
        self,
        *,
        manifest_handle: ManifestHandle | None,
        last_modified_at: int,
    ) -> None: ...

    manifest_handle: ManifestHandle | None
    last_modified_at: int

    # NOTE(review): the docstring below mentions versioned schemas, but this
    # class declares no schema fields; it reads identically to
    # `ColumnGroupMetadata.asof` and may have been copied -- confirm.
    def asof(self, asof: int) -> KeySpaceMetadata:
        """Returns the metadata as of a given timestamp. Currently just filtering versioned schemas."""
        ...

    def apply_wal(self, wal: WriteAheadLog) -> KeySpaceMetadata:
        """Applies the given WAL to the metadata."""
        ...
34
+
35
class ColumnGroupMetadata:
    """Type stub for `ColumnGroupMetadata`."""

    # All constructor arguments are keyword-only and required.
    # NOTE(review): `schema_versions` accepts None here while the attribute
    # is a plain list; presumably None is normalized to an empty list --
    # confirm.
    def __init__(
        self,
        *,
        column_group: ColumnGroup,
        manifest_handle: ManifestHandle | None,
        last_modified_at: int,
        schema_versions: list[VersionedSchema] | None,
        immutable_schema: bool,
        schema_salt: int,
    ) -> None: ...

    column_group: ColumnGroup
    manifest_handle: ManifestHandle | None
    last_modified_at: int
    schema_versions: list[VersionedSchema]
    immutable_schema: bool
    schema_salt: int

    def latest_schema(self) -> VersionedSchema:
        """Returns the latest schema of the column group."""
        ...

    def asof(self, asof: int) -> ColumnGroupMetadata:
        """Returns the metadata as of a given timestamp. Currently just filtering versioned schemas."""
        ...

    def apply_wal(self, wal: WriteAheadLog) -> ColumnGroupMetadata:
        """Applies the given WAL to the metadata."""
        ...
64
+
65
class Operation:
    """Base class for all operations in the WAL."""

    def to_json(self) -> str: ...

    @staticmethod
    def from_json(json: str) -> Operation: ...
70
+
71
class LogEntry:
    """Type stub for `LogEntry`: a timestamp paired with one operation."""

    ts: int
    # The operation is one of the six concrete op classes.
    operation: (
        KeySpaceWriteOp
        | ColumnGroupWriteOp
        | SchemaEvolutionOp
        | SchemaBreakOp
        | KeySpaceCompactOp
        | ColumnGroupCompactOp
    )

    def column_group(self) -> ColumnGroup | None:
        """Returns the column group of the entry if it is associated with one."""
        ...
84
+
85
class FileFormat:
    """Type stub for the `FileFormat` enum-like class, constructible from an int.

    Each member is declared as a class attribute of type `FileFormat`.
    """

    def __init__(self, value: int) -> None: ...

    Parquet: FileFormat
    Protobuf: FileFormat
    BinaryArray: FileFormat
    Vortex: FileFormat

    def __int__(self) -> int:
        """Returns the protobuf enum int value."""
        ...

    def __str__(self) -> str:
        """Returns the string representation of the file format."""
        ...
100
+
101
class FragmentLevel:
    """Type stub for the `FragmentLevel` enum-like class with members `L0` and `L1`."""

    L0: FragmentLevel
    L1: FragmentLevel

    def __int__(self) -> int:
        """Returns the protobuf enum int value."""
        ...
108
+
109
class Key:
    """Type stub for `Key`, a key constructed from raw bytes."""

    def __init__(self, key: bytes) -> None: ...

    # `__bytes__` must return `bytes` per the Python data model, so the
    # return type is declared explicitly.
    def __bytes__(self) -> bytes: ...

    def step(self) -> Key:
        """Returns the next key in the key space."""
        ...

    # Static constructors for the two extreme keys.
    @staticmethod
    def min() -> Key: ...

    @staticmethod
    def max() -> Key: ...

    # Pickle support: declared to return the class plus a one-element tuple
    # of bytes.
    def __reduce__(self) -> tuple[type[Key], tuple[bytes]]: ...
120
+
121
class KeyExtent:
    """An inclusive range of keys."""

    # Both bounds are keyword-only and required.
    def __init__(self, *, min: Key, max: Key) -> None: ...

    min: Key
    max: Key

    # Named set-style operations and their operator counterparts.
    # NOTE(review): presumably `|`, `&` and `in` behave like `union`,
    # `intersection` and `contains` -- not visible in this stub; confirm.
    def union(self, key_extent: KeyExtent) -> KeyExtent: ...
    def __or__(self, other: KeyExtent) -> KeyExtent: ...
    def intersection(self, key_extent: KeyExtent) -> KeyExtent | None: ...
    def __and__(self, other: KeyExtent) -> KeyExtent | None: ...
    def contains(self, item: Key) -> bool: ...
    def __contains__(self, item: Key) -> bool: ...
135
+
136
class KeySpan:
    """An exclusive range of keys as indexed by their position in a key space."""

    # Both bounds are keyword-only, required integer positions.
    def __init__(self, *, begin: int, end: int) -> None: ...

    begin: int
    end: int

    def __len__(self) -> int: ...

    def shift(self, offset: int) -> KeySpan: ...

    # NOTE(review): presumably `|` behaves like `union` -- not visible in
    # this stub; confirm.
    def union(self, other: KeySpan) -> KeySpan: ...
    def __or__(self, other: KeySpan) -> KeySpan: ...
148
+
149
class ManifestHandle:
    """Type stub for `ManifestHandle`: a manifest's id, file format, and size."""

    id: str
    format: FileFormat
    # NOTE(review): presumably a size in bytes -- the unit is not stated in
    # this stub; confirm.
    file_size: int
153
+
154
class Schema:
    """Type stub for `Schema`, which converts to and from an Arrow schema."""

    def to_arrow(self) -> pa.Schema:
        """Returns the Arrow schema."""
        ...

    @staticmethod
    def from_arrow(arrow: pa.Schema) -> Schema:
        """Creates a Schema from an Arrow schema."""
        ...

    # The return type is declared explicitly: `__len__` must return an int,
    # and the docstring states it is a column count.
    def __len__(self) -> int:
        """Returns the number of columns in the schema."""
        ...

    @property
    def names(self) -> list[str]:
        """Returns the names of the columns in the schema."""
        ...
169
+
170
class VersionedSchema:
    """Type stub for `VersionedSchema`: a schema tagged with a timestamp and column ids."""

    ts: int
    schema: Schema
    # NOTE(review): presumably one id per column of `schema`, in the same
    # order -- not visible in this stub; confirm.
    column_ids: list[str]
174
+
175
class KeySpaceWriteOp:
    """Type stub for a key-space write operation (a `LogEntry.operation` variant)."""

    ks_id: str
    manifest_handle: ManifestHandle
178
+
179
class ColumnGroupWriteOp:
    """Type stub for a column-group write operation (a `LogEntry.operation` variant)."""

    column_group: ColumnGroup
    manifest_handle: ManifestHandle
182
+
183
class SchemaEvolutionOp:
    """Type stub for a schema-evolution operation (a `LogEntry.operation` variant)."""

    # The only declared field: the column group the operation applies to.
    column_group: ColumnGroup
185
+
186
class SchemaBreakOp:
    """Type stub for a schema-break operation (a `LogEntry.operation` variant)."""

    # The only declared field: the column group the operation applies to.
    column_group: ColumnGroup
188
+
189
class KeySpaceCompactOp:
    """Type stub for a key-space compaction operation (a `LogEntry.operation` variant)."""

    ks_ids: list[str]
    # NOTE(review): how `moved_ks_ids` relates to `ks_ids` is not visible in
    # this stub; confirm.
    moved_ks_ids: list[str]
192
+
193
class ColumnGroupCompactOp:
    """Type stub for a column-group compaction operation (a `LogEntry.operation` variant)."""

    column_group: ColumnGroup
    # NOTE(review): annotated as a list of ints, whereas other ids in these
    # stubs (e.g. `ks_id`, `ManifestHandle.id`) are strings -- confirm this
    # is intentional.
    fragment_ids: list[int]
196
+
197
class WriteAheadLog:
    """Type stub for `WriteAheadLog`: a list of `LogEntry` plus a truncation marker."""

    # Both arguments are keyword-only and optional.
    # NOTE(review): `entries` accepts None while the attribute is a plain
    # list; presumably None is treated as an empty list -- confirm.
    def __init__(
        self,
        *,
        entries: list[LogEntry] | None = None,
        truncated_up_to: int = 0,
    ) -> None: ...

    entries: list[LogEntry]
    truncated_up_to: int

    @property
    def last_modified_at(self) -> int:
        """Returns the timestamp of the last modification of the log."""
        ...

    def filter(
        self,
        asof: int | None = None,
        since: int | None = None,
        column_group: ColumnGroup | None = None,
    ) -> WriteAheadLog:
        """Filters the WAL to entries by the given parameters."""
        ...