pyspiral 0.1.0__cp310-abi3-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. pyspiral-0.1.0.dist-info/METADATA +48 -0
  2. pyspiral-0.1.0.dist-info/RECORD +81 -0
  3. pyspiral-0.1.0.dist-info/WHEEL +4 -0
  4. pyspiral-0.1.0.dist-info/entry_points.txt +2 -0
  5. spiral/__init__.py +11 -0
  6. spiral/_lib.abi3.so +0 -0
  7. spiral/adbc.py +386 -0
  8. spiral/api/__init__.py +221 -0
  9. spiral/api/admin.py +29 -0
  10. spiral/api/filesystems.py +125 -0
  11. spiral/api/organizations.py +90 -0
  12. spiral/api/projects.py +160 -0
  13. spiral/api/tables.py +94 -0
  14. spiral/api/tokens.py +56 -0
  15. spiral/api/workloads.py +45 -0
  16. spiral/arrow.py +209 -0
  17. spiral/authn/__init__.py +0 -0
  18. spiral/authn/authn.py +89 -0
  19. spiral/authn/device.py +206 -0
  20. spiral/authn/github_.py +33 -0
  21. spiral/authn/modal_.py +18 -0
  22. spiral/catalog.py +78 -0
  23. spiral/cli/__init__.py +82 -0
  24. spiral/cli/__main__.py +4 -0
  25. spiral/cli/admin.py +21 -0
  26. spiral/cli/app.py +48 -0
  27. spiral/cli/console.py +95 -0
  28. spiral/cli/fs.py +47 -0
  29. spiral/cli/login.py +13 -0
  30. spiral/cli/org.py +90 -0
  31. spiral/cli/printer.py +45 -0
  32. spiral/cli/project.py +107 -0
  33. spiral/cli/state.py +3 -0
  34. spiral/cli/table.py +20 -0
  35. spiral/cli/token.py +27 -0
  36. spiral/cli/types.py +53 -0
  37. spiral/cli/workload.py +59 -0
  38. spiral/config.py +26 -0
  39. spiral/core/__init__.py +0 -0
  40. spiral/core/core/__init__.pyi +53 -0
  41. spiral/core/manifests/__init__.pyi +53 -0
  42. spiral/core/metastore/__init__.pyi +91 -0
  43. spiral/core/spec/__init__.pyi +257 -0
  44. spiral/dataset.py +239 -0
  45. spiral/debug.py +251 -0
  46. spiral/expressions/__init__.py +222 -0
  47. spiral/expressions/base.py +149 -0
  48. spiral/expressions/http.py +86 -0
  49. spiral/expressions/io.py +100 -0
  50. spiral/expressions/list_.py +68 -0
  51. spiral/expressions/refs.py +44 -0
  52. spiral/expressions/str_.py +39 -0
  53. spiral/expressions/struct.py +57 -0
  54. spiral/expressions/tiff.py +223 -0
  55. spiral/expressions/udf.py +46 -0
  56. spiral/grpc_.py +32 -0
  57. spiral/project.py +137 -0
  58. spiral/proto/_/__init__.py +0 -0
  59. spiral/proto/_/arrow/__init__.py +0 -0
  60. spiral/proto/_/arrow/flight/__init__.py +0 -0
  61. spiral/proto/_/arrow/flight/protocol/__init__.py +0 -0
  62. spiral/proto/_/arrow/flight/protocol/sql/__init__.py +1990 -0
  63. spiral/proto/_/scandal/__init__.py +223 -0
  64. spiral/proto/_/spfs/__init__.py +36 -0
  65. spiral/proto/_/spiral/__init__.py +0 -0
  66. spiral/proto/_/spiral/table/__init__.py +225 -0
  67. spiral/proto/_/spiraldb/__init__.py +0 -0
  68. spiral/proto/_/spiraldb/metastore/__init__.py +499 -0
  69. spiral/proto/__init__.py +0 -0
  70. spiral/proto/scandal/__init__.py +45 -0
  71. spiral/proto/spiral/__init__.py +0 -0
  72. spiral/proto/spiral/table/__init__.py +96 -0
  73. spiral/proto/substrait/__init__.py +3399 -0
  74. spiral/proto/substrait/extensions/__init__.py +115 -0
  75. spiral/proto/util.py +41 -0
  76. spiral/py.typed +0 -0
  77. spiral/scan_.py +168 -0
  78. spiral/settings.py +157 -0
  79. spiral/substrait_.py +275 -0
  80. spiral/table.py +157 -0
  81. spiral/types_.py +6 -0
@@ -0,0 +1,223 @@
1
+ # Generated by the protocol buffer compiler. DO NOT EDIT!
2
+ # sources: scandal/scandal.proto
3
+ # plugin: python-betterproto
4
+ # This file has been @generated
5
+
6
+ from dataclasses import dataclass
7
+ from typing import (
8
+ TYPE_CHECKING,
9
+ Dict,
10
+ List,
11
+ Optional,
12
+ )
13
+
14
+ import betterproto
15
+ import grpclib
16
+ from betterproto.grpc.grpclib_server import ServiceBase
17
+
18
+
19
+ if TYPE_CHECKING:
20
+ import grpclib.server
21
+ from betterproto.grpc.grpclib_client import MetadataLike
22
+ from grpclib.metadata import Deadline
23
+
24
+
25
@dataclass(eq=False, repr=False)
class Source(betterproto.Message):
    """A readable source: a URL with optional content type and size."""

    # Field 1 (string): URL of the source.
    url: str = betterproto.string_field(1)
    # Field 2 (optional string).
    content_type: Optional[str] = betterproto.string_field(2, optional=True)
    # Field 3 (optional int64).
    size: Optional[int] = betterproto.int64_field(3, optional=True)
    # Field 10: member of the "metadata" one-of group.
    parquet: "MetadataParquet" = betterproto.message_field(10, group="metadata")
31
+
32
+
33
@dataclass(eq=False, repr=False)
class Fetch(betterproto.Message):
    """Field-less message. Upstream note: let's make "fetch" happen."""
38
+
39
+
40
@dataclass(eq=False, repr=False)
class FetchRequest(betterproto.Message):
    """
    A signed request to read an spfs://<fsid>/path?token=<jwt> URI.

    * Declares the MIME types the client can read directly.
    * Declares whether the client has connectivity to the FileSystem.
    """

    # Field 1 (string): the URI to read.
    uri: str = betterproto.string_field(1)
    # Field 2 (message): the client's connectivity report.
    connectivity: "Connectivity" = betterproto.message_field(2)
    # Field 3 (repeated string): presumably the accepted MIME types
    # mentioned in the class docstring -- confirm against the .proto.
    accepts: List[str] = betterproto.string_field(3)
51
+
52
+
53
@dataclass(eq=False, repr=False)
class FetchResponse(betterproto.Message):
    """Reply to a fetch request, carrying the candidate sources."""

    # Field 1 (repeated message): sources the client should use to attempt
    # to read the file. The client should try each one in order until it
    # finds one that works.
    sources: List["Source"] = betterproto.message_field(1)
60
+
61
+
62
@dataclass(eq=False, repr=False)
class Sink(betterproto.Message):
    """Message holding a single URL string."""

    # Field 1 (string).
    url: str = betterproto.string_field(1)
65
+
66
+
67
@dataclass(eq=False, repr=False)
class Put(betterproto.Message):
    """Field-less message; declares no data of its own."""
70
+
71
+
72
@dataclass(eq=False, repr=False)
class PutRequest(betterproto.Message):
    """Request message for the Put RPC: a URI plus connectivity info."""

    # Field 1 (string).
    uri: str = betterproto.string_field(1)
    # Field 2 (message).
    connectivity: "Connectivity" = betterproto.message_field(2)
76
+
77
+
78
@dataclass(eq=False, repr=False)
class PutResponse(betterproto.Message):
    """Response message for the Put RPC: a list of sinks."""

    # Field 1 (repeated message).
    sinks: List["Sink"] = betterproto.message_field(1)
81
+
82
+
83
@dataclass(eq=False, repr=False)
class Delete(betterproto.Message):
    """Field-less message; declares no data of its own."""
86
+
87
+
88
@dataclass(eq=False, repr=False)
class DeleteRequest(betterproto.Message):
    """Request message for the Delete RPC: the URI to act on."""

    # Field 1 (string).
    uri: str = betterproto.string_field(1)
91
+
92
+
93
@dataclass(eq=False, repr=False)
class DeleteResponse(betterproto.Message):
    """Response message for the Delete RPC."""

    # Field 1 (string): signed URL to delete the resource.
    url: str = betterproto.string_field(1)
97
+
98
+
99
@dataclass(eq=False, repr=False)
class Connectivity(betterproto.Message):
    """The client's own view of its connectivity to a FileSystem."""

    # Field 1 (bool).
    unreachable: bool = betterproto.bool_field(1)
    # Field 2 (optional int32). Presumably microseconds, judging by the
    # `_us` suffix -- confirm against the .proto.
    round_trip_time_us: Optional[int] = betterproto.int32_field(2, optional=True)
107
+
108
+
109
@dataclass(eq=False, repr=False)
class Metadata(betterproto.Message):
    """Field-less message; declares no data of its own."""
112
+
113
+
114
@dataclass(eq=False, repr=False)
class MetadataParquet(betterproto.Message):
    """Field-less message; used as the type of `Source.parquet`."""
117
+
118
+
119
class ScandalServiceStub(betterproto.ServiceStub):
    """Client stub exposing the three unary-unary ScandalService RPCs."""

    async def fetch(
        self,
        fetch_request: "FetchRequest",
        *,
        timeout: Optional[float] = None,
        deadline: Optional["Deadline"] = None,
        metadata: Optional["MetadataLike"] = None
    ) -> "FetchResponse":
        """Call the Fetch route and return its `FetchResponse`.

        `timeout`, `deadline` and `metadata` are forwarded unchanged to
        `_unary_unary`.
        """
        route = "/scandal.ScandalService/Fetch"
        reply = await self._unary_unary(
            route,
            fetch_request,
            FetchResponse,
            timeout=timeout,
            deadline=deadline,
            metadata=metadata,
        )
        return reply

    async def put(
        self,
        put_request: "PutRequest",
        *,
        timeout: Optional[float] = None,
        deadline: Optional["Deadline"] = None,
        metadata: Optional["MetadataLike"] = None
    ) -> "PutResponse":
        """Call the Put route and return its `PutResponse`.

        `timeout`, `deadline` and `metadata` are forwarded unchanged to
        `_unary_unary`.
        """
        route = "/scandal.ScandalService/Put"
        reply = await self._unary_unary(
            route,
            put_request,
            PutResponse,
            timeout=timeout,
            deadline=deadline,
            metadata=metadata,
        )
        return reply

    async def delete(
        self,
        delete_request: "DeleteRequest",
        *,
        timeout: Optional[float] = None,
        deadline: Optional["Deadline"] = None,
        metadata: Optional["MetadataLike"] = None
    ) -> "DeleteResponse":
        """Call the Delete route and return its `DeleteResponse`.

        `timeout`, `deadline` and `metadata` are forwarded unchanged to
        `_unary_unary`.
        """
        route = "/scandal.ScandalService/Delete"
        reply = await self._unary_unary(
            route,
            delete_request,
            DeleteResponse,
            timeout=timeout,
            deadline=deadline,
            metadata=metadata,
        )
        return reply
170
+
171
+
172
class ScandalServiceBase(ServiceBase):
    """Server-side base for ScandalService.

    Each public handler raises `GRPCError` with status UNIMPLEMENTED
    unless it is overridden. `__mapping__` registers the three routes.
    """

    async def fetch(self, fetch_request: "FetchRequest") -> "FetchResponse":
        """Handle Fetch; this default raises UNIMPLEMENTED."""
        raise grpclib.GRPCError(grpclib.const.Status.UNIMPLEMENTED)

    async def put(self, put_request: "PutRequest") -> "PutResponse":
        """Handle Put; this default raises UNIMPLEMENTED."""
        raise grpclib.GRPCError(grpclib.const.Status.UNIMPLEMENTED)

    async def delete(self, delete_request: "DeleteRequest") -> "DeleteResponse":
        """Handle Delete; this default raises UNIMPLEMENTED."""
        raise grpclib.GRPCError(grpclib.const.Status.UNIMPLEMENTED)

    async def __rpc_fetch(
        self, stream: "grpclib.server.Stream[FetchRequest, FetchResponse]"
    ) -> None:
        """Receive one message, pass it to `fetch`, send the result back."""
        await stream.send_message(await self.fetch(await stream.recv_message()))

    async def __rpc_put(
        self, stream: "grpclib.server.Stream[PutRequest, PutResponse]"
    ) -> None:
        """Receive one message, pass it to `put`, send the result back."""
        await stream.send_message(await self.put(await stream.recv_message()))

    async def __rpc_delete(
        self, stream: "grpclib.server.Stream[DeleteRequest, DeleteResponse]"
    ) -> None:
        """Receive one message, pass it to `delete`, send the result back."""
        await stream.send_message(await self.delete(await stream.recv_message()))

    def __mapping__(self) -> Dict[str, grpclib.const.Handler]:
        """Return the route-to-handler table for this service."""
        make_handler = grpclib.const.Handler
        unary_unary = grpclib.const.Cardinality.UNARY_UNARY
        # (route, bound stream handler, request type, reply type)
        routes = (
            (
                "/scandal.ScandalService/Fetch",
                self.__rpc_fetch,
                FetchRequest,
                FetchResponse,
            ),
            (
                "/scandal.ScandalService/Put",
                self.__rpc_put,
                PutRequest,
                PutResponse,
            ),
            (
                "/scandal.ScandalService/Delete",
                self.__rpc_delete,
                DeleteRequest,
                DeleteResponse,
            ),
        )
        return {
            route: make_handler(func, unary_unary, request_type, reply_type)
            for route, func, request_type, reply_type in routes
        }
@@ -0,0 +1,36 @@
1
+ # Generated by the protocol buffer compiler. DO NOT EDIT!
2
+ # sources: spfs/spfs.proto
3
+ # plugin: python-betterproto
4
+ # This file has been @generated
5
+
6
+ from dataclasses import dataclass
7
+
8
+ import betterproto
9
+
10
+
11
@dataclass(eq=False, repr=False)
class FileMetadata(betterproto.Message):
    """File metadata; all fields belong to the "format_specific" one-of group."""

    # Field 1.
    protobuf: "ProtobufFileSpecificMetadata" = betterproto.message_field(
        1, group="format_specific"
    )
    # Field 2.
    parquet: "ParquetFileSpecificMetadata" = betterproto.message_field(
        2, group="format_specific"
    )
    # Field 3.
    vortex: "VortexFileSpecificMetadata" = betterproto.message_field(
        3, group="format_specific"
    )
22
+
23
+
24
@dataclass(eq=False, repr=False)
class ProtobufFileSpecificMetadata(betterproto.Message):
    """Field-less message; the `protobuf` variant of `FileMetadata`."""
27
+
28
+
29
@dataclass(eq=False, repr=False)
class ParquetFileSpecificMetadata(betterproto.Message):
    """The `parquet` variant of `FileMetadata`."""

    # Field 1 (uint32).
    metadata_size_bytes: int = betterproto.uint32_field(1)
32
+
33
+
34
@dataclass(eq=False, repr=False)
class VortexFileSpecificMetadata(betterproto.Message):
    """The `vortex` variant of `FileMetadata`."""

    # Field 1 (uint32).
    metadata_size_bytes: int = betterproto.uint32_field(1)
File without changes
@@ -0,0 +1,225 @@
1
+ # Generated by the protocol buffer compiler. DO NOT EDIT!
2
+ # sources: spiral/table/common.proto, spiral/table/metadata.proto, spiral/table/statistics.proto, spiral/table/wal.proto
3
+ # plugin: python-betterproto
4
+ # This file has been @generated
5
+
6
+ from dataclasses import dataclass
7
+ from typing import (
8
+ List,
9
+ Optional,
10
+ )
11
+
12
+ import betterproto
13
+
14
+
15
class FileFormat(betterproto.Enum):
    """File formats; 0 is the unspecified value."""

    UNSPECIFIED = 0
    PARQUET = 1
    PROTOBUF = 2
    BINARY_ARRAY = 3
    VORTEX = 4
21
+
22
+
23
class Level(betterproto.Enum):
    """Levels L0 and L1; 0 is the unspecified value."""

    UNSPECIFIED = 0
    L0 = 1
    L1 = 2
27
+
28
+
29
@dataclass(eq=False, repr=False)
class ApproximateSetMembership(betterproto.Message):
    """Set-membership message with a single "membership_strategy" variant."""

    # Field 2: member of the "membership_strategy" one-of group.
    bloom_filter: "BloomFilter" = betterproto.message_field(
        2, group="membership_strategy"
    )
34
+
35
+
36
@dataclass(eq=False, repr=False)
class BloomFilter(betterproto.Message):
    """Bloom filter payload: raw bytes plus two integer parameters."""

    # Field 1 (bytes).
    bit_vec: bytes = betterproto.bytes_field(1)
    # Field 2 (uint64).
    bitmap_bits: int = betterproto.uint64_field(2)
    # Field 3 (uint32).
    k_num: int = betterproto.uint32_field(3)
41
+
42
+
43
@dataclass(eq=False, repr=False)
class Schema(betterproto.Message):
    """Schema carried as a single bytes field named `arrow`."""

    # Field 1 (bytes). Presumably a serialized Arrow schema -- confirm.
    arrow: bytes = betterproto.bytes_field(1)
46
+
47
+
48
@dataclass(eq=False, repr=False)
class ColumnGroup(betterproto.Message):
    """Column group identified by a list of string parts."""

    # Field 1 (repeated string).
    parts: List[str] = betterproto.string_field(1)
51
+
52
+
53
@dataclass(eq=False, repr=False)
class KeySpan(betterproto.Message):
    """Span of keys given as indices into a key space; `end` is exclusive."""

    # Field 1 (uint64).
    begin: int = betterproto.uint64_field(1)
    # Field 2 (uint64).
    end: int = betterproto.uint64_field(2)
59
+
60
+
61
@dataclass(eq=False, repr=False)
class KeyExtent(betterproto.Message):
    """Extent of keys as an inclusive range from `min` to `max`."""

    # Field 1 (bytes).
    min: bytes = betterproto.bytes_field(1)
    # Field 2 (bytes).
    max: bytes = betterproto.bytes_field(2)
67
+
68
+
69
@dataclass(eq=False, repr=False)
class KeyMap(betterproto.Message):
    """Displacement map, carried as a bytes bitmap."""

    # Field 1 (bytes).
    bitmap: bytes = betterproto.bytes_field(1)
74
+
75
+
76
@dataclass(eq=False, repr=False)
class ManifestHandle(betterproto.Message):
    """Handle to a manifest file, with extra metadata to accelerate reads."""

    # Field 1 (string).
    id: str = betterproto.string_field(1)
    # Field 2 (enum).
    format: "FileFormat" = betterproto.enum_field(2)
    # Field 3 (uint64).
    file_size: int = betterproto.uint64_field(3)
    # Field 4 (optional bytes).
    spfs_format_metadata: Optional[bytes] = betterproto.bytes_field(4, optional=True)
86
+
87
+
88
@dataclass(eq=False, repr=False)
class VersionedSchema(betterproto.Message):
    """A schema paired with a timestamp."""

    # Field 1 (uint64): the timestamp.
    ts: int = betterproto.uint64_field(1)
    # Field 2 (message).
    schema: "Schema" = betterproto.message_field(2)
    # Field 3 (repeated string): column IDs, in the same order as the
    # columns in the schema. Must have the same length as the number of
    # columns in the schema.
    column_ids: List[str] = betterproto.string_field(3)
99
+
100
+
101
@dataclass(eq=False, repr=False)
class WriteAheadLog(betterproto.Message):
    """A table's write-ahead log: a sequence of operations and table metadata."""

    # Field 1 (repeated message).
    entries: List["LogEntry"] = betterproto.message_field(1)

    # Field 3 (uint64): timestamp of the latest entry that has been removed
    # from the log. Entries with ts <= truncated_up_to are NOT present.
    truncated_up_to: int = betterproto.uint64_field(3)

    # Field 2 (optional message): an absent manifest means that one has not
    # yet been written.
    ks_manifest_handle: Optional["ManifestHandle"] = betterproto.message_field(
        2, optional=True
    )
118
+
119
+
120
@dataclass(eq=False, repr=False)
class LogEntry(betterproto.Message):
    """One log entry: a timestamp plus one variant of the "operation" group."""

    # Field 1 (uint64).
    ts: int = betterproto.uint64_field(1)

    # Fields 2-8 all belong to the "operation" one-of group.
    key_space_write: "KeySpaceWriteOp" = betterproto.message_field(
        2, group="operation"
    )
    fragment_set_write: "FragmentSetWriteOp" = betterproto.message_field(
        3, group="operation"
    )
    configuration: "ConfigurationOp" = betterproto.message_field(
        4, group="operation"
    )
    schema_evolution: "SchemaEvolutionOp" = betterproto.message_field(
        5, group="operation"
    )
    schema_break: "SchemaBreakOp" = betterproto.message_field(
        6, group="operation"
    )
    compact_key_space: "CompactKeySpaceOp" = betterproto.message_field(
        7, group="operation"
    )
    compact_column_group: "CompactColumnGroupOp" = betterproto.message_field(
        8, group="operation"
    )
138
+
139
+
140
@dataclass(eq=False, repr=False)
class KeySpaceWriteOp(betterproto.Message):
    """Key-space write operation: a key-space id and a manifest handle."""

    # Field 1 (string).
    ks_id: str = betterproto.string_field(1)
    # Field 2 (message).
    manifest_handle: "ManifestHandle" = betterproto.message_field(2)
144
+
145
+
146
@dataclass(eq=False, repr=False)
class FragmentSetWriteOp(betterproto.Message):
    """Fragment-set write operation for one column group."""

    # Field 1 (message).
    column_group: "ColumnGroup" = betterproto.message_field(1)
    # Field 2 (string).
    fs_id: str = betterproto.string_field(2)
    # Field 3 (enum).
    fs_level: "Level" = betterproto.enum_field(3)
    # Field 4 (message).
    manifest_handle: "ManifestHandle" = betterproto.message_field(4)
    # Field 5 (message).
    key_span: "KeySpan" = betterproto.message_field(5)
    # Field 6 (message).
    key_extent: "KeyExtent" = betterproto.message_field(6)
    # Field 7 (repeated string).
    column_ids: List[str] = betterproto.string_field(7)
155
+
156
+
157
@dataclass(eq=False, repr=False)
class ConfigurationOp(betterproto.Message):
    """Configuration operation for one column group."""

    # Field 1 (message).
    column_group: "ColumnGroup" = betterproto.message_field(1)
    # Field 2 (optional bool). Upstream note: all column group configuration
    # is stored in column group metadata.
    immutable_schema: Optional[bool] = betterproto.bool_field(2, optional=True)
162
+
163
+
164
@dataclass(eq=False, repr=False)
class SchemaEvolutionOp(betterproto.Message):
    """Schema-evolution operation: a column group and its new schema."""

    # Field 1 (message).
    column_group: "ColumnGroup" = betterproto.message_field(1)
    # Field 2 (message).
    new_schema: "VersionedSchema" = betterproto.message_field(2)
168
+
169
+
170
@dataclass(eq=False, repr=False)
class SchemaBreakOp(betterproto.Message):
    """Schema-break operation: a column group and the removed column names."""

    # Field 1 (message).
    column_group: "ColumnGroup" = betterproto.message_field(1)
    # Field 2 (repeated string).
    removed_column_names: List[str] = betterproto.string_field(2)
174
+
175
+
176
@dataclass(eq=False, repr=False)
class CompactKeySpaceOp(betterproto.Message):
    """Key-space compaction operation: source ids and destination ids."""

    # Field 1 (repeated string).
    from_ks_ids: List[str] = betterproto.string_field(1)
    # Field 2 (repeated string).
    into_ks_ids: List[str] = betterproto.string_field(2)
180
+
181
+
182
@dataclass(eq=False, repr=False)
class CompactColumnGroupOp(betterproto.Message):
    """Column-group compaction operation."""

    # Field 1 (message).
    column_group: "ColumnGroup" = betterproto.message_field(1)

    # Field 2 (repeated string).
    # NOTE: While key space compaction always compacts a full key space,
    # column group compaction can compact only a subset of the fragment
    # set, therefore the event specifies the fragment ids.
    from_fragment_ids: List[str] = betterproto.string_field(2)

    # Field 3 (repeated string).
    into_fs_ids: List[str] = betterproto.string_field(3)
193
+
194
+
195
@dataclass(eq=False, repr=False)
class ColumnGroupMetadata(betterproto.Message):
    """Metadata for one column group, including its schema history."""

    # Field 1 (message).
    column_group: "ColumnGroup" = betterproto.message_field(1)

    # Field 2 (optional message): an absent manifest means that one has not
    # yet been written.
    manifest_handle: Optional["ManifestHandle"] = betterproto.message_field(
        2, optional=True
    )

    # Field 3 (uint64): timestamp of the last WAL entry that modified this
    # metadata.
    last_modified_at: int = betterproto.uint64_field(3)

    # Field 4 (repeated message): versions of the schema. Higher index is
    # more recent; the last element is the latest schema. Stored to support
    # time travel through the schema, e.g. reading a deleted column.
    # TODO(marko): Add config that truncates this list and breaks time travel.
    schema_versions: List["VersionedSchema"] = betterproto.message_field(4)

    # Field 5 (bool): True if the schema can NOT be evolved on write, in
    # which case an explicit schema evolution is required.
    immutable_schema: bool = betterproto.bool_field(5)

    # Field 6 (int32): salt used to compute column IDs. It is modified on a
    # breaking change, e.g. column deletion, so that a previously existing
    # column that is added again gets a different ID.
    schema_salt: int = betterproto.int32_field(6)
File without changes