pyspiral 0.2.5__cp310-abi3-macosx_11_0_arm64.whl → 0.4.0__cp310-abi3-macosx_11_0_arm64.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. {pyspiral-0.2.5.dist-info → pyspiral-0.4.0.dist-info}/METADATA +12 -14
  2. pyspiral-0.4.0.dist-info/RECORD +98 -0
  3. {pyspiral-0.2.5.dist-info → pyspiral-0.4.0.dist-info}/WHEEL +1 -1
  4. spiral/__init__.py +6 -7
  5. spiral/_lib.abi3.so +0 -0
  6. spiral/adbc.py +21 -14
  7. spiral/api/__init__.py +15 -172
  8. spiral/api/admin.py +12 -26
  9. spiral/api/client.py +160 -0
  10. spiral/api/filesystems.py +100 -72
  11. spiral/api/organizations.py +45 -58
  12. spiral/api/projects.py +171 -134
  13. spiral/api/telemetry.py +19 -0
  14. spiral/api/types.py +20 -0
  15. spiral/api/workloads.py +32 -25
  16. spiral/{arrow.py → arrow_.py} +12 -0
  17. spiral/cli/__init__.py +2 -5
  18. spiral/cli/admin.py +7 -12
  19. spiral/cli/app.py +23 -6
  20. spiral/cli/console.py +1 -1
  21. spiral/cli/fs.py +83 -18
  22. spiral/cli/iceberg/__init__.py +7 -0
  23. spiral/cli/iceberg/namespaces.py +47 -0
  24. spiral/cli/iceberg/tables.py +60 -0
  25. spiral/cli/indexes/__init__.py +19 -0
  26. spiral/cli/login.py +14 -5
  27. spiral/cli/orgs.py +90 -0
  28. spiral/cli/printer.py +9 -1
  29. spiral/cli/projects.py +136 -0
  30. spiral/cli/state.py +2 -0
  31. spiral/cli/tables/__init__.py +121 -0
  32. spiral/cli/telemetry.py +18 -0
  33. spiral/cli/types.py +8 -10
  34. spiral/cli/{workload.py → workloads.py} +11 -11
  35. spiral/{catalog.py → client.py} +22 -21
  36. spiral/core/client/__init__.pyi +117 -0
  37. spiral/core/index/__init__.pyi +15 -0
  38. spiral/core/table/__init__.pyi +108 -0
  39. spiral/core/{manifests → table/manifests}/__init__.pyi +5 -23
  40. spiral/core/table/metastore/__init__.pyi +62 -0
  41. spiral/core/{spec → table/spec}/__init__.pyi +49 -92
  42. spiral/datetime_.py +27 -0
  43. spiral/expressions/__init__.py +40 -17
  44. spiral/expressions/base.py +5 -5
  45. spiral/expressions/list_.py +1 -1
  46. spiral/expressions/mp4.py +62 -0
  47. spiral/expressions/png.py +18 -0
  48. spiral/expressions/qoi.py +18 -0
  49. spiral/expressions/refs.py +23 -9
  50. spiral/expressions/struct.py +7 -5
  51. spiral/expressions/text.py +62 -0
  52. spiral/expressions/tiff.py +88 -88
  53. spiral/expressions/udf.py +3 -3
  54. spiral/iceberg/__init__.py +3 -0
  55. spiral/iceberg/client.py +33 -0
  56. spiral/indexes/__init__.py +5 -0
  57. spiral/indexes/client.py +137 -0
  58. spiral/indexes/index.py +34 -0
  59. spiral/indexes/scan.py +22 -0
  60. spiral/project.py +19 -110
  61. spiral/{proto → protogen}/_/scandal/__init__.py +32 -77
  62. spiral/protogen/_/spiral/table/__init__.py +22 -0
  63. spiral/protogen/substrait/__init__.py +3399 -0
  64. spiral/protogen/substrait/extensions/__init__.py +115 -0
  65. spiral/server.py +17 -0
  66. spiral/settings.py +31 -87
  67. spiral/substrait_.py +10 -6
  68. spiral/tables/__init__.py +12 -0
  69. spiral/tables/client.py +130 -0
  70. spiral/{dataset.py → tables/dataset.py} +36 -25
  71. spiral/tables/debug/manifests.py +70 -0
  72. spiral/tables/debug/metrics.py +56 -0
  73. spiral/{debug.py → tables/debug/scan.py} +6 -9
  74. spiral/tables/maintenance.py +12 -0
  75. spiral/tables/scan.py +193 -0
  76. spiral/tables/snapshot.py +78 -0
  77. spiral/tables/table.py +157 -0
  78. spiral/tables/transaction.py +52 -0
  79. pyspiral-0.2.5.dist-info/RECORD +0 -81
  80. spiral/api/tables.py +0 -94
  81. spiral/api/tokens.py +0 -56
  82. spiral/authn/authn.py +0 -89
  83. spiral/authn/device.py +0 -206
  84. spiral/authn/github_.py +0 -33
  85. spiral/authn/modal_.py +0 -18
  86. spiral/cli/org.py +0 -90
  87. spiral/cli/project.py +0 -107
  88. spiral/cli/table.py +0 -20
  89. spiral/cli/token.py +0 -27
  90. spiral/config.py +0 -26
  91. spiral/core/core/__init__.pyi +0 -53
  92. spiral/core/metastore/__init__.pyi +0 -91
  93. spiral/proto/_/spfs/__init__.py +0 -36
  94. spiral/proto/_/spiral/table/__init__.py +0 -225
  95. spiral/proto/_/spiraldb/metastore/__init__.py +0 -499
  96. spiral/proto/__init__.py +0 -0
  97. spiral/proto/scandal/__init__.py +0 -45
  98. spiral/proto/spiral/__init__.py +0 -0
  99. spiral/proto/spiral/table/__init__.py +0 -96
  100. spiral/scan_.py +0 -168
  101. spiral/table.py +0 -157
  102. {pyspiral-0.2.5.dist-info → pyspiral-0.4.0.dist-info}/entry_points.txt +0 -0
  103. /spiral/{authn/__init__.py → core/__init__.pyi} +0 -0
  104. /spiral/{core → protogen/_}/__init__.py +0 -0
  105. /spiral/{proto/_ → protogen/_/arrow}/__init__.py +0 -0
  106. /spiral/{proto/_/arrow → protogen/_/arrow/flight}/__init__.py +0 -0
  107. /spiral/{proto/_/arrow/flight → protogen/_/arrow/flight/protocol}/__init__.py +0 -0
  108. /spiral/{proto → protogen}/_/arrow/flight/protocol/sql/__init__.py +0 -0
  109. /spiral/{proto/_/arrow/flight/protocol → protogen/_/spiral}/__init__.py +0 -0
  110. /spiral/{proto → protogen/_}/substrait/__init__.py +0 -0
  111. /spiral/{proto → protogen/_}/substrait/extensions/__init__.py +0 -0
  112. /spiral/{proto/_/spiral → protogen}/__init__.py +0 -0
  113. /spiral/{proto → protogen}/util.py +0 -0
  114. /spiral/{proto/_/spiraldb → tables/debug}/__init__.py +0 -0
spiral/project.py CHANGED
@@ -1,26 +1,18 @@
1
- from typing import TYPE_CHECKING, Any
2
-
3
- import pyarrow as pa
4
-
5
- from spiral import Table
6
- from spiral.api.tables import CreateTable, FindTable
7
- from spiral.core.core import Table as CoreTable
8
- from spiral.core.metastore import PyMetastore
9
- from spiral.core.spec import Schema
10
- from spiral.types_ import Uri
1
+ from typing import TYPE_CHECKING
11
2
 
12
3
  if TYPE_CHECKING:
13
- from spiral.catalog import Spiral
4
+ from spiral.client import Spiral
5
+ from spiral.iceberg import Iceberg
6
+ from spiral.indexes import Indexes
7
+ from spiral.tables import Tables
14
8
 
15
9
 
16
10
  class Project:
17
- def __init__(self, spiral_db: "Spiral", id: str, name: str | None = None):
18
- self._spiral_db = spiral_db
11
+ def __init__(self, spiral: "Spiral", id: str, name: str | None = None):
12
+ self._spiral = spiral
19
13
  self._id = id
20
14
  self._name = name
21
15
 
22
- self._api = self._spiral_db.config.api
23
-
24
16
  def __str__(self):
25
17
  return self._id
26
18
 
@@ -35,103 +27,20 @@ class Project:
35
27
  def name(self) -> str:
36
28
  return self._name or self._id
37
29
 
38
- def list_table_names(self) -> list[(str, str)]:
39
- """List tuples of (dataset, table) names in the project."""
40
- return [(t.dataset, t.table) for t in self._api.table.list(FindTable.Request(project_id=self.id))]
41
-
42
- def list_tables(self) -> list[Table]:
43
- """List tables in the project."""
44
- return [
45
- Table(
46
- CoreTable(
47
- PyMetastore.http(
48
- table_id=t.id,
49
- root_uri=t.metadata.root_uri,
50
- key_schema=Schema.from_arrow(t.metadata.key_schema),
51
- base_url=self._api.base_url + "/metastore/",
52
- token_provider=self._spiral_db.config.authn.token,
53
- ),
54
- ),
55
- name=f"{self.id}.{t.dataset}.{t.table}",
56
- )
57
- for t in self._api.table.list(FindTable.Request(project_id=self.id))
58
- ]
59
-
60
- def create_table(
61
- self,
62
- identifier: str,
63
- *,
64
- key_schema: pa.Schema | Any,
65
- uri: Uri | None = None,
66
- exist_ok: bool = False,
67
- ) -> Table:
68
- """Create a new table in the project."""
69
- dataset, table = self._parse_identifier(identifier)
70
-
71
- if not isinstance(key_schema, pa.Schema):
72
- key_schema = pa.schema(key_schema)
73
-
74
- res = self._api.table.create(
75
- CreateTable.Request(
76
- project_id=self.id,
77
- dataset=dataset,
78
- table=table,
79
- key_schema=key_schema,
80
- root_uri=uri,
81
- exist_ok=exist_ok,
82
- )
83
- )
84
-
85
- # Must have the same schema as provided, even if the table already exists.
86
- expected_key_schema = res.table.metadata.key_schema
87
- if key_schema != expected_key_schema:
88
- raise ValueError(f"Table already exists with different key schema: {expected_key_schema} != {key_schema}")
89
- if uri and res.table.metadata.root_uri != uri:
90
- raise ValueError(f"Table already exists with different root URI: {res.table.metadata.root_uri} != {uri}")
91
-
92
- # Set up a metastore backed by SpiralDB
93
- metastore = PyMetastore.http(
94
- table_id=res.table.id,
95
- root_uri=res.table.metadata.root_uri,
96
- key_schema=Schema.from_arrow(res.table.metadata.key_schema),
97
- base_url=self._api.base_url + "/metastore/",
98
- token_provider=self._spiral_db.config.authn.token,
99
- )
100
-
101
- return Table(CoreTable(metastore), name=f"{self.id}.{res.table.dataset}.{res.table.table}")
30
+ @property
31
+ def tables(self) -> "Tables":
32
+ from spiral.tables import Tables
102
33
 
103
- def table(self, identifier: str) -> Table:
104
- """Open a table with a `dataset.table` identifier, or `table` name using the `default` dataset."""
105
- dataset, table = self._parse_identifier(identifier)
34
+ return Tables(self._spiral._api, self._spiral._core, project_id=self.id)
106
35
 
107
- # TODO(ngates): why does the client _need_ this information? Can we defer it?
108
- res = self._api.table.find(
109
- FindTable.Request(
110
- project_id=self.id,
111
- dataset=dataset,
112
- table=table,
113
- )
114
- )
115
- if res.table is None:
116
- raise ValueError(f"Table not found: {self.id}.{dataset}.{table}")
36
+ @property
37
+ def indexes(self) -> "Indexes":
38
+ from spiral.indexes.client import Indexes
117
39
 
118
- # Set up a metastore backed by SpiralDB
119
- metastore = PyMetastore.http(
120
- table_id=res.table.id,
121
- root_uri=res.table.metadata.root_uri,
122
- key_schema=Schema.from_arrow(res.table.metadata.key_schema),
123
- base_url=self._api.base_url + "/metastore/",
124
- token_provider=self._spiral_db.config.authn.token,
125
- )
40
+ return Indexes(self._spiral._api, self._spiral._core, project_id=self._id)
126
41
 
127
- return Table(CoreTable(metastore), name=f"{self.id}.{res.table.dataset}.{res.table.table}")
42
+ @property
43
+ def iceberg(self) -> "Iceberg":
44
+ from spiral.iceberg import Iceberg
128
45
 
129
- @staticmethod
130
- def _parse_identifier(identifier: str) -> tuple[str, str]:
131
- parts = identifier.split(".")
132
- if len(parts) == 1:
133
- return "default", parts[0]
134
- elif len(parts) == 2:
135
- return parts[0], parts[1]
136
- else:
137
- raise ValueError(f"Invalid table identifier: {identifier}")
46
+ return Iceberg(self._spiral, project_id=self._id)
@@ -30,6 +30,31 @@ class Source(betterproto.Message):
30
30
  parquet: "MetadataParquet" = betterproto.message_field(10, group="metadata")
31
31
 
32
32
 
33
+ @dataclass(eq=False, repr=False)
34
+ class Metadata(betterproto.Message):
35
+ pass
36
+
37
+
38
+ @dataclass(eq=False, repr=False)
39
+ class MetadataParquet(betterproto.Message):
40
+ pass
41
+
42
+
43
+ @dataclass(eq=False, repr=False)
44
+ class Sink(betterproto.Message):
45
+ url: str = betterproto.string_field(1)
46
+
47
+
48
+ @dataclass(eq=False, repr=False)
49
+ class Connectivity(betterproto.Message):
50
+ """
51
+ Information about the client's perceived connectivity to a FileSystem.
52
+ """
53
+
54
+ unreachable: bool = betterproto.bool_field(1)
55
+ round_trip_time_us: Optional[int] = betterproto.int32_field(2, optional=True)
56
+
57
+
33
58
  @dataclass(eq=False, repr=False)
34
59
  class Fetch(betterproto.Message):
35
60
  """Let's make "fetch" happen."""
@@ -39,15 +64,19 @@ class Fetch(betterproto.Message):
39
64
 
40
65
  @dataclass(eq=False, repr=False)
41
66
  class FetchRequest(betterproto.Message):
67
+ """TODO(ngates): include projection expression."""
68
+
42
69
  uri: str = betterproto.string_field(1)
43
70
  """
44
- A signed request to read an spfs://<fsid>/path?token=<jwt> URI.
45
- * Declares the MIME types the client can read directly.
46
- * Declares whether the client has connectivity to the FileSystem.
71
+ A signed request to read an
72
+ spfs://<fsid>/path?token=<jwt> URI.
47
73
  """
48
74
 
49
75
  connectivity: "Connectivity" = betterproto.message_field(2)
76
+ """Declares whether the client has connectivity to the FileSystem."""
77
+
50
78
  accepts: List[str] = betterproto.string_field(3)
79
+ """Declares the MIME types the client can read directly."""
51
80
 
52
81
 
53
82
  @dataclass(eq=False, repr=False)
@@ -59,11 +88,6 @@ class FetchResponse(betterproto.Message):
59
88
  """
60
89
 
61
90
 
62
- @dataclass(eq=False, repr=False)
63
- class Sink(betterproto.Message):
64
- url: str = betterproto.string_field(1)
65
-
66
-
67
91
  @dataclass(eq=False, repr=False)
68
92
  class Put(betterproto.Message):
69
93
  pass
@@ -80,42 +104,6 @@ class PutResponse(betterproto.Message):
80
104
  sinks: List["Sink"] = betterproto.message_field(1)
81
105
 
82
106
 
83
- @dataclass(eq=False, repr=False)
84
- class Delete(betterproto.Message):
85
- pass
86
-
87
-
88
- @dataclass(eq=False, repr=False)
89
- class DeleteRequest(betterproto.Message):
90
- uri: str = betterproto.string_field(1)
91
-
92
-
93
- @dataclass(eq=False, repr=False)
94
- class DeleteResponse(betterproto.Message):
95
- url: str = betterproto.string_field(1)
96
- """Returns signed URL to delete the resource."""
97
-
98
-
99
- @dataclass(eq=False, repr=False)
100
- class Connectivity(betterproto.Message):
101
- """
102
- Information about the client's perceived connectivity to a FileSystem.
103
- """
104
-
105
- unreachable: bool = betterproto.bool_field(1)
106
- round_trip_time_us: Optional[int] = betterproto.int32_field(2, optional=True)
107
-
108
-
109
- @dataclass(eq=False, repr=False)
110
- class Metadata(betterproto.Message):
111
- pass
112
-
113
-
114
- @dataclass(eq=False, repr=False)
115
- class MetadataParquet(betterproto.Message):
116
- pass
117
-
118
-
119
107
  class ScandalServiceStub(betterproto.ServiceStub):
120
108
  async def fetch(
121
109
  self,
@@ -151,23 +139,6 @@ class ScandalServiceStub(betterproto.ServiceStub):
151
139
  metadata=metadata,
152
140
  )
153
141
 
154
- async def delete(
155
- self,
156
- delete_request: "DeleteRequest",
157
- *,
158
- timeout: Optional[float] = None,
159
- deadline: Optional["Deadline"] = None,
160
- metadata: Optional["MetadataLike"] = None
161
- ) -> "DeleteResponse":
162
- return await self._unary_unary(
163
- "/scandal.ScandalService/Delete",
164
- delete_request,
165
- DeleteResponse,
166
- timeout=timeout,
167
- deadline=deadline,
168
- metadata=metadata,
169
- )
170
-
171
142
 
172
143
  class ScandalServiceBase(ServiceBase):
173
144
  async def fetch(self, fetch_request: "FetchRequest") -> "FetchResponse":
@@ -176,9 +147,6 @@ class ScandalServiceBase(ServiceBase):
176
147
  async def put(self, put_request: "PutRequest") -> "PutResponse":
177
148
  raise grpclib.GRPCError(grpclib.const.Status.UNIMPLEMENTED)
178
149
 
179
- async def delete(self, delete_request: "DeleteRequest") -> "DeleteResponse":
180
- raise grpclib.GRPCError(grpclib.const.Status.UNIMPLEMENTED)
181
-
182
150
  async def __rpc_fetch(
183
151
  self, stream: "grpclib.server.Stream[FetchRequest, FetchResponse]"
184
152
  ) -> None:
@@ -193,13 +161,6 @@ class ScandalServiceBase(ServiceBase):
193
161
  response = await self.put(request)
194
162
  await stream.send_message(response)
195
163
 
196
- async def __rpc_delete(
197
- self, stream: "grpclib.server.Stream[DeleteRequest, DeleteResponse]"
198
- ) -> None:
199
- request = await stream.recv_message()
200
- response = await self.delete(request)
201
- await stream.send_message(response)
202
-
203
164
  def __mapping__(self) -> Dict[str, grpclib.const.Handler]:
204
165
  return {
205
166
  "/scandal.ScandalService/Fetch": grpclib.const.Handler(
@@ -214,10 +175,4 @@ class ScandalServiceBase(ServiceBase):
214
175
  PutRequest,
215
176
  PutResponse,
216
177
  ),
217
- "/scandal.ScandalService/Delete": grpclib.const.Handler(
218
- self.__rpc_delete,
219
- grpclib.const.Cardinality.UNARY_UNARY,
220
- DeleteRequest,
221
- DeleteResponse,
222
- ),
223
178
  }
@@ -0,0 +1,22 @@
1
+ # Generated by the protocol buffer compiler. DO NOT EDIT!
2
+ # sources: spiral/table/statistics.proto
3
+ # plugin: python-betterproto
4
+ # This file has been @generated
5
+
6
+ from dataclasses import dataclass
7
+
8
+ import betterproto
9
+
10
+
11
+ @dataclass(eq=False, repr=False)
12
+ class ApproximateSetMembership(betterproto.Message):
13
+ bloom_filter: "BloomFilter" = betterproto.message_field(
14
+ 2, group="membership_strategy"
15
+ )
16
+
17
+
18
+ @dataclass(eq=False, repr=False)
19
+ class BloomFilter(betterproto.Message):
20
+ bit_vec: bytes = betterproto.bytes_field(1)
21
+ bitmap_bits: int = betterproto.uint64_field(2)
22
+ k_num: int = betterproto.uint32_field(3)