pyspiral 0.1.0__cp310-abi3-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyspiral-0.1.0.dist-info/METADATA +48 -0
- pyspiral-0.1.0.dist-info/RECORD +81 -0
- pyspiral-0.1.0.dist-info/WHEEL +4 -0
- pyspiral-0.1.0.dist-info/entry_points.txt +2 -0
- spiral/__init__.py +11 -0
- spiral/_lib.abi3.so +0 -0
- spiral/adbc.py +386 -0
- spiral/api/__init__.py +221 -0
- spiral/api/admin.py +29 -0
- spiral/api/filesystems.py +125 -0
- spiral/api/organizations.py +90 -0
- spiral/api/projects.py +160 -0
- spiral/api/tables.py +94 -0
- spiral/api/tokens.py +56 -0
- spiral/api/workloads.py +45 -0
- spiral/arrow.py +209 -0
- spiral/authn/__init__.py +0 -0
- spiral/authn/authn.py +89 -0
- spiral/authn/device.py +206 -0
- spiral/authn/github_.py +33 -0
- spiral/authn/modal_.py +18 -0
- spiral/catalog.py +78 -0
- spiral/cli/__init__.py +82 -0
- spiral/cli/__main__.py +4 -0
- spiral/cli/admin.py +21 -0
- spiral/cli/app.py +48 -0
- spiral/cli/console.py +95 -0
- spiral/cli/fs.py +47 -0
- spiral/cli/login.py +13 -0
- spiral/cli/org.py +90 -0
- spiral/cli/printer.py +45 -0
- spiral/cli/project.py +107 -0
- spiral/cli/state.py +3 -0
- spiral/cli/table.py +20 -0
- spiral/cli/token.py +27 -0
- spiral/cli/types.py +53 -0
- spiral/cli/workload.py +59 -0
- spiral/config.py +26 -0
- spiral/core/__init__.py +0 -0
- spiral/core/core/__init__.pyi +53 -0
- spiral/core/manifests/__init__.pyi +53 -0
- spiral/core/metastore/__init__.pyi +91 -0
- spiral/core/spec/__init__.pyi +257 -0
- spiral/dataset.py +239 -0
- spiral/debug.py +251 -0
- spiral/expressions/__init__.py +222 -0
- spiral/expressions/base.py +149 -0
- spiral/expressions/http.py +86 -0
- spiral/expressions/io.py +100 -0
- spiral/expressions/list_.py +68 -0
- spiral/expressions/refs.py +44 -0
- spiral/expressions/str_.py +39 -0
- spiral/expressions/struct.py +57 -0
- spiral/expressions/tiff.py +223 -0
- spiral/expressions/udf.py +46 -0
- spiral/grpc_.py +32 -0
- spiral/project.py +137 -0
- spiral/proto/_/__init__.py +0 -0
- spiral/proto/_/arrow/__init__.py +0 -0
- spiral/proto/_/arrow/flight/__init__.py +0 -0
- spiral/proto/_/arrow/flight/protocol/__init__.py +0 -0
- spiral/proto/_/arrow/flight/protocol/sql/__init__.py +1990 -0
- spiral/proto/_/scandal/__init__.py +223 -0
- spiral/proto/_/spfs/__init__.py +36 -0
- spiral/proto/_/spiral/__init__.py +0 -0
- spiral/proto/_/spiral/table/__init__.py +225 -0
- spiral/proto/_/spiraldb/__init__.py +0 -0
- spiral/proto/_/spiraldb/metastore/__init__.py +499 -0
- spiral/proto/__init__.py +0 -0
- spiral/proto/scandal/__init__.py +45 -0
- spiral/proto/spiral/__init__.py +0 -0
- spiral/proto/spiral/table/__init__.py +96 -0
- spiral/proto/substrait/__init__.py +3399 -0
- spiral/proto/substrait/extensions/__init__.py +115 -0
- spiral/proto/util.py +41 -0
- spiral/py.typed +0 -0
- spiral/scan_.py +168 -0
- spiral/settings.py +157 -0
- spiral/substrait_.py +275 -0
- spiral/table.py +157 -0
- spiral/types_.py +6 -0
spiral/substrait_.py
ADDED
@@ -0,0 +1,275 @@
|
|
1
|
+
import betterproto
|
2
|
+
import pyarrow as pa
|
3
|
+
|
4
|
+
import spiral.expressions as se
|
5
|
+
from spiral.expressions.base import Expr
|
6
|
+
from spiral.proto.substrait import (
|
7
|
+
Expression,
|
8
|
+
ExpressionFieldReference,
|
9
|
+
ExpressionLiteral,
|
10
|
+
ExpressionLiteralList,
|
11
|
+
ExpressionLiteralStruct,
|
12
|
+
ExpressionLiteralUserDefined,
|
13
|
+
ExpressionMaskExpression,
|
14
|
+
ExpressionReferenceSegment,
|
15
|
+
ExpressionReferenceSegmentListElement,
|
16
|
+
ExpressionReferenceSegmentStructField,
|
17
|
+
ExpressionScalarFunction,
|
18
|
+
ExtendedExpression,
|
19
|
+
)
|
20
|
+
from spiral.proto.substrait.extensions import (
|
21
|
+
SimpleExtensionDeclaration,
|
22
|
+
SimpleExtensionDeclarationExtensionFunction,
|
23
|
+
SimpleExtensionDeclarationExtensionType,
|
24
|
+
SimpleExtensionDeclarationExtensionTypeVariation,
|
25
|
+
)
|
26
|
+
|
27
|
+
|
28
|
+
class SubstraitConverter:
|
29
|
+
def __init__(self, scope: Expr, schema: pa.Schema, key_schema: pa.Schema):
|
30
|
+
self.scope = scope
|
31
|
+
self.schema = schema
|
32
|
+
self.key_names = set(key_schema.names)
|
33
|
+
|
34
|
+
# Extension URIs, keyed by extension URI anchor
|
35
|
+
self.extension_uris = {}
|
36
|
+
|
37
|
+
# Functions, keyed by function_anchor
|
38
|
+
self.functions = {}
|
39
|
+
|
40
|
+
# Types, keyed by type anchor
|
41
|
+
self.type_factories = {}
|
42
|
+
|
43
|
+
def convert(self, buffer: pa.Buffer) -> Expr:
|
44
|
+
"""Convert a Substrait Extended Expression into a Spiral expression."""
|
45
|
+
|
46
|
+
expr: ExtendedExpression = ExtendedExpression().parse(buffer.to_pybytes())
|
47
|
+
assert len(expr.referred_expr) == 1, "Only one expression is supported"
|
48
|
+
|
49
|
+
# Parse the extension URIs from the plan.
|
50
|
+
for ext_uri in expr.extension_uris:
|
51
|
+
self.extension_uris[ext_uri.extension_uri_anchor] = ext_uri.uri
|
52
|
+
|
53
|
+
# Parse the extensions from the plan.
|
54
|
+
for ext in expr.extensions:
|
55
|
+
self._extension_declaration(ext)
|
56
|
+
|
57
|
+
# Convert the expression
|
58
|
+
return self._expr(expr.referred_expr[0].expression)
|
59
|
+
|
60
|
+
def _extension_declaration(self, ext: SimpleExtensionDeclaration):
|
61
|
+
match betterproto.which_one_of(ext, "mapping_type"):
|
62
|
+
case "extension_function", ext_func:
|
63
|
+
self._extension_function(ext_func)
|
64
|
+
case "extension_type", ext_type:
|
65
|
+
self._extension_type(ext_type)
|
66
|
+
case "extension_type_variation", ext_type_variation:
|
67
|
+
self._extension_type_variation(ext_type_variation)
|
68
|
+
case _:
|
69
|
+
raise AssertionError("Invalid substrait plan")
|
70
|
+
|
71
|
+
def _extension_function(self, ext: SimpleExtensionDeclarationExtensionFunction):
|
72
|
+
ext_uri: str = self.extension_uris[ext.extension_uri_reference]
|
73
|
+
match ext_uri:
|
74
|
+
case "https://github.com/substrait-io/substrait/blob/main/extensions/functions_boolean.yaml":
|
75
|
+
match ext.name:
|
76
|
+
case "or":
|
77
|
+
self.functions[ext.function_anchor] = se.or_
|
78
|
+
case "and":
|
79
|
+
self.functions[ext.function_anchor] = se.and_
|
80
|
+
case "xor":
|
81
|
+
self.functions[ext.function_anchor] = se.xor
|
82
|
+
case "not":
|
83
|
+
self.functions[ext.function_anchor] = se.not_
|
84
|
+
case _:
|
85
|
+
raise NotImplementedError(f"Function name {ext.name} not supported")
|
86
|
+
case "https://github.com/substrait-io/substrait/blob/main/extensions/functions_comparison.yaml":
|
87
|
+
match ext.name:
|
88
|
+
case "equal":
|
89
|
+
self.functions[ext.function_anchor] = se.eq
|
90
|
+
case "not_equal":
|
91
|
+
self.functions[ext.function_anchor] = se.neq
|
92
|
+
case "lt":
|
93
|
+
self.functions[ext.function_anchor] = se.lt
|
94
|
+
case "lte":
|
95
|
+
self.functions[ext.function_anchor] = se.lte
|
96
|
+
case "gt":
|
97
|
+
self.functions[ext.function_anchor] = se.gt
|
98
|
+
case "gte":
|
99
|
+
self.functions[ext.function_anchor] = se.gte
|
100
|
+
case "is_null":
|
101
|
+
self.functions[ext.function_anchor] = se.is_null
|
102
|
+
case "is_not_null":
|
103
|
+
self.functions[ext.function_anchor] = se.is_not_null
|
104
|
+
case _:
|
105
|
+
raise NotImplementedError(f"Function name {ext.name} not supported")
|
106
|
+
case uri:
|
107
|
+
raise NotImplementedError(f"Function extension URI {uri} not supported")
|
108
|
+
|
109
|
+
def _extension_type(self, ext: SimpleExtensionDeclarationExtensionType):
|
110
|
+
ext_uri: str = self.extension_uris[ext.extension_uri_reference]
|
111
|
+
match ext_uri:
|
112
|
+
case "https://github.com/apache/arrow/blob/main/format/substrait/extension_types.yaml":
|
113
|
+
match ext.name:
|
114
|
+
case "null":
|
115
|
+
self.type_factories[ext.type_anchor] = pa.null
|
116
|
+
case "interval_month_day_nano":
|
117
|
+
self.type_factories[ext.type_anchor] = pa.month_day_nano_interval
|
118
|
+
case "u8":
|
119
|
+
self.type_factories[ext.type_anchor] = pa.uint8
|
120
|
+
case "u16":
|
121
|
+
self.type_factories[ext.type_anchor] = pa.uint16
|
122
|
+
case "u32":
|
123
|
+
self.type_factories[ext.type_anchor] = pa.uint32
|
124
|
+
case "u64":
|
125
|
+
self.type_factories[ext.type_anchor] = pa.uint64
|
126
|
+
case "fp16":
|
127
|
+
self.type_factories[ext.type_anchor] = pa.float16
|
128
|
+
case "date_millis":
|
129
|
+
self.type_factories[ext.type_anchor] = pa.date64
|
130
|
+
case "time_seconds":
|
131
|
+
self.type_factories[ext.type_anchor] = lambda: pa.time32("s")
|
132
|
+
case "time_millis":
|
133
|
+
self.type_factories[ext.type_anchor] = lambda: pa.time32("ms")
|
134
|
+
case "time_nanos":
|
135
|
+
self.type_factories[ext.type_anchor] = lambda: pa.time64("ns")
|
136
|
+
case "large_string":
|
137
|
+
self.type_factories[ext.type_anchor] = pa.large_string
|
138
|
+
case "large_binary":
|
139
|
+
self.type_factories[ext.type_anchor] = pa.large_binary
|
140
|
+
case "decimal256":
|
141
|
+
self.type_factories[ext.type_anchor] = pa.decimal256
|
142
|
+
case "large_list":
|
143
|
+
self.type_factories[ext.type_anchor] = pa.large_list
|
144
|
+
case "fixed_size_list":
|
145
|
+
self.type_factories[ext.type_anchor] = pa.list_
|
146
|
+
case "duration":
|
147
|
+
self.type_factories[ext.type_anchor] = pa.duration
|
148
|
+
case uri:
|
149
|
+
raise NotImplementedError(f"Type extension URI {uri} not support")
|
150
|
+
|
151
|
+
def _extension_type_variation(self, ext: SimpleExtensionDeclarationExtensionTypeVariation):
|
152
|
+
raise NotImplementedError()
|
153
|
+
|
154
|
+
def _expr(self, expr: Expression) -> Expr:
|
155
|
+
match betterproto.which_one_of(expr, "rex_type"):
|
156
|
+
case "literal", e:
|
157
|
+
return self._expr_literal(e)
|
158
|
+
case "selection", e:
|
159
|
+
return self._expr_selection(e)
|
160
|
+
case "scalar_function", e:
|
161
|
+
return self._expr_scalar_function(e)
|
162
|
+
case "window_function", _:
|
163
|
+
raise ValueError("Window functions are not supported in Spiral push-down")
|
164
|
+
case "if_then", e:
|
165
|
+
return self._expr_if_then(e)
|
166
|
+
case "switch", e:
|
167
|
+
return self._expr_switch(e)
|
168
|
+
case "singular_or_list", _:
|
169
|
+
raise ValueError("singular_or_list is not supported in Spiral push-down")
|
170
|
+
case "multi_or_list", _:
|
171
|
+
raise ValueError("multi_or_list is not supported in Spiral push-down")
|
172
|
+
case "cast", e:
|
173
|
+
return self._expr_cast(e)
|
174
|
+
case "subquery", _:
|
175
|
+
raise ValueError("Subqueries are not supported in Spiral push-down")
|
176
|
+
case "nested", e:
|
177
|
+
return self._expr_nested(e)
|
178
|
+
case _:
|
179
|
+
raise NotImplementedError(f"Expression type {expr.rex_type} not implemented")
|
180
|
+
|
181
|
+
def _expr_literal(self, expr: ExpressionLiteral):
|
182
|
+
# TODO(ngates): the Spiral literal expression is quite weakly typed...
|
183
|
+
# Maybe we can switch to Vortex?
|
184
|
+
simple = {
|
185
|
+
"boolean",
|
186
|
+
"i8",
|
187
|
+
"i16",
|
188
|
+
"i32",
|
189
|
+
"i64",
|
190
|
+
"fp32",
|
191
|
+
"fp64",
|
192
|
+
"string",
|
193
|
+
"binary",
|
194
|
+
"fixed_char",
|
195
|
+
"var_char",
|
196
|
+
"fixed_binary",
|
197
|
+
}
|
198
|
+
|
199
|
+
match betterproto.which_one_of(expr, "literal_type"):
|
200
|
+
case type_, v if type_ in simple:
|
201
|
+
return se.scalar(pa.scalar(v))
|
202
|
+
case "timestamp", v:
|
203
|
+
return se.scalar(pa.scalar(v, type=pa.timestamp("us")))
|
204
|
+
case "date", v:
|
205
|
+
return se.scalar(pa.scalar(v, type=pa.date32()))
|
206
|
+
case "time", v:
|
207
|
+
# Substrait time is us since midnight. PyArrow only supports ms.
|
208
|
+
v: int
|
209
|
+
v = int(v / 1000)
|
210
|
+
return se.scalar(pa.scalar(v, type=pa.time32("ms")))
|
211
|
+
case "null", _null_type:
|
212
|
+
# We need a typed null value
|
213
|
+
raise NotImplementedError()
|
214
|
+
case "struct", v:
|
215
|
+
v: ExpressionLiteralStruct
|
216
|
+
# Hmm, v has fields, but no field names. I guess we return a list and the type is applied later?
|
217
|
+
raise NotImplementedError()
|
218
|
+
case "list", v:
|
219
|
+
v: ExpressionLiteralList
|
220
|
+
return pa.scalar([self._expr_literal(e) for e in v.values])
|
221
|
+
case "user_defined", v:
|
222
|
+
v: ExpressionLiteralUserDefined
|
223
|
+
raise NotImplementedError()
|
224
|
+
case literal_type, _:
|
225
|
+
raise NotImplementedError(f"Literal type not supported: {literal_type}")
|
226
|
+
|
227
|
+
def _expr_selection(self, expr: ExpressionFieldReference):
|
228
|
+
match betterproto.which_one_of(expr, "root_type"):
|
229
|
+
case "root_reference", _:
|
230
|
+
# The reference is relative to the root
|
231
|
+
base_expr = self.scope
|
232
|
+
base_type = pa.struct(self.schema)
|
233
|
+
case _:
|
234
|
+
raise NotImplementedError("Only root_reference expressions are supported")
|
235
|
+
|
236
|
+
match betterproto.which_one_of(expr, "reference_type"):
|
237
|
+
case "direct_reference", direct_ref:
|
238
|
+
return self._expr_direct_reference(base_expr, base_type, direct_ref)
|
239
|
+
case "masked_reference", masked_ref:
|
240
|
+
return self._expr_masked_reference(base_expr, base_type, masked_ref)
|
241
|
+
case _:
|
242
|
+
raise NotImplementedError()
|
243
|
+
|
244
|
+
def _expr_direct_reference(self, scope: Expr, scope_type: pa.StructType, expr: ExpressionReferenceSegment):
|
245
|
+
match betterproto.which_one_of(expr, "reference_type"):
|
246
|
+
case "map_key", ref:
|
247
|
+
raise NotImplementedError("Map types not yet supported in Spiral")
|
248
|
+
case "struct_field", ref:
|
249
|
+
ref: ExpressionReferenceSegmentStructField
|
250
|
+
field_name = scope_type.field(ref.field).name
|
251
|
+
|
252
|
+
if field_name in self.key_names:
|
253
|
+
# This is a key column, so we need to select it from the scope.
|
254
|
+
return se.var(field_name)
|
255
|
+
|
256
|
+
scope = se.getitem(scope, field_name)
|
257
|
+
scope_type = scope_type.field(ref.field).type
|
258
|
+
return self._expr_direct_reference(scope, scope_type, ref.child) if ref.child else scope
|
259
|
+
case "list_element", ref:
|
260
|
+
ref: ExpressionReferenceSegmentListElement
|
261
|
+
scope = se.getitem(scope, ref.offset)
|
262
|
+
scope_type = scope_type.field(ref.field).type
|
263
|
+
return self._expr_direct_reference(scope, scope_type, ref.child) if ref.child else scope
|
264
|
+
case "", ref:
|
265
|
+
# Because Proto... we hit this case when we recurse into a child node and it's actually "None".
|
266
|
+
return scope
|
267
|
+
case _:
|
268
|
+
raise NotImplementedError()
|
269
|
+
|
270
|
+
def _expr_masked_reference(self, scope: Expr, scope_type: pa.StructType, expr: ExpressionMaskExpression):
|
271
|
+
raise NotImplementedError("Masked references are not yet supported in Spiral push-down")
|
272
|
+
|
273
|
+
def _expr_scalar_function(self, expr: ExpressionScalarFunction):
|
274
|
+
args = [self._expr(arg.value) for arg in expr.arguments]
|
275
|
+
return self.functions[expr.function_reference](*args)
|
spiral/table.py
ADDED
@@ -0,0 +1,157 @@
|
|
1
|
+
from datetime import datetime
|
2
|
+
from typing import TYPE_CHECKING, Literal
|
3
|
+
|
4
|
+
import pyarrow as pa
|
5
|
+
|
6
|
+
from spiral import expressions as se
|
7
|
+
from spiral.config import FILE_FORMAT, Config
|
8
|
+
from spiral.core.core import Table as CoreTable
|
9
|
+
from spiral.core.core import flush_wal, write
|
10
|
+
from spiral.expressions.base import Expr, ExprLike
|
11
|
+
|
12
|
+
if TYPE_CHECKING:
|
13
|
+
import duckdb
|
14
|
+
import polars as pl
|
15
|
+
import pyarrow.dataset
|
16
|
+
|
17
|
+
from spiral.scan_ import Scan
|
18
|
+
|
19
|
+
|
20
|
+
class Table(Expr):
    """API for interacting with a SpiralDB's Table.

    Different catalog implementations should ultimately construct a Table object.

    A ``Table`` is itself an expression (built from the core table's ``__expr__``).
    Key columns are handled specially: indexing or selecting a key column yields
    ``se.var(name)`` rather than a lookup on the table expression.
    """

    def __init__(
        self,
        table: CoreTable,
        name: str | None = None,
    ):
        """Wrap a core table.

        :param table: The core table handle.
        :param name: Display name used by ``str``/``repr``; falls back to the table id.
        """
        super().__init__(table.__expr__)

        self._table = table
        self._name = name or self._table.id
        self._key_schema = self._table.key_schema.to_arrow()
        self._key_columns = set(self._key_schema.names)

    @property
    def table_id(self) -> str:
        """The id of the underlying core table."""
        return self._table.id

    @property
    def last_modified_at(self) -> int:
        """The ``last_modified_at`` value of the table's current WAL."""
        return self._table.get_wal(asof=None).last_modified_at

    def __str__(self):
        return self._name

    def __repr__(self):
        return f'Table("{self._name}")'

    def __getitem__(self, item: str) -> Expr:
        """Return ``se.var(item)`` for key columns, otherwise defer to ``Expr.__getitem__``."""
        if item in self._key_columns:
            return se.var(name=item)

        return super().__getitem__(item)

    def select(self, *paths: str, exclude: list[str] | None = None) -> "Expr":
        """Select paths from the table, splitting them between keys and columns.

        :param paths: Paths to select; may include key column names.
        :param exclude: Paths to exclude. Must not contain key columns.
        :returns: The selection expression. When key columns are requested they are
            packed as variables and merged with the selection of the remaining paths.
        :raises ValueError: If ``exclude`` names a key column.
        """
        # Override an expression select in the root column group to split between keys and columns.
        if exclude is not None and set(exclude) & self._key_columns:
            raise ValueError(
                "Cannot use 'exclude' arg with key columns. Use 'exclude_keys' and an explicit select of keys."
            )

        # De-duplicate while keeping the caller's order, so the resulting struct has a
        # deterministic field order (set arithmetic would scramble it).
        unique_paths = list(dict.fromkeys(paths))
        key_paths = [path for path in unique_paths if path in self._key_columns]
        if not key_paths:
            return super().select(*paths, exclude=exclude)

        other_paths = [path for path in unique_paths if path not in self._key_columns]
        keys = se.pack({key: se.var(key) for key in key_paths})
        return se.merge(keys, super().select(*other_paths, exclude=exclude))

    @property
    def key_schema(self) -> pa.Schema:
        """Returns the key schema of the table."""
        return self._key_schema

    @property
    def schema(self) -> pa.Schema:
        """Returns the FULL schema of the table.

        NOTE: This can be expensive for large tables.
        """
        return self._table.get_schema(asof=None)

    def to_dataset(self) -> "pyarrow.dataset.Dataset":
        """Returns a PyArrow Dataset representing the table."""
        from .dataset import TableDataset

        return TableDataset(self)

    def to_polars(self) -> "pl.LazyFrame":
        """Returns a Polars LazyFrame for the Spiral table."""
        import polars as pl

        return pl.scan_pyarrow_dataset(self.to_dataset())

    def to_duckdb(self) -> "duckdb.DuckDBPyRelation":
        """Returns a DuckDB relation for the Spiral table."""
        import duckdb

        return duckdb.from_arrow(self.to_dataset())

    def scan(
        self,
        *projections: ExprLike,
        where: ExprLike | None = None,
        asof: datetime | int | str | None = None,
        exclude_keys: bool = False,
        # TODO(marko): Support config.
        # config: Config | None = None,
    ) -> "Scan":
        """Reads the table. If projections are not provided, the entire table is read.

        See `spiral.scan` for more information.
        """
        from spiral.scan_ import scan

        if not projections:
            projections = [self]

        return scan(
            *projections,
            where=where,
            asof=asof,
            exclude_keys=exclude_keys,
            # config=config,
        )

    # NOTE: "vortex" is valid format. We don't want that visible in the API docs.
    def write(
        self,
        expr: ExprLike,
        *,
        format: Literal["parquet"] | None = None,
        # TODO(joe): support group_by, and config
        config: Config | None = None,
    ) -> None:
        """Write an item to the table inside a single transaction.

        :param expr: The expression to write. Must evaluate to a struct array.
        :param format: the format to write the data in. Defaults to "parquet".
        :param config: The configuration to use for this write.
        """
        file_format = format or FILE_FORMAT
        write(
            self._table,
            se.lift(expr).__expr__,
            format=file_format,
            partition_size=config.partition_file_min_size if config else None,
        )
        # Flush the WAL if configured.
        if config is not None and config.flush_wal_on_write:
            flush_wal(self._table, manifest_format=file_format)
|