pyspiral 0.1.0__cp310-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
Sign up to get free protection for your applications and to get access to all the features.
- pyspiral-0.1.0.dist-info/METADATA +48 -0
- pyspiral-0.1.0.dist-info/RECORD +81 -0
- pyspiral-0.1.0.dist-info/WHEEL +4 -0
- pyspiral-0.1.0.dist-info/entry_points.txt +2 -0
- spiral/__init__.py +11 -0
- spiral/_lib.abi3.so +0 -0
- spiral/adbc.py +386 -0
- spiral/api/__init__.py +221 -0
- spiral/api/admin.py +29 -0
- spiral/api/filesystems.py +125 -0
- spiral/api/organizations.py +90 -0
- spiral/api/projects.py +160 -0
- spiral/api/tables.py +94 -0
- spiral/api/tokens.py +56 -0
- spiral/api/workloads.py +45 -0
- spiral/arrow.py +209 -0
- spiral/authn/__init__.py +0 -0
- spiral/authn/authn.py +89 -0
- spiral/authn/device.py +206 -0
- spiral/authn/github_.py +33 -0
- spiral/authn/modal_.py +18 -0
- spiral/catalog.py +78 -0
- spiral/cli/__init__.py +82 -0
- spiral/cli/__main__.py +4 -0
- spiral/cli/admin.py +21 -0
- spiral/cli/app.py +48 -0
- spiral/cli/console.py +95 -0
- spiral/cli/fs.py +47 -0
- spiral/cli/login.py +13 -0
- spiral/cli/org.py +90 -0
- spiral/cli/printer.py +45 -0
- spiral/cli/project.py +107 -0
- spiral/cli/state.py +3 -0
- spiral/cli/table.py +20 -0
- spiral/cli/token.py +27 -0
- spiral/cli/types.py +53 -0
- spiral/cli/workload.py +59 -0
- spiral/config.py +26 -0
- spiral/core/__init__.py +0 -0
- spiral/core/core/__init__.pyi +53 -0
- spiral/core/manifests/__init__.pyi +53 -0
- spiral/core/metastore/__init__.pyi +91 -0
- spiral/core/spec/__init__.pyi +257 -0
- spiral/dataset.py +239 -0
- spiral/debug.py +251 -0
- spiral/expressions/__init__.py +222 -0
- spiral/expressions/base.py +149 -0
- spiral/expressions/http.py +86 -0
- spiral/expressions/io.py +100 -0
- spiral/expressions/list_.py +68 -0
- spiral/expressions/refs.py +44 -0
- spiral/expressions/str_.py +39 -0
- spiral/expressions/struct.py +57 -0
- spiral/expressions/tiff.py +223 -0
- spiral/expressions/udf.py +46 -0
- spiral/grpc_.py +32 -0
- spiral/project.py +137 -0
- spiral/proto/_/__init__.py +0 -0
- spiral/proto/_/arrow/__init__.py +0 -0
- spiral/proto/_/arrow/flight/__init__.py +0 -0
- spiral/proto/_/arrow/flight/protocol/__init__.py +0 -0
- spiral/proto/_/arrow/flight/protocol/sql/__init__.py +1990 -0
- spiral/proto/_/scandal/__init__.py +223 -0
- spiral/proto/_/spfs/__init__.py +36 -0
- spiral/proto/_/spiral/__init__.py +0 -0
- spiral/proto/_/spiral/table/__init__.py +225 -0
- spiral/proto/_/spiraldb/__init__.py +0 -0
- spiral/proto/_/spiraldb/metastore/__init__.py +499 -0
- spiral/proto/__init__.py +0 -0
- spiral/proto/scandal/__init__.py +45 -0
- spiral/proto/spiral/__init__.py +0 -0
- spiral/proto/spiral/table/__init__.py +96 -0
- spiral/proto/substrait/__init__.py +3399 -0
- spiral/proto/substrait/extensions/__init__.py +115 -0
- spiral/proto/util.py +41 -0
- spiral/py.typed +0 -0
- spiral/scan_.py +168 -0
- spiral/settings.py +157 -0
- spiral/substrait_.py +275 -0
- spiral/table.py +157 -0
- spiral/types_.py +6 -0
spiral/substrait_.py
ADDED
@@ -0,0 +1,275 @@
|
|
1
|
+
import betterproto
|
2
|
+
import pyarrow as pa
|
3
|
+
|
4
|
+
import spiral.expressions as se
|
5
|
+
from spiral.expressions.base import Expr
|
6
|
+
from spiral.proto.substrait import (
|
7
|
+
Expression,
|
8
|
+
ExpressionFieldReference,
|
9
|
+
ExpressionLiteral,
|
10
|
+
ExpressionLiteralList,
|
11
|
+
ExpressionLiteralStruct,
|
12
|
+
ExpressionLiteralUserDefined,
|
13
|
+
ExpressionMaskExpression,
|
14
|
+
ExpressionReferenceSegment,
|
15
|
+
ExpressionReferenceSegmentListElement,
|
16
|
+
ExpressionReferenceSegmentStructField,
|
17
|
+
ExpressionScalarFunction,
|
18
|
+
ExtendedExpression,
|
19
|
+
)
|
20
|
+
from spiral.proto.substrait.extensions import (
|
21
|
+
SimpleExtensionDeclaration,
|
22
|
+
SimpleExtensionDeclarationExtensionFunction,
|
23
|
+
SimpleExtensionDeclarationExtensionType,
|
24
|
+
SimpleExtensionDeclarationExtensionTypeVariation,
|
25
|
+
)
|
26
|
+
|
27
|
+
|
28
|
+
class SubstraitConverter:
|
29
|
+
def __init__(self, scope: Expr, schema: pa.Schema, key_schema: pa.Schema):
|
30
|
+
self.scope = scope
|
31
|
+
self.schema = schema
|
32
|
+
self.key_names = set(key_schema.names)
|
33
|
+
|
34
|
+
# Extension URIs, keyed by extension URI anchor
|
35
|
+
self.extension_uris = {}
|
36
|
+
|
37
|
+
# Functions, keyed by function_anchor
|
38
|
+
self.functions = {}
|
39
|
+
|
40
|
+
# Types, keyed by type anchor
|
41
|
+
self.type_factories = {}
|
42
|
+
|
43
|
+
def convert(self, buffer: pa.Buffer) -> Expr:
|
44
|
+
"""Convert a Substrait Extended Expression into a Spiral expression."""
|
45
|
+
|
46
|
+
expr: ExtendedExpression = ExtendedExpression().parse(buffer.to_pybytes())
|
47
|
+
assert len(expr.referred_expr) == 1, "Only one expression is supported"
|
48
|
+
|
49
|
+
# Parse the extension URIs from the plan.
|
50
|
+
for ext_uri in expr.extension_uris:
|
51
|
+
self.extension_uris[ext_uri.extension_uri_anchor] = ext_uri.uri
|
52
|
+
|
53
|
+
# Parse the extensions from the plan.
|
54
|
+
for ext in expr.extensions:
|
55
|
+
self._extension_declaration(ext)
|
56
|
+
|
57
|
+
# Convert the expression
|
58
|
+
return self._expr(expr.referred_expr[0].expression)
|
59
|
+
|
60
|
+
def _extension_declaration(self, ext: SimpleExtensionDeclaration):
|
61
|
+
match betterproto.which_one_of(ext, "mapping_type"):
|
62
|
+
case "extension_function", ext_func:
|
63
|
+
self._extension_function(ext_func)
|
64
|
+
case "extension_type", ext_type:
|
65
|
+
self._extension_type(ext_type)
|
66
|
+
case "extension_type_variation", ext_type_variation:
|
67
|
+
self._extension_type_variation(ext_type_variation)
|
68
|
+
case _:
|
69
|
+
raise AssertionError("Invalid substrait plan")
|
70
|
+
|
71
|
+
def _extension_function(self, ext: SimpleExtensionDeclarationExtensionFunction):
|
72
|
+
ext_uri: str = self.extension_uris[ext.extension_uri_reference]
|
73
|
+
match ext_uri:
|
74
|
+
case "https://github.com/substrait-io/substrait/blob/main/extensions/functions_boolean.yaml":
|
75
|
+
match ext.name:
|
76
|
+
case "or":
|
77
|
+
self.functions[ext.function_anchor] = se.or_
|
78
|
+
case "and":
|
79
|
+
self.functions[ext.function_anchor] = se.and_
|
80
|
+
case "xor":
|
81
|
+
self.functions[ext.function_anchor] = se.xor
|
82
|
+
case "not":
|
83
|
+
self.functions[ext.function_anchor] = se.not_
|
84
|
+
case _:
|
85
|
+
raise NotImplementedError(f"Function name {ext.name} not supported")
|
86
|
+
case "https://github.com/substrait-io/substrait/blob/main/extensions/functions_comparison.yaml":
|
87
|
+
match ext.name:
|
88
|
+
case "equal":
|
89
|
+
self.functions[ext.function_anchor] = se.eq
|
90
|
+
case "not_equal":
|
91
|
+
self.functions[ext.function_anchor] = se.neq
|
92
|
+
case "lt":
|
93
|
+
self.functions[ext.function_anchor] = se.lt
|
94
|
+
case "lte":
|
95
|
+
self.functions[ext.function_anchor] = se.lte
|
96
|
+
case "gt":
|
97
|
+
self.functions[ext.function_anchor] = se.gt
|
98
|
+
case "gte":
|
99
|
+
self.functions[ext.function_anchor] = se.gte
|
100
|
+
case "is_null":
|
101
|
+
self.functions[ext.function_anchor] = se.is_null
|
102
|
+
case "is_not_null":
|
103
|
+
self.functions[ext.function_anchor] = se.is_not_null
|
104
|
+
case _:
|
105
|
+
raise NotImplementedError(f"Function name {ext.name} not supported")
|
106
|
+
case uri:
|
107
|
+
raise NotImplementedError(f"Function extension URI {uri} not supported")
|
108
|
+
|
109
|
+
def _extension_type(self, ext: SimpleExtensionDeclarationExtensionType):
|
110
|
+
ext_uri: str = self.extension_uris[ext.extension_uri_reference]
|
111
|
+
match ext_uri:
|
112
|
+
case "https://github.com/apache/arrow/blob/main/format/substrait/extension_types.yaml":
|
113
|
+
match ext.name:
|
114
|
+
case "null":
|
115
|
+
self.type_factories[ext.type_anchor] = pa.null
|
116
|
+
case "interval_month_day_nano":
|
117
|
+
self.type_factories[ext.type_anchor] = pa.month_day_nano_interval
|
118
|
+
case "u8":
|
119
|
+
self.type_factories[ext.type_anchor] = pa.uint8
|
120
|
+
case "u16":
|
121
|
+
self.type_factories[ext.type_anchor] = pa.uint16
|
122
|
+
case "u32":
|
123
|
+
self.type_factories[ext.type_anchor] = pa.uint32
|
124
|
+
case "u64":
|
125
|
+
self.type_factories[ext.type_anchor] = pa.uint64
|
126
|
+
case "fp16":
|
127
|
+
self.type_factories[ext.type_anchor] = pa.float16
|
128
|
+
case "date_millis":
|
129
|
+
self.type_factories[ext.type_anchor] = pa.date64
|
130
|
+
case "time_seconds":
|
131
|
+
self.type_factories[ext.type_anchor] = lambda: pa.time32("s")
|
132
|
+
case "time_millis":
|
133
|
+
self.type_factories[ext.type_anchor] = lambda: pa.time32("ms")
|
134
|
+
case "time_nanos":
|
135
|
+
self.type_factories[ext.type_anchor] = lambda: pa.time64("ns")
|
136
|
+
case "large_string":
|
137
|
+
self.type_factories[ext.type_anchor] = pa.large_string
|
138
|
+
case "large_binary":
|
139
|
+
self.type_factories[ext.type_anchor] = pa.large_binary
|
140
|
+
case "decimal256":
|
141
|
+
self.type_factories[ext.type_anchor] = pa.decimal256
|
142
|
+
case "large_list":
|
143
|
+
self.type_factories[ext.type_anchor] = pa.large_list
|
144
|
+
case "fixed_size_list":
|
145
|
+
self.type_factories[ext.type_anchor] = pa.list_
|
146
|
+
case "duration":
|
147
|
+
self.type_factories[ext.type_anchor] = pa.duration
|
148
|
+
case uri:
|
149
|
+
raise NotImplementedError(f"Type extension URI {uri} not support")
|
150
|
+
|
151
|
+
def _extension_type_variation(self, ext: SimpleExtensionDeclarationExtensionTypeVariation):
|
152
|
+
raise NotImplementedError()
|
153
|
+
|
154
|
+
def _expr(self, expr: Expression) -> Expr:
|
155
|
+
match betterproto.which_one_of(expr, "rex_type"):
|
156
|
+
case "literal", e:
|
157
|
+
return self._expr_literal(e)
|
158
|
+
case "selection", e:
|
159
|
+
return self._expr_selection(e)
|
160
|
+
case "scalar_function", e:
|
161
|
+
return self._expr_scalar_function(e)
|
162
|
+
case "window_function", _:
|
163
|
+
raise ValueError("Window functions are not supported in Spiral push-down")
|
164
|
+
case "if_then", e:
|
165
|
+
return self._expr_if_then(e)
|
166
|
+
case "switch", e:
|
167
|
+
return self._expr_switch(e)
|
168
|
+
case "singular_or_list", _:
|
169
|
+
raise ValueError("singular_or_list is not supported in Spiral push-down")
|
170
|
+
case "multi_or_list", _:
|
171
|
+
raise ValueError("multi_or_list is not supported in Spiral push-down")
|
172
|
+
case "cast", e:
|
173
|
+
return self._expr_cast(e)
|
174
|
+
case "subquery", _:
|
175
|
+
raise ValueError("Subqueries are not supported in Spiral push-down")
|
176
|
+
case "nested", e:
|
177
|
+
return self._expr_nested(e)
|
178
|
+
case _:
|
179
|
+
raise NotImplementedError(f"Expression type {expr.rex_type} not implemented")
|
180
|
+
|
181
|
+
def _expr_literal(self, expr: ExpressionLiteral):
|
182
|
+
# TODO(ngates): the Spiral literal expression is quite weakly typed...
|
183
|
+
# Maybe we can switch to Vortex?
|
184
|
+
simple = {
|
185
|
+
"boolean",
|
186
|
+
"i8",
|
187
|
+
"i16",
|
188
|
+
"i32",
|
189
|
+
"i64",
|
190
|
+
"fp32",
|
191
|
+
"fp64",
|
192
|
+
"string",
|
193
|
+
"binary",
|
194
|
+
"fixed_char",
|
195
|
+
"var_char",
|
196
|
+
"fixed_binary",
|
197
|
+
}
|
198
|
+
|
199
|
+
match betterproto.which_one_of(expr, "literal_type"):
|
200
|
+
case type_, v if type_ in simple:
|
201
|
+
return se.scalar(pa.scalar(v))
|
202
|
+
case "timestamp", v:
|
203
|
+
return se.scalar(pa.scalar(v, type=pa.timestamp("us")))
|
204
|
+
case "date", v:
|
205
|
+
return se.scalar(pa.scalar(v, type=pa.date32()))
|
206
|
+
case "time", v:
|
207
|
+
# Substrait time is us since midnight. PyArrow only supports ms.
|
208
|
+
v: int
|
209
|
+
v = int(v / 1000)
|
210
|
+
return se.scalar(pa.scalar(v, type=pa.time32("ms")))
|
211
|
+
case "null", _null_type:
|
212
|
+
# We need a typed null value
|
213
|
+
raise NotImplementedError()
|
214
|
+
case "struct", v:
|
215
|
+
v: ExpressionLiteralStruct
|
216
|
+
# Hmm, v has fields, but no field names. I guess we return a list and the type is applied later?
|
217
|
+
raise NotImplementedError()
|
218
|
+
case "list", v:
|
219
|
+
v: ExpressionLiteralList
|
220
|
+
return pa.scalar([self._expr_literal(e) for e in v.values])
|
221
|
+
case "user_defined", v:
|
222
|
+
v: ExpressionLiteralUserDefined
|
223
|
+
raise NotImplementedError()
|
224
|
+
case literal_type, _:
|
225
|
+
raise NotImplementedError(f"Literal type not supported: {literal_type}")
|
226
|
+
|
227
|
+
def _expr_selection(self, expr: ExpressionFieldReference):
|
228
|
+
match betterproto.which_one_of(expr, "root_type"):
|
229
|
+
case "root_reference", _:
|
230
|
+
# The reference is relative to the root
|
231
|
+
base_expr = self.scope
|
232
|
+
base_type = pa.struct(self.schema)
|
233
|
+
case _:
|
234
|
+
raise NotImplementedError("Only root_reference expressions are supported")
|
235
|
+
|
236
|
+
match betterproto.which_one_of(expr, "reference_type"):
|
237
|
+
case "direct_reference", direct_ref:
|
238
|
+
return self._expr_direct_reference(base_expr, base_type, direct_ref)
|
239
|
+
case "masked_reference", masked_ref:
|
240
|
+
return self._expr_masked_reference(base_expr, base_type, masked_ref)
|
241
|
+
case _:
|
242
|
+
raise NotImplementedError()
|
243
|
+
|
244
|
+
def _expr_direct_reference(self, scope: Expr, scope_type: pa.StructType, expr: ExpressionReferenceSegment):
|
245
|
+
match betterproto.which_one_of(expr, "reference_type"):
|
246
|
+
case "map_key", ref:
|
247
|
+
raise NotImplementedError("Map types not yet supported in Spiral")
|
248
|
+
case "struct_field", ref:
|
249
|
+
ref: ExpressionReferenceSegmentStructField
|
250
|
+
field_name = scope_type.field(ref.field).name
|
251
|
+
|
252
|
+
if field_name in self.key_names:
|
253
|
+
# This is a key column, so we need to select it from the scope.
|
254
|
+
return se.var(field_name)
|
255
|
+
|
256
|
+
scope = se.getitem(scope, field_name)
|
257
|
+
scope_type = scope_type.field(ref.field).type
|
258
|
+
return self._expr_direct_reference(scope, scope_type, ref.child) if ref.child else scope
|
259
|
+
case "list_element", ref:
|
260
|
+
ref: ExpressionReferenceSegmentListElement
|
261
|
+
scope = se.getitem(scope, ref.offset)
|
262
|
+
scope_type = scope_type.field(ref.field).type
|
263
|
+
return self._expr_direct_reference(scope, scope_type, ref.child) if ref.child else scope
|
264
|
+
case "", ref:
|
265
|
+
# Because Proto... we hit this case when we recurse into a child node and it's actually "None".
|
266
|
+
return scope
|
267
|
+
case _:
|
268
|
+
raise NotImplementedError()
|
269
|
+
|
270
|
+
def _expr_masked_reference(self, scope: Expr, scope_type: pa.StructType, expr: ExpressionMaskExpression):
|
271
|
+
raise NotImplementedError("Masked references are not yet supported in Spiral push-down")
|
272
|
+
|
273
|
+
def _expr_scalar_function(self, expr: ExpressionScalarFunction):
|
274
|
+
args = [self._expr(arg.value) for arg in expr.arguments]
|
275
|
+
return self.functions[expr.function_reference](*args)
|
spiral/table.py
ADDED
@@ -0,0 +1,157 @@
|
|
1
|
+
from datetime import datetime
|
2
|
+
from typing import TYPE_CHECKING, Literal
|
3
|
+
|
4
|
+
import pyarrow as pa
|
5
|
+
|
6
|
+
from spiral import expressions as se
|
7
|
+
from spiral.config import FILE_FORMAT, Config
|
8
|
+
from spiral.core.core import Table as CoreTable
|
9
|
+
from spiral.core.core import flush_wal, write
|
10
|
+
from spiral.expressions.base import Expr, ExprLike
|
11
|
+
|
12
|
+
if TYPE_CHECKING:
|
13
|
+
import duckdb
|
14
|
+
import polars as pl
|
15
|
+
import pyarrow.dataset
|
16
|
+
|
17
|
+
from spiral.scan_ import Scan
|
18
|
+
|
19
|
+
|
20
|
+
class Table(Expr):
    """API for interacting with a SpiralDB Table.

    Different catalog implementations should ultimately construct a Table object.
    """

    def __init__(
        self,
        table: CoreTable,
        name: str | None = None,
    ):
        """Wrap a core table.

        :param table: the core table handle.
        :param name: display name; falls back to the table id when omitted.
        """
        super().__init__(table.__expr__)

        self._table = table
        self._name = name or self._table.id
        self._key_schema = self._table.key_schema.to_arrow()
        self._key_columns = set(self._key_schema.names)

    @property
    def table_id(self) -> str:
        """Returns the id of the underlying core table."""
        return self._table.id

    @property
    def last_modified_at(self) -> int:
        """Returns ``last_modified_at`` of the table's current WAL."""
        return self._table.get_wal(asof=None).last_modified_at

    def __str__(self):
        return self._name

    def __repr__(self):
        return f'Table("{self._name}")'

    def __getitem__(self, item: str) -> Expr:
        """Returns a variable for key columns, otherwise defers to ``Expr.__getitem__``."""
        if item in self._key_columns:
            return se.var(name=item)

        return super().__getitem__(item)

    def select(self, *paths: str, exclude: list[str] | None = None) -> "Expr":
        """Select columns, splitting the request between key and non-key columns.

        :param paths: names to select; duplicates are ignored when key columns
            are involved and the caller's order is preserved.
        :param exclude: names to exclude; must not contain key columns.
        :raises ValueError: if ``exclude`` names a key column.
        """
        # Override an expression select in the root column group to split between keys and columns.
        if exclude is not None and set(exclude) & self._key_columns:
            raise ValueError(
                "Cannot use 'exclude' arg with key columns. Use 'exclude_keys' and an explicit select of keys."
            )

        # dict.fromkeys de-duplicates like the set it replaces but keeps the
        # caller's order, so the resulting column order is deterministic.
        unique_paths = list(dict.fromkeys(paths))
        key_paths = [path for path in unique_paths if path in self._key_columns]
        if not key_paths:
            return super().select(*paths, exclude=exclude)

        other_paths = [path for path in unique_paths if path not in self._key_columns]
        keys = se.pack({key: se.var(key) for key in key_paths})
        return se.merge(keys, super().select(*other_paths, exclude=exclude))

    @property
    def key_schema(self) -> pa.Schema:
        """Returns the key schema of the table."""
        return self._key_schema

    @property
    def schema(self) -> pa.Schema:
        """Returns the FULL schema of the table.

        NOTE: This can be expensive for large tables.
        """
        return self._table.get_schema(asof=None)

    def to_dataset(self) -> "pyarrow.dataset.Dataset":
        """Returns a PyArrow Dataset representing the table."""
        from .dataset import TableDataset

        return TableDataset(self)

    def to_polars(self) -> "pl.LazyFrame":
        """Returns a Polars LazyFrame for the Spiral table."""
        import polars as pl

        return pl.scan_pyarrow_dataset(self.to_dataset())

    def to_duckdb(self) -> "duckdb.DuckDBPyRelation":
        """Returns a DuckDB relation for the Spiral table."""
        import duckdb

        return duckdb.from_arrow(self.to_dataset())

    def scan(
        self,
        *projections: ExprLike,
        where: ExprLike | None = None,
        asof: datetime | int | str | None = None,
        exclude_keys: bool = False,
        # TODO(marko): Support config.
        # config: Config | None = None,
    ) -> "Scan":
        """Reads the table. If projections are not provided, the entire table is read.

        See `spiral.scan` for more information.
        """
        from spiral.scan_ import scan

        if not projections:
            projections = (self,)

        return scan(
            *projections,
            where=where,
            asof=asof,
            exclude_keys=exclude_keys,
            # config=config,
        )

    # NOTE: "vortex" is valid format. We don't want that visible in the API docs.
    def write(
        self,
        expr: ExprLike,
        *,
        format: Literal["parquet"] | None = None,
        # TODO(joe): support group_by, and config
        config: Config | None = None,
    ) -> None:
        """Write an item to the table inside a single transaction.

        :param expr: The expression to write. Must evaluate to a struct array.
        :param format: the format to write the data in. Falls back to ``FILE_FORMAT`` when omitted.
        :param config: The configuration to use for this write.
        """
        # Resolve the format once so the write and the WAL flush always agree.
        file_format = format or FILE_FORMAT

        write(
            self._table,
            se.lift(expr).__expr__,
            format=file_format,
            partition_size=config.partition_file_min_size if config else None,
        )
        # Flush the WAL if configured.
        if config is not None and config.flush_wal_on_write:
            flush_wal(self._table, manifest_format=file_format)
|