pixeltable 0.3.6__py3-none-any.whl → 0.3.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +5 -3
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +1 -0
- pixeltable/catalog/catalog.py +335 -128
- pixeltable/catalog/column.py +22 -5
- pixeltable/catalog/dir.py +19 -6
- pixeltable/catalog/insertable_table.py +34 -37
- pixeltable/catalog/named_function.py +0 -4
- pixeltable/catalog/schema_object.py +28 -42
- pixeltable/catalog/table.py +193 -158
- pixeltable/catalog/table_version.py +191 -232
- pixeltable/catalog/table_version_handle.py +50 -0
- pixeltable/catalog/table_version_path.py +49 -33
- pixeltable/catalog/view.py +56 -96
- pixeltable/config.py +103 -0
- pixeltable/dataframe.py +89 -89
- pixeltable/env.py +98 -168
- pixeltable/exec/aggregation_node.py +5 -4
- pixeltable/exec/cache_prefetch_node.py +1 -1
- pixeltable/exec/component_iteration_node.py +13 -9
- pixeltable/exec/data_row_batch.py +3 -3
- pixeltable/exec/exec_context.py +0 -4
- pixeltable/exec/exec_node.py +3 -2
- pixeltable/exec/expr_eval/schedulers.py +2 -1
- pixeltable/exec/in_memory_data_node.py +9 -4
- pixeltable/exec/row_update_node.py +1 -2
- pixeltable/exec/sql_node.py +20 -16
- pixeltable/exprs/__init__.py +2 -0
- pixeltable/exprs/arithmetic_expr.py +7 -11
- pixeltable/exprs/array_slice.py +1 -1
- pixeltable/exprs/column_property_ref.py +3 -3
- pixeltable/exprs/column_ref.py +12 -13
- pixeltable/exprs/comparison.py +3 -6
- pixeltable/exprs/compound_predicate.py +4 -4
- pixeltable/exprs/expr.py +31 -22
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +1 -1
- pixeltable/exprs/function_call.py +110 -80
- pixeltable/exprs/globals.py +3 -3
- pixeltable/exprs/in_predicate.py +1 -1
- pixeltable/exprs/inline_expr.py +3 -3
- pixeltable/exprs/is_null.py +1 -1
- pixeltable/exprs/json_mapper.py +2 -2
- pixeltable/exprs/json_path.py +17 -10
- pixeltable/exprs/literal.py +1 -1
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/row_builder.py +8 -17
- pixeltable/exprs/rowid_ref.py +21 -10
- pixeltable/exprs/similarity_expr.py +5 -5
- pixeltable/exprs/sql_element_cache.py +1 -1
- pixeltable/exprs/type_cast.py +2 -3
- pixeltable/exprs/variable.py +2 -2
- pixeltable/ext/__init__.py +2 -0
- pixeltable/ext/functions/__init__.py +2 -0
- pixeltable/ext/functions/yolox.py +3 -3
- pixeltable/func/__init__.py +3 -1
- pixeltable/func/aggregate_function.py +9 -9
- pixeltable/func/callable_function.py +3 -4
- pixeltable/func/expr_template_function.py +6 -16
- pixeltable/func/function.py +48 -14
- pixeltable/func/function_registry.py +1 -3
- pixeltable/func/query_template_function.py +5 -12
- pixeltable/func/signature.py +23 -22
- pixeltable/func/tools.py +3 -3
- pixeltable/func/udf.py +6 -4
- pixeltable/functions/__init__.py +2 -0
- pixeltable/functions/fireworks.py +7 -4
- pixeltable/functions/globals.py +4 -5
- pixeltable/functions/huggingface.py +1 -5
- pixeltable/functions/image.py +17 -7
- pixeltable/functions/llama_cpp.py +1 -1
- pixeltable/functions/mistralai.py +1 -1
- pixeltable/functions/ollama.py +4 -4
- pixeltable/functions/openai.py +19 -19
- pixeltable/functions/string.py +23 -30
- pixeltable/functions/timestamp.py +11 -6
- pixeltable/functions/together.py +14 -12
- pixeltable/functions/util.py +1 -1
- pixeltable/functions/video.py +5 -4
- pixeltable/functions/vision.py +6 -9
- pixeltable/functions/whisper.py +3 -3
- pixeltable/globals.py +246 -260
- pixeltable/index/__init__.py +2 -0
- pixeltable/index/base.py +1 -1
- pixeltable/index/btree.py +3 -1
- pixeltable/index/embedding_index.py +11 -5
- pixeltable/io/external_store.py +11 -12
- pixeltable/io/label_studio.py +4 -3
- pixeltable/io/parquet.py +57 -56
- pixeltable/iterators/__init__.py +4 -2
- pixeltable/iterators/audio.py +11 -11
- pixeltable/iterators/document.py +10 -10
- pixeltable/iterators/string.py +1 -2
- pixeltable/iterators/video.py +14 -15
- pixeltable/metadata/__init__.py +9 -5
- pixeltable/metadata/converters/convert_10.py +0 -1
- pixeltable/metadata/converters/convert_15.py +0 -2
- pixeltable/metadata/converters/convert_23.py +0 -2
- pixeltable/metadata/converters/convert_24.py +3 -3
- pixeltable/metadata/converters/convert_25.py +1 -1
- pixeltable/metadata/converters/convert_27.py +0 -2
- pixeltable/metadata/converters/convert_28.py +0 -2
- pixeltable/metadata/converters/convert_29.py +7 -8
- pixeltable/metadata/converters/util.py +7 -7
- pixeltable/metadata/schema.py +27 -19
- pixeltable/plan.py +68 -40
- pixeltable/share/__init__.py +2 -0
- pixeltable/share/packager.py +15 -12
- pixeltable/share/publish.py +3 -5
- pixeltable/store.py +37 -38
- pixeltable/type_system.py +41 -28
- pixeltable/utils/coco.py +4 -4
- pixeltable/utils/console_output.py +1 -3
- pixeltable/utils/description_helper.py +1 -1
- pixeltable/utils/documents.py +3 -3
- pixeltable/utils/filecache.py +20 -9
- pixeltable/utils/formatter.py +2 -3
- pixeltable/utils/media_store.py +1 -1
- pixeltable/utils/pytorch.py +1 -1
- pixeltable/utils/sql.py +4 -4
- pixeltable/utils/transactional_directory.py +2 -1
- {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/METADATA +1 -1
- pixeltable-0.3.8.dist-info/RECORD +174 -0
- pixeltable-0.3.6.dist-info/RECORD +0 -172
- {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/WHEEL +0 -0
- {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/entry_points.txt +0 -0
|
pixeltable/exec/in_memory_data_node.py
CHANGED
|
@@ -20,7 +20,8 @@ class InMemoryDataNode(ExecNode):
|
|
|
20
20
|
- if an input row doesn't provide a value, sets the slot to the column default
|
|
21
21
|
"""
|
|
22
22
|
|
|
23
|
-
tbl: catalog.
|
|
23
|
+
tbl: catalog.TableVersionHandle
|
|
24
|
+
|
|
24
25
|
input_rows: list[dict[str, Any]]
|
|
25
26
|
start_row_id: int
|
|
26
27
|
output_rows: Optional[DataRowBatch]
|
|
@@ -29,12 +30,16 @@ class InMemoryDataNode(ExecNode):
|
|
|
29
30
|
output_exprs: list[exprs.ColumnRef]
|
|
30
31
|
|
|
31
32
|
def __init__(
|
|
32
|
-
self,
|
|
33
|
+
self,
|
|
34
|
+
tbl: catalog.TableVersionHandle,
|
|
35
|
+
rows: list[dict[str, Any]],
|
|
36
|
+
row_builder: exprs.RowBuilder,
|
|
37
|
+
start_row_id: int,
|
|
33
38
|
):
|
|
34
39
|
# we materialize the input slots
|
|
35
40
|
output_exprs = list(row_builder.input_exprs)
|
|
36
41
|
super().__init__(row_builder, output_exprs, [], None)
|
|
37
|
-
assert tbl.is_insertable()
|
|
42
|
+
assert tbl.get().is_insertable()
|
|
38
43
|
self.tbl = tbl
|
|
39
44
|
self.input_rows = rows
|
|
40
45
|
self.start_row_id = start_row_id
|
|
@@ -62,7 +67,7 @@ class InMemoryDataNode(ExecNode):
|
|
|
62
67
|
|
|
63
68
|
if col_info.col.col_type.is_image_type() and isinstance(val, bytes):
|
|
64
69
|
# this is a literal image, ie, a sequence of bytes; we save this as a media file and store the path
|
|
65
|
-
path = str(MediaStore.prepare_media_path(self.tbl.id, col_info.col.id, self.tbl.version))
|
|
70
|
+
path = str(MediaStore.prepare_media_path(self.tbl.id, col_info.col.id, self.tbl.get().version))
|
|
66
71
|
open(path, 'wb').write(val)
|
|
67
72
|
val = path
|
|
68
73
|
self.output_rows[row_idx][col_info.slot_idx] = val
|
|
pixeltable/exec/row_update_node.py
CHANGED
|
@@ -3,7 +3,6 @@ from typing import Any, AsyncIterator
|
|
|
3
3
|
|
|
4
4
|
import pixeltable.catalog as catalog
|
|
5
5
|
import pixeltable.exprs as exprs
|
|
6
|
-
from pixeltable.utils.media_store import MediaStore
|
|
7
6
|
|
|
8
7
|
from .data_row_batch import DataRowBatch
|
|
9
8
|
from .exec_node import ExecNode
|
|
@@ -40,7 +39,7 @@ class RowUpdateNode(ExecNode):
|
|
|
40
39
|
if isinstance(col_ref, exprs.ColumnRef)
|
|
41
40
|
}
|
|
42
41
|
self.col_slot_idxs = {col: all_col_slot_idxs[col] for col in col_vals_batch[0].keys()}
|
|
43
|
-
self.key_slot_idxs = {col: all_col_slot_idxs[col] for col in tbl.tbl_version.primary_key_columns()}
|
|
42
|
+
self.key_slot_idxs = {col: all_col_slot_idxs[col] for col in tbl.tbl_version.get().primary_key_columns()}
|
|
44
43
|
self.matched_key_vals: set[tuple] = set()
|
|
45
44
|
|
|
46
45
|
async def __aiter__(self) -> AsyncIterator[DataRowBatch]:
|
pixeltable/exec/sql_node.py
CHANGED
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import warnings
|
|
3
3
|
from decimal import Decimal
|
|
4
|
-
from typing import TYPE_CHECKING, AsyncIterator, Iterable,
|
|
4
|
+
from typing import TYPE_CHECKING, AsyncIterator, Iterable, NamedTuple, Optional, Sequence
|
|
5
5
|
from uuid import UUID
|
|
6
6
|
|
|
7
7
|
import sqlalchemy as sql
|
|
8
8
|
|
|
9
9
|
import pixeltable.catalog as catalog
|
|
10
10
|
import pixeltable.exprs as exprs
|
|
11
|
+
from pixeltable.env import Env
|
|
11
12
|
|
|
12
13
|
from .data_row_batch import DataRowBatch
|
|
13
14
|
from .exec_node import ExecNode
|
|
@@ -122,7 +123,7 @@ class SqlNode(ExecNode):
|
|
|
122
123
|
if set_pk:
|
|
123
124
|
# we also need to retrieve the pk columns
|
|
124
125
|
assert tbl is not None
|
|
125
|
-
self.num_pk_cols = len(tbl.tbl_version.store_tbl.pk_columns())
|
|
126
|
+
self.num_pk_cols = len(tbl.tbl_version.get().store_tbl.pk_columns())
|
|
126
127
|
|
|
127
128
|
# additional state
|
|
128
129
|
self.result_cursor = None
|
|
@@ -142,7 +143,7 @@ class SqlNode(ExecNode):
|
|
|
142
143
|
sql_select_list = [self.sql_elements.get(e) for e in self.select_list]
|
|
143
144
|
if self.set_pk:
|
|
144
145
|
assert self.tbl is not None
|
|
145
|
-
sql_select_list += self.tbl.tbl_version.store_tbl.pk_columns()
|
|
146
|
+
sql_select_list += self.tbl.tbl_version.get().store_tbl.pk_columns()
|
|
146
147
|
stmt = sql.select(*sql_select_list)
|
|
147
148
|
|
|
148
149
|
where_clause_element = (
|
|
@@ -215,29 +216,31 @@ class SqlNode(ExecNode):
|
|
|
215
216
|
exact_version_only = set()
|
|
216
217
|
candidates = tbl.get_tbl_versions()
|
|
217
218
|
assert len(candidates) > 0
|
|
218
|
-
joined_tbls: list[catalog.
|
|
219
|
+
joined_tbls: list[catalog.TableVersionHandle] = [candidates[0]]
|
|
219
220
|
for tbl in candidates[1:]:
|
|
220
221
|
if tbl.id in refd_tbl_ids:
|
|
221
222
|
joined_tbls.append(tbl)
|
|
222
223
|
|
|
223
224
|
first = True
|
|
224
|
-
prev_tbl: catalog.
|
|
225
|
+
prev_tbl: catalog.TableVersionHandle
|
|
225
226
|
for tbl in joined_tbls[::-1]:
|
|
226
227
|
if first:
|
|
227
|
-
stmt = stmt.select_from(tbl.store_tbl.sa_tbl)
|
|
228
|
+
stmt = stmt.select_from(tbl.get().store_tbl.sa_tbl)
|
|
228
229
|
first = False
|
|
229
230
|
else:
|
|
230
231
|
# join tbl to prev_tbl on prev_tbl's rowid cols
|
|
231
|
-
prev_tbl_rowid_cols = prev_tbl.store_tbl.rowid_columns()
|
|
232
|
-
tbl_rowid_cols = tbl.store_tbl.rowid_columns()
|
|
232
|
+
prev_tbl_rowid_cols = prev_tbl.get().store_tbl.rowid_columns()
|
|
233
|
+
tbl_rowid_cols = tbl.get().store_tbl.rowid_columns()
|
|
233
234
|
rowid_clauses = [
|
|
234
235
|
c1 == c2 for c1, c2 in zip(prev_tbl_rowid_cols, tbl_rowid_cols[: len(prev_tbl_rowid_cols)])
|
|
235
236
|
]
|
|
236
|
-
stmt = stmt.join(tbl.store_tbl.sa_tbl, sql.and_(*rowid_clauses))
|
|
237
|
+
stmt = stmt.join(tbl.get().store_tbl.sa_tbl, sql.and_(*rowid_clauses))
|
|
237
238
|
if tbl.id in exact_version_only:
|
|
238
|
-
stmt = stmt.where(tbl.store_tbl.v_min_col == tbl.version)
|
|
239
|
+
stmt = stmt.where(tbl.get().store_tbl.v_min_col == tbl.get().version)
|
|
239
240
|
else:
|
|
240
|
-
stmt = stmt.where(tbl.store_tbl.v_min_col <= tbl.version).where(
|
|
241
|
+
stmt = stmt.where(tbl.get().store_tbl.v_min_col <= tbl.get().version).where(
|
|
242
|
+
tbl.get().store_tbl.v_max_col > tbl.get().version
|
|
243
|
+
)
|
|
241
244
|
prev_tbl = tbl
|
|
242
245
|
return stmt
|
|
243
246
|
|
|
@@ -264,10 +267,11 @@ class SqlNode(ExecNode):
|
|
|
264
267
|
self.limit = limit
|
|
265
268
|
|
|
266
269
|
def _log_explain(self, stmt: sql.Select) -> None:
|
|
270
|
+
conn = Env.get().conn
|
|
267
271
|
try:
|
|
268
272
|
# don't set dialect=Env.get().engine.dialect: x % y turns into x %% y, which results in a syntax error
|
|
269
273
|
stmt_str = str(stmt.compile(compile_kwargs={'literal_binds': True}))
|
|
270
|
-
explain_result =
|
|
274
|
+
explain_result = conn.execute(sql.text(f'EXPLAIN {stmt_str}'))
|
|
271
275
|
explain_str = '\n'.join([str(row) for row in explain_result])
|
|
272
276
|
_logger.debug(f'SqlScanNode explain:\n{explain_str}')
|
|
273
277
|
except Exception as e:
|
|
@@ -275,7 +279,6 @@ class SqlNode(ExecNode):
|
|
|
275
279
|
|
|
276
280
|
async def __aiter__(self) -> AsyncIterator[DataRowBatch]:
|
|
277
281
|
# run the query; do this here rather than in _open(), exceptions are only expected during iteration
|
|
278
|
-
assert self.ctx.conn is not None
|
|
279
282
|
with warnings.catch_warnings(record=True) as w:
|
|
280
283
|
stmt = self._create_stmt()
|
|
281
284
|
try:
|
|
@@ -286,7 +289,8 @@ class SqlNode(ExecNode):
|
|
|
286
289
|
pass
|
|
287
290
|
self._log_explain(stmt)
|
|
288
291
|
|
|
289
|
-
|
|
292
|
+
conn = Env.get().conn
|
|
293
|
+
result_cursor = conn.execute(stmt)
|
|
290
294
|
for warning in w:
|
|
291
295
|
pass
|
|
292
296
|
|
|
@@ -351,7 +355,7 @@ class SqlScanNode(SqlNode):
|
|
|
351
355
|
Supports filtering and ordering.
|
|
352
356
|
"""
|
|
353
357
|
|
|
354
|
-
exact_version_only: list[catalog.
|
|
358
|
+
exact_version_only: list[catalog.TableVersionHandle]
|
|
355
359
|
|
|
356
360
|
def __init__(
|
|
357
361
|
self,
|
|
@@ -359,7 +363,7 @@ class SqlScanNode(SqlNode):
|
|
|
359
363
|
row_builder: exprs.RowBuilder,
|
|
360
364
|
select_list: Iterable[exprs.Expr],
|
|
361
365
|
set_pk: bool = False,
|
|
362
|
-
exact_version_only: Optional[list[catalog.
|
|
366
|
+
exact_version_only: Optional[list[catalog.TableVersionHandle]] = None,
|
|
363
367
|
):
|
|
364
368
|
"""
|
|
365
369
|
Args:
|
pixeltable/exprs/__init__.py
CHANGED
|
pixeltable/exprs/arithmetic_expr.py
CHANGED
|
@@ -1,12 +1,10 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import Any, Optional
|
|
3
|
+
from typing import Any, Optional
|
|
4
4
|
|
|
5
5
|
import sqlalchemy as sql
|
|
6
6
|
|
|
7
|
-
import
|
|
8
|
-
import pixeltable.exprs as exprs
|
|
9
|
-
import pixeltable.type_system as ts
|
|
7
|
+
from pixeltable import exceptions as excs, type_system as ts
|
|
10
8
|
|
|
11
9
|
from .data_row import DataRow
|
|
12
10
|
from .expr import Expr
|
|
@@ -50,13 +48,13 @@ class ArithmeticExpr(Expr):
|
|
|
50
48
|
# add parentheses around operands that are ArithmeticExprs to express precedence
|
|
51
49
|
op1_str = f'({self._op1})' if isinstance(self._op1, ArithmeticExpr) else str(self._op1)
|
|
52
50
|
op2_str = f'({self._op2})' if isinstance(self._op2, ArithmeticExpr) else str(self._op2)
|
|
53
|
-
return f'{op1_str} {
|
|
51
|
+
return f'{op1_str} {self.operator} {op2_str}'
|
|
54
52
|
|
|
55
53
|
def _equals(self, other: ArithmeticExpr) -> bool:
|
|
56
54
|
return self.operator == other.operator
|
|
57
55
|
|
|
58
56
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
59
|
-
return super()._id_attrs()
|
|
57
|
+
return [*super()._id_attrs(), ('operator', self.operator.value)]
|
|
60
58
|
|
|
61
59
|
def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
62
60
|
assert self.col_type.is_int_type() or self.col_type.is_float_type() or self.col_type.is_json_type()
|
|
@@ -95,7 +93,7 @@ class ArithmeticExpr(Expr):
|
|
|
95
93
|
return sql.sql.expression.cast(sql.func.floor(left / nullif), self.col_type.to_sa_type())
|
|
96
94
|
if self.col_type.is_float_type():
|
|
97
95
|
return sql.sql.expression.cast(sql.func.floor(left / nullif), self.col_type.to_sa_type())
|
|
98
|
-
|
|
96
|
+
raise AssertionError()
|
|
99
97
|
|
|
100
98
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
101
99
|
op1_val = data_row[self._op1.slot_idx]
|
|
@@ -113,9 +111,7 @@ class ArithmeticExpr(Expr):
|
|
|
113
111
|
|
|
114
112
|
data_row[self.slot_idx] = self.eval_nullable(op1_val, op2_val)
|
|
115
113
|
|
|
116
|
-
def eval_nullable(
|
|
117
|
-
self, op1_val: Union[int, float, None], op2_val: Union[int, float, None]
|
|
118
|
-
) -> Union[int, float, None]:
|
|
114
|
+
def eval_nullable(self, op1_val: Optional[float], op2_val: Optional[float]) -> Optional[float]:
|
|
119
115
|
"""
|
|
120
116
|
Return the result of evaluating the expression on two nullable int/float operands,
|
|
121
117
|
None is interpreted as SQL NULL
|
|
@@ -124,7 +120,7 @@ class ArithmeticExpr(Expr):
|
|
|
124
120
|
return None
|
|
125
121
|
return self.eval_non_null(op1_val, op2_val)
|
|
126
122
|
|
|
127
|
-
def eval_non_null(self, op1_val:
|
|
123
|
+
def eval_non_null(self, op1_val: float, op2_val: float) -> float:
|
|
128
124
|
"""
|
|
129
125
|
Return the result of evaluating the expression on two int/float operands
|
|
130
126
|
"""
|
pixeltable/exprs/array_slice.py
CHANGED
|
@@ -41,7 +41,7 @@ class ArraySlice(Expr):
|
|
|
41
41
|
return self.index == other.index
|
|
42
42
|
|
|
43
43
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
44
|
-
return super()._id_attrs()
|
|
44
|
+
return [*super()._id_attrs(), ('index', self.index)]
|
|
45
45
|
|
|
46
46
|
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
47
47
|
return None
|
|
pixeltable/exprs/column_property_ref.py
CHANGED
|
@@ -40,7 +40,7 @@ class ColumnPropertyRef(Expr):
|
|
|
40
40
|
return self.prop == other.prop
|
|
41
41
|
|
|
42
42
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
43
|
-
return super()._id_attrs()
|
|
43
|
+
return [*super()._id_attrs(), ('prop', self.prop.value)]
|
|
44
44
|
|
|
45
45
|
@property
|
|
46
46
|
def _col_ref(self) -> ColumnRef:
|
|
@@ -52,7 +52,7 @@ class ColumnPropertyRef(Expr):
|
|
|
52
52
|
return f'{self._col_ref}.{self.prop.name.lower()}'
|
|
53
53
|
|
|
54
54
|
def is_error_prop(self) -> bool:
|
|
55
|
-
return self.prop
|
|
55
|
+
return self.prop in {self.Property.ERRORTYPE, self.Property.ERRORMSG}
|
|
56
56
|
|
|
57
57
|
def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
58
58
|
if not self._col_ref.col.is_stored:
|
|
@@ -95,7 +95,7 @@ class ColumnPropertyRef(Expr):
|
|
|
95
95
|
else:
|
|
96
96
|
data_row[self.slot_idx] = str(exc)
|
|
97
97
|
else:
|
|
98
|
-
|
|
98
|
+
raise AssertionError()
|
|
99
99
|
|
|
100
100
|
def _as_dict(self) -> dict:
|
|
101
101
|
return {'prop': self.prop.value, **super()._as_dict()}
|
pixeltable/exprs/column_ref.py
CHANGED
|
@@ -6,9 +6,7 @@ from uuid import UUID
|
|
|
6
6
|
import sqlalchemy as sql
|
|
7
7
|
|
|
8
8
|
import pixeltable as pxt
|
|
9
|
-
import
|
|
10
|
-
import pixeltable.exceptions as excs
|
|
11
|
-
import pixeltable.iterators as iters
|
|
9
|
+
from pixeltable import catalog, exceptions as excs, iterators as iters
|
|
12
10
|
|
|
13
11
|
from ..utils.description_helper import DescriptionHelper
|
|
14
12
|
from .data_row import DataRow
|
|
@@ -52,15 +50,15 @@ class ColumnRef(Expr):
|
|
|
52
50
|
assert col.tbl is not None
|
|
53
51
|
self.col = col
|
|
54
52
|
self.is_unstored_iter_col = (
|
|
55
|
-
col.tbl.
|
|
53
|
+
col.tbl.get().is_component_view and col.tbl.get().is_iterator_column(col) and not col.is_stored
|
|
56
54
|
)
|
|
57
55
|
self.iter_arg_ctx = None
|
|
58
56
|
# number of rowid columns in the base table
|
|
59
|
-
self.base_rowid_len = col.tbl.base.num_rowid_columns() if self.is_unstored_iter_col else 0
|
|
57
|
+
self.base_rowid_len = col.tbl.get().base.get().num_rowid_columns() if self.is_unstored_iter_col else 0
|
|
60
58
|
self.base_rowid = [None] * self.base_rowid_len
|
|
61
59
|
self.iterator = None
|
|
62
60
|
# index of the position column in the view's primary key; don't try to reference tbl.store_tbl here
|
|
63
|
-
self.pos_idx = col.tbl.num_rowid_columns() - 1 if self.is_unstored_iter_col else None
|
|
61
|
+
self.pos_idx = col.tbl.get().num_rowid_columns() - 1 if self.is_unstored_iter_col else None
|
|
64
62
|
|
|
65
63
|
self.perform_validation = False
|
|
66
64
|
if col.col_type.is_media_type():
|
|
@@ -84,7 +82,8 @@ class ColumnRef(Expr):
|
|
|
84
82
|
assert len(self.iter_arg_ctx.target_slot_idxs) == 1 # a single inline dict
|
|
85
83
|
|
|
86
84
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
87
|
-
return
|
|
85
|
+
return [
|
|
86
|
+
*super()._id_attrs(),
|
|
88
87
|
('tbl_id', self.col.tbl.id),
|
|
89
88
|
('col_id', self.col.id),
|
|
90
89
|
('perform_validation', self.perform_validation),
|
|
@@ -138,7 +137,7 @@ class ColumnRef(Expr):
|
|
|
138
137
|
return self.col == other.col and self.perform_validation == other.perform_validation
|
|
139
138
|
|
|
140
139
|
def _df(self) -> 'pxt.dataframe.DataFrame':
|
|
141
|
-
tbl = catalog.Catalog.get().
|
|
140
|
+
tbl = catalog.Catalog.get().get_tbl(self.col.tbl.id)
|
|
142
141
|
return tbl.select(self)
|
|
143
142
|
|
|
144
143
|
def show(self, *args, **kwargs) -> 'pxt.dataframe.DataFrameResultSet':
|
|
@@ -166,9 +165,9 @@ class ColumnRef(Expr):
|
|
|
166
165
|
return self._descriptors().to_html()
|
|
167
166
|
|
|
168
167
|
def _descriptors(self) -> DescriptionHelper:
|
|
169
|
-
tbl = catalog.Catalog.get().
|
|
168
|
+
tbl = catalog.Catalog.get().get_tbl(self.col.tbl.id)
|
|
170
169
|
helper = DescriptionHelper()
|
|
171
|
-
helper.append(f'Column\n{self.col.name!r}\n(of table {tbl._path!r})')
|
|
170
|
+
helper.append(f'Column\n{self.col.name!r}\n(of table {tbl._path()!r})')
|
|
172
171
|
helper.append(tbl._col_descriptor([self.col.name]))
|
|
173
172
|
idxs = tbl._index_descriptor([self.col.name])
|
|
174
173
|
if len(idxs) > 0:
|
|
@@ -217,7 +216,7 @@ class ColumnRef(Expr):
|
|
|
217
216
|
if self.base_rowid != data_row.pk[: self.base_rowid_len]:
|
|
218
217
|
row_builder.eval(data_row, self.iter_arg_ctx)
|
|
219
218
|
iterator_args = data_row[self.iter_arg_ctx.target_slot_idxs[0]]
|
|
220
|
-
self.iterator = self.col.tbl.iterator_cls(**iterator_args)
|
|
219
|
+
self.iterator = self.col.tbl.get().iterator_cls(**iterator_args)
|
|
221
220
|
self.base_rowid = data_row.pk[: self.base_rowid_len]
|
|
222
221
|
self.iterator.set_pos(data_row.pk[self.pos_idx])
|
|
223
222
|
res = next(self.iterator)
|
|
@@ -225,7 +224,7 @@ class ColumnRef(Expr):
|
|
|
225
224
|
|
|
226
225
|
def _as_dict(self) -> dict:
|
|
227
226
|
tbl = self.col.tbl
|
|
228
|
-
version = tbl.version if tbl.is_snapshot else None
|
|
227
|
+
version = tbl.get().version if tbl.get().is_snapshot else None
|
|
229
228
|
# we omit self.components, even if this is a validating ColumnRef, because init() will recreate the
|
|
230
229
|
# non-validating component ColumnRef
|
|
231
230
|
return {
|
|
@@ -238,7 +237,7 @@ class ColumnRef(Expr):
|
|
|
238
237
|
@classmethod
|
|
239
238
|
def get_column(cls, d: dict) -> catalog.Column:
|
|
240
239
|
tbl_id, version, col_id = UUID(d['tbl_id']), d['tbl_version'], d['col_id']
|
|
241
|
-
tbl_version = catalog.Catalog.get().
|
|
240
|
+
tbl_version = catalog.Catalog.get().get_tbl_version(tbl_id, version)
|
|
242
241
|
# don't use tbl_version.cols_by_id here, this might be a snapshot reference to a column that was then dropped
|
|
243
242
|
col = next(col for col in tbl_version.cols if col.id == col_id)
|
|
244
243
|
return col
|
pixeltable/exprs/comparison.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import
|
|
3
|
+
from typing import Any, Optional
|
|
4
4
|
|
|
5
5
|
import sqlalchemy as sql
|
|
6
6
|
|
|
@@ -15,9 +15,6 @@ from .literal import Literal
|
|
|
15
15
|
from .row_builder import RowBuilder
|
|
16
16
|
from .sql_element_cache import SqlElementCache
|
|
17
17
|
|
|
18
|
-
if TYPE_CHECKING:
|
|
19
|
-
from pixeltable import index
|
|
20
|
-
|
|
21
18
|
|
|
22
19
|
class Comparison(Expr):
|
|
23
20
|
is_search_arg_comparison: bool
|
|
@@ -62,7 +59,7 @@ class Comparison(Expr):
|
|
|
62
59
|
return self.operator == other.operator
|
|
63
60
|
|
|
64
61
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
65
|
-
return super()._id_attrs()
|
|
62
|
+
return [*super()._id_attrs(), ('operator', self.operator.value)]
|
|
66
63
|
|
|
67
64
|
@property
|
|
68
65
|
def _op1(self) -> Expr:
|
|
@@ -84,7 +81,7 @@ class Comparison(Expr):
|
|
|
84
81
|
if self.is_search_arg_comparison:
|
|
85
82
|
# reference the index value column if there is an index and this is not a snapshot
|
|
86
83
|
# (indices don't apply to snapshots)
|
|
87
|
-
tbl = self._op1.col.tbl
|
|
84
|
+
tbl = self._op1.col.tbl.get()
|
|
88
85
|
idx_info = [
|
|
89
86
|
info for info in self._op1.col.get_idx_info().values() if isinstance(info.idx, index.BtreeIndex)
|
|
90
87
|
]
|
|
pixeltable/exprs/compound_predicate.py
CHANGED
|
@@ -5,7 +5,7 @@ from typing import Any, Callable, Optional
|
|
|
5
5
|
|
|
6
6
|
import sqlalchemy as sql
|
|
7
7
|
|
|
8
|
-
import
|
|
8
|
+
from pixeltable import type_system as ts
|
|
9
9
|
|
|
10
10
|
from .data_row import DataRow
|
|
11
11
|
from .expr import Expr
|
|
@@ -58,10 +58,10 @@ class CompoundPredicate(Expr):
|
|
|
58
58
|
return self.operator == other.operator
|
|
59
59
|
|
|
60
60
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
61
|
-
return super()._id_attrs()
|
|
61
|
+
return [*super()._id_attrs(), ('operator', self.operator.value)]
|
|
62
62
|
|
|
63
63
|
def split_conjuncts(self, condition: Callable[[Expr], bool]) -> tuple[list[Expr], Optional[Expr]]:
|
|
64
|
-
if self.operator
|
|
64
|
+
if self.operator in {LogicalOperator.OR, LogicalOperator.NOT}:
|
|
65
65
|
return super().split_conjuncts(condition)
|
|
66
66
|
matches = [op for op in self.components if condition(op)]
|
|
67
67
|
non_matches = [op for op in self.components if not condition(op)]
|
|
@@ -83,7 +83,7 @@ class CompoundPredicate(Expr):
|
|
|
83
83
|
if self.operator == LogicalOperator.NOT:
|
|
84
84
|
data_row[self.slot_idx] = not data_row[self.components[0].slot_idx]
|
|
85
85
|
else:
|
|
86
|
-
val =
|
|
86
|
+
val = self.operator == LogicalOperator.AND
|
|
87
87
|
op_function = operator.and_ if self.operator == LogicalOperator.AND else operator.or_
|
|
88
88
|
for op in self.components:
|
|
89
89
|
val = op_function(val, data_row[op.slot_idx])
|
pixeltable/exprs/expr.py
CHANGED
|
@@ -14,10 +14,7 @@ import numpy as np
|
|
|
14
14
|
import sqlalchemy as sql
|
|
15
15
|
from typing_extensions import Self, _AnnotatedAlias
|
|
16
16
|
|
|
17
|
-
import
|
|
18
|
-
import pixeltable.exceptions as excs
|
|
19
|
-
import pixeltable.func as func
|
|
20
|
-
import pixeltable.type_system as ts
|
|
17
|
+
from pixeltable import catalog, exceptions as excs, func, type_system as ts
|
|
21
18
|
|
|
22
19
|
from .data_row import DataRow
|
|
23
20
|
from .globals import ArithmeticOperator, ComparisonOperator, LiteralPythonTypes, LogicalOperator
|
|
@@ -110,11 +107,29 @@ class Expr(abc.ABC):
|
|
|
110
107
|
"""
|
|
111
108
|
return None
|
|
112
109
|
|
|
110
|
+
@property
|
|
111
|
+
def validation_error(self) -> Optional[str]:
|
|
112
|
+
"""
|
|
113
|
+
Subclasses can override this to indicate that validation has failed after a catalog load.
|
|
114
|
+
|
|
115
|
+
If an Expr (or any of its transitive components) is invalid, then it cannot be evaluated, but its metadata
|
|
116
|
+
will still be preserved in the catalog (so that the user can take appropriate corrective action).
|
|
117
|
+
"""
|
|
118
|
+
for c in self.components:
|
|
119
|
+
error = c.validation_error
|
|
120
|
+
if error is not None:
|
|
121
|
+
return error
|
|
122
|
+
return None
|
|
123
|
+
|
|
124
|
+
@property
|
|
125
|
+
def is_valid(self) -> bool:
|
|
126
|
+
return self.validation_error is None
|
|
127
|
+
|
|
113
128
|
def equals(self, other: Expr) -> bool:
|
|
114
129
|
"""
|
|
115
130
|
Subclass-specific comparison. Implemented as a function because __eq__() is needed to construct Comparisons.
|
|
116
131
|
"""
|
|
117
|
-
if type(self)
|
|
132
|
+
if type(self) is not type(other):
|
|
118
133
|
return False
|
|
119
134
|
if len(self.components) != len(other.components):
|
|
120
135
|
return False
|
|
@@ -156,10 +171,7 @@ class Expr(abc.ABC):
|
|
|
156
171
|
def list_equals(cls, a: list[Expr], b: list[Expr]) -> bool:
|
|
157
172
|
if len(a) != len(b):
|
|
158
173
|
return False
|
|
159
|
-
for i in range(len(a))
|
|
160
|
-
if not a[i].equals(b[i]):
|
|
161
|
-
return False
|
|
162
|
-
return True
|
|
174
|
+
return all(a[i].equals(b[i]) for i in range(len(a)))
|
|
163
175
|
|
|
164
176
|
def copy(self) -> Expr:
|
|
165
177
|
"""
|
|
@@ -201,9 +213,9 @@ class Expr(abc.ABC):
|
|
|
201
213
|
return new.copy()
|
|
202
214
|
for i in range(len(self.components)):
|
|
203
215
|
self.components[i] = self.components[i].substitute(spec)
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
return
|
|
216
|
+
result = self.maybe_literal()
|
|
217
|
+
result.id = result._create_id()
|
|
218
|
+
return result
|
|
207
219
|
|
|
208
220
|
@classmethod
|
|
209
221
|
def list_substitute(cls, expr_list: list[Expr], spec: dict[Expr, Expr]) -> None:
|
|
@@ -238,14 +250,11 @@ class Expr(abc.ABC):
|
|
|
238
250
|
from .column_ref import ColumnRef
|
|
239
251
|
|
|
240
252
|
col_refs = self.subexprs(ColumnRef)
|
|
241
|
-
for col_ref in col_refs
|
|
242
|
-
if not any(tbl.has_column(col_ref.col) for tbl in tbls):
|
|
243
|
-
return False
|
|
244
|
-
return True
|
|
253
|
+
return all(any(tbl.has_column(col_ref.col) for tbl in tbls) for col_ref in col_refs)
|
|
245
254
|
|
|
246
255
|
def retarget(self, tbl: catalog.TableVersionPath) -> Self:
|
|
247
256
|
"""Retarget ColumnRefs in this expr to the specific TableVersions in tbl."""
|
|
248
|
-
tbl_versions = {tbl_version.id: tbl_version for tbl_version in tbl.get_tbl_versions()}
|
|
257
|
+
tbl_versions = {tbl_version.id: tbl_version.get() for tbl_version in tbl.get_tbl_versions()}
|
|
249
258
|
return self._retarget(tbl_versions)
|
|
250
259
|
|
|
251
260
|
def _retarget(self, tbl_versions: dict[UUID, catalog.TableVersion]) -> Self:
|
|
@@ -355,7 +364,7 @@ class Expr(abc.ABC):
|
|
|
355
364
|
|
|
356
365
|
@classmethod
|
|
357
366
|
def all_tbl_ids(cls, exprs_: Iterable[Expr]) -> set[UUID]:
|
|
358
|
-
return
|
|
367
|
+
return {tbl_id for e in exprs_ for tbl_id in e.tbl_ids()}
|
|
359
368
|
|
|
360
369
|
@classmethod
|
|
361
370
|
def get_refd_columns(cls, expr_dict: dict[str, Any]) -> list[catalog.Column]:
|
|
@@ -474,7 +483,7 @@ class Expr(abc.ABC):
|
|
|
474
483
|
return {'_classname': self.__class__.__name__, **self._as_dict()}
|
|
475
484
|
|
|
476
485
|
@classmethod
|
|
477
|
-
def as_dict_list(
|
|
486
|
+
def as_dict_list(cls, expr_list: list[Expr]) -> list[dict]:
|
|
478
487
|
return [e.as_dict() for e in expr_list]
|
|
479
488
|
|
|
480
489
|
def _as_dict(self) -> dict:
|
|
@@ -505,7 +514,7 @@ class Expr(abc.ABC):
|
|
|
505
514
|
|
|
506
515
|
@classmethod
|
|
507
516
|
def _from_dict(cls, d: dict, components: list[Expr]) -> Self:
|
|
508
|
-
|
|
517
|
+
raise AssertionError('not implemented')
|
|
509
518
|
|
|
510
519
|
def isin(self, value_set: Any) -> 'exprs.InPredicate':
|
|
511
520
|
from .in_predicate import InPredicate
|
|
@@ -777,13 +786,13 @@ class Expr(abc.ABC):
|
|
|
777
786
|
first_param = next(params_iter) if len(params) >= 1 else None
|
|
778
787
|
second_param = next(params_iter) if len(params) >= 2 else None
|
|
779
788
|
# Check that fn has at least one positional parameter
|
|
780
|
-
if len(params) == 0 or first_param.kind in
|
|
789
|
+
if len(params) == 0 or first_param.kind in {inspect.Parameter.KEYWORD_ONLY, inspect.Parameter.VAR_KEYWORD}:
|
|
781
790
|
raise excs.Error(f'Function `{fn.__name__}` has no positional parameters.')
|
|
782
791
|
# Check that fn has at most one required parameter, i.e., its second parameter
|
|
783
792
|
# has no default and is not a varargs
|
|
784
793
|
if (
|
|
785
794
|
len(params) >= 2
|
|
786
|
-
and second_param.kind not in
|
|
795
|
+
and second_param.kind not in {inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD}
|
|
787
796
|
and second_param.default is inspect.Parameter.empty
|
|
788
797
|
):
|
|
789
798
|
raise excs.Error(f'Function `{fn.__name__}` has multiple required parameters.')
|
pixeltable/exprs/expr_dict.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
from typing import Generic, Iterable, Iterator, Optional, TypeVar
|
|
2
2
|
|
|
3
|
-
T = TypeVar('T')
|
|
4
|
-
|
|
5
3
|
from .expr import Expr
|
|
6
4
|
|
|
5
|
+
T = TypeVar('T')
|
|
6
|
+
|
|
7
7
|
|
|
8
8
|
class ExprDict(Generic[T]):
|
|
9
9
|
"""
|
|
@@ -47,7 +47,7 @@ class ExprDict(Generic[T]):
|
|
|
47
47
|
self._data.clear()
|
|
48
48
|
|
|
49
49
|
def keys(self) -> Iterator[Expr]:
|
|
50
|
-
return self
|
|
50
|
+
return iter(self)
|
|
51
51
|
|
|
52
52
|
def values(self) -> Iterator[T]:
|
|
53
53
|
return (value for _, value in self._data.values())
|
pixeltable/exprs/expr_set.py
CHANGED
|
@@ -46,7 +46,7 @@ class ExprSet(Generic[T]):
|
|
|
46
46
|
|
|
47
47
|
def __getitem__(self, index: object) -> Optional[T]:
|
|
48
48
|
"""Indexed lookup by slot_idx or Expr.id."""
|
|
49
|
-
assert isinstance(index, int
|
|
49
|
+
assert isinstance(index, (int, Expr))
|
|
50
50
|
if isinstance(index, int):
|
|
51
51
|
# return expr with matching slot_idx
|
|
52
52
|
return self.exprs_by_idx.get(index)
|