pixeltable 0.2.21__py3-none-any.whl → 0.2.23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +2 -2
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +1 -1
- pixeltable/catalog/column.py +41 -29
- pixeltable/catalog/globals.py +18 -0
- pixeltable/catalog/insertable_table.py +30 -10
- pixeltable/catalog/table.py +198 -86
- pixeltable/catalog/table_version.py +47 -53
- pixeltable/catalog/table_version_path.py +2 -2
- pixeltable/catalog/view.py +17 -18
- pixeltable/dataframe.py +27 -36
- pixeltable/env.py +7 -0
- pixeltable/exec/__init__.py +0 -1
- pixeltable/exec/aggregation_node.py +6 -3
- pixeltable/exec/cache_prefetch_node.py +189 -43
- pixeltable/exec/data_row_batch.py +5 -22
- pixeltable/exec/exec_context.py +2 -2
- pixeltable/exec/exec_node.py +3 -2
- pixeltable/exec/expr_eval_node.py +23 -16
- pixeltable/exec/in_memory_data_node.py +6 -3
- pixeltable/exec/sql_node.py +24 -25
- pixeltable/exprs/arithmetic_expr.py +12 -5
- pixeltable/exprs/array_slice.py +7 -7
- pixeltable/exprs/column_property_ref.py +37 -10
- pixeltable/exprs/column_ref.py +97 -14
- pixeltable/exprs/comparison.py +10 -5
- pixeltable/exprs/compound_predicate.py +8 -7
- pixeltable/exprs/data_row.py +27 -18
- pixeltable/exprs/expr.py +53 -52
- pixeltable/exprs/expr_set.py +5 -0
- pixeltable/exprs/function_call.py +32 -16
- pixeltable/exprs/globals.py +4 -1
- pixeltable/exprs/in_predicate.py +8 -7
- pixeltable/exprs/inline_expr.py +4 -4
- pixeltable/exprs/is_null.py +4 -4
- pixeltable/exprs/json_mapper.py +11 -12
- pixeltable/exprs/json_path.py +6 -11
- pixeltable/exprs/literal.py +5 -5
- pixeltable/exprs/method_ref.py +5 -4
- pixeltable/exprs/object_ref.py +2 -1
- pixeltable/exprs/row_builder.py +88 -36
- pixeltable/exprs/rowid_ref.py +12 -11
- pixeltable/exprs/similarity_expr.py +12 -7
- pixeltable/exprs/sql_element_cache.py +7 -5
- pixeltable/exprs/type_cast.py +8 -6
- pixeltable/exprs/variable.py +5 -4
- pixeltable/func/aggregate_function.py +9 -9
- pixeltable/func/expr_template_function.py +6 -5
- pixeltable/func/function.py +11 -10
- pixeltable/func/udf.py +6 -11
- pixeltable/functions/__init__.py +2 -2
- pixeltable/functions/globals.py +5 -7
- pixeltable/functions/huggingface.py +155 -45
- pixeltable/functions/llama_cpp.py +107 -0
- pixeltable/functions/mistralai.py +1 -1
- pixeltable/functions/ollama.py +147 -0
- pixeltable/functions/openai.py +1 -1
- pixeltable/functions/replicate.py +72 -0
- pixeltable/functions/string.py +9 -0
- pixeltable/functions/together.py +1 -1
- pixeltable/functions/util.py +5 -2
- pixeltable/globals.py +67 -26
- pixeltable/index/btree.py +16 -3
- pixeltable/index/embedding_index.py +4 -4
- pixeltable/io/__init__.py +1 -2
- pixeltable/io/fiftyone.py +178 -0
- pixeltable/io/globals.py +96 -2
- pixeltable/iterators/base.py +3 -2
- pixeltable/iterators/document.py +1 -1
- pixeltable/iterators/video.py +120 -63
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_21.py +34 -0
- pixeltable/metadata/converters/util.py +45 -4
- pixeltable/metadata/notes.py +1 -0
- pixeltable/metadata/schema.py +8 -0
- pixeltable/plan.py +17 -15
- pixeltable/py.typed +0 -0
- pixeltable/store.py +7 -2
- pixeltable/tool/create_test_db_dump.py +1 -1
- pixeltable/tool/create_test_video.py +1 -1
- pixeltable/tool/embed_udf.py +1 -1
- pixeltable/tool/mypy_plugin.py +28 -5
- pixeltable/type_system.py +100 -36
- pixeltable/utils/coco.py +5 -5
- pixeltable/utils/documents.py +15 -1
- pixeltable/utils/formatter.py +12 -13
- pixeltable/utils/s3.py +6 -3
- {pixeltable-0.2.21.dist-info → pixeltable-0.2.23.dist-info}/METADATA +158 -49
- pixeltable-0.2.23.dist-info/RECORD +153 -0
- pixeltable/exec/media_validation_node.py +0 -43
- pixeltable-0.2.21.dist-info/RECORD +0 -148
- {pixeltable-0.2.21.dist-info → pixeltable-0.2.23.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.21.dist-info → pixeltable-0.2.23.dist-info}/WHEEL +0 -0
- {pixeltable-0.2.21.dist-info → pixeltable-0.2.23.dist-info}/entry_points.txt +0 -0
|
@@ -6,6 +6,7 @@ import sqlalchemy as sql
|
|
|
6
6
|
|
|
7
7
|
import pixeltable.exceptions as excs
|
|
8
8
|
import pixeltable.type_system as ts
|
|
9
|
+
|
|
9
10
|
from .data_row import DataRow
|
|
10
11
|
from .expr import Expr
|
|
11
12
|
from .globals import ArithmeticOperator
|
|
@@ -68,11 +69,15 @@ class ArithmeticExpr(Expr):
|
|
|
68
69
|
return left * right
|
|
69
70
|
if self.operator == ArithmeticOperator.DIV:
|
|
70
71
|
assert self.col_type.is_float_type()
|
|
72
|
+
# Avoid DivisionByZero: if right is 0, make this a NULL
|
|
73
|
+
# TODO: Should we cast the NULLs to NaNs when they are retrieved back into Python?
|
|
74
|
+
nullif = sql.sql.func.nullif(right, 0)
|
|
71
75
|
# We have to cast to a `float`, or else we'll get a `Decimal`
|
|
72
|
-
return sql.sql.expression.cast(left /
|
|
76
|
+
return sql.sql.expression.cast(left / nullif, sql.Float)
|
|
73
77
|
if self.operator == ArithmeticOperator.MOD:
|
|
74
78
|
if self.col_type.is_int_type():
|
|
75
|
-
|
|
79
|
+
nullif = sql.sql.func.nullif(right, 0)
|
|
80
|
+
return left % nullif
|
|
76
81
|
if self.col_type.is_float_type():
|
|
77
82
|
# Postgres does not support modulus for floats
|
|
78
83
|
return None
|
|
@@ -82,10 +87,12 @@ class ArithmeticExpr(Expr):
|
|
|
82
87
|
# We need the behavior to be consistent, so that expressions will evaluate the same way
|
|
83
88
|
# whether or not their operands can be translated to SQL. These SQL clauses should
|
|
84
89
|
# mimic the behavior of Python's // operator.
|
|
90
|
+
nullif = sql.sql.func.nullif(right, 0)
|
|
85
91
|
if self.col_type.is_int_type():
|
|
86
|
-
return sql.sql.expression.cast(sql.func.floor(left /
|
|
92
|
+
return sql.sql.expression.cast(sql.func.floor(left / nullif), sql.Integer)
|
|
87
93
|
if self.col_type.is_float_type():
|
|
88
|
-
return sql.sql.expression.cast(sql.func.floor(left /
|
|
94
|
+
return sql.sql.expression.cast(sql.func.floor(left / nullif), sql.Float)
|
|
95
|
+
assert False
|
|
89
96
|
|
|
90
97
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
91
98
|
op1_val = data_row[self._op1.slot_idx]
|
|
@@ -121,7 +128,7 @@ class ArithmeticExpr(Expr):
|
|
|
121
128
|
return {'operator': self.operator.value, **super()._as_dict()}
|
|
122
129
|
|
|
123
130
|
@classmethod
|
|
124
|
-
def _from_dict(cls, d: dict, components: list[Expr]) ->
|
|
131
|
+
def _from_dict(cls, d: dict, components: list[Expr]) -> ArithmeticExpr:
|
|
125
132
|
assert 'operator' in d
|
|
126
133
|
assert len(components) == 2
|
|
127
134
|
return cls(ArithmeticOperator(d['operator']), components[0], components[1])
|
pixeltable/exprs/array_slice.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import Any,
|
|
3
|
+
from typing import Any, Optional, Union
|
|
4
4
|
|
|
5
5
|
import sqlalchemy as sql
|
|
6
6
|
|
|
@@ -15,7 +15,7 @@ class ArraySlice(Expr):
|
|
|
15
15
|
"""
|
|
16
16
|
Slice operation on an array, eg, t.array_col[:, 1:2].
|
|
17
17
|
"""
|
|
18
|
-
def __init__(self, arr: Expr, index:
|
|
18
|
+
def __init__(self, arr: Expr, index: tuple[Union[int, slice], ...]):
|
|
19
19
|
assert arr.col_type.is_array_type()
|
|
20
20
|
# determine result type
|
|
21
21
|
super().__init__(arr.col_type)
|
|
@@ -24,7 +24,7 @@ class ArraySlice(Expr):
|
|
|
24
24
|
self.id = self._create_id()
|
|
25
25
|
|
|
26
26
|
def __str__(self) -> str:
|
|
27
|
-
index_strs:
|
|
27
|
+
index_strs: list[str] = []
|
|
28
28
|
for el in self.index:
|
|
29
29
|
if isinstance(el, int):
|
|
30
30
|
index_strs.append(str(el))
|
|
@@ -39,7 +39,7 @@ class ArraySlice(Expr):
|
|
|
39
39
|
def _equals(self, other: ArraySlice) -> bool:
|
|
40
40
|
return self.index == other.index
|
|
41
41
|
|
|
42
|
-
def _id_attrs(self) ->
|
|
42
|
+
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
43
43
|
return super()._id_attrs() + [('index', self.index)]
|
|
44
44
|
|
|
45
45
|
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
@@ -49,8 +49,8 @@ class ArraySlice(Expr):
|
|
|
49
49
|
val = data_row[self._array.slot_idx]
|
|
50
50
|
data_row[self.slot_idx] = val[self.index]
|
|
51
51
|
|
|
52
|
-
def _as_dict(self) ->
|
|
53
|
-
index = []
|
|
52
|
+
def _as_dict(self) -> dict:
|
|
53
|
+
index: list[Any] = []
|
|
54
54
|
for el in self.index:
|
|
55
55
|
if isinstance(el, slice):
|
|
56
56
|
index.append([el.start, el.stop, el.step])
|
|
@@ -59,7 +59,7 @@ class ArraySlice(Expr):
|
|
|
59
59
|
return {'index': index, **super()._as_dict()}
|
|
60
60
|
|
|
61
61
|
@classmethod
|
|
62
|
-
def _from_dict(cls, d:
|
|
62
|
+
def _from_dict(cls, d: dict, components: list[Expr]) -> ArraySlice:
|
|
63
63
|
assert 'index' in d
|
|
64
64
|
index = []
|
|
65
65
|
for el in d['index']:
|
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import enum
|
|
4
|
-
from typing import
|
|
4
|
+
from typing import Any, Optional
|
|
5
5
|
|
|
6
6
|
import sqlalchemy as sql
|
|
7
7
|
|
|
8
8
|
import pixeltable.type_system as ts
|
|
9
|
+
from pixeltable import catalog
|
|
9
10
|
from .column_ref import ColumnRef
|
|
10
11
|
from .data_row import DataRow
|
|
11
12
|
from .expr import Expr
|
|
@@ -33,22 +34,36 @@ class ColumnPropertyRef(Expr):
|
|
|
33
34
|
def default_column_name(self) -> Optional[str]:
|
|
34
35
|
return str(self).replace('.', '_')
|
|
35
36
|
|
|
36
|
-
def _equals(self, other:
|
|
37
|
+
def _equals(self, other: ColumnPropertyRef) -> bool:
|
|
37
38
|
return self.prop == other.prop
|
|
38
39
|
|
|
39
|
-
def _id_attrs(self) ->
|
|
40
|
+
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
40
41
|
return super()._id_attrs() + [('prop', self.prop.value)]
|
|
41
42
|
|
|
42
43
|
@property
|
|
43
44
|
def _col_ref(self) -> ColumnRef:
|
|
44
|
-
|
|
45
|
+
col_ref = self.components[0]
|
|
46
|
+
assert isinstance(col_ref, ColumnRef)
|
|
47
|
+
return col_ref
|
|
45
48
|
|
|
46
49
|
def __str__(self) -> str:
|
|
47
50
|
return f'{self._col_ref}.{self.prop.name.lower()}'
|
|
48
51
|
|
|
52
|
+
def is_error_prop(self) -> bool:
|
|
53
|
+
return self.prop == self.Property.ERRORTYPE or self.prop == self.Property.ERRORMSG
|
|
54
|
+
|
|
49
55
|
def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
50
56
|
if not self._col_ref.col.is_stored:
|
|
51
57
|
return None
|
|
58
|
+
|
|
59
|
+
# the errortype/-msg properties of a read-validated media column need to be extracted from the DataRow
|
|
60
|
+
if (
|
|
61
|
+
self._col_ref.col.col_type.is_media_type()
|
|
62
|
+
and self._col_ref.col.media_validation == catalog.MediaValidation.ON_READ
|
|
63
|
+
and self.is_error_prop()
|
|
64
|
+
):
|
|
65
|
+
return None
|
|
66
|
+
|
|
52
67
|
if self.prop == self.Property.ERRORTYPE:
|
|
53
68
|
assert self._col_ref.col.sa_errortype_col is not None
|
|
54
69
|
return self._col_ref.col.sa_errortype_col
|
|
@@ -61,18 +76,30 @@ class ColumnPropertyRef(Expr):
|
|
|
61
76
|
return None
|
|
62
77
|
|
|
63
78
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
64
|
-
assert self.prop == self.Property.FILEURL or self.prop == self.Property.LOCALPATH
|
|
65
|
-
assert data_row.has_val[self._col_ref.slot_idx]
|
|
66
79
|
if self.prop == self.Property.FILEURL:
|
|
80
|
+
assert data_row.has_val[self._col_ref.slot_idx]
|
|
67
81
|
data_row[self.slot_idx] = data_row.file_urls[self._col_ref.slot_idx]
|
|
68
|
-
|
|
82
|
+
return
|
|
83
|
+
elif self.prop == self.Property.LOCALPATH:
|
|
84
|
+
assert data_row.has_val[self._col_ref.slot_idx]
|
|
69
85
|
data_row[self.slot_idx] = data_row.file_paths[self._col_ref.slot_idx]
|
|
70
|
-
|
|
71
|
-
|
|
86
|
+
return
|
|
87
|
+
elif self.is_error_prop():
|
|
88
|
+
exc = data_row.get_exc(self._col_ref.slot_idx)
|
|
89
|
+
if exc is None:
|
|
90
|
+
data_row[self.slot_idx] = None
|
|
91
|
+
elif self.prop == self.Property.ERRORTYPE:
|
|
92
|
+
data_row[self.slot_idx] = type(exc).__name__
|
|
93
|
+
else:
|
|
94
|
+
data_row[self.slot_idx] = str(exc)
|
|
95
|
+
else:
|
|
96
|
+
assert False
|
|
97
|
+
|
|
98
|
+
def _as_dict(self) -> dict:
|
|
72
99
|
return {'prop': self.prop.value, **super()._as_dict()}
|
|
73
100
|
|
|
74
101
|
@classmethod
|
|
75
|
-
def _from_dict(cls, d:
|
|
102
|
+
def _from_dict(cls, d: dict, components: list[Expr]) -> ColumnPropertyRef:
|
|
76
103
|
assert 'prop' in d
|
|
77
104
|
assert isinstance(components[0], ColumnRef)
|
|
78
105
|
return cls(components[0], cls.Property(d['prop']))
|
pixeltable/exprs/column_ref.py
CHANGED
|
@@ -1,16 +1,18 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
-
|
|
2
|
+
|
|
3
|
+
from typing import Any, Optional, Sequence
|
|
3
4
|
from uuid import UUID
|
|
4
5
|
|
|
5
6
|
import sqlalchemy as sql
|
|
6
7
|
|
|
7
|
-
|
|
8
|
+
import pixeltable.catalog as catalog
|
|
9
|
+
import pixeltable.exceptions as excs
|
|
10
|
+
import pixeltable.iterators as iters
|
|
11
|
+
|
|
8
12
|
from .data_row import DataRow
|
|
13
|
+
from .expr import Expr
|
|
9
14
|
from .row_builder import RowBuilder
|
|
10
15
|
from .sql_element_cache import SqlElementCache
|
|
11
|
-
import pixeltable.iterators as iters
|
|
12
|
-
import pixeltable.exceptions as excs
|
|
13
|
-
import pixeltable.catalog as catalog
|
|
14
16
|
|
|
15
17
|
|
|
16
18
|
class ColumnRef(Expr):
|
|
@@ -19,18 +21,31 @@ class ColumnRef(Expr):
|
|
|
19
21
|
When this reference is created in the context of a view, it can also refer to a column of the view base.
|
|
20
22
|
For that reason, a ColumnRef needs to be serialized with the qualifying table id (column ids are only
|
|
21
23
|
unique in the context of a particular table).
|
|
24
|
+
|
|
25
|
+
Media validation:
|
|
26
|
+
- media validation is potentially cpu-intensive, and it's desirable to schedule and parallelize it during
|
|
27
|
+
general expr evaluation
|
|
28
|
+
- media validation on read is done in ColumnRef.eval()
|
|
29
|
+
- a validating ColumnRef cannot be translated to SQL (because the validation is done in Python)
|
|
30
|
+
- in that case, the ColumnRef also instantiates a second non-validating ColumnRef as a component (= dependency)
|
|
31
|
+
- the non-validating ColumnRef is used for SQL translation
|
|
32
|
+
|
|
33
|
+
TODO:
|
|
34
|
+
separate Exprs (like validating ColumnRefs) from the logical expression tree and instead have RowBuilder
|
|
35
|
+
insert them into the EvalCtxs as needed
|
|
22
36
|
"""
|
|
23
37
|
|
|
24
38
|
col: catalog.Column
|
|
25
39
|
is_unstored_iter_col: bool
|
|
26
40
|
iter_arg_ctx: Optional[RowBuilder.EvalCtx]
|
|
27
41
|
base_rowid_len: int
|
|
28
|
-
base_rowid:
|
|
42
|
+
base_rowid: Sequence[Optional[Any]]
|
|
29
43
|
iterator: Optional[iters.ComponentIterator]
|
|
30
44
|
pos_idx: Optional[int]
|
|
31
45
|
id: int
|
|
46
|
+
perform_validation: bool # if True, performs media validation
|
|
32
47
|
|
|
33
|
-
def __init__(self, col: catalog.Column):
|
|
48
|
+
def __init__(self, col: catalog.Column, perform_validation: Optional[bool] = None):
|
|
34
49
|
super().__init__(col.col_type)
|
|
35
50
|
assert col.tbl is not None
|
|
36
51
|
self.col = col
|
|
@@ -43,17 +58,44 @@ class ColumnRef(Expr):
|
|
|
43
58
|
self.iterator = None
|
|
44
59
|
# index of the position column in the view's primary key; don't try to reference tbl.store_tbl here
|
|
45
60
|
self.pos_idx = col.tbl.num_rowid_columns() - 1 if self.is_unstored_iter_col else None
|
|
61
|
+
|
|
62
|
+
self.perform_validation = False
|
|
63
|
+
if col.col_type.is_media_type():
|
|
64
|
+
# we perform media validation if the column is a media type and the validation is set to ON_READ,
|
|
65
|
+
# unless we're told not to
|
|
66
|
+
if perform_validation is not None:
|
|
67
|
+
self.perform_validation = perform_validation
|
|
68
|
+
else:
|
|
69
|
+
self.perform_validation = (
|
|
70
|
+
col.col_type.is_media_type() and col.media_validation == catalog.MediaValidation.ON_READ
|
|
71
|
+
)
|
|
72
|
+
else:
|
|
73
|
+
assert perform_validation is None or not perform_validation
|
|
74
|
+
if self.perform_validation:
|
|
75
|
+
non_validating_col_ref = ColumnRef(col, perform_validation=False)
|
|
76
|
+
self.components = [non_validating_col_ref]
|
|
46
77
|
self.id = self._create_id()
|
|
47
78
|
|
|
48
79
|
def set_iter_arg_ctx(self, iter_arg_ctx: RowBuilder.EvalCtx) -> None:
|
|
49
80
|
self.iter_arg_ctx = iter_arg_ctx
|
|
50
81
|
assert len(self.iter_arg_ctx.target_slot_idxs) == 1 # a single inline dict
|
|
51
82
|
|
|
52
|
-
def _id_attrs(self) -> list[
|
|
53
|
-
return
|
|
83
|
+
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
84
|
+
return (
|
|
85
|
+
super()._id_attrs()
|
|
86
|
+
+ [('tbl_id', self.col.tbl.id), ('col_id', self.col.id), ('perform_validation', self.perform_validation)]
|
|
87
|
+
)
|
|
88
|
+
|
|
89
|
+
# override
|
|
90
|
+
def _retarget(self, tbl_versions: dict[UUID, catalog.TableVersion]) -> ColumnRef:
|
|
91
|
+
target = tbl_versions[self.col.tbl.id]
|
|
92
|
+
assert self.col.id in target.cols_by_id
|
|
93
|
+
col = target.cols_by_id[self.col.id]
|
|
94
|
+
return ColumnRef(col)
|
|
54
95
|
|
|
55
96
|
def __getattr__(self, name: str) -> Expr:
|
|
56
97
|
from .column_property_ref import ColumnPropertyRef
|
|
98
|
+
|
|
57
99
|
# resolve column properties
|
|
58
100
|
if name == ColumnPropertyRef.Property.ERRORTYPE.name.lower() \
|
|
59
101
|
or name == ColumnPropertyRef.Property.ERRORMSG.name.lower():
|
|
@@ -82,7 +124,7 @@ class ColumnRef(Expr):
|
|
|
82
124
|
return str(self)
|
|
83
125
|
|
|
84
126
|
def _equals(self, other: ColumnRef) -> bool:
|
|
85
|
-
return self.col == other.col
|
|
127
|
+
return self.col == other.col and self.perform_validation == other.perform_validation
|
|
86
128
|
|
|
87
129
|
def __str__(self) -> str:
|
|
88
130
|
if self.col.name is None:
|
|
@@ -93,10 +135,43 @@ class ColumnRef(Expr):
|
|
|
93
135
|
def __repr__(self) -> str:
|
|
94
136
|
return f'ColumnRef({self.col!r})'
|
|
95
137
|
|
|
138
|
+
def _repr_html_(self) -> str:
|
|
139
|
+
tbl = catalog.Catalog.get().tbls[self.col.tbl.id]
|
|
140
|
+
return tbl._description_html(cols=[self.col])._repr_html_() # type: ignore[attr-defined]
|
|
141
|
+
|
|
96
142
|
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
97
|
-
return self.col.sa_col
|
|
143
|
+
return None if self.perform_validation else self.col.sa_col
|
|
98
144
|
|
|
99
145
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
146
|
+
if self.perform_validation:
|
|
147
|
+
# validate media file of our input ColumnRef and if successful, replicate the state of that slot
|
|
148
|
+
# to our slot
|
|
149
|
+
unvalidated_slot_idx = self.components[0].slot_idx
|
|
150
|
+
if data_row.file_paths[unvalidated_slot_idx] is None:
|
|
151
|
+
# no media file to validate, we still need to replicate the value
|
|
152
|
+
assert data_row.file_urls[unvalidated_slot_idx] is None
|
|
153
|
+
val = data_row.vals[unvalidated_slot_idx]
|
|
154
|
+
data_row.vals[self.slot_idx] = val
|
|
155
|
+
data_row.has_val[self.slot_idx] = True
|
|
156
|
+
return
|
|
157
|
+
|
|
158
|
+
try:
|
|
159
|
+
self.col.col_type.validate_media(data_row.file_paths[unvalidated_slot_idx])
|
|
160
|
+
# access the value only after successful validation
|
|
161
|
+
val = data_row[unvalidated_slot_idx]
|
|
162
|
+
data_row.vals[self.slot_idx] = val
|
|
163
|
+
data_row.has_val[self.slot_idx] = True
|
|
164
|
+
# make sure that the validated slot points to the same file as the unvalidated slot
|
|
165
|
+
data_row.file_paths[self.slot_idx] = data_row.file_paths[unvalidated_slot_idx]
|
|
166
|
+
data_row.file_urls[self.slot_idx] = data_row.file_urls[unvalidated_slot_idx]
|
|
167
|
+
return
|
|
168
|
+
except excs.Error as exc:
|
|
169
|
+
# propagate the exception, but ignore it otherwise;
|
|
170
|
+
# media validation errors don't cause exceptions during query execution
|
|
171
|
+
# TODO: allow for different error-handling behavior
|
|
172
|
+
row_builder.set_exc(data_row, self.slot_idx, exc)
|
|
173
|
+
return
|
|
174
|
+
|
|
100
175
|
if not self.is_unstored_iter_col:
|
|
101
176
|
# supply default
|
|
102
177
|
data_row[self.slot_idx] = None
|
|
@@ -115,7 +190,14 @@ class ColumnRef(Expr):
|
|
|
115
190
|
def _as_dict(self) -> dict:
|
|
116
191
|
tbl = self.col.tbl
|
|
117
192
|
version = tbl.version if tbl.is_snapshot else None
|
|
118
|
-
|
|
193
|
+
# we omit self.components, even if this is a validating ColumnRef, because init() will recreate the
|
|
194
|
+
# non-validating component ColumnRef
|
|
195
|
+
return {
|
|
196
|
+
'tbl_id': str(tbl.id),
|
|
197
|
+
'tbl_version': version,
|
|
198
|
+
'col_id': self.col.id,
|
|
199
|
+
'perform_validation': self.perform_validation
|
|
200
|
+
}
|
|
119
201
|
|
|
120
202
|
@classmethod
|
|
121
203
|
def get_column(cls, d: dict) -> catalog.Column:
|
|
@@ -126,6 +208,7 @@ class ColumnRef(Expr):
|
|
|
126
208
|
return col
|
|
127
209
|
|
|
128
210
|
@classmethod
|
|
129
|
-
def _from_dict(cls, d: dict, _: list[Expr]) ->
|
|
211
|
+
def _from_dict(cls, d: dict, _: list[Expr]) -> ColumnRef:
|
|
130
212
|
col = cls.get_column(d)
|
|
131
|
-
|
|
213
|
+
perform_validation = d['perform_validation']
|
|
214
|
+
return cls(col, perform_validation=perform_validation)
|
pixeltable/exprs/comparison.py
CHANGED
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import
|
|
3
|
+
from typing import Any, Optional
|
|
4
4
|
|
|
5
5
|
import sqlalchemy as sql
|
|
6
6
|
|
|
7
7
|
import pixeltable.exceptions as excs
|
|
8
8
|
import pixeltable.index as index
|
|
9
9
|
import pixeltable.type_system as ts
|
|
10
|
+
|
|
10
11
|
from .column_ref import ColumnRef
|
|
11
12
|
from .data_row import DataRow
|
|
12
13
|
from .expr import Expr
|
|
@@ -65,7 +66,12 @@ class Comparison(Expr):
|
|
|
65
66
|
def _op2(self) -> Expr:
|
|
66
67
|
return self.components[1]
|
|
67
68
|
|
|
68
|
-
def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.
|
|
69
|
+
def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
70
|
+
if str(self._op1.col_type.to_sa_type()) != str(self._op2.col_type.to_sa_type()):
|
|
71
|
+
# Comparing columns of different SQL types (e.g., string vs. json); this can only be done in Python
|
|
72
|
+
# TODO(aaron-siegel): We may be able to handle some cases in SQL by casting one side to the other's type
|
|
73
|
+
return None
|
|
74
|
+
|
|
69
75
|
left = sql_elements.get(self._op1)
|
|
70
76
|
if self.is_search_arg_comparison:
|
|
71
77
|
# reference the index value column if there is an index and this is not a snapshot
|
|
@@ -113,11 +119,10 @@ class Comparison(Expr):
|
|
|
113
119
|
elif self.operator == ComparisonOperator.GE:
|
|
114
120
|
data_row[self.slot_idx] = left >= right
|
|
115
121
|
|
|
116
|
-
def _as_dict(self) ->
|
|
122
|
+
def _as_dict(self) -> dict:
|
|
117
123
|
return {'operator': self.operator.value, **super()._as_dict()}
|
|
118
124
|
|
|
119
125
|
@classmethod
|
|
120
|
-
def _from_dict(cls, d:
|
|
126
|
+
def _from_dict(cls, d: dict, components: list[Expr]) -> Comparison:
|
|
121
127
|
assert 'operator' in d
|
|
122
128
|
return cls(ComparisonOperator(d['operator']), components[0], components[1])
|
|
123
|
-
|
|
@@ -1,20 +1,21 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import operator
|
|
4
|
-
from typing import
|
|
4
|
+
from typing import Any, Callable, Optional
|
|
5
5
|
|
|
6
6
|
import sqlalchemy as sql
|
|
7
7
|
|
|
8
|
+
import pixeltable.type_system as ts
|
|
9
|
+
|
|
8
10
|
from .data_row import DataRow
|
|
9
11
|
from .expr import Expr
|
|
10
12
|
from .globals import LogicalOperator
|
|
11
13
|
from .row_builder import RowBuilder
|
|
12
14
|
from .sql_element_cache import SqlElementCache
|
|
13
|
-
import pixeltable.type_system as ts
|
|
14
15
|
|
|
15
16
|
|
|
16
17
|
class CompoundPredicate(Expr):
|
|
17
|
-
def __init__(self, operator: LogicalOperator, operands:
|
|
18
|
+
def __init__(self, operator: LogicalOperator, operands: list[Expr]):
|
|
18
19
|
super().__init__(ts.BoolType())
|
|
19
20
|
self.operator = operator
|
|
20
21
|
# operands are stored in self.components
|
|
@@ -23,7 +24,7 @@ class CompoundPredicate(Expr):
|
|
|
23
24
|
self.components = operands
|
|
24
25
|
else:
|
|
25
26
|
assert len(operands) > 1
|
|
26
|
-
self.operands:
|
|
27
|
+
self.operands: list[Expr] = []
|
|
27
28
|
for operand in operands:
|
|
28
29
|
self._merge_operand(operand)
|
|
29
30
|
|
|
@@ -35,7 +36,7 @@ class CompoundPredicate(Expr):
|
|
|
35
36
|
return f' {self.operator} '.join([f'({e})' for e in self.components])
|
|
36
37
|
|
|
37
38
|
@classmethod
|
|
38
|
-
def make_conjunction(cls, operands:
|
|
39
|
+
def make_conjunction(cls, operands: list[Expr]) -> Optional[Expr]:
|
|
39
40
|
if len(operands) == 0:
|
|
40
41
|
return None
|
|
41
42
|
if len(operands) == 1:
|
|
@@ -89,11 +90,11 @@ class CompoundPredicate(Expr):
|
|
|
89
90
|
val = op_function(val, data_row[op.slot_idx])
|
|
90
91
|
data_row[self.slot_idx] = val
|
|
91
92
|
|
|
92
|
-
def _as_dict(self) ->
|
|
93
|
+
def _as_dict(self) -> dict:
|
|
93
94
|
return {'operator': self.operator.value, **super()._as_dict()}
|
|
94
95
|
|
|
95
96
|
@classmethod
|
|
96
|
-
def _from_dict(cls, d:
|
|
97
|
+
def _from_dict(cls, d: dict, components: list[Expr]) -> CompoundPredicate:
|
|
97
98
|
assert 'operator' in d
|
|
98
99
|
return cls(LogicalOperator(d['operator']), components)
|
|
99
100
|
|
pixeltable/exprs/data_row.py
CHANGED
|
@@ -4,13 +4,13 @@ import datetime
|
|
|
4
4
|
import io
|
|
5
5
|
import urllib.parse
|
|
6
6
|
import urllib.request
|
|
7
|
-
from typing import
|
|
7
|
+
from typing import Any, Optional
|
|
8
8
|
|
|
9
|
-
import
|
|
10
|
-
import pgvector.sqlalchemy
|
|
9
|
+
import numpy as np
|
|
10
|
+
import pgvector.sqlalchemy # type: ignore[import-untyped]
|
|
11
11
|
import PIL
|
|
12
12
|
import PIL.Image
|
|
13
|
-
import
|
|
13
|
+
import sqlalchemy as sql
|
|
14
14
|
|
|
15
15
|
from pixeltable import env
|
|
16
16
|
|
|
@@ -57,7 +57,7 @@ class DataRow:
|
|
|
57
57
|
# - None if vals[i] is not a media type or if there is no local file yet for file_urls[i]
|
|
58
58
|
file_paths: list[Optional[str]]
|
|
59
59
|
|
|
60
|
-
def __init__(self, size: int, img_slot_idxs:
|
|
60
|
+
def __init__(self, size: int, img_slot_idxs: list[int], media_slot_idxs: list[int], array_slot_idxs: list[int]):
|
|
61
61
|
self.vals = [None] * size
|
|
62
62
|
self.has_val = [False] * size
|
|
63
63
|
self.excs = [None] * size
|
|
@@ -89,27 +89,35 @@ class DataRow:
|
|
|
89
89
|
target.file_urls = self.file_urls.copy()
|
|
90
90
|
target.file_paths = self.file_paths.copy()
|
|
91
91
|
|
|
92
|
-
def set_pk(self, pk:
|
|
92
|
+
def set_pk(self, pk: tuple[int, ...]) -> None:
|
|
93
93
|
self.pk = pk
|
|
94
94
|
|
|
95
|
-
def has_exc(self, slot_idx: int) -> bool:
|
|
96
|
-
|
|
95
|
+
def has_exc(self, slot_idx: Optional[int] = None) -> bool:
|
|
96
|
+
"""
|
|
97
|
+
Returns True if an exception has been set for the given slot index, or for any slot index if slot_idx is None
|
|
98
|
+
"""
|
|
99
|
+
if slot_idx is not None:
|
|
100
|
+
return self.excs[slot_idx] is not None
|
|
101
|
+
return any(exc is not None for exc in self.excs)
|
|
97
102
|
|
|
98
|
-
def get_exc(self, slot_idx: int) -> Exception:
|
|
99
|
-
assert self.has_val[slot_idx] is False
|
|
100
|
-
assert self.excs[slot_idx] is not None
|
|
103
|
+
def get_exc(self, slot_idx: int) -> Optional[Exception]:
|
|
101
104
|
return self.excs[slot_idx]
|
|
102
105
|
|
|
106
|
+
def get_first_exc(self) -> Optional[Exception]:
|
|
107
|
+
for exc in self.excs:
|
|
108
|
+
if exc is not None:
|
|
109
|
+
return exc
|
|
110
|
+
return None
|
|
111
|
+
|
|
103
112
|
def set_exc(self, slot_idx: int, exc: Exception) -> None:
|
|
104
113
|
assert self.excs[slot_idx] is None
|
|
105
114
|
self.excs[slot_idx] = exc
|
|
106
115
|
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
self.file_urls[slot_idx] = None
|
|
116
|
+
# an exception means the value is None
|
|
117
|
+
self.has_val[slot_idx] = True
|
|
118
|
+
self.vals[slot_idx] = None
|
|
119
|
+
self.file_paths[slot_idx] = None
|
|
120
|
+
self.file_urls[slot_idx] = None
|
|
113
121
|
|
|
114
122
|
def __len__(self) -> int:
|
|
115
123
|
return len(self.vals)
|
|
@@ -124,6 +132,7 @@ class DataRow:
|
|
|
124
132
|
|
|
125
133
|
if self.file_urls[index] is not None and index in self.img_slot_idxs:
|
|
126
134
|
# if we need to load this from a file, it should have been materialized locally
|
|
135
|
+
# TODO this fails if the url was instantiated dynamically using astype()
|
|
127
136
|
assert self.file_paths[index] is not None
|
|
128
137
|
if self.vals[index] is None:
|
|
129
138
|
self.vals[index] = PIL.Image.open(self.file_paths[index])
|
|
@@ -231,7 +240,7 @@ class DataRow:
|
|
|
231
240
|
self.vals[index] = None
|
|
232
241
|
|
|
233
242
|
@property
|
|
234
|
-
def rowid(self) ->
|
|
243
|
+
def rowid(self) -> tuple[int, ...]:
|
|
235
244
|
return self.pk[:-1]
|
|
236
245
|
|
|
237
246
|
@property
|