pixeltable 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +20 -9
- pixeltable/__version__.py +3 -0
- pixeltable/catalog/column.py +23 -7
- pixeltable/catalog/insertable_table.py +32 -19
- pixeltable/catalog/table.py +210 -20
- pixeltable/catalog/table_version.py +272 -111
- pixeltable/catalog/table_version_path.py +6 -1
- pixeltable/dataframe.py +184 -110
- pixeltable/datatransfer/__init__.py +1 -0
- pixeltable/datatransfer/label_studio.py +526 -0
- pixeltable/datatransfer/remote.py +113 -0
- pixeltable/env.py +213 -79
- pixeltable/exec/__init__.py +2 -1
- pixeltable/exec/data_row_batch.py +6 -7
- pixeltable/exec/expr_eval_node.py +28 -28
- pixeltable/exec/sql_scan_node.py +7 -6
- pixeltable/exprs/__init__.py +4 -3
- pixeltable/exprs/column_ref.py +11 -2
- pixeltable/exprs/comparison.py +39 -1
- pixeltable/exprs/data_row.py +7 -0
- pixeltable/exprs/expr.py +26 -19
- pixeltable/exprs/function_call.py +17 -18
- pixeltable/exprs/globals.py +14 -2
- pixeltable/exprs/image_member_access.py +9 -28
- pixeltable/exprs/in_predicate.py +96 -0
- pixeltable/exprs/inline_array.py +13 -11
- pixeltable/exprs/inline_dict.py +15 -13
- pixeltable/exprs/row_builder.py +7 -1
- pixeltable/exprs/similarity_expr.py +67 -0
- pixeltable/ext/functions/whisperx.py +30 -0
- pixeltable/ext/functions/yolox.py +16 -0
- pixeltable/func/__init__.py +0 -2
- pixeltable/func/aggregate_function.py +5 -2
- pixeltable/func/callable_function.py +57 -13
- pixeltable/func/expr_template_function.py +14 -3
- pixeltable/func/function.py +35 -4
- pixeltable/func/signature.py +5 -15
- pixeltable/func/udf.py +8 -12
- pixeltable/functions/fireworks.py +9 -4
- pixeltable/functions/huggingface.py +48 -5
- pixeltable/functions/openai.py +49 -11
- pixeltable/functions/pil/image.py +61 -64
- pixeltable/functions/together.py +32 -6
- pixeltable/functions/util.py +0 -43
- pixeltable/functions/video.py +46 -8
- pixeltable/globals.py +443 -0
- pixeltable/index/__init__.py +1 -0
- pixeltable/index/base.py +9 -2
- pixeltable/index/btree.py +54 -0
- pixeltable/index/embedding_index.py +91 -15
- pixeltable/io/__init__.py +4 -0
- pixeltable/io/globals.py +59 -0
- pixeltable/{utils → io}/hf_datasets.py +48 -17
- pixeltable/io/pandas.py +148 -0
- pixeltable/{utils → io}/parquet.py +58 -33
- pixeltable/iterators/__init__.py +1 -1
- pixeltable/iterators/base.py +8 -4
- pixeltable/iterators/document.py +225 -93
- pixeltable/iterators/video.py +16 -9
- pixeltable/metadata/__init__.py +8 -4
- pixeltable/metadata/converters/convert_12.py +3 -0
- pixeltable/metadata/converters/convert_13.py +41 -0
- pixeltable/metadata/converters/convert_14.py +13 -0
- pixeltable/metadata/converters/convert_15.py +29 -0
- pixeltable/metadata/converters/util.py +63 -0
- pixeltable/metadata/schema.py +12 -6
- pixeltable/plan.py +11 -24
- pixeltable/store.py +16 -23
- pixeltable/tool/create_test_db_dump.py +49 -14
- pixeltable/type_system.py +27 -58
- pixeltable/utils/coco.py +94 -0
- pixeltable/utils/documents.py +42 -12
- pixeltable/utils/http_server.py +70 -0
- pixeltable-0.2.7.dist-info/METADATA +137 -0
- pixeltable-0.2.7.dist-info/RECORD +126 -0
- {pixeltable-0.2.5.dist-info → pixeltable-0.2.7.dist-info}/WHEEL +1 -1
- pixeltable/client.py +0 -600
- pixeltable/exprs/image_similarity_predicate.py +0 -58
- pixeltable/func/batched_function.py +0 -53
- pixeltable/func/nos_function.py +0 -202
- pixeltable/tests/conftest.py +0 -171
- pixeltable/tests/ext/test_yolox.py +0 -21
- pixeltable/tests/functions/test_fireworks.py +0 -43
- pixeltable/tests/functions/test_functions.py +0 -60
- pixeltable/tests/functions/test_huggingface.py +0 -158
- pixeltable/tests/functions/test_openai.py +0 -162
- pixeltable/tests/functions/test_together.py +0 -112
- pixeltable/tests/test_audio.py +0 -65
- pixeltable/tests/test_catalog.py +0 -27
- pixeltable/tests/test_client.py +0 -21
- pixeltable/tests/test_component_view.py +0 -379
- pixeltable/tests/test_dataframe.py +0 -440
- pixeltable/tests/test_dirs.py +0 -107
- pixeltable/tests/test_document.py +0 -120
- pixeltable/tests/test_exprs.py +0 -802
- pixeltable/tests/test_function.py +0 -332
- pixeltable/tests/test_index.py +0 -138
- pixeltable/tests/test_migration.py +0 -44
- pixeltable/tests/test_nos.py +0 -54
- pixeltable/tests/test_snapshot.py +0 -231
- pixeltable/tests/test_table.py +0 -1343
- pixeltable/tests/test_transactional_directory.py +0 -42
- pixeltable/tests/test_types.py +0 -52
- pixeltable/tests/test_video.py +0 -159
- pixeltable/tests/test_view.py +0 -535
- pixeltable/tests/utils.py +0 -442
- pixeltable/utils/clip.py +0 -18
- pixeltable-0.2.5.dist-info/METADATA +0 -128
- pixeltable-0.2.5.dist-info/RECORD +0 -139
- {pixeltable-0.2.5.dist-info → pixeltable-0.2.7.dist-info}/LICENSE +0 -0
pixeltable/exprs/__init__.py
CHANGED
|
@@ -6,9 +6,10 @@ from .comparison import Comparison
|
|
|
6
6
|
from .compound_predicate import CompoundPredicate
|
|
7
7
|
from .data_row import DataRow
|
|
8
8
|
from .expr import Expr
|
|
9
|
+
from .expr_set import ExprSet
|
|
9
10
|
from .function_call import FunctionCall
|
|
10
11
|
from .image_member_access import ImageMemberAccess
|
|
11
|
-
from .
|
|
12
|
+
from .in_predicate import InPredicate
|
|
12
13
|
from .inline_array import InlineArray
|
|
13
14
|
from .inline_dict import InlineDict
|
|
14
15
|
from .is_null import IsNull
|
|
@@ -16,9 +17,9 @@ from .json_mapper import JsonMapper
|
|
|
16
17
|
from .json_path import RELATIVE_PATH_ROOT, JsonPath
|
|
17
18
|
from .literal import Literal
|
|
18
19
|
from .object_ref import ObjectRef
|
|
19
|
-
from .variable import Variable
|
|
20
20
|
from .predicate import Predicate
|
|
21
21
|
from .row_builder import RowBuilder, ColumnSlotIdx, ExecProfile
|
|
22
22
|
from .rowid_ref import RowidRef
|
|
23
|
-
from .
|
|
23
|
+
from .similarity_expr import SimilarityExpr
|
|
24
24
|
from .type_cast import TypeCast
|
|
25
|
+
from .variable import Variable
|
pixeltable/exprs/column_ref.py
CHANGED
|
@@ -63,6 +63,15 @@ class ColumnRef(Expr):
|
|
|
63
63
|
|
|
64
64
|
return super().__getattr__(name)
|
|
65
65
|
|
|
66
|
+
def similarity(self, other: Any) -> Expr:
|
|
67
|
+
if isinstance(other, Expr):
|
|
68
|
+
raise excs.Error(f'similarity(): requires a string or a PIL.Image.Image object, not an expression')
|
|
69
|
+
item = Expr.from_object(other)
|
|
70
|
+
if item is None or not(item.col_type.is_string_type() or item.col_type.is_image_type()):
|
|
71
|
+
raise excs.Error(f'similarity(): requires a string or a PIL.Image.Image object, not a {type(other)}')
|
|
72
|
+
from .similarity_expr import SimilarityExpr
|
|
73
|
+
return SimilarityExpr(self, item)
|
|
74
|
+
|
|
66
75
|
def default_column_name(self) -> Optional[str]:
|
|
67
76
|
return str(self)
|
|
68
77
|
|
|
@@ -99,7 +108,7 @@ class ColumnRef(Expr):
|
|
|
99
108
|
def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
|
|
100
109
|
tbl_id, version, col_id = UUID(d['tbl_id']), d['tbl_version'], d['col_id']
|
|
101
110
|
tbl_version = catalog.Catalog.get().tbl_versions[(tbl_id, version)]
|
|
102
|
-
|
|
103
|
-
col = tbl_version.
|
|
111
|
+
# don't use tbl_version.cols_by_id here, this might be a snapshot reference to a column that was then dropped
|
|
112
|
+
col = next(col for col in tbl_version.cols if col.id == col_id)
|
|
104
113
|
return cls(col)
|
|
105
114
|
|
pixeltable/exprs/comparison.py
CHANGED
|
@@ -4,18 +4,44 @@ from typing import Optional, List, Any, Dict, Tuple
|
|
|
4
4
|
|
|
5
5
|
import sqlalchemy as sql
|
|
6
6
|
|
|
7
|
+
from .column_ref import ColumnRef
|
|
7
8
|
from .data_row import DataRow
|
|
8
9
|
from .expr import Expr
|
|
9
10
|
from .globals import ComparisonOperator
|
|
11
|
+
from .literal import Literal
|
|
10
12
|
from .predicate import Predicate
|
|
11
13
|
from .row_builder import RowBuilder
|
|
14
|
+
import pixeltable.exceptions as excs
|
|
15
|
+
import pixeltable.index as index
|
|
12
16
|
|
|
13
17
|
|
|
14
18
|
class Comparison(Predicate):
|
|
15
19
|
def __init__(self, operator: ComparisonOperator, op1: Expr, op2: Expr):
|
|
16
20
|
super().__init__()
|
|
17
21
|
self.operator = operator
|
|
18
|
-
|
|
22
|
+
|
|
23
|
+
# if this is a comparison of a column to a literal (ie, could be used as a search argument in an index lookup),
|
|
24
|
+
# normalize it to <column> <operator> <literal>.
|
|
25
|
+
if isinstance(op1, ColumnRef) and isinstance(op2, Literal):
|
|
26
|
+
self.is_search_arg_comparison = True
|
|
27
|
+
self.components = [op1, op2]
|
|
28
|
+
elif isinstance(op1, Literal) and isinstance(op2, ColumnRef):
|
|
29
|
+
self.is_search_arg_comparison = True
|
|
30
|
+
self.components = [op2, op1]
|
|
31
|
+
self.operator = self.operator.reverse()
|
|
32
|
+
else:
|
|
33
|
+
self.is_search_arg_comparison = False
|
|
34
|
+
self.components = [op1, op2]
|
|
35
|
+
|
|
36
|
+
import pixeltable.index as index
|
|
37
|
+
if self.is_search_arg_comparison and self._op2.col_type.is_string_type() \
|
|
38
|
+
and len(self._op2.val) >= index.BtreeIndex.MAX_STRING_LEN:
|
|
39
|
+
# we can't use an index for this after all
|
|
40
|
+
raise excs.Error(
|
|
41
|
+
f'String literal too long for comparison against indexed column {self._op1.col.name!r} '
|
|
42
|
+
f'(max length is {index.BtreeIndex.MAX_STRING_LEN - 1})'
|
|
43
|
+
)
|
|
44
|
+
|
|
19
45
|
self.id = self._create_id()
|
|
20
46
|
|
|
21
47
|
def __str__(self) -> str:
|
|
@@ -37,6 +63,18 @@ class Comparison(Predicate):
|
|
|
37
63
|
|
|
38
64
|
def sql_expr(self) -> Optional[sql.ClauseElement]:
|
|
39
65
|
left = self._op1.sql_expr()
|
|
66
|
+
if self.is_search_arg_comparison:
|
|
67
|
+
# reference the index value column if there is an index and this is not a snapshot
|
|
68
|
+
# (indices don't apply to snapshots)
|
|
69
|
+
tbl = self._op1.col.tbl
|
|
70
|
+
idx_info = [
|
|
71
|
+
info for info in self._op1.col.get_idx_info().values() if isinstance(info.idx, index.BtreeIndex)
|
|
72
|
+
]
|
|
73
|
+
if len(idx_info) > 0 and not tbl.is_snapshot:
|
|
74
|
+
# there shouldn't be multiple B-tree indices on a column
|
|
75
|
+
assert len(idx_info) == 1
|
|
76
|
+
left = idx_info[0].val_col.sa_col
|
|
77
|
+
|
|
40
78
|
right = self._op2.sql_expr()
|
|
41
79
|
if left is None or right is None:
|
|
42
80
|
return None
|
pixeltable/exprs/data_row.py
CHANGED
pixeltable/exprs/expr.py
CHANGED
|
@@ -60,9 +60,9 @@ class Expr(abc.ABC):
|
|
|
60
60
|
|
|
61
61
|
# index of the expr's value in the data row:
|
|
62
62
|
# - set for all materialized exprs
|
|
63
|
-
# -
|
|
63
|
+
# - None: not executable
|
|
64
64
|
# - not set for subexprs that don't need to be materialized because the parent can be materialized via SQL
|
|
65
|
-
self.slot_idx =
|
|
65
|
+
self.slot_idx: Optional[int] = None
|
|
66
66
|
self.components: List[Expr] = [] # the subexprs that are needed to construct this expr
|
|
67
67
|
|
|
68
68
|
def dependencies(self) -> List[Expr]:
|
|
@@ -110,6 +110,11 @@ class Expr(abc.ABC):
|
|
|
110
110
|
return False
|
|
111
111
|
return self._equals(other)
|
|
112
112
|
|
|
113
|
+
def _equals(self, other: Expr) -> bool:
|
|
114
|
+
# we already compared the type and components in equals(); subclasses that require additional comparisons
|
|
115
|
+
# override this
|
|
116
|
+
return True
|
|
117
|
+
|
|
113
118
|
def _id_attrs(self) -> List[Tuple[str, Any]]:
|
|
114
119
|
"""Returns attribute name/value pairs that are used to construct the instance id.
|
|
115
120
|
|
|
@@ -148,7 +153,7 @@ class Expr(abc.ABC):
|
|
|
148
153
|
cls = self.__class__
|
|
149
154
|
result = cls.__new__(cls)
|
|
150
155
|
result.__dict__.update(self.__dict__)
|
|
151
|
-
result.slot_idx =
|
|
156
|
+
result.slot_idx = None
|
|
152
157
|
result.components = [c.copy() for c in self.components]
|
|
153
158
|
return result
|
|
154
159
|
|
|
@@ -164,16 +169,22 @@ class Expr(abc.ABC):
|
|
|
164
169
|
memo[id(self)] = result
|
|
165
170
|
return result
|
|
166
171
|
|
|
167
|
-
def substitute(self,
|
|
172
|
+
def substitute(self, spec: dict[Expr, Expr]) -> Expr:
|
|
168
173
|
"""
|
|
169
174
|
Replace 'old' with 'new' recursively.
|
|
170
175
|
"""
|
|
171
|
-
|
|
172
|
-
|
|
176
|
+
for old, new in spec.items():
|
|
177
|
+
if self.equals(old):
|
|
178
|
+
return new.copy()
|
|
173
179
|
for i in range(len(self.components)):
|
|
174
|
-
self.components[i] = self.components[i].substitute(
|
|
180
|
+
self.components[i] = self.components[i].substitute(spec)
|
|
175
181
|
return self
|
|
176
182
|
|
|
183
|
+
@classmethod
|
|
184
|
+
def list_substitute(cls, expr_list: List[Expr], spec: dict[Expr, Expr]) -> None:
|
|
185
|
+
for i in range(len(expr_list)):
|
|
186
|
+
expr_list[i] = expr_list[i].substitute(spec)
|
|
187
|
+
|
|
177
188
|
def resolve_computed_cols(self, resolve_cols: Optional[Set[catalog.Column]] = None) -> Expr:
|
|
178
189
|
"""
|
|
179
190
|
Recursively replace ColRefs to unstored computed columns with their value exprs.
|
|
@@ -191,9 +202,7 @@ class Expr(abc.ABC):
|
|
|
191
202
|
])
|
|
192
203
|
if len(target_col_refs) == 0:
|
|
193
204
|
return result
|
|
194
|
-
for ref in target_col_refs
|
|
195
|
-
assert ref.col.value_expr is not None
|
|
196
|
-
result = result.substitute(ref, ref.col.value_expr)
|
|
205
|
+
result = result.substitute({ref: ref.col.value_expr for ref in target_col_refs})
|
|
197
206
|
|
|
198
207
|
def is_bound_by(self, tbl: catalog.TableVersionPath) -> bool:
|
|
199
208
|
"""Returns True if this expr can be evaluated in the context of tbl."""
|
|
@@ -220,11 +229,6 @@ class Expr(abc.ABC):
|
|
|
220
229
|
self.components[i] = self.components[i]._retarget(tbl_versions)
|
|
221
230
|
return self
|
|
222
231
|
|
|
223
|
-
@classmethod
|
|
224
|
-
def list_substitute(cls, expr_list: List[Expr], old: Expr, new: Expr) -> None:
|
|
225
|
-
for i in range(len(expr_list)):
|
|
226
|
-
expr_list[i] = expr_list[i].substitute(old, new)
|
|
227
|
-
|
|
228
232
|
@abc.abstractmethod
|
|
229
233
|
def __str__(self) -> str:
|
|
230
234
|
pass
|
|
@@ -313,10 +317,6 @@ class Expr(abc.ABC):
|
|
|
313
317
|
return InlineArray(tuple(o))
|
|
314
318
|
return None
|
|
315
319
|
|
|
316
|
-
@abc.abstractmethod
|
|
317
|
-
def _equals(self, other: Expr) -> bool:
|
|
318
|
-
pass
|
|
319
|
-
|
|
320
320
|
@abc.abstractmethod
|
|
321
321
|
def sql_expr(self) -> Optional[sql.ClauseElement]:
|
|
322
322
|
"""
|
|
@@ -396,6 +396,13 @@ class Expr(abc.ABC):
|
|
|
396
396
|
def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
|
|
397
397
|
assert False, 'not implemented'
|
|
398
398
|
|
|
399
|
+
def isin(self, value_set: Any) -> 'pixeltable.exprs.InPredicate':
|
|
400
|
+
from .in_predicate import InPredicate
|
|
401
|
+
if isinstance(value_set, Expr):
|
|
402
|
+
return InPredicate(self, value_set_expr=value_set)
|
|
403
|
+
else:
|
|
404
|
+
return InPredicate(self, value_set_literal=value_set)
|
|
405
|
+
|
|
399
406
|
def astype(self, new_type: ts.ColumnType) -> 'pixeltable.exprs.TypeCast':
|
|
400
407
|
from pixeltable.exprs import TypeCast
|
|
401
408
|
return TypeCast(self, new_type)
|
|
@@ -28,7 +28,7 @@ class FunctionCall(Expr):
|
|
|
28
28
|
if group_by_clause is None:
|
|
29
29
|
group_by_clause = []
|
|
30
30
|
signature = fn.signature
|
|
31
|
-
super().__init__(
|
|
31
|
+
super().__init__(fn.call_return_type(bound_args))
|
|
32
32
|
self.fn = fn
|
|
33
33
|
self.is_method_call = is_method_call
|
|
34
34
|
self.check_args(signature, bound_args)
|
|
@@ -46,9 +46,9 @@ class FunctionCall(Expr):
|
|
|
46
46
|
|
|
47
47
|
# Tuple[int, Any]:
|
|
48
48
|
# - for Exprs: (index into components, None)
|
|
49
|
-
# - otherwise: (
|
|
50
|
-
self.args: List[Tuple[int, Any]] = []
|
|
51
|
-
self.kwargs: Dict[str, Tuple[int, Any]] = {}
|
|
49
|
+
# - otherwise: (None, val)
|
|
50
|
+
self.args: List[Tuple[Optional[int], Optional[Any]]] = []
|
|
51
|
+
self.kwargs: Dict[str, Tuple[Optional[int], Optional[Any]]] = {}
|
|
52
52
|
|
|
53
53
|
# we record the types of non-variable parameters for runtime type checks
|
|
54
54
|
self.arg_types: List[ts.ColumnType] = []
|
|
@@ -62,7 +62,7 @@ class FunctionCall(Expr):
|
|
|
62
62
|
self.args.append((len(self.components), None))
|
|
63
63
|
self.components.append(arg.copy())
|
|
64
64
|
else:
|
|
65
|
-
self.args.append((
|
|
65
|
+
self.args.append((None, arg))
|
|
66
66
|
if param.kind != inspect.Parameter.VAR_POSITIONAL and param.kind != inspect.Parameter.VAR_KEYWORD:
|
|
67
67
|
self.arg_types.append(signature.parameters[param.name].col_type)
|
|
68
68
|
|
|
@@ -74,7 +74,7 @@ class FunctionCall(Expr):
|
|
|
74
74
|
self.kwargs[param_name] = (len(self.components), None)
|
|
75
75
|
self.components.append(arg.copy())
|
|
76
76
|
else:
|
|
77
|
-
self.kwargs[param_name] = (
|
|
77
|
+
self.kwargs[param_name] = (None, arg)
|
|
78
78
|
if fn.py_signature.parameters[param_name].kind != inspect.Parameter.VAR_KEYWORD:
|
|
79
79
|
self.kwarg_types[param_name] = signature.parameters[param_name].col_type
|
|
80
80
|
|
|
@@ -174,9 +174,6 @@ class FunctionCall(Expr):
|
|
|
174
174
|
f'Parameter {param_name}: argument type {arg.col_type} does not match parameter type '
|
|
175
175
|
f'{param_type}')
|
|
176
176
|
|
|
177
|
-
def is_nos_call(self) -> bool:
|
|
178
|
-
return isinstance(self.fn, func.NOSFunction)
|
|
179
|
-
|
|
180
177
|
def _equals(self, other: FunctionCall) -> bool:
|
|
181
178
|
if self.fn != other.fn:
|
|
182
179
|
return False
|
|
@@ -215,12 +212,12 @@ class FunctionCall(Expr):
|
|
|
215
212
|
|
|
216
213
|
def _print_args(self, start_idx: int = 0, inline: bool = True) -> str:
|
|
217
214
|
arg_strs = [
|
|
218
|
-
str(arg) if idx
|
|
215
|
+
str(arg) if idx is None else str(self.components[idx]) for idx, arg in self.args[start_idx:]
|
|
219
216
|
]
|
|
220
217
|
def print_arg(arg: Any) -> str:
|
|
221
218
|
return f"'{arg}'" if isinstance(arg, str) else str(arg)
|
|
222
219
|
arg_strs.extend([
|
|
223
|
-
f'{param_name}={print_arg(arg) if idx
|
|
220
|
+
f'{param_name}={print_arg(arg) if idx is None else str(self.components[idx])}'
|
|
224
221
|
for param_name, (idx, arg) in self.kwargs.items()
|
|
225
222
|
])
|
|
226
223
|
if len(self.order_by) > 0:
|
|
@@ -287,7 +284,7 @@ class FunctionCall(Expr):
|
|
|
287
284
|
"""Return args and kwargs, constructed for data_row"""
|
|
288
285
|
kwargs: Dict[str, Any] = {}
|
|
289
286
|
for param_name, (component_idx, arg) in self.kwargs.items():
|
|
290
|
-
val = arg if component_idx
|
|
287
|
+
val = arg if component_idx is None else data_row[self.components[component_idx].slot_idx]
|
|
291
288
|
param = self.fn.signature.parameters[param_name]
|
|
292
289
|
if param.kind == inspect.Parameter.VAR_KEYWORD:
|
|
293
290
|
# expand **kwargs parameter
|
|
@@ -298,7 +295,7 @@ class FunctionCall(Expr):
|
|
|
298
295
|
|
|
299
296
|
args: List[Any] = []
|
|
300
297
|
for param_idx, (component_idx, arg) in enumerate(self.args):
|
|
301
|
-
val = arg if component_idx
|
|
298
|
+
val = arg if component_idx is None else data_row[self.components[component_idx].slot_idx]
|
|
302
299
|
param = self.fn.signature.parameters_by_pos[param_idx]
|
|
303
300
|
if param.kind == inspect.Parameter.VAR_POSITIONAL:
|
|
304
301
|
# expand *args parameter
|
|
@@ -333,7 +330,8 @@ class FunctionCall(Expr):
|
|
|
333
330
|
# TODO: can we get rid of this extra copy?
|
|
334
331
|
fn_expr = self.components[self.fn_expr_idx]
|
|
335
332
|
data_row[self.slot_idx] = data_row[fn_expr.slot_idx]
|
|
336
|
-
elif isinstance(self.fn, func.CallableFunction):
|
|
333
|
+
elif isinstance(self.fn, func.CallableFunction) and not self.fn.is_batched:
|
|
334
|
+
# optimization: avoid additional level of indirection we'd get from calling Function.exec()
|
|
337
335
|
data_row[self.slot_idx] = self.fn.py_fn(*args, **kwargs)
|
|
338
336
|
elif self.is_window_fn_call:
|
|
339
337
|
if self.has_group_by():
|
|
@@ -348,9 +346,10 @@ class FunctionCall(Expr):
|
|
|
348
346
|
self.aggregator = self.fn.agg_cls(**self.agg_init_args)
|
|
349
347
|
self.aggregator.update(*args)
|
|
350
348
|
data_row[self.slot_idx] = self.aggregator.value()
|
|
351
|
-
|
|
352
|
-
assert self.is_agg_fn_call
|
|
349
|
+
elif self.is_agg_fn_call:
|
|
353
350
|
data_row[self.slot_idx] = self.aggregator.value()
|
|
351
|
+
else:
|
|
352
|
+
data_row[self.slot_idx] = self.fn.exec(*args, **kwargs)
|
|
354
353
|
|
|
355
354
|
def _as_dict(self) -> Dict:
|
|
356
355
|
result = {
|
|
@@ -369,9 +368,9 @@ class FunctionCall(Expr):
|
|
|
369
368
|
# reassemble bound args
|
|
370
369
|
fn = func.Function.from_dict(d['fn'])
|
|
371
370
|
param_names = list(fn.signature.parameters.keys())
|
|
372
|
-
bound_args = {param_names[i]: arg if idx
|
|
371
|
+
bound_args = {param_names[i]: arg if idx is None else components[idx] for i, (idx, arg) in enumerate(d['args'])}
|
|
373
372
|
bound_args.update(
|
|
374
|
-
{param_name: val if idx
|
|
373
|
+
{param_name: val if idx is None else components[idx] for param_name, (idx, val) in d['kwargs'].items()})
|
|
375
374
|
group_by_exprs = components[d['group_by_start_idx']:d['group_by_stop_idx']]
|
|
376
375
|
order_by_exprs = components[d['order_by_start_idx']:]
|
|
377
376
|
fn_call = cls(
|
pixeltable/exprs/globals.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import datetime
|
|
2
|
-
from typing import Union
|
|
3
4
|
import enum
|
|
4
|
-
|
|
5
|
+
from typing import Union
|
|
5
6
|
|
|
6
7
|
# Python types corresponding to our literal types
|
|
7
8
|
LiteralPythonTypes = Union[str, int, float, bool, datetime.datetime, datetime.date]
|
|
@@ -33,6 +34,17 @@ class ComparisonOperator(enum.Enum):
|
|
|
33
34
|
if self == self.GE:
|
|
34
35
|
return '>='
|
|
35
36
|
|
|
37
|
+
def reverse(self) -> ComparisonOperator:
|
|
38
|
+
if self == self.LT:
|
|
39
|
+
return self.GT
|
|
40
|
+
if self == self.LE:
|
|
41
|
+
return self.GE
|
|
42
|
+
if self == self.GT:
|
|
43
|
+
return self.LT
|
|
44
|
+
if self == self.GE:
|
|
45
|
+
return self.LE
|
|
46
|
+
return self
|
|
47
|
+
|
|
36
48
|
|
|
37
49
|
class LogicalOperator(enum.Enum):
|
|
38
50
|
AND = 0
|
|
@@ -1,19 +1,17 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
-
|
|
2
|
+
|
|
3
|
+
from typing import Optional, List, Any, Dict, Tuple
|
|
3
4
|
|
|
4
5
|
import PIL
|
|
5
6
|
import sqlalchemy as sql
|
|
6
7
|
|
|
8
|
+
import pixeltable.exceptions as excs
|
|
9
|
+
import pixeltable.func as func
|
|
10
|
+
import pixeltable.type_system as ts
|
|
11
|
+
from .data_row import DataRow
|
|
7
12
|
from .expr import Expr
|
|
8
|
-
from .column_ref import ColumnRef
|
|
9
13
|
from .function_call import FunctionCall
|
|
10
|
-
from .image_similarity_predicate import ImageSimilarityPredicate
|
|
11
|
-
from .data_row import DataRow
|
|
12
14
|
from .row_builder import RowBuilder
|
|
13
|
-
import pixeltable.catalog as catalog
|
|
14
|
-
import pixeltable.func as func
|
|
15
|
-
import pixeltable.exceptions as excs
|
|
16
|
-
import pixeltable.type_system as ts
|
|
17
15
|
|
|
18
16
|
|
|
19
17
|
# TODO: this doesn't dig up all attrs for actual jpeg images
|
|
@@ -43,9 +41,7 @@ class ImageMemberAccess(Expr):
|
|
|
43
41
|
attr_info = _create_pil_attr_info()
|
|
44
42
|
|
|
45
43
|
def __init__(self, member_name: str, caller: Expr):
|
|
46
|
-
if member_name
|
|
47
|
-
super().__init__(ts.InvalidType()) # requires FunctionCall to return value
|
|
48
|
-
elif member_name in self.attr_info:
|
|
44
|
+
if member_name in self.attr_info:
|
|
49
45
|
super().__init__(self.attr_info[member_name])
|
|
50
46
|
else:
|
|
51
47
|
candidates = func.FunctionRegistry.get().get_type_methods(member_name, ts.ColumnType.Type.IMAGE)
|
|
@@ -78,22 +74,8 @@ class ImageMemberAccess(Expr):
|
|
|
78
74
|
assert len(components) == 1
|
|
79
75
|
return cls(d['member_name'], components[0])
|
|
80
76
|
|
|
81
|
-
def __call__(self, *args, **kwargs) ->
|
|
82
|
-
|
|
83
|
-
call_signature = f'({",".join([type(arg).__name__ for arg in args])})'
|
|
84
|
-
if self.member_name == 'nearest':
|
|
85
|
-
# - caller must be ColumnRef
|
|
86
|
-
# - signature is (Union[PIL.Image.Image, str])
|
|
87
|
-
if not isinstance(caller, ColumnRef):
|
|
88
|
-
raise excs.Error(f'nearest(): caller must be an image column')
|
|
89
|
-
if len(args) != 1 or (not isinstance(args[0], PIL.Image.Image) and not isinstance(args[0], str)):
|
|
90
|
-
raise excs.Error(f'nearest(): requires a PIL.Image.Image or str, got {call_signature} instead')
|
|
91
|
-
return ImageSimilarityPredicate(
|
|
92
|
-
caller,
|
|
93
|
-
img=args[0] if isinstance(args[0], PIL.Image.Image) else None,
|
|
94
|
-
text=args[0] if isinstance(args[0], str) else None)
|
|
95
|
-
|
|
96
|
-
result = self.img_method(*[caller, *args], **kwargs)
|
|
77
|
+
def __call__(self, *args, **kwargs) -> FunctionCall:
|
|
78
|
+
result = self.img_method(*[self._caller, *args], **kwargs)
|
|
97
79
|
result.is_method_call = True
|
|
98
80
|
return result
|
|
99
81
|
|
|
@@ -112,4 +94,3 @@ class ImageMemberAccess(Expr):
|
|
|
112
94
|
data_row[self.slot_idx] = getattr(caller_val, self.member_name)
|
|
113
95
|
except AttributeError:
|
|
114
96
|
data_row[self.slot_idx] = None
|
|
115
|
-
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Optional, List, Any, Dict, Tuple, Iterable
|
|
4
|
+
|
|
5
|
+
import sqlalchemy as sql
|
|
6
|
+
|
|
7
|
+
import pixeltable.exceptions as excs
|
|
8
|
+
from .data_row import DataRow
|
|
9
|
+
from .expr import Expr
|
|
10
|
+
from .predicate import Predicate
|
|
11
|
+
from .row_builder import RowBuilder
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class InPredicate(Predicate):
|
|
15
|
+
"""Predicate corresponding to the SQL IN operator."""
|
|
16
|
+
|
|
17
|
+
def __init__(self, lhs: Expr, value_set_literal: Optional[Iterable] = None, value_set_expr: Optional[Expr] = None):
|
|
18
|
+
assert (value_set_literal is None) != (value_set_expr is None)
|
|
19
|
+
if not lhs.col_type.is_scalar_type():
|
|
20
|
+
raise excs.Error(f'isin(): only supported for scalar types, not {lhs.col_type}')
|
|
21
|
+
super().__init__()
|
|
22
|
+
|
|
23
|
+
self.value_list: Optional[list] = None # only contains values of the correct type
|
|
24
|
+
if value_set_expr is not None:
|
|
25
|
+
if not value_set_expr.col_type.is_json_type():
|
|
26
|
+
raise excs.Error(
|
|
27
|
+
f'isin(): argument must have a JSON type, but {value_set_expr} has type {value_set_expr.col_type}')
|
|
28
|
+
self.components = [lhs.copy(), value_set_expr.copy()]
|
|
29
|
+
else:
|
|
30
|
+
assert value_set_literal is not None
|
|
31
|
+
self.components = [lhs.copy()]
|
|
32
|
+
self.value_list = self._normalize_value_set(value_set_literal)
|
|
33
|
+
|
|
34
|
+
self.id = self._create_id()
|
|
35
|
+
|
|
36
|
+
@property
|
|
37
|
+
def _lhs(self) -> Expr:
|
|
38
|
+
return self.components[0]
|
|
39
|
+
|
|
40
|
+
@property
|
|
41
|
+
def _value_set_expr(self) -> Expr:
|
|
42
|
+
assert len(self.components) == 2
|
|
43
|
+
return self.components[1]
|
|
44
|
+
|
|
45
|
+
def _normalize_value_set(self, value_set: Any, filter_type_mismatches: bool = True) -> Iterable:
|
|
46
|
+
if not isinstance(value_set, Iterable):
|
|
47
|
+
raise excs.Error(f'isin(): argument must be an Iterable (eg, list, dict, ...), not {value_set!r}')
|
|
48
|
+
value_list = list(value_set)
|
|
49
|
+
if not filter_type_mismatches:
|
|
50
|
+
return value_list
|
|
51
|
+
|
|
52
|
+
# ignore elements of the wrong type
|
|
53
|
+
result = []
|
|
54
|
+
for val in value_list:
|
|
55
|
+
try:
|
|
56
|
+
self._lhs.col_type.validate_literal(val)
|
|
57
|
+
result.append(val)
|
|
58
|
+
except TypeError:
|
|
59
|
+
pass
|
|
60
|
+
return result
|
|
61
|
+
|
|
62
|
+
def __str__(self) -> str:
|
|
63
|
+
if self.value_list is not None:
|
|
64
|
+
return f'{self.components[0]}.isin({self.value_list})'
|
|
65
|
+
return f'{self.components[0]}.isin({self.components[1]})'
|
|
66
|
+
|
|
67
|
+
def _equals(self, other: InPredicate) -> bool:
|
|
68
|
+
return self.value_list == other.value_list
|
|
69
|
+
|
|
70
|
+
def _id_attrs(self) -> List[Tuple[str, Any]]:
|
|
71
|
+
return super()._id_attrs() + [('value_list', self.value_list)]
|
|
72
|
+
|
|
73
|
+
def sql_expr(self) -> Optional[sql.ClauseElement]:
|
|
74
|
+
lhs_sql_exprs = self.components[0].sql_expr()
|
|
75
|
+
if lhs_sql_exprs is None or self.value_list is None:
|
|
76
|
+
return None
|
|
77
|
+
return lhs_sql_exprs.in_(self.value_list)
|
|
78
|
+
|
|
79
|
+
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
80
|
+
lhs_val = data_row[self._lhs.slot_idx]
|
|
81
|
+
if self.value_list is not None:
|
|
82
|
+
data_row[self.slot_idx] = lhs_val in self.value_list
|
|
83
|
+
else:
|
|
84
|
+
value_set = data_row[self._value_set_expr.slot_idx]
|
|
85
|
+
value_list = self._normalize_value_set(value_set, filter_type_mismatches=False)
|
|
86
|
+
data_row[self.slot_idx] = lhs_val in value_list
|
|
87
|
+
|
|
88
|
+
def _as_dict(self) -> Dict:
|
|
89
|
+
return {'value_list': self.value_list, **super()._as_dict()}
|
|
90
|
+
|
|
91
|
+
@classmethod
|
|
92
|
+
def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
|
|
93
|
+
assert 'value_list' in d
|
|
94
|
+
assert len(components) <= 2
|
|
95
|
+
return cls(components[0], d['value_list'], components[1] if len(components) == 2 else None)
|
|
96
|
+
|
pixeltable/exprs/inline_array.py
CHANGED
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
-
|
|
2
|
+
|
|
3
3
|
import copy
|
|
4
|
+
from typing import Optional, List, Any, Dict, Tuple
|
|
4
5
|
|
|
5
|
-
import sqlalchemy as sql
|
|
6
6
|
import numpy as np
|
|
7
|
+
import sqlalchemy as sql
|
|
7
8
|
|
|
8
|
-
|
|
9
|
+
import pixeltable.type_system as ts
|
|
9
10
|
from .data_row import DataRow
|
|
11
|
+
from .expr import Expr
|
|
10
12
|
from .inline_dict import InlineDict
|
|
11
13
|
from .row_builder import RowBuilder
|
|
12
|
-
import pixeltable.catalog as catalog
|
|
13
|
-
import pixeltable.type_system as ts
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
class InlineArray(Expr):
|
|
@@ -27,8 +27,8 @@ class InlineArray(Expr):
|
|
|
27
27
|
|
|
28
28
|
# elements contains
|
|
29
29
|
# - for Expr elements: (index into components, None)
|
|
30
|
-
# - for non-Expr elements: (
|
|
31
|
-
self.elements: List[Tuple[int, Any]] = []
|
|
30
|
+
# - for non-Expr elements: (None, value)
|
|
31
|
+
self.elements: List[Tuple[Optional[int], Any]] = []
|
|
32
32
|
for el in elements:
|
|
33
33
|
el = copy.deepcopy(el)
|
|
34
34
|
if isinstance(el, list):
|
|
@@ -41,11 +41,11 @@ class InlineArray(Expr):
|
|
|
41
41
|
self.elements.append((len(self.components), None))
|
|
42
42
|
self.components.append(el)
|
|
43
43
|
else:
|
|
44
|
-
self.elements.append((
|
|
44
|
+
self.elements.append((None, el))
|
|
45
45
|
|
|
46
46
|
inferred_element_type = ts.InvalidType()
|
|
47
47
|
for idx, val in self.elements:
|
|
48
|
-
if idx
|
|
48
|
+
if idx is not None:
|
|
49
49
|
inferred_element_type = ts.ColumnType.supertype(inferred_element_type, self.components[idx].col_type)
|
|
50
50
|
else:
|
|
51
51
|
inferred_element_type = ts.ColumnType.supertype(inferred_element_type, ts.ColumnType.infer_literal_type(val))
|
|
@@ -83,7 +83,7 @@ class InlineArray(Expr):
|
|
|
83
83
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
84
84
|
result = [None] * len(self.elements)
|
|
85
85
|
for i, (child_idx, val) in enumerate(self.elements):
|
|
86
|
-
if child_idx
|
|
86
|
+
if child_idx is not None:
|
|
87
87
|
result[i] = data_row[self.components[child_idx].slot_idx]
|
|
88
88
|
else:
|
|
89
89
|
result[i] = copy.deepcopy(val)
|
|
@@ -100,7 +100,9 @@ class InlineArray(Expr):
|
|
|
100
100
|
assert 'elements' in d
|
|
101
101
|
arg: List[Any] = []
|
|
102
102
|
for idx, val in d['elements']:
|
|
103
|
-
|
|
103
|
+
# TODO Normalize idx -1 to None via schema migrations.
|
|
104
|
+
# Long-term we should not be allowing idx == -1.
|
|
105
|
+
if idx is not None and idx >= 0: # Older schemas might have -1 instead of None
|
|
104
106
|
arg.append(components[idx])
|
|
105
107
|
else:
|
|
106
108
|
arg.append(val)
|