pixeltable 0.2.4__py3-none-any.whl → 0.2.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of pixeltable might be problematic.
- pixeltable/__init__.py +18 -9
- pixeltable/__version__.py +3 -0
- pixeltable/catalog/column.py +31 -50
- pixeltable/catalog/insertable_table.py +7 -6
- pixeltable/catalog/table.py +171 -57
- pixeltable/catalog/table_version.py +417 -140
- pixeltable/catalog/table_version_path.py +2 -2
- pixeltable/dataframe.py +239 -121
- pixeltable/env.py +82 -16
- pixeltable/exec/__init__.py +2 -1
- pixeltable/exec/cache_prefetch_node.py +1 -1
- pixeltable/exec/data_row_batch.py +6 -7
- pixeltable/exec/expr_eval_node.py +28 -28
- pixeltable/exec/in_memory_data_node.py +11 -7
- pixeltable/exec/sql_scan_node.py +7 -6
- pixeltable/exprs/__init__.py +4 -3
- pixeltable/exprs/column_ref.py +9 -0
- pixeltable/exprs/comparison.py +3 -3
- pixeltable/exprs/data_row.py +5 -1
- pixeltable/exprs/expr.py +15 -7
- pixeltable/exprs/function_call.py +17 -15
- pixeltable/exprs/image_member_access.py +9 -28
- pixeltable/exprs/in_predicate.py +96 -0
- pixeltable/exprs/inline_array.py +13 -11
- pixeltable/exprs/inline_dict.py +15 -13
- pixeltable/exprs/literal.py +16 -4
- pixeltable/exprs/row_builder.py +15 -41
- pixeltable/exprs/similarity_expr.py +65 -0
- pixeltable/ext/__init__.py +5 -0
- pixeltable/ext/functions/yolox.py +92 -0
- pixeltable/func/__init__.py +0 -2
- pixeltable/func/aggregate_function.py +18 -15
- pixeltable/func/callable_function.py +57 -13
- pixeltable/func/expr_template_function.py +20 -3
- pixeltable/func/function.py +35 -4
- pixeltable/func/globals.py +24 -14
- pixeltable/func/signature.py +23 -27
- pixeltable/func/udf.py +13 -12
- pixeltable/functions/__init__.py +8 -8
- pixeltable/functions/eval.py +7 -8
- pixeltable/functions/huggingface.py +64 -17
- pixeltable/functions/openai.py +36 -3
- pixeltable/functions/pil/image.py +61 -64
- pixeltable/functions/together.py +21 -0
- pixeltable/functions/util.py +11 -0
- pixeltable/globals.py +425 -0
- pixeltable/index/__init__.py +2 -0
- pixeltable/index/base.py +51 -0
- pixeltable/index/embedding_index.py +168 -0
- pixeltable/io/__init__.py +3 -0
- pixeltable/{utils → io}/hf_datasets.py +48 -17
- pixeltable/io/pandas.py +148 -0
- pixeltable/{utils → io}/parquet.py +58 -33
- pixeltable/iterators/__init__.py +1 -1
- pixeltable/iterators/base.py +4 -0
- pixeltable/iterators/document.py +218 -97
- pixeltable/iterators/video.py +8 -9
- pixeltable/metadata/__init__.py +7 -3
- pixeltable/metadata/converters/convert_12.py +3 -0
- pixeltable/metadata/converters/convert_13.py +41 -0
- pixeltable/metadata/schema.py +45 -22
- pixeltable/plan.py +15 -51
- pixeltable/store.py +38 -41
- pixeltable/tool/create_test_db_dump.py +39 -4
- pixeltable/type_system.py +47 -96
- pixeltable/utils/documents.py +42 -12
- pixeltable/utils/http_server.py +70 -0
- {pixeltable-0.2.4.dist-info → pixeltable-0.2.6.dist-info}/METADATA +14 -10
- pixeltable-0.2.6.dist-info/RECORD +119 -0
- {pixeltable-0.2.4.dist-info → pixeltable-0.2.6.dist-info}/WHEEL +1 -1
- pixeltable/client.py +0 -604
- pixeltable/exprs/image_similarity_predicate.py +0 -58
- pixeltable/func/batched_function.py +0 -53
- pixeltable/tests/conftest.py +0 -177
- pixeltable/tests/functions/test_fireworks.py +0 -42
- pixeltable/tests/functions/test_functions.py +0 -60
- pixeltable/tests/functions/test_huggingface.py +0 -158
- pixeltable/tests/functions/test_openai.py +0 -152
- pixeltable/tests/functions/test_together.py +0 -111
- pixeltable/tests/test_audio.py +0 -65
- pixeltable/tests/test_catalog.py +0 -27
- pixeltable/tests/test_client.py +0 -21
- pixeltable/tests/test_component_view.py +0 -370
- pixeltable/tests/test_dataframe.py +0 -439
- pixeltable/tests/test_dirs.py +0 -107
- pixeltable/tests/test_document.py +0 -120
- pixeltable/tests/test_exprs.py +0 -805
- pixeltable/tests/test_function.py +0 -324
- pixeltable/tests/test_migration.py +0 -43
- pixeltable/tests/test_nos.py +0 -54
- pixeltable/tests/test_snapshot.py +0 -208
- pixeltable/tests/test_table.py +0 -1267
- pixeltable/tests/test_transactional_directory.py +0 -42
- pixeltable/tests/test_types.py +0 -22
- pixeltable/tests/test_video.py +0 -159
- pixeltable/tests/test_view.py +0 -530
- pixeltable/tests/utils.py +0 -408
- pixeltable-0.2.4.dist-info/RECORD +0 -132
- {pixeltable-0.2.4.dist-info → pixeltable-0.2.6.dist-info}/LICENSE +0 -0
pixeltable/exprs/function_call.py
CHANGED

@@ -28,7 +28,7 @@ class FunctionCall(Expr):
         if group_by_clause is None:
             group_by_clause = []
         signature = fn.signature
-        super().__init__(
+        super().__init__(fn.call_return_type(bound_args))
         self.fn = fn
         self.is_method_call = is_method_call
         self.check_args(signature, bound_args)
@@ -46,9 +46,9 @@ class FunctionCall(Expr):

         # Tuple[int, Any]:
         # - for Exprs: (index into components, None)
-        # - otherwise: (
-        self.args: List[Tuple[int, Any]] = []
-        self.kwargs: Dict[str, Tuple[int, Any]] = {}
+        # - otherwise: (None, val)
+        self.args: List[Tuple[Optional[int], Optional[Any]]] = []
+        self.kwargs: Dict[str, Tuple[Optional[int], Optional[Any]]] = {}

         # we record the types of non-variable parameters for runtime type checks
         self.arg_types: List[ts.ColumnType] = []
@@ -62,7 +62,7 @@ class FunctionCall(Expr):
                 self.args.append((len(self.components), None))
                 self.components.append(arg.copy())
             else:
-                self.args.append((
+                self.args.append((None, arg))
             if param.kind != inspect.Parameter.VAR_POSITIONAL and param.kind != inspect.Parameter.VAR_KEYWORD:
                 self.arg_types.append(signature.parameters[param.name].col_type)

@@ -74,7 +74,7 @@ class FunctionCall(Expr):
                 self.kwargs[param_name] = (len(self.components), None)
                 self.components.append(arg.copy())
             else:
-                self.kwargs[param_name] = (
+                self.kwargs[param_name] = (None, arg)
             if fn.py_signature.parameters[param_name].kind != inspect.Parameter.VAR_KEYWORD:
                 self.kwarg_types[param_name] = signature.parameters[param_name].col_type

@@ -215,12 +215,12 @@ class FunctionCall(Expr):

     def _print_args(self, start_idx: int = 0, inline: bool = True) -> str:
         arg_strs = [
-            str(arg) if idx
+            str(arg) if idx is None else str(self.components[idx]) for idx, arg in self.args[start_idx:]
         ]
         def print_arg(arg: Any) -> str:
             return f"'{arg}'" if isinstance(arg, str) else str(arg)
         arg_strs.extend([
-            f'{param_name}={print_arg(arg) if idx
+            f'{param_name}={print_arg(arg) if idx is None else str(self.components[idx])}'
             for param_name, (idx, arg) in self.kwargs.items()
         ])
         if len(self.order_by) > 0:
@@ -287,7 +287,7 @@ class FunctionCall(Expr):
         """Return args and kwargs, constructed for data_row"""
         kwargs: Dict[str, Any] = {}
         for param_name, (component_idx, arg) in self.kwargs.items():
-            val = arg if component_idx
+            val = arg if component_idx is None else data_row[self.components[component_idx].slot_idx]
             param = self.fn.signature.parameters[param_name]
             if param.kind == inspect.Parameter.VAR_KEYWORD:
                 # expand **kwargs parameter
@@ -298,7 +298,7 @@ class FunctionCall(Expr):

         args: List[Any] = []
         for param_idx, (component_idx, arg) in enumerate(self.args):
-            val = arg if component_idx
+            val = arg if component_idx is None else data_row[self.components[component_idx].slot_idx]
             param = self.fn.signature.parameters_by_pos[param_idx]
             if param.kind == inspect.Parameter.VAR_POSITIONAL:
                 # expand *args parameter
@@ -333,7 +333,8 @@ class FunctionCall(Expr):
             # TODO: can we get rid of this extra copy?
             fn_expr = self.components[self.fn_expr_idx]
             data_row[self.slot_idx] = data_row[fn_expr.slot_idx]
-        elif isinstance(self.fn, func.CallableFunction):
+        elif isinstance(self.fn, func.CallableFunction) and not self.fn.is_batched:
+            # optimization: avoid additional level of indirection we'd get from calling Function.exec()
            data_row[self.slot_idx] = self.fn.py_fn(*args, **kwargs)
         elif self.is_window_fn_call:
             if self.has_group_by():
@@ -348,9 +349,10 @@ class FunctionCall(Expr):
                 self.aggregator = self.fn.agg_cls(**self.agg_init_args)
             self.aggregator.update(*args)
             data_row[self.slot_idx] = self.aggregator.value()
-
-            assert self.is_agg_fn_call
+        elif self.is_agg_fn_call:
             data_row[self.slot_idx] = self.aggregator.value()
+        else:
+            data_row[self.slot_idx] = self.fn.exec(*args, **kwargs)

     def _as_dict(self) -> Dict:
         result = {
@@ -369,9 +371,9 @@ class FunctionCall(Expr):
         # reassemble bound args
         fn = func.Function.from_dict(d['fn'])
         param_names = list(fn.signature.parameters.keys())
-        bound_args = {param_names[i]: arg if idx
+        bound_args = {param_names[i]: arg if idx is None else components[idx] for i, (idx, arg) in enumerate(d['args'])}
         bound_args.update(
-            {param_name: val if idx
+            {param_name: val if idx is None else components[idx] for param_name, (idx, val) in d['kwargs'].items()})
         group_by_exprs = components[d['group_by_start_idx']:d['group_by_stop_idx']]
         order_by_exprs = components[d['order_by_start_idx']:]
         fn_call = cls(
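The hunks above switch FunctionCall's argument bookkeeping to (Optional[int], value) pairs: an argument is either an index into self.components (a sub-expression) or a plain literal, with None rather than an integer sentinel marking the literal case. A minimal standalone sketch of that encoding, for illustration only (not pixeltable code):

from typing import Any, List, Optional, Tuple

components: List[str] = []                    # stand-in for the list of sub-expressions
args: List[Tuple[Optional[int], Any]] = []    # (component index, None) or (None, literal)

def record_arg(arg: Any) -> None:
    if isinstance(arg, str):                  # pretend str is our "Expr" type
        args.append((len(components), None))  # reference into components
        components.append(arg)
    else:
        args.append((None, arg))              # plain literal, no component index

def materialize(evaluated: List[Any]) -> List[Any]:
    # idx is None -> use the stored literal; otherwise look up the evaluated component
    return [val if idx is None else evaluated[idx] for idx, val in args]

record_arg('col_ref')   # expression-like argument
record_arg(42)          # literal argument
assert materialize(['evaluated col_ref']) == ['evaluated col_ref', 42]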
pixeltable/exprs/image_member_access.py
CHANGED

@@ -1,19 +1,17 @@
 from __future__ import annotations
-
+
+from typing import Optional, List, Any, Dict, Tuple

 import PIL
 import sqlalchemy as sql

+import pixeltable.exceptions as excs
+import pixeltable.func as func
+import pixeltable.type_system as ts
+from .data_row import DataRow
 from .expr import Expr
-from .column_ref import ColumnRef
 from .function_call import FunctionCall
-from .image_similarity_predicate import ImageSimilarityPredicate
-from .data_row import DataRow
 from .row_builder import RowBuilder
-import pixeltable.catalog as catalog
-import pixeltable.func as func
-import pixeltable.exceptions as excs
-import pixeltable.type_system as ts


 # TODO: this doesn't dig up all attrs for actual jpeg images
@@ -43,9 +41,7 @@ class ImageMemberAccess(Expr):
     attr_info = _create_pil_attr_info()

     def __init__(self, member_name: str, caller: Expr):
-        if member_name
-            super().__init__(ts.InvalidType())  # requires FunctionCall to return value
-        elif member_name in self.attr_info:
+        if member_name in self.attr_info:
             super().__init__(self.attr_info[member_name])
         else:
             candidates = func.FunctionRegistry.get().get_type_methods(member_name, ts.ColumnType.Type.IMAGE)
@@ -78,22 +74,8 @@ class ImageMemberAccess(Expr):
         assert len(components) == 1
         return cls(d['member_name'], components[0])

-    def __call__(self, *args, **kwargs) ->
-
-        call_signature = f'({",".join([type(arg).__name__ for arg in args])})'
-        if self.member_name == 'nearest':
-            # - caller must be ColumnRef
-            # - signature is (Union[PIL.Image.Image, str])
-            if not isinstance(caller, ColumnRef):
-                raise excs.Error(f'nearest(): caller must be an image column')
-            if len(args) != 1 or (not isinstance(args[0], PIL.Image.Image) and not isinstance(args[0], str)):
-                raise excs.Error(f'nearest(): requires a PIL.Image.Image or str, got {call_signature} instead')
-            return ImageSimilarityPredicate(
-                caller,
-                img=args[0] if isinstance(args[0], PIL.Image.Image) else None,
-                text=args[0] if isinstance(args[0], str) else None)
-
-        result = self.img_method(*[caller, *args], **kwargs)
+    def __call__(self, *args, **kwargs) -> FunctionCall:
+        result = self.img_method(*[self._caller, *args], **kwargs)
         result.is_method_call = True
         return result

@@ -112,4 +94,3 @@ class ImageMemberAccess(Expr):
            data_row[self.slot_idx] = getattr(caller_val, self.member_name)
        except AttributeError:
            data_row[self.slot_idx] = None
-
pixeltable/exprs/in_predicate.py
ADDED

@@ -0,0 +1,96 @@
+from __future__ import annotations
+
+from typing import Optional, List, Any, Dict, Tuple, Iterable
+
+import sqlalchemy as sql
+
+import pixeltable.exceptions as excs
+from .data_row import DataRow
+from .expr import Expr
+from .predicate import Predicate
+from .row_builder import RowBuilder
+
+
+class InPredicate(Predicate):
+    """Predicate corresponding to the SQL IN operator."""
+
+    def __init__(self, lhs: Expr, value_set_literal: Optional[Iterable] = None, value_set_expr: Optional[Expr] = None):
+        assert (value_set_literal is None) != (value_set_expr is None)
+        if not lhs.col_type.is_scalar_type():
+            raise excs.Error(f'isin(): only supported for scalar types, not {lhs.col_type}')
+        super().__init__()
+
+        self.value_list: Optional[list] = None  # only contains values of the correct type
+        if value_set_expr is not None:
+            if not value_set_expr.col_type.is_json_type():
+                raise excs.Error(
+                    f'isin(): argument must have a JSON type, but {value_set_expr} has type {value_set_expr.col_type}')
+            self.components = [lhs.copy(), value_set_expr.copy()]
+        else:
+            assert value_set_literal is not None
+            self.components = [lhs.copy()]
+            self.value_list = self._normalize_value_set(value_set_literal)
+
+        self.id = self._create_id()
+
+    @property
+    def _lhs(self) -> Expr:
+        return self.components[0]
+
+    @property
+    def _value_set_expr(self) -> Expr:
+        assert len(self.components) == 2
+        return self.components[1]
+
+    def _normalize_value_set(self, value_set: Any, filter_type_mismatches: bool = True) -> Iterable:
+        if not isinstance(value_set, Iterable):
+            raise excs.Error(f'isin(): argument must be an Iterable (eg, list, dict, ...), not {value_set!r}')
+        value_list = list(value_set)
+        if not filter_type_mismatches:
+            return value_list
+
+        # ignore elements of the wrong type
+        result = []
+        for val in value_list:
+            try:
+                self._lhs.col_type.validate_literal(val)
+                result.append(val)
+            except TypeError:
+                pass
+        return result
+
+    def __str__(self) -> str:
+        if self.value_list is not None:
+            return f'{self.components[0]}.isin({self.value_list})'
+        return f'{self.components[0]}.isin({self.components[1]})'
+
+    def _equals(self, other: InPredicate) -> bool:
+        return self.value_list == other.value_list
+
+    def _id_attrs(self) -> List[Tuple[str, Any]]:
+        return super()._id_attrs() + [('value_list', self.value_list)]
+
+    def sql_expr(self) -> Optional[sql.ClauseElement]:
+        lhs_sql_exprs = self.components[0].sql_expr()
+        if lhs_sql_exprs is None or self.value_list is None:
+            return None
+        return lhs_sql_exprs.in_(self.value_list)
+
+    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
+        lhs_val = data_row[self._lhs.slot_idx]
+        if self.value_list is not None:
+            data_row[self.slot_idx] = lhs_val in self.value_list
+        else:
+            value_set = data_row[self._value_set_expr.slot_idx]
+            value_list = self._normalize_value_set(value_set, filter_type_mismatches=False)
+            data_row[self.slot_idx] = lhs_val in value_list
+
+    def _as_dict(self) -> Dict:
+        return {'value_list': self.value_list, **super()._as_dict()}
+
+    @classmethod
+    def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
+        assert 'value_list' in d
+        assert len(components) <= 2
+        return cls(components[0], d['value_list'], components[1] if len(components) == 2 else None)
+
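InPredicate backs the isin() predicate named in its error messages: a literal value set can be pushed down to a SQL IN clause via sql_expr(), while a JSON-typed expression as the value set is checked per row in eval(). A hypothetical usage sketch; the table name, column name, and surrounding API calls are assumptions and may differ in 0.2.6:

import pixeltable as pxt

# hypothetical: `films` is an existing table with a string column `genre`
t = pxt.get_table('films')

# literal value set: InPredicate.sql_expr() can translate this into a SQL IN clause
t.where(t.genre.isin(['sci-fi', 'horror'])).collect()

# passing a JSON-typed expression as the value set is evaluated row by row in eval() instead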
pixeltable/exprs/inline_array.py
CHANGED

@@ -1,16 +1,16 @@
 from __future__ import annotations
-
+
 import copy
+from typing import Optional, List, Any, Dict, Tuple

-import sqlalchemy as sql
 import numpy as np
+import sqlalchemy as sql

-
+import pixeltable.type_system as ts
 from .data_row import DataRow
+from .expr import Expr
 from .inline_dict import InlineDict
 from .row_builder import RowBuilder
-import pixeltable.catalog as catalog
-import pixeltable.type_system as ts


 class InlineArray(Expr):
@@ -27,8 +27,8 @@ class InlineArray(Expr):

         # elements contains
         # - for Expr elements: (index into components, None)
-        # - for non-Expr elements: (
-        self.elements: List[Tuple[int, Any]] = []
+        # - for non-Expr elements: (None, value)
+        self.elements: List[Tuple[Optional[int], Any]] = []
         for el in elements:
             el = copy.deepcopy(el)
             if isinstance(el, list):
@@ -41,11 +41,11 @@ class InlineArray(Expr):
                 self.elements.append((len(self.components), None))
                 self.components.append(el)
             else:
-                self.elements.append((
+                self.elements.append((None, el))

         inferred_element_type = ts.InvalidType()
         for idx, val in self.elements:
-            if idx
+            if idx is not None:
                 inferred_element_type = ts.ColumnType.supertype(inferred_element_type, self.components[idx].col_type)
             else:
                 inferred_element_type = ts.ColumnType.supertype(inferred_element_type, ts.ColumnType.infer_literal_type(val))
@@ -83,7 +83,7 @@ class InlineArray(Expr):
     def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
         result = [None] * len(self.elements)
         for i, (child_idx, val) in enumerate(self.elements):
-            if child_idx
+            if child_idx is not None:
                 result[i] = data_row[self.components[child_idx].slot_idx]
             else:
                 result[i] = copy.deepcopy(val)
@@ -100,7 +100,9 @@ class InlineArray(Expr):
         assert 'elements' in d
         arg: List[Any] = []
         for idx, val in d['elements']:
-
+            # TODO Normalize idx -1 to None via schema migrations.
+            # Long-term we should not be allowing idx == -1.
+            if idx is not None and idx >= 0:  # Older schemas might have -1 instead of None
                 arg.append(components[idx])
             else:
                 arg.append(val)
pixeltable/exprs/inline_dict.py
CHANGED

@@ -1,15 +1,15 @@
 from __future__ import annotations
-
+
 import copy
+from typing import Optional, List, Any, Dict, Tuple

 import sqlalchemy as sql

-from .expr import Expr
-from .data_row import DataRow
-from .row_builder import RowBuilder
 import pixeltable.exceptions as excs
-import pixeltable.catalog as catalog
 import pixeltable.type_system as ts
+from .data_row import DataRow
+from .expr import Expr
+from .row_builder import RowBuilder


 class InlineDict(Expr):
@@ -21,8 +21,8 @@ class InlineDict(Expr):
         super().__init__(ts.JsonType())  # we need to call this in order to populate self.components
         # dict_items contains
         # - for Expr fields: (key, index into components, None)
-        # - for non-Expr fields: (key,
-        self.dict_items: List[Tuple[str, int, Any]] = []
+        # - for non-Expr fields: (key, None, value)
+        self.dict_items: List[Tuple[str, Optional[int], Any]] = []
         for key, val in d.items():
             if not isinstance(key, str):
                 raise excs.Error(f'Dictionary requires string keys, {key} has type {type(key)}')
@@ -35,11 +35,11 @@ class InlineDict(Expr):
                 self.dict_items.append((key, len(self.components), None))
                 self.components.append(val)
             else:
-                self.dict_items.append((key,
+                self.dict_items.append((key, None, val))

         self.type_spec: Optional[Dict[str, ts.ColumnType]] = {}
         for key, idx, _ in self.dict_items:
-            if idx
+            if idx is None:
                 # TODO: implement type inference for values
                 self.type_spec = None
                 break
@@ -56,7 +56,7 @@ class InlineDict(Expr):
                 return f"'{val}'"
             return str(val)
         for key, idx, val in self.dict_items:
-            if idx
+            if idx is not None:
                 item_strs.append(f"'{key}': {str(self.components[i])}")
                 i += 1
             else:
@@ -71,7 +71,7 @@ class InlineDict(Expr):

     def to_dict(self) -> Dict[str, Any]:
         """Return the original dict used to construct this"""
-        return {key: val if idx
+        return {key: val if idx is None else self.components[idx] for key, idx, val in self.dict_items}

     def sql_expr(self) -> Optional[sql.ClauseElement]:
         return None
@@ -80,7 +80,7 @@ class InlineDict(Expr):
         result = {}
         for key, idx, val in self.dict_items:
             assert isinstance(key, str)
-            if idx
+            if idx is not None:
                 result[key] = data_row[self.components[idx].slot_idx]
             else:
                 result[key] = copy.deepcopy(val)
@@ -94,7 +94,9 @@ class InlineDict(Expr):
         assert 'dict_items' in d
         arg: Dict[str, Any] = {}
         for key, idx, val in d['dict_items']:
-
+            # TODO Normalize idx -1 to None via schema migrations.
+            # Long-term we should not be allowing idx == -1.
+            if idx is not None and idx >= 0:  # Older schemas might have -1 instead of None
                 arg[key] = components[idx]
             else:
                 arg[key] = val
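Both _from_dict() implementations above accept -1 as well as None for the "no component" case so that expression metadata serialized by older releases still loads. A standalone sketch of that tolerant deserialization (illustrative only, not pixeltable code):

from typing import Any, List, Optional

def resolve_item(idx: Optional[int], val: Any, components: List[Any]) -> Any:
    # older serialized metadata used -1 as the "no component" sentinel; newer metadata
    # uses None; accept both until a schema migration normalizes the stored form
    if idx is not None and idx >= 0:
        return components[idx]
    return val

assert resolve_item(None, 'literal', ['expr']) == 'literal'
assert resolve_item(-1, 'literal', ['expr']) == 'literal'   # legacy encoding
assert resolve_item(0, None, ['expr']) == 'expr'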
pixeltable/exprs/literal.py
CHANGED

@@ -1,13 +1,16 @@
 from __future__ import annotations
+
+import datetime
 from typing import Optional, List, Any, Dict, Tuple

 import sqlalchemy as sql

-
+import pixeltable.exceptions as excs
+import pixeltable.type_system as ts
 from .data_row import DataRow
+from .expr import Expr
 from .row_builder import RowBuilder
-
-import pixeltable.type_system as ts
+

 class Literal(Expr):
     def __init__(self, val: Any, col_type: Optional[ts.ColumnType] = None):
@@ -46,9 +49,18 @@ class Literal(Expr):
         data_row[self.slot_idx] = self.val

     def _as_dict(self) -> Dict:
-
+        # For some types, we need to explictly record their type, because JSON does not know
+        # how to interpret them unambiguously
+        if self.col_type.is_timestamp_type():
+            return {'val': self.val.isoformat(), 'val_t': self.col_type._type.name, **super()._as_dict()}
+        else:
+            return {'val': self.val, **super()._as_dict()}

     @classmethod
     def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
         assert 'val' in d
+        if 'val_t' in d:
+            val_t = d['val_t']
+            assert val_t == ts.ColumnType.Type.TIMESTAMP.name
+            return cls(datetime.datetime.fromisoformat(d['val']))
         return cls(d['val'])
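The Literal change stores timestamps as ISO-8601 strings tagged with 'val_t' so they can be recovered unambiguously from JSON. A standalone sketch of the round-trip this enables (illustrative only):

import datetime
import json

# serialize: ISO-8601 string plus an explicit type tag
ts_val = datetime.datetime(2024, 3, 1, 12, 30, 0)
serialized = json.dumps({'val': ts_val.isoformat(), 'val_t': 'TIMESTAMP'})

# deserialize: the tag tells us to rebuild a datetime instead of keeping the string
d = json.loads(serialized)
restored = datetime.datetime.fromisoformat(d['val']) if d.get('val_t') == 'TIMESTAMP' else d['val']
assert restored == ts_val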
pixeltable/exprs/row_builder.py
CHANGED

@@ -54,14 +54,14 @@ class RowBuilder:
     target_exprs: List[Expr]  # exprs corresponding to target_slot_idxs

     def __init__(
-        self, output_exprs: List[Expr], columns: List[catalog.Column],
-        indices: List[Tuple[catalog.Column, func.Function]], input_exprs: List[Expr]
+        self, output_exprs: List[Expr], columns: List[catalog.Column], input_exprs: List[Expr]
     ):
         """
         Args:
             output_exprs: list of Exprs to be evaluated
             columns: list of columns to be materialized
-
+            input_exprs: list of Exprs that are excluded from evaluation (because they're already materialized)
+        TODO: enforce that output_exprs doesn't overlap with input_exprs?
         """
         self.unique_exprs = ExprSet()  # dependencies precede their dependents
         self.next_slot_idx = 0
@@ -73,7 +73,6 @@ class RowBuilder:
         # output exprs: all exprs the caller wants to materialize
         # - explicitly requested output_exprs
         # - values for computed columns
-        # - embedding values for indices
         resolve_cols = set(columns)
         self.output_exprs = [
             self._record_unique_expr(e.copy().resolve_computed_cols(resolve_cols=resolve_cols), recursive=True)
@@ -97,21 +96,6 @@ class RowBuilder:
             ref = self._record_unique_expr(ref, recursive=False)
             self.add_table_column(col, ref.slot_idx)

-        # record indices; indexed by slot_idx
-        self.index_columns: List[catalog.Column] = []
-        for col, embedding_fn in indices:
-            # we assume that the parameter of the embedding function is a ref to an image column
-            assert col.col_type.is_image_type()
-            # construct expr to compute embedding; explicitly resize images to the required size
-            target_img_type = next(iter(embedding_fn.signature.parameters.values())).col_type
-            expr = embedding_fn(ColumnRef(col).resize(target_img_type.size))
-            expr = self._record_unique_expr(expr, recursive=True)
-            self.output_exprs.append(expr)
-            if len(self.index_columns) <= expr.slot_idx:
-                # pad to slot_idx
-                self.index_columns.extend([None] * (expr.slot_idx - len(self.index_columns) + 1))
-            self.index_columns[expr.slot_idx] = col
-
         # default eval ctx: all output exprs
         self.default_eval_ctx = self.create_eval_ctx(self.output_exprs, exclude=unique_input_exprs)

@@ -170,13 +154,6 @@ class RowBuilder:
         """Return ColumnSlotIdx for output columns"""
         return self.table_columns

-    def index_slot_idxs(self) -> List[ColumnSlotIdx]:
-        """Return ColumnSlotIdx for index columns"""
-        return [
-            ColumnSlotIdx(self.output_columns[i], i) for i in range(len(self.index_columns))
-            if self.output_columns[i] is not None
-        ]
-
     @property
     def num_materialized(self) -> int:
         return self.next_slot_idx
@@ -204,12 +181,16 @@ class RowBuilder:
         for i, c in enumerate(expr.components):
             # make sure we only refer to components that have themselves been recorded
             expr.components[i] = self._record_unique_expr(c, True)
-        assert expr.slot_idx
+        assert expr.slot_idx is None
         expr.slot_idx = self._next_slot_idx()
         self.unique_exprs.append(expr)
         return expr

     def _record_output_expr_id(self, e: Expr, output_expr_id: int) -> None:
+        assert e.slot_idx is not None
+        assert output_expr_id is not None
+        if e.slot_idx in self.input_expr_slot_idxs:
+            return
         self.output_expr_ids[e.slot_idx].add(output_expr_id)
         for d in e.dependencies():
             self._record_output_expr_id(d, output_expr_id)
@@ -334,22 +315,15 @@ class RowBuilder:
                 exc = data_row.get_exc(slot_idx)
                 num_excs += 1
                 exc_col_ids.add(col.id)
-                table_row[col.
-                table_row[col.
-                table_row[col.
+                table_row[col.store_name()] = None
+                table_row[col.errortype_store_name()] = type(exc).__name__
+                table_row[col.errormsg_store_name()] = str(exc)
             else:
-                val = data_row.get_stored_val(slot_idx)
-                table_row[col.
+                val = data_row.get_stored_val(slot_idx, col.sa_col.type)
+                table_row[col.store_name()] = val
                 # we unfortunately need to set these, even if there are no errors
-                table_row[col.
-                table_row[col.
-
-        for slot_idx, col in enumerate(self.index_columns):
-            if col is None:
-                continue
-            # don't use get_stored_val() here, we need to pass in the ndarray
-            val = data_row[slot_idx]
-            table_row[col.index_storage_name()] = val
+                table_row[col.errortype_store_name()] = None
+                table_row[col.errormsg_store_name()] = None

         return table_row, num_excs

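The rewritten row-assembly code writes each stored column together with companion error cells, filled from the exception when evaluation failed and set to None otherwise. A standalone sketch of that pattern; the store-column names below are stand-ins, the real code asks the Column for store_name(), errortype_store_name() and errormsg_store_name():

from typing import Any, Dict, Optional

def assemble_cell(col: str, value: Any, exc: Optional[Exception]) -> Dict[str, Any]:
    # on failure: null value plus the exception's type and message; on success: value plus null error cells
    if exc is not None:
        return {col: None, f'{col}_errortype': type(exc).__name__, f'{col}_errormsg': str(exc)}
    return {col: value, f'{col}_errortype': None, f'{col}_errormsg': None}

assert assemble_cell('img', 'blob', None) == {'img': 'blob', 'img_errortype': None, 'img_errormsg': None}
assert assemble_cell('img', None, ValueError('bad image'))['img_errortype'] == 'ValueError'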
pixeltable/exprs/similarity_expr.py
ADDED

@@ -0,0 +1,65 @@
+from typing import Optional, List
+
+import sqlalchemy as sql
+import PIL.Image
+
+import pixeltable.exceptions as excs
+import pixeltable.type_system as ts
+from .column_ref import ColumnRef
+from .data_row import DataRow
+from .expr import Expr
+from .literal import Literal
+from .row_builder import RowBuilder
+
+
+class SimilarityExpr(Expr):
+
+    def __init__(self, col_ref: ColumnRef, item: Expr):
+        super().__init__(ts.FloatType())
+        self.components = [col_ref, item]
+        self.id = self._create_id()
+        assert isinstance(item, Literal)
+        assert item.col_type.is_string_type() or item.col_type.is_image_type()
+
+        # determine index to use
+        idx_info = col_ref.col.get_idx_info()
+        if len(idx_info) == 0:
+            raise excs.Error(f'No index found for column {col_ref.col}')
+        if len(idx_info) > 1:
+            raise excs.Error(
+                f'Column {col_ref.col.name} has multiple indices; use the index name to disambiguate, '
+                f'e.g., `{col_ref.col.name}.<index-name>.similarity(...)`')
+        self.idx_info = next(iter(idx_info.values()))
+        idx = self.idx_info.idx
+
+        if item.col_type.is_string_type() and idx.txt_embed is None:
+            raise excs.Error(
+                f'Embedding index {self.idx_info.name} on column {self.idx_info.col.name} was created without the '
+                f'text_embed parameter and does not support text queries')
+        if item.col_type.is_image_type() and idx.img_embed is None:
+            raise excs.Error(
+                f'Embedding index {self.idx_info.name} on column {self.idx_info.col.name} was created without the '
+                f'img_embed parameter and does not support image queries')
+
+    def __str__(self) -> str:
+        return f'{self.components[0]}.similarity({self.components[1]})'
+
+    def sql_expr(self) -> Optional[sql.ClauseElement]:
+        assert isinstance(self.components[1], Literal)
+        item = self.components[1].val
+        return self.idx_info.idx.similarity_clause(self.idx_info.val_col, item)
+
+    def as_order_by_clause(self, is_asc: bool) -> Optional[sql.ClauseElement]:
+        assert isinstance(self.components[1], Literal)
+        item = self.components[1].val
+        return self.idx_info.idx.order_by_clause(self.idx_info.val_col, item, is_asc)
+
+    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
+        # this should never get called
+        assert False
+
+    @classmethod
+    def _from_dict(cls, d: dict, components: List[Expr]) -> Expr:
+        assert len(components) == 2
+        assert isinstance(components[0], ColumnRef)
+        return cls(components[0], components[1])
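SimilarityExpr resolves a column's embedding index and turns similarity() calls into index-backed SQL (similarity_clause() for predicates, order_by_clause() for ranking), replacing the removed nearest()/ImageSimilarityPredicate path. A hypothetical usage sketch; the table, column, and surrounding API calls are assumptions based on the expression's error messages and may differ in 0.2.6:

import pixeltable as pxt

# hypothetical: `photos` has an image column `img` with an embedding index that was
# created with a txt_embed function, so text queries are supported
t = pxt.get_table('photos')

sim = t.img.similarity('a dog playing fetch')        # resolved against the embedding index, not evaluated in Python
res = t.order_by(sim, asc=False).limit(5).collect()  # ranking uses as_order_by_clause() under the hood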
pixeltable/ext/__init__.py
ADDED

@@ -0,0 +1,5 @@
+"""
+Extended integrations for Pixeltable. This package contains experimental or demonstration features that
+are not intended for production use. Long-term support cannot be guaranteed, usually because the features
+have dependencies whose future support is unclear.
+"""