pixeltable 0.2.5__py3-none-any.whl → 0.2.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +18 -9
- pixeltable/__version__.py +3 -0
- pixeltable/catalog/column.py +9 -5
- pixeltable/catalog/insertable_table.py +0 -2
- pixeltable/catalog/table.py +16 -8
- pixeltable/catalog/table_version.py +3 -2
- pixeltable/dataframe.py +184 -110
- pixeltable/env.py +69 -18
- pixeltable/exec/__init__.py +2 -1
- pixeltable/exec/data_row_batch.py +6 -7
- pixeltable/exec/expr_eval_node.py +28 -28
- pixeltable/exec/sql_scan_node.py +7 -6
- pixeltable/exprs/__init__.py +4 -3
- pixeltable/exprs/column_ref.py +9 -0
- pixeltable/exprs/expr.py +15 -7
- pixeltable/exprs/function_call.py +17 -15
- pixeltable/exprs/image_member_access.py +9 -28
- pixeltable/exprs/in_predicate.py +96 -0
- pixeltable/exprs/inline_array.py +13 -11
- pixeltable/exprs/inline_dict.py +15 -13
- pixeltable/exprs/row_builder.py +7 -1
- pixeltable/exprs/similarity_expr.py +65 -0
- pixeltable/func/__init__.py +0 -2
- pixeltable/func/aggregate_function.py +3 -0
- pixeltable/func/callable_function.py +57 -13
- pixeltable/func/expr_template_function.py +11 -2
- pixeltable/func/function.py +35 -4
- pixeltable/func/signature.py +5 -15
- pixeltable/func/udf.py +6 -10
- pixeltable/functions/huggingface.py +23 -4
- pixeltable/functions/openai.py +34 -1
- pixeltable/functions/pil/image.py +61 -64
- pixeltable/functions/together.py +21 -0
- pixeltable/globals.py +425 -0
- pixeltable/index/base.py +3 -1
- pixeltable/index/embedding_index.py +87 -14
- pixeltable/io/__init__.py +3 -0
- pixeltable/{utils → io}/hf_datasets.py +48 -17
- pixeltable/io/pandas.py +148 -0
- pixeltable/{utils → io}/parquet.py +58 -33
- pixeltable/iterators/__init__.py +1 -1
- pixeltable/iterators/base.py +4 -0
- pixeltable/iterators/document.py +218 -97
- pixeltable/iterators/video.py +8 -9
- pixeltable/metadata/__init__.py +7 -3
- pixeltable/metadata/converters/convert_12.py +3 -0
- pixeltable/metadata/converters/convert_13.py +41 -0
- pixeltable/plan.py +2 -19
- pixeltable/store.py +2 -2
- pixeltable/tool/create_test_db_dump.py +32 -13
- pixeltable/type_system.py +13 -54
- pixeltable/utils/documents.py +42 -12
- pixeltable/utils/http_server.py +70 -0
- {pixeltable-0.2.5.dist-info → pixeltable-0.2.6.dist-info}/METADATA +10 -7
- pixeltable-0.2.6.dist-info/RECORD +119 -0
- {pixeltable-0.2.5.dist-info → pixeltable-0.2.6.dist-info}/WHEEL +1 -1
- pixeltable/client.py +0 -600
- pixeltable/exprs/image_similarity_predicate.py +0 -58
- pixeltable/func/batched_function.py +0 -53
- pixeltable/tests/conftest.py +0 -171
- pixeltable/tests/ext/test_yolox.py +0 -21
- pixeltable/tests/functions/test_fireworks.py +0 -43
- pixeltable/tests/functions/test_functions.py +0 -60
- pixeltable/tests/functions/test_huggingface.py +0 -158
- pixeltable/tests/functions/test_openai.py +0 -162
- pixeltable/tests/functions/test_together.py +0 -112
- pixeltable/tests/test_audio.py +0 -65
- pixeltable/tests/test_catalog.py +0 -27
- pixeltable/tests/test_client.py +0 -21
- pixeltable/tests/test_component_view.py +0 -379
- pixeltable/tests/test_dataframe.py +0 -440
- pixeltable/tests/test_dirs.py +0 -107
- pixeltable/tests/test_document.py +0 -120
- pixeltable/tests/test_exprs.py +0 -802
- pixeltable/tests/test_function.py +0 -332
- pixeltable/tests/test_index.py +0 -138
- pixeltable/tests/test_migration.py +0 -44
- pixeltable/tests/test_nos.py +0 -54
- pixeltable/tests/test_snapshot.py +0 -231
- pixeltable/tests/test_table.py +0 -1343
- pixeltable/tests/test_transactional_directory.py +0 -42
- pixeltable/tests/test_types.py +0 -52
- pixeltable/tests/test_video.py +0 -159
- pixeltable/tests/test_view.py +0 -535
- pixeltable/tests/utils.py +0 -442
- pixeltable-0.2.5.dist-info/RECORD +0 -139
- {pixeltable-0.2.5.dist-info → pixeltable-0.2.6.dist-info}/LICENSE +0 -0
|
@@ -1,19 +1,17 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
-
|
|
2
|
+
|
|
3
|
+
from typing import Optional, List, Any, Dict, Tuple
|
|
3
4
|
|
|
4
5
|
import PIL
|
|
5
6
|
import sqlalchemy as sql
|
|
6
7
|
|
|
8
|
+
import pixeltable.exceptions as excs
|
|
9
|
+
import pixeltable.func as func
|
|
10
|
+
import pixeltable.type_system as ts
|
|
11
|
+
from .data_row import DataRow
|
|
7
12
|
from .expr import Expr
|
|
8
|
-
from .column_ref import ColumnRef
|
|
9
13
|
from .function_call import FunctionCall
|
|
10
|
-
from .image_similarity_predicate import ImageSimilarityPredicate
|
|
11
|
-
from .data_row import DataRow
|
|
12
14
|
from .row_builder import RowBuilder
|
|
13
|
-
import pixeltable.catalog as catalog
|
|
14
|
-
import pixeltable.func as func
|
|
15
|
-
import pixeltable.exceptions as excs
|
|
16
|
-
import pixeltable.type_system as ts
|
|
17
15
|
|
|
18
16
|
|
|
19
17
|
# TODO: this doesn't dig up all attrs for actual jpeg images
|
|
@@ -43,9 +41,7 @@ class ImageMemberAccess(Expr):
|
|
|
43
41
|
attr_info = _create_pil_attr_info()
|
|
44
42
|
|
|
45
43
|
def __init__(self, member_name: str, caller: Expr):
|
|
46
|
-
if member_name
|
|
47
|
-
super().__init__(ts.InvalidType()) # requires FunctionCall to return value
|
|
48
|
-
elif member_name in self.attr_info:
|
|
44
|
+
if member_name in self.attr_info:
|
|
49
45
|
super().__init__(self.attr_info[member_name])
|
|
50
46
|
else:
|
|
51
47
|
candidates = func.FunctionRegistry.get().get_type_methods(member_name, ts.ColumnType.Type.IMAGE)
|
|
@@ -78,22 +74,8 @@ class ImageMemberAccess(Expr):
|
|
|
78
74
|
assert len(components) == 1
|
|
79
75
|
return cls(d['member_name'], components[0])
|
|
80
76
|
|
|
81
|
-
def __call__(self, *args, **kwargs) ->
|
|
82
|
-
|
|
83
|
-
call_signature = f'({",".join([type(arg).__name__ for arg in args])})'
|
|
84
|
-
if self.member_name == 'nearest':
|
|
85
|
-
# - caller must be ColumnRef
|
|
86
|
-
# - signature is (Union[PIL.Image.Image, str])
|
|
87
|
-
if not isinstance(caller, ColumnRef):
|
|
88
|
-
raise excs.Error(f'nearest(): caller must be an image column')
|
|
89
|
-
if len(args) != 1 or (not isinstance(args[0], PIL.Image.Image) and not isinstance(args[0], str)):
|
|
90
|
-
raise excs.Error(f'nearest(): requires a PIL.Image.Image or str, got {call_signature} instead')
|
|
91
|
-
return ImageSimilarityPredicate(
|
|
92
|
-
caller,
|
|
93
|
-
img=args[0] if isinstance(args[0], PIL.Image.Image) else None,
|
|
94
|
-
text=args[0] if isinstance(args[0], str) else None)
|
|
95
|
-
|
|
96
|
-
result = self.img_method(*[caller, *args], **kwargs)
|
|
77
|
+
def __call__(self, *args, **kwargs) -> FunctionCall:
|
|
78
|
+
result = self.img_method(*[self._caller, *args], **kwargs)
|
|
97
79
|
result.is_method_call = True
|
|
98
80
|
return result
|
|
99
81
|
|
|
@@ -112,4 +94,3 @@ class ImageMemberAccess(Expr):
|
|
|
112
94
|
data_row[self.slot_idx] = getattr(caller_val, self.member_name)
|
|
113
95
|
except AttributeError:
|
|
114
96
|
data_row[self.slot_idx] = None
|
|
115
|
-
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Optional, List, Any, Dict, Tuple, Iterable
|
|
4
|
+
|
|
5
|
+
import sqlalchemy as sql
|
|
6
|
+
|
|
7
|
+
import pixeltable.exceptions as excs
|
|
8
|
+
from .data_row import DataRow
|
|
9
|
+
from .expr import Expr
|
|
10
|
+
from .predicate import Predicate
|
|
11
|
+
from .row_builder import RowBuilder
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class InPredicate(Predicate):
|
|
15
|
+
"""Predicate corresponding to the SQL IN operator."""
|
|
16
|
+
|
|
17
|
+
def __init__(self, lhs: Expr, value_set_literal: Optional[Iterable] = None, value_set_expr: Optional[Expr] = None):
|
|
18
|
+
assert (value_set_literal is None) != (value_set_expr is None)
|
|
19
|
+
if not lhs.col_type.is_scalar_type():
|
|
20
|
+
raise excs.Error(f'isin(): only supported for scalar types, not {lhs.col_type}')
|
|
21
|
+
super().__init__()
|
|
22
|
+
|
|
23
|
+
self.value_list: Optional[list] = None # only contains values of the correct type
|
|
24
|
+
if value_set_expr is not None:
|
|
25
|
+
if not value_set_expr.col_type.is_json_type():
|
|
26
|
+
raise excs.Error(
|
|
27
|
+
f'isin(): argument must have a JSON type, but {value_set_expr} has type {value_set_expr.col_type}')
|
|
28
|
+
self.components = [lhs.copy(), value_set_expr.copy()]
|
|
29
|
+
else:
|
|
30
|
+
assert value_set_literal is not None
|
|
31
|
+
self.components = [lhs.copy()]
|
|
32
|
+
self.value_list = self._normalize_value_set(value_set_literal)
|
|
33
|
+
|
|
34
|
+
self.id = self._create_id()
|
|
35
|
+
|
|
36
|
+
@property
|
|
37
|
+
def _lhs(self) -> Expr:
|
|
38
|
+
return self.components[0]
|
|
39
|
+
|
|
40
|
+
@property
|
|
41
|
+
def _value_set_expr(self) -> Expr:
|
|
42
|
+
assert len(self.components) == 2
|
|
43
|
+
return self.components[1]
|
|
44
|
+
|
|
45
|
+
def _normalize_value_set(self, value_set: Any, filter_type_mismatches: bool = True) -> Iterable:
|
|
46
|
+
if not isinstance(value_set, Iterable):
|
|
47
|
+
raise excs.Error(f'isin(): argument must be an Iterable (eg, list, dict, ...), not {value_set!r}')
|
|
48
|
+
value_list = list(value_set)
|
|
49
|
+
if not filter_type_mismatches:
|
|
50
|
+
return value_list
|
|
51
|
+
|
|
52
|
+
# ignore elements of the wrong type
|
|
53
|
+
result = []
|
|
54
|
+
for val in value_list:
|
|
55
|
+
try:
|
|
56
|
+
self._lhs.col_type.validate_literal(val)
|
|
57
|
+
result.append(val)
|
|
58
|
+
except TypeError:
|
|
59
|
+
pass
|
|
60
|
+
return result
|
|
61
|
+
|
|
62
|
+
def __str__(self) -> str:
|
|
63
|
+
if self.value_list is not None:
|
|
64
|
+
return f'{self.components[0]}.isin({self.value_list})'
|
|
65
|
+
return f'{self.components[0]}.isin({self.components[1]})'
|
|
66
|
+
|
|
67
|
+
def _equals(self, other: InPredicate) -> bool:
|
|
68
|
+
return self.value_list == other.value_list
|
|
69
|
+
|
|
70
|
+
def _id_attrs(self) -> List[Tuple[str, Any]]:
|
|
71
|
+
return super()._id_attrs() + [('value_list', self.value_list)]
|
|
72
|
+
|
|
73
|
+
def sql_expr(self) -> Optional[sql.ClauseElement]:
|
|
74
|
+
lhs_sql_exprs = self.components[0].sql_expr()
|
|
75
|
+
if lhs_sql_exprs is None or self.value_list is None:
|
|
76
|
+
return None
|
|
77
|
+
return lhs_sql_exprs.in_(self.value_list)
|
|
78
|
+
|
|
79
|
+
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
80
|
+
lhs_val = data_row[self._lhs.slot_idx]
|
|
81
|
+
if self.value_list is not None:
|
|
82
|
+
data_row[self.slot_idx] = lhs_val in self.value_list
|
|
83
|
+
else:
|
|
84
|
+
value_set = data_row[self._value_set_expr.slot_idx]
|
|
85
|
+
value_list = self._normalize_value_set(value_set, filter_type_mismatches=False)
|
|
86
|
+
data_row[self.slot_idx] = lhs_val in value_list
|
|
87
|
+
|
|
88
|
+
def _as_dict(self) -> Dict:
|
|
89
|
+
return {'value_list': self.value_list, **super()._as_dict()}
|
|
90
|
+
|
|
91
|
+
@classmethod
|
|
92
|
+
def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
|
|
93
|
+
assert 'value_list' in d
|
|
94
|
+
assert len(components) <= 2
|
|
95
|
+
return cls(components[0], d['value_list'], components[1] if len(components) == 2 else None)
|
|
96
|
+
|
pixeltable/exprs/inline_array.py
CHANGED
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
-
|
|
2
|
+
|
|
3
3
|
import copy
|
|
4
|
+
from typing import Optional, List, Any, Dict, Tuple
|
|
4
5
|
|
|
5
|
-
import sqlalchemy as sql
|
|
6
6
|
import numpy as np
|
|
7
|
+
import sqlalchemy as sql
|
|
7
8
|
|
|
8
|
-
|
|
9
|
+
import pixeltable.type_system as ts
|
|
9
10
|
from .data_row import DataRow
|
|
11
|
+
from .expr import Expr
|
|
10
12
|
from .inline_dict import InlineDict
|
|
11
13
|
from .row_builder import RowBuilder
|
|
12
|
-
import pixeltable.catalog as catalog
|
|
13
|
-
import pixeltable.type_system as ts
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
class InlineArray(Expr):
|
|
@@ -27,8 +27,8 @@ class InlineArray(Expr):
|
|
|
27
27
|
|
|
28
28
|
# elements contains
|
|
29
29
|
# - for Expr elements: (index into components, None)
|
|
30
|
-
# - for non-Expr elements: (
|
|
31
|
-
self.elements: List[Tuple[int, Any]] = []
|
|
30
|
+
# - for non-Expr elements: (None, value)
|
|
31
|
+
self.elements: List[Tuple[Optional[int], Any]] = []
|
|
32
32
|
for el in elements:
|
|
33
33
|
el = copy.deepcopy(el)
|
|
34
34
|
if isinstance(el, list):
|
|
@@ -41,11 +41,11 @@ class InlineArray(Expr):
|
|
|
41
41
|
self.elements.append((len(self.components), None))
|
|
42
42
|
self.components.append(el)
|
|
43
43
|
else:
|
|
44
|
-
self.elements.append((
|
|
44
|
+
self.elements.append((None, el))
|
|
45
45
|
|
|
46
46
|
inferred_element_type = ts.InvalidType()
|
|
47
47
|
for idx, val in self.elements:
|
|
48
|
-
if idx
|
|
48
|
+
if idx is not None:
|
|
49
49
|
inferred_element_type = ts.ColumnType.supertype(inferred_element_type, self.components[idx].col_type)
|
|
50
50
|
else:
|
|
51
51
|
inferred_element_type = ts.ColumnType.supertype(inferred_element_type, ts.ColumnType.infer_literal_type(val))
|
|
@@ -83,7 +83,7 @@ class InlineArray(Expr):
|
|
|
83
83
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
84
84
|
result = [None] * len(self.elements)
|
|
85
85
|
for i, (child_idx, val) in enumerate(self.elements):
|
|
86
|
-
if child_idx
|
|
86
|
+
if child_idx is not None:
|
|
87
87
|
result[i] = data_row[self.components[child_idx].slot_idx]
|
|
88
88
|
else:
|
|
89
89
|
result[i] = copy.deepcopy(val)
|
|
@@ -100,7 +100,9 @@ class InlineArray(Expr):
|
|
|
100
100
|
assert 'elements' in d
|
|
101
101
|
arg: List[Any] = []
|
|
102
102
|
for idx, val in d['elements']:
|
|
103
|
-
|
|
103
|
+
# TODO Normalize idx -1 to None via schema migrations.
|
|
104
|
+
# Long-term we should not be allowing idx == -1.
|
|
105
|
+
if idx is not None and idx >= 0: # Older schemas might have -1 instead of None
|
|
104
106
|
arg.append(components[idx])
|
|
105
107
|
else:
|
|
106
108
|
arg.append(val)
|
pixeltable/exprs/inline_dict.py
CHANGED
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
-
|
|
2
|
+
|
|
3
3
|
import copy
|
|
4
|
+
from typing import Optional, List, Any, Dict, Tuple
|
|
4
5
|
|
|
5
6
|
import sqlalchemy as sql
|
|
6
7
|
|
|
7
|
-
from .expr import Expr
|
|
8
|
-
from .data_row import DataRow
|
|
9
|
-
from .row_builder import RowBuilder
|
|
10
8
|
import pixeltable.exceptions as excs
|
|
11
|
-
import pixeltable.catalog as catalog
|
|
12
9
|
import pixeltable.type_system as ts
|
|
10
|
+
from .data_row import DataRow
|
|
11
|
+
from .expr import Expr
|
|
12
|
+
from .row_builder import RowBuilder
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
class InlineDict(Expr):
|
|
@@ -21,8 +21,8 @@ class InlineDict(Expr):
|
|
|
21
21
|
super().__init__(ts.JsonType()) # we need to call this in order to populate self.components
|
|
22
22
|
# dict_items contains
|
|
23
23
|
# - for Expr fields: (key, index into components, None)
|
|
24
|
-
# - for non-Expr fields: (key,
|
|
25
|
-
self.dict_items: List[Tuple[str, int, Any]] = []
|
|
24
|
+
# - for non-Expr fields: (key, None, value)
|
|
25
|
+
self.dict_items: List[Tuple[str, Optional[int], Any]] = []
|
|
26
26
|
for key, val in d.items():
|
|
27
27
|
if not isinstance(key, str):
|
|
28
28
|
raise excs.Error(f'Dictionary requires string keys, {key} has type {type(key)}')
|
|
@@ -35,11 +35,11 @@ class InlineDict(Expr):
|
|
|
35
35
|
self.dict_items.append((key, len(self.components), None))
|
|
36
36
|
self.components.append(val)
|
|
37
37
|
else:
|
|
38
|
-
self.dict_items.append((key,
|
|
38
|
+
self.dict_items.append((key, None, val))
|
|
39
39
|
|
|
40
40
|
self.type_spec: Optional[Dict[str, ts.ColumnType]] = {}
|
|
41
41
|
for key, idx, _ in self.dict_items:
|
|
42
|
-
if idx
|
|
42
|
+
if idx is None:
|
|
43
43
|
# TODO: implement type inference for values
|
|
44
44
|
self.type_spec = None
|
|
45
45
|
break
|
|
@@ -56,7 +56,7 @@ class InlineDict(Expr):
|
|
|
56
56
|
return f"'{val}'"
|
|
57
57
|
return str(val)
|
|
58
58
|
for key, idx, val in self.dict_items:
|
|
59
|
-
if idx
|
|
59
|
+
if idx is not None:
|
|
60
60
|
item_strs.append(f"'{key}': {str(self.components[i])}")
|
|
61
61
|
i += 1
|
|
62
62
|
else:
|
|
@@ -71,7 +71,7 @@ class InlineDict(Expr):
|
|
|
71
71
|
|
|
72
72
|
def to_dict(self) -> Dict[str, Any]:
|
|
73
73
|
"""Return the original dict used to construct this"""
|
|
74
|
-
return {key: val if idx
|
|
74
|
+
return {key: val if idx is None else self.components[idx] for key, idx, val in self.dict_items}
|
|
75
75
|
|
|
76
76
|
def sql_expr(self) -> Optional[sql.ClauseElement]:
|
|
77
77
|
return None
|
|
@@ -80,7 +80,7 @@ class InlineDict(Expr):
|
|
|
80
80
|
result = {}
|
|
81
81
|
for key, idx, val in self.dict_items:
|
|
82
82
|
assert isinstance(key, str)
|
|
83
|
-
if idx
|
|
83
|
+
if idx is not None:
|
|
84
84
|
result[key] = data_row[self.components[idx].slot_idx]
|
|
85
85
|
else:
|
|
86
86
|
result[key] = copy.deepcopy(val)
|
|
@@ -94,7 +94,9 @@ class InlineDict(Expr):
|
|
|
94
94
|
assert 'dict_items' in d
|
|
95
95
|
arg: Dict[str, Any] = {}
|
|
96
96
|
for key, idx, val in d['dict_items']:
|
|
97
|
-
|
|
97
|
+
# TODO Normalize idx -1 to None via schema migrations.
|
|
98
|
+
# Long-term we should not be allowing idx == -1.
|
|
99
|
+
if idx is not None and idx >= 0: # Older schemas might have -1 instead of None
|
|
98
100
|
arg[key] = components[idx]
|
|
99
101
|
else:
|
|
100
102
|
arg[key] = val
|
pixeltable/exprs/row_builder.py
CHANGED
|
@@ -60,6 +60,8 @@ class RowBuilder:
|
|
|
60
60
|
Args:
|
|
61
61
|
output_exprs: list of Exprs to be evaluated
|
|
62
62
|
columns: list of columns to be materialized
|
|
63
|
+
input_exprs: list of Exprs that are excluded from evaluation (because they're already materialized)
|
|
64
|
+
TODO: enforce that output_exprs doesn't overlap with input_exprs?
|
|
63
65
|
"""
|
|
64
66
|
self.unique_exprs = ExprSet() # dependencies precede their dependents
|
|
65
67
|
self.next_slot_idx = 0
|
|
@@ -179,12 +181,16 @@ class RowBuilder:
|
|
|
179
181
|
for i, c in enumerate(expr.components):
|
|
180
182
|
# make sure we only refer to components that have themselves been recorded
|
|
181
183
|
expr.components[i] = self._record_unique_expr(c, True)
|
|
182
|
-
assert expr.slot_idx
|
|
184
|
+
assert expr.slot_idx is None
|
|
183
185
|
expr.slot_idx = self._next_slot_idx()
|
|
184
186
|
self.unique_exprs.append(expr)
|
|
185
187
|
return expr
|
|
186
188
|
|
|
187
189
|
def _record_output_expr_id(self, e: Expr, output_expr_id: int) -> None:
|
|
190
|
+
assert e.slot_idx is not None
|
|
191
|
+
assert output_expr_id is not None
|
|
192
|
+
if e.slot_idx in self.input_expr_slot_idxs:
|
|
193
|
+
return
|
|
188
194
|
self.output_expr_ids[e.slot_idx].add(output_expr_id)
|
|
189
195
|
for d in e.dependencies():
|
|
190
196
|
self._record_output_expr_id(d, output_expr_id)
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
from typing import Optional, List
|
|
2
|
+
|
|
3
|
+
import sqlalchemy as sql
|
|
4
|
+
import PIL.Image
|
|
5
|
+
|
|
6
|
+
import pixeltable.exceptions as excs
|
|
7
|
+
import pixeltable.type_system as ts
|
|
8
|
+
from .column_ref import ColumnRef
|
|
9
|
+
from .data_row import DataRow
|
|
10
|
+
from .expr import Expr
|
|
11
|
+
from .literal import Literal
|
|
12
|
+
from .row_builder import RowBuilder
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class SimilarityExpr(Expr):
|
|
16
|
+
|
|
17
|
+
def __init__(self, col_ref: ColumnRef, item: Expr):
|
|
18
|
+
super().__init__(ts.FloatType())
|
|
19
|
+
self.components = [col_ref, item]
|
|
20
|
+
self.id = self._create_id()
|
|
21
|
+
assert isinstance(item, Literal)
|
|
22
|
+
assert item.col_type.is_string_type() or item.col_type.is_image_type()
|
|
23
|
+
|
|
24
|
+
# determine index to use
|
|
25
|
+
idx_info = col_ref.col.get_idx_info()
|
|
26
|
+
if len(idx_info) == 0:
|
|
27
|
+
raise excs.Error(f'No index found for column {col_ref.col}')
|
|
28
|
+
if len(idx_info) > 1:
|
|
29
|
+
raise excs.Error(
|
|
30
|
+
f'Column {col_ref.col.name} has multiple indices; use the index name to disambiguate, '
|
|
31
|
+
f'e.g., `{col_ref.col.name}.<index-name>.similarity(...)`')
|
|
32
|
+
self.idx_info = next(iter(idx_info.values()))
|
|
33
|
+
idx = self.idx_info.idx
|
|
34
|
+
|
|
35
|
+
if item.col_type.is_string_type() and idx.txt_embed is None:
|
|
36
|
+
raise excs.Error(
|
|
37
|
+
f'Embedding index {self.idx_info.name} on column {self.idx_info.col.name} was created without the '
|
|
38
|
+
f'text_embed parameter and does not support text queries')
|
|
39
|
+
if item.col_type.is_image_type() and idx.img_embed is None:
|
|
40
|
+
raise excs.Error(
|
|
41
|
+
f'Embedding index {self.idx_info.name} on column {self.idx_info.col.name} was created without the '
|
|
42
|
+
f'img_embed parameter and does not support image queries')
|
|
43
|
+
|
|
44
|
+
def __str__(self) -> str:
|
|
45
|
+
return f'{self.components[0]}.similarity({self.components[1]})'
|
|
46
|
+
|
|
47
|
+
def sql_expr(self) -> Optional[sql.ClauseElement]:
|
|
48
|
+
assert isinstance(self.components[1], Literal)
|
|
49
|
+
item = self.components[1].val
|
|
50
|
+
return self.idx_info.idx.similarity_clause(self.idx_info.val_col, item)
|
|
51
|
+
|
|
52
|
+
def as_order_by_clause(self, is_asc: bool) -> Optional[sql.ClauseElement]:
|
|
53
|
+
assert isinstance(self.components[1], Literal)
|
|
54
|
+
item = self.components[1].val
|
|
55
|
+
return self.idx_info.idx.order_by_clause(self.idx_info.val_col, item, is_asc)
|
|
56
|
+
|
|
57
|
+
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
58
|
+
# this should never get called
|
|
59
|
+
assert False
|
|
60
|
+
|
|
61
|
+
@classmethod
|
|
62
|
+
def _from_dict(cls, d: dict, components: List[Expr]) -> Expr:
|
|
63
|
+
assert len(components) == 2
|
|
64
|
+
assert isinstance(components[0], ColumnRef)
|
|
65
|
+
return cls(components[0], components[1])
|
pixeltable/func/__init__.py
CHANGED
|
@@ -1,9 +1,7 @@
|
|
|
1
1
|
from .aggregate_function import Aggregator, AggregateFunction, uda
|
|
2
|
-
from .batched_function import BatchedFunction, ExplicitBatchedFunction
|
|
3
2
|
from .callable_function import CallableFunction
|
|
4
3
|
from .expr_template_function import ExprTemplateFunction
|
|
5
4
|
from .function import Function
|
|
6
5
|
from .function_registry import FunctionRegistry
|
|
7
|
-
from .nos_function import NOSFunction
|
|
8
6
|
from .signature import Signature, Parameter, Batch
|
|
9
7
|
from .udf import udf, make_function, expr_udf
|
|
@@ -72,6 +72,9 @@ class AggregateFunction(Function):
|
|
|
72
72
|
if param.lower() in self.RESERVED_PARAMS:
|
|
73
73
|
raise excs.Error(f'{self.name}(): parameter name {param} is reserved')
|
|
74
74
|
|
|
75
|
+
def exec(self, *args: Any, **kwargs: Any) -> Any:
|
|
76
|
+
raise NotImplementedError
|
|
77
|
+
|
|
75
78
|
def help_str(self) -> str:
|
|
76
79
|
res = super().help_str()
|
|
77
80
|
res += '\n\n' + inspect.getdoc(self.agg_cls.update)
|
|
@@ -1,16 +1,12 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import inspect
|
|
4
|
-
import
|
|
5
|
-
from typing import Optional, Dict, Callable, List, Tuple
|
|
4
|
+
from typing import Optional, Callable, Tuple, Any
|
|
6
5
|
from uuid import UUID
|
|
6
|
+
|
|
7
7
|
import cloudpickle
|
|
8
8
|
|
|
9
|
-
import pixeltable.type_system as ts
|
|
10
|
-
import pixeltable.exceptions as excs
|
|
11
9
|
from .function import Function
|
|
12
|
-
from .function_registry import FunctionRegistry
|
|
13
|
-
from .globals import get_caller_module_path
|
|
14
10
|
from .signature import Signature
|
|
15
11
|
|
|
16
12
|
|
|
@@ -24,13 +20,48 @@ class CallableFunction(Function):
|
|
|
24
20
|
|
|
25
21
|
def __init__(
|
|
26
22
|
self, signature: Signature, py_fn: Callable, self_path: Optional[str] = None,
|
|
27
|
-
self_name: Optional[str] = None):
|
|
23
|
+
self_name: Optional[str] = None, batch_size: Optional[int] = None):
|
|
28
24
|
assert py_fn is not None
|
|
29
25
|
self.py_fn = py_fn
|
|
30
26
|
self.self_name = self_name
|
|
27
|
+
self.batch_size = batch_size
|
|
31
28
|
py_signature = inspect.signature(self.py_fn)
|
|
32
29
|
super().__init__(signature, py_signature, self_path=self_path)
|
|
33
30
|
|
|
31
|
+
@property
|
|
32
|
+
def is_batched(self) -> bool:
|
|
33
|
+
return self.batch_size is not None
|
|
34
|
+
|
|
35
|
+
def exec(self, *args: Any, **kwargs: Any) -> Any:
|
|
36
|
+
if self.is_batched:
|
|
37
|
+
# Pack the batched parameters into singleton lists
|
|
38
|
+
constant_param_names = [p.name for p in self.signature.constant_parameters]
|
|
39
|
+
batched_args = [[arg] for arg in args]
|
|
40
|
+
constant_kwargs = {k: v for k, v in kwargs.items() if k in constant_param_names}
|
|
41
|
+
batched_kwargs = {k: [v] for k, v in kwargs.items() if k not in constant_param_names}
|
|
42
|
+
result = self.py_fn(*batched_args, **constant_kwargs, **batched_kwargs)
|
|
43
|
+
assert len(result) == 1
|
|
44
|
+
return result[0]
|
|
45
|
+
else:
|
|
46
|
+
return self.py_fn(*args, **kwargs)
|
|
47
|
+
|
|
48
|
+
def exec_batch(self, *args: Any, **kwargs: Any) -> list:
|
|
49
|
+
"""Execute the function with the given arguments and return the result.
|
|
50
|
+
The arguments are expected to be batched: if the corresponding parameter has type T,
|
|
51
|
+
then the argument should have type T if it's a constant parameter, or list[T] if it's
|
|
52
|
+
a batched parameter.
|
|
53
|
+
"""
|
|
54
|
+
assert self.is_batched
|
|
55
|
+
# Unpack the constant parameters
|
|
56
|
+
constant_param_names = [p.name for p in self.signature.constant_parameters]
|
|
57
|
+
constant_kwargs = {k: v[0] for k, v in kwargs.items() if k in constant_param_names}
|
|
58
|
+
batched_kwargs = {k: v for k, v in kwargs.items() if k not in constant_param_names}
|
|
59
|
+
return self.py_fn(*args, **constant_kwargs, **batched_kwargs)
|
|
60
|
+
|
|
61
|
+
# TODO(aaron-siegel): Implement conditional batch sizing
|
|
62
|
+
def get_batch_size(self, *args: Any, **kwargs: Any) -> Optional[int]:
|
|
63
|
+
return self.batch_size
|
|
64
|
+
|
|
34
65
|
@property
|
|
35
66
|
def display_name(self) -> str:
|
|
36
67
|
return self.self_name
|
|
@@ -44,7 +75,7 @@ class CallableFunction(Function):
|
|
|
44
75
|
res += '\n\n' + inspect.getdoc(self.py_fn)
|
|
45
76
|
return res
|
|
46
77
|
|
|
47
|
-
def _as_dict(self) ->
|
|
78
|
+
def _as_dict(self) -> dict:
|
|
48
79
|
if self.self_path is None:
|
|
49
80
|
# this is not a module function
|
|
50
81
|
from .function_registry import FunctionRegistry
|
|
@@ -53,17 +84,30 @@ class CallableFunction(Function):
|
|
|
53
84
|
return super()._as_dict()
|
|
54
85
|
|
|
55
86
|
@classmethod
|
|
56
|
-
def _from_dict(cls, d:
|
|
87
|
+
def _from_dict(cls, d: dict) -> Function:
|
|
57
88
|
if 'id' in d:
|
|
58
89
|
from .function_registry import FunctionRegistry
|
|
59
90
|
return FunctionRegistry.get().get_stored_function(UUID(hex=d['id']))
|
|
60
91
|
return super()._from_dict(d)
|
|
61
92
|
|
|
62
|
-
def to_store(self) ->
|
|
63
|
-
|
|
93
|
+
def to_store(self) -> tuple[dict, bytes]:
|
|
94
|
+
md = self.signature.as_dict()
|
|
95
|
+
if self.batch_size is not None:
|
|
96
|
+
md['batch_size'] = self.batch_size
|
|
97
|
+
return md, cloudpickle.dumps(self.py_fn)
|
|
64
98
|
|
|
65
99
|
@classmethod
|
|
66
|
-
def from_store(cls, name: Optional[str], md:
|
|
100
|
+
def from_store(cls, name: Optional[str], md: dict, binary_obj: bytes) -> Function:
|
|
67
101
|
py_fn = cloudpickle.loads(binary_obj)
|
|
68
102
|
assert isinstance(py_fn, Callable)
|
|
69
|
-
return CallableFunction(Signature.from_dict(md), py_fn, self_name=name)
|
|
103
|
+
return CallableFunction(Signature.from_dict(md), py_fn, self_name=name, batch_size=md.get('batch_size'))
|
|
104
|
+
|
|
105
|
+
def validate_call(self, bound_args: dict[str, Any]) -> None:
|
|
106
|
+
import pixeltable.exprs as exprs
|
|
107
|
+
if self.is_batched:
|
|
108
|
+
for param in self.signature.constant_parameters:
|
|
109
|
+
if param.name in bound_args and isinstance(bound_args[param.name], exprs.Expr):
|
|
110
|
+
raise ValueError(
|
|
111
|
+
f'{self.display_name}(): '
|
|
112
|
+
f'parameter {param.name} must be a constant value, not a Pixeltable expression'
|
|
113
|
+
)
|
|
@@ -1,9 +1,8 @@
|
|
|
1
1
|
import inspect
|
|
2
|
-
from typing import Dict, Optional,
|
|
2
|
+
from typing import Dict, Optional, Any
|
|
3
3
|
|
|
4
4
|
import pixeltable
|
|
5
5
|
import pixeltable.exceptions as excs
|
|
6
|
-
import pixeltable.type_system as ts
|
|
7
6
|
from .function import Function
|
|
8
7
|
from .signature import Signature, Parameter
|
|
9
8
|
|
|
@@ -65,6 +64,16 @@ class ExprTemplateFunction(Function):
|
|
|
65
64
|
assert not result.contains(exprs.Variable)
|
|
66
65
|
return result
|
|
67
66
|
|
|
67
|
+
def exec(self, *args: Any, **kwargs: Any) -> Any:
|
|
68
|
+
expr = self.instantiate(*args, **kwargs)
|
|
69
|
+
import pixeltable.exprs as exprs
|
|
70
|
+
row_builder = exprs.RowBuilder(output_exprs=[expr], columns=[], input_exprs=[])
|
|
71
|
+
import pixeltable.exec as exec
|
|
72
|
+
row_batch = exec.DataRowBatch(tbl=None, row_builder=row_builder, len=1)
|
|
73
|
+
row = row_batch[0]
|
|
74
|
+
row_builder.eval(row, ctx=row_builder.default_eval_ctx)
|
|
75
|
+
return row[row_builder.get_output_exprs()[0].slot_idx]
|
|
76
|
+
|
|
68
77
|
@property
|
|
69
78
|
def display_name(self) -> str:
|
|
70
79
|
return self.self_name
|
pixeltable/func/function.py
CHANGED
|
@@ -3,9 +3,10 @@ from __future__ import annotations
|
|
|
3
3
|
import abc
|
|
4
4
|
import importlib
|
|
5
5
|
import inspect
|
|
6
|
-
import
|
|
7
|
-
from typing import Optional, Dict, Any, Tuple
|
|
6
|
+
from typing import Optional, Dict, Any, Tuple, Callable
|
|
8
7
|
|
|
8
|
+
import pixeltable
|
|
9
|
+
import pixeltable.type_system as ts
|
|
9
10
|
from .globals import resolve_symbol
|
|
10
11
|
from .signature import Signature
|
|
11
12
|
|
|
@@ -18,10 +19,13 @@ class Function(abc.ABC):
|
|
|
18
19
|
via the member self_path.
|
|
19
20
|
"""
|
|
20
21
|
|
|
21
|
-
def __init__(
|
|
22
|
+
def __init__(
|
|
23
|
+
self, signature: Signature, py_signature: inspect.Signature, self_path: Optional[str] = None
|
|
24
|
+
):
|
|
22
25
|
self.signature = signature
|
|
23
26
|
self.py_signature = py_signature
|
|
24
27
|
self.self_path = self_path # fully-qualified path to self
|
|
28
|
+
self._conditional_return_type: Optional[Callable[..., ts.ColumnType]] = None
|
|
25
29
|
|
|
26
30
|
@property
|
|
27
31
|
def name(self) -> str:
|
|
@@ -40,7 +44,7 @@ class Function(abc.ABC):
|
|
|
40
44
|
def help_str(self) -> str:
|
|
41
45
|
return self.display_name + str(self.signature)
|
|
42
46
|
|
|
43
|
-
def __call__(self, *args:
|
|
47
|
+
def __call__(self, *args: Any, **kwargs: Any) -> 'pixeltable.exprs.Expr':
|
|
44
48
|
from pixeltable import exprs
|
|
45
49
|
bound_args = self.py_signature.bind(*args, **kwargs)
|
|
46
50
|
self.validate_call(bound_args.arguments)
|
|
@@ -50,6 +54,33 @@ class Function(abc.ABC):
|
|
|
50
54
|
"""Override this to do custom validation of the arguments"""
|
|
51
55
|
pass
|
|
52
56
|
|
|
57
|
+
def call_return_type(self, kwargs: dict[str, Any]) -> ts.ColumnType:
|
|
58
|
+
"""Return the type of the value returned by calling this function with the given arguments"""
|
|
59
|
+
if self._conditional_return_type is None:
|
|
60
|
+
return self.signature.return_type
|
|
61
|
+
bound_args = self.py_signature.bind(**kwargs)
|
|
62
|
+
kw_args: dict[str, Any] = {}
|
|
63
|
+
sig = inspect.signature(self._conditional_return_type)
|
|
64
|
+
for param in sig.parameters.values():
|
|
65
|
+
if param.name in bound_args.arguments:
|
|
66
|
+
kw_args[param.name] = bound_args.arguments[param.name]
|
|
67
|
+
return self._conditional_return_type(**kw_args)
|
|
68
|
+
|
|
69
|
+
def conditional_return_type(self, fn: Callable[..., ts.ColumnType]) -> Callable[..., ts.ColumnType]:
|
|
70
|
+
"""Instance decorator for specifying a conditional return type for this function"""
|
|
71
|
+
# verify that call_return_type only has parameters that are also present in the signature
|
|
72
|
+
sig = inspect.signature(fn)
|
|
73
|
+
for param in sig.parameters.values():
|
|
74
|
+
if param.name not in self.signature.parameters:
|
|
75
|
+
raise ValueError(f'`conditional_return_type` has parameter `{param.name}` that is not in the signature')
|
|
76
|
+
self._conditional_return_type = fn
|
|
77
|
+
return fn
|
|
78
|
+
|
|
79
|
+
@abc.abstractmethod
|
|
80
|
+
def exec(self, *args: Any, **kwargs: Any) -> Any:
|
|
81
|
+
"""Execute the function with the given arguments and return the result."""
|
|
82
|
+
pass
|
|
83
|
+
|
|
53
84
|
def __eq__(self, other: object) -> bool:
|
|
54
85
|
if not isinstance(other, self.__class__):
|
|
55
86
|
return False
|
pixeltable/func/signature.py
CHANGED
|
@@ -29,21 +29,12 @@ class Signature:
|
|
|
29
29
|
"""
|
|
30
30
|
Represents the signature of a Pixeltable function.
|
|
31
31
|
|
|
32
|
-
Regarding return type:
|
|
33
|
-
- most functions will have a fixed return type, which is specified directly
|
|
34
|
-
- some functions will have a return type that depends on the argument values;
|
|
35
|
-
ex.: PIL.Image.Image.resize() returns an image with dimensions specified as a parameter
|
|
36
|
-
- in the latter case, the 'return_type' field is a function that takes the bound arguments and returns the
|
|
37
|
-
return type; if no bound arguments are specified, a generic return type is returned (eg, ImageType() without a
|
|
38
|
-
size)
|
|
39
32
|
- self.is_batched: return type is a Batch[...] type
|
|
40
33
|
"""
|
|
41
34
|
SPECIAL_PARAM_NAMES = ['group_by', 'order_by']
|
|
42
35
|
|
|
43
|
-
def __init__(
|
|
44
|
-
|
|
45
|
-
return_type: Union[ts.ColumnType, Callable[[Dict[str, Any]], ts.ColumnType]],
|
|
46
|
-
parameters: List[Parameter], is_batched: bool = False):
|
|
36
|
+
def __init__(self, return_type: ts.ColumnType, parameters: List[Parameter], is_batched: bool = False):
|
|
37
|
+
assert isinstance(return_type, ts.ColumnType)
|
|
47
38
|
self.return_type = return_type
|
|
48
39
|
self.is_batched = is_batched
|
|
49
40
|
# we rely on the ordering guarantee of dicts in Python >=3.7
|
|
@@ -52,10 +43,9 @@ class Signature:
|
|
|
52
43
|
self.constant_parameters = [p for p in parameters if not p.is_batched]
|
|
53
44
|
self.batched_parameters = [p for p in parameters if p.is_batched]
|
|
54
45
|
|
|
55
|
-
def get_return_type(self
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
return self.return_type(bound_args)
|
|
46
|
+
def get_return_type(self) -> ts.ColumnType:
|
|
47
|
+
assert isinstance(self.return_type, ts.ColumnType)
|
|
48
|
+
return self.return_type
|
|
59
49
|
|
|
60
50
|
def as_dict(self) -> Dict[str, Any]:
|
|
61
51
|
result = {
|