pixeltable 0.1.2__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +21 -4
- pixeltable/catalog/__init__.py +13 -0
- pixeltable/catalog/catalog.py +159 -0
- pixeltable/catalog/column.py +200 -0
- pixeltable/catalog/dir.py +32 -0
- pixeltable/catalog/globals.py +33 -0
- pixeltable/catalog/insertable_table.py +191 -0
- pixeltable/catalog/named_function.py +36 -0
- pixeltable/catalog/path.py +58 -0
- pixeltable/catalog/path_dict.py +139 -0
- pixeltable/catalog/schema_object.py +39 -0
- pixeltable/catalog/table.py +581 -0
- pixeltable/catalog/table_version.py +749 -0
- pixeltable/catalog/table_version_path.py +133 -0
- pixeltable/catalog/view.py +203 -0
- pixeltable/client.py +520 -31
- pixeltable/dataframe.py +540 -349
- pixeltable/env.py +373 -48
- pixeltable/exceptions.py +12 -21
- pixeltable/exec/__init__.py +9 -0
- pixeltable/exec/aggregation_node.py +78 -0
- pixeltable/exec/cache_prefetch_node.py +113 -0
- pixeltable/exec/component_iteration_node.py +79 -0
- pixeltable/exec/data_row_batch.py +95 -0
- pixeltable/exec/exec_context.py +22 -0
- pixeltable/exec/exec_node.py +61 -0
- pixeltable/exec/expr_eval_node.py +217 -0
- pixeltable/exec/in_memory_data_node.py +69 -0
- pixeltable/exec/media_validation_node.py +43 -0
- pixeltable/exec/sql_scan_node.py +225 -0
- pixeltable/exprs/__init__.py +24 -0
- pixeltable/exprs/arithmetic_expr.py +102 -0
- pixeltable/exprs/array_slice.py +71 -0
- pixeltable/exprs/column_property_ref.py +77 -0
- pixeltable/exprs/column_ref.py +105 -0
- pixeltable/exprs/comparison.py +77 -0
- pixeltable/exprs/compound_predicate.py +98 -0
- pixeltable/exprs/data_row.py +187 -0
- pixeltable/exprs/expr.py +586 -0
- pixeltable/exprs/expr_set.py +39 -0
- pixeltable/exprs/function_call.py +380 -0
- pixeltable/exprs/globals.py +69 -0
- pixeltable/exprs/image_member_access.py +115 -0
- pixeltable/exprs/image_similarity_predicate.py +58 -0
- pixeltable/exprs/inline_array.py +107 -0
- pixeltable/exprs/inline_dict.py +101 -0
- pixeltable/exprs/is_null.py +38 -0
- pixeltable/exprs/json_mapper.py +121 -0
- pixeltable/exprs/json_path.py +159 -0
- pixeltable/exprs/literal.py +54 -0
- pixeltable/exprs/object_ref.py +41 -0
- pixeltable/exprs/predicate.py +44 -0
- pixeltable/exprs/row_builder.py +355 -0
- pixeltable/exprs/rowid_ref.py +94 -0
- pixeltable/exprs/type_cast.py +53 -0
- pixeltable/exprs/variable.py +45 -0
- pixeltable/func/__init__.py +9 -0
- pixeltable/func/aggregate_function.py +194 -0
- pixeltable/func/batched_function.py +53 -0
- pixeltable/func/callable_function.py +69 -0
- pixeltable/func/expr_template_function.py +82 -0
- pixeltable/func/function.py +110 -0
- pixeltable/func/function_registry.py +227 -0
- pixeltable/func/globals.py +36 -0
- pixeltable/func/nos_function.py +202 -0
- pixeltable/func/signature.py +166 -0
- pixeltable/func/udf.py +163 -0
- pixeltable/functions/__init__.py +52 -103
- pixeltable/functions/eval.py +216 -0
- pixeltable/functions/fireworks.py +61 -0
- pixeltable/functions/huggingface.py +120 -0
- pixeltable/functions/image.py +16 -0
- pixeltable/functions/openai.py +88 -0
- pixeltable/functions/pil/image.py +148 -7
- pixeltable/functions/string.py +13 -0
- pixeltable/functions/together.py +27 -0
- pixeltable/functions/util.py +41 -0
- pixeltable/functions/video.py +62 -0
- pixeltable/iterators/__init__.py +3 -0
- pixeltable/iterators/base.py +48 -0
- pixeltable/iterators/document.py +311 -0
- pixeltable/iterators/video.py +89 -0
- pixeltable/metadata/__init__.py +54 -0
- pixeltable/metadata/converters/convert_10.py +18 -0
- pixeltable/metadata/schema.py +211 -0
- pixeltable/plan.py +656 -0
- pixeltable/store.py +413 -182
- pixeltable/tests/conftest.py +143 -86
- pixeltable/tests/test_audio.py +65 -0
- pixeltable/tests/test_catalog.py +27 -0
- pixeltable/tests/test_client.py +14 -14
- pixeltable/tests/test_component_view.py +372 -0
- pixeltable/tests/test_dataframe.py +433 -0
- pixeltable/tests/test_dirs.py +78 -62
- pixeltable/tests/test_document.py +117 -0
- pixeltable/tests/test_exprs.py +591 -135
- pixeltable/tests/test_function.py +297 -67
- pixeltable/tests/test_functions.py +283 -1
- pixeltable/tests/test_migration.py +43 -0
- pixeltable/tests/test_nos.py +54 -0
- pixeltable/tests/test_snapshot.py +208 -0
- pixeltable/tests/test_table.py +1086 -258
- pixeltable/tests/test_transactional_directory.py +42 -0
- pixeltable/tests/test_types.py +5 -11
- pixeltable/tests/test_video.py +149 -34
- pixeltable/tests/test_view.py +530 -0
- pixeltable/tests/utils.py +186 -45
- pixeltable/tool/create_test_db_dump.py +149 -0
- pixeltable/type_system.py +490 -133
- pixeltable/utils/__init__.py +17 -46
- pixeltable/utils/clip.py +12 -15
- pixeltable/utils/coco.py +136 -0
- pixeltable/utils/documents.py +39 -0
- pixeltable/utils/filecache.py +195 -0
- pixeltable/utils/help.py +11 -0
- pixeltable/utils/media_store.py +76 -0
- pixeltable/utils/parquet.py +126 -0
- pixeltable/utils/pytorch.py +172 -0
- pixeltable/utils/s3.py +13 -0
- pixeltable/utils/sql.py +17 -0
- pixeltable/utils/transactional_directory.py +35 -0
- pixeltable-0.2.1.dist-info/LICENSE +18 -0
- pixeltable-0.2.1.dist-info/METADATA +119 -0
- pixeltable-0.2.1.dist-info/RECORD +125 -0
- {pixeltable-0.1.2.dist-info → pixeltable-0.2.1.dist-info}/WHEEL +1 -1
- pixeltable/catalog.py +0 -1421
- pixeltable/exprs.py +0 -1745
- pixeltable/function.py +0 -269
- pixeltable/functions/clip.py +0 -10
- pixeltable/functions/pil/__init__.py +0 -23
- pixeltable/functions/tf.py +0 -21
- pixeltable/index.py +0 -57
- pixeltable/tests/test_dict.py +0 -24
- pixeltable/tests/test_tf.py +0 -69
- pixeltable/tf.py +0 -33
- pixeltable/utils/tf.py +0 -33
- pixeltable/utils/video.py +0 -32
- pixeltable-0.1.2.dist-info/LICENSE +0 -201
- pixeltable-0.1.2.dist-info/METADATA +0 -89
- pixeltable-0.1.2.dist-info/RECORD +0 -37
|
@@ -0,0 +1,380 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import inspect
|
|
4
|
+
import json
|
|
5
|
+
import sys
|
|
6
|
+
from typing import Optional, List, Any, Dict, Tuple
|
|
7
|
+
|
|
8
|
+
import sqlalchemy as sql
|
|
9
|
+
|
|
10
|
+
import pixeltable.catalog as catalog
|
|
11
|
+
import pixeltable.exceptions as excs
|
|
12
|
+
import pixeltable.func as func
|
|
13
|
+
import pixeltable.type_system as ts
|
|
14
|
+
from .data_row import DataRow
|
|
15
|
+
from .expr import Expr
|
|
16
|
+
from .inline_array import InlineArray
|
|
17
|
+
from .inline_dict import InlineDict
|
|
18
|
+
from .row_builder import RowBuilder
|
|
19
|
+
from .rowid_ref import RowidRef
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class FunctionCall(Expr):
    """Expr representing the application of a `func.Function` to a set of bound arguments.

    Arguments that are themselves Exprs are stored in `self.components`; `self.args` and `self.kwargs`
    hold, per parameter, either an index into `self.components` or the constant value.
    Group-by and order-by expressions (for aggregate/window functions) are also recorded as components.
    """

    def __init__(
            self, fn: func.Function, bound_args: Dict[str, Any], order_by_clause: Optional[List[Any]] = None,
            group_by_clause: Optional[List[Any]] = None, is_method_call: bool = False):
        """
        Args:
            fn: the function being called
            bound_args: parameter name -> argument (an Expr or a constant); literals may be converted in place
                by check_args()
            order_by_clause: ordering expressions; the first element must be an Expr
            group_by_clause: grouping expressions, or a single-element list containing a catalog.Table
                (in which case the table's rowid columns are used)
            is_method_call: if True, display_str() renders the call as `<first component>.<fn name>(...)`

        Raises:
            excs.Error: if an argument is incompatible with the signature or order_by is not an Expr
        """
        if order_by_clause is None:
            order_by_clause = []
        if group_by_clause is None:
            group_by_clause = []
        signature = fn.signature
        super().__init__(signature.get_return_type(bound_args))
        self.fn = fn
        self.is_method_call = is_method_call
        self.check_args(signature, bound_args)

        self.agg_init_args: Dict[str, Any] = {}
        if self.is_agg_fn_call:
            # we separate out the init args for the aggregator
            self.agg_init_args = {
                arg_name: arg for arg_name, arg in bound_args.items() if arg_name in fn.init_param_names
            }
            bound_args = {arg_name: arg for arg_name, arg in bound_args.items() if arg_name not in fn.init_param_names}

        # construct components, args, kwargs
        self.components: List[Expr] = []

        # Tuple[int, Any]:
        # - for Exprs: (index into components, None)
        # - otherwise: (-1, val)
        self.args: List[Tuple[int, Any]] = []
        self.kwargs: Dict[str, Tuple[int, Any]] = {}

        # we record the types of non-variable parameters for runtime type checks
        self.arg_types: List[ts.ColumnType] = []
        self.kwarg_types: Dict[str, ts.ColumnType] = {}
        # the prefix of parameters that are bound can be passed by position
        for param in fn.py_signature.parameters.values():
            if param.name not in bound_args or param.kind == inspect.Parameter.KEYWORD_ONLY:
                break
            arg = bound_args[param.name]
            if isinstance(arg, Expr):
                self.args.append((len(self.components), None))
                self.components.append(arg.copy())
            else:
                self.args.append((-1, arg))
            if param.kind != inspect.Parameter.VAR_POSITIONAL and param.kind != inspect.Parameter.VAR_KEYWORD:
                self.arg_types.append(signature.parameters[param.name].col_type)

        # the remaining args are passed as keywords
        kw_param_names = set(bound_args.keys()) - set(list(fn.py_signature.parameters.keys())[:len(self.args)])
        for param_name in kw_param_names:
            arg = bound_args[param_name]
            if isinstance(arg, Expr):
                self.kwargs[param_name] = (len(self.components), None)
                self.components.append(arg.copy())
            else:
                self.kwargs[param_name] = (-1, arg)
            if fn.py_signature.parameters[param_name].kind != inspect.Parameter.VAR_KEYWORD:
                self.kwarg_types[param_name] = signature.parameters[param_name].col_type

        # window function state:
        # self.components[self.group_by_start_idx:self.group_by_stop_idx] contains group_by exprs
        self.group_by_start_idx, self.group_by_stop_idx = 0, 0
        if len(group_by_clause) > 0:
            if isinstance(group_by_clause[0], catalog.Table):
                group_by_exprs = self._create_rowid_refs(group_by_clause[0])
            else:
                assert isinstance(group_by_clause[0], Expr)
                group_by_exprs = group_by_clause
            # record grouping exprs in self.components, we need to evaluate them to get partition vals
            self.group_by_start_idx = len(self.components)
            self.group_by_stop_idx = len(self.components) + len(group_by_exprs)
            self.components.extend(group_by_exprs)

        if isinstance(self.fn, func.ExprTemplateFunction):
            # we instantiate the template to create an Expr that can be evaluated and record that as a component
            fn_expr = self.fn.instantiate(**bound_args)
            self.components.append(fn_expr)
            self.fn_expr_idx = len(self.components) - 1
        else:
            self.fn_expr_idx = sys.maxsize

        # we want to make sure that order_by_clause get assigned slot_idxs, even though we won't need to evaluate them
        # (that's done in SQL)
        if len(order_by_clause) > 0 and not isinstance(order_by_clause[0], Expr):
            raise excs.Error(
                f'order_by argument needs to be a Pixeltable expression, but instead is a {type(order_by_clause[0])}')
        # don't add components after this, everything after order_by_start_idx is part of the order_by clause
        self.order_by_start_idx = len(self.components)
        self.components.extend(order_by_clause)

        self.constant_args = {param_name for param_name, arg in bound_args.items() if not isinstance(arg, Expr)}
        # execution state for aggregate functions
        self.aggregator: Optional[Any] = None
        self.current_partition_vals: Optional[List[Any]] = None

        self.id = self._create_id()

    def _create_rowid_refs(self, tbl: catalog.Table) -> List[Expr]:
        """Return one RowidRef per rowid column of tbl's table version."""
        target = tbl.tbl_version_path.tbl_version
        return [RowidRef(target, i) for i in range(target.num_rowid_columns())]

    @classmethod
    def check_args(cls, signature: func.Signature, bound_args: Dict[str, Any]) -> None:
        """Checks that bound_args are compatible with signature.

        Convert literals to the correct type and update bound_args in place, if necessary.

        Raises:
            excs.Error: if a constant argument is not json-serializable or cannot be converted to the
                parameter type, or if an Expr argument's type does not match the parameter type
        """
        for param_name, arg in bound_args.items():
            param = signature.parameters[param_name]
            if isinstance(arg, dict):
                try:
                    arg = InlineDict(arg)
                    bound_args[param_name] = arg
                except excs.Error:
                    # this didn't work, but it might be a literal
                    pass
            if isinstance(arg, (list, tuple)):
                try:
                    # If the column type is JsonType, force the literal to be JSON
                    arg = InlineArray(arg, force_json=param.col_type is not None and param.col_type.is_json_type())
                    bound_args[param_name] = arg
                except excs.Error:
                    # this didn't work, but it might be a literal
                    pass

            if not isinstance(arg, Expr):
                # make sure that non-Expr args are json-serializable and are literals of the correct type
                try:
                    _ = json.dumps(arg)
                except TypeError:
                    raise excs.Error(f"Argument for parameter '{param_name}' is not json-serializable: {arg}")
                if arg is not None:
                    try:
                        bound_args[param_name] = param.col_type.create_literal(arg)
                    except TypeError as e:
                        msg = str(e)
                        raise excs.Error(f"Argument for parameter '{param_name}': {msg[0].lower() + msg[1:]}")
                continue

            # variable parameters don't get type-checked, but they both need to be json-typed
            if param.kind == inspect.Parameter.VAR_POSITIONAL:
                assert isinstance(arg, InlineArray)
                arg.col_type = ts.JsonType()
                continue
            if param.kind == inspect.Parameter.VAR_KEYWORD:
                assert isinstance(arg, InlineDict)
                arg.col_type = ts.JsonType()
                continue

            # look up the type of *this* parameter; a variable assigned only in the literal branch above
            # would be unbound here, or left over from an earlier parameter
            param_type = param.col_type
            if not param_type.is_supertype_of(arg.col_type):
                raise excs.Error(
                    f'Parameter {param_name}: argument type {arg.col_type} does not match parameter type '
                    f'{param_type}')

    def is_nos_call(self) -> bool:
        """Return True if the called function is a func.NOSFunction."""
        return isinstance(self.fn, func.NOSFunction)

    def _equals(self, other: FunctionCall) -> bool:
        """Compare the non-component state of two FunctionCalls."""
        if self.fn != other.fn:
            return False
        if len(self.args) != len(other.args):
            return False
        for i in range(len(self.args)):
            if self.args[i] != other.args[i]:
                return False
        # constant keyword arguments are part of the call's identity (see _id_attrs())
        if self.kwargs != other.kwargs:
            return False
        if self.group_by_start_idx != other.group_by_start_idx:
            return False
        if self.group_by_stop_idx != other.group_by_stop_idx:
            return False
        if self.order_by_start_idx != other.order_by_start_idx:
            return False
        return True

    def _id_attrs(self) -> List[Tuple[str, Any]]:
        """Return the attributes that go into this expr's id, in addition to those of the superclass."""
        return super()._id_attrs() + [
            ('fn', id(self.fn)),  # use the function pointer, not the fqn, which isn't set for lambdas
            ('args', self.args),
            ('kwargs', self.kwargs),
            ('group_by_start_idx', self.group_by_start_idx),
            ('group_by_stop_idx', self.group_by_stop_idx),
            ('order_by_start_idx', self.order_by_start_idx)
        ]

    def __str__(self) -> str:
        return self.display_str()

    def display_str(self, inline: bool = True) -> str:
        """Return a printable form of the call; method calls are rendered as `<caller>.<name>(<args>)`."""
        if self.is_method_call:
            return f'{self.components[0]}.{self.fn.name}({self._print_args(1, inline)})'
        else:
            fn_name = self.fn.display_name if self.fn.display_name != '' else 'anonymous_fn'
            return f'{fn_name}({self._print_args()})'

    def _print_args(self, start_idx: int = 0, inline: bool = True) -> str:
        """Return the argument list as a string.

        Args:
            start_idx: index of the first positional argument to print
            inline: if True, separate arguments with ', '; otherwise put each on its own line
        """
        arg_strs = [
            str(arg) if idx == -1 else str(self.components[idx]) for idx, arg in self.args[start_idx:]
        ]

        def print_arg(arg: Any) -> str:
            return f"'{arg}'" if isinstance(arg, str) else str(arg)

        arg_strs.extend([
            f'{param_name}={print_arg(arg) if idx == -1 else str(self.components[idx])}'
            for param_name, (idx, arg) in self.kwargs.items()
        ])
        if len(self.order_by) > 0:
            if self.fn.requires_order_by:
                arg_strs.insert(0, Expr.print_list(self.order_by))
            else:
                arg_strs.append(f'order_by={Expr.print_list(self.order_by)}')
        if len(self.group_by) > 0:
            arg_strs.append(f'group_by={Expr.print_list(self.group_by)}')
        # TODO: figure out the function name
        separator = ', ' if inline else ',\n    '
        return separator.join(arg_strs)

    def has_group_by(self) -> bool:
        """Return True if group-by exprs were recorded in self.components."""
        return self.group_by_stop_idx != 0

    @property
    def group_by(self) -> List[Expr]:
        return self.components[self.group_by_start_idx:self.group_by_stop_idx]

    @property
    def order_by(self) -> List[Expr]:
        return self.components[self.order_by_start_idx:]

    @property
    def is_window_fn_call(self) -> bool:
        return (
            isinstance(self.fn, func.AggregateFunction)
            and self.fn.allows_window
            and (
                not self.fn.allows_std_agg
                or self.has_group_by()
                or (len(self.order_by) > 0 and not self.fn.requires_order_by)
            )
        )

    def get_window_sort_exprs(self) -> Tuple[List[Expr], List[Expr]]:
        """Return (group_by exprs, order_by exprs)."""
        return self.group_by, self.order_by

    @property
    def is_agg_fn_call(self) -> bool:
        return isinstance(self.fn, func.AggregateFunction)

    def get_agg_order_by(self) -> List[Expr]:
        assert self.is_agg_fn_call
        return self.order_by

    def sql_expr(self) -> Optional[sql.ClauseElement]:
        # TODO: implement for standard aggregate functions
        return None

    def reset_agg(self) -> None:
        """
        Init agg state
        """
        assert self.is_agg_fn_call
        assert isinstance(self.fn, func.AggregateFunction)
        self.aggregator = self.fn.agg_cls(**self.agg_init_args)

    def update(self, data_row: DataRow) -> None:
        """
        Update agg state
        """
        assert self.is_agg_fn_call
        args, kwargs = self._make_args(data_row)
        self.aggregator.update(*args, **kwargs)

    def _make_args(self, data_row: DataRow) -> Tuple[List[Any], Dict[str, Any]]:
        """Return args and kwargs, constructed for data_row"""
        kwargs: Dict[str, Any] = {}
        for param_name, (component_idx, arg) in self.kwargs.items():
            val = arg if component_idx == -1 else data_row[self.components[component_idx].slot_idx]
            param = self.fn.signature.parameters[param_name]
            if param.kind == inspect.Parameter.VAR_KEYWORD:
                # expand **kwargs parameter
                kwargs.update(val)
            else:
                assert param.kind != inspect.Parameter.VAR_POSITIONAL
                kwargs[param_name] = val

        args: List[Any] = []
        for param_idx, (component_idx, arg) in enumerate(self.args):
            val = arg if component_idx == -1 else data_row[self.components[component_idx].slot_idx]
            param = self.fn.signature.parameters_by_pos[param_idx]
            if param.kind == inspect.Parameter.VAR_POSITIONAL:
                # expand *args parameter
                assert isinstance(val, list)
                args.extend(val)
            elif param.kind == inspect.Parameter.VAR_KEYWORD:
                # expand **kwargs parameter
                assert isinstance(val, dict)
                kwargs.update(val)
            else:
                args.append(val)
        return args, kwargs

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        """Compute the value of this call for data_row and store it in data_row[self.slot_idx].

        The result is None if a non-nullable parameter receives a None argument.
        """
        args, kwargs = self._make_args(data_row)
        signature = self.fn.signature
        if signature.parameters is not None:
            # check for nulls
            for i in range(len(self.arg_types)):
                if args[i] is None and not self.arg_types[i].nullable:
                    # we can't evaluate this function
                    data_row[self.slot_idx] = None
                    return
            for param_name, param_type in self.kwarg_types.items():
                if kwargs[param_name] is None and not param_type.nullable:
                    # we can't evaluate this function
                    data_row[self.slot_idx] = None
                    return

        if isinstance(self.fn, func.ExprTemplateFunction):
            # we need to evaluate the template
            # TODO: can we get rid of this extra copy?
            fn_expr = self.components[self.fn_expr_idx]
            data_row[self.slot_idx] = data_row[fn_expr.slot_idx]
        elif isinstance(self.fn, func.CallableFunction):
            data_row[self.slot_idx] = self.fn.py_fn(*args, **kwargs)
        elif self.is_window_fn_call:
            if self.has_group_by():
                if self.current_partition_vals is None:
                    self.current_partition_vals = [None] * len(self.group_by)
                partition_vals = [data_row[e.slot_idx] for e in self.group_by]
                # also start a fresh aggregator when none exists yet: the first partition's values may all be
                # None and would then compare equal to the initial placeholder
                if self.aggregator is None or partition_vals != self.current_partition_vals:
                    # new partition
                    self.aggregator = self.fn.agg_cls(**self.agg_init_args)
                    self.current_partition_vals = partition_vals
            elif self.aggregator is None:
                self.aggregator = self.fn.agg_cls(**self.agg_init_args)
            self.aggregator.update(*args)
            data_row[self.slot_idx] = self.aggregator.value()
        else:
            assert self.is_agg_fn_call
            data_row[self.slot_idx] = self.aggregator.value()

    def _as_dict(self) -> Dict:
        """Return the serializable state of this call (function, args, kwargs and clause indices)."""
        result = {
            'fn': self.fn.as_dict(), 'args': self.args, 'kwargs': self.kwargs,
            'group_by_start_idx': self.group_by_start_idx, 'group_by_stop_idx': self.group_by_stop_idx,
            'order_by_start_idx': self.order_by_start_idx,
            **super()._as_dict()
        }
        return result

    @classmethod
    def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
        """Recreate a FunctionCall from the output of _as_dict() and its already-deserialized components."""
        assert 'fn' in d
        assert 'args' in d
        assert 'kwargs' in d
        # reassemble bound args
        fn = func.Function.from_dict(d['fn'])
        param_names = list(fn.signature.parameters.keys())
        bound_args = {param_names[i]: arg if idx == -1 else components[idx] for i, (idx, arg) in enumerate(d['args'])}
        bound_args.update(
            {param_name: val if idx == -1 else components[idx] for param_name, (idx, val) in d['kwargs'].items()})
        group_by_exprs = components[d['group_by_start_idx']:d['group_by_stop_idx']]
        order_by_exprs = components[d['order_by_start_idx']:]
        fn_call = cls(fn, bound_args, group_by_clause=group_by_exprs, order_by_clause=order_by_exprs)
        return fn_call
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
from typing import Union
|
|
3
|
+
import enum
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
# Python types corresponding to our literal types
|
|
7
|
+
LiteralPythonTypes = Union[str, int, float, bool, datetime.datetime, datetime.date]
|
|
8
|
+
|
|
9
|
+
def print_slice(s: slice) -> str:
    """Render a slice in subscript notation, e.g. '1:5', '::2' or ':'.

    Missing bounds are rendered as empty strings; the second ':' only appears when a step is set.
    """
    def _fmt(bound) -> str:
        return '' if bound is None else str(bound)

    text = _fmt(s.start) + ':' + _fmt(s.stop)
    if s.step is not None:
        text += ':' + str(s.step)
    return text
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class ComparisonOperator(enum.Enum):
    """Comparison operators; str() yields the corresponding Python operator symbol."""
    LT = 0
    LE = 1
    EQ = 2
    NE = 3
    GT = 4
    GE = 5

    def __str__(self) -> str:
        # every member needs an entry: returning None from __str__ makes str() raise TypeError
        symbols = {
            'LT': '<',
            'LE': '<=',
            'EQ': '==',
            'NE': '!=',
            'GT': '>',
            'GE': '>=',
        }
        return symbols[self.name]
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class LogicalOperator(enum.Enum):
    """Boolean connectives; str() yields the operator symbol ('&', '|', '~')."""
    AND = 0
    OR = 1
    NOT = 2

    def __str__(self) -> str:
        symbols = {'AND': '&', 'OR': '|', 'NOT': '~'}
        return symbols[self.name]
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class ArithmeticOperator(enum.Enum):
    """Arithmetic operators; str() yields the operator symbol ('+', '-', '*', '/', '%')."""
    ADD = 0
    SUB = 1
    MUL = 2
    DIV = 3
    MOD = 4

    def __str__(self) -> str:
        symbols = {'ADD': '+', 'SUB': '-', 'MUL': '*', 'DIV': '/', 'MOD': '%'}
        return symbols[self.name]
|
|
69
|
+
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import Optional, List, Any, Dict, Tuple, Union
|
|
3
|
+
|
|
4
|
+
import PIL
|
|
5
|
+
import sqlalchemy as sql
|
|
6
|
+
|
|
7
|
+
from .expr import Expr
|
|
8
|
+
from .column_ref import ColumnRef
|
|
9
|
+
from .function_call import FunctionCall
|
|
10
|
+
from .image_similarity_predicate import ImageSimilarityPredicate
|
|
11
|
+
from .data_row import DataRow
|
|
12
|
+
from .row_builder import RowBuilder
|
|
13
|
+
import pixeltable.catalog as catalog
|
|
14
|
+
import pixeltable.func as func
|
|
15
|
+
import pixeltable.exceptions as excs
|
|
16
|
+
import pixeltable.type_system as ts
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# TODO: this doesn't dig up all attrs for actual jpeg images
|
|
20
|
+
def _create_pil_attr_info() -> Dict[str, ts.ColumnType]:
    """Map the public, non-callable attributes of a PIL image to Pixeltable column types.

    A throwaway image is inspected; attributes whose value is None or of a type other than
    str, int or dict are left out of the result.
    """
    # `import PIL` on its own does not load the Image submodule, so import it explicitly
    import PIL.Image

    # create random Image to inspect for attrs
    img = PIL.Image.new('RGB', (100, 100))
    # we're only interested in public attrs (including properties)
    result: Dict[str, ts.ColumnType] = {}
    for name in dir(img):
        if name.startswith('_'):
            continue
        val = getattr(img, name)
        if callable(val) or val is None:
            continue
        if isinstance(val, str):
            result[name] = ts.StringType()
        elif isinstance(val, int):
            result[name] = ts.IntType()
        elif isinstance(val, dict):
            # check the value's type; comparing the value against the `dict` class itself never matches
            result[name] = ts.JsonType()
    return result
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class ImageMemberAccess(Expr):
    """
    Access of either an attribute or function member of PIL.Image.Image.
    Ex.: tbl.img_col_ref.rotate(90), tbl.img_col_ref.width
    TODO: remove this class and use FunctionCall instead (attributes to be replaced by functions)
    """
    attr_info = _create_pil_attr_info()

    def __init__(self, member_name: str, caller: Expr):
        """
        Args:
            member_name: 'nearest', a key of attr_info, or the name of a registered Image method
            caller: the expression whose member is accessed

        Raises:
            excs.Error: if member_name is none of the above, or matches more than one registered method
        """
        # only set for registered Image methods; stays None for 'nearest' and plain attributes
        img_method = None
        if member_name == 'nearest':
            col_type = ts.InvalidType()  # requires FunctionCall to return value
        elif member_name in self.attr_info:
            col_type = self.attr_info[member_name]
        else:
            candidates = func.FunctionRegistry.get().get_type_methods(member_name, ts.ColumnType.Type.IMAGE)
            if len(candidates) == 0:
                raise excs.Error(f'Unknown Image member: {member_name}')
            if len(candidates) > 1:
                raise excs.Error(f'Ambiguous Image method: {member_name}')
            img_method = candidates[0]
            col_type = ts.InvalidType()  # requires FunctionCall to return value
        super().__init__(col_type)
        self.img_method = img_method
        self.member_name = member_name
        self.components = [caller]
        self.id = self._create_id()

    def default_column_name(self) -> Optional[str]:
        return self.member_name.replace('.', '_')

    @property
    def _caller(self) -> Expr:
        return self.components[0]

    def __str__(self) -> str:
        return f'{self._caller}.{self.member_name}'

    def _as_dict(self) -> Dict:
        return {'member_name': self.member_name, **super()._as_dict()}

    @classmethod
    def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
        assert 'member_name' in d
        assert len(components) == 1
        return cls(d['member_name'], components[0])

    def __call__(self, *args, **kwargs) -> Union[FunctionCall, ImageSimilarityPredicate]:
        """Turn the member access into a call.

        'nearest' produces an ImageSimilarityPredicate; a registered Image method is invoked with the
        caller as its first argument and the result is flagged as a method call.

        Raises:
            excs.Error: for invalid nearest() usage, or if the member is an attribute rather than a method
        """
        caller = self._caller
        call_signature = f'({",".join([type(arg).__name__ for arg in args])})'
        if self.member_name == 'nearest':
            # - caller must be ColumnRef
            # - signature is (Union[PIL.Image.Image, str])
            if not isinstance(caller, ColumnRef):
                raise excs.Error('nearest(): caller must be an image column')
            if len(args) != 1 or (not isinstance(args[0], PIL.Image.Image) and not isinstance(args[0], str)):
                raise excs.Error(f'nearest(): requires a PIL.Image.Image or str, got {call_signature} instead')
            return ImageSimilarityPredicate(
                caller,
                img=args[0] if isinstance(args[0], PIL.Image.Image) else None,
                text=args[0] if isinstance(args[0], str) else None)

        if self.img_method is None:
            # plain attributes (e.g. width) have no method to dispatch to
            raise excs.Error(f"Image member '{self.member_name}' is not callable")
        result = self.img_method(*[caller, *args], **kwargs)
        result.is_method_call = True
        return result

    def _equals(self, other: ImageMemberAccess) -> bool:
        return self.member_name == other.member_name

    def _id_attrs(self) -> List[Tuple[str, Any]]:
        return super()._id_attrs() + [('member_name', self.member_name)]

    def sql_expr(self) -> Optional[sql.ClauseElement]:
        return None

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        """Store the caller value's attribute in this expr's slot, or None if the value lacks it."""
        caller_val = data_row[self._caller.slot_idx]
        try:
            data_row[self.slot_idx] = getattr(caller_val, self.member_name)
        except AttributeError:
            data_row[self.slot_idx] = None
|
|
115
|
+
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import Optional, List, Any, Dict, Tuple
|
|
3
|
+
|
|
4
|
+
import sqlalchemy as sql
|
|
5
|
+
import PIL
|
|
6
|
+
import numpy as np
|
|
7
|
+
|
|
8
|
+
from .expr import Expr
|
|
9
|
+
from .predicate import Predicate
|
|
10
|
+
from .column_ref import ColumnRef
|
|
11
|
+
from .data_row import DataRow
|
|
12
|
+
from .row_builder import RowBuilder
|
|
13
|
+
import pixeltable.catalog as catalog
|
|
14
|
+
import pixeltable.utils.clip as clip
|
|
15
|
+
|
|
16
|
+
class ImageSimilarityPredicate(Predicate):
    """Predicate produced by `<image column>.nearest(...)`, holding either a query image or a query text."""

    def __init__(self, img_col_ref: ColumnRef, img: Optional[PIL.Image.Image] = None, text: Optional[str] = None):
        # exactly one of img and text must be supplied
        assert (img is None) != (text is None)
        super().__init__()
        self.img_col_ref = img_col_ref
        self.components = [img_col_ref]
        self.img, self.text = img, text
        self.id = self._create_id()

    def embedding(self) -> np.ndarray:
        """Return the CLIP embedding of the query text if set, otherwise of the query image."""
        if self.text is None:
            return clip.embed_image(self.img)
        return clip.embed_text(self.text)

    def __str__(self) -> str:
        target = self.text if self.img is None else '<img>'
        return f'{str(self.img_col_ref)}.nearest({target})'

    def _equals(self, other: ImageSimilarityPredicate) -> bool:
        return False

    def _id_attrs(self) -> List[Tuple[str, Any]]:
        own_attrs = [('img', id(self.img)), ('text', self.text)]
        return super()._id_attrs() + own_attrs

    def sql_expr(self) -> Optional[sql.ClauseElement]:
        return None

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        assert False

    def _as_dict(self) -> Dict:
        assert False, 'not implemented'
        # TODO: convert self.img into a serializable string
        return {'img': self.img, 'text': self.text, **super()._as_dict()}

    @classmethod
    def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
        assert 'img' in d
        assert 'text' in d
        assert len(components) == 1
        (img_col_ref,) = components
        return cls(img_col_ref, d['img'], d['text'])
|
|
58
|
+
|