pixeltable 0.1.2__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (140) hide show
  1. pixeltable/__init__.py +21 -4
  2. pixeltable/catalog/__init__.py +13 -0
  3. pixeltable/catalog/catalog.py +159 -0
  4. pixeltable/catalog/column.py +200 -0
  5. pixeltable/catalog/dir.py +32 -0
  6. pixeltable/catalog/globals.py +33 -0
  7. pixeltable/catalog/insertable_table.py +191 -0
  8. pixeltable/catalog/named_function.py +36 -0
  9. pixeltable/catalog/path.py +58 -0
  10. pixeltable/catalog/path_dict.py +139 -0
  11. pixeltable/catalog/schema_object.py +39 -0
  12. pixeltable/catalog/table.py +581 -0
  13. pixeltable/catalog/table_version.py +749 -0
  14. pixeltable/catalog/table_version_path.py +133 -0
  15. pixeltable/catalog/view.py +203 -0
  16. pixeltable/client.py +520 -31
  17. pixeltable/dataframe.py +540 -349
  18. pixeltable/env.py +373 -48
  19. pixeltable/exceptions.py +12 -21
  20. pixeltable/exec/__init__.py +9 -0
  21. pixeltable/exec/aggregation_node.py +78 -0
  22. pixeltable/exec/cache_prefetch_node.py +113 -0
  23. pixeltable/exec/component_iteration_node.py +79 -0
  24. pixeltable/exec/data_row_batch.py +95 -0
  25. pixeltable/exec/exec_context.py +22 -0
  26. pixeltable/exec/exec_node.py +61 -0
  27. pixeltable/exec/expr_eval_node.py +217 -0
  28. pixeltable/exec/in_memory_data_node.py +69 -0
  29. pixeltable/exec/media_validation_node.py +43 -0
  30. pixeltable/exec/sql_scan_node.py +225 -0
  31. pixeltable/exprs/__init__.py +24 -0
  32. pixeltable/exprs/arithmetic_expr.py +102 -0
  33. pixeltable/exprs/array_slice.py +71 -0
  34. pixeltable/exprs/column_property_ref.py +77 -0
  35. pixeltable/exprs/column_ref.py +105 -0
  36. pixeltable/exprs/comparison.py +77 -0
  37. pixeltable/exprs/compound_predicate.py +98 -0
  38. pixeltable/exprs/data_row.py +187 -0
  39. pixeltable/exprs/expr.py +586 -0
  40. pixeltable/exprs/expr_set.py +39 -0
  41. pixeltable/exprs/function_call.py +380 -0
  42. pixeltable/exprs/globals.py +69 -0
  43. pixeltable/exprs/image_member_access.py +115 -0
  44. pixeltable/exprs/image_similarity_predicate.py +58 -0
  45. pixeltable/exprs/inline_array.py +107 -0
  46. pixeltable/exprs/inline_dict.py +101 -0
  47. pixeltable/exprs/is_null.py +38 -0
  48. pixeltable/exprs/json_mapper.py +121 -0
  49. pixeltable/exprs/json_path.py +159 -0
  50. pixeltable/exprs/literal.py +54 -0
  51. pixeltable/exprs/object_ref.py +41 -0
  52. pixeltable/exprs/predicate.py +44 -0
  53. pixeltable/exprs/row_builder.py +355 -0
  54. pixeltable/exprs/rowid_ref.py +94 -0
  55. pixeltable/exprs/type_cast.py +53 -0
  56. pixeltable/exprs/variable.py +45 -0
  57. pixeltable/func/__init__.py +9 -0
  58. pixeltable/func/aggregate_function.py +194 -0
  59. pixeltable/func/batched_function.py +53 -0
  60. pixeltable/func/callable_function.py +69 -0
  61. pixeltable/func/expr_template_function.py +82 -0
  62. pixeltable/func/function.py +110 -0
  63. pixeltable/func/function_registry.py +227 -0
  64. pixeltable/func/globals.py +36 -0
  65. pixeltable/func/nos_function.py +202 -0
  66. pixeltable/func/signature.py +166 -0
  67. pixeltable/func/udf.py +163 -0
  68. pixeltable/functions/__init__.py +52 -103
  69. pixeltable/functions/eval.py +216 -0
  70. pixeltable/functions/fireworks.py +61 -0
  71. pixeltable/functions/huggingface.py +120 -0
  72. pixeltable/functions/image.py +16 -0
  73. pixeltable/functions/openai.py +88 -0
  74. pixeltable/functions/pil/image.py +148 -7
  75. pixeltable/functions/string.py +13 -0
  76. pixeltable/functions/together.py +27 -0
  77. pixeltable/functions/util.py +41 -0
  78. pixeltable/functions/video.py +62 -0
  79. pixeltable/iterators/__init__.py +3 -0
  80. pixeltable/iterators/base.py +48 -0
  81. pixeltable/iterators/document.py +311 -0
  82. pixeltable/iterators/video.py +89 -0
  83. pixeltable/metadata/__init__.py +54 -0
  84. pixeltable/metadata/converters/convert_10.py +18 -0
  85. pixeltable/metadata/schema.py +211 -0
  86. pixeltable/plan.py +656 -0
  87. pixeltable/store.py +413 -182
  88. pixeltable/tests/conftest.py +143 -86
  89. pixeltable/tests/test_audio.py +65 -0
  90. pixeltable/tests/test_catalog.py +27 -0
  91. pixeltable/tests/test_client.py +14 -14
  92. pixeltable/tests/test_component_view.py +372 -0
  93. pixeltable/tests/test_dataframe.py +433 -0
  94. pixeltable/tests/test_dirs.py +78 -62
  95. pixeltable/tests/test_document.py +117 -0
  96. pixeltable/tests/test_exprs.py +591 -135
  97. pixeltable/tests/test_function.py +297 -67
  98. pixeltable/tests/test_functions.py +283 -1
  99. pixeltable/tests/test_migration.py +43 -0
  100. pixeltable/tests/test_nos.py +54 -0
  101. pixeltable/tests/test_snapshot.py +208 -0
  102. pixeltable/tests/test_table.py +1086 -258
  103. pixeltable/tests/test_transactional_directory.py +42 -0
  104. pixeltable/tests/test_types.py +5 -11
  105. pixeltable/tests/test_video.py +149 -34
  106. pixeltable/tests/test_view.py +530 -0
  107. pixeltable/tests/utils.py +186 -45
  108. pixeltable/tool/create_test_db_dump.py +149 -0
  109. pixeltable/type_system.py +490 -133
  110. pixeltable/utils/__init__.py +17 -46
  111. pixeltable/utils/clip.py +12 -15
  112. pixeltable/utils/coco.py +136 -0
  113. pixeltable/utils/documents.py +39 -0
  114. pixeltable/utils/filecache.py +195 -0
  115. pixeltable/utils/help.py +11 -0
  116. pixeltable/utils/media_store.py +76 -0
  117. pixeltable/utils/parquet.py +126 -0
  118. pixeltable/utils/pytorch.py +172 -0
  119. pixeltable/utils/s3.py +13 -0
  120. pixeltable/utils/sql.py +17 -0
  121. pixeltable/utils/transactional_directory.py +35 -0
  122. pixeltable-0.2.1.dist-info/LICENSE +18 -0
  123. pixeltable-0.2.1.dist-info/METADATA +119 -0
  124. pixeltable-0.2.1.dist-info/RECORD +125 -0
  125. {pixeltable-0.1.2.dist-info → pixeltable-0.2.1.dist-info}/WHEEL +1 -1
  126. pixeltable/catalog.py +0 -1421
  127. pixeltable/exprs.py +0 -1745
  128. pixeltable/function.py +0 -269
  129. pixeltable/functions/clip.py +0 -10
  130. pixeltable/functions/pil/__init__.py +0 -23
  131. pixeltable/functions/tf.py +0 -21
  132. pixeltable/index.py +0 -57
  133. pixeltable/tests/test_dict.py +0 -24
  134. pixeltable/tests/test_tf.py +0 -69
  135. pixeltable/tf.py +0 -33
  136. pixeltable/utils/tf.py +0 -33
  137. pixeltable/utils/video.py +0 -32
  138. pixeltable-0.1.2.dist-info/LICENSE +0 -201
  139. pixeltable-0.1.2.dist-info/METADATA +0 -89
  140. pixeltable-0.1.2.dist-info/RECORD +0 -37
@@ -0,0 +1,380 @@
1
+ from __future__ import annotations
2
+
3
+ import inspect
4
+ import json
5
+ import sys
6
+ from typing import Optional, List, Any, Dict, Tuple
7
+
8
+ import sqlalchemy as sql
9
+
10
+ import pixeltable.catalog as catalog
11
+ import pixeltable.exceptions as excs
12
+ import pixeltable.func as func
13
+ import pixeltable.type_system as ts
14
+ from .data_row import DataRow
15
+ from .expr import Expr
16
+ from .inline_array import InlineArray
17
+ from .inline_dict import InlineDict
18
+ from .row_builder import RowBuilder
19
+ from .rowid_ref import RowidRef
20
+
21
+
22
class FunctionCall(Expr):
    """Expression that represents the call of a Pixeltable function with bound arguments.

    Arguments that are Exprs are recorded in self.components; constant arguments are stored inline in
    self.args/self.kwargs. self.components is laid out as:
    - argument exprs (positional, then keyword)
    - group_by exprs: self.components[self.group_by_start_idx:self.group_by_stop_idx]
    - the instantiated template expr, for ExprTemplateFunctions (at self.fn_expr_idx)
    - order_by exprs: self.components[self.order_by_start_idx:]
    """

    def __init__(
            self, fn: func.Function, bound_args: Dict[str, Any], order_by_clause: Optional[List[Any]] = None,
            group_by_clause: Optional[List[Any]] = None, is_method_call: bool = False):
        """
        Args:
            fn: the function being called
            bound_args: maps parameter names to arguments (Exprs or literals); check_args() updates it in place
            order_by_clause: ordering expressions; the first element must be an Expr
            group_by_clause: either a list whose first element is a catalog.Table (grouping is then done on that
                table's rowid columns) or a list of Exprs
            is_method_call: if True, display_str() renders the call as '<first arg>.<fn name>(...)'

        Raises:
            excs.Error: if an argument doesn't match fn's signature or order_by_clause doesn't start with an Expr
        """
        if order_by_clause is None:
            order_by_clause = []
        if group_by_clause is None:
            group_by_clause = []
        signature = fn.signature
        super().__init__(signature.get_return_type(bound_args))
        self.fn = fn
        self.is_method_call = is_method_call
        self.check_args(signature, bound_args)

        self.agg_init_args: Dict[str, Any] = {}
        if self.is_agg_fn_call:
            # we separate out the init args for the aggregator
            self.agg_init_args = {
                arg_name: arg for arg_name, arg in bound_args.items() if arg_name in fn.init_param_names
            }
            bound_args = {arg_name: arg for arg_name, arg in bound_args.items() if arg_name not in fn.init_param_names}

        # construct components, args, kwargs
        self.components: List[Expr] = []

        # Tuple[int, Any]:
        # - for Exprs: (index into components, None)
        # - otherwise: (-1, val)
        self.args: List[Tuple[int, Any]] = []
        self.kwargs: Dict[str, Tuple[int, Any]] = {}

        # we record the types of non-variable parameters for runtime type checks
        self.arg_types: List[ts.ColumnType] = []
        self.kwarg_types: Dict[str, ts.ColumnType] = {}
        # the prefix of parameters that are bound can be passed by position
        for param in fn.py_signature.parameters.values():
            if param.name not in bound_args or param.kind == inspect.Parameter.KEYWORD_ONLY:
                break
            arg = bound_args[param.name]
            if isinstance(arg, Expr):
                self.args.append((len(self.components), None))
                self.components.append(arg.copy())
            else:
                self.args.append((-1, arg))
            if param.kind != inspect.Parameter.VAR_POSITIONAL and param.kind != inspect.Parameter.VAR_KEYWORD:
                self.arg_types.append(signature.parameters[param.name].col_type)

        # the remaining args are passed as keywords; we walk bound_args (rather than a set of names) so that
        # the order of the keyword components is deterministic
        positional_names = set(list(fn.py_signature.parameters.keys())[:len(self.args)])
        for param_name, arg in bound_args.items():
            if param_name in positional_names:
                continue
            if isinstance(arg, Expr):
                self.kwargs[param_name] = (len(self.components), None)
                self.components.append(arg.copy())
            else:
                self.kwargs[param_name] = (-1, arg)
            if fn.py_signature.parameters[param_name].kind != inspect.Parameter.VAR_KEYWORD:
                self.kwarg_types[param_name] = signature.parameters[param_name].col_type

        # window function state:
        # self.components[self.group_by_start_idx:self.group_by_stop_idx] contains group_by exprs
        self.group_by_start_idx, self.group_by_stop_idx = 0, 0
        if len(group_by_clause) > 0:
            if isinstance(group_by_clause[0], catalog.Table):
                group_by_exprs = self._create_rowid_refs(group_by_clause[0])
            else:
                assert isinstance(group_by_clause[0], Expr)
                group_by_exprs = group_by_clause
            # record grouping exprs in self.components, we need to evaluate them to get partition vals
            self.group_by_start_idx = len(self.components)
            self.group_by_stop_idx = len(self.components) + len(group_by_exprs)
            self.components.extend(group_by_exprs)

        if isinstance(self.fn, func.ExprTemplateFunction):
            # we instantiate the template to create an Expr that can be evaluated and record that as a component
            fn_expr = self.fn.instantiate(**bound_args)
            self.components.append(fn_expr)
            self.fn_expr_idx = len(self.components) - 1
        else:
            self.fn_expr_idx = sys.maxsize

        # we want to make sure that order_by_clause get assigned slot_idxs, even though we won't need to evaluate them
        # (that's done in SQL)
        if len(order_by_clause) > 0 and not isinstance(order_by_clause[0], Expr):
            raise excs.Error(
                f'order_by argument needs to be a Pixeltable expression, but instead is a {type(order_by_clause[0])}')
        # don't add components after this, everything after order_by_start_idx is part of the order_by clause
        self.order_by_start_idx = len(self.components)
        self.components.extend(order_by_clause)

        self.constant_args = {param_name for param_name, arg in bound_args.items() if not isinstance(arg, Expr)}
        # execution state for aggregate functions
        self.aggregator: Optional[Any] = None
        self.current_partition_vals: Optional[List[Any]] = None

        self.id = self._create_id()

    def _create_rowid_refs(self, tbl: catalog.Table) -> List[Expr]:
        """Return one RowidRef per rowid column of tbl's table version."""
        target = tbl.tbl_version_path.tbl_version
        return [RowidRef(target, i) for i in range(target.num_rowid_columns())]

    @classmethod
    def check_args(cls, signature: func.Signature, bound_args: Dict[str, Any]) -> None:
        """Checks that bound_args are compatible with signature.

        Convert literals to the correct type and update bound_args in place, if necessary.

        Raises:
            excs.Error: if a constant argument is not json-serializable or cannot be converted to the parameter
                type, or if the type of an Expr argument doesn't match the parameter type
        """
        # only the values of existing keys are replaced below, which is safe while iterating over items()
        for param_name, arg in bound_args.items():
            param = signature.parameters[param_name]
            if isinstance(arg, dict):
                try:
                    arg = InlineDict(arg)
                    bound_args[param_name] = arg
                except excs.Error:
                    # this didn't work, but it might be a literal
                    pass
            if isinstance(arg, list) or isinstance(arg, tuple):
                try:
                    # If the column type is JsonType, force the literal to be JSON
                    arg = InlineArray(arg, force_json=param.col_type is not None and param.col_type.is_json_type())
                    bound_args[param_name] = arg
                except excs.Error:
                    # this didn't work, but it might be a literal
                    pass

            if not isinstance(arg, Expr):
                # make sure that non-Expr args are json-serializable and are literals of the correct type
                try:
                    _ = json.dumps(arg)
                except TypeError:
                    raise excs.Error(f"Argument for parameter '{param_name}' is not json-serializable: {arg}")
                if arg is not None:
                    try:
                        bound_args[param_name] = param.col_type.create_literal(arg)
                    except TypeError as e:
                        msg = str(e)
                        raise excs.Error(f"Argument for parameter '{param_name}': {msg[0].lower() + msg[1:]}")
                continue

            # variable parameters don't get type-checked, but they both need to be json-typed
            if param.kind == inspect.Parameter.VAR_POSITIONAL:
                assert isinstance(arg, InlineArray)
                arg.col_type = ts.JsonType()
                continue
            if param.kind == inspect.Parameter.VAR_KEYWORD:
                assert isinstance(arg, InlineDict)
                arg.col_type = ts.JsonType()
                continue

            # look up the type of *this* parameter; it must not be carried over from an earlier loop iteration
            param_type = param.col_type
            if not param_type.is_supertype_of(arg.col_type):
                raise excs.Error(
                    f'Parameter {param_name}: argument type {arg.col_type} does not match parameter type '
                    f'{param_type}')

    def is_nos_call(self) -> bool:
        """Return True if the called function is a func.NOSFunction."""
        return isinstance(self.fn, func.NOSFunction)

    def _equals(self, other: FunctionCall) -> bool:
        """Compare the attributes specific to FunctionCall (the same ones that _id_attrs() reports)."""
        if self.fn != other.fn:
            return False
        if self.args != other.args:
            return False
        # keyword arguments are part of the call's identity as well (see _id_attrs())
        if self.kwargs != other.kwargs:
            return False
        if self.group_by_start_idx != other.group_by_start_idx:
            return False
        if self.group_by_stop_idx != other.group_by_stop_idx:
            return False
        if self.order_by_start_idx != other.order_by_start_idx:
            return False
        return True

    def _id_attrs(self) -> List[Tuple[str, Any]]:
        """Return the (name, value) pairs that feed into this expr's id, in addition to those of the base class."""
        return super()._id_attrs() + [
            ('fn', id(self.fn)),  # use the function pointer, not the fqn, which isn't set for lambdas
            ('args', self.args),
            ('kwargs', self.kwargs),
            ('group_by_start_idx', self.group_by_start_idx),
            ('group_by_stop_idx', self.group_by_stop_idx),
            ('order_by_start_idx', self.order_by_start_idx)
        ]

    def __str__(self) -> str:
        return self.display_str()

    def display_str(self, inline: bool = True) -> str:
        """Return a printable form of the call.

        Args:
            inline: if True, arguments are separated by ', '; otherwise each argument goes on its own line
        """
        if self.is_method_call:
            return f'{self.components[0]}.{self.fn.name}({self._print_args(1, inline)})'
        else:
            fn_name = self.fn.display_name if self.fn.display_name != '' else 'anonymous_fn'
            return f'{fn_name}({self._print_args(inline=inline)})'

    def _print_args(self, start_idx: int = 0, inline: bool = True) -> str:
        """Return the argument list as a string, skipping the first start_idx positional args."""
        arg_strs = [
            str(arg) if idx == -1 else str(self.components[idx]) for idx, arg in self.args[start_idx:]
        ]

        def print_arg(arg: Any) -> str:
            # string constants are quoted
            return f"'{arg}'" if isinstance(arg, str) else str(arg)

        arg_strs.extend([
            f'{param_name}={print_arg(arg) if idx == -1 else str(self.components[idx])}'
            for param_name, (idx, arg) in self.kwargs.items()
        ])
        if len(self.order_by) > 0:
            if self.fn.requires_order_by:
                arg_strs.insert(0, Expr.print_list(self.order_by))
            else:
                arg_strs.append(f'order_by={Expr.print_list(self.order_by)}')
        if len(self.group_by) > 0:
            arg_strs.append(f'group_by={Expr.print_list(self.group_by)}')
        # TODO: figure out the function name
        separator = ', ' if inline else ',\n '
        return separator.join(arg_strs)

    def has_group_by(self) -> bool:
        """Return True if group_by exprs were recorded (group_by_stop_idx is only non-zero in that case)."""
        return self.group_by_stop_idx != 0

    @property
    def group_by(self) -> List[Expr]:
        """The group_by exprs (empty list if there are none)."""
        return self.components[self.group_by_start_idx:self.group_by_stop_idx]

    @property
    def order_by(self) -> List[Expr]:
        """The order_by exprs (empty list if there are none)."""
        return self.components[self.order_by_start_idx:]

    @property
    def is_window_fn_call(self) -> bool:
        """True for an aggregate function that allows windowing and is used in a way that requires it:
        it doesn't allow standard aggregation, or has a group_by, or has an order_by that the function
        doesn't require.
        """
        return isinstance(self.fn, func.AggregateFunction) and self.fn.allows_window and \
            (not self.fn.allows_std_agg \
             or self.has_group_by() \
             or (len(self.order_by) > 0 and not self.fn.requires_order_by))

    def get_window_sort_exprs(self) -> Tuple[List[Expr], List[Expr]]:
        """Return (group_by exprs, order_by exprs)."""
        return self.group_by, self.order_by

    @property
    def is_agg_fn_call(self) -> bool:
        """True if the called function is a func.AggregateFunction."""
        return isinstance(self.fn, func.AggregateFunction)

    def get_agg_order_by(self) -> List[Expr]:
        """Return the order_by exprs; only valid for aggregate function calls."""
        assert self.is_agg_fn_call
        return self.order_by

    def sql_expr(self) -> Optional[sql.ClauseElement]:
        # TODO: implement for standard aggregate functions
        return None

    def reset_agg(self) -> None:
        """
        Init agg state
        """
        assert self.is_agg_fn_call
        assert isinstance(self.fn, func.AggregateFunction)
        self.aggregator = self.fn.agg_cls(**self.agg_init_args)

    def update(self, data_row: DataRow) -> None:
        """
        Update agg state
        """
        assert self.is_agg_fn_call
        args, kwargs = self._make_args(data_row)
        self.aggregator.update(*args, **kwargs)

    def _make_args(self, data_row: DataRow) -> Tuple[List[Any], Dict[str, Any]]:
        """Return args and kwargs, constructed for data_row"""
        kwargs: Dict[str, Any] = {}
        for param_name, (component_idx, arg) in self.kwargs.items():
            # constants are stored inline, everything else is read from the component's slot
            val = arg if component_idx == -1 else data_row[self.components[component_idx].slot_idx]
            param = self.fn.signature.parameters[param_name]
            if param.kind == inspect.Parameter.VAR_KEYWORD:
                # expand **kwargs parameter
                kwargs.update(val)
            else:
                assert param.kind != inspect.Parameter.VAR_POSITIONAL
                kwargs[param_name] = val

        args: List[Any] = []
        for param_idx, (component_idx, arg) in enumerate(self.args):
            val = arg if component_idx == -1 else data_row[self.components[component_idx].slot_idx]
            param = self.fn.signature.parameters_by_pos[param_idx]
            if param.kind == inspect.Parameter.VAR_POSITIONAL:
                # expand *args parameter
                assert isinstance(val, list)
                args.extend(val)
            elif param.kind == inspect.Parameter.VAR_KEYWORD:
                # expand **kwargs parameter
                assert isinstance(val, dict)
                kwargs.update(val)
            else:
                args.append(val)
        return args, kwargs

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        """Compute the value of this call for data_row and store it in data_row[self.slot_idx].

        The result is None if a None value is passed for a parameter whose type isn't nullable.
        """
        args, kwargs = self._make_args(data_row)
        signature = self.fn.signature
        if signature.parameters is not None:
            # check for nulls
            for i in range(len(self.arg_types)):
                if args[i] is None and not self.arg_types[i].nullable:
                    # we can't evaluate this function
                    data_row[self.slot_idx] = None
                    return
            for param_name, param_type in self.kwarg_types.items():
                if kwargs[param_name] is None and not param_type.nullable:
                    # we can't evaluate this function
                    data_row[self.slot_idx] = None
                    return

        if isinstance(self.fn, func.ExprTemplateFunction):
            # we need to evaluate the template
            # TODO: can we get rid of this extra copy?
            fn_expr = self.components[self.fn_expr_idx]
            data_row[self.slot_idx] = data_row[fn_expr.slot_idx]
        elif isinstance(self.fn, func.CallableFunction):
            data_row[self.slot_idx] = self.fn.py_fn(*args, **kwargs)
        elif self.is_window_fn_call:
            if self.has_group_by():
                if self.current_partition_vals is None:
                    self.current_partition_vals = [None] * len(self.group_by)
                partition_vals = [data_row[e.slot_idx] for e in self.group_by]
                # the aggregator check covers a first partition whose values are all None: it compares equal
                # to the initial current_partition_vals, but still needs an aggregator
                if self.aggregator is None or partition_vals != self.current_partition_vals:
                    # new partition
                    self.aggregator = self.fn.agg_cls(**self.agg_init_args)
                    self.current_partition_vals = partition_vals
            elif self.aggregator is None:
                self.aggregator = self.fn.agg_cls(**self.agg_init_args)
            # pass keyword arguments along as well, the same way update() does
            self.aggregator.update(*args, **kwargs)
            data_row[self.slot_idx] = self.aggregator.value()
        else:
            # standard aggregation: the aggregator was fed via update(), we only need to retrieve the value
            assert self.is_agg_fn_call
            data_row[self.slot_idx] = self.aggregator.value()

    def _as_dict(self) -> Dict:
        """Return a dict representation of this call; _from_dict() is the inverse."""
        result = {
            'fn': self.fn.as_dict(), 'args': self.args, 'kwargs': self.kwargs,
            'group_by_start_idx': self.group_by_start_idx, 'group_by_stop_idx': self.group_by_stop_idx,
            'order_by_start_idx': self.order_by_start_idx,
            **super()._as_dict()
        }
        return result

    @classmethod
    def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
        """Re-create a FunctionCall from the output of _as_dict() and its already-deserialized components."""
        assert 'fn' in d
        assert 'args' in d
        assert 'kwargs' in d
        # reassemble bound args
        fn = func.Function.from_dict(d['fn'])
        param_names = list(fn.signature.parameters.keys())
        bound_args = {param_names[i]: arg if idx == -1 else components[idx] for i, (idx, arg) in enumerate(d['args'])}
        bound_args.update(
            {param_name: val if idx == -1 else components[idx] for param_name, (idx, val) in d['kwargs'].items()})
        group_by_exprs = components[d['group_by_start_idx']:d['group_by_stop_idx']]
        order_by_exprs = components[d['order_by_start_idx']:]
        fn_call = cls(fn, bound_args, group_by_clause=group_by_exprs, order_by_clause=order_by_exprs)
        return fn_call
@@ -0,0 +1,69 @@
1
+ import datetime
2
+ from typing import Union
3
+ import enum
4
+
5
+
6
# Python types corresponding to our literal types
# NOTE(review): bool is a subclass of int and datetime.datetime is a subclass of datetime.date, so isinstance()
# checks against individual members of this Union overlap; confirm that users of this alias account for that.
LiteralPythonTypes = Union[str, int, float, bool, datetime.datetime, datetime.date]
8
+
9
def print_slice(s: slice) -> str:
    """Return the subscript notation for s, e.g. '1:5', ':3' or '::2'.

    Bounds that are None are rendered as empty strings; the second ':' only appears if s has a step.
    """
    def bound_str(bound: object) -> str:
        return '' if bound is None else str(bound)

    text = bound_str(s.start) + ':' + bound_str(s.stop)
    if s.step is not None:
        text += ':' + str(s.step)
    return text
14
+
15
+
16
class ComparisonOperator(enum.Enum):
    """Comparison operators; str() returns the operator's Python symbol."""
    LT = 0
    LE = 1
    EQ = 2
    NE = 3
    GT = 4
    GE = 5

    def __str__(self) -> str:
        # every member needs an entry: __str__ must return a string, a None result makes str() raise TypeError
        symbols = {'LT': '<', 'LE': '<=', 'EQ': '==', 'NE': '!=', 'GT': '>', 'GE': '>='}
        return symbols[self.name]
35
+
36
+
37
class LogicalOperator(enum.Enum):
    """Logical operators; str() returns the symbol of the corresponding Python bitwise operator."""
    AND = 0
    OR = 1
    NOT = 2

    def __str__(self) -> str:
        # symbol lookup by member name
        symbols = {'AND': '&', 'OR': '|', 'NOT': '~'}
        return symbols[self.name]
49
+
50
+
51
class ArithmeticOperator(enum.Enum):
    """Arithmetic operators; str() returns the operator's Python symbol."""
    ADD = 0
    SUB = 1
    MUL = 2
    DIV = 3
    MOD = 4

    def __str__(self) -> str:
        # member values are 0..4, so they double as indices into the symbol tuple
        return ('+', '-', '*', '/', '%')[self.value]
69
+
@@ -0,0 +1,115 @@
1
+ from __future__ import annotations
2
+ from typing import Optional, List, Any, Dict, Tuple, Union
3
+
4
+ import PIL
5
+ import sqlalchemy as sql
6
+
7
+ from .expr import Expr
8
+ from .column_ref import ColumnRef
9
+ from .function_call import FunctionCall
10
+ from .image_similarity_predicate import ImageSimilarityPredicate
11
+ from .data_row import DataRow
12
+ from .row_builder import RowBuilder
13
+ import pixeltable.catalog as catalog
14
+ import pixeltable.func as func
15
+ import pixeltable.exceptions as excs
16
+ import pixeltable.type_system as ts
17
+
18
+
19
+ # TODO: this doesn't dig up all attrs for actual jpeg images
20
def _create_pil_attr_info() -> Dict[str, ts.ColumnType]:
    """Return a mapping from the names of public, non-callable PIL image attributes to their column types.

    The attributes are discovered by inspecting a freshly created RGB image; only attributes whose value on
    that image is a str, int or dict are included (attributes that are None are skipped).
    """
    # 'import PIL' by itself doesn't guarantee that the PIL.Image submodule has been loaded
    import PIL.Image

    # create random Image to inspect for attrs
    img = PIL.Image.new('RGB', (100, 100))
    # we're only interested in public attrs (including properties)
    result: Dict[str, ts.ColumnType] = {}
    for name in dir(img):
        value = getattr(img, name)
        if callable(value) or name.startswith('_'):
            continue
        if value is None:
            continue
        if isinstance(value, str):
            result[name] = ts.StringType()
        if isinstance(value, int):
            result[name] = ts.IntType()
        # isinstance(), not 'is dict': the attribute holds a dict instance, never the dict type itself
        if isinstance(value, dict):
            result[name] = ts.JsonType()
    return result
35
+
36
+
37
class ImageMemberAccess(Expr):
    """
    Access of either an attribute or function member of PIL.Image.Image.
    Ex.: tbl.img_col_ref.rotate(90), tbl.img_col_ref.width
    TODO: remove this class and use FunctionCall instead (attributes to be replaced by functions)
    """
    # maps attribute name to column type; computed once, when the class is created
    attr_info = _create_pil_attr_info()

    def __init__(self, member_name: str, caller: Expr):
        """
        Args:
            member_name: 'nearest', the name of an image attribute (see attr_info) or the name of a method that
                is registered for the image type in the function registry
            caller: the expr on which the member is accessed

        Raises:
            excs.Error: if member_name is none of the above, or if more than one registered method matches
        """
        if member_name == 'nearest':
            super().__init__(ts.InvalidType())  # requires FunctionCall to return value
        elif member_name in self.attr_info:
            super().__init__(self.attr_info[member_name])
        else:
            candidates = func.FunctionRegistry.get().get_type_methods(member_name, ts.ColumnType.Type.IMAGE)
            if len(candidates) == 0:
                raise excs.Error(f'Unknown Image member: {member_name}')
            if len(candidates) > 1:
                raise excs.Error(f'Ambiguous Image method: {member_name}')
            # only set for method members; __call__() relies on it
            self.img_method = candidates[0]
            super().__init__(ts.InvalidType())  # requires FunctionCall to return value
        self.member_name = member_name
        self.components = [caller]
        self.id = self._create_id()

    def default_column_name(self) -> Optional[str]:
        """Return the member name, with '.' replaced by '_'."""
        return self.member_name.replace('.', '_')

    @property
    def _caller(self) -> Expr:
        """The expr on which the member is accessed (the only component)."""
        return self.components[0]

    def __str__(self) -> str:
        return f'{self._caller}.{self.member_name}'

    def _as_dict(self) -> Dict:
        """Return a dict representation; _from_dict() is the inverse."""
        return {'member_name': self.member_name, **super()._as_dict()}

    @classmethod
    def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
        """Re-create an ImageMemberAccess from the output of _as_dict() and its single component."""
        assert 'member_name' in d
        assert len(components) == 1
        return cls(d['member_name'], components[0])

    def __call__(self, *args, **kwargs) -> Union[FunctionCall, ImageSimilarityPredicate]:
        """Turn the member access into a call.

        Returns:
            an ImageSimilarityPredicate for 'nearest', otherwise the result of calling the registered image
            method with the caller as its first argument (flagged as a method call)

        Raises:
            excs.Error: if the arguments of nearest() are invalid or if the member is an attribute
        """
        caller = self._caller
        if self.member_name == 'nearest':
            # - caller must be ColumnRef
            # - signature is (Union[PIL.Image.Image, str])
            if not isinstance(caller, ColumnRef):
                raise excs.Error(f'nearest(): caller must be an image column')
            if len(args) != 1 or (not isinstance(args[0], PIL.Image.Image) and not isinstance(args[0], str)):
                call_signature = f'({",".join([type(arg).__name__ for arg in args])})'
                raise excs.Error(f'nearest(): requires a PIL.Image.Image or str, got {call_signature} instead')
            return ImageSimilarityPredicate(
                caller,
                img=args[0] if isinstance(args[0], PIL.Image.Image) else None,
                text=args[0] if isinstance(args[0], str) else None)

        if self.member_name in self.attr_info:
            # __init__() doesn't set self.img_method for attribute members, so there is nothing to call
            raise excs.Error(f'Image attribute {self.member_name} is not callable')

        result = self.img_method(*[caller, *args], **kwargs)
        result.is_method_call = True
        return result

    def _equals(self, other: ImageMemberAccess) -> bool:
        """Compare the attribute specific to this class (the member name)."""
        return self.member_name == other.member_name

    def _id_attrs(self) -> List[Tuple[str, Any]]:
        """Return the (name, value) pairs that feed into this expr's id, in addition to those of the base class."""
        return super()._id_attrs() + [('member_name', self.member_name)]

    def sql_expr(self) -> Optional[sql.ClauseElement]:
        return None

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        """Read the member from the caller's value in data_row and store it in data_row[self.slot_idx].

        The result is None if the caller's value doesn't have that member.
        """
        caller_val = data_row[self._caller.slot_idx]
        try:
            data_row[self.slot_idx] = getattr(caller_val, self.member_name)
        except AttributeError:
            data_row[self.slot_idx] = None
+
@@ -0,0 +1,58 @@
1
+ from __future__ import annotations
2
+ from typing import Optional, List, Any, Dict, Tuple
3
+
4
+ import sqlalchemy as sql
5
+ import PIL
6
+ import numpy as np
7
+
8
+ from .expr import Expr
9
+ from .predicate import Predicate
10
+ from .column_ref import ColumnRef
11
+ from .data_row import DataRow
12
+ from .row_builder import RowBuilder
13
+ import pixeltable.catalog as catalog
14
+ import pixeltable.utils.clip as clip
15
+
16
class ImageSimilarityPredicate(Predicate):
    """Predicate created by <image column>.nearest(...), holding either a reference image or a text query."""

    def __init__(self, img_col_ref: ColumnRef, img: Optional[PIL.Image.Image] = None, text: Optional[str] = None):
        """
        Args:
            img_col_ref: the image column that nearest() was called on
            img: reference image; exactly one of img and text must be given
            text: text query; exactly one of img and text must be given
        """
        # exactly one of the two search targets is allowed
        assert (img is None) != (text is None)
        super().__init__()
        self.img_col_ref = img_col_ref
        self.components = [img_col_ref]
        self.img = img
        self.text = text
        self.id = self._create_id()

    def embedding(self) -> np.ndarray:
        """Return the clip embedding of the text query if there is one, otherwise of the reference image."""
        if self.text is None:
            return clip.embed_image(self.img)
        return clip.embed_text(self.text)

    def __str__(self) -> str:
        # the image itself isn't printable, we show a placeholder instead
        target = self.text if self.img is None else '<img>'
        return str(self.img_col_ref) + f'.nearest({target})'

    def _equals(self, other: ImageSimilarityPredicate) -> bool:
        """Always False: instances of this class never compare equal here."""
        return False

    def _id_attrs(self) -> List[Tuple[str, Any]]:
        """Return the (name, value) pairs that feed into this expr's id, in addition to those of the base class."""
        own_attrs = [('img', id(self.img)), ('text', self.text)]
        return super()._id_attrs() + own_attrs

    def sql_expr(self) -> Optional[sql.ClauseElement]:
        return None

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        # this predicate is not meant to be evaluated row by row
        assert False

    def _as_dict(self) -> Dict:
        assert False, 'not implemented'
        # TODO: convert self.img into a serializable string
        return {'img': self.img, 'text': self.text, **super()._as_dict()}

    @classmethod
    def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
        """Re-create the predicate from a dict with 'img' and 'text' entries and its single component."""
        assert 'img' in d
        assert 'text' in d
        assert len(components) == 1
        img_col_ref = components[0]
        return cls(img_col_ref, d['img'], d['text'])
58
+