pixeltable 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (119):
  1. pixeltable/__init__.py +53 -0
  2. pixeltable/__version__.py +3 -0
  3. pixeltable/catalog/__init__.py +13 -0
  4. pixeltable/catalog/catalog.py +159 -0
  5. pixeltable/catalog/column.py +181 -0
  6. pixeltable/catalog/dir.py +32 -0
  7. pixeltable/catalog/globals.py +33 -0
  8. pixeltable/catalog/insertable_table.py +192 -0
  9. pixeltable/catalog/named_function.py +36 -0
  10. pixeltable/catalog/path.py +58 -0
  11. pixeltable/catalog/path_dict.py +139 -0
  12. pixeltable/catalog/schema_object.py +39 -0
  13. pixeltable/catalog/table.py +695 -0
  14. pixeltable/catalog/table_version.py +1026 -0
  15. pixeltable/catalog/table_version_path.py +133 -0
  16. pixeltable/catalog/view.py +203 -0
  17. pixeltable/dataframe.py +749 -0
  18. pixeltable/env.py +466 -0
  19. pixeltable/exceptions.py +17 -0
  20. pixeltable/exec/__init__.py +10 -0
  21. pixeltable/exec/aggregation_node.py +78 -0
  22. pixeltable/exec/cache_prefetch_node.py +116 -0
  23. pixeltable/exec/component_iteration_node.py +79 -0
  24. pixeltable/exec/data_row_batch.py +94 -0
  25. pixeltable/exec/exec_context.py +22 -0
  26. pixeltable/exec/exec_node.py +61 -0
  27. pixeltable/exec/expr_eval_node.py +217 -0
  28. pixeltable/exec/in_memory_data_node.py +73 -0
  29. pixeltable/exec/media_validation_node.py +43 -0
  30. pixeltable/exec/sql_scan_node.py +226 -0
  31. pixeltable/exprs/__init__.py +25 -0
  32. pixeltable/exprs/arithmetic_expr.py +102 -0
  33. pixeltable/exprs/array_slice.py +71 -0
  34. pixeltable/exprs/column_property_ref.py +77 -0
  35. pixeltable/exprs/column_ref.py +114 -0
  36. pixeltable/exprs/comparison.py +77 -0
  37. pixeltable/exprs/compound_predicate.py +98 -0
  38. pixeltable/exprs/data_row.py +199 -0
  39. pixeltable/exprs/expr.py +594 -0
  40. pixeltable/exprs/expr_set.py +39 -0
  41. pixeltable/exprs/function_call.py +382 -0
  42. pixeltable/exprs/globals.py +69 -0
  43. pixeltable/exprs/image_member_access.py +96 -0
  44. pixeltable/exprs/in_predicate.py +96 -0
  45. pixeltable/exprs/inline_array.py +109 -0
  46. pixeltable/exprs/inline_dict.py +103 -0
  47. pixeltable/exprs/is_null.py +38 -0
  48. pixeltable/exprs/json_mapper.py +121 -0
  49. pixeltable/exprs/json_path.py +159 -0
  50. pixeltable/exprs/literal.py +66 -0
  51. pixeltable/exprs/object_ref.py +41 -0
  52. pixeltable/exprs/predicate.py +44 -0
  53. pixeltable/exprs/row_builder.py +329 -0
  54. pixeltable/exprs/rowid_ref.py +94 -0
  55. pixeltable/exprs/similarity_expr.py +65 -0
  56. pixeltable/exprs/type_cast.py +53 -0
  57. pixeltable/exprs/variable.py +45 -0
  58. pixeltable/ext/__init__.py +5 -0
  59. pixeltable/ext/functions/yolox.py +92 -0
  60. pixeltable/func/__init__.py +7 -0
  61. pixeltable/func/aggregate_function.py +197 -0
  62. pixeltable/func/callable_function.py +113 -0
  63. pixeltable/func/expr_template_function.py +99 -0
  64. pixeltable/func/function.py +141 -0
  65. pixeltable/func/function_registry.py +227 -0
  66. pixeltable/func/globals.py +46 -0
  67. pixeltable/func/nos_function.py +202 -0
  68. pixeltable/func/signature.py +162 -0
  69. pixeltable/func/udf.py +164 -0
  70. pixeltable/functions/__init__.py +95 -0
  71. pixeltable/functions/eval.py +215 -0
  72. pixeltable/functions/fireworks.py +34 -0
  73. pixeltable/functions/huggingface.py +167 -0
  74. pixeltable/functions/image.py +16 -0
  75. pixeltable/functions/openai.py +289 -0
  76. pixeltable/functions/pil/image.py +147 -0
  77. pixeltable/functions/string.py +13 -0
  78. pixeltable/functions/together.py +143 -0
  79. pixeltable/functions/util.py +52 -0
  80. pixeltable/functions/video.py +62 -0
  81. pixeltable/globals.py +425 -0
  82. pixeltable/index/__init__.py +2 -0
  83. pixeltable/index/base.py +51 -0
  84. pixeltable/index/embedding_index.py +168 -0
  85. pixeltable/io/__init__.py +3 -0
  86. pixeltable/io/hf_datasets.py +188 -0
  87. pixeltable/io/pandas.py +148 -0
  88. pixeltable/io/parquet.py +192 -0
  89. pixeltable/iterators/__init__.py +3 -0
  90. pixeltable/iterators/base.py +52 -0
  91. pixeltable/iterators/document.py +432 -0
  92. pixeltable/iterators/video.py +88 -0
  93. pixeltable/metadata/__init__.py +58 -0
  94. pixeltable/metadata/converters/convert_10.py +18 -0
  95. pixeltable/metadata/converters/convert_12.py +3 -0
  96. pixeltable/metadata/converters/convert_13.py +41 -0
  97. pixeltable/metadata/schema.py +234 -0
  98. pixeltable/plan.py +620 -0
  99. pixeltable/store.py +424 -0
  100. pixeltable/tool/create_test_db_dump.py +184 -0
  101. pixeltable/tool/create_test_video.py +81 -0
  102. pixeltable/type_system.py +846 -0
  103. pixeltable/utils/__init__.py +17 -0
  104. pixeltable/utils/arrow.py +98 -0
  105. pixeltable/utils/clip.py +18 -0
  106. pixeltable/utils/coco.py +136 -0
  107. pixeltable/utils/documents.py +69 -0
  108. pixeltable/utils/filecache.py +195 -0
  109. pixeltable/utils/help.py +11 -0
  110. pixeltable/utils/http_server.py +70 -0
  111. pixeltable/utils/media_store.py +76 -0
  112. pixeltable/utils/pytorch.py +91 -0
  113. pixeltable/utils/s3.py +13 -0
  114. pixeltable/utils/sql.py +17 -0
  115. pixeltable/utils/transactional_directory.py +35 -0
  116. pixeltable-0.0.0.dist-info/LICENSE +18 -0
  117. pixeltable-0.0.0.dist-info/METADATA +131 -0
  118. pixeltable-0.0.0.dist-info/RECORD +119 -0
  119. pixeltable-0.0.0.dist-info/WHEEL +4 -0
@@ -0,0 +1,382 @@
1
+ from __future__ import annotations
2
+
3
+ import inspect
4
+ import json
5
+ import sys
6
+ from typing import Optional, List, Any, Dict, Tuple
7
+
8
+ import sqlalchemy as sql
9
+
10
+ import pixeltable.catalog as catalog
11
+ import pixeltable.exceptions as excs
12
+ import pixeltable.func as func
13
+ import pixeltable.type_system as ts
14
+ from .data_row import DataRow
15
+ from .expr import Expr
16
+ from .inline_array import InlineArray
17
+ from .inline_dict import InlineDict
18
+ from .row_builder import RowBuilder
19
+ from .rowid_ref import RowidRef
20
+
21
+
22
class FunctionCall(Expr):
    """Expression that represents a call of a Pixeltable function with bound arguments.

    Arguments that are themselves expressions are stored in `self.components`; constant
    arguments are stored inline. `self.components` is laid out as:
    [argument exprs..., group_by exprs..., (instantiated template expr), order_by exprs...]
    """

    def __init__(
            self, fn: func.Function, bound_args: Dict[str, Any], order_by_clause: Optional[List[Any]] = None,
            group_by_clause: Optional[List[Any]] = None, is_method_call: bool = False):
        """
        Args:
            fn: the function being called
            bound_args: maps parameter names to argument values (Exprs or constants); updated in place
                by check_args()
            order_by_clause: expressions to order by; must be Exprs
            group_by_clause: either a single-element list containing a catalog.Table or a list of Exprs
            is_method_call: if True, the call is displayed as a method call on the first argument

        Raises:
            excs.Error: if an argument is incompatible with the signature or order_by isn't an Expr
        """
        if order_by_clause is None:
            order_by_clause = []
        if group_by_clause is None:
            group_by_clause = []
        signature = fn.signature
        super().__init__(fn.call_return_type(bound_args))
        self.fn = fn
        self.is_method_call = is_method_call
        self.check_args(signature, bound_args)

        self.agg_init_args: Dict[str, Any] = {}
        if self.is_agg_fn_call:
            # we separate out the init args for the aggregator
            self.agg_init_args = {
                arg_name: arg for arg_name, arg in bound_args.items() if arg_name in fn.init_param_names
            }
            bound_args = {arg_name: arg for arg_name, arg in bound_args.items() if arg_name not in fn.init_param_names}

        # construct components, args, kwargs
        self.components: List[Expr] = []

        # Tuple[int, Any]:
        # - for Exprs: (index into components, None)
        # - otherwise: (None, val)
        self.args: List[Tuple[Optional[int], Optional[Any]]] = []
        self.kwargs: Dict[str, Tuple[Optional[int], Optional[Any]]] = {}

        # we record the types of non-variable parameters for runtime type checks
        self.arg_types: List[ts.ColumnType] = []
        self.kwarg_types: Dict[str, ts.ColumnType] = {}
        # the prefix of parameters that are bound can be passed by position
        for param in fn.py_signature.parameters.values():
            if param.name not in bound_args or param.kind == inspect.Parameter.KEYWORD_ONLY:
                break
            arg = bound_args[param.name]
            if isinstance(arg, Expr):
                self.args.append((len(self.components), None))
                self.components.append(arg.copy())
            else:
                self.args.append((None, arg))
            if param.kind != inspect.Parameter.VAR_POSITIONAL and param.kind != inspect.Parameter.VAR_KEYWORD:
                self.arg_types.append(signature.parameters[param.name].col_type)

        # the remaining args are passed as keywords
        kw_param_names = set(bound_args.keys()) - set(list(fn.py_signature.parameters.keys())[:len(self.args)])
        for param_name in kw_param_names:
            arg = bound_args[param_name]
            if isinstance(arg, Expr):
                self.kwargs[param_name] = (len(self.components), None)
                self.components.append(arg.copy())
            else:
                self.kwargs[param_name] = (None, arg)
            if fn.py_signature.parameters[param_name].kind != inspect.Parameter.VAR_KEYWORD:
                self.kwarg_types[param_name] = signature.parameters[param_name].col_type

        # window function state:
        # self.components[self.group_by_start_idx:self.group_by_stop_idx] contains group_by exprs
        self.group_by_start_idx, self.group_by_stop_idx = 0, 0
        if len(group_by_clause) > 0:
            if isinstance(group_by_clause[0], catalog.Table):
                group_by_exprs = self._create_rowid_refs(group_by_clause[0])
            else:
                assert isinstance(group_by_clause[0], Expr)
                group_by_exprs = group_by_clause
            # record grouping exprs in self.components, we need to evaluate them to get partition vals
            self.group_by_start_idx = len(self.components)
            self.group_by_stop_idx = len(self.components) + len(group_by_exprs)
            self.components.extend(group_by_exprs)

        if isinstance(self.fn, func.ExprTemplateFunction):
            # we instantiate the template to create an Expr that can be evaluated and record that as a component
            fn_expr = self.fn.instantiate(**bound_args)
            self.components.append(fn_expr)
            self.fn_expr_idx = len(self.components) - 1
        else:
            self.fn_expr_idx = sys.maxsize

        # we want to make sure that order_by_clause get assigned slot_idxs, even though we won't need to evaluate them
        # (that's done in SQL)
        if len(order_by_clause) > 0 and not isinstance(order_by_clause[0], Expr):
            raise excs.Error(
                f'order_by argument needs to be a Pixeltable expression, but instead is a {type(order_by_clause[0])}')
        # don't add components after this, everything after order_by_start_idx is part of the order_by clause
        self.order_by_start_idx = len(self.components)
        self.components.extend(order_by_clause)

        self.constant_args = {param_name for param_name, arg in bound_args.items() if not isinstance(arg, Expr)}
        # execution state for aggregate functions
        self.aggregator: Optional[Any] = None
        self.current_partition_vals: Optional[List[Any]] = None

        self.id = self._create_id()

    def _create_rowid_refs(self, tbl: catalog.Table) -> List[Expr]:
        """Return one RowidRef per rowid column of tbl's table version."""
        target = tbl.tbl_version_path.tbl_version
        return [RowidRef(target, i) for i in range(target.num_rowid_columns())]

    @classmethod
    def check_args(cls, signature: func.Signature, bound_args: Dict[str, Any]) -> None:
        """Checks that bound_args are compatible with signature.

        Convert literals to the correct type and update bound_args in place, if necessary.

        Raises:
            excs.Error: if a constant argument isn't json-serializable or can't be converted to the
                parameter type, or if an Expr argument's type doesn't match the parameter type
        """
        # only values of existing keys are reassigned below, so iterating over items() is safe
        for param_name, arg in bound_args.items():
            param = signature.parameters[param_name]
            if isinstance(arg, dict):
                try:
                    arg = InlineDict(arg)
                    bound_args[param_name] = arg
                except excs.Error:
                    # this didn't work, but it might be a literal
                    pass
            if isinstance(arg, (list, tuple)):
                try:
                    # If the column type is JsonType, force the literal to be JSON
                    arg = InlineArray(arg, force_json=param.col_type is not None and param.col_type.is_json_type())
                    bound_args[param_name] = arg
                except excs.Error:
                    # this didn't work, but it might be a literal
                    pass

            if not isinstance(arg, Expr):
                # make sure that non-Expr args are json-serializable and are literals of the correct type
                try:
                    _ = json.dumps(arg)
                except TypeError as e:
                    raise excs.Error(
                        f"Argument for parameter '{param_name}' is not json-serializable: {arg}") from e
                if arg is not None:
                    try:
                        bound_args[param_name] = param.col_type.create_literal(arg)
                    except TypeError as e:
                        msg = str(e)
                        # slicing (instead of indexing) keeps this safe for an empty message
                        raise excs.Error(
                            f"Argument for parameter '{param_name}': {msg[:1].lower() + msg[1:]}") from e
                continue

            # variable parameters don't get type-checked, but they both need to be json-typed
            if param.kind == inspect.Parameter.VAR_POSITIONAL:
                assert isinstance(arg, InlineArray)
                arg.col_type = ts.JsonType()
                continue
            if param.kind == inspect.Parameter.VAR_KEYWORD:
                assert isinstance(arg, InlineDict)
                arg.col_type = ts.JsonType()
                continue

            # use this parameter's own type; a variable bound in the literal branch above would be
            # unbound or left over from a previous iteration
            param_type = param.col_type
            if not param_type.is_supertype_of(arg.col_type):
                raise excs.Error(
                    f'Parameter {param_name}: argument type {arg.col_type} does not match parameter type '
                    f'{param_type}')

    def is_nos_call(self) -> bool:
        """Return True if the called function is a NOSFunction."""
        return isinstance(self.fn, func.NOSFunction)

    def _equals(self, other: FunctionCall) -> bool:
        """Compare function, positional/keyword arg descriptors and clause boundaries."""
        if self.fn != other.fn:
            return False
        if self.args != other.args:
            return False
        # constant keyword arguments are part of the call's identity (see _id_attrs())
        if self.kwargs != other.kwargs:
            return False
        if self.group_by_start_idx != other.group_by_start_idx:
            return False
        if self.group_by_stop_idx != other.group_by_stop_idx:
            return False
        if self.order_by_start_idx != other.order_by_start_idx:
            return False
        return True

    def _id_attrs(self) -> List[Tuple[str, Any]]:
        return super()._id_attrs() + [
            ('fn', id(self.fn)),  # use the function pointer, not the fqn, which isn't set for lambdas
            ('args', self.args),
            ('kwargs', self.kwargs),
            ('group_by_start_idx', self.group_by_start_idx),
            ('group_by_stop_idx', self.group_by_stop_idx),
            ('order_by_start_idx', self.order_by_start_idx)
        ]

    def __str__(self) -> str:
        return self.display_str()

    def display_str(self, inline: bool = True) -> str:
        """Return a printable form of the call.

        Args:
            inline: if True, arguments are separated by ', '; otherwise each argument goes on its own line
        """
        if self.is_method_call:
            return f'{self.components[0]}.{self.fn.name}({self._print_args(1, inline)})'
        else:
            fn_name = self.fn.display_name if self.fn.display_name != '' else 'anonymous_fn'
            # honor `inline` here as well, consistent with the method-call form
            return f'{fn_name}({self._print_args(0, inline)})'

    def _print_args(self, start_idx: int = 0, inline: bool = True) -> str:
        """Return the argument list as a string, starting at positional argument start_idx."""
        arg_strs = [
            str(arg) if idx is None else str(self.components[idx]) for idx, arg in self.args[start_idx:]
        ]

        def print_arg(arg: Any) -> str:
            return f"'{arg}'" if isinstance(arg, str) else str(arg)

        arg_strs.extend([
            f'{param_name}={print_arg(arg) if idx is None else str(self.components[idx])}'
            for param_name, (idx, arg) in self.kwargs.items()
        ])
        if len(self.order_by) > 0:
            if self.fn.requires_order_by:
                arg_strs.insert(0, Expr.print_list(self.order_by))
            else:
                arg_strs.append(f'order_by={Expr.print_list(self.order_by)}')
        if len(self.group_by) > 0:
            arg_strs.append(f'group_by={Expr.print_list(self.group_by)}')
        # TODO: figure out the function name
        separator = ', ' if inline else ',\n '
        return separator.join(arg_strs)

    def has_group_by(self) -> bool:
        """Return True if group_by exprs were recorded in self.components."""
        return self.group_by_stop_idx != 0

    @property
    def group_by(self) -> List[Expr]:
        return self.components[self.group_by_start_idx:self.group_by_stop_idx]

    @property
    def order_by(self) -> List[Expr]:
        return self.components[self.order_by_start_idx:]

    @property
    def is_window_fn_call(self) -> bool:
        return isinstance(self.fn, func.AggregateFunction) and self.fn.allows_window and \
            (not self.fn.allows_std_agg
             or self.has_group_by()
             or (len(self.order_by) > 0 and not self.fn.requires_order_by))

    def get_window_sort_exprs(self) -> Tuple[List[Expr], List[Expr]]:
        """Return (group_by exprs, order_by exprs)."""
        return self.group_by, self.order_by

    @property
    def is_agg_fn_call(self) -> bool:
        return isinstance(self.fn, func.AggregateFunction)

    def get_agg_order_by(self) -> List[Expr]:
        assert self.is_agg_fn_call
        return self.order_by

    def sql_expr(self) -> Optional[sql.ClauseElement]:
        # TODO: implement for standard aggregate functions
        return None

    def reset_agg(self) -> None:
        """
        Init agg state
        """
        assert self.is_agg_fn_call
        assert isinstance(self.fn, func.AggregateFunction)
        self.aggregator = self.fn.agg_cls(**self.agg_init_args)

    def update(self, data_row: DataRow) -> None:
        """
        Update agg state
        """
        assert self.is_agg_fn_call
        args, kwargs = self._make_args(data_row)
        self.aggregator.update(*args, **kwargs)

    def _make_args(self, data_row: DataRow) -> Tuple[List[Any], Dict[str, Any]]:
        """Return args and kwargs, constructed for data_row"""
        kwargs: Dict[str, Any] = {}
        for param_name, (component_idx, arg) in self.kwargs.items():
            val = arg if component_idx is None else data_row[self.components[component_idx].slot_idx]
            param = self.fn.signature.parameters[param_name]
            if param.kind == inspect.Parameter.VAR_KEYWORD:
                # expand **kwargs parameter
                kwargs.update(val)
            else:
                assert param.kind != inspect.Parameter.VAR_POSITIONAL
                kwargs[param_name] = val

        args: List[Any] = []
        for param_idx, (component_idx, arg) in enumerate(self.args):
            val = arg if component_idx is None else data_row[self.components[component_idx].slot_idx]
            param = self.fn.signature.parameters_by_pos[param_idx]
            if param.kind == inspect.Parameter.VAR_POSITIONAL:
                # expand *args parameter
                assert isinstance(val, list)
                args.extend(val)
            elif param.kind == inspect.Parameter.VAR_KEYWORD:
                # expand **kwargs parameter
                assert isinstance(val, dict)
                kwargs.update(val)
            else:
                args.append(val)
        return args, kwargs

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        """Evaluate the call for data_row and store the result in data_row[self.slot_idx].

        The result is None if a non-nullable parameter receives a None argument.
        """
        args, kwargs = self._make_args(data_row)
        signature = self.fn.signature
        if signature.parameters is not None:
            # check for nulls
            for i, arg_type in enumerate(self.arg_types):
                if args[i] is None and not arg_type.nullable:
                    # we can't evaluate this function
                    data_row[self.slot_idx] = None
                    return
            for param_name, param_type in self.kwarg_types.items():
                if kwargs[param_name] is None and not param_type.nullable:
                    # we can't evaluate this function
                    data_row[self.slot_idx] = None
                    return

        if isinstance(self.fn, func.ExprTemplateFunction):
            # we need to evaluate the template
            # TODO: can we get rid of this extra copy?
            fn_expr = self.components[self.fn_expr_idx]
            data_row[self.slot_idx] = data_row[fn_expr.slot_idx]
        elif isinstance(self.fn, func.CallableFunction) and not self.fn.is_batched:
            # optimization: avoid additional level of indirection we'd get from calling Function.exec()
            data_row[self.slot_idx] = self.fn.py_fn(*args, **kwargs)
        elif self.is_window_fn_call:
            if self.has_group_by():
                partition_vals = [data_row[e.slot_idx] for e in self.group_by]
                # also (re)create the aggregator when none exists yet: the first partition's keys may
                # all be None, which must not be mistaken for "same partition as before"
                if self.aggregator is None or partition_vals != self.current_partition_vals:
                    # new partition
                    self.aggregator = self.fn.agg_cls(**self.agg_init_args)
                    self.current_partition_vals = partition_vals
            elif self.aggregator is None:
                self.aggregator = self.fn.agg_cls(**self.agg_init_args)
            # pass keyword arguments too, consistent with update()
            self.aggregator.update(*args, **kwargs)
            data_row[self.slot_idx] = self.aggregator.value()
        elif self.is_agg_fn_call:
            data_row[self.slot_idx] = self.aggregator.value()
        else:
            data_row[self.slot_idx] = self.fn.exec(*args, **kwargs)

    def _as_dict(self) -> Dict:
        result = {
            'fn': self.fn.as_dict(), 'args': self.args, 'kwargs': self.kwargs,
            'group_by_start_idx': self.group_by_start_idx, 'group_by_stop_idx': self.group_by_stop_idx,
            'order_by_start_idx': self.order_by_start_idx,
            **super()._as_dict()
        }
        return result

    @classmethod
    def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
        """Rebuild a FunctionCall from the output of _as_dict() and its deserialized components."""
        assert 'fn' in d
        assert 'args' in d
        assert 'kwargs' in d
        # reassemble bound args
        fn = func.Function.from_dict(d['fn'])
        param_names = list(fn.signature.parameters.keys())
        bound_args = {param_names[i]: arg if idx is None else components[idx] for i, (idx, arg) in enumerate(d['args'])}
        bound_args.update(
            {param_name: val if idx is None else components[idx] for param_name, (idx, val) in d['kwargs'].items()})
        group_by_exprs = components[d['group_by_start_idx']:d['group_by_stop_idx']]
        order_by_exprs = components[d['order_by_start_idx']:]
        # reuse the function deserialized above instead of deserializing it a second time
        fn_call = cls(fn, bound_args, group_by_clause=group_by_exprs, order_by_clause=order_by_exprs)
        return fn_call
@@ -0,0 +1,69 @@
1
+ import datetime
2
+ from typing import Union
3
+ import enum
4
+
5
+
6
+ # Python types corresponding to our literal types
7
+ LiteralPythonTypes = Union[str, int, float, bool, datetime.datetime, datetime.date]
8
+
9
def print_slice(s: slice) -> str:
    """Render a slice in subscript notation, e.g. '1:5', ':3' or '::2'.

    Bounds that are None are rendered as empty strings; the step (and its leading ':')
    only appears when a step is set.
    """
    def bound_str(bound) -> str:
        return '' if bound is None else str(bound)

    text = bound_str(s.start) + ':' + bound_str(s.stop)
    if s.step is not None:
        text += ':' + str(s.step)
    return text
14
+
15
+
16
class ComparisonOperator(enum.Enum):
    """Comparison operators; str() yields the operator's Python symbol."""
    LT = 0
    LE = 1
    EQ = 2
    NE = 3
    GT = 4
    GE = 5

    def __str__(self) -> str:
        # symbols are listed in the order of the member values above; this covers every
        # member, including NE, so __str__ never falls through and returns None
        return ('<', '<=', '==', '!=', '>', '>=')[self.value]
35
+
36
+
37
class LogicalOperator(enum.Enum):
    """Logical operators; str() yields the symbol used to write the operator."""
    AND = 0
    OR = 1
    NOT = 2

    def __str__(self) -> str:
        # symbols are listed in the order of the member values above
        return ('&', '|', '~')[self.value]
49
+
50
+
51
class ArithmeticOperator(enum.Enum):
    """Arithmetic operators; str() yields the operator's symbol."""
    ADD = 0
    SUB = 1
    MUL = 2
    DIV = 3
    MOD = 4

    def __str__(self) -> str:
        # symbols are listed in the order of the member values above
        return ('+', '-', '*', '/', '%')[self.value]
69
+
@@ -0,0 +1,96 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Optional, List, Any, Dict, Tuple
4
+
5
+ import PIL
6
+ import sqlalchemy as sql
7
+
8
+ import pixeltable.exceptions as excs
9
+ import pixeltable.func as func
10
+ import pixeltable.type_system as ts
11
+ from .data_row import DataRow
12
+ from .expr import Expr
13
+ from .function_call import FunctionCall
14
+ from .row_builder import RowBuilder
15
+
16
+
17
+ # TODO: this doesn't dig up all attrs for actual jpeg images
18
def _create_pil_attr_info() -> Dict[str, ts.ColumnType]:
    """Map the public, non-callable attributes of a PIL image to Pixeltable column types.

    A blank RGB image is inspected; attributes whose value is None are skipped. str values map
    to StringType, int values to IntType and dict values to JsonType; attributes of any other
    type are omitted.
    """
    # `import PIL` by itself does not guarantee that the Image submodule is loaded
    import PIL.Image

    # create random Image to inspect for attrs
    img = PIL.Image.new('RGB', (100, 100))
    # we're only interested in public attrs (including properties)
    result: Dict[str, ts.ColumnType] = {}
    for name in dir(img):
        if name.startswith('_'):
            continue
        val = getattr(img, name)
        if callable(val) or val is None:
            continue
        if isinstance(val, str):
            result[name] = ts.StringType()
        if isinstance(val, int):
            result[name] = ts.IntType()
        # isinstance, not `is dict`: an attribute value is never the dict type object itself
        if isinstance(val, dict):
            result[name] = ts.JsonType()
    return result
33
+
34
+
35
class ImageMemberAccess(Expr):
    """
    Access of either an attribute or function member of PIL.Image.Image.
    Ex.: tbl.img_col_ref.rotate(90), tbl.img_col_ref.width
    TODO: remove this class and use FunctionCall instead (attributes to be replaced by functions)
    """
    # attribute name -> column type, computed once when the class is created
    attr_info = _create_pil_attr_info()

    def __init__(self, member_name: str, caller: Expr):
        """
        Args:
            member_name: name of the attribute or method being accessed
            caller: the expression on which the member is accessed

        Raises:
            excs.Error: if member_name is neither a known attribute nor a unique registered image method
        """
        if member_name in self.attr_info:
            # plain attribute: there is nothing to call
            self.img_method = None
            super().__init__(self.attr_info[member_name])
        else:
            candidates = func.FunctionRegistry.get().get_type_methods(member_name, ts.ColumnType.Type.IMAGE)
            if len(candidates) == 0:
                raise excs.Error(f'Unknown Image member: {member_name}')
            if len(candidates) > 1:
                raise excs.Error(f'Ambiguous Image method: {member_name}')
            self.img_method = candidates[0]
            super().__init__(ts.InvalidType())  # requires FunctionCall to return value
        self.member_name = member_name
        self.components = [caller]
        self.id = self._create_id()

    def default_column_name(self) -> Optional[str]:
        return self.member_name.replace('.', '_')

    @property
    def _caller(self) -> Expr:
        return self.components[0]

    def __str__(self) -> str:
        return f'{self._caller}.{self.member_name}'

    def _as_dict(self) -> Dict:
        return {'member_name': self.member_name, **super()._as_dict()}

    @classmethod
    def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
        assert 'member_name' in d
        assert len(components) == 1
        return cls(d['member_name'], components[0])

    def __call__(self, *args, **kwargs) -> FunctionCall:
        """Call the image method with the caller expression as its first argument.

        Raises:
            excs.Error: if this member is an attribute rather than a method
        """
        if self.img_method is None:
            raise excs.Error(f'Image member is not callable: {self.member_name}')
        result = self.img_method(*[self._caller, *args], **kwargs)
        result.is_method_call = True
        return result

    def _equals(self, other: ImageMemberAccess) -> bool:
        return self.member_name == other.member_name

    def _id_attrs(self) -> List[Tuple[str, Any]]:
        return super()._id_attrs() + [('member_name', self.member_name)]

    def sql_expr(self) -> Optional[sql.ClauseElement]:
        return None

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        """Store the attribute value of the caller's image in data_row; None if the attribute is missing."""
        caller_val = data_row[self._caller.slot_idx]
        try:
            data_row[self.slot_idx] = getattr(caller_val, self.member_name)
        except AttributeError:
            data_row[self.slot_idx] = None
@@ -0,0 +1,96 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Optional, List, Any, Dict, Tuple, Iterable
4
+
5
+ import sqlalchemy as sql
6
+
7
+ import pixeltable.exceptions as excs
8
+ from .data_row import DataRow
9
+ from .expr import Expr
10
+ from .predicate import Predicate
11
+ from .row_builder import RowBuilder
12
+
13
+
14
class InPredicate(Predicate):
    """Predicate corresponding to the SQL IN operator.

    The value set is either a literal iterable (stored in `value_list`, restricted to the
    elements that validate against the lhs type) or a JSON-typed expression (stored as the
    second component and evaluated per row).
    """

    def __init__(self, lhs: Expr, value_set_literal: Optional[Iterable] = None, value_set_expr: Optional[Expr] = None):
        have_literal = value_set_literal is not None
        have_expr = value_set_expr is not None
        # exactly one of the two value set forms must be given
        assert have_literal != have_expr
        if not lhs.col_type.is_scalar_type():
            raise excs.Error(f'isin(): only supported for scalar types, not {lhs.col_type}')
        super().__init__()

        self.value_list: Optional[list] = None  # only contains values of the correct type
        if have_expr:
            if not value_set_expr.col_type.is_json_type():
                raise excs.Error(
                    f'isin(): argument must have a JSON type, but {value_set_expr} has type {value_set_expr.col_type}')
            self.components = [lhs.copy(), value_set_expr.copy()]
        else:
            self.components = [lhs.copy()]
            self.value_list = self._normalize_value_set(value_set_literal)

        self.id = self._create_id()

    @property
    def _lhs(self) -> Expr:
        return self.components[0]

    @property
    def _value_set_expr(self) -> Expr:
        assert len(self.components) == 2
        return self.components[1]

    def _normalize_value_set(self, value_set: Any, filter_type_mismatches: bool = True) -> Iterable:
        """Return value_set as a list, optionally dropping elements that don't validate against the lhs type.

        Raises:
            excs.Error: if value_set is not an Iterable
        """
        if not isinstance(value_set, Iterable):
            raise excs.Error(f'isin(): argument must be an Iterable (eg, list, dict, ...), not {value_set!r}')
        candidates = list(value_set)
        if not filter_type_mismatches:
            return candidates

        # ignore elements of the wrong type
        lhs_type = self._lhs.col_type
        accepted = []
        for candidate in candidates:
            try:
                lhs_type.validate_literal(candidate)
            except TypeError:
                continue
            accepted.append(candidate)
        return accepted

    def __str__(self) -> str:
        rhs = self.value_list if self.value_list is not None else self.components[1]
        return f'{self.components[0]}.isin({rhs})'

    def _equals(self, other: InPredicate) -> bool:
        return self.value_list == other.value_list

    def _id_attrs(self) -> List[Tuple[str, Any]]:
        return super()._id_attrs() + [('value_list', self.value_list)]

    def sql_expr(self) -> Optional[sql.ClauseElement]:
        """Return a SQL IN clause, or None if the lhs has no SQL form or the value set is an expression."""
        lhs_sql = self.components[0].sql_expr()
        if lhs_sql is not None and self.value_list is not None:
            return lhs_sql.in_(self.value_list)
        return None

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        lhs_val = data_row[self._lhs.slot_idx]
        candidates = self.value_list
        if candidates is None:
            # the value set comes from an expression: take its per-row value without type filtering
            row_value_set = data_row[self._value_set_expr.slot_idx]
            candidates = self._normalize_value_set(row_value_set, filter_type_mismatches=False)
        data_row[self.slot_idx] = lhs_val in candidates

    def _as_dict(self) -> Dict:
        return {'value_list': self.value_list, **super()._as_dict()}

    @classmethod
    def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
        assert 'value_list' in d
        assert len(components) <= 2
        value_set_expr = components[1] if len(components) == 2 else None
        return cls(components[0], d['value_list'], value_set_expr)
96
+