pixeltable 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (110)
  1. pixeltable/__init__.py +20 -9
  2. pixeltable/__version__.py +3 -0
  3. pixeltable/catalog/column.py +23 -7
  4. pixeltable/catalog/insertable_table.py +32 -19
  5. pixeltable/catalog/table.py +210 -20
  6. pixeltable/catalog/table_version.py +272 -111
  7. pixeltable/catalog/table_version_path.py +6 -1
  8. pixeltable/dataframe.py +184 -110
  9. pixeltable/datatransfer/__init__.py +1 -0
  10. pixeltable/datatransfer/label_studio.py +526 -0
  11. pixeltable/datatransfer/remote.py +113 -0
  12. pixeltable/env.py +213 -79
  13. pixeltable/exec/__init__.py +2 -1
  14. pixeltable/exec/data_row_batch.py +6 -7
  15. pixeltable/exec/expr_eval_node.py +28 -28
  16. pixeltable/exec/sql_scan_node.py +7 -6
  17. pixeltable/exprs/__init__.py +4 -3
  18. pixeltable/exprs/column_ref.py +11 -2
  19. pixeltable/exprs/comparison.py +39 -1
  20. pixeltable/exprs/data_row.py +7 -0
  21. pixeltable/exprs/expr.py +26 -19
  22. pixeltable/exprs/function_call.py +17 -18
  23. pixeltable/exprs/globals.py +14 -2
  24. pixeltable/exprs/image_member_access.py +9 -28
  25. pixeltable/exprs/in_predicate.py +96 -0
  26. pixeltable/exprs/inline_array.py +13 -11
  27. pixeltable/exprs/inline_dict.py +15 -13
  28. pixeltable/exprs/row_builder.py +7 -1
  29. pixeltable/exprs/similarity_expr.py +67 -0
  30. pixeltable/ext/functions/whisperx.py +30 -0
  31. pixeltable/ext/functions/yolox.py +16 -0
  32. pixeltable/func/__init__.py +0 -2
  33. pixeltable/func/aggregate_function.py +5 -2
  34. pixeltable/func/callable_function.py +57 -13
  35. pixeltable/func/expr_template_function.py +14 -3
  36. pixeltable/func/function.py +35 -4
  37. pixeltable/func/signature.py +5 -15
  38. pixeltable/func/udf.py +8 -12
  39. pixeltable/functions/fireworks.py +9 -4
  40. pixeltable/functions/huggingface.py +48 -5
  41. pixeltable/functions/openai.py +49 -11
  42. pixeltable/functions/pil/image.py +61 -64
  43. pixeltable/functions/together.py +32 -6
  44. pixeltable/functions/util.py +0 -43
  45. pixeltable/functions/video.py +46 -8
  46. pixeltable/globals.py +443 -0
  47. pixeltable/index/__init__.py +1 -0
  48. pixeltable/index/base.py +9 -2
  49. pixeltable/index/btree.py +54 -0
  50. pixeltable/index/embedding_index.py +91 -15
  51. pixeltable/io/__init__.py +4 -0
  52. pixeltable/io/globals.py +59 -0
  53. pixeltable/{utils → io}/hf_datasets.py +48 -17
  54. pixeltable/io/pandas.py +148 -0
  55. pixeltable/{utils → io}/parquet.py +58 -33
  56. pixeltable/iterators/__init__.py +1 -1
  57. pixeltable/iterators/base.py +8 -4
  58. pixeltable/iterators/document.py +225 -93
  59. pixeltable/iterators/video.py +16 -9
  60. pixeltable/metadata/__init__.py +8 -4
  61. pixeltable/metadata/converters/convert_12.py +3 -0
  62. pixeltable/metadata/converters/convert_13.py +41 -0
  63. pixeltable/metadata/converters/convert_14.py +13 -0
  64. pixeltable/metadata/converters/convert_15.py +29 -0
  65. pixeltable/metadata/converters/util.py +63 -0
  66. pixeltable/metadata/schema.py +12 -6
  67. pixeltable/plan.py +11 -24
  68. pixeltable/store.py +16 -23
  69. pixeltable/tool/create_test_db_dump.py +49 -14
  70. pixeltable/type_system.py +27 -58
  71. pixeltable/utils/coco.py +94 -0
  72. pixeltable/utils/documents.py +42 -12
  73. pixeltable/utils/http_server.py +70 -0
  74. pixeltable-0.2.7.dist-info/METADATA +137 -0
  75. pixeltable-0.2.7.dist-info/RECORD +126 -0
  76. {pixeltable-0.2.5.dist-info → pixeltable-0.2.7.dist-info}/WHEEL +1 -1
  77. pixeltable/client.py +0 -600
  78. pixeltable/exprs/image_similarity_predicate.py +0 -58
  79. pixeltable/func/batched_function.py +0 -53
  80. pixeltable/func/nos_function.py +0 -202
  81. pixeltable/tests/conftest.py +0 -171
  82. pixeltable/tests/ext/test_yolox.py +0 -21
  83. pixeltable/tests/functions/test_fireworks.py +0 -43
  84. pixeltable/tests/functions/test_functions.py +0 -60
  85. pixeltable/tests/functions/test_huggingface.py +0 -158
  86. pixeltable/tests/functions/test_openai.py +0 -162
  87. pixeltable/tests/functions/test_together.py +0 -112
  88. pixeltable/tests/test_audio.py +0 -65
  89. pixeltable/tests/test_catalog.py +0 -27
  90. pixeltable/tests/test_client.py +0 -21
  91. pixeltable/tests/test_component_view.py +0 -379
  92. pixeltable/tests/test_dataframe.py +0 -440
  93. pixeltable/tests/test_dirs.py +0 -107
  94. pixeltable/tests/test_document.py +0 -120
  95. pixeltable/tests/test_exprs.py +0 -802
  96. pixeltable/tests/test_function.py +0 -332
  97. pixeltable/tests/test_index.py +0 -138
  98. pixeltable/tests/test_migration.py +0 -44
  99. pixeltable/tests/test_nos.py +0 -54
  100. pixeltable/tests/test_snapshot.py +0 -231
  101. pixeltable/tests/test_table.py +0 -1343
  102. pixeltable/tests/test_transactional_directory.py +0 -42
  103. pixeltable/tests/test_types.py +0 -52
  104. pixeltable/tests/test_video.py +0 -159
  105. pixeltable/tests/test_view.py +0 -535
  106. pixeltable/tests/utils.py +0 -442
  107. pixeltable/utils/clip.py +0 -18
  108. pixeltable-0.2.5.dist-info/METADATA +0 -128
  109. pixeltable-0.2.5.dist-info/RECORD +0 -139
  110. {pixeltable-0.2.5.dist-info → pixeltable-0.2.7.dist-info}/LICENSE +0 -0
@@ -6,9 +6,10 @@ from .comparison import Comparison
6
6
  from .compound_predicate import CompoundPredicate
7
7
  from .data_row import DataRow
8
8
  from .expr import Expr
9
+ from .expr_set import ExprSet
9
10
  from .function_call import FunctionCall
10
11
  from .image_member_access import ImageMemberAccess
11
- from .image_similarity_predicate import ImageSimilarityPredicate
12
+ from .in_predicate import InPredicate
12
13
  from .inline_array import InlineArray
13
14
  from .inline_dict import InlineDict
14
15
  from .is_null import IsNull
@@ -16,9 +17,9 @@ from .json_mapper import JsonMapper
16
17
  from .json_path import RELATIVE_PATH_ROOT, JsonPath
17
18
  from .literal import Literal
18
19
  from .object_ref import ObjectRef
19
- from .variable import Variable
20
20
  from .predicate import Predicate
21
21
  from .row_builder import RowBuilder, ColumnSlotIdx, ExecProfile
22
22
  from .rowid_ref import RowidRef
23
- from .expr_set import ExprSet
23
+ from .similarity_expr import SimilarityExpr
24
24
  from .type_cast import TypeCast
25
+ from .variable import Variable
@@ -63,6 +63,15 @@ class ColumnRef(Expr):
63
63
 
64
64
  return super().__getattr__(name)
65
65
 
66
+ def similarity(self, other: Any) -> Expr:
67
+ if isinstance(other, Expr):
68
+ raise excs.Error(f'similarity(): requires a string or a PIL.Image.Image object, not an expression')
69
+ item = Expr.from_object(other)
70
+ if item is None or not(item.col_type.is_string_type() or item.col_type.is_image_type()):
71
+ raise excs.Error(f'similarity(): requires a string or a PIL.Image.Image object, not a {type(other)}')
72
+ from .similarity_expr import SimilarityExpr
73
+ return SimilarityExpr(self, item)
74
+
66
75
  def default_column_name(self) -> Optional[str]:
67
76
  return str(self)
68
77
 
@@ -99,7 +108,7 @@ class ColumnRef(Expr):
99
108
  def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
100
109
  tbl_id, version, col_id = UUID(d['tbl_id']), d['tbl_version'], d['col_id']
101
110
  tbl_version = catalog.Catalog.get().tbl_versions[(tbl_id, version)]
102
- assert col_id in tbl_version.cols_by_id
103
- col = tbl_version.cols_by_id[col_id]
111
+ # don't use tbl_version.cols_by_id here, this might be a snapshot reference to a column that was then dropped
112
+ col = next(col for col in tbl_version.cols if col.id == col_id)
104
113
  return cls(col)
105
114
 
@@ -4,18 +4,44 @@ from typing import Optional, List, Any, Dict, Tuple
4
4
 
5
5
  import sqlalchemy as sql
6
6
 
7
+ from .column_ref import ColumnRef
7
8
  from .data_row import DataRow
8
9
  from .expr import Expr
9
10
  from .globals import ComparisonOperator
11
+ from .literal import Literal
10
12
  from .predicate import Predicate
11
13
  from .row_builder import RowBuilder
14
+ import pixeltable.exceptions as excs
15
+ import pixeltable.index as index
12
16
 
13
17
 
14
18
  class Comparison(Predicate):
15
19
  def __init__(self, operator: ComparisonOperator, op1: Expr, op2: Expr):
16
20
  super().__init__()
17
21
  self.operator = operator
18
- self.components = [op1, op2]
22
+
23
+ # if this is a comparison of a column to a literal (ie, could be used as a search argument in an index lookup),
24
+ # normalize it to <column> <operator> <literal>.
25
+ if isinstance(op1, ColumnRef) and isinstance(op2, Literal):
26
+ self.is_search_arg_comparison = True
27
+ self.components = [op1, op2]
28
+ elif isinstance(op1, Literal) and isinstance(op2, ColumnRef):
29
+ self.is_search_arg_comparison = True
30
+ self.components = [op2, op1]
31
+ self.operator = self.operator.reverse()
32
+ else:
33
+ self.is_search_arg_comparison = False
34
+ self.components = [op1, op2]
35
+
36
+ import pixeltable.index as index
37
+ if self.is_search_arg_comparison and self._op2.col_type.is_string_type() \
38
+ and len(self._op2.val) >= index.BtreeIndex.MAX_STRING_LEN:
39
+ # we can't use an index for this after all
40
+ raise excs.Error(
41
+ f'String literal too long for comparison against indexed column {self._op1.col.name!r} '
42
+ f'(max length is {index.BtreeIndex.MAX_STRING_LEN - 1})'
43
+ )
44
+
19
45
  self.id = self._create_id()
20
46
 
21
47
  def __str__(self) -> str:
@@ -37,6 +63,18 @@ class Comparison(Predicate):
37
63
 
38
64
  def sql_expr(self) -> Optional[sql.ClauseElement]:
39
65
  left = self._op1.sql_expr()
66
+ if self.is_search_arg_comparison:
67
+ # reference the index value column if there is an index and this is not a snapshot
68
+ # (indices don't apply to snapshots)
69
+ tbl = self._op1.col.tbl
70
+ idx_info = [
71
+ info for info in self._op1.col.get_idx_info().values() if isinstance(info.idx, index.BtreeIndex)
72
+ ]
73
+ if len(idx_info) > 0 and not tbl.is_snapshot:
74
+ # there shouldn't be multiple B-tree indices on a column
75
+ assert len(idx_info) == 1
76
+ left = idx_info[0].val_col.sa_col
77
+
40
78
  right = self._op2.sql_expr()
41
79
  if left is None or right is None:
42
80
  return None
@@ -197,3 +197,10 @@ class DataRow:
197
197
  pass
198
198
  self.vals[index] = None
199
199
 
200
+ @property
201
+ def rowid(self) -> Tuple[int]:
202
+ return self.pk[:-1]
203
+
204
+ @property
205
+ def v_min(self) -> int:
206
+ return self.pk[-1]
pixeltable/exprs/expr.py CHANGED
@@ -60,9 +60,9 @@ class Expr(abc.ABC):
60
60
 
61
61
  # index of the expr's value in the data row:
62
62
  # - set for all materialized exprs
63
- # - -1: not executable
63
+ # - None: not executable
64
64
  # - not set for subexprs that don't need to be materialized because the parent can be materialized via SQL
65
- self.slot_idx = -1
65
+ self.slot_idx: Optional[int] = None
66
66
  self.components: List[Expr] = [] # the subexprs that are needed to construct this expr
67
67
 
68
68
  def dependencies(self) -> List[Expr]:
@@ -110,6 +110,11 @@ class Expr(abc.ABC):
110
110
  return False
111
111
  return self._equals(other)
112
112
 
113
+ def _equals(self, other: Expr) -> bool:
114
+ # we already compared the type and components in equals(); subclasses that require additional comparisons
115
+ # override this
116
+ return True
117
+
113
118
  def _id_attrs(self) -> List[Tuple[str, Any]]:
114
119
  """Returns attribute name/value pairs that are used to construct the instance id.
115
120
 
@@ -148,7 +153,7 @@ class Expr(abc.ABC):
148
153
  cls = self.__class__
149
154
  result = cls.__new__(cls)
150
155
  result.__dict__.update(self.__dict__)
151
- result.slot_idx = -1
156
+ result.slot_idx = None
152
157
  result.components = [c.copy() for c in self.components]
153
158
  return result
154
159
 
@@ -164,16 +169,22 @@ class Expr(abc.ABC):
164
169
  memo[id(self)] = result
165
170
  return result
166
171
 
167
- def substitute(self, old: Expr, new: Expr) -> Expr:
172
+ def substitute(self, spec: dict[Expr, Expr]) -> Expr:
168
173
  """
169
174
  Replace 'old' with 'new' recursively.
170
175
  """
171
- if self.equals(old):
172
- return new.copy()
176
+ for old, new in spec.items():
177
+ if self.equals(old):
178
+ return new.copy()
173
179
  for i in range(len(self.components)):
174
- self.components[i] = self.components[i].substitute(old, new)
180
+ self.components[i] = self.components[i].substitute(spec)
175
181
  return self
176
182
 
183
+ @classmethod
184
+ def list_substitute(cls, expr_list: List[Expr], spec: dict[Expr, Expr]) -> None:
185
+ for i in range(len(expr_list)):
186
+ expr_list[i] = expr_list[i].substitute(spec)
187
+
177
188
  def resolve_computed_cols(self, resolve_cols: Optional[Set[catalog.Column]] = None) -> Expr:
178
189
  """
179
190
  Recursively replace ColRefs to unstored computed columns with their value exprs.
@@ -191,9 +202,7 @@ class Expr(abc.ABC):
191
202
  ])
192
203
  if len(target_col_refs) == 0:
193
204
  return result
194
- for ref in target_col_refs:
195
- assert ref.col.value_expr is not None
196
- result = result.substitute(ref, ref.col.value_expr)
205
+ result = result.substitute({ref: ref.col.value_expr for ref in target_col_refs})
197
206
 
198
207
  def is_bound_by(self, tbl: catalog.TableVersionPath) -> bool:
199
208
  """Returns True if this expr can be evaluated in the context of tbl."""
@@ -220,11 +229,6 @@ class Expr(abc.ABC):
220
229
  self.components[i] = self.components[i]._retarget(tbl_versions)
221
230
  return self
222
231
 
223
- @classmethod
224
- def list_substitute(cls, expr_list: List[Expr], old: Expr, new: Expr) -> None:
225
- for i in range(len(expr_list)):
226
- expr_list[i] = expr_list[i].substitute(old, new)
227
-
228
232
  @abc.abstractmethod
229
233
  def __str__(self) -> str:
230
234
  pass
@@ -313,10 +317,6 @@ class Expr(abc.ABC):
313
317
  return InlineArray(tuple(o))
314
318
  return None
315
319
 
316
- @abc.abstractmethod
317
- def _equals(self, other: Expr) -> bool:
318
- pass
319
-
320
320
  @abc.abstractmethod
321
321
  def sql_expr(self) -> Optional[sql.ClauseElement]:
322
322
  """
@@ -396,6 +396,13 @@ class Expr(abc.ABC):
396
396
  def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
397
397
  assert False, 'not implemented'
398
398
 
399
+ def isin(self, value_set: Any) -> 'pixeltable.exprs.InPredicate':
400
+ from .in_predicate import InPredicate
401
+ if isinstance(value_set, Expr):
402
+ return InPredicate(self, value_set_expr=value_set)
403
+ else:
404
+ return InPredicate(self, value_set_literal=value_set)
405
+
399
406
  def astype(self, new_type: ts.ColumnType) -> 'pixeltable.exprs.TypeCast':
400
407
  from pixeltable.exprs import TypeCast
401
408
  return TypeCast(self, new_type)
@@ -28,7 +28,7 @@ class FunctionCall(Expr):
28
28
  if group_by_clause is None:
29
29
  group_by_clause = []
30
30
  signature = fn.signature
31
- super().__init__(signature.get_return_type(bound_args))
31
+ super().__init__(fn.call_return_type(bound_args))
32
32
  self.fn = fn
33
33
  self.is_method_call = is_method_call
34
34
  self.check_args(signature, bound_args)
@@ -46,9 +46,9 @@ class FunctionCall(Expr):
46
46
 
47
47
  # Tuple[int, Any]:
48
48
  # - for Exprs: (index into components, None)
49
- # - otherwise: (-1, val)
50
- self.args: List[Tuple[int, Any]] = []
51
- self.kwargs: Dict[str, Tuple[int, Any]] = {}
49
+ # - otherwise: (None, val)
50
+ self.args: List[Tuple[Optional[int], Optional[Any]]] = []
51
+ self.kwargs: Dict[str, Tuple[Optional[int], Optional[Any]]] = {}
52
52
 
53
53
  # we record the types of non-variable parameters for runtime type checks
54
54
  self.arg_types: List[ts.ColumnType] = []
@@ -62,7 +62,7 @@ class FunctionCall(Expr):
62
62
  self.args.append((len(self.components), None))
63
63
  self.components.append(arg.copy())
64
64
  else:
65
- self.args.append((-1, arg))
65
+ self.args.append((None, arg))
66
66
  if param.kind != inspect.Parameter.VAR_POSITIONAL and param.kind != inspect.Parameter.VAR_KEYWORD:
67
67
  self.arg_types.append(signature.parameters[param.name].col_type)
68
68
 
@@ -74,7 +74,7 @@ class FunctionCall(Expr):
74
74
  self.kwargs[param_name] = (len(self.components), None)
75
75
  self.components.append(arg.copy())
76
76
  else:
77
- self.kwargs[param_name] = (-1, arg)
77
+ self.kwargs[param_name] = (None, arg)
78
78
  if fn.py_signature.parameters[param_name].kind != inspect.Parameter.VAR_KEYWORD:
79
79
  self.kwarg_types[param_name] = signature.parameters[param_name].col_type
80
80
 
@@ -174,9 +174,6 @@ class FunctionCall(Expr):
174
174
  f'Parameter {param_name}: argument type {arg.col_type} does not match parameter type '
175
175
  f'{param_type}')
176
176
 
177
- def is_nos_call(self) -> bool:
178
- return isinstance(self.fn, func.NOSFunction)
179
-
180
177
  def _equals(self, other: FunctionCall) -> bool:
181
178
  if self.fn != other.fn:
182
179
  return False
@@ -215,12 +212,12 @@ class FunctionCall(Expr):
215
212
 
216
213
  def _print_args(self, start_idx: int = 0, inline: bool = True) -> str:
217
214
  arg_strs = [
218
- str(arg) if idx == -1 else str(self.components[idx]) for idx, arg in self.args[start_idx:]
215
+ str(arg) if idx is None else str(self.components[idx]) for idx, arg in self.args[start_idx:]
219
216
  ]
220
217
  def print_arg(arg: Any) -> str:
221
218
  return f"'{arg}'" if isinstance(arg, str) else str(arg)
222
219
  arg_strs.extend([
223
- f'{param_name}={print_arg(arg) if idx == -1 else str(self.components[idx])}'
220
+ f'{param_name}={print_arg(arg) if idx is None else str(self.components[idx])}'
224
221
  for param_name, (idx, arg) in self.kwargs.items()
225
222
  ])
226
223
  if len(self.order_by) > 0:
@@ -287,7 +284,7 @@ class FunctionCall(Expr):
287
284
  """Return args and kwargs, constructed for data_row"""
288
285
  kwargs: Dict[str, Any] = {}
289
286
  for param_name, (component_idx, arg) in self.kwargs.items():
290
- val = arg if component_idx == -1 else data_row[self.components[component_idx].slot_idx]
287
+ val = arg if component_idx is None else data_row[self.components[component_idx].slot_idx]
291
288
  param = self.fn.signature.parameters[param_name]
292
289
  if param.kind == inspect.Parameter.VAR_KEYWORD:
293
290
  # expand **kwargs parameter
@@ -298,7 +295,7 @@ class FunctionCall(Expr):
298
295
 
299
296
  args: List[Any] = []
300
297
  for param_idx, (component_idx, arg) in enumerate(self.args):
301
- val = arg if component_idx == -1 else data_row[self.components[component_idx].slot_idx]
298
+ val = arg if component_idx is None else data_row[self.components[component_idx].slot_idx]
302
299
  param = self.fn.signature.parameters_by_pos[param_idx]
303
300
  if param.kind == inspect.Parameter.VAR_POSITIONAL:
304
301
  # expand *args parameter
@@ -333,7 +330,8 @@ class FunctionCall(Expr):
333
330
  # TODO: can we get rid of this extra copy?
334
331
  fn_expr = self.components[self.fn_expr_idx]
335
332
  data_row[self.slot_idx] = data_row[fn_expr.slot_idx]
336
- elif isinstance(self.fn, func.CallableFunction):
333
+ elif isinstance(self.fn, func.CallableFunction) and not self.fn.is_batched:
334
+ # optimization: avoid additional level of indirection we'd get from calling Function.exec()
337
335
  data_row[self.slot_idx] = self.fn.py_fn(*args, **kwargs)
338
336
  elif self.is_window_fn_call:
339
337
  if self.has_group_by():
@@ -348,9 +346,10 @@ class FunctionCall(Expr):
348
346
  self.aggregator = self.fn.agg_cls(**self.agg_init_args)
349
347
  self.aggregator.update(*args)
350
348
  data_row[self.slot_idx] = self.aggregator.value()
351
- else:
352
- assert self.is_agg_fn_call
349
+ elif self.is_agg_fn_call:
353
350
  data_row[self.slot_idx] = self.aggregator.value()
351
+ else:
352
+ data_row[self.slot_idx] = self.fn.exec(*args, **kwargs)
354
353
 
355
354
  def _as_dict(self) -> Dict:
356
355
  result = {
@@ -369,9 +368,9 @@ class FunctionCall(Expr):
369
368
  # reassemble bound args
370
369
  fn = func.Function.from_dict(d['fn'])
371
370
  param_names = list(fn.signature.parameters.keys())
372
- bound_args = {param_names[i]: arg if idx == -1 else components[idx] for i, (idx, arg) in enumerate(d['args'])}
371
+ bound_args = {param_names[i]: arg if idx is None else components[idx] for i, (idx, arg) in enumerate(d['args'])}
373
372
  bound_args.update(
374
- {param_name: val if idx == -1 else components[idx] for param_name, (idx, val) in d['kwargs'].items()})
373
+ {param_name: val if idx is None else components[idx] for param_name, (idx, val) in d['kwargs'].items()})
375
374
  group_by_exprs = components[d['group_by_start_idx']:d['group_by_stop_idx']]
376
375
  order_by_exprs = components[d['order_by_start_idx']:]
377
376
  fn_call = cls(
@@ -1,7 +1,8 @@
1
+ from __future__ import annotations
2
+
1
3
  import datetime
2
- from typing import Union
3
4
  import enum
4
-
5
+ from typing import Union
5
6
 
6
7
  # Python types corresponding to our literal types
7
8
  LiteralPythonTypes = Union[str, int, float, bool, datetime.datetime, datetime.date]
@@ -33,6 +34,17 @@ class ComparisonOperator(enum.Enum):
33
34
  if self == self.GE:
34
35
  return '>='
35
36
 
37
+ def reverse(self) -> ComparisonOperator:
38
+ if self == self.LT:
39
+ return self.GT
40
+ if self == self.LE:
41
+ return self.GE
42
+ if self == self.GT:
43
+ return self.LT
44
+ if self == self.GE:
45
+ return self.LE
46
+ return self
47
+
36
48
 
37
49
  class LogicalOperator(enum.Enum):
38
50
  AND = 0
@@ -1,19 +1,17 @@
1
1
  from __future__ import annotations
2
- from typing import Optional, List, Any, Dict, Tuple, Union
2
+
3
+ from typing import Optional, List, Any, Dict, Tuple
3
4
 
4
5
  import PIL
5
6
  import sqlalchemy as sql
6
7
 
8
+ import pixeltable.exceptions as excs
9
+ import pixeltable.func as func
10
+ import pixeltable.type_system as ts
11
+ from .data_row import DataRow
7
12
  from .expr import Expr
8
- from .column_ref import ColumnRef
9
13
  from .function_call import FunctionCall
10
- from .image_similarity_predicate import ImageSimilarityPredicate
11
- from .data_row import DataRow
12
14
  from .row_builder import RowBuilder
13
- import pixeltable.catalog as catalog
14
- import pixeltable.func as func
15
- import pixeltable.exceptions as excs
16
- import pixeltable.type_system as ts
17
15
 
18
16
 
19
17
  # TODO: this doesn't dig up all attrs for actual jpeg images
@@ -43,9 +41,7 @@ class ImageMemberAccess(Expr):
43
41
  attr_info = _create_pil_attr_info()
44
42
 
45
43
  def __init__(self, member_name: str, caller: Expr):
46
- if member_name == 'nearest':
47
- super().__init__(ts.InvalidType()) # requires FunctionCall to return value
48
- elif member_name in self.attr_info:
44
+ if member_name in self.attr_info:
49
45
  super().__init__(self.attr_info[member_name])
50
46
  else:
51
47
  candidates = func.FunctionRegistry.get().get_type_methods(member_name, ts.ColumnType.Type.IMAGE)
@@ -78,22 +74,8 @@ class ImageMemberAccess(Expr):
78
74
  assert len(components) == 1
79
75
  return cls(d['member_name'], components[0])
80
76
 
81
- def __call__(self, *args, **kwargs) -> Union[FunctionCall, ImageSimilarityPredicate]:
82
- caller = self._caller
83
- call_signature = f'({",".join([type(arg).__name__ for arg in args])})'
84
- if self.member_name == 'nearest':
85
- # - caller must be ColumnRef
86
- # - signature is (Union[PIL.Image.Image, str])
87
- if not isinstance(caller, ColumnRef):
88
- raise excs.Error(f'nearest(): caller must be an image column')
89
- if len(args) != 1 or (not isinstance(args[0], PIL.Image.Image) and not isinstance(args[0], str)):
90
- raise excs.Error(f'nearest(): requires a PIL.Image.Image or str, got {call_signature} instead')
91
- return ImageSimilarityPredicate(
92
- caller,
93
- img=args[0] if isinstance(args[0], PIL.Image.Image) else None,
94
- text=args[0] if isinstance(args[0], str) else None)
95
-
96
- result = self.img_method(*[caller, *args], **kwargs)
77
+ def __call__(self, *args, **kwargs) -> FunctionCall:
78
+ result = self.img_method(*[self._caller, *args], **kwargs)
97
79
  result.is_method_call = True
98
80
  return result
99
81
 
@@ -112,4 +94,3 @@ class ImageMemberAccess(Expr):
112
94
  data_row[self.slot_idx] = getattr(caller_val, self.member_name)
113
95
  except AttributeError:
114
96
  data_row[self.slot_idx] = None
115
-
@@ -0,0 +1,96 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Optional, List, Any, Dict, Tuple, Iterable
4
+
5
+ import sqlalchemy as sql
6
+
7
+ import pixeltable.exceptions as excs
8
+ from .data_row import DataRow
9
+ from .expr import Expr
10
+ from .predicate import Predicate
11
+ from .row_builder import RowBuilder
12
+
13
+
14
+ class InPredicate(Predicate):
15
+ """Predicate corresponding to the SQL IN operator."""
16
+
17
+ def __init__(self, lhs: Expr, value_set_literal: Optional[Iterable] = None, value_set_expr: Optional[Expr] = None):
18
+ assert (value_set_literal is None) != (value_set_expr is None)
19
+ if not lhs.col_type.is_scalar_type():
20
+ raise excs.Error(f'isin(): only supported for scalar types, not {lhs.col_type}')
21
+ super().__init__()
22
+
23
+ self.value_list: Optional[list] = None # only contains values of the correct type
24
+ if value_set_expr is not None:
25
+ if not value_set_expr.col_type.is_json_type():
26
+ raise excs.Error(
27
+ f'isin(): argument must have a JSON type, but {value_set_expr} has type {value_set_expr.col_type}')
28
+ self.components = [lhs.copy(), value_set_expr.copy()]
29
+ else:
30
+ assert value_set_literal is not None
31
+ self.components = [lhs.copy()]
32
+ self.value_list = self._normalize_value_set(value_set_literal)
33
+
34
+ self.id = self._create_id()
35
+
36
+ @property
37
+ def _lhs(self) -> Expr:
38
+ return self.components[0]
39
+
40
+ @property
41
+ def _value_set_expr(self) -> Expr:
42
+ assert len(self.components) == 2
43
+ return self.components[1]
44
+
45
+ def _normalize_value_set(self, value_set: Any, filter_type_mismatches: bool = True) -> Iterable:
46
+ if not isinstance(value_set, Iterable):
47
+ raise excs.Error(f'isin(): argument must be an Iterable (eg, list, dict, ...), not {value_set!r}')
48
+ value_list = list(value_set)
49
+ if not filter_type_mismatches:
50
+ return value_list
51
+
52
+ # ignore elements of the wrong type
53
+ result = []
54
+ for val in value_list:
55
+ try:
56
+ self._lhs.col_type.validate_literal(val)
57
+ result.append(val)
58
+ except TypeError:
59
+ pass
60
+ return result
61
+
62
+ def __str__(self) -> str:
63
+ if self.value_list is not None:
64
+ return f'{self.components[0]}.isin({self.value_list})'
65
+ return f'{self.components[0]}.isin({self.components[1]})'
66
+
67
+ def _equals(self, other: InPredicate) -> bool:
68
+ return self.value_list == other.value_list
69
+
70
+ def _id_attrs(self) -> List[Tuple[str, Any]]:
71
+ return super()._id_attrs() + [('value_list', self.value_list)]
72
+
73
+ def sql_expr(self) -> Optional[sql.ClauseElement]:
74
+ lhs_sql_exprs = self.components[0].sql_expr()
75
+ if lhs_sql_exprs is None or self.value_list is None:
76
+ return None
77
+ return lhs_sql_exprs.in_(self.value_list)
78
+
79
+ def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
80
+ lhs_val = data_row[self._lhs.slot_idx]
81
+ if self.value_list is not None:
82
+ data_row[self.slot_idx] = lhs_val in self.value_list
83
+ else:
84
+ value_set = data_row[self._value_set_expr.slot_idx]
85
+ value_list = self._normalize_value_set(value_set, filter_type_mismatches=False)
86
+ data_row[self.slot_idx] = lhs_val in value_list
87
+
88
+ def _as_dict(self) -> Dict:
89
+ return {'value_list': self.value_list, **super()._as_dict()}
90
+
91
+ @classmethod
92
+ def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
93
+ assert 'value_list' in d
94
+ assert len(components) <= 2
95
+ return cls(components[0], d['value_list'], components[1] if len(components) == 2 else None)
96
+
@@ -1,16 +1,16 @@
1
1
  from __future__ import annotations
2
- from typing import Optional, List, Any, Dict, Tuple
2
+
3
3
  import copy
4
+ from typing import Optional, List, Any, Dict, Tuple
4
5
 
5
- import sqlalchemy as sql
6
6
  import numpy as np
7
+ import sqlalchemy as sql
7
8
 
8
- from .expr import Expr
9
+ import pixeltable.type_system as ts
9
10
  from .data_row import DataRow
11
+ from .expr import Expr
10
12
  from .inline_dict import InlineDict
11
13
  from .row_builder import RowBuilder
12
- import pixeltable.catalog as catalog
13
- import pixeltable.type_system as ts
14
14
 
15
15
 
16
16
  class InlineArray(Expr):
@@ -27,8 +27,8 @@ class InlineArray(Expr):
27
27
 
28
28
  # elements contains
29
29
  # - for Expr elements: (index into components, None)
30
- # - for non-Expr elements: (-1, value)
31
- self.elements: List[Tuple[int, Any]] = []
30
+ # - for non-Expr elements: (None, value)
31
+ self.elements: List[Tuple[Optional[int], Any]] = []
32
32
  for el in elements:
33
33
  el = copy.deepcopy(el)
34
34
  if isinstance(el, list):
@@ -41,11 +41,11 @@ class InlineArray(Expr):
41
41
  self.elements.append((len(self.components), None))
42
42
  self.components.append(el)
43
43
  else:
44
- self.elements.append((-1, el))
44
+ self.elements.append((None, el))
45
45
 
46
46
  inferred_element_type = ts.InvalidType()
47
47
  for idx, val in self.elements:
48
- if idx >= 0:
48
+ if idx is not None:
49
49
  inferred_element_type = ts.ColumnType.supertype(inferred_element_type, self.components[idx].col_type)
50
50
  else:
51
51
  inferred_element_type = ts.ColumnType.supertype(inferred_element_type, ts.ColumnType.infer_literal_type(val))
@@ -83,7 +83,7 @@ class InlineArray(Expr):
83
83
  def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
84
84
  result = [None] * len(self.elements)
85
85
  for i, (child_idx, val) in enumerate(self.elements):
86
- if child_idx >= 0:
86
+ if child_idx is not None:
87
87
  result[i] = data_row[self.components[child_idx].slot_idx]
88
88
  else:
89
89
  result[i] = copy.deepcopy(val)
@@ -100,7 +100,9 @@ class InlineArray(Expr):
100
100
  assert 'elements' in d
101
101
  arg: List[Any] = []
102
102
  for idx, val in d['elements']:
103
- if idx >= 0:
103
+ # TODO Normalize idx -1 to None via schema migrations.
104
+ # Long-term we should not be allowing idx == -1.
105
+ if idx is not None and idx >= 0: # Older schemas might have -1 instead of None
104
106
  arg.append(components[idx])
105
107
  else:
106
108
  arg.append(val)