pixeltable 0.2.4__py3-none-any.whl → 0.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (99)
  1. pixeltable/__init__.py +18 -9
  2. pixeltable/__version__.py +3 -0
  3. pixeltable/catalog/column.py +31 -50
  4. pixeltable/catalog/insertable_table.py +7 -6
  5. pixeltable/catalog/table.py +171 -57
  6. pixeltable/catalog/table_version.py +417 -140
  7. pixeltable/catalog/table_version_path.py +2 -2
  8. pixeltable/dataframe.py +239 -121
  9. pixeltable/env.py +82 -16
  10. pixeltable/exec/__init__.py +2 -1
  11. pixeltable/exec/cache_prefetch_node.py +1 -1
  12. pixeltable/exec/data_row_batch.py +6 -7
  13. pixeltable/exec/expr_eval_node.py +28 -28
  14. pixeltable/exec/in_memory_data_node.py +11 -7
  15. pixeltable/exec/sql_scan_node.py +7 -6
  16. pixeltable/exprs/__init__.py +4 -3
  17. pixeltable/exprs/column_ref.py +9 -0
  18. pixeltable/exprs/comparison.py +3 -3
  19. pixeltable/exprs/data_row.py +5 -1
  20. pixeltable/exprs/expr.py +15 -7
  21. pixeltable/exprs/function_call.py +17 -15
  22. pixeltable/exprs/image_member_access.py +9 -28
  23. pixeltable/exprs/in_predicate.py +96 -0
  24. pixeltable/exprs/inline_array.py +13 -11
  25. pixeltable/exprs/inline_dict.py +15 -13
  26. pixeltable/exprs/literal.py +16 -4
  27. pixeltable/exprs/row_builder.py +15 -41
  28. pixeltable/exprs/similarity_expr.py +65 -0
  29. pixeltable/ext/__init__.py +5 -0
  30. pixeltable/ext/functions/yolox.py +92 -0
  31. pixeltable/func/__init__.py +0 -2
  32. pixeltable/func/aggregate_function.py +18 -15
  33. pixeltable/func/callable_function.py +57 -13
  34. pixeltable/func/expr_template_function.py +20 -3
  35. pixeltable/func/function.py +35 -4
  36. pixeltable/func/globals.py +24 -14
  37. pixeltable/func/signature.py +23 -27
  38. pixeltable/func/udf.py +13 -12
  39. pixeltable/functions/__init__.py +8 -8
  40. pixeltable/functions/eval.py +7 -8
  41. pixeltable/functions/huggingface.py +64 -17
  42. pixeltable/functions/openai.py +36 -3
  43. pixeltable/functions/pil/image.py +61 -64
  44. pixeltable/functions/together.py +21 -0
  45. pixeltable/functions/util.py +11 -0
  46. pixeltable/globals.py +425 -0
  47. pixeltable/index/__init__.py +2 -0
  48. pixeltable/index/base.py +51 -0
  49. pixeltable/index/embedding_index.py +168 -0
  50. pixeltable/io/__init__.py +3 -0
  51. pixeltable/{utils → io}/hf_datasets.py +48 -17
  52. pixeltable/io/pandas.py +148 -0
  53. pixeltable/{utils → io}/parquet.py +58 -33
  54. pixeltable/iterators/__init__.py +1 -1
  55. pixeltable/iterators/base.py +4 -0
  56. pixeltable/iterators/document.py +218 -97
  57. pixeltable/iterators/video.py +8 -9
  58. pixeltable/metadata/__init__.py +7 -3
  59. pixeltable/metadata/converters/convert_12.py +3 -0
  60. pixeltable/metadata/converters/convert_13.py +41 -0
  61. pixeltable/metadata/schema.py +45 -22
  62. pixeltable/plan.py +15 -51
  63. pixeltable/store.py +38 -41
  64. pixeltable/tool/create_test_db_dump.py +39 -4
  65. pixeltable/type_system.py +47 -96
  66. pixeltable/utils/documents.py +42 -12
  67. pixeltable/utils/http_server.py +70 -0
  68. {pixeltable-0.2.4.dist-info → pixeltable-0.2.6.dist-info}/METADATA +14 -10
  69. pixeltable-0.2.6.dist-info/RECORD +119 -0
  70. {pixeltable-0.2.4.dist-info → pixeltable-0.2.6.dist-info}/WHEEL +1 -1
  71. pixeltable/client.py +0 -604
  72. pixeltable/exprs/image_similarity_predicate.py +0 -58
  73. pixeltable/func/batched_function.py +0 -53
  74. pixeltable/tests/conftest.py +0 -177
  75. pixeltable/tests/functions/test_fireworks.py +0 -42
  76. pixeltable/tests/functions/test_functions.py +0 -60
  77. pixeltable/tests/functions/test_huggingface.py +0 -158
  78. pixeltable/tests/functions/test_openai.py +0 -152
  79. pixeltable/tests/functions/test_together.py +0 -111
  80. pixeltable/tests/test_audio.py +0 -65
  81. pixeltable/tests/test_catalog.py +0 -27
  82. pixeltable/tests/test_client.py +0 -21
  83. pixeltable/tests/test_component_view.py +0 -370
  84. pixeltable/tests/test_dataframe.py +0 -439
  85. pixeltable/tests/test_dirs.py +0 -107
  86. pixeltable/tests/test_document.py +0 -120
  87. pixeltable/tests/test_exprs.py +0 -805
  88. pixeltable/tests/test_function.py +0 -324
  89. pixeltable/tests/test_migration.py +0 -43
  90. pixeltable/tests/test_nos.py +0 -54
  91. pixeltable/tests/test_snapshot.py +0 -208
  92. pixeltable/tests/test_table.py +0 -1267
  93. pixeltable/tests/test_transactional_directory.py +0 -42
  94. pixeltable/tests/test_types.py +0 -22
  95. pixeltable/tests/test_video.py +0 -159
  96. pixeltable/tests/test_view.py +0 -530
  97. pixeltable/tests/utils.py +0 -408
  98. pixeltable-0.2.4.dist-info/RECORD +0 -132
  99. {pixeltable-0.2.4.dist-info → pixeltable-0.2.6.dist-info}/LICENSE +0 -0
@@ -28,7 +28,7 @@ class FunctionCall(Expr):
28
28
  if group_by_clause is None:
29
29
  group_by_clause = []
30
30
  signature = fn.signature
31
- super().__init__(signature.get_return_type(bound_args))
31
+ super().__init__(fn.call_return_type(bound_args))
32
32
  self.fn = fn
33
33
  self.is_method_call = is_method_call
34
34
  self.check_args(signature, bound_args)
@@ -46,9 +46,9 @@ class FunctionCall(Expr):
46
46
 
47
47
  # Tuple[int, Any]:
48
48
  # - for Exprs: (index into components, None)
49
- # - otherwise: (-1, val)
50
- self.args: List[Tuple[int, Any]] = []
51
- self.kwargs: Dict[str, Tuple[int, Any]] = {}
49
+ # - otherwise: (None, val)
50
+ self.args: List[Tuple[Optional[int], Optional[Any]]] = []
51
+ self.kwargs: Dict[str, Tuple[Optional[int], Optional[Any]]] = {}
52
52
 
53
53
  # we record the types of non-variable parameters for runtime type checks
54
54
  self.arg_types: List[ts.ColumnType] = []
@@ -62,7 +62,7 @@ class FunctionCall(Expr):
62
62
  self.args.append((len(self.components), None))
63
63
  self.components.append(arg.copy())
64
64
  else:
65
- self.args.append((-1, arg))
65
+ self.args.append((None, arg))
66
66
  if param.kind != inspect.Parameter.VAR_POSITIONAL and param.kind != inspect.Parameter.VAR_KEYWORD:
67
67
  self.arg_types.append(signature.parameters[param.name].col_type)
68
68
 
@@ -74,7 +74,7 @@ class FunctionCall(Expr):
74
74
  self.kwargs[param_name] = (len(self.components), None)
75
75
  self.components.append(arg.copy())
76
76
  else:
77
- self.kwargs[param_name] = (-1, arg)
77
+ self.kwargs[param_name] = (None, arg)
78
78
  if fn.py_signature.parameters[param_name].kind != inspect.Parameter.VAR_KEYWORD:
79
79
  self.kwarg_types[param_name] = signature.parameters[param_name].col_type
80
80
 
@@ -215,12 +215,12 @@ class FunctionCall(Expr):
215
215
 
216
216
  def _print_args(self, start_idx: int = 0, inline: bool = True) -> str:
217
217
  arg_strs = [
218
- str(arg) if idx == -1 else str(self.components[idx]) for idx, arg in self.args[start_idx:]
218
+ str(arg) if idx is None else str(self.components[idx]) for idx, arg in self.args[start_idx:]
219
219
  ]
220
220
  def print_arg(arg: Any) -> str:
221
221
  return f"'{arg}'" if isinstance(arg, str) else str(arg)
222
222
  arg_strs.extend([
223
- f'{param_name}={print_arg(arg) if idx == -1 else str(self.components[idx])}'
223
+ f'{param_name}={print_arg(arg) if idx is None else str(self.components[idx])}'
224
224
  for param_name, (idx, arg) in self.kwargs.items()
225
225
  ])
226
226
  if len(self.order_by) > 0:
@@ -287,7 +287,7 @@ class FunctionCall(Expr):
287
287
  """Return args and kwargs, constructed for data_row"""
288
288
  kwargs: Dict[str, Any] = {}
289
289
  for param_name, (component_idx, arg) in self.kwargs.items():
290
- val = arg if component_idx == -1 else data_row[self.components[component_idx].slot_idx]
290
+ val = arg if component_idx is None else data_row[self.components[component_idx].slot_idx]
291
291
  param = self.fn.signature.parameters[param_name]
292
292
  if param.kind == inspect.Parameter.VAR_KEYWORD:
293
293
  # expand **kwargs parameter
@@ -298,7 +298,7 @@ class FunctionCall(Expr):
298
298
 
299
299
  args: List[Any] = []
300
300
  for param_idx, (component_idx, arg) in enumerate(self.args):
301
- val = arg if component_idx == -1 else data_row[self.components[component_idx].slot_idx]
301
+ val = arg if component_idx is None else data_row[self.components[component_idx].slot_idx]
302
302
  param = self.fn.signature.parameters_by_pos[param_idx]
303
303
  if param.kind == inspect.Parameter.VAR_POSITIONAL:
304
304
  # expand *args parameter
@@ -333,7 +333,8 @@ class FunctionCall(Expr):
333
333
  # TODO: can we get rid of this extra copy?
334
334
  fn_expr = self.components[self.fn_expr_idx]
335
335
  data_row[self.slot_idx] = data_row[fn_expr.slot_idx]
336
- elif isinstance(self.fn, func.CallableFunction):
336
+ elif isinstance(self.fn, func.CallableFunction) and not self.fn.is_batched:
337
+ # optimization: avoid additional level of indirection we'd get from calling Function.exec()
337
338
  data_row[self.slot_idx] = self.fn.py_fn(*args, **kwargs)
338
339
  elif self.is_window_fn_call:
339
340
  if self.has_group_by():
@@ -348,9 +349,10 @@ class FunctionCall(Expr):
348
349
  self.aggregator = self.fn.agg_cls(**self.agg_init_args)
349
350
  self.aggregator.update(*args)
350
351
  data_row[self.slot_idx] = self.aggregator.value()
351
- else:
352
- assert self.is_agg_fn_call
352
+ elif self.is_agg_fn_call:
353
353
  data_row[self.slot_idx] = self.aggregator.value()
354
+ else:
355
+ data_row[self.slot_idx] = self.fn.exec(*args, **kwargs)
354
356
 
355
357
  def _as_dict(self) -> Dict:
356
358
  result = {
@@ -369,9 +371,9 @@ class FunctionCall(Expr):
369
371
  # reassemble bound args
370
372
  fn = func.Function.from_dict(d['fn'])
371
373
  param_names = list(fn.signature.parameters.keys())
372
- bound_args = {param_names[i]: arg if idx == -1 else components[idx] for i, (idx, arg) in enumerate(d['args'])}
374
+ bound_args = {param_names[i]: arg if idx is None else components[idx] for i, (idx, arg) in enumerate(d['args'])}
373
375
  bound_args.update(
374
- {param_name: val if idx == -1 else components[idx] for param_name, (idx, val) in d['kwargs'].items()})
376
+ {param_name: val if idx is None else components[idx] for param_name, (idx, val) in d['kwargs'].items()})
375
377
  group_by_exprs = components[d['group_by_start_idx']:d['group_by_stop_idx']]
376
378
  order_by_exprs = components[d['order_by_start_idx']:]
377
379
  fn_call = cls(
@@ -1,19 +1,17 @@
1
1
  from __future__ import annotations
2
- from typing import Optional, List, Any, Dict, Tuple, Union
2
+
3
+ from typing import Optional, List, Any, Dict, Tuple
3
4
 
4
5
  import PIL
5
6
  import sqlalchemy as sql
6
7
 
8
+ import pixeltable.exceptions as excs
9
+ import pixeltable.func as func
10
+ import pixeltable.type_system as ts
11
+ from .data_row import DataRow
7
12
  from .expr import Expr
8
- from .column_ref import ColumnRef
9
13
  from .function_call import FunctionCall
10
- from .image_similarity_predicate import ImageSimilarityPredicate
11
- from .data_row import DataRow
12
14
  from .row_builder import RowBuilder
13
- import pixeltable.catalog as catalog
14
- import pixeltable.func as func
15
- import pixeltable.exceptions as excs
16
- import pixeltable.type_system as ts
17
15
 
18
16
 
19
17
  # TODO: this doesn't dig up all attrs for actual jpeg images
@@ -43,9 +41,7 @@ class ImageMemberAccess(Expr):
43
41
  attr_info = _create_pil_attr_info()
44
42
 
45
43
  def __init__(self, member_name: str, caller: Expr):
46
- if member_name == 'nearest':
47
- super().__init__(ts.InvalidType()) # requires FunctionCall to return value
48
- elif member_name in self.attr_info:
44
+ if member_name in self.attr_info:
49
45
  super().__init__(self.attr_info[member_name])
50
46
  else:
51
47
  candidates = func.FunctionRegistry.get().get_type_methods(member_name, ts.ColumnType.Type.IMAGE)
@@ -78,22 +74,8 @@ class ImageMemberAccess(Expr):
78
74
  assert len(components) == 1
79
75
  return cls(d['member_name'], components[0])
80
76
 
81
- def __call__(self, *args, **kwargs) -> Union[FunctionCall, ImageSimilarityPredicate]:
82
- caller = self._caller
83
- call_signature = f'({",".join([type(arg).__name__ for arg in args])})'
84
- if self.member_name == 'nearest':
85
- # - caller must be ColumnRef
86
- # - signature is (Union[PIL.Image.Image, str])
87
- if not isinstance(caller, ColumnRef):
88
- raise excs.Error(f'nearest(): caller must be an image column')
89
- if len(args) != 1 or (not isinstance(args[0], PIL.Image.Image) and not isinstance(args[0], str)):
90
- raise excs.Error(f'nearest(): requires a PIL.Image.Image or str, got {call_signature} instead')
91
- return ImageSimilarityPredicate(
92
- caller,
93
- img=args[0] if isinstance(args[0], PIL.Image.Image) else None,
94
- text=args[0] if isinstance(args[0], str) else None)
95
-
96
- result = self.img_method(*[caller, *args], **kwargs)
77
+ def __call__(self, *args, **kwargs) -> FunctionCall:
78
+ result = self.img_method(*[self._caller, *args], **kwargs)
97
79
  result.is_method_call = True
98
80
  return result
99
81
 
@@ -112,4 +94,3 @@ class ImageMemberAccess(Expr):
112
94
  data_row[self.slot_idx] = getattr(caller_val, self.member_name)
113
95
  except AttributeError:
114
96
  data_row[self.slot_idx] = None
115
-
@@ -0,0 +1,96 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Optional, List, Any, Dict, Tuple, Iterable
4
+
5
+ import sqlalchemy as sql
6
+
7
+ import pixeltable.exceptions as excs
8
+ from .data_row import DataRow
9
+ from .expr import Expr
10
+ from .predicate import Predicate
11
+ from .row_builder import RowBuilder
12
+
13
+
14
+ class InPredicate(Predicate):
15
+ """Predicate corresponding to the SQL IN operator."""
16
+
17
+ def __init__(self, lhs: Expr, value_set_literal: Optional[Iterable] = None, value_set_expr: Optional[Expr] = None):
18
+ assert (value_set_literal is None) != (value_set_expr is None)
19
+ if not lhs.col_type.is_scalar_type():
20
+ raise excs.Error(f'isin(): only supported for scalar types, not {lhs.col_type}')
21
+ super().__init__()
22
+
23
+ self.value_list: Optional[list] = None # only contains values of the correct type
24
+ if value_set_expr is not None:
25
+ if not value_set_expr.col_type.is_json_type():
26
+ raise excs.Error(
27
+ f'isin(): argument must have a JSON type, but {value_set_expr} has type {value_set_expr.col_type}')
28
+ self.components = [lhs.copy(), value_set_expr.copy()]
29
+ else:
30
+ assert value_set_literal is not None
31
+ self.components = [lhs.copy()]
32
+ self.value_list = self._normalize_value_set(value_set_literal)
33
+
34
+ self.id = self._create_id()
35
+
36
+ @property
37
+ def _lhs(self) -> Expr:
38
+ return self.components[0]
39
+
40
+ @property
41
+ def _value_set_expr(self) -> Expr:
42
+ assert len(self.components) == 2
43
+ return self.components[1]
44
+
45
+ def _normalize_value_set(self, value_set: Any, filter_type_mismatches: bool = True) -> Iterable:
46
+ if not isinstance(value_set, Iterable):
47
+ raise excs.Error(f'isin(): argument must be an Iterable (eg, list, dict, ...), not {value_set!r}')
48
+ value_list = list(value_set)
49
+ if not filter_type_mismatches:
50
+ return value_list
51
+
52
+ # ignore elements of the wrong type
53
+ result = []
54
+ for val in value_list:
55
+ try:
56
+ self._lhs.col_type.validate_literal(val)
57
+ result.append(val)
58
+ except TypeError:
59
+ pass
60
+ return result
61
+
62
+ def __str__(self) -> str:
63
+ if self.value_list is not None:
64
+ return f'{self.components[0]}.isin({self.value_list})'
65
+ return f'{self.components[0]}.isin({self.components[1]})'
66
+
67
+ def _equals(self, other: InPredicate) -> bool:
68
+ return self.value_list == other.value_list
69
+
70
+ def _id_attrs(self) -> List[Tuple[str, Any]]:
71
+ return super()._id_attrs() + [('value_list', self.value_list)]
72
+
73
+ def sql_expr(self) -> Optional[sql.ClauseElement]:
74
+ lhs_sql_exprs = self.components[0].sql_expr()
75
+ if lhs_sql_exprs is None or self.value_list is None:
76
+ return None
77
+ return lhs_sql_exprs.in_(self.value_list)
78
+
79
+ def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
80
+ lhs_val = data_row[self._lhs.slot_idx]
81
+ if self.value_list is not None:
82
+ data_row[self.slot_idx] = lhs_val in self.value_list
83
+ else:
84
+ value_set = data_row[self._value_set_expr.slot_idx]
85
+ value_list = self._normalize_value_set(value_set, filter_type_mismatches=False)
86
+ data_row[self.slot_idx] = lhs_val in value_list
87
+
88
+ def _as_dict(self) -> Dict:
89
+ return {'value_list': self.value_list, **super()._as_dict()}
90
+
91
+ @classmethod
92
+ def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
93
+ assert 'value_list' in d
94
+ assert len(components) <= 2
95
+ return cls(components[0], d['value_list'], components[1] if len(components) == 2 else None)
96
+
@@ -1,16 +1,16 @@
1
1
  from __future__ import annotations
2
- from typing import Optional, List, Any, Dict, Tuple
2
+
3
3
  import copy
4
+ from typing import Optional, List, Any, Dict, Tuple
4
5
 
5
- import sqlalchemy as sql
6
6
  import numpy as np
7
+ import sqlalchemy as sql
7
8
 
8
- from .expr import Expr
9
+ import pixeltable.type_system as ts
9
10
  from .data_row import DataRow
11
+ from .expr import Expr
10
12
  from .inline_dict import InlineDict
11
13
  from .row_builder import RowBuilder
12
- import pixeltable.catalog as catalog
13
- import pixeltable.type_system as ts
14
14
 
15
15
 
16
16
  class InlineArray(Expr):
@@ -27,8 +27,8 @@ class InlineArray(Expr):
27
27
 
28
28
  # elements contains
29
29
  # - for Expr elements: (index into components, None)
30
- # - for non-Expr elements: (-1, value)
31
- self.elements: List[Tuple[int, Any]] = []
30
+ # - for non-Expr elements: (None, value)
31
+ self.elements: List[Tuple[Optional[int], Any]] = []
32
32
  for el in elements:
33
33
  el = copy.deepcopy(el)
34
34
  if isinstance(el, list):
@@ -41,11 +41,11 @@ class InlineArray(Expr):
41
41
  self.elements.append((len(self.components), None))
42
42
  self.components.append(el)
43
43
  else:
44
- self.elements.append((-1, el))
44
+ self.elements.append((None, el))
45
45
 
46
46
  inferred_element_type = ts.InvalidType()
47
47
  for idx, val in self.elements:
48
- if idx >= 0:
48
+ if idx is not None:
49
49
  inferred_element_type = ts.ColumnType.supertype(inferred_element_type, self.components[idx].col_type)
50
50
  else:
51
51
  inferred_element_type = ts.ColumnType.supertype(inferred_element_type, ts.ColumnType.infer_literal_type(val))
@@ -83,7 +83,7 @@ class InlineArray(Expr):
83
83
  def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
84
84
  result = [None] * len(self.elements)
85
85
  for i, (child_idx, val) in enumerate(self.elements):
86
- if child_idx >= 0:
86
+ if child_idx is not None:
87
87
  result[i] = data_row[self.components[child_idx].slot_idx]
88
88
  else:
89
89
  result[i] = copy.deepcopy(val)
@@ -100,7 +100,9 @@ class InlineArray(Expr):
100
100
  assert 'elements' in d
101
101
  arg: List[Any] = []
102
102
  for idx, val in d['elements']:
103
- if idx >= 0:
103
+ # TODO Normalize idx -1 to None via schema migrations.
104
+ # Long-term we should not be allowing idx == -1.
105
+ if idx is not None and idx >= 0: # Older schemas might have -1 instead of None
104
106
  arg.append(components[idx])
105
107
  else:
106
108
  arg.append(val)
@@ -1,15 +1,15 @@
1
1
  from __future__ import annotations
2
- from typing import Optional, List, Any, Dict, Tuple
2
+
3
3
  import copy
4
+ from typing import Optional, List, Any, Dict, Tuple
4
5
 
5
6
  import sqlalchemy as sql
6
7
 
7
- from .expr import Expr
8
- from .data_row import DataRow
9
- from .row_builder import RowBuilder
10
8
  import pixeltable.exceptions as excs
11
- import pixeltable.catalog as catalog
12
9
  import pixeltable.type_system as ts
10
+ from .data_row import DataRow
11
+ from .expr import Expr
12
+ from .row_builder import RowBuilder
13
13
 
14
14
 
15
15
  class InlineDict(Expr):
@@ -21,8 +21,8 @@ class InlineDict(Expr):
21
21
  super().__init__(ts.JsonType()) # we need to call this in order to populate self.components
22
22
  # dict_items contains
23
23
  # - for Expr fields: (key, index into components, None)
24
- # - for non-Expr fields: (key, -1, value)
25
- self.dict_items: List[Tuple[str, int, Any]] = []
24
+ # - for non-Expr fields: (key, None, value)
25
+ self.dict_items: List[Tuple[str, Optional[int], Any]] = []
26
26
  for key, val in d.items():
27
27
  if not isinstance(key, str):
28
28
  raise excs.Error(f'Dictionary requires string keys, {key} has type {type(key)}')
@@ -35,11 +35,11 @@ class InlineDict(Expr):
35
35
  self.dict_items.append((key, len(self.components), None))
36
36
  self.components.append(val)
37
37
  else:
38
- self.dict_items.append((key, -1, val))
38
+ self.dict_items.append((key, None, val))
39
39
 
40
40
  self.type_spec: Optional[Dict[str, ts.ColumnType]] = {}
41
41
  for key, idx, _ in self.dict_items:
42
- if idx == -1:
42
+ if idx is None:
43
43
  # TODO: implement type inference for values
44
44
  self.type_spec = None
45
45
  break
@@ -56,7 +56,7 @@ class InlineDict(Expr):
56
56
  return f"'{val}'"
57
57
  return str(val)
58
58
  for key, idx, val in self.dict_items:
59
- if idx != -1:
59
+ if idx is not None:
60
60
  item_strs.append(f"'{key}': {str(self.components[i])}")
61
61
  i += 1
62
62
  else:
@@ -71,7 +71,7 @@ class InlineDict(Expr):
71
71
 
72
72
  def to_dict(self) -> Dict[str, Any]:
73
73
  """Return the original dict used to construct this"""
74
- return {key: val if idx == -1 else self.components[idx] for key, idx, val in self.dict_items}
74
+ return {key: val if idx is None else self.components[idx] for key, idx, val in self.dict_items}
75
75
 
76
76
  def sql_expr(self) -> Optional[sql.ClauseElement]:
77
77
  return None
@@ -80,7 +80,7 @@ class InlineDict(Expr):
80
80
  result = {}
81
81
  for key, idx, val in self.dict_items:
82
82
  assert isinstance(key, str)
83
- if idx >= 0:
83
+ if idx is not None:
84
84
  result[key] = data_row[self.components[idx].slot_idx]
85
85
  else:
86
86
  result[key] = copy.deepcopy(val)
@@ -94,7 +94,9 @@ class InlineDict(Expr):
94
94
  assert 'dict_items' in d
95
95
  arg: Dict[str, Any] = {}
96
96
  for key, idx, val in d['dict_items']:
97
- if idx >= 0:
97
+ # TODO Normalize idx -1 to None via schema migrations.
98
+ # Long-term we should not be allowing idx == -1.
99
+ if idx is not None and idx >= 0: # Older schemas might have -1 instead of None
98
100
  arg[key] = components[idx]
99
101
  else:
100
102
  arg[key] = val
@@ -1,13 +1,16 @@
1
1
  from __future__ import annotations
2
+
3
+ import datetime
2
4
  from typing import Optional, List, Any, Dict, Tuple
3
5
 
4
6
  import sqlalchemy as sql
5
7
 
6
- from .expr import Expr
8
+ import pixeltable.exceptions as excs
9
+ import pixeltable.type_system as ts
7
10
  from .data_row import DataRow
11
+ from .expr import Expr
8
12
  from .row_builder import RowBuilder
9
- import pixeltable.catalog as catalog
10
- import pixeltable.type_system as ts
13
+
11
14
 
12
15
  class Literal(Expr):
13
16
  def __init__(self, val: Any, col_type: Optional[ts.ColumnType] = None):
@@ -46,9 +49,18 @@ class Literal(Expr):
46
49
  data_row[self.slot_idx] = self.val
47
50
 
48
51
  def _as_dict(self) -> Dict:
49
- return {'val': self.val, **super()._as_dict()}
52
+ # For some types, we need to explicitly record their type, because JSON does not know
53
+ # how to interpret them unambiguously
54
+ if self.col_type.is_timestamp_type():
55
+ return {'val': self.val.isoformat(), 'val_t': self.col_type._type.name, **super()._as_dict()}
56
+ else:
57
+ return {'val': self.val, **super()._as_dict()}
50
58
 
51
59
  @classmethod
52
60
  def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
53
61
  assert 'val' in d
62
+ if 'val_t' in d:
63
+ val_t = d['val_t']
64
+ assert val_t == ts.ColumnType.Type.TIMESTAMP.name
65
+ return cls(datetime.datetime.fromisoformat(d['val']))
54
66
  return cls(d['val'])
@@ -54,14 +54,14 @@ class RowBuilder:
54
54
  target_exprs: List[Expr] # exprs corresponding to target_slot_idxs
55
55
 
56
56
  def __init__(
57
- self, output_exprs: List[Expr], columns: List[catalog.Column],
58
- indices: List[Tuple[catalog.Column, func.Function]], input_exprs: List[Expr]
57
+ self, output_exprs: List[Expr], columns: List[catalog.Column], input_exprs: List[Expr]
59
58
  ):
60
59
  """
61
60
  Args:
62
61
  output_exprs: list of Exprs to be evaluated
63
62
  columns: list of columns to be materialized
64
- indices: list of embeddings to be materialized (Tuple[indexed column, embedding function])
63
+ input_exprs: list of Exprs that are excluded from evaluation (because they're already materialized)
64
+ TODO: enforce that output_exprs doesn't overlap with input_exprs?
65
65
  """
66
66
  self.unique_exprs = ExprSet() # dependencies precede their dependents
67
67
  self.next_slot_idx = 0
@@ -73,7 +73,6 @@ class RowBuilder:
73
73
  # output exprs: all exprs the caller wants to materialize
74
74
  # - explicitly requested output_exprs
75
75
  # - values for computed columns
76
- # - embedding values for indices
77
76
  resolve_cols = set(columns)
78
77
  self.output_exprs = [
79
78
  self._record_unique_expr(e.copy().resolve_computed_cols(resolve_cols=resolve_cols), recursive=True)
@@ -97,21 +96,6 @@ class RowBuilder:
97
96
  ref = self._record_unique_expr(ref, recursive=False)
98
97
  self.add_table_column(col, ref.slot_idx)
99
98
 
100
- # record indices; indexed by slot_idx
101
- self.index_columns: List[catalog.Column] = []
102
- for col, embedding_fn in indices:
103
- # we assume that the parameter of the embedding function is a ref to an image column
104
- assert col.col_type.is_image_type()
105
- # construct expr to compute embedding; explicitly resize images to the required size
106
- target_img_type = next(iter(embedding_fn.signature.parameters.values())).col_type
107
- expr = embedding_fn(ColumnRef(col).resize(target_img_type.size))
108
- expr = self._record_unique_expr(expr, recursive=True)
109
- self.output_exprs.append(expr)
110
- if len(self.index_columns) <= expr.slot_idx:
111
- # pad to slot_idx
112
- self.index_columns.extend([None] * (expr.slot_idx - len(self.index_columns) + 1))
113
- self.index_columns[expr.slot_idx] = col
114
-
115
99
  # default eval ctx: all output exprs
116
100
  self.default_eval_ctx = self.create_eval_ctx(self.output_exprs, exclude=unique_input_exprs)
117
101
 
@@ -170,13 +154,6 @@ class RowBuilder:
170
154
  """Return ColumnSlotIdx for output columns"""
171
155
  return self.table_columns
172
156
 
173
- def index_slot_idxs(self) -> List[ColumnSlotIdx]:
174
- """Return ColumnSlotIdx for index columns"""
175
- return [
176
- ColumnSlotIdx(self.output_columns[i], i) for i in range(len(self.index_columns))
177
- if self.output_columns[i] is not None
178
- ]
179
-
180
157
  @property
181
158
  def num_materialized(self) -> int:
182
159
  return self.next_slot_idx
@@ -204,12 +181,16 @@ class RowBuilder:
204
181
  for i, c in enumerate(expr.components):
205
182
  # make sure we only refer to components that have themselves been recorded
206
183
  expr.components[i] = self._record_unique_expr(c, True)
207
- assert expr.slot_idx < 0
184
+ assert expr.slot_idx is None
208
185
  expr.slot_idx = self._next_slot_idx()
209
186
  self.unique_exprs.append(expr)
210
187
  return expr
211
188
 
212
189
  def _record_output_expr_id(self, e: Expr, output_expr_id: int) -> None:
190
+ assert e.slot_idx is not None
191
+ assert output_expr_id is not None
192
+ if e.slot_idx in self.input_expr_slot_idxs:
193
+ return
213
194
  self.output_expr_ids[e.slot_idx].add(output_expr_id)
214
195
  for d in e.dependencies():
215
196
  self._record_output_expr_id(d, output_expr_id)
@@ -334,22 +315,15 @@ class RowBuilder:
334
315
  exc = data_row.get_exc(slot_idx)
335
316
  num_excs += 1
336
317
  exc_col_ids.add(col.id)
337
- table_row[col.storage_name()] = None
338
- table_row[col.errortype_storage_name()] = type(exc).__name__
339
- table_row[col.errormsg_storage_name()] = str(exc)
318
+ table_row[col.store_name()] = None
319
+ table_row[col.errortype_store_name()] = type(exc).__name__
320
+ table_row[col.errormsg_store_name()] = str(exc)
340
321
  else:
341
- val = data_row.get_stored_val(slot_idx)
342
- table_row[col.storage_name()] = val
322
+ val = data_row.get_stored_val(slot_idx, col.sa_col.type)
323
+ table_row[col.store_name()] = val
343
324
  # we unfortunately need to set these, even if there are no errors
344
- table_row[col.errortype_storage_name()] = None
345
- table_row[col.errormsg_storage_name()] = None
346
-
347
- for slot_idx, col in enumerate(self.index_columns):
348
- if col is None:
349
- continue
350
- # don't use get_stored_val() here, we need to pass in the ndarray
351
- val = data_row[slot_idx]
352
- table_row[col.index_storage_name()] = val
325
+ table_row[col.errortype_store_name()] = None
326
+ table_row[col.errormsg_store_name()] = None
353
327
 
354
328
  return table_row, num_excs
355
329
 
@@ -0,0 +1,65 @@
1
+ from typing import Optional, List
2
+
3
+ import sqlalchemy as sql
4
+ import PIL.Image
5
+
6
+ import pixeltable.exceptions as excs
7
+ import pixeltable.type_system as ts
8
+ from .column_ref import ColumnRef
9
+ from .data_row import DataRow
10
+ from .expr import Expr
11
+ from .literal import Literal
12
+ from .row_builder import RowBuilder
13
+
14
+
15
+ class SimilarityExpr(Expr):
16
+
17
+ def __init__(self, col_ref: ColumnRef, item: Expr):
18
+ super().__init__(ts.FloatType())
19
+ self.components = [col_ref, item]
20
+ self.id = self._create_id()
21
+ assert isinstance(item, Literal)
22
+ assert item.col_type.is_string_type() or item.col_type.is_image_type()
23
+
24
+ # determine index to use
25
+ idx_info = col_ref.col.get_idx_info()
26
+ if len(idx_info) == 0:
27
+ raise excs.Error(f'No index found for column {col_ref.col}')
28
+ if len(idx_info) > 1:
29
+ raise excs.Error(
30
+ f'Column {col_ref.col.name} has multiple indices; use the index name to disambiguate, '
31
+ f'e.g., `{col_ref.col.name}.<index-name>.similarity(...)`')
32
+ self.idx_info = next(iter(idx_info.values()))
33
+ idx = self.idx_info.idx
34
+
35
+ if item.col_type.is_string_type() and idx.txt_embed is None:
36
+ raise excs.Error(
37
+ f'Embedding index {self.idx_info.name} on column {self.idx_info.col.name} was created without the '
38
+ f'text_embed parameter and does not support text queries')
39
+ if item.col_type.is_image_type() and idx.img_embed is None:
40
+ raise excs.Error(
41
+ f'Embedding index {self.idx_info.name} on column {self.idx_info.col.name} was created without the '
42
+ f'img_embed parameter and does not support image queries')
43
+
44
+ def __str__(self) -> str:
45
+ return f'{self.components[0]}.similarity({self.components[1]})'
46
+
47
+ def sql_expr(self) -> Optional[sql.ClauseElement]:
48
+ assert isinstance(self.components[1], Literal)
49
+ item = self.components[1].val
50
+ return self.idx_info.idx.similarity_clause(self.idx_info.val_col, item)
51
+
52
+ def as_order_by_clause(self, is_asc: bool) -> Optional[sql.ClauseElement]:
53
+ assert isinstance(self.components[1], Literal)
54
+ item = self.components[1].val
55
+ return self.idx_info.idx.order_by_clause(self.idx_info.val_col, item, is_asc)
56
+
57
+ def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
58
+ # this should never get called
59
+ assert False
60
+
61
+ @classmethod
62
+ def _from_dict(cls, d: dict, components: List[Expr]) -> Expr:
63
+ assert len(components) == 2
64
+ assert isinstance(components[0], ColumnRef)
65
+ return cls(components[0], components[1])
@@ -0,0 +1,5 @@
1
+ """
2
+ Extended integrations for Pixeltable. This package contains experimental or demonstration features that
3
+ are not intended for production use. Long-term support cannot be guaranteed, usually because the features
4
+ have dependencies whose future support is unclear.
5
+ """