pixeltable 0.2.21__py3-none-any.whl → 0.2.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. pixeltable/__init__.py +2 -2
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +1 -1
  4. pixeltable/catalog/column.py +41 -29
  5. pixeltable/catalog/globals.py +18 -0
  6. pixeltable/catalog/insertable_table.py +30 -10
  7. pixeltable/catalog/table.py +198 -86
  8. pixeltable/catalog/table_version.py +47 -53
  9. pixeltable/catalog/table_version_path.py +2 -2
  10. pixeltable/catalog/view.py +17 -18
  11. pixeltable/dataframe.py +27 -36
  12. pixeltable/env.py +7 -0
  13. pixeltable/exec/__init__.py +0 -1
  14. pixeltable/exec/aggregation_node.py +6 -3
  15. pixeltable/exec/cache_prefetch_node.py +189 -43
  16. pixeltable/exec/data_row_batch.py +5 -22
  17. pixeltable/exec/exec_context.py +2 -2
  18. pixeltable/exec/exec_node.py +3 -2
  19. pixeltable/exec/expr_eval_node.py +23 -16
  20. pixeltable/exec/in_memory_data_node.py +6 -3
  21. pixeltable/exec/sql_node.py +24 -25
  22. pixeltable/exprs/arithmetic_expr.py +12 -5
  23. pixeltable/exprs/array_slice.py +7 -7
  24. pixeltable/exprs/column_property_ref.py +37 -10
  25. pixeltable/exprs/column_ref.py +97 -14
  26. pixeltable/exprs/comparison.py +10 -5
  27. pixeltable/exprs/compound_predicate.py +8 -7
  28. pixeltable/exprs/data_row.py +27 -18
  29. pixeltable/exprs/expr.py +53 -52
  30. pixeltable/exprs/expr_set.py +5 -0
  31. pixeltable/exprs/function_call.py +32 -16
  32. pixeltable/exprs/globals.py +4 -1
  33. pixeltable/exprs/in_predicate.py +8 -7
  34. pixeltable/exprs/inline_expr.py +4 -4
  35. pixeltable/exprs/is_null.py +4 -4
  36. pixeltable/exprs/json_mapper.py +11 -12
  37. pixeltable/exprs/json_path.py +6 -11
  38. pixeltable/exprs/literal.py +5 -5
  39. pixeltable/exprs/method_ref.py +5 -4
  40. pixeltable/exprs/object_ref.py +2 -1
  41. pixeltable/exprs/row_builder.py +88 -36
  42. pixeltable/exprs/rowid_ref.py +12 -11
  43. pixeltable/exprs/similarity_expr.py +12 -7
  44. pixeltable/exprs/sql_element_cache.py +7 -5
  45. pixeltable/exprs/type_cast.py +8 -6
  46. pixeltable/exprs/variable.py +5 -4
  47. pixeltable/func/aggregate_function.py +9 -9
  48. pixeltable/func/expr_template_function.py +6 -5
  49. pixeltable/func/function.py +11 -10
  50. pixeltable/func/udf.py +6 -11
  51. pixeltable/functions/__init__.py +2 -2
  52. pixeltable/functions/globals.py +5 -7
  53. pixeltable/functions/huggingface.py +155 -45
  54. pixeltable/functions/llama_cpp.py +107 -0
  55. pixeltable/functions/mistralai.py +1 -1
  56. pixeltable/functions/ollama.py +147 -0
  57. pixeltable/functions/openai.py +1 -1
  58. pixeltable/functions/replicate.py +72 -0
  59. pixeltable/functions/string.py +9 -0
  60. pixeltable/functions/together.py +1 -1
  61. pixeltable/functions/util.py +5 -2
  62. pixeltable/globals.py +67 -26
  63. pixeltable/index/btree.py +16 -3
  64. pixeltable/index/embedding_index.py +4 -4
  65. pixeltable/io/__init__.py +1 -2
  66. pixeltable/io/fiftyone.py +178 -0
  67. pixeltable/io/globals.py +96 -2
  68. pixeltable/iterators/base.py +3 -2
  69. pixeltable/iterators/document.py +1 -1
  70. pixeltable/iterators/video.py +120 -63
  71. pixeltable/metadata/__init__.py +1 -1
  72. pixeltable/metadata/converters/convert_21.py +34 -0
  73. pixeltable/metadata/converters/util.py +45 -4
  74. pixeltable/metadata/notes.py +1 -0
  75. pixeltable/metadata/schema.py +8 -0
  76. pixeltable/plan.py +17 -15
  77. pixeltable/py.typed +0 -0
  78. pixeltable/store.py +7 -2
  79. pixeltable/tool/create_test_db_dump.py +1 -1
  80. pixeltable/tool/create_test_video.py +1 -1
  81. pixeltable/tool/embed_udf.py +1 -1
  82. pixeltable/tool/mypy_plugin.py +28 -5
  83. pixeltable/type_system.py +100 -36
  84. pixeltable/utils/coco.py +5 -5
  85. pixeltable/utils/documents.py +15 -1
  86. pixeltable/utils/formatter.py +12 -13
  87. pixeltable/utils/s3.py +6 -3
  88. {pixeltable-0.2.21.dist-info → pixeltable-0.2.23.dist-info}/METADATA +158 -49
  89. pixeltable-0.2.23.dist-info/RECORD +153 -0
  90. pixeltable/exec/media_validation_node.py +0 -43
  91. pixeltable-0.2.21.dist-info/RECORD +0 -148
  92. {pixeltable-0.2.21.dist-info → pixeltable-0.2.23.dist-info}/LICENSE +0 -0
  93. {pixeltable-0.2.21.dist-info → pixeltable-0.2.23.dist-info}/WHEEL +0 -0
  94. {pixeltable-0.2.21.dist-info → pixeltable-0.2.23.dist-info}/entry_points.txt +0 -0
@@ -6,6 +6,7 @@ import sqlalchemy as sql
6
6
 
7
7
  import pixeltable.exceptions as excs
8
8
  import pixeltable.type_system as ts
9
+
9
10
  from .data_row import DataRow
10
11
  from .expr import Expr
11
12
  from .globals import ArithmeticOperator
@@ -68,11 +69,15 @@ class ArithmeticExpr(Expr):
68
69
  return left * right
69
70
  if self.operator == ArithmeticOperator.DIV:
70
71
  assert self.col_type.is_float_type()
72
+ # Avoid DivisionByZero: if right is 0, make this a NULL
73
+ # TODO: Should we cast the NULLs to NaNs when they are retrieved back into Python?
74
+ nullif = sql.sql.func.nullif(right, 0)
71
75
  # We have to cast to a `float`, or else we'll get a `Decimal`
72
- return sql.sql.expression.cast(left / right, sql.Float)
76
+ return sql.sql.expression.cast(left / nullif, sql.Float)
73
77
  if self.operator == ArithmeticOperator.MOD:
74
78
  if self.col_type.is_int_type():
75
- return left % right
79
+ nullif = sql.sql.func.nullif(right, 0)
80
+ return left % nullif
76
81
  if self.col_type.is_float_type():
77
82
  # Postgres does not support modulus for floats
78
83
  return None
@@ -82,10 +87,12 @@ class ArithmeticExpr(Expr):
82
87
  # We need the behavior to be consistent, so that expressions will evaluate the same way
83
88
  # whether or not their operands can be translated to SQL. These SQL clauses should
84
89
  # mimic the behavior of Python's // operator.
90
+ nullif = sql.sql.func.nullif(right, 0)
85
91
  if self.col_type.is_int_type():
86
- return sql.sql.expression.cast(sql.func.floor(left / right), sql.Integer)
92
+ return sql.sql.expression.cast(sql.func.floor(left / nullif), sql.Integer)
87
93
  if self.col_type.is_float_type():
88
- return sql.sql.expression.cast(sql.func.floor(left / right), sql.Float)
94
+ return sql.sql.expression.cast(sql.func.floor(left / nullif), sql.Float)
95
+ assert False
89
96
 
90
97
  def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
91
98
  op1_val = data_row[self._op1.slot_idx]
@@ -121,7 +128,7 @@ class ArithmeticExpr(Expr):
121
128
  return {'operator': self.operator.value, **super()._as_dict()}
122
129
 
123
130
  @classmethod
124
- def _from_dict(cls, d: dict, components: list[Expr]) -> Expr:
131
+ def _from_dict(cls, d: dict, components: list[Expr]) -> ArithmeticExpr:
125
132
  assert 'operator' in d
126
133
  assert len(components) == 2
127
134
  return cls(ArithmeticOperator(d['operator']), components[0], components[1])
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Any, Dict, List, Optional, Tuple
3
+ from typing import Any, Optional, Union
4
4
 
5
5
  import sqlalchemy as sql
6
6
 
@@ -15,7 +15,7 @@ class ArraySlice(Expr):
15
15
  """
16
16
  Slice operation on an array, eg, t.array_col[:, 1:2].
17
17
  """
18
- def __init__(self, arr: Expr, index: Tuple):
18
+ def __init__(self, arr: Expr, index: tuple[Union[int, slice], ...]):
19
19
  assert arr.col_type.is_array_type()
20
20
  # determine result type
21
21
  super().__init__(arr.col_type)
@@ -24,7 +24,7 @@ class ArraySlice(Expr):
24
24
  self.id = self._create_id()
25
25
 
26
26
  def __str__(self) -> str:
27
- index_strs: List[str] = []
27
+ index_strs: list[str] = []
28
28
  for el in self.index:
29
29
  if isinstance(el, int):
30
30
  index_strs.append(str(el))
@@ -39,7 +39,7 @@ class ArraySlice(Expr):
39
39
  def _equals(self, other: ArraySlice) -> bool:
40
40
  return self.index == other.index
41
41
 
42
- def _id_attrs(self) -> List[Tuple[str, Any]]:
42
+ def _id_attrs(self) -> list[tuple[str, Any]]:
43
43
  return super()._id_attrs() + [('index', self.index)]
44
44
 
45
45
  def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
@@ -49,8 +49,8 @@ class ArraySlice(Expr):
49
49
  val = data_row[self._array.slot_idx]
50
50
  data_row[self.slot_idx] = val[self.index]
51
51
 
52
- def _as_dict(self) -> Dict:
53
- index = []
52
+ def _as_dict(self) -> dict:
53
+ index: list[Any] = []
54
54
  for el in self.index:
55
55
  if isinstance(el, slice):
56
56
  index.append([el.start, el.stop, el.step])
@@ -59,7 +59,7 @@ class ArraySlice(Expr):
59
59
  return {'index': index, **super()._as_dict()}
60
60
 
61
61
  @classmethod
62
- def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
62
+ def _from_dict(cls, d: dict, components: list[Expr]) -> ArraySlice:
63
63
  assert 'index' in d
64
64
  index = []
65
65
  for el in d['index']:
@@ -1,11 +1,12 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import enum
4
- from typing import Optional, List, Any, Dict, Tuple
4
+ from typing import Any, Optional
5
5
 
6
6
  import sqlalchemy as sql
7
7
 
8
8
  import pixeltable.type_system as ts
9
+ from pixeltable import catalog
9
10
  from .column_ref import ColumnRef
10
11
  from .data_row import DataRow
11
12
  from .expr import Expr
@@ -33,22 +34,36 @@ class ColumnPropertyRef(Expr):
33
34
  def default_column_name(self) -> Optional[str]:
34
35
  return str(self).replace('.', '_')
35
36
 
36
- def _equals(self, other: ColumnRef) -> bool:
37
+ def _equals(self, other: ColumnPropertyRef) -> bool:
37
38
  return self.prop == other.prop
38
39
 
39
- def _id_attrs(self) -> List[Tuple[str, Any]]:
40
+ def _id_attrs(self) -> list[tuple[str, Any]]:
40
41
  return super()._id_attrs() + [('prop', self.prop.value)]
41
42
 
42
43
  @property
43
44
  def _col_ref(self) -> ColumnRef:
44
- return self.components[0]
45
+ col_ref = self.components[0]
46
+ assert isinstance(col_ref, ColumnRef)
47
+ return col_ref
45
48
 
46
49
  def __str__(self) -> str:
47
50
  return f'{self._col_ref}.{self.prop.name.lower()}'
48
51
 
52
+ def is_error_prop(self) -> bool:
53
+ return self.prop == self.Property.ERRORTYPE or self.prop == self.Property.ERRORMSG
54
+
49
55
  def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
50
56
  if not self._col_ref.col.is_stored:
51
57
  return None
58
+
59
+ # the errortype/-msg properties of a read-validated media column need to be extracted from the DataRow
60
+ if (
61
+ self._col_ref.col.col_type.is_media_type()
62
+ and self._col_ref.col.media_validation == catalog.MediaValidation.ON_READ
63
+ and self.is_error_prop()
64
+ ):
65
+ return None
66
+
52
67
  if self.prop == self.Property.ERRORTYPE:
53
68
  assert self._col_ref.col.sa_errortype_col is not None
54
69
  return self._col_ref.col.sa_errortype_col
@@ -61,18 +76,30 @@ class ColumnPropertyRef(Expr):
61
76
  return None
62
77
 
63
78
  def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
64
- assert self.prop == self.Property.FILEURL or self.prop == self.Property.LOCALPATH
65
- assert data_row.has_val[self._col_ref.slot_idx]
66
79
  if self.prop == self.Property.FILEURL:
80
+ assert data_row.has_val[self._col_ref.slot_idx]
67
81
  data_row[self.slot_idx] = data_row.file_urls[self._col_ref.slot_idx]
68
- if self.prop == self.Property.LOCALPATH:
82
+ return
83
+ elif self.prop == self.Property.LOCALPATH:
84
+ assert data_row.has_val[self._col_ref.slot_idx]
69
85
  data_row[self.slot_idx] = data_row.file_paths[self._col_ref.slot_idx]
70
-
71
- def _as_dict(self) -> Dict:
86
+ return
87
+ elif self.is_error_prop():
88
+ exc = data_row.get_exc(self._col_ref.slot_idx)
89
+ if exc is None:
90
+ data_row[self.slot_idx] = None
91
+ elif self.prop == self.Property.ERRORTYPE:
92
+ data_row[self.slot_idx] = type(exc).__name__
93
+ else:
94
+ data_row[self.slot_idx] = str(exc)
95
+ else:
96
+ assert False
97
+
98
+ def _as_dict(self) -> dict:
72
99
  return {'prop': self.prop.value, **super()._as_dict()}
73
100
 
74
101
  @classmethod
75
- def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
102
+ def _from_dict(cls, d: dict, components: list[Expr]) -> ColumnPropertyRef:
76
103
  assert 'prop' in d
77
104
  assert isinstance(components[0], ColumnRef)
78
105
  return cls(components[0], cls.Property(d['prop']))
@@ -1,16 +1,18 @@
1
1
  from __future__ import annotations
2
- from typing import Optional, Any, Tuple
2
+
3
+ from typing import Any, Optional, Sequence
3
4
  from uuid import UUID
4
5
 
5
6
  import sqlalchemy as sql
6
7
 
7
- from .expr import Expr
8
+ import pixeltable.catalog as catalog
9
+ import pixeltable.exceptions as excs
10
+ import pixeltable.iterators as iters
11
+
8
12
  from .data_row import DataRow
13
+ from .expr import Expr
9
14
  from .row_builder import RowBuilder
10
15
  from .sql_element_cache import SqlElementCache
11
- import pixeltable.iterators as iters
12
- import pixeltable.exceptions as excs
13
- import pixeltable.catalog as catalog
14
16
 
15
17
 
16
18
  class ColumnRef(Expr):
@@ -19,18 +21,31 @@ class ColumnRef(Expr):
19
21
  When this reference is created in the context of a view, it can also refer to a column of the view base.
20
22
  For that reason, a ColumnRef needs to be serialized with the qualifying table id (column ids are only
21
23
  unique in the context of a particular table).
24
+
25
+ Media validation:
26
+ - media validation is potentially cpu-intensive, and it's desirable to schedule and parallelize it during
27
+ general expr evaluation
28
+ - media validation on read is done in ColumnRef.eval()
29
+ - a validating ColumnRef cannot be translated to SQL (because the validation is done in Python)
30
+ - in that case, the ColumnRef also instantiates a second non-validating ColumnRef as a component (= dependency)
31
+ - the non-validating ColumnRef is used for SQL translation
32
+
33
+ TODO:
34
+ separate Exprs (like validating ColumnRefs) from the logical expression tree and instead have RowBuilder
35
+ insert them into the EvalCtxs as needed
22
36
  """
23
37
 
24
38
  col: catalog.Column
25
39
  is_unstored_iter_col: bool
26
40
  iter_arg_ctx: Optional[RowBuilder.EvalCtx]
27
41
  base_rowid_len: int
28
- base_rowid: list[Optional[Any]]
42
+ base_rowid: Sequence[Optional[Any]]
29
43
  iterator: Optional[iters.ComponentIterator]
30
44
  pos_idx: Optional[int]
31
45
  id: int
46
+ perform_validation: bool # if True, performs media validation
32
47
 
33
- def __init__(self, col: catalog.Column):
48
+ def __init__(self, col: catalog.Column, perform_validation: Optional[bool] = None):
34
49
  super().__init__(col.col_type)
35
50
  assert col.tbl is not None
36
51
  self.col = col
@@ -43,17 +58,44 @@ class ColumnRef(Expr):
43
58
  self.iterator = None
44
59
  # index of the position column in the view's primary key; don't try to reference tbl.store_tbl here
45
60
  self.pos_idx = col.tbl.num_rowid_columns() - 1 if self.is_unstored_iter_col else None
61
+
62
+ self.perform_validation = False
63
+ if col.col_type.is_media_type():
64
+ # we perform media validation if the column is a media type and the validation is set to ON_READ,
65
+ # unless we're told not to
66
+ if perform_validation is not None:
67
+ self.perform_validation = perform_validation
68
+ else:
69
+ self.perform_validation = (
70
+ col.col_type.is_media_type() and col.media_validation == catalog.MediaValidation.ON_READ
71
+ )
72
+ else:
73
+ assert perform_validation is None or not perform_validation
74
+ if self.perform_validation:
75
+ non_validating_col_ref = ColumnRef(col, perform_validation=False)
76
+ self.components = [non_validating_col_ref]
46
77
  self.id = self._create_id()
47
78
 
48
79
  def set_iter_arg_ctx(self, iter_arg_ctx: RowBuilder.EvalCtx) -> None:
49
80
  self.iter_arg_ctx = iter_arg_ctx
50
81
  assert len(self.iter_arg_ctx.target_slot_idxs) == 1 # a single inline dict
51
82
 
52
- def _id_attrs(self) -> list[Tuple[str, Any]]:
53
- return super()._id_attrs() + [('tbl_id', self.col.tbl.id), ('col_id', self.col.id)]
83
+ def _id_attrs(self) -> list[tuple[str, Any]]:
84
+ return (
85
+ super()._id_attrs()
86
+ + [('tbl_id', self.col.tbl.id), ('col_id', self.col.id), ('perform_validation', self.perform_validation)]
87
+ )
88
+
89
+ # override
90
+ def _retarget(self, tbl_versions: dict[UUID, catalog.TableVersion]) -> ColumnRef:
91
+ target = tbl_versions[self.col.tbl.id]
92
+ assert self.col.id in target.cols_by_id
93
+ col = target.cols_by_id[self.col.id]
94
+ return ColumnRef(col)
54
95
 
55
96
  def __getattr__(self, name: str) -> Expr:
56
97
  from .column_property_ref import ColumnPropertyRef
98
+
57
99
  # resolve column properties
58
100
  if name == ColumnPropertyRef.Property.ERRORTYPE.name.lower() \
59
101
  or name == ColumnPropertyRef.Property.ERRORMSG.name.lower():
@@ -82,7 +124,7 @@ class ColumnRef(Expr):
82
124
  return str(self)
83
125
 
84
126
  def _equals(self, other: ColumnRef) -> bool:
85
- return self.col == other.col
127
+ return self.col == other.col and self.perform_validation == other.perform_validation
86
128
 
87
129
  def __str__(self) -> str:
88
130
  if self.col.name is None:
@@ -93,10 +135,43 @@ class ColumnRef(Expr):
93
135
  def __repr__(self) -> str:
94
136
  return f'ColumnRef({self.col!r})'
95
137
 
138
+ def _repr_html_(self) -> str:
139
+ tbl = catalog.Catalog.get().tbls[self.col.tbl.id]
140
+ return tbl._description_html(cols=[self.col])._repr_html_() # type: ignore[attr-defined]
141
+
96
142
  def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
97
- return self.col.sa_col
143
+ return None if self.perform_validation else self.col.sa_col
98
144
 
99
145
  def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
146
+ if self.perform_validation:
147
+ # validate media file of our input ColumnRef and if successful, replicate the state of that slot
148
+ # to our slot
149
+ unvalidated_slot_idx = self.components[0].slot_idx
150
+ if data_row.file_paths[unvalidated_slot_idx] is None:
151
+ # no media file to validate, we still need to replicate the value
152
+ assert data_row.file_urls[unvalidated_slot_idx] is None
153
+ val = data_row.vals[unvalidated_slot_idx]
154
+ data_row.vals[self.slot_idx] = val
155
+ data_row.has_val[self.slot_idx] = True
156
+ return
157
+
158
+ try:
159
+ self.col.col_type.validate_media(data_row.file_paths[unvalidated_slot_idx])
160
+ # access the value only after successful validation
161
+ val = data_row[unvalidated_slot_idx]
162
+ data_row.vals[self.slot_idx] = val
163
+ data_row.has_val[self.slot_idx] = True
164
+ # make sure that the validated slot points to the same file as the unvalidated slot
165
+ data_row.file_paths[self.slot_idx] = data_row.file_paths[unvalidated_slot_idx]
166
+ data_row.file_urls[self.slot_idx] = data_row.file_urls[unvalidated_slot_idx]
167
+ return
168
+ except excs.Error as exc:
169
+ # propagate the exception, but ignore it otherwise;
170
+ # media validation errors don't cause exceptions during query execution
171
+ # TODO: allow for different error-handling behavior
172
+ row_builder.set_exc(data_row, self.slot_idx, exc)
173
+ return
174
+
100
175
  if not self.is_unstored_iter_col:
101
176
  # supply default
102
177
  data_row[self.slot_idx] = None
@@ -115,7 +190,14 @@ class ColumnRef(Expr):
115
190
  def _as_dict(self) -> dict:
116
191
  tbl = self.col.tbl
117
192
  version = tbl.version if tbl.is_snapshot else None
118
- return {'tbl_id': str(tbl.id), 'tbl_version': version, 'col_id': self.col.id}
193
+ # we omit self.components, even if this is a validating ColumnRef, because init() will recreate the
194
+ # non-validating component ColumnRef
195
+ return {
196
+ 'tbl_id': str(tbl.id),
197
+ 'tbl_version': version,
198
+ 'col_id': self.col.id,
199
+ 'perform_validation': self.perform_validation
200
+ }
119
201
 
120
202
  @classmethod
121
203
  def get_column(cls, d: dict) -> catalog.Column:
@@ -126,6 +208,7 @@ class ColumnRef(Expr):
126
208
  return col
127
209
 
128
210
  @classmethod
129
- def _from_dict(cls, d: dict, _: list[Expr]) -> Expr:
211
+ def _from_dict(cls, d: dict, _: list[Expr]) -> ColumnRef:
130
212
  col = cls.get_column(d)
131
- return cls(col)
213
+ perform_validation = d['perform_validation']
214
+ return cls(col, perform_validation=perform_validation)
@@ -1,12 +1,13 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Optional, List, Any, Dict
3
+ from typing import Any, Optional
4
4
 
5
5
  import sqlalchemy as sql
6
6
 
7
7
  import pixeltable.exceptions as excs
8
8
  import pixeltable.index as index
9
9
  import pixeltable.type_system as ts
10
+
10
11
  from .column_ref import ColumnRef
11
12
  from .data_row import DataRow
12
13
  from .expr import Expr
@@ -65,7 +66,12 @@ class Comparison(Expr):
65
66
  def _op2(self) -> Expr:
66
67
  return self.components[1]
67
68
 
68
- def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ClauseElement]:
69
+ def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
70
+ if str(self._op1.col_type.to_sa_type()) != str(self._op2.col_type.to_sa_type()):
71
+ # Comparing columns of different SQL types (e.g., string vs. json); this can only be done in Python
72
+ # TODO(aaron-siegel): We may be able to handle some cases in SQL by casting one side to the other's type
73
+ return None
74
+
69
75
  left = sql_elements.get(self._op1)
70
76
  if self.is_search_arg_comparison:
71
77
  # reference the index value column if there is an index and this is not a snapshot
@@ -113,11 +119,10 @@ class Comparison(Expr):
113
119
  elif self.operator == ComparisonOperator.GE:
114
120
  data_row[self.slot_idx] = left >= right
115
121
 
116
- def _as_dict(self) -> Dict:
122
+ def _as_dict(self) -> dict:
117
123
  return {'operator': self.operator.value, **super()._as_dict()}
118
124
 
119
125
  @classmethod
120
- def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
126
+ def _from_dict(cls, d: dict, components: list[Expr]) -> Comparison:
121
127
  assert 'operator' in d
122
128
  return cls(ComparisonOperator(d['operator']), components[0], components[1])
123
-
@@ -1,20 +1,21 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import operator
4
- from typing import Optional, List, Any, Dict, Callable
4
+ from typing import Any, Callable, Optional
5
5
 
6
6
  import sqlalchemy as sql
7
7
 
8
+ import pixeltable.type_system as ts
9
+
8
10
  from .data_row import DataRow
9
11
  from .expr import Expr
10
12
  from .globals import LogicalOperator
11
13
  from .row_builder import RowBuilder
12
14
  from .sql_element_cache import SqlElementCache
13
- import pixeltable.type_system as ts
14
15
 
15
16
 
16
17
  class CompoundPredicate(Expr):
17
- def __init__(self, operator: LogicalOperator, operands: List[Expr]):
18
+ def __init__(self, operator: LogicalOperator, operands: list[Expr]):
18
19
  super().__init__(ts.BoolType())
19
20
  self.operator = operator
20
21
  # operands are stored in self.components
@@ -23,7 +24,7 @@ class CompoundPredicate(Expr):
23
24
  self.components = operands
24
25
  else:
25
26
  assert len(operands) > 1
26
- self.operands: List[Expr] = []
27
+ self.operands: list[Expr] = []
27
28
  for operand in operands:
28
29
  self._merge_operand(operand)
29
30
 
@@ -35,7 +36,7 @@ class CompoundPredicate(Expr):
35
36
  return f' {self.operator} '.join([f'({e})' for e in self.components])
36
37
 
37
38
  @classmethod
38
- def make_conjunction(cls, operands: List[Expr]) -> Optional[Expr]:
39
+ def make_conjunction(cls, operands: list[Expr]) -> Optional[Expr]:
39
40
  if len(operands) == 0:
40
41
  return None
41
42
  if len(operands) == 1:
@@ -89,11 +90,11 @@ class CompoundPredicate(Expr):
89
90
  val = op_function(val, data_row[op.slot_idx])
90
91
  data_row[self.slot_idx] = val
91
92
 
92
- def _as_dict(self) -> Dict:
93
+ def _as_dict(self) -> dict:
93
94
  return {'operator': self.operator.value, **super()._as_dict()}
94
95
 
95
96
  @classmethod
96
- def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
97
+ def _from_dict(cls, d: dict, components: list[Expr]) -> CompoundPredicate:
97
98
  assert 'operator' in d
98
99
  return cls(LogicalOperator(d['operator']), components)
99
100
 
@@ -4,13 +4,13 @@ import datetime
4
4
  import io
5
5
  import urllib.parse
6
6
  import urllib.request
7
- from typing import Optional, List, Any, Tuple
7
+ from typing import Any, Optional
8
8
 
9
- import sqlalchemy as sql
10
- import pgvector.sqlalchemy
9
+ import numpy as np
10
+ import pgvector.sqlalchemy # type: ignore[import-untyped]
11
11
  import PIL
12
12
  import PIL.Image
13
- import numpy as np
13
+ import sqlalchemy as sql
14
14
 
15
15
  from pixeltable import env
16
16
 
@@ -57,7 +57,7 @@ class DataRow:
57
57
  # - None if vals[i] is not a media type or if there is no local file yet for file_urls[i]
58
58
  file_paths: list[Optional[str]]
59
59
 
60
- def __init__(self, size: int, img_slot_idxs: List[int], media_slot_idxs: List[int], array_slot_idxs: List[int]):
60
+ def __init__(self, size: int, img_slot_idxs: list[int], media_slot_idxs: list[int], array_slot_idxs: list[int]):
61
61
  self.vals = [None] * size
62
62
  self.has_val = [False] * size
63
63
  self.excs = [None] * size
@@ -89,27 +89,35 @@ class DataRow:
89
89
  target.file_urls = self.file_urls.copy()
90
90
  target.file_paths = self.file_paths.copy()
91
91
 
92
- def set_pk(self, pk: Tuple[int, ...]) -> None:
92
+ def set_pk(self, pk: tuple[int, ...]) -> None:
93
93
  self.pk = pk
94
94
 
95
- def has_exc(self, slot_idx: int) -> bool:
96
- return self.excs[slot_idx] is not None
95
+ def has_exc(self, slot_idx: Optional[int] = None) -> bool:
96
+ """
97
+ Returns True if an exception has been set for the given slot index, or for any slot index if slot_idx is None
98
+ """
99
+ if slot_idx is not None:
100
+ return self.excs[slot_idx] is not None
101
+ return any(exc is not None for exc in self.excs)
97
102
 
98
- def get_exc(self, slot_idx: int) -> Exception:
99
- assert self.has_val[slot_idx] is False
100
- assert self.excs[slot_idx] is not None
103
+ def get_exc(self, slot_idx: int) -> Optional[Exception]:
101
104
  return self.excs[slot_idx]
102
105
 
106
+ def get_first_exc(self) -> Optional[Exception]:
107
+ for exc in self.excs:
108
+ if exc is not None:
109
+ return exc
110
+ return None
111
+
103
112
  def set_exc(self, slot_idx: int, exc: Exception) -> None:
104
113
  assert self.excs[slot_idx] is None
105
114
  self.excs[slot_idx] = exc
106
115
 
107
- if self.has_val[slot_idx]:
108
- # eg. during validation, where contents of file is found invalid
109
- self.has_val[slot_idx] = False
110
- self.vals[slot_idx] = None
111
- self.file_paths[slot_idx] = None
112
- self.file_urls[slot_idx] = None
116
+ # an exception means the value is None
117
+ self.has_val[slot_idx] = True
118
+ self.vals[slot_idx] = None
119
+ self.file_paths[slot_idx] = None
120
+ self.file_urls[slot_idx] = None
113
121
 
114
122
  def __len__(self) -> int:
115
123
  return len(self.vals)
@@ -124,6 +132,7 @@ class DataRow:
124
132
 
125
133
  if self.file_urls[index] is not None and index in self.img_slot_idxs:
126
134
  # if we need to load this from a file, it should have been materialized locally
135
+ # TODO this fails if the url was instantiated dynamically using astype()
127
136
  assert self.file_paths[index] is not None
128
137
  if self.vals[index] is None:
129
138
  self.vals[index] = PIL.Image.open(self.file_paths[index])
@@ -231,7 +240,7 @@ class DataRow:
231
240
  self.vals[index] = None
232
241
 
233
242
  @property
234
- def rowid(self) -> Tuple[int]:
243
+ def rowid(self) -> tuple[int, ...]:
235
244
  return self.pk[:-1]
236
245
 
237
246
  @property