pixeltable 0.4.18__py3-none-any.whl → 0.4.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (152)
  1. pixeltable/__init__.py +1 -1
  2. pixeltable/_version.py +1 -0
  3. pixeltable/catalog/catalog.py +119 -100
  4. pixeltable/catalog/column.py +104 -115
  5. pixeltable/catalog/globals.py +1 -2
  6. pixeltable/catalog/insertable_table.py +44 -49
  7. pixeltable/catalog/path.py +3 -4
  8. pixeltable/catalog/schema_object.py +4 -4
  9. pixeltable/catalog/table.py +118 -122
  10. pixeltable/catalog/table_metadata.py +6 -6
  11. pixeltable/catalog/table_version.py +322 -257
  12. pixeltable/catalog/table_version_handle.py +4 -4
  13. pixeltable/catalog/table_version_path.py +9 -10
  14. pixeltable/catalog/tbl_ops.py +9 -3
  15. pixeltable/catalog/view.py +34 -28
  16. pixeltable/config.py +14 -10
  17. pixeltable/dataframe.py +68 -77
  18. pixeltable/env.py +74 -64
  19. pixeltable/exec/aggregation_node.py +6 -6
  20. pixeltable/exec/cache_prefetch_node.py +10 -10
  21. pixeltable/exec/data_row_batch.py +3 -3
  22. pixeltable/exec/exec_context.py +4 -5
  23. pixeltable/exec/exec_node.py +5 -5
  24. pixeltable/exec/expr_eval/evaluators.py +6 -6
  25. pixeltable/exec/expr_eval/expr_eval_node.py +8 -7
  26. pixeltable/exec/expr_eval/globals.py +6 -6
  27. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  28. pixeltable/exec/expr_eval/schedulers.py +11 -11
  29. pixeltable/exec/in_memory_data_node.py +2 -2
  30. pixeltable/exec/object_store_save_node.py +14 -17
  31. pixeltable/exec/sql_node.py +25 -25
  32. pixeltable/exprs/arithmetic_expr.py +4 -4
  33. pixeltable/exprs/array_slice.py +2 -2
  34. pixeltable/exprs/column_property_ref.py +3 -3
  35. pixeltable/exprs/column_ref.py +61 -74
  36. pixeltable/exprs/comparison.py +5 -5
  37. pixeltable/exprs/compound_predicate.py +3 -3
  38. pixeltable/exprs/data_row.py +12 -12
  39. pixeltable/exprs/expr.py +41 -31
  40. pixeltable/exprs/expr_dict.py +3 -3
  41. pixeltable/exprs/expr_set.py +3 -3
  42. pixeltable/exprs/function_call.py +14 -14
  43. pixeltable/exprs/in_predicate.py +4 -4
  44. pixeltable/exprs/inline_expr.py +8 -8
  45. pixeltable/exprs/is_null.py +1 -3
  46. pixeltable/exprs/json_mapper.py +8 -8
  47. pixeltable/exprs/json_path.py +6 -6
  48. pixeltable/exprs/literal.py +5 -5
  49. pixeltable/exprs/method_ref.py +2 -2
  50. pixeltable/exprs/object_ref.py +2 -2
  51. pixeltable/exprs/row_builder.py +14 -14
  52. pixeltable/exprs/rowid_ref.py +8 -8
  53. pixeltable/exprs/similarity_expr.py +50 -25
  54. pixeltable/exprs/sql_element_cache.py +4 -4
  55. pixeltable/exprs/string_op.py +2 -2
  56. pixeltable/exprs/type_cast.py +3 -5
  57. pixeltable/func/aggregate_function.py +8 -8
  58. pixeltable/func/callable_function.py +9 -9
  59. pixeltable/func/expr_template_function.py +3 -3
  60. pixeltable/func/function.py +15 -17
  61. pixeltable/func/function_registry.py +6 -7
  62. pixeltable/func/globals.py +2 -3
  63. pixeltable/func/mcp.py +2 -2
  64. pixeltable/func/query_template_function.py +16 -16
  65. pixeltable/func/signature.py +14 -14
  66. pixeltable/func/tools.py +11 -11
  67. pixeltable/func/udf.py +16 -18
  68. pixeltable/functions/__init__.py +1 -0
  69. pixeltable/functions/anthropic.py +7 -7
  70. pixeltable/functions/audio.py +76 -0
  71. pixeltable/functions/bedrock.py +6 -6
  72. pixeltable/functions/deepseek.py +4 -4
  73. pixeltable/functions/fireworks.py +2 -2
  74. pixeltable/functions/gemini.py +6 -6
  75. pixeltable/functions/globals.py +12 -12
  76. pixeltable/functions/groq.py +4 -4
  77. pixeltable/functions/huggingface.py +18 -20
  78. pixeltable/functions/image.py +7 -10
  79. pixeltable/functions/llama_cpp.py +7 -7
  80. pixeltable/functions/math.py +2 -3
  81. pixeltable/functions/mistralai.py +3 -3
  82. pixeltable/functions/ollama.py +9 -9
  83. pixeltable/functions/openai.py +21 -21
  84. pixeltable/functions/openrouter.py +7 -7
  85. pixeltable/functions/string.py +21 -28
  86. pixeltable/functions/timestamp.py +7 -8
  87. pixeltable/functions/together.py +4 -6
  88. pixeltable/functions/twelvelabs.py +92 -0
  89. pixeltable/functions/video.py +2 -24
  90. pixeltable/functions/vision.py +6 -6
  91. pixeltable/functions/whisper.py +7 -7
  92. pixeltable/functions/whisperx.py +16 -16
  93. pixeltable/globals.py +52 -36
  94. pixeltable/index/base.py +12 -8
  95. pixeltable/index/btree.py +19 -22
  96. pixeltable/index/embedding_index.py +30 -39
  97. pixeltable/io/datarows.py +3 -3
  98. pixeltable/io/external_store.py +13 -16
  99. pixeltable/io/fiftyone.py +5 -5
  100. pixeltable/io/globals.py +5 -5
  101. pixeltable/io/hf_datasets.py +4 -4
  102. pixeltable/io/label_studio.py +12 -12
  103. pixeltable/io/pandas.py +6 -6
  104. pixeltable/io/parquet.py +2 -2
  105. pixeltable/io/table_data_conduit.py +12 -12
  106. pixeltable/io/utils.py +2 -2
  107. pixeltable/iterators/audio.py +2 -2
  108. pixeltable/iterators/video.py +8 -13
  109. pixeltable/metadata/converters/convert_18.py +2 -2
  110. pixeltable/metadata/converters/convert_19.py +2 -2
  111. pixeltable/metadata/converters/convert_20.py +2 -2
  112. pixeltable/metadata/converters/convert_21.py +2 -2
  113. pixeltable/metadata/converters/convert_22.py +2 -2
  114. pixeltable/metadata/converters/convert_24.py +2 -2
  115. pixeltable/metadata/converters/convert_25.py +2 -2
  116. pixeltable/metadata/converters/convert_26.py +2 -2
  117. pixeltable/metadata/converters/convert_29.py +4 -4
  118. pixeltable/metadata/converters/convert_34.py +2 -2
  119. pixeltable/metadata/converters/convert_36.py +2 -2
  120. pixeltable/metadata/converters/convert_38.py +2 -2
  121. pixeltable/metadata/converters/convert_39.py +1 -2
  122. pixeltable/metadata/converters/util.py +11 -13
  123. pixeltable/metadata/schema.py +22 -21
  124. pixeltable/metadata/utils.py +2 -6
  125. pixeltable/mypy/mypy_plugin.py +5 -5
  126. pixeltable/plan.py +30 -28
  127. pixeltable/share/packager.py +7 -7
  128. pixeltable/share/publish.py +3 -3
  129. pixeltable/store.py +125 -61
  130. pixeltable/type_system.py +43 -46
  131. pixeltable/utils/__init__.py +1 -2
  132. pixeltable/utils/arrow.py +4 -4
  133. pixeltable/utils/av.py +8 -0
  134. pixeltable/utils/azure_store.py +305 -0
  135. pixeltable/utils/code.py +1 -2
  136. pixeltable/utils/dbms.py +15 -19
  137. pixeltable/utils/description_helper.py +2 -3
  138. pixeltable/utils/documents.py +5 -6
  139. pixeltable/utils/exception_handler.py +2 -2
  140. pixeltable/utils/filecache.py +5 -5
  141. pixeltable/utils/formatter.py +4 -6
  142. pixeltable/utils/gcs_store.py +9 -9
  143. pixeltable/utils/local_store.py +17 -17
  144. pixeltable/utils/object_stores.py +59 -43
  145. pixeltable/utils/s3_store.py +35 -30
  146. {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/METADATA +1 -1
  147. pixeltable-0.4.19.dist-info/RECORD +213 -0
  148. pixeltable/__version__.py +0 -3
  149. pixeltable-0.4.18.dist-info/RECORD +0 -211
  150. {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/WHEEL +0 -0
  151. {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/entry_points.txt +0 -0
  152. {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/licenses/LICENSE +0 -0
pixeltable/exprs/column_ref.py CHANGED
@@ -1,13 +1,13 @@
1
1
  from __future__ import annotations
2
2
 
3
- import copy
4
- from typing import Any, Optional, Sequence
3
+ from typing import TYPE_CHECKING, Any, Sequence, cast
5
4
  from uuid import UUID
6
5
 
7
6
  import sqlalchemy as sql
8
7
 
9
- import pixeltable as pxt
10
- from pixeltable import catalog, exceptions as excs, iterators as iters
8
+ import pixeltable.catalog as catalog
9
+ import pixeltable.exceptions as excs
10
+ import pixeltable.iterators as iters
11
11
 
12
12
  from ..utils.description_helper import DescriptionHelper
13
13
  from ..utils.filecache import FileCache
@@ -16,6 +16,9 @@ from .expr import Expr
16
16
  from .row_builder import RowBuilder
17
17
  from .sql_element_cache import SqlElementCache
18
18
 
19
+ if TYPE_CHECKING:
20
+ from pixeltable.dataframe import DataFrame, DataFrameResultSet
21
+
19
22
 
20
23
  class ColumnRef(Expr):
21
24
  """A reference to a table column
@@ -44,36 +47,34 @@ class ColumnRef(Expr):
44
47
 
45
48
  col: catalog.Column # TODO: merge with col_handle
46
49
  col_handle: catalog.ColumnHandle
47
- reference_tbl: Optional[catalog.TableVersionPath]
50
+ reference_tbl: catalog.TableVersionPath | None
48
51
  is_unstored_iter_col: bool
49
- iter_arg_ctx: Optional[RowBuilder.EvalCtx]
50
- base_rowid_len: int
51
- base_rowid: Sequence[Optional[Any]]
52
- iterator: Optional[iters.ComponentIterator]
53
- pos_idx: Optional[int]
54
- id: int
55
52
  perform_validation: bool # if True, performs media validation
53
+ iter_arg_ctx: RowBuilder.EvalCtx | None
54
+ base_rowid_len: int # number of rowid columns in the base table
55
+
56
+ # execution state
57
+ base_rowid: Sequence[Any | None]
58
+ iterator: iters.ComponentIterator | None
59
+ pos_idx: int
56
60
 
57
61
  def __init__(
58
62
  self,
59
63
  col: catalog.Column,
60
- reference_tbl: Optional[catalog.TableVersionPath] = None,
61
- perform_validation: Optional[bool] = None,
64
+ reference_tbl: catalog.TableVersionPath | None = None,
65
+ perform_validation: bool | None = None,
62
66
  ):
63
67
  super().__init__(col.col_type)
64
- assert col.tbl is not None
65
68
  self.col = col
66
69
  self.reference_tbl = reference_tbl
67
- self.col_handle = catalog.ColumnHandle(col.tbl.handle, col.id)
70
+ self.col_handle = col.handle
68
71
 
69
- self.is_unstored_iter_col = col.tbl.is_component_view and col.tbl.is_iterator_column(col) and not col.is_stored
72
+ self.is_unstored_iter_col = col.is_iterator_col and not col.is_stored
70
73
  self.iter_arg_ctx = None
71
- # number of rowid columns in the base table
72
- self.base_rowid_len = col.tbl.base.get().num_rowid_columns() if self.is_unstored_iter_col else 0
73
- self.base_rowid = [None] * self.base_rowid_len
74
+ self.base_rowid_len = 0
75
+ self.base_rowid = []
74
76
  self.iterator = None
75
- # index of the position column in the view's primary key; don't try to reference tbl.store_tbl here
76
- self.pos_idx = col.tbl.num_rowid_columns() - 1 if self.is_unstored_iter_col else None
77
+ self.pos_idx = 0
77
78
 
78
79
  self.perform_validation = False
79
80
  if col.col_type.is_media_type():
@@ -99,14 +100,14 @@ class ColumnRef(Expr):
99
100
  def _id_attrs(self) -> list[tuple[str, Any]]:
100
101
  return [
101
102
  *super()._id_attrs(),
102
- ('tbl_id', self.col.tbl.id),
103
+ ('tbl_id', self.col.tbl_handle.id),
103
104
  ('col_id', self.col.id),
104
105
  ('perform_validation', self.perform_validation),
105
106
  ]
106
107
 
107
108
  # override
108
109
  def _retarget(self, tbl_versions: dict[UUID, catalog.TableVersion]) -> ColumnRef:
109
- target = tbl_versions[self.col.tbl.id]
110
+ target = tbl_versions[self.col.tbl_handle.id]
110
111
  assert self.col.id in target.cols_by_id
111
112
  col = target.cols_by_id[self.col.id]
112
113
  return ColumnRef(col, self.reference_tbl)
@@ -144,33 +145,6 @@ class ColumnRef(Expr):
144
145
 
145
146
  return super().__getattr__(name)
146
147
 
147
- def find_embedding_index(
148
- self, idx_name: Optional[str], method_name: str
149
- ) -> dict[str, catalog.TableVersion.IndexInfo]:
150
- """Return IndexInfo for a column, with an optional given name"""
151
- from pixeltable import index
152
-
153
- # determine index to use
154
- idx_info_dict = self.col.get_idx_info(self.reference_tbl)
155
-
156
- embedding_idx_info = {
157
- info: value for info, value in idx_info_dict.items() if isinstance(value.idx, index.EmbeddingIndex)
158
- }
159
- if len(embedding_idx_info) == 0:
160
- raise excs.Error(f'No indices found for {method_name!r} on column {self.col.name!r}')
161
- if idx_name is not None and idx_name not in embedding_idx_info:
162
- raise excs.Error(f'Index {idx_name!r} not found for {method_name!r} on column {self.col.name!r}')
163
- if len(embedding_idx_info) > 1:
164
- if idx_name is None:
165
- raise excs.Error(
166
- f'Column {self.col.name!r} has multiple indices; use the index name to disambiguate: '
167
- f'`{method_name}(..., idx=<index_name>)`'
168
- )
169
- idx_info = {idx_name: embedding_idx_info[idx_name]}
170
- else:
171
- idx_info = embedding_idx_info
172
- return idx_info
173
-
174
148
  def recompute(self, *, cascade: bool = True, errors_only: bool = False) -> catalog.UpdateStatus:
175
149
  cat = catalog.Catalog.get()
176
150
  # lock_mutable_tree=True: we need to be able to see whether any transitive view has column dependents
@@ -185,49 +159,52 @@ class ColumnRef(Expr):
185
159
  FileCache.get().emit_eviction_warnings()
186
160
  return status
187
161
 
188
- def similarity(self, item: Any, *, idx: Optional[str] = None) -> Expr:
162
+ def similarity(self, item: Any, *, idx: str | None = None) -> Expr:
189
163
  from .similarity_expr import SimilarityExpr
190
164
 
191
165
  return SimilarityExpr(self, item, idx_name=idx)
192
166
 
193
- def embedding(self, *, idx: Optional[str] = None) -> ColumnRef:
194
- idx_info = self.find_embedding_index(idx, 'embedding')
195
- assert len(idx_info) == 1
196
- col = copy.copy(next(iter(idx_info.values())).val_col)
197
- col.name = f'{self.col.name}_embedding_{idx if idx is not None else ""}'
198
- # col.create_sa_cols()
199
- return ColumnRef(col)
167
+ def embedding(self, *, idx: str | None = None) -> ColumnRef:
168
+ from pixeltable.index import EmbeddingIndex
169
+
170
+ idx_info = self.tbl.get().get_idx(self.col, idx, EmbeddingIndex)
171
+ return ColumnRef(idx_info.val_col)
200
172
 
201
- def default_column_name(self) -> Optional[str]:
173
+ @property
174
+ def tbl(self) -> catalog.TableVersionHandle:
175
+ return self.reference_tbl.tbl_version if self.reference_tbl is not None else self.col.tbl_handle
176
+
177
+ def default_column_name(self) -> str | None:
202
178
  return self.col.name if self.col is not None else None
203
179
 
204
180
  def _equals(self, other: ColumnRef) -> bool:
205
181
  return self.col == other.col and self.perform_validation == other.perform_validation
206
182
 
207
- def _df(self) -> 'pxt.dataframe.DataFrame':
208
- from pixeltable import plan
183
+ def _df(self) -> 'DataFrame':
184
+ import pixeltable.plan as plan
185
+ from pixeltable.dataframe import DataFrame
209
186
 
210
187
  if self.reference_tbl is None:
211
188
  # No reference table; use the current version of the table to which the column belongs
212
- tbl = catalog.Catalog.get().get_table_by_id(self.col.tbl.id)
189
+ tbl = catalog.Catalog.get().get_table_by_id(self.col.tbl_handle.id)
213
190
  return tbl.select(self)
214
191
  else:
215
192
  # Explicit reference table; construct a DataFrame directly from it
216
- return pxt.DataFrame(plan.FromClause([self.reference_tbl])).select(self)
193
+ return DataFrame(plan.FromClause([self.reference_tbl])).select(self)
217
194
 
218
- def show(self, *args: Any, **kwargs: Any) -> 'pxt.dataframe.DataFrameResultSet':
195
+ def show(self, *args: Any, **kwargs: Any) -> 'DataFrameResultSet':
219
196
  return self._df().show(*args, **kwargs)
220
197
 
221
- def head(self, *args: Any, **kwargs: Any) -> 'pxt.dataframe.DataFrameResultSet':
198
+ def head(self, *args: Any, **kwargs: Any) -> 'DataFrameResultSet':
222
199
  return self._df().head(*args, **kwargs)
223
200
 
224
- def tail(self, *args: Any, **kwargs: Any) -> 'pxt.dataframe.DataFrameResultSet':
201
+ def tail(self, *args: Any, **kwargs: Any) -> 'DataFrameResultSet':
225
202
  return self._df().tail(*args, **kwargs)
226
203
 
227
204
  def count(self) -> int:
228
205
  return self._df().count()
229
206
 
230
- def distinct(self) -> 'pxt.dataframe.DataFrame':
207
+ def distinct(self) -> 'DataFrame':
231
208
  """Return distinct values in this column."""
232
209
  return self._df().distinct()
233
210
 
@@ -244,7 +221,7 @@ class ColumnRef(Expr):
244
221
  return self._descriptors().to_html()
245
222
 
246
223
  def _descriptors(self) -> DescriptionHelper:
247
- tbl = catalog.Catalog.get().get_table_by_id(self.col.tbl.id)
224
+ tbl = catalog.Catalog.get().get_table_by_id(self.col.tbl_handle.id)
248
225
  helper = DescriptionHelper()
249
226
  helper.append(f'Column\n{self.col.name!r}\n(of table {tbl._path()!r})')
250
227
  helper.append(tbl._col_descriptor([self.col.name]))
@@ -253,7 +230,18 @@ class ColumnRef(Expr):
253
230
  helper.append(idxs)
254
231
  return helper
255
232
 
256
- def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
233
+ def prepare(self) -> None:
234
+ from pixeltable import store
235
+
236
+ if not self.is_unstored_iter_col:
237
+ return
238
+ col = self.col_handle.get()
239
+ self.base_rowid_len = col.get_tbl().base.get().num_rowid_columns()
240
+ self.base_rowid = [None] * self.base_rowid_len
241
+ assert isinstance(col.get_tbl().store_tbl, store.StoreComponentView)
242
+ self.pos_idx = cast(store.StoreComponentView, col.get_tbl().store_tbl).pos_col_idx
243
+
244
+ def sql_expr(self, _: SqlElementCache) -> sql.ColumnElement | None:
257
245
  if self.perform_validation:
258
246
  return None
259
247
  self.col = self.col_handle.get()
@@ -298,20 +286,19 @@ class ColumnRef(Expr):
298
286
  if self.base_rowid != data_row.pk[: self.base_rowid_len]:
299
287
  row_builder.eval(data_row, self.iter_arg_ctx)
300
288
  iterator_args = data_row[self.iter_arg_ctx.target_slot_idxs[0]]
301
- self.iterator = self.col.tbl.iterator_cls(**iterator_args)
289
+ self.iterator = self.col.get_tbl().iterator_cls(**iterator_args)
302
290
  self.base_rowid = data_row.pk[: self.base_rowid_len]
303
291
  self.iterator.set_pos(data_row.pk[self.pos_idx])
304
292
  res = next(self.iterator)
305
293
  data_row[self.slot_idx] = res[self.col.name]
306
294
 
307
295
  def _as_dict(self) -> dict:
308
- tbl = self.col.tbl
309
- version = tbl.version if tbl.is_snapshot else None
296
+ tbl_handle = self.col.tbl_handle
310
297
  # we omit self.components, even if this is a validating ColumnRef, because init() will recreate the
311
298
  # non-validating component ColumnRef
312
299
  return {
313
- 'tbl_id': str(tbl.id),
314
- 'tbl_version': version,
300
+ 'tbl_id': str(tbl_handle.id),
301
+ 'tbl_version': tbl_handle.effective_version,
315
302
  'col_id': self.col.id,
316
303
  'reference_tbl': self.reference_tbl.as_dict() if self.reference_tbl is not None else None,
317
304
  'perform_validation': self.perform_validation,
pixeltable/exprs/comparison.py CHANGED
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Any, Optional
3
+ from typing import Any
4
4
 
5
5
  import sqlalchemy as sql
6
6
 
@@ -69,8 +69,8 @@ class Comparison(Expr):
69
69
  def _op2(self) -> Expr:
70
70
  return self.components[1]
71
71
 
72
- def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
73
- from pixeltable import index
72
+ def sql_expr(self, sql_elements: SqlElementCache) -> sql.ColumnElement | None:
73
+ import pixeltable.index as index
74
74
 
75
75
  if str(self._op1.col_type.to_sa_type()) != str(self._op2.col_type.to_sa_type()):
76
76
  # Comparing columns of different SQL types (e.g., string vs. json); this can only be done in Python
@@ -81,9 +81,9 @@ class Comparison(Expr):
81
81
  if self.is_search_arg_comparison:
82
82
  # reference the index value column if there is an index and this is not a snapshot
83
83
  # (indices don't apply to snapshots)
84
- tbl = self._op1.col.tbl
84
+ tbl = self._op1.col.get_tbl()
85
85
  idx_info = [
86
- info for info in self._op1.col.get_idx_info().values() if isinstance(info.idx, index.BtreeIndex)
86
+ info for info in tbl.idxs_by_col.get(self._op1.col.qid, []) if isinstance(info.idx, index.BtreeIndex)
87
87
  ]
88
88
  if len(idx_info) > 0 and not tbl.is_snapshot:
89
89
  # there shouldn't be multiple B-tree indices on a column
pixeltable/exprs/compound_predicate.py CHANGED
@@ -1,7 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import operator
4
- from typing import Any, Callable, Optional
4
+ from typing import Any, Callable
5
5
 
6
6
  import sqlalchemy as sql
7
7
 
@@ -61,14 +61,14 @@ class CompoundPredicate(Expr):
61
61
  def _id_attrs(self) -> list[tuple[str, Any]]:
62
62
  return [*super()._id_attrs(), ('operator', self.operator.value)]
63
63
 
64
- def split_conjuncts(self, condition: Callable[[Expr], bool]) -> tuple[list[Expr], Optional[Expr]]:
64
+ def split_conjuncts(self, condition: Callable[[Expr], bool]) -> tuple[list[Expr], Expr | None]:
65
65
  if self.operator in (LogicalOperator.OR, LogicalOperator.NOT):
66
66
  return super().split_conjuncts(condition)
67
67
  matches = [op for op in self.components if condition(op)]
68
68
  non_matches = [op for op in self.components if not condition(op)]
69
69
  return (matches, self.make_conjunction(non_matches))
70
70
 
71
- def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
71
+ def sql_expr(self, sql_elements: SqlElementCache) -> sql.ColumnElement | None:
72
72
  sql_exprs = [sql_elements.get(op) for op in self.components]
73
73
  if any(e is None for e in sql_exprs):
74
74
  return None
pixeltable/exprs/data_row.py CHANGED
@@ -6,7 +6,7 @@ import io
6
6
  import urllib.parse
7
7
  import urllib.request
8
8
  from pathlib import Path
9
- from typing import Any, Optional
9
+ from typing import Any
10
10
 
11
11
  import numpy as np
12
12
  import pgvector.sqlalchemy # type: ignore[import-untyped]
@@ -126,10 +126,10 @@ class DataRow:
126
126
  _may_have_exc: bool
127
127
 
128
128
  # the primary key of a store row is a sequence of ints (the number is different for table vs view)
129
- pk: Optional[tuple[int, ...]]
129
+ pk: tuple[int, ...] | None
130
130
  # for nested rows (ie, those produced by JsonMapperDispatcher)
131
- parent_row: Optional[DataRow]
132
- parent_slot_idx: Optional[int]
131
+ parent_row: DataRow | None
132
+ parent_slot_idx: int | None
133
133
 
134
134
  # state for table output (insert()/update()); key: column id
135
135
  cell_vals: dict[int, Any] # materialized values of output columns, in the format required for the column
@@ -148,8 +148,8 @@ class DataRow:
148
148
  media_slot_idxs: list[int],
149
149
  array_slot_idxs: list[int],
150
150
  json_slot_idxs: list[int],
151
- parent_row: Optional[DataRow] = None,
152
- parent_slot_idx: Optional[int] = None,
151
+ parent_row: DataRow | None = None,
152
+ parent_slot_idx: int | None = None,
153
153
  ):
154
154
  self.init(size)
155
155
  self.parent_row = parent_row
@@ -176,7 +176,7 @@ class DataRow:
176
176
  self.parent_row = None
177
177
  self.parent_slot_idx = None
178
178
 
179
- def clear(self, slot_idxs: Optional[np.ndarray] = None) -> None:
179
+ def clear(self, slot_idxs: np.ndarray | None = None) -> None:
180
180
  if slot_idxs is not None:
181
181
  self.has_val[slot_idxs] = False
182
182
  self.vals[slot_idxs] = None
@@ -209,7 +209,7 @@ class DataRow:
209
209
  def set_pk(self, pk: tuple[int, ...]) -> None:
210
210
  self.pk = pk
211
211
 
212
- def has_exc(self, slot_idx: Optional[int] = None) -> bool:
212
+ def has_exc(self, slot_idx: int | None = None) -> bool:
213
213
  """
214
214
  Returns True if an exception has been set for the given slot index, or for any slot index if slot_idx is None
215
215
  """
@@ -220,12 +220,12 @@ class DataRow:
220
220
  return self.excs[slot_idx] is not None
221
221
  return (self.excs != None).any()
222
222
 
223
- def get_exc(self, slot_idx: int) -> Optional[Exception]:
223
+ def get_exc(self, slot_idx: int) -> Exception | None:
224
224
  exc = self.excs[slot_idx]
225
225
  assert exc is None or isinstance(exc, Exception)
226
226
  return exc
227
227
 
228
- def get_first_exc(self) -> Optional[Exception]:
228
+ def get_first_exc(self) -> Exception | None:
229
229
  mask = self.excs != None
230
230
  if not mask.any():
231
231
  return None
@@ -260,7 +260,7 @@ class DataRow:
260
260
 
261
261
  return self.vals[index]
262
262
 
263
- def get_stored_val(self, index: int, sa_col_type: Optional[sql.types.TypeEngine] = None) -> Any:
263
+ def get_stored_val(self, index: int, sa_col_type: sql.types.TypeEngine | None = None) -> Any:
264
264
  """Return the value that gets stored in the db"""
265
265
  assert self.excs[index] is None
266
266
  if not self.has_val[index]:
@@ -328,7 +328,7 @@ class DataRow:
328
328
  self.vals[idx] = val
329
329
  self.has_val[idx] = True
330
330
 
331
- def prepare_col_val_for_save(self, index: int, col: Optional[catalog.Column] = None) -> bool:
331
+ def prepare_col_val_for_save(self, index: int, col: catalog.Column | None = None) -> bool:
332
332
  """
333
333
  Prepare to save a column's value into the appropriate store. Discard unneeded values.
334
334
 
pixeltable/exprs/expr.py CHANGED
@@ -7,7 +7,7 @@ import inspect
7
7
  import json
8
8
  import sys
9
9
  import typing
10
- from typing import TYPE_CHECKING, Any, Callable, Iterable, Iterator, Optional, TypeVar, overload
10
+ from typing import TYPE_CHECKING, Any, Callable, Iterable, Iterator, TypeVar, overload
11
11
  from uuid import UUID
12
12
 
13
13
  import numpy as np
@@ -29,7 +29,7 @@ class ExprScope:
29
29
  parent is None: outermost scope
30
30
  """
31
31
 
32
- def __init__(self, parent: Optional[ExprScope]):
32
+ def __init__(self, parent: ExprScope | None):
33
33
  self.parent = parent
34
34
 
35
35
  def is_contained_in(self, other: ExprScope) -> bool:
@@ -61,13 +61,13 @@ class Expr(abc.ABC):
61
61
  # - set by the subclass's __init__()
62
62
  # - produced by _create_id()
63
63
  # - not expected to survive a serialize()/deserialize() roundtrip
64
- id: Optional[int]
64
+ id: int | None
65
65
 
66
66
  # index of the expr's value in the data row:
67
67
  # - set for all materialized exprs
68
68
  # - None: not executable
69
69
  # - not set for subexprs that don't need to be materialized because the parent can be materialized via SQL
70
- slot_idx: Optional[int]
70
+ slot_idx: int | None
71
71
 
72
72
  T = TypeVar('T', bound='Expr')
73
73
 
@@ -103,7 +103,7 @@ class Expr(abc.ABC):
103
103
  assert not has_rel_path, self._expr_tree()
104
104
  assert not self._has_relative_path(), self._expr_tree()
105
105
 
106
- def _bind_rel_paths(self, mapper: Optional['exprs.JsonMapperDispatch'] = None) -> None:
106
+ def _bind_rel_paths(self, mapper: 'exprs.JsonMapperDispatch' | None = None) -> None:
107
107
  for c in self.components:
108
108
  c._bind_rel_paths(mapper)
109
109
 
@@ -118,7 +118,7 @@ class Expr(abc.ABC):
118
118
  for c in self.components:
119
119
  c._expr_tree_r(indent + 2, buf)
120
120
 
121
- def default_column_name(self) -> Optional[str]:
121
+ def default_column_name(self) -> str | None:
122
122
  """
123
123
  Returns:
124
124
  None if this expression lacks a default name,
@@ -127,7 +127,7 @@ class Expr(abc.ABC):
127
127
  return None
128
128
 
129
129
  @property
130
- def validation_error(self) -> Optional[str]:
130
+ def validation_error(self) -> str | None:
131
131
  """
132
132
  Subclasses can override this to indicate that validation has failed after a catalog load.
133
133
 
@@ -205,12 +205,12 @@ class Expr(abc.ABC):
205
205
  return result
206
206
 
207
207
  @classmethod
208
- def copy_list(cls, expr_list: Optional[list[Expr]]) -> Optional[list[Expr]]:
208
+ def copy_list(cls, expr_list: list[Expr] | None) -> list[Expr] | None:
209
209
  if expr_list is None:
210
210
  return None
211
211
  return [e.copy() for e in expr_list]
212
212
 
213
- def __deepcopy__(self, memo: Optional[dict[int, Any]] = None) -> Expr:
213
+ def __deepcopy__(self, memo: dict[int, Any] | None = None) -> Expr:
214
214
  # we don't need to create an actual deep copy because all state other than execution state is read-only
215
215
  if memo is None:
216
216
  memo = {}
@@ -241,7 +241,7 @@ class Expr(abc.ABC):
241
241
  for i in range(len(expr_list)):
242
242
  expr_list[i] = expr_list[i].substitute(spec)
243
243
 
244
- def resolve_computed_cols(self, resolve_cols: Optional[set[catalog.Column]] = None) -> Expr:
244
+ def resolve_computed_cols(self, resolve_cols: set[catalog.Column] | None = None) -> Expr:
245
245
  """
246
246
  Recursively replace ColRefs to unstored computed columns with their value exprs.
247
247
  Also replaces references to stored computed columns in resolve_cols.
@@ -309,18 +309,18 @@ class Expr(abc.ABC):
309
309
 
310
310
  @overload
311
311
  def subexprs(
312
- self, *, filter: Optional[Callable[[Expr], bool]] = None, traverse_matches: bool = True
312
+ self, *, filter: Callable[[Expr], bool] | None = None, traverse_matches: bool = True
313
313
  ) -> Iterator[Expr]: ...
314
314
 
315
315
  @overload
316
316
  def subexprs(
317
- self, expr_class: type[T], filter: Optional[Callable[[Expr], bool]] = None, traverse_matches: bool = True
317
+ self, expr_class: type[T], filter: Callable[[Expr], bool] | None = None, traverse_matches: bool = True
318
318
  ) -> Iterator[T]: ...
319
319
 
320
320
  def subexprs(
321
321
  self,
322
- expr_class: Optional[type[T]] = None,
323
- filter: Optional[Callable[[Expr], bool]] = None,
322
+ expr_class: type[T] | None = None,
323
+ filter: Callable[[Expr], bool] | None = None,
324
324
  traverse_matches: bool = True,
325
325
  ) -> Iterator[T]:
326
326
  """
@@ -339,11 +339,7 @@ class Expr(abc.ABC):
339
339
  @overload
340
340
  @classmethod
341
341
  def list_subexprs(
342
- cls,
343
- expr_list: Iterable[Expr],
344
- *,
345
- filter: Optional[Callable[[Expr], bool]] = None,
346
- traverse_matches: bool = True,
342
+ cls, expr_list: Iterable[Expr], *, filter: Callable[[Expr], bool] | None = None, traverse_matches: bool = True
347
343
  ) -> Iterator[Expr]: ...
348
344
 
349
345
  @overload
@@ -352,7 +348,7 @@ class Expr(abc.ABC):
352
348
  cls,
353
349
  expr_list: Iterable[Expr],
354
350
  expr_class: type[T],
355
- filter: Optional[Callable[[Expr], bool]] = None,
351
+ filter: Callable[[Expr], bool] | None = None,
356
352
  traverse_matches: bool = True,
357
353
  ) -> Iterator[T]: ...
358
354
 
@@ -360,8 +356,8 @@ class Expr(abc.ABC):
360
356
  def list_subexprs(
361
357
  cls,
362
358
  expr_list: Iterable[Expr],
363
- expr_class: Optional[type[T]] = None,
364
- filter: Optional[Callable[[Expr], bool]] = None,
359
+ expr_class: type[T] | None = None,
360
+ filter: Callable[[Expr], bool] | None = None,
365
361
  traverse_matches: bool = True,
366
362
  ) -> Iterator[T]:
367
363
  """Produce subexprs for all exprs in list. Can contain duplicates."""
@@ -377,7 +373,7 @@ class Expr(abc.ABC):
377
373
  ) -> bool:
378
374
  return any(e._contains(expr_class, filter) for e in expr_list)
379
375
 
380
- def _contains(self, cls: Optional[type[Expr]] = None, filter: Optional[Callable[[Expr], bool]] = None) -> bool:
376
+ def _contains(self, cls: type[Expr] | None = None, filter: Callable[[Expr], bool] | None = None) -> bool:
381
377
  """
382
378
  Returns True if any subexpr is an instance of cls and/or matches filter.
383
379
  """
@@ -396,7 +392,9 @@ class Expr(abc.ABC):
396
392
  from .column_ref import ColumnRef
397
393
  from .rowid_ref import RowidRef
398
394
 
399
- return {ref.col.tbl.id for ref in self.subexprs(ColumnRef)} | {ref.tbl.id for ref in self.subexprs(RowidRef)}
395
+ return {ref.col.get_tbl().id for ref in self.subexprs(ColumnRef)} | {
396
+ ref.tbl.id for ref in self.subexprs(RowidRef)
397
+ }
400
398
 
401
399
  @classmethod
402
400
  def all_tbl_ids(cls, exprs_: Iterable[Expr]) -> set[UUID]:
@@ -416,14 +414,14 @@ class Expr(abc.ABC):
416
414
  result.update(cls.get_refd_column_ids(component_dict))
417
415
  return result
418
416
 
419
- def as_literal(self) -> Optional[Expr]:
417
+ def as_literal(self) -> Expr | None:
420
418
  """
421
419
  Return a Literal expression if this expression can be evaluated to a constant value, otherwise return None.
422
420
  """
423
421
  return None
424
422
 
425
423
  @classmethod
426
- def from_array(cls, elements: Iterable) -> Optional[Expr]:
424
+ def from_array(cls, elements: Iterable) -> Expr | None:
427
425
  from .inline_expr import InlineArray
428
426
  from .literal import Literal
429
427
 
@@ -446,7 +444,7 @@ class Expr(abc.ABC):
446
444
  return self
447
445
 
448
446
  @classmethod
449
- def from_object(cls, o: object) -> Optional[Expr]:
447
+ def from_object(cls, o: object) -> Expr | None:
450
448
  """
451
449
  Try to turn a literal object into an Expr.
452
450
  """
@@ -476,7 +474,7 @@ class Expr(abc.ABC):
476
474
  return Literal(o, col_type=obj_type)
477
475
  return None
478
476
 
479
- def sql_expr(self, sql_elements: 'exprs.SqlElementCache') -> Optional[sql.ColumnElement]:
477
+ def sql_expr(self, sql_elements: 'exprs.SqlElementCache') -> sql.ColumnElement | None:
480
478
  """
481
479
  If this expr can be materialized directly in SQL:
482
480
  - returns a ColumnElement
@@ -495,6 +493,18 @@ class Expr(abc.ABC):
495
493
  """
496
494
  pass
497
495
 
496
+ def prepare(self) -> None:
497
+ """
498
+ Create execution state. This is called before the first eval() call.
499
+ """
500
+ for c in self.components:
501
+ c.prepare()
502
+
503
+ @classmethod
504
+ def prepare_list(cls, expr_list: Iterable[Expr]) -> None:
505
+ for e in expr_list:
506
+ e.prepare()
507
+
498
508
  def release(self) -> None:
499
509
  """
500
510
  Allow Expr class to tear down execution state. This is called after the last eval() call.
@@ -503,7 +513,7 @@ class Expr(abc.ABC):
503
513
  c.release()
504
514
 
505
515
  @classmethod
506
- def release_list(cls, expr_list: list[Expr]) -> None:
516
+ def release_list(cls, expr_list: Iterable[Expr]) -> None:
507
517
  for e in expr_list:
508
518
  e.release()
509
519
 
@@ -791,7 +801,7 @@ class Expr(abc.ABC):
791
801
 
792
802
  return CompoundPredicate(LogicalOperator.NOT, [self])
793
803
 
794
- def split_conjuncts(self, condition: Callable[[Expr], bool]) -> tuple[list[Expr], Optional[Expr]]:
804
+ def split_conjuncts(self, condition: Callable[[Expr], bool]) -> tuple[list[Expr], Expr | None]:
795
805
  """
796
806
  Returns clauses of a conjunction that meet condition in the first element.
797
807
  The second element contains remaining clauses, rolled into a conjunction.
@@ -802,7 +812,7 @@ class Expr(abc.ABC):
802
812
  else:
803
813
  return [], self
804
814
 
805
- def _make_applicator_function(self, fn: Callable, col_type: Optional[ts.ColumnType]) -> 'func.Function':
815
+ def _make_applicator_function(self, fn: Callable, col_type: ts.ColumnType | None) -> 'func.Function':
806
816
  """
807
817
  Creates a unary pixeltable `Function` that encapsulates a python `Callable`. The result type of
808
818
  the new `Function` is given by `col_type`, and its parameter type will be `self.col_type`.
pixeltable/exprs/expr_dict.py CHANGED
@@ -1,4 +1,4 @@
1
- from typing import Generic, Iterable, Iterator, Optional, TypeVar
1
+ from typing import Generic, Iterable, Iterator, TypeVar
2
2
 
3
3
  from .expr import Expr
4
4
 
@@ -14,7 +14,7 @@ class ExprDict(Generic[T]):
14
14
 
15
15
  _data: dict[int, tuple[Expr, T]]
16
16
 
17
- def __init__(self, iterable: Optional[Iterable[tuple[Expr, T]]] = None):
17
+ def __init__(self, iterable: Iterable[tuple[Expr, T]] | None = None):
18
18
  self._data = {}
19
19
 
20
20
  if iterable is not None:
@@ -39,7 +39,7 @@ class ExprDict(Generic[T]):
39
39
  def __contains__(self, key: Expr) -> bool:
40
40
  return key.id in self._data
41
41
 
42
- def get(self, key: Expr, default: Optional[T] = None) -> Optional[T]:
42
+ def get(self, key: Expr, default: T | None = None) -> T | None:
43
43
  item = self._data.get(key.id)
44
44
  return item[1] if item is not None else default
45
45