pixeltable 0.3.14__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220)
  1. pixeltable/__init__.py +42 -8
  2. pixeltable/{dataframe.py → _query.py} +470 -206
  3. pixeltable/_version.py +1 -0
  4. pixeltable/catalog/__init__.py +5 -4
  5. pixeltable/catalog/catalog.py +1785 -432
  6. pixeltable/catalog/column.py +190 -113
  7. pixeltable/catalog/dir.py +2 -4
  8. pixeltable/catalog/globals.py +19 -46
  9. pixeltable/catalog/insertable_table.py +191 -98
  10. pixeltable/catalog/path.py +63 -23
  11. pixeltable/catalog/schema_object.py +11 -15
  12. pixeltable/catalog/table.py +843 -436
  13. pixeltable/catalog/table_metadata.py +103 -0
  14. pixeltable/catalog/table_version.py +978 -657
  15. pixeltable/catalog/table_version_handle.py +72 -16
  16. pixeltable/catalog/table_version_path.py +112 -43
  17. pixeltable/catalog/tbl_ops.py +53 -0
  18. pixeltable/catalog/update_status.py +191 -0
  19. pixeltable/catalog/view.py +134 -90
  20. pixeltable/config.py +134 -22
  21. pixeltable/env.py +471 -157
  22. pixeltable/exceptions.py +6 -0
  23. pixeltable/exec/__init__.py +4 -1
  24. pixeltable/exec/aggregation_node.py +7 -8
  25. pixeltable/exec/cache_prefetch_node.py +83 -110
  26. pixeltable/exec/cell_materialization_node.py +268 -0
  27. pixeltable/exec/cell_reconstruction_node.py +168 -0
  28. pixeltable/exec/component_iteration_node.py +4 -3
  29. pixeltable/exec/data_row_batch.py +8 -65
  30. pixeltable/exec/exec_context.py +16 -4
  31. pixeltable/exec/exec_node.py +13 -36
  32. pixeltable/exec/expr_eval/evaluators.py +11 -7
  33. pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
  34. pixeltable/exec/expr_eval/globals.py +8 -5
  35. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  36. pixeltable/exec/expr_eval/schedulers.py +106 -56
  37. pixeltable/exec/globals.py +35 -0
  38. pixeltable/exec/in_memory_data_node.py +19 -19
  39. pixeltable/exec/object_store_save_node.py +293 -0
  40. pixeltable/exec/row_update_node.py +16 -9
  41. pixeltable/exec/sql_node.py +351 -84
  42. pixeltable/exprs/__init__.py +1 -1
  43. pixeltable/exprs/arithmetic_expr.py +27 -22
  44. pixeltable/exprs/array_slice.py +3 -3
  45. pixeltable/exprs/column_property_ref.py +36 -23
  46. pixeltable/exprs/column_ref.py +213 -89
  47. pixeltable/exprs/comparison.py +5 -5
  48. pixeltable/exprs/compound_predicate.py +5 -4
  49. pixeltable/exprs/data_row.py +164 -54
  50. pixeltable/exprs/expr.py +70 -44
  51. pixeltable/exprs/expr_dict.py +3 -3
  52. pixeltable/exprs/expr_set.py +17 -10
  53. pixeltable/exprs/function_call.py +100 -40
  54. pixeltable/exprs/globals.py +2 -2
  55. pixeltable/exprs/in_predicate.py +4 -4
  56. pixeltable/exprs/inline_expr.py +18 -32
  57. pixeltable/exprs/is_null.py +7 -3
  58. pixeltable/exprs/json_mapper.py +8 -8
  59. pixeltable/exprs/json_path.py +56 -22
  60. pixeltable/exprs/literal.py +27 -5
  61. pixeltable/exprs/method_ref.py +2 -2
  62. pixeltable/exprs/object_ref.py +2 -2
  63. pixeltable/exprs/row_builder.py +167 -67
  64. pixeltable/exprs/rowid_ref.py +25 -10
  65. pixeltable/exprs/similarity_expr.py +58 -40
  66. pixeltable/exprs/sql_element_cache.py +4 -4
  67. pixeltable/exprs/string_op.py +5 -5
  68. pixeltable/exprs/type_cast.py +3 -5
  69. pixeltable/func/__init__.py +1 -0
  70. pixeltable/func/aggregate_function.py +8 -8
  71. pixeltable/func/callable_function.py +9 -9
  72. pixeltable/func/expr_template_function.py +17 -11
  73. pixeltable/func/function.py +18 -20
  74. pixeltable/func/function_registry.py +6 -7
  75. pixeltable/func/globals.py +2 -3
  76. pixeltable/func/mcp.py +74 -0
  77. pixeltable/func/query_template_function.py +29 -27
  78. pixeltable/func/signature.py +46 -19
  79. pixeltable/func/tools.py +31 -13
  80. pixeltable/func/udf.py +18 -20
  81. pixeltable/functions/__init__.py +16 -0
  82. pixeltable/functions/anthropic.py +123 -77
  83. pixeltable/functions/audio.py +147 -10
  84. pixeltable/functions/bedrock.py +13 -6
  85. pixeltable/functions/date.py +7 -4
  86. pixeltable/functions/deepseek.py +35 -43
  87. pixeltable/functions/document.py +81 -0
  88. pixeltable/functions/fal.py +76 -0
  89. pixeltable/functions/fireworks.py +11 -20
  90. pixeltable/functions/gemini.py +195 -39
  91. pixeltable/functions/globals.py +142 -14
  92. pixeltable/functions/groq.py +108 -0
  93. pixeltable/functions/huggingface.py +1056 -24
  94. pixeltable/functions/image.py +115 -57
  95. pixeltable/functions/json.py +1 -1
  96. pixeltable/functions/llama_cpp.py +28 -13
  97. pixeltable/functions/math.py +67 -5
  98. pixeltable/functions/mistralai.py +18 -55
  99. pixeltable/functions/net.py +70 -0
  100. pixeltable/functions/ollama.py +20 -13
  101. pixeltable/functions/openai.py +240 -226
  102. pixeltable/functions/openrouter.py +143 -0
  103. pixeltable/functions/replicate.py +4 -4
  104. pixeltable/functions/reve.py +250 -0
  105. pixeltable/functions/string.py +239 -69
  106. pixeltable/functions/timestamp.py +16 -16
  107. pixeltable/functions/together.py +24 -84
  108. pixeltable/functions/twelvelabs.py +188 -0
  109. pixeltable/functions/util.py +6 -1
  110. pixeltable/functions/uuid.py +30 -0
  111. pixeltable/functions/video.py +1515 -107
  112. pixeltable/functions/vision.py +8 -8
  113. pixeltable/functions/voyageai.py +289 -0
  114. pixeltable/functions/whisper.py +16 -8
  115. pixeltable/functions/whisperx.py +179 -0
  116. pixeltable/{ext/functions → functions}/yolox.py +2 -4
  117. pixeltable/globals.py +362 -115
  118. pixeltable/index/base.py +17 -21
  119. pixeltable/index/btree.py +28 -22
  120. pixeltable/index/embedding_index.py +100 -118
  121. pixeltable/io/__init__.py +4 -2
  122. pixeltable/io/datarows.py +8 -7
  123. pixeltable/io/external_store.py +56 -105
  124. pixeltable/io/fiftyone.py +13 -13
  125. pixeltable/io/globals.py +31 -30
  126. pixeltable/io/hf_datasets.py +61 -16
  127. pixeltable/io/label_studio.py +74 -70
  128. pixeltable/io/lancedb.py +3 -0
  129. pixeltable/io/pandas.py +21 -12
  130. pixeltable/io/parquet.py +25 -105
  131. pixeltable/io/table_data_conduit.py +250 -123
  132. pixeltable/io/utils.py +4 -4
  133. pixeltable/iterators/__init__.py +2 -1
  134. pixeltable/iterators/audio.py +26 -25
  135. pixeltable/iterators/base.py +9 -3
  136. pixeltable/iterators/document.py +112 -78
  137. pixeltable/iterators/image.py +12 -15
  138. pixeltable/iterators/string.py +11 -4
  139. pixeltable/iterators/video.py +523 -120
  140. pixeltable/metadata/__init__.py +14 -3
  141. pixeltable/metadata/converters/convert_13.py +2 -2
  142. pixeltable/metadata/converters/convert_18.py +2 -2
  143. pixeltable/metadata/converters/convert_19.py +2 -2
  144. pixeltable/metadata/converters/convert_20.py +2 -2
  145. pixeltable/metadata/converters/convert_21.py +2 -2
  146. pixeltable/metadata/converters/convert_22.py +2 -2
  147. pixeltable/metadata/converters/convert_24.py +2 -2
  148. pixeltable/metadata/converters/convert_25.py +2 -2
  149. pixeltable/metadata/converters/convert_26.py +2 -2
  150. pixeltable/metadata/converters/convert_29.py +4 -4
  151. pixeltable/metadata/converters/convert_30.py +34 -21
  152. pixeltable/metadata/converters/convert_34.py +2 -2
  153. pixeltable/metadata/converters/convert_35.py +9 -0
  154. pixeltable/metadata/converters/convert_36.py +38 -0
  155. pixeltable/metadata/converters/convert_37.py +15 -0
  156. pixeltable/metadata/converters/convert_38.py +39 -0
  157. pixeltable/metadata/converters/convert_39.py +124 -0
  158. pixeltable/metadata/converters/convert_40.py +73 -0
  159. pixeltable/metadata/converters/convert_41.py +12 -0
  160. pixeltable/metadata/converters/convert_42.py +9 -0
  161. pixeltable/metadata/converters/convert_43.py +44 -0
  162. pixeltable/metadata/converters/util.py +20 -31
  163. pixeltable/metadata/notes.py +9 -0
  164. pixeltable/metadata/schema.py +140 -53
  165. pixeltable/metadata/utils.py +74 -0
  166. pixeltable/mypy/__init__.py +3 -0
  167. pixeltable/mypy/mypy_plugin.py +123 -0
  168. pixeltable/plan.py +382 -115
  169. pixeltable/share/__init__.py +1 -1
  170. pixeltable/share/packager.py +547 -83
  171. pixeltable/share/protocol/__init__.py +33 -0
  172. pixeltable/share/protocol/common.py +165 -0
  173. pixeltable/share/protocol/operation_types.py +33 -0
  174. pixeltable/share/protocol/replica.py +119 -0
  175. pixeltable/share/publish.py +257 -59
  176. pixeltable/store.py +311 -194
  177. pixeltable/type_system.py +373 -211
  178. pixeltable/utils/__init__.py +2 -3
  179. pixeltable/utils/arrow.py +131 -17
  180. pixeltable/utils/av.py +298 -0
  181. pixeltable/utils/azure_store.py +346 -0
  182. pixeltable/utils/coco.py +6 -6
  183. pixeltable/utils/code.py +3 -3
  184. pixeltable/utils/console_output.py +4 -1
  185. pixeltable/utils/coroutine.py +6 -23
  186. pixeltable/utils/dbms.py +32 -6
  187. pixeltable/utils/description_helper.py +4 -5
  188. pixeltable/utils/documents.py +7 -18
  189. pixeltable/utils/exception_handler.py +7 -30
  190. pixeltable/utils/filecache.py +6 -6
  191. pixeltable/utils/formatter.py +86 -48
  192. pixeltable/utils/gcs_store.py +295 -0
  193. pixeltable/utils/http.py +133 -0
  194. pixeltable/utils/http_server.py +2 -3
  195. pixeltable/utils/iceberg.py +1 -2
  196. pixeltable/utils/image.py +17 -0
  197. pixeltable/utils/lancedb.py +90 -0
  198. pixeltable/utils/local_store.py +322 -0
  199. pixeltable/utils/misc.py +5 -0
  200. pixeltable/utils/object_stores.py +573 -0
  201. pixeltable/utils/pydantic.py +60 -0
  202. pixeltable/utils/pytorch.py +5 -6
  203. pixeltable/utils/s3_store.py +527 -0
  204. pixeltable/utils/sql.py +26 -0
  205. pixeltable/utils/system.py +30 -0
  206. pixeltable-0.5.7.dist-info/METADATA +579 -0
  207. pixeltable-0.5.7.dist-info/RECORD +227 -0
  208. {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
  209. pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
  210. pixeltable/__version__.py +0 -3
  211. pixeltable/catalog/named_function.py +0 -40
  212. pixeltable/ext/__init__.py +0 -17
  213. pixeltable/ext/functions/__init__.py +0 -11
  214. pixeltable/ext/functions/whisperx.py +0 -77
  215. pixeltable/utils/media_store.py +0 -77
  216. pixeltable/utils/s3.py +0 -17
  217. pixeltable-0.3.14.dist-info/METADATA +0 -434
  218. pixeltable-0.3.14.dist-info/RECORD +0 -186
  219. pixeltable-0.3.14.dist-info/entry_points.txt +0 -3
  220. {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
pixeltable/exprs/expr.py CHANGED
@@ -7,7 +7,7 @@ import inspect
7
7
  import json
8
8
  import sys
9
9
  import typing
10
- from typing import TYPE_CHECKING, Any, Callable, Iterable, Iterator, Optional, TypeVar, Union, overload
10
+ from typing import TYPE_CHECKING, Any, Callable, Iterable, Iterator, TypeVar, overload
11
11
  from uuid import UUID
12
12
 
13
13
  import numpy as np
@@ -29,7 +29,7 @@ class ExprScope:
29
29
  parent is None: outermost scope
30
30
  """
31
31
 
32
- def __init__(self, parent: Optional[ExprScope]):
32
+ def __init__(self, parent: ExprScope | None):
33
33
  self.parent = parent
34
34
 
35
35
  def is_contained_in(self, other: ExprScope) -> bool:
@@ -47,7 +47,7 @@ class Expr(abc.ABC):
47
47
  """
48
48
  Rules for using state in subclasses:
49
49
  - all state except for components and slot_idx is shared between copies of an Expr
50
- - slot_idx is set during analysis (DataFrame.show())
50
+ - slot_idx is set during analysis (Query.show())
51
51
  - during eval(), components can only be accessed via self.components; any Exprs outside of that won't
52
52
  have slot_idx set
53
53
  """
@@ -61,13 +61,13 @@ class Expr(abc.ABC):
61
61
  # - set by the subclass's __init__()
62
62
  # - produced by _create_id()
63
63
  # - not expected to survive a serialize()/deserialize() roundtrip
64
- id: Optional[int]
64
+ id: int | None
65
65
 
66
66
  # index of the expr's value in the data row:
67
67
  # - set for all materialized exprs
68
68
  # - None: not executable
69
69
  # - not set for subexprs that don't need to be materialized because the parent can be materialized via SQL
70
- slot_idx: Optional[int]
70
+ slot_idx: int | None
71
71
 
72
72
  T = TypeVar('T', bound='Expr')
73
73
 
@@ -103,7 +103,7 @@ class Expr(abc.ABC):
103
103
  assert not has_rel_path, self._expr_tree()
104
104
  assert not self._has_relative_path(), self._expr_tree()
105
105
 
106
- def _bind_rel_paths(self, mapper: Optional['exprs.JsonMapperDispatch'] = None) -> None:
106
+ def _bind_rel_paths(self, mapper: 'exprs.JsonMapperDispatch' | None = None) -> None:
107
107
  for c in self.components:
108
108
  c._bind_rel_paths(mapper)
109
109
 
@@ -118,7 +118,7 @@ class Expr(abc.ABC):
118
118
  for c in self.components:
119
119
  c._expr_tree_r(indent + 2, buf)
120
120
 
121
- def default_column_name(self) -> Optional[str]:
121
+ def default_column_name(self) -> str | None:
122
122
  """
123
123
  Returns:
124
124
  None if this expression lacks a default name,
@@ -127,7 +127,7 @@ class Expr(abc.ABC):
127
127
  return None
128
128
 
129
129
  @property
130
- def validation_error(self) -> Optional[str]:
130
+ def validation_error(self) -> str | None:
131
131
  """
132
132
  Subclasses can override this to indicate that validation has failed after a catalog load.
133
133
 
@@ -205,12 +205,12 @@ class Expr(abc.ABC):
205
205
  return result
206
206
 
207
207
  @classmethod
208
- def copy_list(cls, expr_list: Optional[list[Expr]]) -> Optional[list[Expr]]:
208
+ def copy_list(cls, expr_list: list[Expr] | None) -> list[Expr] | None:
209
209
  if expr_list is None:
210
210
  return None
211
211
  return [e.copy() for e in expr_list]
212
212
 
213
- def __deepcopy__(self, memo: Optional[dict[int, Any]] = None) -> Expr:
213
+ def __deepcopy__(self, memo: dict[int, Any] | None = None) -> Expr:
214
214
  # we don't need to create an actual deep copy because all state other than execution state is read-only
215
215
  if memo is None:
216
216
  memo = {}
@@ -241,7 +241,7 @@ class Expr(abc.ABC):
241
241
  for i in range(len(expr_list)):
242
242
  expr_list[i] = expr_list[i].substitute(spec)
243
243
 
244
- def resolve_computed_cols(self, resolve_cols: Optional[set[catalog.Column]] = None) -> Expr:
244
+ def resolve_computed_cols(self, resolve_cols: set[catalog.Column] | None = None) -> Expr:
245
245
  """
246
246
  Recursively replace ColRefs to unstored computed columns with their value exprs.
247
247
  Also replaces references to stored computed columns in resolve_cols.
@@ -276,6 +276,13 @@ class Expr(abc.ABC):
276
276
  tbl_versions = {tbl_version.id: tbl_version.get() for tbl_version in tbl.get_tbl_versions()}
277
277
  return self._retarget(tbl_versions)
278
278
 
279
+ @classmethod
280
+ def retarget_list(cls, expr_list: list[Expr], tbl: catalog.TableVersionPath) -> None:
281
+ """Retarget ColumnRefs in expr_list to the specific TableVersions in tbl."""
282
+ tbl_versions = {tbl_version.id: tbl_version.get() for tbl_version in tbl.get_tbl_versions()}
283
+ for i in range(len(expr_list)):
284
+ expr_list[i] = expr_list[i]._retarget(tbl_versions)
285
+
279
286
  def _retarget(self, tbl_versions: dict[UUID, catalog.TableVersion]) -> Self:
280
287
  for i in range(len(self.components)):
281
288
  self.components[i] = self.components[i]._retarget(tbl_versions)
@@ -302,18 +309,18 @@ class Expr(abc.ABC):
302
309
 
303
310
  @overload
304
311
  def subexprs(
305
- self, *, filter: Optional[Callable[[Expr], bool]] = None, traverse_matches: bool = True
312
+ self, *, filter: Callable[[Expr], bool] | None = None, traverse_matches: bool = True
306
313
  ) -> Iterator[Expr]: ...
307
314
 
308
315
  @overload
309
316
  def subexprs(
310
- self, expr_class: type[T], filter: Optional[Callable[[Expr], bool]] = None, traverse_matches: bool = True
317
+ self, expr_class: type[T], filter: Callable[[Expr], bool] | None = None, traverse_matches: bool = True
311
318
  ) -> Iterator[T]: ...
312
319
 
313
320
  def subexprs(
314
321
  self,
315
- expr_class: Optional[type[T]] = None,
316
- filter: Optional[Callable[[Expr], bool]] = None,
322
+ expr_class: type[T] | None = None,
323
+ filter: Callable[[Expr], bool] | None = None,
317
324
  traverse_matches: bool = True,
318
325
  ) -> Iterator[T]:
319
326
  """
@@ -332,11 +339,7 @@ class Expr(abc.ABC):
332
339
  @overload
333
340
  @classmethod
334
341
  def list_subexprs(
335
- cls,
336
- expr_list: Iterable[Expr],
337
- *,
338
- filter: Optional[Callable[[Expr], bool]] = None,
339
- traverse_matches: bool = True,
342
+ cls, expr_list: Iterable[Expr], *, filter: Callable[[Expr], bool] | None = None, traverse_matches: bool = True
340
343
  ) -> Iterator[Expr]: ...
341
344
 
342
345
  @overload
@@ -345,7 +348,7 @@ class Expr(abc.ABC):
345
348
  cls,
346
349
  expr_list: Iterable[Expr],
347
350
  expr_class: type[T],
348
- filter: Optional[Callable[[Expr], bool]] = None,
351
+ filter: Callable[[Expr], bool] | None = None,
349
352
  traverse_matches: bool = True,
350
353
  ) -> Iterator[T]: ...
351
354
 
@@ -353,15 +356,24 @@ class Expr(abc.ABC):
353
356
  def list_subexprs(
354
357
  cls,
355
358
  expr_list: Iterable[Expr],
356
- expr_class: Optional[type[T]] = None,
357
- filter: Optional[Callable[[Expr], bool]] = None,
359
+ expr_class: type[T] | None = None,
360
+ filter: Callable[[Expr], bool] | None = None,
358
361
  traverse_matches: bool = True,
359
362
  ) -> Iterator[T]:
360
363
  """Produce subexprs for all exprs in list. Can contain duplicates."""
361
364
  for e in expr_list:
362
365
  yield from e.subexprs(expr_class=expr_class, filter=filter, traverse_matches=traverse_matches)
363
366
 
364
- def _contains(self, cls: Optional[type[Expr]] = None, filter: Optional[Callable[[Expr], bool]] = None) -> bool:
367
+ @classmethod
368
+ def list_contains(
369
+ cls,
370
+ expr_list: Iterable[Expr],
371
+ expr_class: type[Expr] | None = None,
372
+ filter: Callable[[Expr], bool] | None = None,
373
+ ) -> bool:
374
+ return any(e._contains(expr_class, filter) for e in expr_list)
375
+
376
+ def _contains(self, cls: type[Expr] | None = None, filter: Callable[[Expr], bool] | None = None) -> bool:
365
377
  """
366
378
  Returns True if any subexpr is an instance of cls and/or matches filter.
367
379
  """
@@ -380,34 +392,36 @@ class Expr(abc.ABC):
380
392
  from .column_ref import ColumnRef
381
393
  from .rowid_ref import RowidRef
382
394
 
383
- return {ref.col.tbl.id for ref in self.subexprs(ColumnRef)} | {ref.tbl.id for ref in self.subexprs(RowidRef)}
395
+ return {ref.col.get_tbl().id for ref in self.subexprs(ColumnRef)} | {
396
+ ref.tbl.id for ref in self.subexprs(RowidRef)
397
+ }
384
398
 
385
399
  @classmethod
386
400
  def all_tbl_ids(cls, exprs_: Iterable[Expr]) -> set[UUID]:
387
401
  return {tbl_id for e in exprs_ for tbl_id in e.tbl_ids()}
388
402
 
389
403
  @classmethod
390
- def get_refd_columns(cls, expr_dict: dict[str, Any]) -> list[catalog.Column]:
404
+ def get_refd_column_ids(cls, expr_dict: dict[str, Any]) -> set[catalog.QColumnId]:
391
405
  """Return Columns referenced by expr_dict."""
392
- result: list[catalog.Column] = []
406
+ result: set[catalog.QColumnId] = set()
393
407
  assert '_classname' in expr_dict
394
408
  from .column_ref import ColumnRef
395
409
 
396
410
  if expr_dict['_classname'] == 'ColumnRef':
397
- result.append(ColumnRef.get_column(expr_dict))
411
+ result.add(ColumnRef.get_column_id(expr_dict))
398
412
  if 'components' in expr_dict:
399
413
  for component_dict in expr_dict['components']:
400
- result.extend(cls.get_refd_columns(component_dict))
414
+ result.update(cls.get_refd_column_ids(component_dict))
401
415
  return result
402
416
 
403
- def as_literal(self) -> Optional[Expr]:
417
+ def as_literal(self) -> Expr | None:
404
418
  """
405
419
  Return a Literal expression if this expression can be evaluated to a constant value, otherwise return None.
406
420
  """
407
421
  return None
408
422
 
409
423
  @classmethod
410
- def from_array(cls, elements: Iterable) -> Optional[Expr]:
424
+ def from_array(cls, elements: Iterable) -> Expr | None:
411
425
  from .inline_expr import InlineArray
412
426
  from .literal import Literal
413
427
 
@@ -430,7 +444,7 @@ class Expr(abc.ABC):
430
444
  return self
431
445
 
432
446
  @classmethod
433
- def from_object(cls, o: object) -> Optional[Expr]:
447
+ def from_object(cls, o: object) -> Expr | None:
434
448
  """
435
449
  Try to turn a literal object into an Expr.
436
450
  """
@@ -460,7 +474,7 @@ class Expr(abc.ABC):
460
474
  return Literal(o, col_type=obj_type)
461
475
  return None
462
476
 
463
- def sql_expr(self, sql_elements: 'exprs.SqlElementCache') -> Optional[sql.ColumnElement]:
477
+ def sql_expr(self, sql_elements: 'exprs.SqlElementCache') -> sql.ColumnElement | None:
464
478
  """
465
479
  If this expr can be materialized directly in SQL:
466
480
  - returns a ColumnElement
@@ -479,6 +493,18 @@ class Expr(abc.ABC):
479
493
  """
480
494
  pass
481
495
 
496
+ def prepare(self) -> None:
497
+ """
498
+ Create execution state. This is called before the first eval() call.
499
+ """
500
+ for c in self.components:
501
+ c.prepare()
502
+
503
+ @classmethod
504
+ def prepare_list(cls, expr_list: Iterable[Expr]) -> None:
505
+ for e in expr_list:
506
+ e.prepare()
507
+
482
508
  def release(self) -> None:
483
509
  """
484
510
  Allow Expr class to tear down execution state. This is called after the last eval() call.
@@ -487,7 +513,7 @@ class Expr(abc.ABC):
487
513
  c.release()
488
514
 
489
515
  @classmethod
490
- def release_list(cls, expr_list: list[Expr]) -> None:
516
+ def release_list(cls, expr_list: Iterable[Expr]) -> None:
491
517
  for e in expr_list:
492
518
  e.release()
493
519
 
@@ -543,7 +569,7 @@ class Expr(abc.ABC):
543
569
  else:
544
570
  return InPredicate(self, value_set_literal=value_set)
545
571
 
546
- def astype(self, new_type: Union[ts.ColumnType, type, _AnnotatedAlias]) -> 'exprs.TypeCast':
572
+ def astype(self, new_type: ts.ColumnType | type | _AnnotatedAlias) -> 'exprs.TypeCast':
547
573
  from pixeltable.exprs import TypeCast
548
574
 
549
575
  # Interpret the type argument the same way we would if given in a schema
@@ -555,7 +581,7 @@ class Expr(abc.ABC):
555
581
  return TypeCast(self, col_type)
556
582
 
557
583
  def apply(
558
- self, fn: Callable, *, col_type: Union[ts.ColumnType, type, _AnnotatedAlias, None] = None
584
+ self, fn: Callable, *, col_type: ts.ColumnType | type | _AnnotatedAlias | None = None
559
585
  ) -> 'exprs.FunctionCall':
560
586
  if col_type is not None:
561
587
  col_type = ts.ColumnType.normalize_type(col_type)
@@ -639,7 +665,7 @@ class Expr(abc.ABC):
639
665
 
640
666
  def _make_comparison(self, op: ComparisonOperator, other: object) -> 'exprs.Comparison':
641
667
  """
642
- other: Union[Expr, LiteralPythonTypes]
668
+ other: Expr | LiteralPythonTypes
643
669
  """
644
670
  # TODO: check for compatibility
645
671
  from .comparison import Comparison
@@ -654,7 +680,7 @@ class Expr(abc.ABC):
654
680
  def __neg__(self) -> 'exprs.ArithmeticExpr':
655
681
  return self._make_arithmetic_expr(ArithmeticOperator.MUL, -1)
656
682
 
657
- def __add__(self, other: object) -> Union[exprs.ArithmeticExpr, exprs.StringOp]:
683
+ def __add__(self, other: object) -> exprs.ArithmeticExpr | exprs.StringOp:
658
684
  if isinstance(self, str) or (isinstance(self, Expr) and self.col_type.is_string_type()):
659
685
  return self._make_string_expr(StringOperator.CONCAT, other)
660
686
  return self._make_arithmetic_expr(ArithmeticOperator.ADD, other)
@@ -662,7 +688,7 @@ class Expr(abc.ABC):
662
688
  def __sub__(self, other: object) -> 'exprs.ArithmeticExpr':
663
689
  return self._make_arithmetic_expr(ArithmeticOperator.SUB, other)
664
690
 
665
- def __mul__(self, other: object) -> Union['exprs.ArithmeticExpr', 'exprs.StringOp']:
691
+ def __mul__(self, other: object) -> 'exprs.ArithmeticExpr' | 'exprs.StringOp':
666
692
  if isinstance(self, str) or (isinstance(self, Expr) and self.col_type.is_string_type()):
667
693
  return self._make_string_expr(StringOperator.REPEAT, other)
668
694
  return self._make_arithmetic_expr(ArithmeticOperator.MUL, other)
@@ -676,7 +702,7 @@ class Expr(abc.ABC):
676
702
  def __floordiv__(self, other: object) -> 'exprs.ArithmeticExpr':
677
703
  return self._make_arithmetic_expr(ArithmeticOperator.FLOORDIV, other)
678
704
 
679
- def __radd__(self, other: object) -> Union['exprs.ArithmeticExpr', 'exprs.StringOp']:
705
+ def __radd__(self, other: object) -> 'exprs.ArithmeticExpr' | 'exprs.StringOp':
680
706
  if isinstance(other, str) or (isinstance(other, Expr) and other.col_type.is_string_type()):
681
707
  return self._rmake_string_expr(StringOperator.CONCAT, other)
682
708
  return self._rmake_arithmetic_expr(ArithmeticOperator.ADD, other)
@@ -684,7 +710,7 @@ class Expr(abc.ABC):
684
710
  def __rsub__(self, other: object) -> 'exprs.ArithmeticExpr':
685
711
  return self._rmake_arithmetic_expr(ArithmeticOperator.SUB, other)
686
712
 
687
- def __rmul__(self, other: object) -> Union['exprs.ArithmeticExpr', 'exprs.StringOp']:
713
+ def __rmul__(self, other: object) -> 'exprs.ArithmeticExpr' | 'exprs.StringOp':
688
714
  if isinstance(other, str) or (isinstance(other, Expr) and other.col_type.is_string_type()):
689
715
  return self._rmake_string_expr(StringOperator.REPEAT, other)
690
716
  return self._rmake_arithmetic_expr(ArithmeticOperator.MUL, other)
@@ -726,7 +752,7 @@ class Expr(abc.ABC):
726
752
 
727
753
  def _make_arithmetic_expr(self, op: ArithmeticOperator, other: object) -> 'exprs.ArithmeticExpr':
728
754
  """
729
- other: Union[Expr, LiteralPythonTypes]
755
+ other: Expr | LiteralPythonTypes
730
756
  """
731
757
  # TODO: check for compatibility
732
758
  from .arithmetic_expr import ArithmeticExpr
@@ -775,7 +801,7 @@ class Expr(abc.ABC):
775
801
 
776
802
  return CompoundPredicate(LogicalOperator.NOT, [self])
777
803
 
778
- def split_conjuncts(self, condition: Callable[[Expr], bool]) -> tuple[list[Expr], Optional[Expr]]:
804
+ def split_conjuncts(self, condition: Callable[[Expr], bool]) -> tuple[list[Expr], Expr | None]:
779
805
  """
780
806
  Returns clauses of a conjunction that meet condition in the first element.
781
807
  The second element contains remaining clauses, rolled into a conjunction.
@@ -786,7 +812,7 @@ class Expr(abc.ABC):
786
812
  else:
787
813
  return [], self
788
814
 
789
- def _make_applicator_function(self, fn: Callable, col_type: Optional[ts.ColumnType]) -> 'func.Function':
815
+ def _make_applicator_function(self, fn: Callable, col_type: ts.ColumnType | None) -> 'func.Function':
790
816
  """
791
817
  Creates a unary pixeltable `Function` that encapsulates a python `Callable`. The result type of
792
818
  the new `Function` is given by `col_type`, and its parameter type will be `self.col_type`.
pixeltable/exprs/expr_dict.py CHANGED
@@ -1,4 +1,4 @@
1
- from typing import Generic, Iterable, Iterator, Optional, TypeVar
1
+ from typing import Generic, Iterable, Iterator, TypeVar
2
2
 
3
3
  from .expr import Expr
4
4
 
@@ -14,7 +14,7 @@ class ExprDict(Generic[T]):
14
14
 
15
15
  _data: dict[int, tuple[Expr, T]]
16
16
 
17
- def __init__(self, iterable: Optional[Iterable[tuple[Expr, T]]] = None):
17
+ def __init__(self, iterable: Iterable[tuple[Expr, T]] | None = None):
18
18
  self._data = {}
19
19
 
20
20
  if iterable is not None:
@@ -39,7 +39,7 @@ class ExprDict(Generic[T]):
39
39
  def __contains__(self, key: Expr) -> bool:
40
40
  return key.id in self._data
41
41
 
42
- def get(self, key: Expr, default: Optional[T] = None) -> Optional[T]:
42
+ def get(self, key: Expr, default: T | None = None) -> T | None:
43
43
  item = self._data.get(key.id)
44
44
  return item[1] if item is not None else default
45
45
 
pixeltable/exprs/expr_set.py CHANGED
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Generic, Iterable, Iterator, Optional, TypeVar
3
+ from typing import Generic, Iterable, Iterator, TypeVar
4
4
 
5
5
  from .expr import Expr
6
6
 
@@ -9,26 +9,33 @@ T = TypeVar('T', bound='Expr')
9
9
 
10
10
  class ExprSet(Generic[T]):
11
11
  """
12
- A set that also supports indexed lookup (by slot_idx and Expr.id). Exprs are uniquely identified by Expr.id.
12
+ An ordered set that also supports indexed lookup (by slot_idx and Expr.id). Exprs are uniquely identified by
13
+ Expr.id.
13
14
  """
14
15
 
15
16
  exprs: dict[int, T] # key: Expr.id
17
+ expr_offsets: dict[int, int] # key: Expr.id, value: offset into self.exprs.keys()
16
18
  exprs_by_idx: dict[int, T] # key: slot_idx
17
19
 
18
- def __init__(self, elements: Optional[Iterable[T]] = None):
20
+ def __init__(self, elements: Iterable[T] | None = None):
19
21
  self.exprs = {}
22
+ self.expr_offsets = {}
20
23
  self.exprs_by_idx = {}
21
24
  if elements is not None:
22
25
  for e in elements:
23
26
  self.add(e)
24
27
 
25
- def add(self, expr: T) -> None:
26
- if expr.id in self.exprs:
27
- return
28
+ def add(self, expr: T) -> int:
29
+ """Returns offset corresponding to iteration order"""
30
+ offset = self.expr_offsets.get(expr.id)
31
+ if offset is not None:
32
+ return offset
33
+ offset = len(self.exprs)
28
34
  self.exprs[expr.id] = expr
29
- if expr.slot_idx is None:
30
- return
31
- self.exprs_by_idx[expr.slot_idx] = expr
35
+ self.expr_offsets[expr.id] = offset
36
+ if expr.slot_idx is not None:
37
+ self.exprs_by_idx[expr.slot_idx] = expr
38
+ return offset
32
39
 
33
40
  def update(self, *others: Iterable[T]) -> None:
34
41
  for other in others:
@@ -44,7 +51,7 @@ class ExprSet(Generic[T]):
44
51
  def __iter__(self) -> Iterator[T]:
45
52
  return iter(self.exprs.values())
46
53
 
47
- def __getitem__(self, index: object) -> Optional[T]:
54
+ def __getitem__(self, index: object) -> T | None:
48
55
  """Indexed lookup by slot_idx or Expr.id."""
49
56
  assert isinstance(index, (int, Expr))
50
57
  if isinstance(index, int):