pixeltable 0.4.0rc3__py3-none-any.whl → 0.4.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (202) hide show
  1. pixeltable/__init__.py +23 -5
  2. pixeltable/_version.py +1 -0
  3. pixeltable/catalog/__init__.py +5 -3
  4. pixeltable/catalog/catalog.py +1318 -404
  5. pixeltable/catalog/column.py +186 -115
  6. pixeltable/catalog/dir.py +1 -2
  7. pixeltable/catalog/globals.py +11 -43
  8. pixeltable/catalog/insertable_table.py +167 -79
  9. pixeltable/catalog/path.py +61 -23
  10. pixeltable/catalog/schema_object.py +9 -10
  11. pixeltable/catalog/table.py +626 -308
  12. pixeltable/catalog/table_metadata.py +101 -0
  13. pixeltable/catalog/table_version.py +713 -569
  14. pixeltable/catalog/table_version_handle.py +37 -6
  15. pixeltable/catalog/table_version_path.py +42 -29
  16. pixeltable/catalog/tbl_ops.py +50 -0
  17. pixeltable/catalog/update_status.py +191 -0
  18. pixeltable/catalog/view.py +108 -94
  19. pixeltable/config.py +128 -22
  20. pixeltable/dataframe.py +188 -100
  21. pixeltable/env.py +407 -136
  22. pixeltable/exceptions.py +6 -0
  23. pixeltable/exec/__init__.py +3 -0
  24. pixeltable/exec/aggregation_node.py +7 -8
  25. pixeltable/exec/cache_prefetch_node.py +83 -110
  26. pixeltable/exec/cell_materialization_node.py +231 -0
  27. pixeltable/exec/cell_reconstruction_node.py +135 -0
  28. pixeltable/exec/component_iteration_node.py +4 -3
  29. pixeltable/exec/data_row_batch.py +8 -65
  30. pixeltable/exec/exec_context.py +16 -4
  31. pixeltable/exec/exec_node.py +13 -36
  32. pixeltable/exec/expr_eval/evaluators.py +7 -6
  33. pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
  34. pixeltable/exec/expr_eval/globals.py +8 -5
  35. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  36. pixeltable/exec/expr_eval/schedulers.py +190 -30
  37. pixeltable/exec/globals.py +32 -0
  38. pixeltable/exec/in_memory_data_node.py +18 -18
  39. pixeltable/exec/object_store_save_node.py +293 -0
  40. pixeltable/exec/row_update_node.py +16 -9
  41. pixeltable/exec/sql_node.py +206 -101
  42. pixeltable/exprs/__init__.py +1 -1
  43. pixeltable/exprs/arithmetic_expr.py +27 -22
  44. pixeltable/exprs/array_slice.py +3 -3
  45. pixeltable/exprs/column_property_ref.py +34 -30
  46. pixeltable/exprs/column_ref.py +92 -96
  47. pixeltable/exprs/comparison.py +5 -5
  48. pixeltable/exprs/compound_predicate.py +5 -4
  49. pixeltable/exprs/data_row.py +152 -55
  50. pixeltable/exprs/expr.py +62 -43
  51. pixeltable/exprs/expr_dict.py +3 -3
  52. pixeltable/exprs/expr_set.py +17 -10
  53. pixeltable/exprs/function_call.py +75 -37
  54. pixeltable/exprs/globals.py +1 -2
  55. pixeltable/exprs/in_predicate.py +4 -4
  56. pixeltable/exprs/inline_expr.py +10 -27
  57. pixeltable/exprs/is_null.py +1 -3
  58. pixeltable/exprs/json_mapper.py +8 -8
  59. pixeltable/exprs/json_path.py +56 -22
  60. pixeltable/exprs/literal.py +5 -5
  61. pixeltable/exprs/method_ref.py +2 -2
  62. pixeltable/exprs/object_ref.py +2 -2
  63. pixeltable/exprs/row_builder.py +127 -53
  64. pixeltable/exprs/rowid_ref.py +8 -12
  65. pixeltable/exprs/similarity_expr.py +50 -25
  66. pixeltable/exprs/sql_element_cache.py +4 -4
  67. pixeltable/exprs/string_op.py +5 -5
  68. pixeltable/exprs/type_cast.py +3 -5
  69. pixeltable/func/__init__.py +1 -0
  70. pixeltable/func/aggregate_function.py +8 -8
  71. pixeltable/func/callable_function.py +9 -9
  72. pixeltable/func/expr_template_function.py +10 -10
  73. pixeltable/func/function.py +18 -20
  74. pixeltable/func/function_registry.py +6 -7
  75. pixeltable/func/globals.py +2 -3
  76. pixeltable/func/mcp.py +74 -0
  77. pixeltable/func/query_template_function.py +20 -18
  78. pixeltable/func/signature.py +43 -16
  79. pixeltable/func/tools.py +23 -13
  80. pixeltable/func/udf.py +18 -20
  81. pixeltable/functions/__init__.py +6 -0
  82. pixeltable/functions/anthropic.py +93 -33
  83. pixeltable/functions/audio.py +114 -10
  84. pixeltable/functions/bedrock.py +13 -6
  85. pixeltable/functions/date.py +1 -1
  86. pixeltable/functions/deepseek.py +20 -9
  87. pixeltable/functions/fireworks.py +2 -2
  88. pixeltable/functions/gemini.py +28 -11
  89. pixeltable/functions/globals.py +13 -13
  90. pixeltable/functions/groq.py +108 -0
  91. pixeltable/functions/huggingface.py +1046 -23
  92. pixeltable/functions/image.py +9 -18
  93. pixeltable/functions/llama_cpp.py +23 -8
  94. pixeltable/functions/math.py +3 -4
  95. pixeltable/functions/mistralai.py +4 -15
  96. pixeltable/functions/ollama.py +16 -9
  97. pixeltable/functions/openai.py +104 -82
  98. pixeltable/functions/openrouter.py +143 -0
  99. pixeltable/functions/replicate.py +2 -2
  100. pixeltable/functions/reve.py +250 -0
  101. pixeltable/functions/string.py +21 -28
  102. pixeltable/functions/timestamp.py +13 -14
  103. pixeltable/functions/together.py +4 -6
  104. pixeltable/functions/twelvelabs.py +92 -0
  105. pixeltable/functions/util.py +6 -1
  106. pixeltable/functions/video.py +1388 -106
  107. pixeltable/functions/vision.py +7 -7
  108. pixeltable/functions/whisper.py +15 -7
  109. pixeltable/functions/whisperx.py +179 -0
  110. pixeltable/{ext/functions → functions}/yolox.py +2 -4
  111. pixeltable/globals.py +332 -105
  112. pixeltable/index/base.py +13 -22
  113. pixeltable/index/btree.py +23 -22
  114. pixeltable/index/embedding_index.py +32 -44
  115. pixeltable/io/__init__.py +4 -2
  116. pixeltable/io/datarows.py +7 -6
  117. pixeltable/io/external_store.py +49 -77
  118. pixeltable/io/fiftyone.py +11 -11
  119. pixeltable/io/globals.py +29 -28
  120. pixeltable/io/hf_datasets.py +17 -9
  121. pixeltable/io/label_studio.py +70 -66
  122. pixeltable/io/lancedb.py +3 -0
  123. pixeltable/io/pandas.py +12 -11
  124. pixeltable/io/parquet.py +13 -93
  125. pixeltable/io/table_data_conduit.py +71 -47
  126. pixeltable/io/utils.py +3 -3
  127. pixeltable/iterators/__init__.py +2 -1
  128. pixeltable/iterators/audio.py +21 -11
  129. pixeltable/iterators/document.py +116 -55
  130. pixeltable/iterators/image.py +5 -2
  131. pixeltable/iterators/video.py +293 -13
  132. pixeltable/metadata/__init__.py +4 -2
  133. pixeltable/metadata/converters/convert_18.py +2 -2
  134. pixeltable/metadata/converters/convert_19.py +2 -2
  135. pixeltable/metadata/converters/convert_20.py +2 -2
  136. pixeltable/metadata/converters/convert_21.py +2 -2
  137. pixeltable/metadata/converters/convert_22.py +2 -2
  138. pixeltable/metadata/converters/convert_24.py +2 -2
  139. pixeltable/metadata/converters/convert_25.py +2 -2
  140. pixeltable/metadata/converters/convert_26.py +2 -2
  141. pixeltable/metadata/converters/convert_29.py +4 -4
  142. pixeltable/metadata/converters/convert_34.py +2 -2
  143. pixeltable/metadata/converters/convert_36.py +2 -2
  144. pixeltable/metadata/converters/convert_37.py +15 -0
  145. pixeltable/metadata/converters/convert_38.py +39 -0
  146. pixeltable/metadata/converters/convert_39.py +124 -0
  147. pixeltable/metadata/converters/convert_40.py +73 -0
  148. pixeltable/metadata/converters/util.py +13 -12
  149. pixeltable/metadata/notes.py +4 -0
  150. pixeltable/metadata/schema.py +79 -42
  151. pixeltable/metadata/utils.py +74 -0
  152. pixeltable/mypy/__init__.py +3 -0
  153. pixeltable/mypy/mypy_plugin.py +123 -0
  154. pixeltable/plan.py +274 -223
  155. pixeltable/share/__init__.py +1 -1
  156. pixeltable/share/packager.py +259 -129
  157. pixeltable/share/protocol/__init__.py +34 -0
  158. pixeltable/share/protocol/common.py +170 -0
  159. pixeltable/share/protocol/operation_types.py +33 -0
  160. pixeltable/share/protocol/replica.py +109 -0
  161. pixeltable/share/publish.py +213 -57
  162. pixeltable/store.py +238 -175
  163. pixeltable/type_system.py +104 -63
  164. pixeltable/utils/__init__.py +2 -3
  165. pixeltable/utils/arrow.py +108 -13
  166. pixeltable/utils/av.py +298 -0
  167. pixeltable/utils/azure_store.py +305 -0
  168. pixeltable/utils/code.py +3 -3
  169. pixeltable/utils/console_output.py +4 -1
  170. pixeltable/utils/coroutine.py +6 -23
  171. pixeltable/utils/dbms.py +31 -5
  172. pixeltable/utils/description_helper.py +4 -5
  173. pixeltable/utils/documents.py +5 -6
  174. pixeltable/utils/exception_handler.py +7 -30
  175. pixeltable/utils/filecache.py +6 -6
  176. pixeltable/utils/formatter.py +4 -6
  177. pixeltable/utils/gcs_store.py +283 -0
  178. pixeltable/utils/http_server.py +2 -3
  179. pixeltable/utils/iceberg.py +1 -2
  180. pixeltable/utils/image.py +17 -0
  181. pixeltable/utils/lancedb.py +88 -0
  182. pixeltable/utils/local_store.py +316 -0
  183. pixeltable/utils/misc.py +5 -0
  184. pixeltable/utils/object_stores.py +528 -0
  185. pixeltable/utils/pydantic.py +60 -0
  186. pixeltable/utils/pytorch.py +5 -6
  187. pixeltable/utils/s3_store.py +392 -0
  188. pixeltable-0.4.20.dist-info/METADATA +587 -0
  189. pixeltable-0.4.20.dist-info/RECORD +218 -0
  190. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info}/WHEEL +1 -1
  191. pixeltable-0.4.20.dist-info/entry_points.txt +2 -0
  192. pixeltable/__version__.py +0 -3
  193. pixeltable/ext/__init__.py +0 -17
  194. pixeltable/ext/functions/__init__.py +0 -11
  195. pixeltable/ext/functions/whisperx.py +0 -77
  196. pixeltable/utils/media_store.py +0 -77
  197. pixeltable/utils/s3.py +0 -17
  198. pixeltable/utils/sample.py +0 -25
  199. pixeltable-0.4.0rc3.dist-info/METADATA +0 -435
  200. pixeltable-0.4.0rc3.dist-info/RECORD +0 -189
  201. pixeltable-0.4.0rc3.dist-info/entry_points.txt +0 -3
  202. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info/licenses}/LICENSE +0 -0
@@ -1,11 +1,12 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import dataclasses
3
4
  import datetime
4
5
  import io
5
6
  import urllib.parse
6
7
  import urllib.request
7
8
  from pathlib import Path
8
- from typing import Any, Optional
9
+ from typing import Any
9
10
 
10
11
  import numpy as np
11
12
  import pgvector.sqlalchemy # type: ignore[import-untyped]
@@ -13,14 +14,72 @@ import PIL
13
14
  import PIL.Image
14
15
  import sqlalchemy as sql
15
16
 
16
- from pixeltable import env
17
+ import pixeltable.utils.image as image_utils
18
+ from pixeltable import catalog, env
19
+ from pixeltable.utils.local_store import TempStore
20
+ from pixeltable.utils.misc import non_none_dict_factory
21
+
22
+
23
+ @dataclasses.dataclass
24
+ class ArrayMd:
25
+ """
26
+ Metadata for array cells that are stored externally.
27
+ """
28
+
29
+ start: int
30
+ end: int
31
+
32
+ # we store bool arrays as packed bits (uint8 arrays), and need to record the shape to reconstruct the array
33
+ is_bool: bool = False
34
+ shape: tuple[int, ...] | None = None
35
+
36
+ def as_dict(self) -> dict:
37
+ # dict_factory: suppress Nones
38
+ x = dataclasses.asdict(self, dict_factory=non_none_dict_factory)
39
+ return x
40
+
41
+
42
+ @dataclasses.dataclass
43
+ class CellMd:
44
+ """
45
+ Content of the cellmd column.
46
+
47
+ All fields are optional, to minimize storage.
48
+ """
49
+
50
+ errortype: str | None = None
51
+ errormsg: str | None = None
52
+
53
+ # a list of file urls that are used to store images and arrays; only set for json and array columns
54
+ # for json columns: a list of all urls referenced in the column value
55
+ # for array columns: a single url
56
+ file_urls: list[str] | None = None
57
+
58
+ array_md: ArrayMd | None = None
59
+
60
+ @classmethod
61
+ def from_dict(cls, d: dict) -> CellMd:
62
+ x: CellMd
63
+ if 'array_md' in d:
64
+ d2 = d.copy()
65
+ del d2['array_md']
66
+ x = cls(**d2, array_md=ArrayMd(**d['array_md']))
67
+ else:
68
+ x = cls(**d)
69
+ return x
70
+
71
+ def as_dict(self) -> dict:
72
+ x = dataclasses.asdict(self, dict_factory=non_none_dict_factory)
73
+ return x
17
74
 
18
75
 
19
76
  class DataRow:
20
77
  """
21
78
  Encapsulates all data and execution state needed by RowBuilder and DataRowBatch:
22
79
  - state for in-memory computation
23
- - state for storing the data
80
+ - state needed for expression evaluation
81
+ - containers for output column values
82
+
24
83
  This is not meant to be a black-box abstraction.
25
84
 
26
85
  In-memory representations by column type:
@@ -38,37 +97,49 @@ class DataRow:
38
97
  - DocumentType: local path if available, otherwise url
39
98
  """
40
99
 
100
+ # expr evaluation state; indexed by slot idx
41
101
  vals: np.ndarray # of object
42
102
  has_val: np.ndarray # of bool
43
103
  excs: np.ndarray # of object
44
-
45
- # expr evaluation state; indexed by slot idx
46
104
  missing_slots: np.ndarray # of bool; number of missing dependencies
47
105
  missing_dependents: np.ndarray # of int16; number of missing dependents
48
106
  is_scheduled: np.ndarray # of bool; True if this slot is scheduled for evaluation
49
107
 
50
- # control structures that are shared across all DataRows in a batch
51
- img_slot_idxs: list[int]
52
- media_slot_idxs: list[int]
53
- array_slot_idxs: list[int]
54
-
55
- # the primary key of a store row is a sequence of ints (the number is different for table vs view)
56
- pk: Optional[tuple[int, ...]]
108
+ # CellMd needed for query execution; needs to be indexed by slot idx, not column id, to work for joins
109
+ slot_md: dict[int, CellMd]
57
110
 
58
111
  # file_urls:
59
112
  # - stored url of file for media in vals[i]
60
113
  # - None if vals[i] is not media type
61
114
  # - not None if file_paths[i] is not None
115
+ # TODO: this is a sparse vector; should it be a dict[int, str]?
62
116
  file_urls: np.ndarray # of str
63
117
 
64
118
  # file_paths:
65
119
  # - local path of media file in vals[i]; points to the file cache if file_urls[i] is remote
66
120
  # - None if vals[i] is not a media type or if there is no local file yet for file_urls[i]
121
+ # TODO: this is a sparse vector; should it be a dict[int, str]?
67
122
  file_paths: np.ndarray # of str
68
123
 
124
+ # If `may_have_exc` is False, then we guarantee that no slot has an exception set. This is used to optimize
125
+ # exception handling under normal operation.
126
+ _may_have_exc: bool
127
+
128
+ # the primary key of a store row is a sequence of ints (the number is different for table vs view)
129
+ pk: tuple[int, ...] | None
69
130
  # for nested rows (ie, those produced by JsonMapperDispatcher)
70
- parent_row: Optional[DataRow]
71
- parent_slot_idx: Optional[int]
131
+ parent_row: DataRow | None
132
+ parent_slot_idx: int | None
133
+
134
+ # state for table output (insert()/update()); key: column id
135
+ cell_vals: dict[int, Any] # materialized values of output columns, in the format required for the column
136
+ cell_md: dict[int, CellMd]
137
+
138
+ # control structures that are shared across all DataRows in a batch
139
+ img_slot_idxs: list[int]
140
+ media_slot_idxs: list[int]
141
+ array_slot_idxs: list[int]
142
+ json_slot_idxs: list[int]
72
143
 
73
144
  def __init__(
74
145
  self,
@@ -76,36 +147,42 @@ class DataRow:
76
147
  img_slot_idxs: list[int],
77
148
  media_slot_idxs: list[int],
78
149
  array_slot_idxs: list[int],
79
- parent_row: Optional[DataRow] = None,
80
- parent_slot_idx: Optional[int] = None,
150
+ json_slot_idxs: list[int],
151
+ parent_row: DataRow | None = None,
152
+ parent_slot_idx: int | None = None,
81
153
  ):
82
- self.img_slot_idxs = img_slot_idxs
83
- self.media_slot_idxs = media_slot_idxs
84
- self.array_slot_idxs = array_slot_idxs
85
154
  self.init(size)
86
155
  self.parent_row = parent_row
87
156
  self.parent_slot_idx = parent_slot_idx
88
-
89
- def init(self, num_slots: int) -> None:
90
- self.vals = np.full(num_slots, None, dtype=object)
91
- self.has_val = np.zeros(num_slots, dtype=bool)
92
- self.excs = np.full(num_slots, None, dtype=object)
93
- self.missing_slots = np.zeros(num_slots, dtype=bool)
94
- self.missing_dependents = np.zeros(num_slots, dtype=np.int16)
95
- self.is_scheduled = np.zeros(num_slots, dtype=bool)
157
+ self.img_slot_idxs = img_slot_idxs
158
+ self.media_slot_idxs = media_slot_idxs
159
+ self.array_slot_idxs = array_slot_idxs
160
+ self.json_slot_idxs = json_slot_idxs
161
+
162
+ def init(self, size: int) -> None:
163
+ self.vals = np.full(size, None, dtype=object)
164
+ self.has_val = np.zeros(size, dtype=bool)
165
+ self.excs = np.full(size, None, dtype=object)
166
+ self.missing_slots = np.zeros(size, dtype=bool)
167
+ self.missing_dependents = np.zeros(size, dtype=np.int16)
168
+ self.is_scheduled = np.zeros(size, dtype=bool)
169
+ self.slot_md = {}
170
+ self.file_urls = np.full(size, None, dtype=object)
171
+ self.file_paths = np.full(size, None, dtype=object)
172
+ self._may_have_exc = False
173
+ self.cell_vals = {}
174
+ self.cell_md = {}
96
175
  self.pk = None
97
- self.file_urls = np.full(num_slots, None, dtype=object)
98
- self.file_paths = np.full(num_slots, None, dtype=object)
99
176
  self.parent_row = None
100
177
  self.parent_slot_idx = None
101
178
 
102
- def clear(self, idxs: Optional[np.ndarray] = None) -> None:
103
- if idxs is not None:
104
- self.has_val[idxs] = False
105
- self.vals[idxs] = None
106
- self.excs[idxs] = None
107
- self.file_urls[idxs] = None
108
- self.file_paths[idxs] = None
179
+ def clear(self, slot_idxs: np.ndarray | None = None) -> None:
180
+ if slot_idxs is not None:
181
+ self.has_val[slot_idxs] = False
182
+ self.vals[slot_idxs] = None
183
+ self.excs[slot_idxs] = None
184
+ self.file_urls[slot_idxs] = None
185
+ self.file_paths[slot_idxs] = None
109
186
  else:
110
187
  self.init(len(self.vals))
111
188
 
@@ -132,20 +209,23 @@ class DataRow:
132
209
  def set_pk(self, pk: tuple[int, ...]) -> None:
133
210
  self.pk = pk
134
211
 
135
- def has_exc(self, slot_idx: Optional[int] = None) -> bool:
212
+ def has_exc(self, slot_idx: int | None = None) -> bool:
136
213
  """
137
214
  Returns True if an exception has been set for the given slot index, or for any slot index if slot_idx is None
138
215
  """
216
+ if not self._may_have_exc:
217
+ return False
218
+
139
219
  if slot_idx is not None:
140
220
  return self.excs[slot_idx] is not None
141
221
  return (self.excs != None).any()
142
222
 
143
- def get_exc(self, slot_idx: int) -> Optional[Exception]:
223
+ def get_exc(self, slot_idx: int) -> Exception | None:
144
224
  exc = self.excs[slot_idx]
145
225
  assert exc is None or isinstance(exc, Exception)
146
226
  return exc
147
227
 
148
- def get_first_exc(self) -> Optional[Exception]:
228
+ def get_first_exc(self) -> Exception | None:
149
229
  mask = self.excs != None
150
230
  if not mask.any():
151
231
  return None
@@ -154,6 +234,7 @@ class DataRow:
154
234
  def set_exc(self, slot_idx: int, exc: Exception) -> None:
155
235
  assert self.excs[slot_idx] is None
156
236
  self.excs[slot_idx] = exc
237
+ self._may_have_exc = True
157
238
 
158
239
  # an exception means the value is None
159
240
  self.has_val[slot_idx] = True
@@ -179,7 +260,7 @@ class DataRow:
179
260
 
180
261
  return self.vals[index]
181
262
 
182
- def get_stored_val(self, index: int, sa_col_type: Optional[sql.types.TypeEngine] = None) -> Any:
263
+ def get_stored_val(self, index: int, sa_col_type: sql.types.TypeEngine | None = None) -> Any:
183
264
  """Return the value that gets stored in the db"""
184
265
  assert self.excs[index] is None
185
266
  if not self.has_val[index]:
@@ -247,30 +328,46 @@ class DataRow:
247
328
  self.vals[idx] = val
248
329
  self.has_val[idx] = True
249
330
 
250
- def flush_img(self, index: int, filepath: Optional[str] = None) -> None:
251
- """Discard the in-memory value and save it to a local file, if filepath is not None"""
331
+ def prepare_col_val_for_save(self, index: int, col: catalog.Column | None = None) -> bool:
332
+ """
333
+ Prepare to save a column's value into the appropriate store. Discard unneeded values.
334
+
335
+ Return:
336
+ True if the media object in the column needs to be saved.
337
+ """
252
338
  if self.vals[index] is None:
253
- return
339
+ return False
340
+
341
+ if self.file_urls[index] is not None:
342
+ return False
343
+
254
344
  assert self.excs[index] is None
255
345
  if self.file_paths[index] is None:
256
- if filepath is not None:
257
- image = self.vals[index]
258
- assert isinstance(image, PIL.Image.Image)
259
- # Default to JPEG unless the image has a transparency layer (which isn't supported by JPEG).
260
- # In that case, use WebP instead.
261
- format = 'webp' if image.has_transparency_data else 'jpeg'
262
- if not filepath.endswith(f'.{format}'):
263
- filepath += f'.{format}'
264
- self.file_paths[index] = filepath
265
- self.file_urls[index] = urllib.parse.urljoin('file:', urllib.request.pathname2url(filepath))
266
- image.save(filepath, format=format)
346
+ if col is not None:
347
+ # This is a media object that needs to be saved
348
+ return True
267
349
  else:
268
- # we discard the content of this cell
350
+ # This is a media object that we don't care about, so we discard it
269
351
  self.has_val[index] = False
270
352
  else:
271
353
  # we already have a file for this image, nothing left to do
272
354
  pass
355
+
356
+ self.vals[index] = None
357
+ return False
358
+
359
+ def save_media_to_temp(self, index: int, col: catalog.Column) -> str:
360
+ """Save the media object in the column to the TempStore.
361
+ Objects cannot be saved directly to general destinations."""
362
+ assert col.col_type.is_media_type()
363
+ val = self.vals[index]
364
+ format = None
365
+ if isinstance(val, PIL.Image.Image):
366
+ format = image_utils.default_format(val)
367
+ filepath, url = TempStore.save_media_object(val, col, format=format)
368
+ self.file_paths[index] = str(filepath) if filepath is not None else None
273
369
  self.vals[index] = None
370
+ return url
274
371
 
275
372
  @property
276
373
  def rowid(self) -> tuple[int, ...]:
pixeltable/exprs/expr.py CHANGED
@@ -7,7 +7,7 @@ import inspect
7
7
  import json
8
8
  import sys
9
9
  import typing
10
- from typing import TYPE_CHECKING, Any, Callable, Iterable, Iterator, Optional, TypeVar, Union, overload
10
+ from typing import TYPE_CHECKING, Any, Callable, Iterable, Iterator, TypeVar, overload
11
11
  from uuid import UUID
12
12
 
13
13
  import numpy as np
@@ -29,7 +29,7 @@ class ExprScope:
29
29
  parent is None: outermost scope
30
30
  """
31
31
 
32
- def __init__(self, parent: Optional[ExprScope]):
32
+ def __init__(self, parent: ExprScope | None):
33
33
  self.parent = parent
34
34
 
35
35
  def is_contained_in(self, other: ExprScope) -> bool:
@@ -61,13 +61,13 @@ class Expr(abc.ABC):
61
61
  # - set by the subclass's __init__()
62
62
  # - produced by _create_id()
63
63
  # - not expected to survive a serialize()/deserialize() roundtrip
64
- id: Optional[int]
64
+ id: int | None
65
65
 
66
66
  # index of the expr's value in the data row:
67
67
  # - set for all materialized exprs
68
68
  # - None: not executable
69
69
  # - not set for subexprs that don't need to be materialized because the parent can be materialized via SQL
70
- slot_idx: Optional[int]
70
+ slot_idx: int | None
71
71
 
72
72
  T = TypeVar('T', bound='Expr')
73
73
 
@@ -103,7 +103,7 @@ class Expr(abc.ABC):
103
103
  assert not has_rel_path, self._expr_tree()
104
104
  assert not self._has_relative_path(), self._expr_tree()
105
105
 
106
- def _bind_rel_paths(self, mapper: Optional['exprs.JsonMapperDispatch'] = None) -> None:
106
+ def _bind_rel_paths(self, mapper: 'exprs.JsonMapperDispatch' | None = None) -> None:
107
107
  for c in self.components:
108
108
  c._bind_rel_paths(mapper)
109
109
 
@@ -118,7 +118,7 @@ class Expr(abc.ABC):
118
118
  for c in self.components:
119
119
  c._expr_tree_r(indent + 2, buf)
120
120
 
121
- def default_column_name(self) -> Optional[str]:
121
+ def default_column_name(self) -> str | None:
122
122
  """
123
123
  Returns:
124
124
  None if this expression lacks a default name,
@@ -127,7 +127,7 @@ class Expr(abc.ABC):
127
127
  return None
128
128
 
129
129
  @property
130
- def validation_error(self) -> Optional[str]:
130
+ def validation_error(self) -> str | None:
131
131
  """
132
132
  Subclasses can override this to indicate that validation has failed after a catalog load.
133
133
 
@@ -205,12 +205,12 @@ class Expr(abc.ABC):
205
205
  return result
206
206
 
207
207
  @classmethod
208
- def copy_list(cls, expr_list: Optional[list[Expr]]) -> Optional[list[Expr]]:
208
+ def copy_list(cls, expr_list: list[Expr] | None) -> list[Expr] | None:
209
209
  if expr_list is None:
210
210
  return None
211
211
  return [e.copy() for e in expr_list]
212
212
 
213
- def __deepcopy__(self, memo: Optional[dict[int, Any]] = None) -> Expr:
213
+ def __deepcopy__(self, memo: dict[int, Any] | None = None) -> Expr:
214
214
  # we don't need to create an actual deep copy because all state other than execution state is read-only
215
215
  if memo is None:
216
216
  memo = {}
@@ -241,7 +241,7 @@ class Expr(abc.ABC):
241
241
  for i in range(len(expr_list)):
242
242
  expr_list[i] = expr_list[i].substitute(spec)
243
243
 
244
- def resolve_computed_cols(self, resolve_cols: Optional[set[catalog.Column]] = None) -> Expr:
244
+ def resolve_computed_cols(self, resolve_cols: set[catalog.Column] | None = None) -> Expr:
245
245
  """
246
246
  Recursively replace ColRefs to unstored computed columns with their value exprs.
247
247
  Also replaces references to stored computed columns in resolve_cols.
@@ -309,18 +309,18 @@ class Expr(abc.ABC):
309
309
 
310
310
  @overload
311
311
  def subexprs(
312
- self, *, filter: Optional[Callable[[Expr], bool]] = None, traverse_matches: bool = True
312
+ self, *, filter: Callable[[Expr], bool] | None = None, traverse_matches: bool = True
313
313
  ) -> Iterator[Expr]: ...
314
314
 
315
315
  @overload
316
316
  def subexprs(
317
- self, expr_class: type[T], filter: Optional[Callable[[Expr], bool]] = None, traverse_matches: bool = True
317
+ self, expr_class: type[T], filter: Callable[[Expr], bool] | None = None, traverse_matches: bool = True
318
318
  ) -> Iterator[T]: ...
319
319
 
320
320
  def subexprs(
321
321
  self,
322
- expr_class: Optional[type[T]] = None,
323
- filter: Optional[Callable[[Expr], bool]] = None,
322
+ expr_class: type[T] | None = None,
323
+ filter: Callable[[Expr], bool] | None = None,
324
324
  traverse_matches: bool = True,
325
325
  ) -> Iterator[T]:
326
326
  """
@@ -339,11 +339,7 @@ class Expr(abc.ABC):
339
339
  @overload
340
340
  @classmethod
341
341
  def list_subexprs(
342
- cls,
343
- expr_list: Iterable[Expr],
344
- *,
345
- filter: Optional[Callable[[Expr], bool]] = None,
346
- traverse_matches: bool = True,
342
+ cls, expr_list: Iterable[Expr], *, filter: Callable[[Expr], bool] | None = None, traverse_matches: bool = True
347
343
  ) -> Iterator[Expr]: ...
348
344
 
349
345
  @overload
@@ -352,7 +348,7 @@ class Expr(abc.ABC):
352
348
  cls,
353
349
  expr_list: Iterable[Expr],
354
350
  expr_class: type[T],
355
- filter: Optional[Callable[[Expr], bool]] = None,
351
+ filter: Callable[[Expr], bool] | None = None,
356
352
  traverse_matches: bool = True,
357
353
  ) -> Iterator[T]: ...
358
354
 
@@ -360,15 +356,24 @@ class Expr(abc.ABC):
360
356
  def list_subexprs(
361
357
  cls,
362
358
  expr_list: Iterable[Expr],
363
- expr_class: Optional[type[T]] = None,
364
- filter: Optional[Callable[[Expr], bool]] = None,
359
+ expr_class: type[T] | None = None,
360
+ filter: Callable[[Expr], bool] | None = None,
365
361
  traverse_matches: bool = True,
366
362
  ) -> Iterator[T]:
367
363
  """Produce subexprs for all exprs in list. Can contain duplicates."""
368
364
  for e in expr_list:
369
365
  yield from e.subexprs(expr_class=expr_class, filter=filter, traverse_matches=traverse_matches)
370
366
 
371
- def _contains(self, cls: Optional[type[Expr]] = None, filter: Optional[Callable[[Expr], bool]] = None) -> bool:
367
+ @classmethod
368
+ def list_contains(
369
+ cls,
370
+ expr_list: Iterable[Expr],
371
+ expr_class: type[Expr] | None = None,
372
+ filter: Callable[[Expr], bool] | None = None,
373
+ ) -> bool:
374
+ return any(e._contains(expr_class, filter) for e in expr_list)
375
+
376
+ def _contains(self, cls: type[Expr] | None = None, filter: Callable[[Expr], bool] | None = None) -> bool:
372
377
  """
373
378
  Returns True if any subexpr is an instance of cls and/or matches filter.
374
379
  """
@@ -387,34 +392,36 @@ class Expr(abc.ABC):
387
392
  from .column_ref import ColumnRef
388
393
  from .rowid_ref import RowidRef
389
394
 
390
- return {ref.col.tbl.id for ref in self.subexprs(ColumnRef)} | {ref.tbl.id for ref in self.subexprs(RowidRef)}
395
+ return {ref.col.get_tbl().id for ref in self.subexprs(ColumnRef)} | {
396
+ ref.tbl.id for ref in self.subexprs(RowidRef)
397
+ }
391
398
 
392
399
  @classmethod
393
400
  def all_tbl_ids(cls, exprs_: Iterable[Expr]) -> set[UUID]:
394
401
  return {tbl_id for e in exprs_ for tbl_id in e.tbl_ids()}
395
402
 
396
403
  @classmethod
397
- def get_refd_columns(cls, expr_dict: dict[str, Any]) -> list[catalog.Column]:
404
+ def get_refd_column_ids(cls, expr_dict: dict[str, Any]) -> set[catalog.QColumnId]:
398
405
  """Return Columns referenced by expr_dict."""
399
- result: list[catalog.Column] = []
406
+ result: set[catalog.QColumnId] = set()
400
407
  assert '_classname' in expr_dict
401
408
  from .column_ref import ColumnRef
402
409
 
403
410
  if expr_dict['_classname'] == 'ColumnRef':
404
- result.append(ColumnRef.get_column(expr_dict))
411
+ result.add(ColumnRef.get_column_id(expr_dict))
405
412
  if 'components' in expr_dict:
406
413
  for component_dict in expr_dict['components']:
407
- result.extend(cls.get_refd_columns(component_dict))
414
+ result.update(cls.get_refd_column_ids(component_dict))
408
415
  return result
409
416
 
410
- def as_literal(self) -> Optional[Expr]:
417
+ def as_literal(self) -> Expr | None:
411
418
  """
412
419
  Return a Literal expression if this expression can be evaluated to a constant value, otherwise return None.
413
420
  """
414
421
  return None
415
422
 
416
423
  @classmethod
417
- def from_array(cls, elements: Iterable) -> Optional[Expr]:
424
+ def from_array(cls, elements: Iterable) -> Expr | None:
418
425
  from .inline_expr import InlineArray
419
426
  from .literal import Literal
420
427
 
@@ -437,7 +444,7 @@ class Expr(abc.ABC):
437
444
  return self
438
445
 
439
446
  @classmethod
440
- def from_object(cls, o: object) -> Optional[Expr]:
447
+ def from_object(cls, o: object) -> Expr | None:
441
448
  """
442
449
  Try to turn a literal object into an Expr.
443
450
  """
@@ -467,7 +474,7 @@ class Expr(abc.ABC):
467
474
  return Literal(o, col_type=obj_type)
468
475
  return None
469
476
 
470
- def sql_expr(self, sql_elements: 'exprs.SqlElementCache') -> Optional[sql.ColumnElement]:
477
+ def sql_expr(self, sql_elements: 'exprs.SqlElementCache') -> sql.ColumnElement | None:
471
478
  """
472
479
  If this expr can be materialized directly in SQL:
473
480
  - returns a ColumnElement
@@ -486,6 +493,18 @@ class Expr(abc.ABC):
486
493
  """
487
494
  pass
488
495
 
496
+ def prepare(self) -> None:
497
+ """
498
+ Create execution state. This is called before the first eval() call.
499
+ """
500
+ for c in self.components:
501
+ c.prepare()
502
+
503
+ @classmethod
504
+ def prepare_list(cls, expr_list: Iterable[Expr]) -> None:
505
+ for e in expr_list:
506
+ e.prepare()
507
+
489
508
  def release(self) -> None:
490
509
  """
491
510
  Allow Expr class to tear down execution state. This is called after the last eval() call.
@@ -494,7 +513,7 @@ class Expr(abc.ABC):
494
513
  c.release()
495
514
 
496
515
  @classmethod
497
- def release_list(cls, expr_list: list[Expr]) -> None:
516
+ def release_list(cls, expr_list: Iterable[Expr]) -> None:
498
517
  for e in expr_list:
499
518
  e.release()
500
519
 
@@ -550,7 +569,7 @@ class Expr(abc.ABC):
550
569
  else:
551
570
  return InPredicate(self, value_set_literal=value_set)
552
571
 
553
- def astype(self, new_type: Union[ts.ColumnType, type, _AnnotatedAlias]) -> 'exprs.TypeCast':
572
+ def astype(self, new_type: ts.ColumnType | type | _AnnotatedAlias) -> 'exprs.TypeCast':
554
573
  from pixeltable.exprs import TypeCast
555
574
 
556
575
  # Interpret the type argument the same way we would if given in a schema
@@ -562,7 +581,7 @@ class Expr(abc.ABC):
562
581
  return TypeCast(self, col_type)
563
582
 
564
583
  def apply(
565
- self, fn: Callable, *, col_type: Union[ts.ColumnType, type, _AnnotatedAlias, None] = None
584
+ self, fn: Callable, *, col_type: ts.ColumnType | type | _AnnotatedAlias | None = None
566
585
  ) -> 'exprs.FunctionCall':
567
586
  if col_type is not None:
568
587
  col_type = ts.ColumnType.normalize_type(col_type)
@@ -646,7 +665,7 @@ class Expr(abc.ABC):
646
665
 
647
666
  def _make_comparison(self, op: ComparisonOperator, other: object) -> 'exprs.Comparison':
648
667
  """
649
- other: Union[Expr, LiteralPythonTypes]
668
+ other: Expr | LiteralPythonTypes
650
669
  """
651
670
  # TODO: check for compatibility
652
671
  from .comparison import Comparison
@@ -661,7 +680,7 @@ class Expr(abc.ABC):
661
680
  def __neg__(self) -> 'exprs.ArithmeticExpr':
662
681
  return self._make_arithmetic_expr(ArithmeticOperator.MUL, -1)
663
682
 
664
- def __add__(self, other: object) -> Union[exprs.ArithmeticExpr, exprs.StringOp]:
683
+ def __add__(self, other: object) -> exprs.ArithmeticExpr | exprs.StringOp:
665
684
  if isinstance(self, str) or (isinstance(self, Expr) and self.col_type.is_string_type()):
666
685
  return self._make_string_expr(StringOperator.CONCAT, other)
667
686
  return self._make_arithmetic_expr(ArithmeticOperator.ADD, other)
@@ -669,7 +688,7 @@ class Expr(abc.ABC):
669
688
  def __sub__(self, other: object) -> 'exprs.ArithmeticExpr':
670
689
  return self._make_arithmetic_expr(ArithmeticOperator.SUB, other)
671
690
 
672
- def __mul__(self, other: object) -> Union['exprs.ArithmeticExpr', 'exprs.StringOp']:
691
+ def __mul__(self, other: object) -> 'exprs.ArithmeticExpr' | 'exprs.StringOp':
673
692
  if isinstance(self, str) or (isinstance(self, Expr) and self.col_type.is_string_type()):
674
693
  return self._make_string_expr(StringOperator.REPEAT, other)
675
694
  return self._make_arithmetic_expr(ArithmeticOperator.MUL, other)
@@ -683,7 +702,7 @@ class Expr(abc.ABC):
683
702
  def __floordiv__(self, other: object) -> 'exprs.ArithmeticExpr':
684
703
  return self._make_arithmetic_expr(ArithmeticOperator.FLOORDIV, other)
685
704
 
686
- def __radd__(self, other: object) -> Union['exprs.ArithmeticExpr', 'exprs.StringOp']:
705
+ def __radd__(self, other: object) -> 'exprs.ArithmeticExpr' | 'exprs.StringOp':
687
706
  if isinstance(other, str) or (isinstance(other, Expr) and other.col_type.is_string_type()):
688
707
  return self._rmake_string_expr(StringOperator.CONCAT, other)
689
708
  return self._rmake_arithmetic_expr(ArithmeticOperator.ADD, other)
@@ -691,7 +710,7 @@ class Expr(abc.ABC):
691
710
  def __rsub__(self, other: object) -> 'exprs.ArithmeticExpr':
692
711
  return self._rmake_arithmetic_expr(ArithmeticOperator.SUB, other)
693
712
 
694
- def __rmul__(self, other: object) -> Union['exprs.ArithmeticExpr', 'exprs.StringOp']:
713
+ def __rmul__(self, other: object) -> 'exprs.ArithmeticExpr' | 'exprs.StringOp':
695
714
  if isinstance(other, str) or (isinstance(other, Expr) and other.col_type.is_string_type()):
696
715
  return self._rmake_string_expr(StringOperator.REPEAT, other)
697
716
  return self._rmake_arithmetic_expr(ArithmeticOperator.MUL, other)
@@ -733,7 +752,7 @@ class Expr(abc.ABC):
733
752
 
734
753
  def _make_arithmetic_expr(self, op: ArithmeticOperator, other: object) -> 'exprs.ArithmeticExpr':
735
754
  """
736
- other: Union[Expr, LiteralPythonTypes]
755
+ other: Expr | LiteralPythonTypes
737
756
  """
738
757
  # TODO: check for compatibility
739
758
  from .arithmetic_expr import ArithmeticExpr
@@ -782,7 +801,7 @@ class Expr(abc.ABC):
782
801
 
783
802
  return CompoundPredicate(LogicalOperator.NOT, [self])
784
803
 
785
- def split_conjuncts(self, condition: Callable[[Expr], bool]) -> tuple[list[Expr], Optional[Expr]]:
804
+ def split_conjuncts(self, condition: Callable[[Expr], bool]) -> tuple[list[Expr], Expr | None]:
786
805
  """
787
806
  Returns clauses of a conjunction that meet condition in the first element.
788
807
  The second element contains remaining clauses, rolled into a conjunction.
@@ -793,7 +812,7 @@ class Expr(abc.ABC):
793
812
  else:
794
813
  return [], self
795
814
 
796
- def _make_applicator_function(self, fn: Callable, col_type: Optional[ts.ColumnType]) -> 'func.Function':
815
+ def _make_applicator_function(self, fn: Callable, col_type: ts.ColumnType | None) -> 'func.Function':
797
816
  """
798
817
  Creates a unary pixeltable `Function` that encapsulates a python `Callable`. The result type of
799
818
  the new `Function` is given by `col_type`, and its parameter type will be `self.col_type`.
@@ -1,4 +1,4 @@
1
- from typing import Generic, Iterable, Iterator, Optional, TypeVar
1
+ from typing import Generic, Iterable, Iterator, TypeVar
2
2
 
3
3
  from .expr import Expr
4
4
 
@@ -14,7 +14,7 @@ class ExprDict(Generic[T]):
14
14
 
15
15
  _data: dict[int, tuple[Expr, T]]
16
16
 
17
- def __init__(self, iterable: Optional[Iterable[tuple[Expr, T]]] = None):
17
+ def __init__(self, iterable: Iterable[tuple[Expr, T]] | None = None):
18
18
  self._data = {}
19
19
 
20
20
  if iterable is not None:
@@ -39,7 +39,7 @@ class ExprDict(Generic[T]):
39
39
  def __contains__(self, key: Expr) -> bool:
40
40
  return key.id in self._data
41
41
 
42
- def get(self, key: Expr, default: Optional[T] = None) -> Optional[T]:
42
+ def get(self, key: Expr, default: T | None = None) -> T | None:
43
43
  item = self._data.get(key.id)
44
44
  return item[1] if item is not None else default
45
45