pixeltable 0.3.14__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220) hide show
  1. pixeltable/__init__.py +42 -8
  2. pixeltable/{dataframe.py → _query.py} +470 -206
  3. pixeltable/_version.py +1 -0
  4. pixeltable/catalog/__init__.py +5 -4
  5. pixeltable/catalog/catalog.py +1785 -432
  6. pixeltable/catalog/column.py +190 -113
  7. pixeltable/catalog/dir.py +2 -4
  8. pixeltable/catalog/globals.py +19 -46
  9. pixeltable/catalog/insertable_table.py +191 -98
  10. pixeltable/catalog/path.py +63 -23
  11. pixeltable/catalog/schema_object.py +11 -15
  12. pixeltable/catalog/table.py +843 -436
  13. pixeltable/catalog/table_metadata.py +103 -0
  14. pixeltable/catalog/table_version.py +978 -657
  15. pixeltable/catalog/table_version_handle.py +72 -16
  16. pixeltable/catalog/table_version_path.py +112 -43
  17. pixeltable/catalog/tbl_ops.py +53 -0
  18. pixeltable/catalog/update_status.py +191 -0
  19. pixeltable/catalog/view.py +134 -90
  20. pixeltable/config.py +134 -22
  21. pixeltable/env.py +471 -157
  22. pixeltable/exceptions.py +6 -0
  23. pixeltable/exec/__init__.py +4 -1
  24. pixeltable/exec/aggregation_node.py +7 -8
  25. pixeltable/exec/cache_prefetch_node.py +83 -110
  26. pixeltable/exec/cell_materialization_node.py +268 -0
  27. pixeltable/exec/cell_reconstruction_node.py +168 -0
  28. pixeltable/exec/component_iteration_node.py +4 -3
  29. pixeltable/exec/data_row_batch.py +8 -65
  30. pixeltable/exec/exec_context.py +16 -4
  31. pixeltable/exec/exec_node.py +13 -36
  32. pixeltable/exec/expr_eval/evaluators.py +11 -7
  33. pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
  34. pixeltable/exec/expr_eval/globals.py +8 -5
  35. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  36. pixeltable/exec/expr_eval/schedulers.py +106 -56
  37. pixeltable/exec/globals.py +35 -0
  38. pixeltable/exec/in_memory_data_node.py +19 -19
  39. pixeltable/exec/object_store_save_node.py +293 -0
  40. pixeltable/exec/row_update_node.py +16 -9
  41. pixeltable/exec/sql_node.py +351 -84
  42. pixeltable/exprs/__init__.py +1 -1
  43. pixeltable/exprs/arithmetic_expr.py +27 -22
  44. pixeltable/exprs/array_slice.py +3 -3
  45. pixeltable/exprs/column_property_ref.py +36 -23
  46. pixeltable/exprs/column_ref.py +213 -89
  47. pixeltable/exprs/comparison.py +5 -5
  48. pixeltable/exprs/compound_predicate.py +5 -4
  49. pixeltable/exprs/data_row.py +164 -54
  50. pixeltable/exprs/expr.py +70 -44
  51. pixeltable/exprs/expr_dict.py +3 -3
  52. pixeltable/exprs/expr_set.py +17 -10
  53. pixeltable/exprs/function_call.py +100 -40
  54. pixeltable/exprs/globals.py +2 -2
  55. pixeltable/exprs/in_predicate.py +4 -4
  56. pixeltable/exprs/inline_expr.py +18 -32
  57. pixeltable/exprs/is_null.py +7 -3
  58. pixeltable/exprs/json_mapper.py +8 -8
  59. pixeltable/exprs/json_path.py +56 -22
  60. pixeltable/exprs/literal.py +27 -5
  61. pixeltable/exprs/method_ref.py +2 -2
  62. pixeltable/exprs/object_ref.py +2 -2
  63. pixeltable/exprs/row_builder.py +167 -67
  64. pixeltable/exprs/rowid_ref.py +25 -10
  65. pixeltable/exprs/similarity_expr.py +58 -40
  66. pixeltable/exprs/sql_element_cache.py +4 -4
  67. pixeltable/exprs/string_op.py +5 -5
  68. pixeltable/exprs/type_cast.py +3 -5
  69. pixeltable/func/__init__.py +1 -0
  70. pixeltable/func/aggregate_function.py +8 -8
  71. pixeltable/func/callable_function.py +9 -9
  72. pixeltable/func/expr_template_function.py +17 -11
  73. pixeltable/func/function.py +18 -20
  74. pixeltable/func/function_registry.py +6 -7
  75. pixeltable/func/globals.py +2 -3
  76. pixeltable/func/mcp.py +74 -0
  77. pixeltable/func/query_template_function.py +29 -27
  78. pixeltable/func/signature.py +46 -19
  79. pixeltable/func/tools.py +31 -13
  80. pixeltable/func/udf.py +18 -20
  81. pixeltable/functions/__init__.py +16 -0
  82. pixeltable/functions/anthropic.py +123 -77
  83. pixeltable/functions/audio.py +147 -10
  84. pixeltable/functions/bedrock.py +13 -6
  85. pixeltable/functions/date.py +7 -4
  86. pixeltable/functions/deepseek.py +35 -43
  87. pixeltable/functions/document.py +81 -0
  88. pixeltable/functions/fal.py +76 -0
  89. pixeltable/functions/fireworks.py +11 -20
  90. pixeltable/functions/gemini.py +195 -39
  91. pixeltable/functions/globals.py +142 -14
  92. pixeltable/functions/groq.py +108 -0
  93. pixeltable/functions/huggingface.py +1056 -24
  94. pixeltable/functions/image.py +115 -57
  95. pixeltable/functions/json.py +1 -1
  96. pixeltable/functions/llama_cpp.py +28 -13
  97. pixeltable/functions/math.py +67 -5
  98. pixeltable/functions/mistralai.py +18 -55
  99. pixeltable/functions/net.py +70 -0
  100. pixeltable/functions/ollama.py +20 -13
  101. pixeltable/functions/openai.py +240 -226
  102. pixeltable/functions/openrouter.py +143 -0
  103. pixeltable/functions/replicate.py +4 -4
  104. pixeltable/functions/reve.py +250 -0
  105. pixeltable/functions/string.py +239 -69
  106. pixeltable/functions/timestamp.py +16 -16
  107. pixeltable/functions/together.py +24 -84
  108. pixeltable/functions/twelvelabs.py +188 -0
  109. pixeltable/functions/util.py +6 -1
  110. pixeltable/functions/uuid.py +30 -0
  111. pixeltable/functions/video.py +1515 -107
  112. pixeltable/functions/vision.py +8 -8
  113. pixeltable/functions/voyageai.py +289 -0
  114. pixeltable/functions/whisper.py +16 -8
  115. pixeltable/functions/whisperx.py +179 -0
  116. pixeltable/{ext/functions → functions}/yolox.py +2 -4
  117. pixeltable/globals.py +362 -115
  118. pixeltable/index/base.py +17 -21
  119. pixeltable/index/btree.py +28 -22
  120. pixeltable/index/embedding_index.py +100 -118
  121. pixeltable/io/__init__.py +4 -2
  122. pixeltable/io/datarows.py +8 -7
  123. pixeltable/io/external_store.py +56 -105
  124. pixeltable/io/fiftyone.py +13 -13
  125. pixeltable/io/globals.py +31 -30
  126. pixeltable/io/hf_datasets.py +61 -16
  127. pixeltable/io/label_studio.py +74 -70
  128. pixeltable/io/lancedb.py +3 -0
  129. pixeltable/io/pandas.py +21 -12
  130. pixeltable/io/parquet.py +25 -105
  131. pixeltable/io/table_data_conduit.py +250 -123
  132. pixeltable/io/utils.py +4 -4
  133. pixeltable/iterators/__init__.py +2 -1
  134. pixeltable/iterators/audio.py +26 -25
  135. pixeltable/iterators/base.py +9 -3
  136. pixeltable/iterators/document.py +112 -78
  137. pixeltable/iterators/image.py +12 -15
  138. pixeltable/iterators/string.py +11 -4
  139. pixeltable/iterators/video.py +523 -120
  140. pixeltable/metadata/__init__.py +14 -3
  141. pixeltable/metadata/converters/convert_13.py +2 -2
  142. pixeltable/metadata/converters/convert_18.py +2 -2
  143. pixeltable/metadata/converters/convert_19.py +2 -2
  144. pixeltable/metadata/converters/convert_20.py +2 -2
  145. pixeltable/metadata/converters/convert_21.py +2 -2
  146. pixeltable/metadata/converters/convert_22.py +2 -2
  147. pixeltable/metadata/converters/convert_24.py +2 -2
  148. pixeltable/metadata/converters/convert_25.py +2 -2
  149. pixeltable/metadata/converters/convert_26.py +2 -2
  150. pixeltable/metadata/converters/convert_29.py +4 -4
  151. pixeltable/metadata/converters/convert_30.py +34 -21
  152. pixeltable/metadata/converters/convert_34.py +2 -2
  153. pixeltable/metadata/converters/convert_35.py +9 -0
  154. pixeltable/metadata/converters/convert_36.py +38 -0
  155. pixeltable/metadata/converters/convert_37.py +15 -0
  156. pixeltable/metadata/converters/convert_38.py +39 -0
  157. pixeltable/metadata/converters/convert_39.py +124 -0
  158. pixeltable/metadata/converters/convert_40.py +73 -0
  159. pixeltable/metadata/converters/convert_41.py +12 -0
  160. pixeltable/metadata/converters/convert_42.py +9 -0
  161. pixeltable/metadata/converters/convert_43.py +44 -0
  162. pixeltable/metadata/converters/util.py +20 -31
  163. pixeltable/metadata/notes.py +9 -0
  164. pixeltable/metadata/schema.py +140 -53
  165. pixeltable/metadata/utils.py +74 -0
  166. pixeltable/mypy/__init__.py +3 -0
  167. pixeltable/mypy/mypy_plugin.py +123 -0
  168. pixeltable/plan.py +382 -115
  169. pixeltable/share/__init__.py +1 -1
  170. pixeltable/share/packager.py +547 -83
  171. pixeltable/share/protocol/__init__.py +33 -0
  172. pixeltable/share/protocol/common.py +165 -0
  173. pixeltable/share/protocol/operation_types.py +33 -0
  174. pixeltable/share/protocol/replica.py +119 -0
  175. pixeltable/share/publish.py +257 -59
  176. pixeltable/store.py +311 -194
  177. pixeltable/type_system.py +373 -211
  178. pixeltable/utils/__init__.py +2 -3
  179. pixeltable/utils/arrow.py +131 -17
  180. pixeltable/utils/av.py +298 -0
  181. pixeltable/utils/azure_store.py +346 -0
  182. pixeltable/utils/coco.py +6 -6
  183. pixeltable/utils/code.py +3 -3
  184. pixeltable/utils/console_output.py +4 -1
  185. pixeltable/utils/coroutine.py +6 -23
  186. pixeltable/utils/dbms.py +32 -6
  187. pixeltable/utils/description_helper.py +4 -5
  188. pixeltable/utils/documents.py +7 -18
  189. pixeltable/utils/exception_handler.py +7 -30
  190. pixeltable/utils/filecache.py +6 -6
  191. pixeltable/utils/formatter.py +86 -48
  192. pixeltable/utils/gcs_store.py +295 -0
  193. pixeltable/utils/http.py +133 -0
  194. pixeltable/utils/http_server.py +2 -3
  195. pixeltable/utils/iceberg.py +1 -2
  196. pixeltable/utils/image.py +17 -0
  197. pixeltable/utils/lancedb.py +90 -0
  198. pixeltable/utils/local_store.py +322 -0
  199. pixeltable/utils/misc.py +5 -0
  200. pixeltable/utils/object_stores.py +573 -0
  201. pixeltable/utils/pydantic.py +60 -0
  202. pixeltable/utils/pytorch.py +5 -6
  203. pixeltable/utils/s3_store.py +527 -0
  204. pixeltable/utils/sql.py +26 -0
  205. pixeltable/utils/system.py +30 -0
  206. pixeltable-0.5.7.dist-info/METADATA +579 -0
  207. pixeltable-0.5.7.dist-info/RECORD +227 -0
  208. {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
  209. pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
  210. pixeltable/__version__.py +0 -3
  211. pixeltable/catalog/named_function.py +0 -40
  212. pixeltable/ext/__init__.py +0 -17
  213. pixeltable/ext/functions/__init__.py +0 -11
  214. pixeltable/ext/functions/whisperx.py +0 -77
  215. pixeltable/utils/media_store.py +0 -77
  216. pixeltable/utils/s3.py +0 -17
  217. pixeltable-0.3.14.dist-info/METADATA +0 -434
  218. pixeltable-0.3.14.dist-info/RECORD +0 -186
  219. pixeltable-0.3.14.dist-info/entry_points.txt +0 -3
  220. {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
@@ -1,20 +1,29 @@
1
1
  from __future__ import annotations
2
2
 
3
- import copy
4
- from typing import Any, Optional, Sequence
3
+ import warnings
4
+ from typing import TYPE_CHECKING, Any, Sequence, cast
5
5
  from uuid import UUID
6
6
 
7
+ import PIL.Image
7
8
  import sqlalchemy as sql
8
9
 
9
- import pixeltable as pxt
10
- from pixeltable import catalog, exceptions as excs, iterators as iters
10
+ import pixeltable.catalog as catalog
11
+ import pixeltable.exceptions as excs
12
+ import pixeltable.iterators as iters
13
+ import pixeltable.type_system as ts
14
+ from pixeltable.catalog.table_version import TableVersionKey
11
15
 
12
16
  from ..utils.description_helper import DescriptionHelper
17
+ from ..utils.filecache import FileCache
13
18
  from .data_row import DataRow
14
19
  from .expr import Expr
20
+ from .literal import Literal
15
21
  from .row_builder import RowBuilder
16
22
  from .sql_element_cache import SqlElementCache
17
23
 
24
+ if TYPE_CHECKING:
25
+ from pixeltable._query import Query, ResultSet
26
+
18
27
 
19
28
  class ColumnRef(Expr):
20
29
  """A reference to a table column
@@ -41,37 +50,38 @@ class ColumnRef(Expr):
41
50
  insert them into the EvalCtxs as needed
42
51
  """
43
52
 
44
- col: catalog.Column
45
- reference_tbl: Optional[catalog.TableVersionPath]
53
+ col: catalog.Column # TODO: merge with col_handle
54
+ col_handle: catalog.ColumnHandle
55
+ reference_tbl: catalog.TableVersionPath | None
46
56
  is_unstored_iter_col: bool
47
- iter_arg_ctx: Optional[RowBuilder.EvalCtx]
48
- base_rowid_len: int
49
- base_rowid: Sequence[Optional[Any]]
50
- iterator: Optional[iters.ComponentIterator]
51
- pos_idx: Optional[int]
52
- id: int
53
57
  perform_validation: bool # if True, performs media validation
58
+ iter_arg_ctx: RowBuilder.EvalCtx | None
59
+ iter_outputs: list[ColumnRef] | None
60
+ base_rowid_len: int # number of rowid columns in the base table
61
+
62
+ # execution state
63
+ base_rowid: Sequence[Any | None]
64
+ iterator: iters.ComponentIterator | None
65
+ pos_idx: int
54
66
 
55
67
  def __init__(
56
68
  self,
57
69
  col: catalog.Column,
58
- reference_tbl: Optional[catalog.TableVersionPath] = None,
59
- perform_validation: Optional[bool] = None,
70
+ reference_tbl: catalog.TableVersionPath | None = None,
71
+ perform_validation: bool | None = None,
60
72
  ):
61
73
  super().__init__(col.col_type)
62
- assert col.tbl is not None
63
74
  self.col = col
64
75
  self.reference_tbl = reference_tbl
65
- self.is_unstored_iter_col = (
66
- col.tbl.get().is_component_view and col.tbl.get().is_iterator_column(col) and not col.is_stored
67
- )
76
+ self.col_handle = col.handle
77
+
78
+ self.is_unstored_iter_col = col.is_iterator_col and not col.is_stored
68
79
  self.iter_arg_ctx = None
69
- # number of rowid columns in the base table
70
- self.base_rowid_len = col.tbl.get().base.get().num_rowid_columns() if self.is_unstored_iter_col else 0
71
- self.base_rowid = [None] * self.base_rowid_len
80
+ self.iter_outputs = None
81
+ self.base_rowid_len = 0
82
+ self.base_rowid = []
72
83
  self.iterator = None
73
- # index of the position column in the view's primary key; don't try to reference tbl.store_tbl here
74
- self.pos_idx = col.tbl.get().num_rowid_columns() - 1 if self.is_unstored_iter_col else None
84
+ self.pos_idx = 0
75
85
 
76
86
  self.perform_validation = False
77
87
  if col.col_type.is_media_type():
@@ -90,22 +100,27 @@ class ColumnRef(Expr):
90
100
  self.components = [non_validating_col_ref]
91
101
  self.id = self._create_id()
92
102
 
93
- def set_iter_arg_ctx(self, iter_arg_ctx: RowBuilder.EvalCtx) -> None:
103
+ def set_iter_arg_ctx(self, iter_arg_ctx: RowBuilder.EvalCtx, iter_outputs: list[ColumnRef]) -> None:
94
104
  self.iter_arg_ctx = iter_arg_ctx
105
+ self.iter_outputs = iter_outputs
106
+ # If this is an unstored iterator column, then the iterator outputs may be needed in order to properly set the
107
+ # iterator position. Therefore, we need to add them as components in order to ensure they're marked as
108
+ # eval dependencies.
109
+ self.components.extend(iter_outputs)
95
110
  assert len(self.iter_arg_ctx.target_slot_idxs) == 1 # a single inline dict
96
111
 
97
112
  def _id_attrs(self) -> list[tuple[str, Any]]:
98
113
  return [
99
114
  *super()._id_attrs(),
100
- ('tbl_id', self.col.tbl.id),
115
+ ('tbl_id', self.col.tbl_handle.id),
101
116
  ('col_id', self.col.id),
102
117
  ('perform_validation', self.perform_validation),
103
118
  ]
104
119
 
105
120
  # override
106
121
  def _retarget(self, tbl_versions: dict[UUID, catalog.TableVersion]) -> ColumnRef:
107
- target = tbl_versions[self.col.tbl.id]
108
- assert self.col.id in target.cols_by_id
122
+ target = tbl_versions[self.col.tbl_handle.id]
123
+ assert self.col.id in target.cols_by_id, f'{target}: {self.col.id} not in {list(target.cols_by_id.keys())}'
109
124
  col = target.cols_by_id[self.col.id]
110
125
  return ColumnRef(col, self.reference_tbl)
111
126
 
@@ -113,12 +128,16 @@ class ColumnRef(Expr):
113
128
  from .column_property_ref import ColumnPropertyRef
114
129
 
115
130
  # resolve column properties
131
+ if name == ColumnPropertyRef.Property.CELLMD.name.lower():
132
+ # This is not user accessible, but used internally to store cell metadata
133
+ return super().__getattr__(name)
134
+
116
135
  if (
117
136
  name == ColumnPropertyRef.Property.ERRORTYPE.name.lower()
118
137
  or name == ColumnPropertyRef.Property.ERRORMSG.name.lower()
119
138
  ):
120
- property_is_present = self.col.is_stored and (self.col.is_computed or self.col_type.is_media_type())
121
- if not property_is_present:
139
+ is_valid = (self.col.is_computed or self.col.col_type.is_media_type()) and self.col.is_stored
140
+ if not is_valid:
122
141
  raise excs.Error(f'{name} only valid for a stored computed or media column: {self}')
123
142
  return ColumnPropertyRef(self, ColumnPropertyRef.Property[name.upper()])
124
143
  if (
@@ -138,78 +157,156 @@ class ColumnRef(Expr):
138
157
 
139
158
  return super().__getattr__(name)
140
159
 
141
- def find_embedding_index(
142
- self, idx_name: Optional[str], method_name: str
143
- ) -> dict[str, catalog.TableVersion.IndexInfo]:
144
- """Return IndexInfo for a column, with an optional given name"""
145
- from pixeltable import index
160
+ def recompute(self, *, cascade: bool = True, errors_only: bool = False) -> catalog.UpdateStatus:
161
+ cat = catalog.Catalog.get()
162
+ # lock_mutable_tree=True: we need to be able to see whether any transitive view has column dependents
163
+ with cat.begin_xact(tbl=self.reference_tbl, for_write=True, lock_mutable_tree=True):
164
+ tbl_version = self.col_handle.tbl_version.get()
165
+ if tbl_version.id != self.reference_tbl.tbl_id:
166
+ raise excs.Error('Cannot recompute column of a base.')
167
+ if tbl_version.is_snapshot:
168
+ raise excs.Error('Cannot recompute column of a snapshot.')
169
+ col_name = self.col_handle.get().name
170
+ status = tbl_version.recompute_columns([col_name], errors_only=errors_only, cascade=cascade)
171
+ FileCache.get().emit_eviction_warnings()
172
+ return status
173
+
174
+ def similarity(
175
+ self,
176
+ item: Any = None,
177
+ *,
178
+ string: str | None = None,
179
+ image: PIL.Image.Image | None = None,
180
+ audio: str | None = None,
181
+ video: str | None = None,
182
+ idx: str | None = None,
183
+ ) -> Expr:
184
+ from .similarity_expr import SimilarityExpr
146
185
 
147
- # determine index to use
148
- idx_info_dict = self.col.get_idx_info(self.reference_tbl)
186
+ if item is not None:
187
+ warnings.warn(
188
+ 'Use of similarity() without specifying an explicit modality is deprecated -- '
189
+ 'since version 0.5.7. Please use one of the following instead:\n'
190
+ ' .similarity(string=...)\n'
191
+ ' .similarity(image=...)\n'
192
+ ' .similarity(audio=...)\n'
193
+ ' .similarity(video=...)',
194
+ DeprecationWarning,
195
+ stacklevel=2,
196
+ )
197
+
198
+ arg_count = (string is not None) + (image is not None) + (audio is not None) + (video is not None)
199
+
200
+ if item is not None and arg_count != 0:
201
+ raise excs.Error('similarity(): `item` is deprecated and cannot be used together with modality arguments')
202
+
203
+ if arg_count > 1:
204
+ raise excs.Error('similarity(): expected exactly one of string=..., image=..., audio=..., video=...')
205
+
206
+ expr: Expr
207
+
208
+ if item is not None:
209
+ if isinstance(item, Expr): # This can happen when using similarity() with @query
210
+ if not (item.col_type.is_string_type() or item.col_type.is_image_type()):
211
+ raise excs.Error(f'similarity(): expected `String` or `Image`; got `{item.col_type}`')
212
+ expr = item
213
+ else:
214
+ if not isinstance(item, (str, PIL.Image.Image)):
215
+ raise excs.Error(f'similarity(): expected `str` or `PIL.Image.Image`; got `{type(item).__name__}`')
216
+ expr = Expr.from_object(item)
217
+ assert expr.col_type.is_string_type() or expr.col_type.is_image_type()
218
+
219
+ if string is not None:
220
+ if isinstance(string, Expr):
221
+ if not string.col_type.is_string_type():
222
+ raise excs.Error(f'similarity(string=...): expected `String`; got `{expr.col_type}`')
223
+ expr = string
224
+ else:
225
+ if not isinstance(string, str):
226
+ raise excs.Error(f'similarity(string=...): expected `str`; got `{type(string).__name__}`')
227
+ expr = Expr.from_object(string)
228
+ assert expr.col_type.is_string_type()
229
+
230
+ if image is not None:
231
+ if isinstance(image, Expr):
232
+ if not image.col_type.is_image_type():
233
+ raise excs.Error(f'similarity(image=...): expected `Image`; got `{image.col_type}`')
234
+ expr = image
235
+ else:
236
+ if not isinstance(image, PIL.Image.Image):
237
+ raise excs.Error(f'similarity(image=...): expected `PIL.Image.Image`; got `{type(image).__name__}`')
238
+ expr = Expr.from_object(image)
239
+ assert expr.col_type.is_image_type()
240
+
241
+ if audio is not None:
242
+ if isinstance(audio, Expr):
243
+ if not audio.col_type.is_audio_type():
244
+ raise excs.Error(f'similarity(audio=...): expected `Audio`; got `{audio.col_type}`')
245
+ expr = audio
246
+ else:
247
+ if not isinstance(audio, str):
248
+ raise excs.Error(
249
+ f'similarity(audio=...): expected `str` (path to audio file); got `{type(audio).__name__}`'
250
+ )
251
+ expr = Literal(audio, ts.AudioType())
252
+
253
+ if video is not None:
254
+ if isinstance(video, Expr):
255
+ if not video.col_type.is_video_type():
256
+ raise excs.Error(f'similarity(video=...): expected `Video`; got `{video.col_type}`')
257
+ expr = video
258
+ else:
259
+ if not isinstance(video, str):
260
+ raise excs.Error(
261
+ f'similarity(video=...): expected `str` (path to video file); got `{type(video).__name__}`'
262
+ )
263
+ expr = Literal(video, ts.VideoType())
149
264
 
150
- embedding_idx_info = {
151
- info: value for info, value in idx_info_dict.items() if isinstance(value.idx, index.EmbeddingIndex)
152
- }
153
- if len(embedding_idx_info) == 0:
154
- raise excs.Error(f'No indices found for {method_name!r} on column {self.col.name!r}')
155
- if idx_name is not None and idx_name not in embedding_idx_info:
156
- raise excs.Error(f'Index {idx_name!r} not found for {method_name!r} on column {self.col.name!r}')
157
- if len(embedding_idx_info) > 1:
158
- if idx_name is None:
159
- raise excs.Error(
160
- f'Column {self.col.name!r} has multiple indices; use the index name to disambiguate: '
161
- f'`{method_name}(..., idx=<index_name>)`'
162
- )
163
- idx_info = {idx_name: embedding_idx_info[idx_name]}
164
- else:
165
- idx_info = embedding_idx_info
166
- return idx_info
265
+ return SimilarityExpr(self, expr, idx_name=idx)
167
266
 
168
- def similarity(self, item: Any, *, idx: Optional[str] = None) -> Expr:
169
- from .similarity_expr import SimilarityExpr
267
+ def embedding(self, *, idx: str | None = None) -> ColumnRef:
268
+ from pixeltable.index import EmbeddingIndex
170
269
 
171
- return SimilarityExpr(self, item, idx_name=idx)
270
+ idx_info = self.tbl.get().get_idx(self.col, idx, EmbeddingIndex)
271
+ return ColumnRef(idx_info.val_col)
172
272
 
173
- def embedding(self, *, idx: Optional[str] = None) -> ColumnRef:
174
- idx_info = self.find_embedding_index(idx, 'embedding')
175
- assert len(idx_info) == 1
176
- col = copy.copy(next(iter(idx_info.values())).val_col)
177
- col.name = f'{self.col.name}_embedding_{idx if idx is not None else ""}'
178
- col.create_sa_cols()
179
- return ColumnRef(col)
273
+ @property
274
+ def tbl(self) -> catalog.TableVersionHandle:
275
+ return self.reference_tbl.tbl_version if self.reference_tbl is not None else self.col.tbl_handle
180
276
 
181
- def default_column_name(self) -> Optional[str]:
277
+ def default_column_name(self) -> str | None:
182
278
  return self.col.name if self.col is not None else None
183
279
 
184
280
  def _equals(self, other: ColumnRef) -> bool:
185
281
  return self.col == other.col and self.perform_validation == other.perform_validation
186
282
 
187
- def _df(self) -> 'pxt.dataframe.DataFrame':
188
- from pixeltable import plan
283
+ def select(self) -> 'Query':
284
+ import pixeltable.plan as plan
285
+ from pixeltable._query import Query
189
286
 
190
287
  if self.reference_tbl is None:
191
288
  # No reference table; use the current version of the table to which the column belongs
192
- tbl = catalog.Catalog.get().get_table_by_id(self.col.tbl.id)
289
+ tbl = catalog.Catalog.get().get_table_by_id(self.col.tbl_handle.id)
193
290
  return tbl.select(self)
194
291
  else:
195
- # Explicit reference table; construct a DataFrame directly from it
196
- return pxt.DataFrame(plan.FromClause([self.reference_tbl])).select(self)
292
+ # Explicit reference table; construct a Query directly from it
293
+ return Query(plan.FromClause([self.reference_tbl])).select(self)
197
294
 
198
- def show(self, *args: Any, **kwargs: Any) -> 'pxt.dataframe.DataFrameResultSet':
199
- return self._df().show(*args, **kwargs)
295
+ def show(self, *args: Any, **kwargs: Any) -> 'ResultSet':
296
+ return self.select().show(*args, **kwargs)
200
297
 
201
- def head(self, *args: Any, **kwargs: Any) -> 'pxt.dataframe.DataFrameResultSet':
202
- return self._df().head(*args, **kwargs)
298
+ def head(self, *args: Any, **kwargs: Any) -> 'ResultSet':
299
+ return self.select().head(*args, **kwargs)
203
300
 
204
- def tail(self, *args: Any, **kwargs: Any) -> 'pxt.dataframe.DataFrameResultSet':
205
- return self._df().tail(*args, **kwargs)
301
+ def tail(self, *args: Any, **kwargs: Any) -> 'ResultSet':
302
+ return self.select().tail(*args, **kwargs)
206
303
 
207
304
  def count(self) -> int:
208
- return self._df().count()
305
+ return self.select().count()
209
306
 
210
- def distinct(self) -> 'pxt.dataframe.DataFrame':
307
+ def distinct(self) -> 'Query':
211
308
  """Return distinct values in this column."""
212
- return self._df().distinct()
309
+ return self.select().distinct()
213
310
 
214
311
  def __str__(self) -> str:
215
312
  if self.col.name is None:
@@ -224,17 +321,32 @@ class ColumnRef(Expr):
224
321
  return self._descriptors().to_html()
225
322
 
226
323
  def _descriptors(self) -> DescriptionHelper:
227
- tbl = catalog.Catalog.get().get_table_by_id(self.col.tbl.id)
324
+ with catalog.Catalog.get().begin_xact():
325
+ tbl = catalog.Catalog.get().get_table_by_id(self.col.tbl_handle.id)
228
326
  helper = DescriptionHelper()
229
- helper.append(f'Column\n{self.col.name!r}\n(of table {tbl._path!r})')
327
+ helper.append(f'Column\n{self.col.name!r}\n(of table {tbl._path()!r})')
230
328
  helper.append(tbl._col_descriptor([self.col.name]))
231
329
  idxs = tbl._index_descriptor([self.col.name])
232
330
  if len(idxs) > 0:
233
331
  helper.append(idxs)
234
332
  return helper
235
333
 
236
- def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
237
- return None if self.perform_validation else self.col.sa_col
334
+ def prepare(self) -> None:
335
+ from pixeltable import store
336
+
337
+ if not self.is_unstored_iter_col:
338
+ return
339
+ col = self.col_handle.get()
340
+ self.base_rowid_len = col.get_tbl().base.get().num_rowid_columns()
341
+ self.base_rowid = [None] * self.base_rowid_len
342
+ assert isinstance(col.get_tbl().store_tbl, store.StoreComponentView)
343
+ self.pos_idx = cast(store.StoreComponentView, col.get_tbl().store_tbl).pos_col_idx
344
+
345
+ def sql_expr(self, _: SqlElementCache) -> sql.ColumnElement | None:
346
+ if self.perform_validation:
347
+ return None
348
+ self.col = self.col_handle.get()
349
+ return self.col.sa_col
238
350
 
239
351
  def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
240
352
  if self.perform_validation:
@@ -273,31 +385,43 @@ class ColumnRef(Expr):
273
385
 
274
386
  # if this is a new base row, we need to instantiate a new iterator
275
387
  if self.base_rowid != data_row.pk[: self.base_rowid_len]:
388
+ assert self.iter_arg_ctx is not None
276
389
  row_builder.eval(data_row, self.iter_arg_ctx)
277
390
  iterator_args = data_row[self.iter_arg_ctx.target_slot_idxs[0]]
278
- self.iterator = self.col.tbl.get().iterator_cls(**iterator_args)
391
+ self.iterator = self.col.get_tbl().iterator_cls(**iterator_args)
279
392
  self.base_rowid = data_row.pk[: self.base_rowid_len]
280
- self.iterator.set_pos(data_row.pk[self.pos_idx])
393
+ stored_outputs = {col_ref.col.name: data_row[col_ref.slot_idx] for col_ref in self.iter_outputs}
394
+ assert all(name is not None for name in stored_outputs)
395
+ self.iterator.set_pos(data_row.pk[self.pos_idx], **stored_outputs)
281
396
  res = next(self.iterator)
282
397
  data_row[self.slot_idx] = res[self.col.name]
283
398
 
284
399
  def _as_dict(self) -> dict:
285
- tbl = self.col.tbl
286
- tbl_version = tbl.get().version if tbl.get().is_snapshot else None
400
+ tbl_handle = self.col.tbl_handle
287
401
  # we omit self.components, even if this is a validating ColumnRef, because init() will recreate the
288
402
  # non-validating component ColumnRef
403
+ assert tbl_handle.anchor_tbl_id is None # TODO: support anchor_tbl_id for view-over-replica
289
404
  return {
290
- 'tbl_id': str(tbl.id),
291
- 'tbl_version': tbl_version,
405
+ 'tbl_id': str(tbl_handle.id),
406
+ 'tbl_version': tbl_handle.effective_version,
292
407
  'col_id': self.col.id,
293
408
  'reference_tbl': self.reference_tbl.as_dict() if self.reference_tbl is not None else None,
294
409
  'perform_validation': self.perform_validation,
295
410
  }
296
411
 
412
+ @classmethod
413
+ def get_column_id(cls, d: dict) -> catalog.QColumnId:
414
+ tbl_id, col_id = UUID(d['tbl_id']), d['col_id']
415
+ return catalog.QColumnId(tbl_id, col_id)
416
+
297
417
  @classmethod
298
418
  def get_column(cls, d: dict) -> catalog.Column:
299
419
  tbl_id, version, col_id = UUID(d['tbl_id']), d['tbl_version'], d['col_id']
300
- tbl_version = catalog.Catalog.get().get_tbl_version(tbl_id, version)
420
+ # validate_initialized=False: this gets called as part of TableVersion.init()
421
+ # TODO: When we have views on replicas, we will need to store anchor_tbl_id in metadata as well.
422
+ tbl_version = catalog.Catalog.get().get_tbl_version(
423
+ TableVersionKey(tbl_id, version, None), validate_initialized=False
424
+ )
301
425
  # don't use tbl_version.cols_by_id here, this might be a snapshot reference to a column that was then dropped
302
426
  col = next(col for col in tbl_version.cols if col.id == col_id)
303
427
  return col
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Any, Optional
3
+ from typing import Any
4
4
 
5
5
  import sqlalchemy as sql
6
6
 
@@ -69,8 +69,8 @@ class Comparison(Expr):
69
69
  def _op2(self) -> Expr:
70
70
  return self.components[1]
71
71
 
72
- def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
73
- from pixeltable import index
72
+ def sql_expr(self, sql_elements: SqlElementCache) -> sql.ColumnElement | None:
73
+ import pixeltable.index as index
74
74
 
75
75
  if str(self._op1.col_type.to_sa_type()) != str(self._op2.col_type.to_sa_type()):
76
76
  # Comparing columns of different SQL types (e.g., string vs. json); this can only be done in Python
@@ -81,9 +81,9 @@ class Comparison(Expr):
81
81
  if self.is_search_arg_comparison:
82
82
  # reference the index value column if there is an index and this is not a snapshot
83
83
  # (indices don't apply to snapshots)
84
- tbl = self._op1.col.tbl.get()
84
+ tbl = self._op1.col.get_tbl()
85
85
  idx_info = [
86
- info for info in self._op1.col.get_idx_info().values() if isinstance(info.idx, index.BtreeIndex)
86
+ info for info in tbl.idxs_by_col.get(self._op1.col.qid, []) if isinstance(info.idx, index.BtreeIndex)
87
87
  ]
88
88
  if len(idx_info) > 0 and not tbl.is_snapshot:
89
89
  # there shouldn't be multiple B-tree indices on a column
@@ -1,7 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import operator
4
- from typing import Any, Callable, Optional
4
+ from typing import Any, Callable
5
5
 
6
6
  import sqlalchemy as sql
7
7
 
@@ -36,7 +36,8 @@ class CompoundPredicate(Expr):
36
36
  return f' {self.operator} '.join([f'({e})' for e in self.components])
37
37
 
38
38
  @classmethod
39
- def make_conjunction(cls, operands: list[Expr]) -> Optional[Expr]:
39
+ def make_conjunction(cls, operands: list[Expr | None]) -> Expr | None:
40
+ operands = [e for e in operands if e is not None]
40
41
  if len(operands) == 0:
41
42
  return None
42
43
  if len(operands) == 1:
@@ -60,14 +61,14 @@ class CompoundPredicate(Expr):
60
61
  def _id_attrs(self) -> list[tuple[str, Any]]:
61
62
  return [*super()._id_attrs(), ('operator', self.operator.value)]
62
63
 
63
- def split_conjuncts(self, condition: Callable[[Expr], bool]) -> tuple[list[Expr], Optional[Expr]]:
64
+ def split_conjuncts(self, condition: Callable[[Expr], bool]) -> tuple[list[Expr], Expr | None]:
64
65
  if self.operator in (LogicalOperator.OR, LogicalOperator.NOT):
65
66
  return super().split_conjuncts(condition)
66
67
  matches = [op for op in self.components if condition(op)]
67
68
  non_matches = [op for op in self.components if not condition(op)]
68
69
  return (matches, self.make_conjunction(non_matches))
69
70
 
70
- def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
71
+ def sql_expr(self, sql_elements: SqlElementCache) -> sql.ColumnElement | None:
71
72
  sql_exprs = [sql_elements.get(op) for op in self.components]
72
73
  if any(e is None for e in sql_exprs):
73
74
  return None