pixeltable 0.4.1__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Note: this version of pixeltable has been flagged as potentially problematic.

@@ -29,7 +29,14 @@ if TYPE_CHECKING:
 
 from ..func.globals import resolve_symbol
 from .column import Column
-from .globals import _POS_COLUMN_NAME, _ROWID_COLUMN_NAME, MediaValidation, UpdateStatus, is_valid_identifier
+from .globals import (
+    _POS_COLUMN_NAME,
+    _ROWID_COLUMN_NAME,
+    MediaValidation,
+    RowCountStats,
+    UpdateStatus,
+    is_valid_identifier,
+)
 
 if TYPE_CHECKING:
     from pixeltable import exec, store
@@ -183,6 +190,12 @@ class TableVersion:
         else:
             return f'{self.name}:{self.effective_version}'
 
+    @property
+    def handle(self) -> 'TableVersionHandle':
+        from .table_version_handle import TableVersionHandle
+
+        return TableVersionHandle(self.id, self.effective_version, self)
+
     @classmethod
     def create(
         cls,
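
The new handle property lets other objects hold a lightweight, re-resolvable reference to a TableVersion instead of the instance itself. A minimal standalone sketch of that pattern (toy names, not pixeltable's classes), assuming the handle re-resolves through a registry keyed by id and version:

    from dataclasses import dataclass

    REGISTRY: dict[tuple[int, int | None], 'Thing'] = {}

    @dataclass(frozen=True)
    class Handle:
        id: int
        version: int | None

        def get(self) -> 'Thing':
            # re-resolve the live object instead of caching it
            return REGISTRY[(self.id, self.version)]

    class Thing:
        def __init__(self, id: int, version: int | None) -> None:
            self.id, self.version = id, version
            REGISTRY[(id, version)] = self

        @property
        def handle(self) -> Handle:
            return Handle(self.id, self.version)

    t = Thing(7, None)
    assert t.handle.get() is t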
@@ -195,7 +208,6 @@ class TableVersion:
         # base_path: Optional[pxt.catalog.TableVersionPath] = None,
         view_md: Optional[schema.ViewMd] = None,
     ) -> tuple[UUID, Optional[TableVersion]]:
-        session = Env.get().session
         user = Env.get().user
 
         # assign ids
@@ -212,8 +224,9 @@ class TableVersion:
         # Column.dependent_cols for existing cols is wrong at this point, but init() will set it correctly
         column_md = cls._create_column_md(cols)
         tbl_id = uuid.uuid4()
+        tbl_id_str = str(tbl_id)
         table_md = schema.TableMd(
-            tbl_id=str(tbl_id),
+            tbl_id=tbl_id_str,
             name=name,
             user=user,
             is_replica=False,
@@ -229,16 +242,10 @@ class TableVersion:
             view_md=view_md,
             additional_md={},
         )
-        # create a schema.Table here, we need it to call our c'tor;
-        # don't add it to the session yet, we might add index metadata
-        tbl_record = schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(table_md))
 
         # create schema.TableVersion
         table_version_md = schema.TableVersionMd(
-            tbl_id=str(tbl_record.id), created_at=timestamp, version=0, schema_version=0, additional_md={}
-        )
-        tbl_version_record = schema.TableVersion(
-            tbl_id=tbl_record.id, version=0, md=dataclasses.asdict(table_version_md)
+            tbl_id=tbl_id_str, created_at=timestamp, version=0, schema_version=0, additional_md={}
         )
 
         # create schema.TableSchemaVersion
@@ -252,7 +259,7 @@ class TableVersion:
             schema_col_md[col.id] = md
 
         schema_version_md = schema.TableSchemaVersionMd(
-            tbl_id=str(tbl_record.id),
+            tbl_id=tbl_id_str,
             schema_version=0,
             preceding_schema_version=None,
             columns=schema_col_md,
@@ -261,9 +268,8 @@ class TableVersion:
             media_validation=media_validation.name.lower(),
             additional_md={},
         )
-        schema_version_record = schema.TableSchemaVersion(
-            tbl_id=tbl_record.id, schema_version=0, md=dataclasses.asdict(schema_version_md)
-        )
+
+        cat = pxt.catalog.Catalog.get()
 
         # if this is purely a snapshot (it doesn't require any additional storage for columns and it doesn't have a
         # predicate to apply at runtime), we don't create a physical table and simply use the base's table version path
@@ -274,22 +280,23 @@ class TableVersion:
             and view_md.sample_clause is None
             and len(cols) == 0
         ):
-            session.add(tbl_record)
-            session.add(tbl_version_record)
-            session.add(schema_version_record)
-            return tbl_record.id, None
+            cat.store_tbl_md(
+                tbl_id=tbl_id,
+                dir_id=dir_id,
+                tbl_md=table_md,
+                version_md=table_version_md,
+                schema_version_md=schema_version_md,
+            )
+            return tbl_id, None
 
         # assert (base_path is not None) == (view_md is not None)
         is_snapshot = view_md is not None and view_md.is_snapshot
         effective_version = 0 if is_snapshot else None
         base_path = pxt.catalog.TableVersionPath.from_md(view_md.base_versions) if view_md is not None else None
         base = base_path.tbl_version if base_path is not None else None
-        tbl_version = cls(
-            tbl_record.id, table_md, effective_version, schema_version_md, [], base_path=base_path, base=base
-        )
+        tbl_version = cls(tbl_id, table_md, effective_version, schema_version_md, [], base_path=base_path, base=base)
         # TODO: break this up, so that Catalog.create_table() registers tbl_version
-        cat = pxt.catalog.Catalog.get()
-        cat._tbl_versions[tbl_record.id, effective_version] = tbl_version
+        cat._tbl_versions[tbl_id, effective_version] = tbl_version
         tbl_version.init()
         tbl_version.store_tbl.create()
         is_mutable = not is_snapshot and not table_md.is_replica
@@ -306,12 +313,14 @@ class TableVersion:
                 status = tbl_version._add_default_index(col)
                 assert status is None or status.num_excs == 0
 
-        # we re-create the tbl_record here, now that we have new index metadata
-        tbl_record = schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(tbl_version.tbl_md))
-        session.add(tbl_record)
-        session.add(tbl_version_record)
-        session.add(schema_version_record)
-        return tbl_record.id, tbl_version
+        cat.store_tbl_md(
+            tbl_id=tbl_id,
+            dir_id=dir_id,
+            tbl_md=tbl_version.tbl_md,
+            version_md=table_version_md,
+            schema_version_md=schema_version_md,
+        )
+        return tbl_id, tbl_version
 
     @classmethod
     def create_replica(cls, md: schema.FullTableMd) -> TableVersion:
@@ -488,7 +497,7 @@ class TableVersion:
         )
 
         Catalog.get().store_tbl_md(
-            self.id, self._tbl_md, version_md, self._schema_version_md if new_schema_version else None
+            self.id, None, self._tbl_md, version_md, self._schema_version_md if new_schema_version else None
         )
 
     def _store_idx_name(self, idx_id: int) -> str:
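
The hunks above replace per-call-site session.add() bookkeeping with a single Catalog.store_tbl_md() call; the extra None in _write_md() apparently fills the new dir_id parameter. A standalone sketch of the consolidation idea, using SQLite and invented table layouts rather than pixeltable's actual Catalog:

    import dataclasses, json, sqlite3, uuid
    from typing import Optional

    @dataclasses.dataclass
    class TableMd:
        tbl_id: str
        name: str

    def store_tbl_md(conn: sqlite3.Connection, tbl_id: uuid.UUID, dir_id: Optional[uuid.UUID],
                     tbl_md: TableMd, version_md: dict, schema_version_md: dict) -> None:
        # one place owns the mapping from metadata dataclasses to stored rows
        conn.execute('INSERT INTO tables VALUES (?, ?, ?)',
                     (str(tbl_id), str(dir_id) if dir_id else None, json.dumps(dataclasses.asdict(tbl_md))))
        conn.execute('INSERT INTO tableversions VALUES (?, ?)', (str(tbl_id), json.dumps(version_md)))
        conn.execute('INSERT INTO tableschemaversions VALUES (?, ?)', (str(tbl_id), json.dumps(schema_version_md)))

    conn = sqlite3.connect(':memory:')
    for ddl in ('CREATE TABLE tables (id TEXT, dir_id TEXT, md TEXT)',
                'CREATE TABLE tableversions (tbl_id TEXT, md TEXT)',
                'CREATE TABLE tableschemaversions (tbl_id TEXT, md TEXT)'):
        conn.execute(ddl)
    tbl_id = uuid.uuid4()
    store_tbl_md(conn, tbl_id, None, TableMd(str(tbl_id), 'films'), {'version': 0}, {'schema_version': 0})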
@@ -693,6 +702,7 @@ class TableVersion:
                     f'Cannot add non-nullable column {col.name!r} to table {self.name!r} with existing rows'
                 )
 
+        computed_values = 0
         num_excs = 0
         cols_with_excs: list[Column] = []
         for col in cols_to_add:
@@ -731,18 +741,19 @@ class TableVersion:
             # populate the column
             from pixeltable.plan import Planner
 
-            plan, value_expr_slot_idx = Planner.create_add_column_plan(self.path, col)
+            plan = Planner.create_add_column_plan(self.path, col)
             plan.ctx.num_rows = row_count
             try:
                 plan.open()
                 try:
-                    excs_per_col = self.store_tbl.load_column(col, plan, value_expr_slot_idx, on_error)
+                    excs_per_col = self.store_tbl.load_column(col, plan, on_error == 'abort')
                 except sql.exc.DBAPIError as exc:
                     # Wrap the DBAPIError in an excs.Error to unify processing in the subsequent except block
                     raise excs.Error(f'SQL error during execution of computed column `{col.name}`:\n{exc}') from exc
                 if excs_per_col > 0:
                     cols_with_excs.append(col)
                     num_excs += excs_per_col
+                computed_values += plan.ctx.num_computed_exprs * row_count
             finally:
                 # Ensure cleanup occurs if an exception or keyboard interruption happens during `load_column()`.
                 def cleanup_on_error() -> None:
@@ -765,12 +776,14 @@ class TableVersion:
 
         if print_stats:
             plan.ctx.profile.print(num_rows=row_count)
+
         # TODO: what to do about system columns with exceptions?
+        row_counts = RowCountStats(
+            upd_rows=row_count, num_excs=num_excs, computed_values=computed_values
+        )  # add_columns
         return UpdateStatus(
-            num_rows=row_count,
-            num_computed_values=row_count,
-            num_excs=num_excs,
             cols_with_excs=[f'{col.tbl.name}.{col.name}' for col in cols_with_excs if col.name is not None],
+            row_count_stats=row_counts,
         )
 
     def drop_column(self, col: Column) -> None:
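
The add-column path above now reports its work through a RowCountStats value nested in UpdateStatus rather than loose counters. A standalone sketch of that shape (field names beyond those visible in the diff, and the merge rules, are assumptions):

    from dataclasses import astuple, dataclass

    @dataclass(frozen=True)
    class RowCountStats:
        ins_rows: int = 0
        upd_rows: int = 0
        del_rows: int = 0
        cascade_rows: int = 0
        num_excs: int = 0
        computed_values: int = 0

        def __add__(self, other: 'RowCountStats') -> 'RowCountStats':
            return RowCountStats(*(a + b for a, b in zip(astuple(self), astuple(other))))

    @dataclass(frozen=True)
    class UpdateStatus:
        row_count_stats: RowCountStats = RowCountStats()
        cols_with_excs: tuple = ()
        updated_cols: tuple = ()

        def __add__(self, other: 'UpdateStatus') -> 'UpdateStatus':
            return UpdateStatus(
                self.row_count_stats + other.row_count_stats,
                tuple(dict.fromkeys(self.cols_with_excs + other.cols_with_excs)),  # dedup, keep order
                tuple(dict.fromkeys(self.updated_cols + other.updated_cols)),
            )

        @property
        def num_excs(self) -> int:
            return self.row_count_stats.num_excs

    # e.g. the add-column path: upd_rows/num_excs/computed_values travel in one object
    status = UpdateStatus(RowCountStats(upd_rows=100, num_excs=2, computed_values=100), ('films.summary',))
    status += UpdateStatus(RowCountStats(num_excs=1), ('films.summary',))
    assert status.num_excs == 3 and status.cols_with_excs == ('films.summary',)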
@@ -910,14 +923,10 @@ class TableVersion:
         """Insert rows produced by exec_plan and propagate to views"""
         # we're creating a new version
         self.version += 1
-        result = UpdateStatus()
-        num_rows, num_excs, cols_with_excs = self.store_tbl.insert_rows(
+        cols_with_excs, result = self.store_tbl.insert_rows(
             exec_plan, v_min=self.version, rowids=rowids, abort_on_exc=abort_on_exc
         )
-        result.num_rows = num_rows
-        result.num_excs = num_excs
-        result.num_computed_values += exec_plan.ctx.num_computed_exprs * num_rows
-        result.cols_with_excs = [f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs]
+        result += UpdateStatus(cols_with_excs=[f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs])
         self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False)
 
         # update views
@@ -926,14 +935,10 @@ class TableVersion:
 
             plan, _ = Planner.create_view_load_plan(view.get().path, propagates_insert=True)
             status = view.get()._insert(plan, timestamp, print_stats=print_stats)
-            result.num_rows += status.num_rows
-            result.num_excs += status.num_excs
-            result.num_computed_values += status.num_computed_values
-            result.cols_with_excs += status.cols_with_excs
+            result += status.to_cascade()
 
-        result.cols_with_excs = list(dict.fromkeys(result.cols_with_excs).keys())  # remove duplicates
         if print_stats:
-            plan.ctx.profile.print(num_rows=num_rows)
+            plan.ctx.profile.print(num_rows=result.num_rows)  # This is the net rows after all propagations
         _logger.info(f'TableVersion {self.name}: new version {self.version}')
         return result
 
@@ -973,7 +978,7 @@ class TableVersion:
             cascade=cascade,
             show_progress=True,
         )
-        result.updated_cols = updated_cols
+        result += UpdateStatus(updated_cols=updated_cols)
        return result
 
     def batch_update(
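
Throughout these hunks, statuses from view propagation are folded into the caller's result via to_cascade() and +=. Continuing the RowCountStats/UpdateStatus sketch shown earlier, one plausible reading, assumed here, is that to_cascade() reclassifies a propagated status's own row counts as cascade counts so downstream rows aren't reported as rows of the table the caller operated on:

    def to_cascade(status: UpdateStatus) -> UpdateStatus:
        s = status.row_count_stats
        return UpdateStatus(
            RowCountStats(
                cascade_rows=s.ins_rows + s.upd_rows + s.del_rows + s.cascade_rows,
                num_excs=s.num_excs,
                computed_values=s.computed_values,
            ),
            status.cols_with_excs,
        )

    base = UpdateStatus(RowCountStats(ins_rows=10))
    view = UpdateStatus(RowCountStats(ins_rows=10, num_excs=1), ('view_1.summary',))
    total = base + to_cascade(view)
    assert total.row_count_stats.ins_rows == 10 and total.row_count_stats.cascade_rows == 10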
@@ -1000,7 +1005,7 @@ class TableVersion:
         result = self.propagate_update(
             plan, delete_where_clause, recomputed_cols, base_versions=[], timestamp=time.time(), cascade=cascade
         )
-        result.updated_cols = [c.qualified_name for c in updated_cols]
+        result += UpdateStatus(updated_cols=[c.qualified_name for c in updated_cols])
 
         unmatched_rows = row_update_node.unmatched_rows()
         if len(unmatched_rows) > 0:
@@ -1008,7 +1013,7 @@ class TableVersion:
             raise excs.Error(f'batch_update(): {len(unmatched_rows)} row(s) not found')
         if insert_if_not_exists:
             insert_status = self.insert(unmatched_rows, None, print_stats=False, fail_on_exception=False)
-            result += insert_status
+            result += insert_status.to_cascade()
         return result
 
     def _validate_update_spec(
@@ -1061,6 +1066,38 @@ class TableVersion:
 
         return update_targets
 
+    def recompute_columns(self, col_names: list[str], errors_only: bool = False, cascade: bool = True) -> UpdateStatus:
+        assert not self.is_snapshot
+        assert all(name in self.cols_by_name for name in col_names)
+        assert len(col_names) > 0
+        assert len(col_names) == 1 or not errors_only
+
+        from pixeltable.plan import Planner
+
+        target_columns = [self.cols_by_name[name] for name in col_names]
+        where_clause: Optional[exprs.Expr] = None
+        if errors_only:
+            where_clause = (
+                exprs.ColumnPropertyRef(exprs.ColumnRef(target_columns[0]), exprs.ColumnPropertyRef.Property.ERRORTYPE)
+                != None
+            )
+        plan, updated_cols, recomputed_cols = Planner.create_update_plan(
+            self.path, update_targets={}, recompute_targets=target_columns, where_clause=where_clause, cascade=cascade
+        )
+        from pixeltable.exprs import SqlElementCache
+
+        result = self.propagate_update(
+            plan,
+            where_clause.sql_expr(SqlElementCache()) if where_clause is not None else None,
+            recomputed_cols,
+            base_versions=[],
+            timestamp=time.time(),
+            cascade=cascade,
+            show_progress=True,
+        )
+        result += UpdateStatus(updated_cols=updated_cols)
+        return result
+
     def propagate_update(
         self,
         plan: Optional[exec.ExecNode],
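
recompute_columns() with errors_only=True restricts the update plan to rows whose stored errortype is non-null. A standalone sketch of that idea in plain Python (not pixeltable code):

    from dataclasses import dataclass
    from typing import Optional

    @dataclass
    class Row:
        x: int
        y: Optional[int] = None
        errortype: Optional[str] = None   # populated when computing y raised

    def compute_y(row: Row) -> None:
        try:
            row.y, row.errortype = 100 // row.x, None
        except Exception as exc:
            row.y, row.errortype = None, type(exc).__name__

    rows = [Row(2), Row(0), Row(5)]
    for r in rows:
        compute_y(r)

    # errors_only=True corresponds to the predicate "errortype is not null"
    for r in (r for r in rows if r.errortype is not None):
        r.x = 1                           # fix the input ...
        compute_y(r)                      # ... and recompute just that row
    assert all(r.errortype is None for r in rows)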
@@ -1071,18 +1108,20 @@ class TableVersion:
         cascade: bool,
         show_progress: bool = True,
     ) -> UpdateStatus:
-        result = UpdateStatus()
         if plan is not None:
             # we're creating a new version
             self.version += 1
-            result.num_rows, result.num_excs, cols_with_excs = self.store_tbl.insert_rows(
-                plan, v_min=self.version, show_progress=show_progress
+            cols_with_excs, status = self.store_tbl.insert_rows(plan, v_min=self.version, show_progress=show_progress)
+            result = status.insert_to_update()
+            result += UpdateStatus(
+                cols_with_excs=[f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs]
             )
-            result.cols_with_excs = [f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs]
             self.store_tbl.delete_rows(
                 self.version, base_versions=base_versions, match_on_vmin=True, where_clause=where_clause
             )
             self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False)
+        else:
+            result = UpdateStatus()
 
         if cascade:
             base_versions = [None if plan is None else self.version, *base_versions]  # don't update in place
@@ -1097,17 +1136,17 @@ class TableVersion:
             status = view.get().propagate_update(
                 plan, None, recomputed_view_cols, base_versions=base_versions, timestamp=timestamp, cascade=True
             )
-            result.num_rows += status.num_rows
-            result.num_excs += status.num_excs
-            result.cols_with_excs += status.cols_with_excs
+            result += status.to_cascade()
 
-        result.cols_with_excs = list(dict.fromkeys(result.cols_with_excs).keys())  # remove duplicates
         return result
 
     def delete(self, where: Optional[exprs.Expr] = None) -> UpdateStatus:
         """Delete rows in this table.
         Args:
             where: a predicate to filter rows to delete.
+
+        Returns:
+            UpdateStatus: an object containing the number of deleted rows and other statistics.
         """
         assert self.is_insertable
         from pixeltable.exprs import Expr
@@ -1123,14 +1162,12 @@ class TableVersion:
             raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
         sql_where_clause = analysis_info.sql_where_clause
 
-        num_rows = self.propagate_delete(sql_where_clause, base_versions=[], timestamp=time.time())
-
-        status = UpdateStatus(num_rows=num_rows)
+        status = self.propagate_delete(sql_where_clause, base_versions=[], timestamp=time.time())
         return status
 
     def propagate_delete(
         self, where: Optional[exprs.Expr], base_versions: list[Optional[int]], timestamp: float
-    ) -> int:
+    ) -> UpdateStatus:
         """Delete rows in this table and propagate to views.
         Args:
             where: a predicate to filter rows to delete.
@@ -1146,18 +1183,21 @@ class TableVersion:
         # sql.sql.visitors.traverse(sql_where_clause, {}, {'column': collect_cols})
         # x = [f'{str(c)}:{hash(c)}:{id(c.table)}' for c in sql_cols]
         # print(f'where_clause cols: {x}')
-        num_rows = self.store_tbl.delete_rows(
+        del_rows = self.store_tbl.delete_rows(
             self.version + 1, base_versions=base_versions, match_on_vmin=False, where_clause=sql_where_clause
         )
-        if num_rows > 0:
+        row_counts = RowCountStats(del_rows=del_rows)  # delete
+        result = UpdateStatus(row_count_stats=row_counts)
+        if del_rows > 0:
             # we're creating a new version
             self.version += 1
             self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False)
         for view in self.mutable_views:
-            num_rows += view.get().propagate_delete(
+            status = view.get().propagate_delete(
                 where=None, base_versions=[self.version, *base_versions], timestamp=timestamp
             )
-        return num_rows
+            result += status.to_cascade()
+        return result
 
     def revert(self) -> None:
         """Reverts the table to the previous version."""
@@ -1,13 +1,16 @@
 from __future__ import annotations
 
 import logging
+from dataclasses import dataclass
 from typing import TYPE_CHECKING, Optional
 from uuid import UUID
 
+from pixeltable import exceptions as excs
+
 from .table_version import TableVersion
 
 if TYPE_CHECKING:
-    pass
+    from pixeltable.catalog import Column
 
 _logger = logging.getLogger('pixeltable')
 
@@ -67,3 +70,25 @@ class TableVersionHandle:
     @classmethod
     def from_dict(cls, d: dict) -> TableVersionHandle:
         return cls(UUID(d['id']), d['effective_version'])
+
+
+@dataclass(frozen=True)
+class ColumnHandle:
+    tbl_version: TableVersionHandle
+    col_id: int
+
+    def get(self) -> 'Column':
+        if self.col_id not in self.tbl_version.get().cols_by_id:
+            schema_version_drop = self.tbl_version.get()._tbl_md.column_md[self.col_id].schema_version_drop
+            raise excs.Error(
+                f'Column has been dropped (no record for column ID {self.col_id} in table '
+                f'{self.tbl_version.get().versioned_name!r}; it was dropped in table version {schema_version_drop})'
+            )
+        return self.tbl_version.get().cols_by_id[self.col_id]
+
+    def as_dict(self) -> dict:
+        return {'tbl_version': self.tbl_version.as_dict(), 'col_id': self.col_id}
+
+    @classmethod
+    def from_dict(cls, d: dict) -> ColumnHandle:
+        return cls(tbl_version=TableVersionHandle.from_dict(d['tbl_version']), col_id=d['col_id'])
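
The new ColumnHandle mirrors TableVersionHandle: expressions keep a small (table handle, column id) pair and re-resolve the live Column on demand, surfacing a clear error if the column was dropped. A standalone sketch of the pattern, including the as_dict()/from_dict() round trip (toy code, not the pixeltable classes):

    from dataclasses import dataclass

    CATALOG: dict[int, dict[int, str]] = {1: {0: 'col_a', 1: 'col_b'}}   # toy stand-in for live metadata

    @dataclass(frozen=True)
    class TableHandle:
        tbl_id: int

        def get(self) -> dict[int, str]:
            return CATALOG[self.tbl_id]        # always resolves against current metadata

    @dataclass(frozen=True)
    class ColHandle:
        tbl: TableHandle
        col_id: int

        def get(self) -> str:
            cols = self.tbl.get()
            if self.col_id not in cols:
                raise KeyError(f'column {self.col_id} was dropped')
            return cols[self.col_id]

        def as_dict(self) -> dict:
            return {'tbl_id': self.tbl.tbl_id, 'col_id': self.col_id}

        @classmethod
        def from_dict(cls, d: dict) -> 'ColHandle':
            return cls(TableHandle(d['tbl_id']), d['col_id'])

    h = ColHandle(TableHandle(1), 0)
    assert ColHandle.from_dict(h.as_dict()) == h   # dict round trip
    CATALOG[1].pop(0)                              # simulate a dropped column
    try:
        h.get()
    except KeyError as e:
        print(e)                                   # the handle reports the drop instead of using stale state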
@@ -229,7 +229,7 @@ class View(Table):
 
        try:
            plan, _ = Planner.create_view_load_plan(view._tbl_version_path)
-            num_rows, num_excs, _ = tbl_version.store_tbl.insert_rows(plan, v_min=tbl_version.version)
+            _, status = tbl_version.store_tbl.insert_rows(plan, v_min=tbl_version.version)
        except:
            # we need to remove the orphaned TableVersion instance
            del catalog.Catalog.get()._tbl_versions[tbl_version.id, tbl_version.effective_version]
@@ -238,7 +238,9 @@ class View(Table):
            # also remove tbl_version from the base
            base_tbl_version.mutable_views.remove(TableVersionHandle.create(tbl_version))
            raise
-        Env.get().console_logger.info(f'Created view `{name}` with {num_rows} rows, {num_excs} exceptions.')
+        Env.get().console_logger.info(
+            f'Created view `{name}` with {status.num_rows} rows, {status.num_excs} exceptions.'
+        )
 
        session.commit()
        return view
@@ -55,18 +55,9 @@ class ColumnPropertyRef(Expr):
        return self.prop in (self.Property.ERRORTYPE, self.Property.ERRORMSG)
 
    def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
-        if not self._col_ref.col.is_stored:
+        if not self._col_ref.col_handle.get().is_stored:
            return None
-
-        # we need to reestablish that we have the correct Column instance, there could have been a metadata
-        # reload since init()
-        # TODO: add an explicit prepare phase (ie, Expr.prepare()) that gives every subclass instance a chance to
-        # perform runtime checks and update state
-        tv = self._col_ref.tbl_version.get()
-        assert tv.is_validated
-        # we can assume at this point during query execution that the column exists
-        assert self._col_ref.col_id in tv.cols_by_id
-        col = tv.cols_by_id[self._col_ref.col_id]
+        col = self._col_ref.col_handle.get()
 
        # the errortype/-msg properties of a read-validated media column need to be extracted from the DataRow
        if (
@@ -10,6 +10,7 @@ import pixeltable as pxt
 from pixeltable import catalog, exceptions as excs, iterators as iters
 
 from ..utils.description_helper import DescriptionHelper
+from ..utils.filecache import FileCache
 from .data_row import DataRow
 from .expr import Expr
 from .row_builder import RowBuilder
@@ -41,7 +42,8 @@ class ColumnRef(Expr):
    insert them into the EvalCtxs as needed
    """
 
-    col: catalog.Column
+    col: catalog.Column  # TODO: merge with col_handle
+    col_handle: catalog.ColumnHandle
    reference_tbl: Optional[catalog.TableVersionPath]
    is_unstored_iter_col: bool
    iter_arg_ctx: Optional[RowBuilder.EvalCtx]
@@ -52,10 +54,6 @@ class ColumnRef(Expr):
    id: int
    perform_validation: bool  # if True, performs media validation
 
-    # needed by sql_expr() to re-resolve Column instance after a metadata reload
-    tbl_version: catalog.TableVersionHandle
-    col_id: int
-
    def __init__(
        self,
        col: catalog.Column,
@@ -66,8 +64,7 @@ class ColumnRef(Expr):
        assert col.tbl is not None
        self.col = col
        self.reference_tbl = reference_tbl
-        self.tbl_version = catalog.TableVersionHandle(col.tbl.id, col.tbl.effective_version)
-        self.col_id = col.id
+        self.col_handle = catalog.ColumnHandle(col.tbl.handle, col.id)
 
        self.is_unstored_iter_col = col.tbl.is_component_view and col.tbl.is_iterator_column(col) and not col.is_stored
        self.iter_arg_ctx = None
@@ -170,6 +167,20 @@ class ColumnRef(Expr):
            idx_info = embedding_idx_info
        return idx_info
 
+    def recompute(self, *, cascade: bool = True, errors_only: bool = False) -> catalog.UpdateStatus:
+        cat = catalog.Catalog.get()
+        # lock_mutable_tree=True: we need to be able to see whether any transitive view has column dependents
+        with cat.begin_xact(tbl=self.reference_tbl, for_write=True, lock_mutable_tree=True):
+            tbl_version = self.col_handle.tbl_version.get()
+            if tbl_version.id != self.reference_tbl.tbl_id:
+                raise excs.Error('Cannot recompute column of a base.')
+            if tbl_version.is_snapshot:
+                raise excs.Error('Cannot recompute column of a snapshot.')
+            col_name = self.col_handle.get().name
+            status = tbl_version.recompute_columns([col_name], errors_only=errors_only, cascade=cascade)
+        FileCache.get().emit_eviction_warnings()
+        return status
+
    def similarity(self, item: Any, *, idx: Optional[str] = None) -> Expr:
        from .similarity_expr import SimilarityExpr
 
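The recompute() method added to ColumnRef above is the user-facing entry point for the new recompute_columns() path. A hedged usage sketch (the table and column names are invented; it assumes column references obtained via attribute access expose this method, which is what the diff suggests):

    import pixeltable as pxt

    t = pxt.get_table('films')                  # hypothetical existing table
    # recompute a computed column and everything downstream of it:
    status = t.summary.recompute(cascade=True)  # assumes `summary` is a computed column of t
    # redo only the rows whose previous computation stored an error:
    status = t.summary.recompute(errors_only=True)
    print(status.num_excs)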
@@ -241,16 +252,7 @@ class ColumnRef(Expr):
    def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
        if self.perform_validation:
            return None
-        # we need to reestablish that we have the correct Column instance, there could have been a metadata
-        # reload since init()
-        # TODO: add an explicit prepare phase (ie, Expr.prepare()) that gives every subclass instance a chance to
-        # perform runtime checks and update state
-        tv = self.tbl_version.get()
-        assert tv.is_validated
-        # we can assume at this point during query execution that the column exists
-        assert self.col_id in tv.cols_by_id
-        self.col = tv.cols_by_id[self.col_id]
-        assert self.col.tbl is tv
+        self.col = self.col_handle.get()
        return self.col.sa_col
 
    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
@@ -42,6 +42,10 @@ class DataRow:
    has_val: np.ndarray  # of bool
    excs: np.ndarray  # of object
 
+    # If `may_have_exc` is False, then we guarantee that no slot has an exception set. This is used to optimize
+    # exception handling under normal operation.
+    _may_have_exc: bool
+
    # expr evaluation state; indexed by slot idx
    missing_slots: np.ndarray  # of bool; number of missing dependencies
    missing_dependents: np.ndarray  # of int16; number of missing dependents
@@ -90,6 +94,7 @@ class DataRow:
        self.vals = np.full(num_slots, None, dtype=object)
        self.has_val = np.zeros(num_slots, dtype=bool)
        self.excs = np.full(num_slots, None, dtype=object)
+        self._may_have_exc = False
        self.missing_slots = np.zeros(num_slots, dtype=bool)
        self.missing_dependents = np.zeros(num_slots, dtype=np.int16)
        self.is_scheduled = np.zeros(num_slots, dtype=bool)
@@ -136,6 +141,9 @@ class DataRow:
        """
        Returns True if an exception has been set for the given slot index, or for any slot index if slot_idx is None
        """
+        if not self._may_have_exc:
+            return False
+
        if slot_idx is not None:
            return self.excs[slot_idx] is not None
        return (self.excs != None).any()
@@ -154,6 +162,7 @@ class DataRow:
    def set_exc(self, slot_idx: int, exc: Exception) -> None:
        assert self.excs[slot_idx] is None
        self.excs[slot_idx] = exc
+        self._may_have_exc = True
 
        # an exception means the value is None
        self.has_val[slot_idx] = True
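
The _may_have_exc flag added above short-circuits has_exc() so the common no-exception case avoids scanning the slot array. A standalone sketch of the same idea (not the pixeltable class):

    import numpy as np

    class SlotBuffer:
        def __init__(self, num_slots: int) -> None:
            self.excs = np.full(num_slots, None, dtype=object)
            self._may_have_exc = False          # no slot can hold an exception yet

        def set_exc(self, slot_idx: int, exc: Exception) -> None:
            self.excs[slot_idx] = exc
            self._may_have_exc = True           # from now on, has_exc() must really look

        def has_exc(self, slot_idx: int | None = None) -> bool:
            if not self._may_have_exc:          # O(1) fast path for clean rows
                return False
            if slot_idx is not None:
                return self.excs[slot_idx] is not None
            return any(e is not None for e in self.excs)

    buf = SlotBuffer(1000)
    assert not buf.has_exc()                    # cheap: no array scan
    buf.set_exc(3, ValueError('bad value'))
    assert buf.has_exc(3) and buf.has_exc()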