pixeltable 0.4.1__py3-none-any.whl → 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic.
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +2 -1
- pixeltable/catalog/catalog.py +75 -21
- pixeltable/catalog/column.py +10 -0
- pixeltable/catalog/globals.py +121 -18
- pixeltable/catalog/insertable_table.py +2 -1
- pixeltable/catalog/table.py +135 -4
- pixeltable/catalog/table_version.py +106 -66
- pixeltable/catalog/table_version_handle.py +26 -1
- pixeltable/catalog/view.py +4 -2
- pixeltable/exprs/column_property_ref.py +2 -11
- pixeltable/exprs/column_ref.py +19 -17
- pixeltable/exprs/data_row.py +9 -0
- pixeltable/exprs/row_builder.py +44 -13
- pixeltable/io/external_store.py +79 -52
- pixeltable/io/globals.py +1 -1
- pixeltable/io/label_studio.py +45 -41
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/notes.py +1 -0
- pixeltable/metadata/utils.py +78 -0
- pixeltable/plan.py +22 -18
- pixeltable/store.py +114 -103
- {pixeltable-0.4.1.dist-info → pixeltable-0.4.2.dist-info}/METADATA +1 -1
- {pixeltable-0.4.1.dist-info → pixeltable-0.4.2.dist-info}/RECORD +28 -26
- {pixeltable-0.4.1.dist-info → pixeltable-0.4.2.dist-info}/LICENSE +0 -0
- {pixeltable-0.4.1.dist-info → pixeltable-0.4.2.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.1.dist-info → pixeltable-0.4.2.dist-info}/entry_points.txt +0 -0
pixeltable/catalog/table_version.py
CHANGED

@@ -29,7 +29,14 @@ if TYPE_CHECKING:
 
 from ..func.globals import resolve_symbol
 from .column import Column
-from .globals import
+from .globals import (
+    _POS_COLUMN_NAME,
+    _ROWID_COLUMN_NAME,
+    MediaValidation,
+    RowCountStats,
+    UpdateStatus,
+    is_valid_identifier,
+)
 
 if TYPE_CHECKING:
     from pixeltable import exec, store
@@ -183,6 +190,12 @@ class TableVersion:
         else:
            return f'{self.name}:{self.effective_version}'
 
+    @property
+    def handle(self) -> 'TableVersionHandle':
+        from .table_version_handle import TableVersionHandle
+
+        return TableVersionHandle(self.id, self.effective_version, self)
+
     @classmethod
     def create(
         cls,
@@ -195,7 +208,6 @@ class TableVersion:
         # base_path: Optional[pxt.catalog.TableVersionPath] = None,
         view_md: Optional[schema.ViewMd] = None,
     ) -> tuple[UUID, Optional[TableVersion]]:
-        session = Env.get().session
         user = Env.get().user
 
         # assign ids
@@ -212,8 +224,9 @@ class TableVersion:
         # Column.dependent_cols for existing cols is wrong at this point, but init() will set it correctly
         column_md = cls._create_column_md(cols)
         tbl_id = uuid.uuid4()
+        tbl_id_str = str(tbl_id)
         table_md = schema.TableMd(
-            tbl_id=
+            tbl_id=tbl_id_str,
            name=name,
            user=user,
            is_replica=False,
@@ -229,16 +242,10 @@ class TableVersion:
            view_md=view_md,
            additional_md={},
         )
-        # create a schema.Table here, we need it to call our c'tor;
-        # don't add it to the session yet, we might add index metadata
-        tbl_record = schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(table_md))
 
         # create schema.TableVersion
         table_version_md = schema.TableVersionMd(
-            tbl_id=
-        )
-        tbl_version_record = schema.TableVersion(
-            tbl_id=tbl_record.id, version=0, md=dataclasses.asdict(table_version_md)
+            tbl_id=tbl_id_str, created_at=timestamp, version=0, schema_version=0, additional_md={}
         )
 
         # create schema.TableSchemaVersion
@@ -252,7 +259,7 @@ class TableVersion:
            schema_col_md[col.id] = md
 
         schema_version_md = schema.TableSchemaVersionMd(
-            tbl_id=
+            tbl_id=tbl_id_str,
            schema_version=0,
            preceding_schema_version=None,
            columns=schema_col_md,
@@ -261,9 +268,8 @@ class TableVersion:
            media_validation=media_validation.name.lower(),
            additional_md={},
         )
-
-
-        )
+
+        cat = pxt.catalog.Catalog.get()
 
         # if this is purely a snapshot (it doesn't require any additional storage for columns and it doesn't have a
         # predicate to apply at runtime), we don't create a physical table and simply use the base's table version path
@@ -274,22 +280,23 @@ class TableVersion:
            and view_md.sample_clause is None
            and len(cols) == 0
         ):
-
-
-
-
+            cat.store_tbl_md(
+                tbl_id=tbl_id,
+                dir_id=dir_id,
+                tbl_md=table_md,
+                version_md=table_version_md,
+                schema_version_md=schema_version_md,
+            )
+            return tbl_id, None
 
         # assert (base_path is not None) == (view_md is not None)
         is_snapshot = view_md is not None and view_md.is_snapshot
         effective_version = 0 if is_snapshot else None
         base_path = pxt.catalog.TableVersionPath.from_md(view_md.base_versions) if view_md is not None else None
         base = base_path.tbl_version if base_path is not None else None
-        tbl_version = cls(
-            tbl_record.id, table_md, effective_version, schema_version_md, [], base_path=base_path, base=base
-        )
+        tbl_version = cls(tbl_id, table_md, effective_version, schema_version_md, [], base_path=base_path, base=base)
         # TODO: break this up, so that Catalog.create_table() registers tbl_version
-        cat =
-        cat._tbl_versions[tbl_record.id, effective_version] = tbl_version
+        cat._tbl_versions[tbl_id, effective_version] = tbl_version
         tbl_version.init()
         tbl_version.store_tbl.create()
         is_mutable = not is_snapshot and not table_md.is_replica
@@ -306,12 +313,14 @@ class TableVersion:
                status = tbl_version._add_default_index(col)
                assert status is None or status.num_excs == 0
 
-
-
-
-
-
-
+        cat.store_tbl_md(
+            tbl_id=tbl_id,
+            dir_id=dir_id,
+            tbl_md=tbl_version.tbl_md,
+            version_md=table_version_md,
+            schema_version_md=schema_version_md,
+        )
+        return tbl_id, tbl_version
 
    @classmethod
    def create_replica(cls, md: schema.FullTableMd) -> TableVersion:
@@ -488,7 +497,7 @@ class TableVersion:
        )
 
        Catalog.get().store_tbl_md(
-            self.id, self._tbl_md, version_md, self._schema_version_md if new_schema_version else None
+            self.id, None, self._tbl_md, version_md, self._schema_version_md if new_schema_version else None
        )
 
    def _store_idx_name(self, idx_id: int) -> str:
@@ -693,6 +702,7 @@ class TableVersion:
                f'Cannot add non-nullable column {col.name!r} to table {self.name!r} with existing rows'
            )
 
+        computed_values = 0
        num_excs = 0
        cols_with_excs: list[Column] = []
        for col in cols_to_add:
@@ -731,18 +741,19 @@ class TableVersion:
            # populate the column
            from pixeltable.plan import Planner
 
-            plan
+            plan = Planner.create_add_column_plan(self.path, col)
            plan.ctx.num_rows = row_count
            try:
                plan.open()
                try:
-                    excs_per_col = self.store_tbl.load_column(col, plan,
+                    excs_per_col = self.store_tbl.load_column(col, plan, on_error == 'abort')
                except sql.exc.DBAPIError as exc:
                    # Wrap the DBAPIError in an excs.Error to unify processing in the subsequent except block
                    raise excs.Error(f'SQL error during execution of computed column `{col.name}`:\n{exc}') from exc
                if excs_per_col > 0:
                    cols_with_excs.append(col)
                num_excs += excs_per_col
+                computed_values += plan.ctx.num_computed_exprs * row_count
            finally:
                # Ensure cleanup occurs if an exception or keyboard interruption happens during `load_column()`.
                def cleanup_on_error() -> None:
@@ -765,12 +776,14 @@ class TableVersion:
 
        if print_stats:
            plan.ctx.profile.print(num_rows=row_count)
+
        # TODO: what to do about system columns with exceptions?
+        row_counts = RowCountStats(
+            upd_rows=row_count, num_excs=num_excs, computed_values=computed_values
+        )  # add_columns
        return UpdateStatus(
-            num_rows=row_count,
-            num_computed_values=row_count,
-            num_excs=num_excs,
            cols_with_excs=[f'{col.tbl.name}.{col.name}' for col in cols_with_excs if col.name is not None],
+            row_count_stats=row_counts,
        )
 
    def drop_column(self, col: Column) -> None:
@@ -910,14 +923,10 @@ class TableVersion:
        """Insert rows produced by exec_plan and propagate to views"""
        # we're creating a new version
        self.version += 1
-        result =
-        num_rows, num_excs, cols_with_excs = self.store_tbl.insert_rows(
+        cols_with_excs, result = self.store_tbl.insert_rows(
            exec_plan, v_min=self.version, rowids=rowids, abort_on_exc=abort_on_exc
        )
-        result
-        result.num_excs = num_excs
-        result.num_computed_values += exec_plan.ctx.num_computed_exprs * num_rows
-        result.cols_with_excs = [f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs]
+        result += UpdateStatus(cols_with_excs=[f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs])
        self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False)
 
        # update views
@@ -926,14 +935,10 @@ class TableVersion:
 
            plan, _ = Planner.create_view_load_plan(view.get().path, propagates_insert=True)
            status = view.get()._insert(plan, timestamp, print_stats=print_stats)
-            result
-            result.num_excs += status.num_excs
-            result.num_computed_values += status.num_computed_values
-            result.cols_with_excs += status.cols_with_excs
+            result += status.to_cascade()
 
-        result.cols_with_excs = list(dict.fromkeys(result.cols_with_excs).keys())  # remove duplicates
        if print_stats:
-            plan.ctx.profile.print(num_rows=num_rows)
+            plan.ctx.profile.print(num_rows=result.num_rows)  # This is the net rows after all propagations
        _logger.info(f'TableVersion {self.name}: new version {self.version}')
        return result
 
@@ -973,7 +978,7 @@ class TableVersion:
            cascade=cascade,
            show_progress=True,
        )
-        result
+        result += UpdateStatus(updated_cols=updated_cols)
        return result
 
    def batch_update(
@@ -1000,7 +1005,7 @@ class TableVersion:
        result = self.propagate_update(
            plan, delete_where_clause, recomputed_cols, base_versions=[], timestamp=time.time(), cascade=cascade
        )
-        result
+        result += UpdateStatus(updated_cols=[c.qualified_name for c in updated_cols])
 
        unmatched_rows = row_update_node.unmatched_rows()
        if len(unmatched_rows) > 0:
@@ -1008,7 +1013,7 @@ class TableVersion:
            raise excs.Error(f'batch_update(): {len(unmatched_rows)} row(s) not found')
        if insert_if_not_exists:
            insert_status = self.insert(unmatched_rows, None, print_stats=False, fail_on_exception=False)
-            result += insert_status
+            result += insert_status.to_cascade()
        return result
 
    def _validate_update_spec(
@@ -1061,6 +1066,38 @@ class TableVersion:
 
        return update_targets
 
+    def recompute_columns(self, col_names: list[str], errors_only: bool = False, cascade: bool = True) -> UpdateStatus:
+        assert not self.is_snapshot
+        assert all(name in self.cols_by_name for name in col_names)
+        assert len(col_names) > 0
+        assert len(col_names) == 1 or not errors_only
+
+        from pixeltable.plan import Planner
+
+        target_columns = [self.cols_by_name[name] for name in col_names]
+        where_clause: Optional[exprs.Expr] = None
+        if errors_only:
+            where_clause = (
+                exprs.ColumnPropertyRef(exprs.ColumnRef(target_columns[0]), exprs.ColumnPropertyRef.Property.ERRORTYPE)
+                != None
+            )
+        plan, updated_cols, recomputed_cols = Planner.create_update_plan(
+            self.path, update_targets={}, recompute_targets=target_columns, where_clause=where_clause, cascade=cascade
+        )
+        from pixeltable.exprs import SqlElementCache
+
+        result = self.propagate_update(
+            plan,
+            where_clause.sql_expr(SqlElementCache()) if where_clause is not None else None,
+            recomputed_cols,
+            base_versions=[],
+            timestamp=time.time(),
+            cascade=cascade,
+            show_progress=True,
+        )
+        result += UpdateStatus(updated_cols=updated_cols)
+        return result
+
    def propagate_update(
        self,
        plan: Optional[exec.ExecNode],
@@ -1071,18 +1108,20 @@ class TableVersion:
        cascade: bool,
        show_progress: bool = True,
    ) -> UpdateStatus:
-        result = UpdateStatus()
        if plan is not None:
            # we're creating a new version
            self.version += 1
-
-
+            cols_with_excs, status = self.store_tbl.insert_rows(plan, v_min=self.version, show_progress=show_progress)
+            result = status.insert_to_update()
+            result += UpdateStatus(
+                cols_with_excs=[f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs]
            )
-            result.cols_with_excs = [f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs]
            self.store_tbl.delete_rows(
                self.version, base_versions=base_versions, match_on_vmin=True, where_clause=where_clause
            )
            self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False)
+        else:
+            result = UpdateStatus()
 
        if cascade:
            base_versions = [None if plan is None else self.version, *base_versions]  # don't update in place
@@ -1097,17 +1136,17 @@ class TableVersion:
            status = view.get().propagate_update(
                plan, None, recomputed_view_cols, base_versions=base_versions, timestamp=timestamp, cascade=True
            )
-            result
-            result.num_excs += status.num_excs
-            result.cols_with_excs += status.cols_with_excs
+            result += status.to_cascade()
 
-        result.cols_with_excs = list(dict.fromkeys(result.cols_with_excs).keys())  # remove duplicates
        return result
 
    def delete(self, where: Optional[exprs.Expr] = None) -> UpdateStatus:
        """Delete rows in this table.
        Args:
            where: a predicate to filter rows to delete.
+
+        Returns:
+            UpdateStatus: an object containing the number of deleted rows and other statistics.
        """
        assert self.is_insertable
        from pixeltable.exprs import Expr
@@ -1123,14 +1162,12 @@ class TableVersion:
            raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
        sql_where_clause = analysis_info.sql_where_clause
 
-
-
-        status = UpdateStatus(num_rows=num_rows)
+        status = self.propagate_delete(sql_where_clause, base_versions=[], timestamp=time.time())
        return status
 
    def propagate_delete(
        self, where: Optional[exprs.Expr], base_versions: list[Optional[int]], timestamp: float
-    ) ->
+    ) -> UpdateStatus:
        """Delete rows in this table and propagate to views.
        Args:
            where: a predicate to filter rows to delete.
@@ -1146,18 +1183,21 @@ class TableVersion:
        # sql.sql.visitors.traverse(sql_where_clause, {}, {'column': collect_cols})
        # x = [f'{str(c)}:{hash(c)}:{id(c.table)}' for c in sql_cols]
        # print(f'where_clause cols: {x}')
-
+        del_rows = self.store_tbl.delete_rows(
            self.version + 1, base_versions=base_versions, match_on_vmin=False, where_clause=sql_where_clause
        )
-
+        row_counts = RowCountStats(del_rows=del_rows)  # delete
+        result = UpdateStatus(row_count_stats=row_counts)
+        if del_rows > 0:
            # we're creating a new version
            self.version += 1
            self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False)
            for view in self.mutable_views:
-
+                status = view.get().propagate_delete(
                    where=None, base_versions=[self.version, *base_versions], timestamp=timestamp
                )
-
+                result += status.to_cascade()
+        return result
 
    def revert(self) -> None:
        """Reverts the table to the previous version."""
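Note on the UpdateStatus changes above: 0.4.2 replaces in-place mutation of UpdateStatus counters (num_rows, num_excs, num_computed_values) with additive composition, where per-operation counts live in a RowCountStats value object and per-view statuses are folded in via `result += status.to_cascade()`. The following is a minimal, self-contained sketch of that accumulation pattern; the class bodies are simplified stand-ins written for illustration, not the actual pixeltable.catalog.globals definitions.

from dataclasses import dataclass

@dataclass(frozen=True)
class RowCountStats:
    # stand-in: only the fields visible in the diff (upd_rows, del_rows, num_excs, computed_values)
    upd_rows: int = 0
    del_rows: int = 0
    num_excs: int = 0
    computed_values: int = 0

    def __add__(self, other: 'RowCountStats') -> 'RowCountStats':
        return RowCountStats(
            upd_rows=self.upd_rows + other.upd_rows,
            del_rows=self.del_rows + other.del_rows,
            num_excs=self.num_excs + other.num_excs,
            computed_values=self.computed_values + other.computed_values,
        )

@dataclass(frozen=True)
class UpdateStatus:
    # stand-in: aggregates row counts and de-duplicates cols_with_excs on +
    row_count_stats: RowCountStats = RowCountStats()
    cols_with_excs: tuple = ()

    def __add__(self, other: 'UpdateStatus') -> 'UpdateStatus':
        cols = tuple(dict.fromkeys(self.cols_with_excs + other.cols_with_excs))
        return UpdateStatus(self.row_count_stats + other.row_count_stats, cols)

    def to_cascade(self) -> 'UpdateStatus':
        # the real method reclassifies counts as cascade effects; a passthrough suffices here
        return self

# a delete on the base table, followed by propagation to two views
result = UpdateStatus(row_count_stats=RowCountStats(del_rows=10))
for view_status in (UpdateStatus(RowCountStats(del_rows=3)), UpdateStatus(RowCountStats(del_rows=2))):
    result += view_status.to_cascade()
assert result.row_count_stats.del_rows == 15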

pixeltable/catalog/table_version_handle.py
CHANGED

@@ -1,13 +1,16 @@
 from __future__ import annotations
 
 import logging
+from dataclasses import dataclass
 from typing import TYPE_CHECKING, Optional
 from uuid import UUID
 
+from pixeltable import exceptions as excs
+
 from .table_version import TableVersion
 
 if TYPE_CHECKING:
-
+    from pixeltable.catalog import Column
 
 _logger = logging.getLogger('pixeltable')
 
@@ -67,3 +70,25 @@ class TableVersionHandle:
    @classmethod
    def from_dict(cls, d: dict) -> TableVersionHandle:
        return cls(UUID(d['id']), d['effective_version'])
+
+
+@dataclass(frozen=True)
+class ColumnHandle:
+    tbl_version: TableVersionHandle
+    col_id: int
+
+    def get(self) -> 'Column':
+        if self.col_id not in self.tbl_version.get().cols_by_id:
+            schema_version_drop = self.tbl_version.get()._tbl_md.column_md[self.col_id].schema_version_drop
+            raise excs.Error(
+                f'Column has been dropped (no record for column ID {self.col_id} in table '
+                f'{self.tbl_version.get().versioned_name!r}; it was dropped in table version {schema_version_drop})'
+            )
+        return self.tbl_version.get().cols_by_id[self.col_id]
+
+    def as_dict(self) -> dict:
+        return {'tbl_version': self.tbl_version.as_dict(), 'col_id': self.col_id}
+
+    @classmethod
+    def from_dict(cls, d: dict) -> ColumnHandle:
+        return cls(tbl_version=TableVersionHandle.from_dict(d['tbl_version']), col_id=d['col_id'])
pixeltable/catalog/view.py
CHANGED

@@ -229,7 +229,7 @@ class View(Table):
 
        try:
            plan, _ = Planner.create_view_load_plan(view._tbl_version_path)
-
+            _, status = tbl_version.store_tbl.insert_rows(plan, v_min=tbl_version.version)
        except:
            # we need to remove the orphaned TableVersion instance
            del catalog.Catalog.get()._tbl_versions[tbl_version.id, tbl_version.effective_version]
@@ -238,7 +238,9 @@ class View(Table):
            # also remove tbl_version from the base
            base_tbl_version.mutable_views.remove(TableVersionHandle.create(tbl_version))
            raise
-        Env.get().console_logger.info(
+        Env.get().console_logger.info(
+            f'Created view `{name}` with {status.num_rows} rows, {status.num_excs} exceptions.'
+        )
 
        session.commit()
        return view
pixeltable/exprs/column_property_ref.py
CHANGED

@@ -55,18 +55,9 @@ class ColumnPropertyRef(Expr):
        return self.prop in (self.Property.ERRORTYPE, self.Property.ERRORMSG)
 
    def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
-        if not self._col_ref.
+        if not self._col_ref.col_handle.get().is_stored:
            return None
-
-        # we need to reestablish that we have the correct Column instance, there could have been a metadata
-        # reload since init()
-        # TODO: add an explicit prepare phase (ie, Expr.prepare()) that gives every subclass instance a chance to
-        # perform runtime checks and update state
-        tv = self._col_ref.tbl_version.get()
-        assert tv.is_validated
-        # we can assume at this point during query execution that the column exists
-        assert self._col_ref.col_id in tv.cols_by_id
-        col = tv.cols_by_id[self._col_ref.col_id]
+        col = self._col_ref.col_handle.get()
 
        # the errortype/-msg properties of a read-validated media column need to be extracted from the DataRow
        if (
pixeltable/exprs/column_ref.py
CHANGED

@@ -10,6 +10,7 @@ import pixeltable as pxt
 from pixeltable import catalog, exceptions as excs, iterators as iters
 
 from ..utils.description_helper import DescriptionHelper
+from ..utils.filecache import FileCache
 from .data_row import DataRow
 from .expr import Expr
 from .row_builder import RowBuilder
@@ -41,7 +42,8 @@ class ColumnRef(Expr):
    insert them into the EvalCtxs as needed
    """
 
-    col: catalog.Column
+    col: catalog.Column  # TODO: merge with col_handle
+    col_handle: catalog.ColumnHandle
    reference_tbl: Optional[catalog.TableVersionPath]
    is_unstored_iter_col: bool
    iter_arg_ctx: Optional[RowBuilder.EvalCtx]
@@ -52,10 +54,6 @@ class ColumnRef(Expr):
    id: int
    perform_validation: bool  # if True, performs media validation
 
-    # needed by sql_expr() to re-resolve Column instance after a metadata reload
-    tbl_version: catalog.TableVersionHandle
-    col_id: int
-
    def __init__(
        self,
        col: catalog.Column,
@@ -66,8 +64,7 @@ class ColumnRef(Expr):
        assert col.tbl is not None
        self.col = col
        self.reference_tbl = reference_tbl
-        self.
-        self.col_id = col.id
+        self.col_handle = catalog.ColumnHandle(col.tbl.handle, col.id)
 
        self.is_unstored_iter_col = col.tbl.is_component_view and col.tbl.is_iterator_column(col) and not col.is_stored
        self.iter_arg_ctx = None
@@ -170,6 +167,20 @@ class ColumnRef(Expr):
            idx_info = embedding_idx_info
        return idx_info
 
+    def recompute(self, *, cascade: bool = True, errors_only: bool = False) -> catalog.UpdateStatus:
+        cat = catalog.Catalog.get()
+        # lock_mutable_tree=True: we need to be able to see whether any transitive view has column dependents
+        with cat.begin_xact(tbl=self.reference_tbl, for_write=True, lock_mutable_tree=True):
+            tbl_version = self.col_handle.tbl_version.get()
+            if tbl_version.id != self.reference_tbl.tbl_id:
+                raise excs.Error('Cannot recompute column of a base.')
+            if tbl_version.is_snapshot:
+                raise excs.Error('Cannot recompute column of a snapshot.')
+            col_name = self.col_handle.get().name
+            status = tbl_version.recompute_columns([col_name], errors_only=errors_only, cascade=cascade)
+        FileCache.get().emit_eviction_warnings()
+        return status
+
    def similarity(self, item: Any, *, idx: Optional[str] = None) -> Expr:
        from .similarity_expr import SimilarityExpr
 
@@ -241,16 +252,7 @@ class ColumnRef(Expr):
    def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
        if self.perform_validation:
            return None
-
-        # reload since init()
-        # TODO: add an explicit prepare phase (ie, Expr.prepare()) that gives every subclass instance a chance to
-        # perform runtime checks and update state
-        tv = self.tbl_version.get()
-        assert tv.is_validated
-        # we can assume at this point during query execution that the column exists
-        assert self.col_id in tv.cols_by_id
-        self.col = tv.cols_by_id[self.col_id]
-        assert self.col.tbl is tv
+        self.col = self.col_handle.get()
        return self.col.sa_col
 
    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
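The new ColumnRef.recompute() method above exposes per-column recomputation, with optional cascading to dependents and an errors-only mode, directly from a column reference. Assuming a pre-existing table with a computed column, usage would presumably look like the sketch below; the table name `films` and column name `summary` are hypothetical.

import pixeltable as pxt

t = pxt.get_table('films')  # hypothetical table with a computed column `summary`
# re-run only the rows whose `summary` previously raised an exception, and cascade to dependents
status = t.summary.recompute(cascade=True, errors_only=True)
print(status.num_excs, status.cols_with_excs)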
pixeltable/exprs/data_row.py
CHANGED

@@ -42,6 +42,10 @@ class DataRow:
    has_val: np.ndarray  # of bool
    excs: np.ndarray  # of object
 
+    # If `may_have_exc` is False, then we guarantee that no slot has an exception set. This is used to optimize
+    # exception handling under normal operation.
+    _may_have_exc: bool
+
    # expr evaluation state; indexed by slot idx
    missing_slots: np.ndarray  # of bool; number of missing dependencies
    missing_dependents: np.ndarray  # of int16; number of missing dependents
@@ -90,6 +94,7 @@ class DataRow:
        self.vals = np.full(num_slots, None, dtype=object)
        self.has_val = np.zeros(num_slots, dtype=bool)
        self.excs = np.full(num_slots, None, dtype=object)
+        self._may_have_exc = False
        self.missing_slots = np.zeros(num_slots, dtype=bool)
        self.missing_dependents = np.zeros(num_slots, dtype=np.int16)
        self.is_scheduled = np.zeros(num_slots, dtype=bool)
@@ -136,6 +141,9 @@ class DataRow:
        """
        Returns True if an exception has been set for the given slot index, or for any slot index if slot_idx is None
        """
+        if not self._may_have_exc:
+            return False
+
        if slot_idx is not None:
            return self.excs[slot_idx] is not None
        return (self.excs != None).any()
@@ -154,6 +162,7 @@ class DataRow:
    def set_exc(self, slot_idx: int, exc: Exception) -> None:
        assert self.excs[slot_idx] is None
        self.excs[slot_idx] = exc
+        self._may_have_exc = True
 
        # an exception means the value is None
        self.has_val[slot_idx] = True