pixeltable 0.4.0rc3__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (58)
  1. pixeltable/__init__.py +1 -1
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +11 -2
  4. pixeltable/catalog/catalog.py +407 -119
  5. pixeltable/catalog/column.py +38 -26
  6. pixeltable/catalog/globals.py +130 -15
  7. pixeltable/catalog/insertable_table.py +10 -9
  8. pixeltable/catalog/schema_object.py +6 -0
  9. pixeltable/catalog/table.py +245 -119
  10. pixeltable/catalog/table_version.py +142 -116
  11. pixeltable/catalog/table_version_handle.py +30 -2
  12. pixeltable/catalog/table_version_path.py +28 -4
  13. pixeltable/catalog/view.py +14 -20
  14. pixeltable/config.py +4 -0
  15. pixeltable/dataframe.py +10 -9
  16. pixeltable/env.py +5 -11
  17. pixeltable/exceptions.py +6 -0
  18. pixeltable/exec/exec_node.py +2 -0
  19. pixeltable/exec/expr_eval/expr_eval_node.py +4 -4
  20. pixeltable/exec/sql_node.py +47 -30
  21. pixeltable/exprs/column_property_ref.py +2 -10
  22. pixeltable/exprs/column_ref.py +24 -21
  23. pixeltable/exprs/data_row.py +9 -0
  24. pixeltable/exprs/expr.py +4 -4
  25. pixeltable/exprs/row_builder.py +44 -13
  26. pixeltable/func/__init__.py +1 -0
  27. pixeltable/func/mcp.py +74 -0
  28. pixeltable/func/query_template_function.py +4 -2
  29. pixeltable/func/tools.py +12 -2
  30. pixeltable/func/udf.py +2 -2
  31. pixeltable/functions/__init__.py +1 -0
  32. pixeltable/functions/groq.py +108 -0
  33. pixeltable/functions/huggingface.py +8 -6
  34. pixeltable/functions/mistralai.py +2 -13
  35. pixeltable/functions/openai.py +1 -6
  36. pixeltable/functions/replicate.py +2 -2
  37. pixeltable/functions/util.py +6 -1
  38. pixeltable/globals.py +0 -2
  39. pixeltable/io/external_store.py +81 -54
  40. pixeltable/io/globals.py +1 -1
  41. pixeltable/io/label_studio.py +49 -45
  42. pixeltable/io/table_data_conduit.py +1 -1
  43. pixeltable/metadata/__init__.py +1 -1
  44. pixeltable/metadata/converters/convert_37.py +15 -0
  45. pixeltable/metadata/converters/convert_38.py +39 -0
  46. pixeltable/metadata/notes.py +2 -0
  47. pixeltable/metadata/schema.py +5 -0
  48. pixeltable/metadata/utils.py +78 -0
  49. pixeltable/plan.py +59 -139
  50. pixeltable/share/packager.py +2 -2
  51. pixeltable/store.py +114 -103
  52. pixeltable/type_system.py +30 -0
  53. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/METADATA +1 -1
  54. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/RECORD +57 -53
  55. pixeltable/utils/sample.py +0 -25
  56. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/LICENSE +0 -0
  57. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/WHEEL +0 -0
  58. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/entry_points.txt +0 -0
@@ -29,7 +29,14 @@ if TYPE_CHECKING:
29
29
 
30
30
  from ..func.globals import resolve_symbol
31
31
  from .column import Column
32
- from .globals import _POS_COLUMN_NAME, _ROWID_COLUMN_NAME, MediaValidation, UpdateStatus, is_valid_identifier
32
+ from .globals import (
33
+ _POS_COLUMN_NAME,
34
+ _ROWID_COLUMN_NAME,
35
+ MediaValidation,
36
+ RowCountStats,
37
+ UpdateStatus,
38
+ is_valid_identifier,
39
+ )
33
40
 
34
41
  if TYPE_CHECKING:
35
42
  from pixeltable import exec, store
@@ -167,18 +174,6 @@ class TableVersion:
167
174
  self.idxs_by_name = {}
168
175
  self.external_stores = {}
169
176
 
170
- def init(self) -> None:
171
- """
172
- Initialize schema-related in-memory metadata separately, now that this TableVersion instance is visible
173
- in Catalog.
174
- """
175
- from .catalog import Catalog
176
-
177
- assert (self.id, self.effective_version) in Catalog.get()._tbl_versions
178
- self._init_schema()
179
- # init external stores; this needs to happen after the schema is created
180
- self._init_external_stores()
181
-
182
177
  def __hash__(self) -> int:
183
178
  return hash(self.id)
184
179
 
@@ -195,6 +190,12 @@ class TableVersion:
195
190
  else:
196
191
  return f'{self.name}:{self.effective_version}'
197
192
 
193
+ @property
194
+ def handle(self) -> 'TableVersionHandle':
195
+ from .table_version_handle import TableVersionHandle
196
+
197
+ return TableVersionHandle(self.id, self.effective_version, self)
198
+
198
199
  @classmethod
199
200
  def create(
200
201
  cls,
@@ -207,7 +208,6 @@ class TableVersion:
207
208
  # base_path: Optional[pxt.catalog.TableVersionPath] = None,
208
209
  view_md: Optional[schema.ViewMd] = None,
209
210
  ) -> tuple[UUID, Optional[TableVersion]]:
210
- session = Env.get().session
211
211
  user = Env.get().user
212
212
 
213
213
  # assign ids
@@ -224,8 +224,9 @@ class TableVersion:
224
224
  # Column.dependent_cols for existing cols is wrong at this point, but init() will set it correctly
225
225
  column_md = cls._create_column_md(cols)
226
226
  tbl_id = uuid.uuid4()
227
+ tbl_id_str = str(tbl_id)
227
228
  table_md = schema.TableMd(
228
- tbl_id=str(tbl_id),
229
+ tbl_id=tbl_id_str,
229
230
  name=name,
230
231
  user=user,
231
232
  is_replica=False,
@@ -234,22 +235,17 @@ class TableVersion:
234
235
  next_col_id=len(cols),
235
236
  next_idx_id=0,
236
237
  next_row_id=0,
238
+ view_sn=0,
237
239
  column_md=column_md,
238
240
  index_md={},
239
241
  external_stores=[],
240
242
  view_md=view_md,
241
243
  additional_md={},
242
244
  )
243
- # create a schema.Table here, we need it to call our c'tor;
244
- # don't add it to the session yet, we might add index metadata
245
- tbl_record = schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(table_md))
246
245
 
247
246
  # create schema.TableVersion
248
247
  table_version_md = schema.TableVersionMd(
249
- tbl_id=str(tbl_record.id), created_at=timestamp, version=0, schema_version=0, additional_md={}
250
- )
251
- tbl_version_record = schema.TableVersion(
252
- tbl_id=tbl_record.id, version=0, md=dataclasses.asdict(table_version_md)
248
+ tbl_id=tbl_id_str, created_at=timestamp, version=0, schema_version=0, additional_md={}
253
249
  )
254
250
 
255
251
  # create schema.TableSchemaVersion
@@ -263,7 +259,7 @@ class TableVersion:
263
259
  schema_col_md[col.id] = md
264
260
 
265
261
  schema_version_md = schema.TableSchemaVersionMd(
266
- tbl_id=str(tbl_record.id),
262
+ tbl_id=tbl_id_str,
267
263
  schema_version=0,
268
264
  preceding_schema_version=None,
269
265
  columns=schema_col_md,
@@ -272,9 +268,8 @@ class TableVersion:
272
268
  media_validation=media_validation.name.lower(),
273
269
  additional_md={},
274
270
  )
275
- schema_version_record = schema.TableSchemaVersion(
276
- tbl_id=tbl_record.id, schema_version=0, md=dataclasses.asdict(schema_version_md)
277
- )
271
+
272
+ cat = pxt.catalog.Catalog.get()
278
273
 
279
274
  # if this is purely a snapshot (it doesn't require any additional storage for columns and it doesn't have a
280
275
  # predicate to apply at runtime), we don't create a physical table and simply use the base's table version path
@@ -285,22 +280,23 @@ class TableVersion:
285
280
  and view_md.sample_clause is None
286
281
  and len(cols) == 0
287
282
  ):
288
- session.add(tbl_record)
289
- session.add(tbl_version_record)
290
- session.add(schema_version_record)
291
- return tbl_record.id, None
283
+ cat.store_tbl_md(
284
+ tbl_id=tbl_id,
285
+ dir_id=dir_id,
286
+ tbl_md=table_md,
287
+ version_md=table_version_md,
288
+ schema_version_md=schema_version_md,
289
+ )
290
+ return tbl_id, None
292
291
 
293
292
  # assert (base_path is not None) == (view_md is not None)
294
293
  is_snapshot = view_md is not None and view_md.is_snapshot
295
294
  effective_version = 0 if is_snapshot else None
296
295
  base_path = pxt.catalog.TableVersionPath.from_md(view_md.base_versions) if view_md is not None else None
297
296
  base = base_path.tbl_version if base_path is not None else None
298
- tbl_version = cls(
299
- tbl_record.id, table_md, effective_version, schema_version_md, [], base_path=base_path, base=base
300
- )
297
+ tbl_version = cls(tbl_id, table_md, effective_version, schema_version_md, [], base_path=base_path, base=base)
301
298
  # TODO: break this up, so that Catalog.create_table() registers tbl_version
302
- cat = pxt.catalog.Catalog.get()
303
- cat._tbl_versions[tbl_record.id, effective_version] = tbl_version
299
+ cat._tbl_versions[tbl_id, effective_version] = tbl_version
304
300
  tbl_version.init()
305
301
  tbl_version.store_tbl.create()
306
302
  is_mutable = not is_snapshot and not table_md.is_replica
@@ -317,12 +313,14 @@ class TableVersion:
317
313
  status = tbl_version._add_default_index(col)
318
314
  assert status is None or status.num_excs == 0
319
315
 
320
- # we re-create the tbl_record here, now that we have new index metadata
321
- tbl_record = schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(tbl_version.tbl_md))
322
- session.add(tbl_record)
323
- session.add(tbl_version_record)
324
- session.add(schema_version_record)
325
- return tbl_record.id, tbl_version
316
+ cat.store_tbl_md(
317
+ tbl_id=tbl_id,
318
+ dir_id=dir_id,
319
+ tbl_md=tbl_version.tbl_md,
320
+ version_md=table_version_md,
321
+ schema_version_md=schema_version_md,
322
+ )
323
+ return tbl_id, tbl_version
326
324
 
327
325
  @classmethod
328
326
  def create_replica(cls, md: schema.FullTableMd) -> TableVersion:
@@ -342,24 +340,39 @@ class TableVersion:
342
340
  return tbl_version
343
341
 
344
342
  def drop(self) -> None:
345
- from .catalog import Catalog
346
-
347
343
  if self.is_view and self.is_mutable:
348
344
  # update mutable_views
345
+ # TODO: invalidate base to force reload
349
346
  from .table_version_handle import TableVersionHandle
350
347
 
351
348
  assert self.base is not None
352
349
  if self.base.get().is_mutable:
353
350
  self.base.get().mutable_views.remove(TableVersionHandle.create(self))
354
351
 
355
- cat = Catalog.get()
352
+ # cat = Catalog.get()
356
353
  # delete this table and all associated data
357
354
  MediaStore.delete(self.id)
358
355
  FileCache.get().clear(tbl_id=self.id)
359
- cat.delete_tbl_md(self.id)
356
+ # cat.delete_tbl_md(self.id)
360
357
  self.store_tbl.drop()
361
358
  # de-register table version from catalog
362
- cat.remove_tbl_version(self)
359
+ # cat.remove_tbl_version(self)
360
+
361
+ def init(self) -> None:
362
+ """
363
+ Initialize schema-related in-memory metadata separately, now that this TableVersion instance is visible
364
+ in Catalog.
365
+ """
366
+ from .catalog import Catalog
367
+
368
+ cat = Catalog.get()
369
+ assert (self.id, self.effective_version) in cat._tbl_versions
370
+ self._init_schema()
371
+ if not self.is_snapshot:
372
+ cat.record_column_dependencies(self)
373
+
374
+ # init external stores; this needs to happen after the schema is created
375
+ self._init_external_stores()
363
376
 
364
377
  def _init_schema(self) -> None:
365
378
  # create columns first, so the indices can reference them
@@ -369,6 +382,10 @@ class TableVersion:
369
382
  # create the sa schema only after creating the columns and indices
370
383
  self._init_sa_schema()
371
384
 
385
+ # create value_exprs after everything else has been initialized
386
+ for col in self.cols_by_id.values():
387
+ col.init_value_expr()
388
+
372
389
  def _init_cols(self) -> None:
373
390
  """Initialize self.cols with the columns visible in our effective version"""
374
391
  self.cols = []
@@ -395,6 +412,7 @@ class TableVersion:
395
412
  schema_version_add=col_md.schema_version_add,
396
413
  schema_version_drop=col_md.schema_version_drop,
397
414
  value_expr_dict=col_md.value_expr,
415
+ tbl=self,
398
416
  )
399
417
  col.tbl = self
400
418
  self.cols.append(col)
@@ -410,10 +428,10 @@ class TableVersion:
410
428
  self.cols_by_name[col.name] = col
411
429
  self.cols_by_id[col.id] = col
412
430
 
413
- # make sure to traverse columns ordered by position = order in which cols were created;
414
- # this guarantees that references always point backwards
415
- if not self.is_snapshot and col_md.value_expr is not None:
416
- self._record_refd_columns(col)
431
+ # # make sure to traverse columns ordered by position = order in which cols were created;
432
+ # # this guarantees that references always point backwards
433
+ # if not self.is_snapshot and col_md.value_expr is not None:
434
+ # self._record_refd_columns(col)
417
435
 
418
436
  def _init_idxs(self) -> None:
419
437
  # self.idx_md = tbl_md.index_md
@@ -479,14 +497,9 @@ class TableVersion:
479
497
  )
480
498
 
481
499
  Catalog.get().store_tbl_md(
482
- self.id, self._tbl_md, version_md, self._schema_version_md if new_schema_version else None
500
+ self.id, None, self._tbl_md, version_md, self._schema_version_md if new_schema_version else None
483
501
  )
484
502
 
485
- def ensure_md_loaded(self) -> None:
486
- """Ensure that table metadata is loaded."""
487
- for col in self.cols_by_id.values():
488
- _ = col.value_expr
489
-
490
503
  def _store_idx_name(self, idx_id: int) -> str:
491
504
  """Return name of index in the store, which needs to be globally unique"""
492
505
  return f'idx_{self.id.hex}_{idx_id}'
@@ -689,6 +702,7 @@ class TableVersion:
689
702
  f'Cannot add non-nullable column {col.name!r} to table {self.name!r} with existing rows'
690
703
  )
691
704
 
705
+ computed_values = 0
692
706
  num_excs = 0
693
707
  cols_with_excs: list[Column] = []
694
708
  for col in cols_to_add:
@@ -700,9 +714,6 @@ class TableVersion:
700
714
  if col.name is not None:
701
715
  self.cols_by_name[col.name] = col
702
716
  self.cols_by_id[col.id] = col
703
- if col.value_expr is not None:
704
- col.check_value_expr()
705
- self._record_refd_columns(col)
706
717
 
707
718
  # also add to stored md
708
719
  self._tbl_md.column_md[col.id] = schema.ColumnMd(
@@ -730,18 +741,19 @@ class TableVersion:
730
741
  # populate the column
731
742
  from pixeltable.plan import Planner
732
743
 
733
- plan, value_expr_slot_idx = Planner.create_add_column_plan(self.path, col)
744
+ plan = Planner.create_add_column_plan(self.path, col)
734
745
  plan.ctx.num_rows = row_count
735
746
  try:
736
747
  plan.open()
737
748
  try:
738
- excs_per_col = self.store_tbl.load_column(col, plan, value_expr_slot_idx, on_error)
749
+ excs_per_col = self.store_tbl.load_column(col, plan, on_error == 'abort')
739
750
  except sql.exc.DBAPIError as exc:
740
751
  # Wrap the DBAPIError in an excs.Error to unify processing in the subsequent except block
741
752
  raise excs.Error(f'SQL error during execution of computed column `{col.name}`:\n{exc}') from exc
742
753
  if excs_per_col > 0:
743
754
  cols_with_excs.append(col)
744
755
  num_excs += excs_per_col
756
+ computed_values += plan.ctx.num_computed_exprs * row_count
745
757
  finally:
746
758
  # Ensure cleanup occurs if an exception or keyboard interruption happens during `load_column()`.
747
759
  def cleanup_on_error() -> None:
@@ -760,14 +772,18 @@ class TableVersion:
760
772
  run_cleanup_on_exception(cleanup_on_error)
761
773
  plan.close()
762
774
 
775
+ pxt.catalog.Catalog.get().record_column_dependencies(self)
776
+
763
777
  if print_stats:
764
778
  plan.ctx.profile.print(num_rows=row_count)
765
- # TODO(mkornacker): what to do about system columns with exceptions?
779
+
780
+ # TODO: what to do about system columns with exceptions?
781
+ row_counts = RowCountStats(
782
+ upd_rows=row_count, num_excs=num_excs, computed_values=computed_values
783
+ ) # add_columns
766
784
  return UpdateStatus(
767
- num_rows=row_count,
768
- num_computed_values=row_count,
769
- num_excs=num_excs,
770
785
  cols_with_excs=[f'{col.tbl.name}.{col.name}' for col in cols_with_excs if col.name is not None],
786
+ row_count_stats=row_counts,
771
787
  )
772
788
 
773
789
  def drop_column(self, col: Column) -> None:
@@ -805,13 +821,6 @@ class TableVersion:
805
821
  assert not self.is_snapshot
806
822
 
807
823
  for col in cols:
808
- if col.value_expr is not None:
809
- # update Column.dependent_cols
810
- for c in self.cols:
811
- if c == col:
812
- break
813
- c.dependent_cols.discard(col)
814
-
815
824
  col.schema_version_drop = self.schema_version
816
825
  if col.name is not None:
817
826
  assert col.name in self.cols_by_name
@@ -828,6 +837,7 @@ class TableVersion:
828
837
  schema_col.pos = pos
829
838
 
830
839
  self.store_tbl.create_sa_tbl()
840
+ pxt.catalog.Catalog.get().record_column_dependencies(self)
831
841
 
832
842
  def rename_column(self, old_name: str, new_name: str) -> None:
833
843
  """Rename a column."""
@@ -913,14 +923,10 @@ class TableVersion:
913
923
  """Insert rows produced by exec_plan and propagate to views"""
914
924
  # we're creating a new version
915
925
  self.version += 1
916
- result = UpdateStatus()
917
- num_rows, num_excs, cols_with_excs = self.store_tbl.insert_rows(
926
+ cols_with_excs, result = self.store_tbl.insert_rows(
918
927
  exec_plan, v_min=self.version, rowids=rowids, abort_on_exc=abort_on_exc
919
928
  )
920
- result.num_rows = num_rows
921
- result.num_excs = num_excs
922
- result.num_computed_values += exec_plan.ctx.num_computed_exprs * num_rows
923
- result.cols_with_excs = [f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs]
929
+ result += UpdateStatus(cols_with_excs=[f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs])
924
930
  self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False)
925
931
 
926
932
  # update views
@@ -929,14 +935,10 @@ class TableVersion:
929
935
 
930
936
  plan, _ = Planner.create_view_load_plan(view.get().path, propagates_insert=True)
931
937
  status = view.get()._insert(plan, timestamp, print_stats=print_stats)
932
- result.num_rows += status.num_rows
933
- result.num_excs += status.num_excs
934
- result.num_computed_values += status.num_computed_values
935
- result.cols_with_excs += status.cols_with_excs
938
+ result += status.to_cascade()
936
939
 
937
- result.cols_with_excs = list(dict.fromkeys(result.cols_with_excs).keys()) # remove duplicates
938
940
  if print_stats:
939
- plan.ctx.profile.print(num_rows=num_rows)
941
+ plan.ctx.profile.print(num_rows=result.num_rows) # This is the net rows after all propagations
940
942
  _logger.info(f'TableVersion {self.name}: new version {self.version}')
941
943
  return result
942
944
 
@@ -976,7 +978,7 @@ class TableVersion:
976
978
  cascade=cascade,
977
979
  show_progress=True,
978
980
  )
979
- result.updated_cols = updated_cols
981
+ result += UpdateStatus(updated_cols=updated_cols)
980
982
  return result
981
983
 
982
984
  def batch_update(
@@ -1003,7 +1005,7 @@ class TableVersion:
1003
1005
  result = self.propagate_update(
1004
1006
  plan, delete_where_clause, recomputed_cols, base_versions=[], timestamp=time.time(), cascade=cascade
1005
1007
  )
1006
- result.updated_cols = [c.qualified_name for c in updated_cols]
1008
+ result += UpdateStatus(updated_cols=[c.qualified_name for c in updated_cols])
1007
1009
 
1008
1010
  unmatched_rows = row_update_node.unmatched_rows()
1009
1011
  if len(unmatched_rows) > 0:
@@ -1011,7 +1013,7 @@ class TableVersion:
1011
1013
  raise excs.Error(f'batch_update(): {len(unmatched_rows)} row(s) not found')
1012
1014
  if insert_if_not_exists:
1013
1015
  insert_status = self.insert(unmatched_rows, None, print_stats=False, fail_on_exception=False)
1014
- result += insert_status
1016
+ result += insert_status.to_cascade()
1015
1017
  return result
1016
1018
 
1017
1019
  def _validate_update_spec(
@@ -1064,6 +1066,38 @@ class TableVersion:
1064
1066
 
1065
1067
  return update_targets
1066
1068
 
1069
+ def recompute_columns(self, col_names: list[str], errors_only: bool = False, cascade: bool = True) -> UpdateStatus:
1070
+ assert not self.is_snapshot
1071
+ assert all(name in self.cols_by_name for name in col_names)
1072
+ assert len(col_names) > 0
1073
+ assert len(col_names) == 1 or not errors_only
1074
+
1075
+ from pixeltable.plan import Planner
1076
+
1077
+ target_columns = [self.cols_by_name[name] for name in col_names]
1078
+ where_clause: Optional[exprs.Expr] = None
1079
+ if errors_only:
1080
+ where_clause = (
1081
+ exprs.ColumnPropertyRef(exprs.ColumnRef(target_columns[0]), exprs.ColumnPropertyRef.Property.ERRORTYPE)
1082
+ != None
1083
+ )
1084
+ plan, updated_cols, recomputed_cols = Planner.create_update_plan(
1085
+ self.path, update_targets={}, recompute_targets=target_columns, where_clause=where_clause, cascade=cascade
1086
+ )
1087
+ from pixeltable.exprs import SqlElementCache
1088
+
1089
+ result = self.propagate_update(
1090
+ plan,
1091
+ where_clause.sql_expr(SqlElementCache()) if where_clause is not None else None,
1092
+ recomputed_cols,
1093
+ base_versions=[],
1094
+ timestamp=time.time(),
1095
+ cascade=cascade,
1096
+ show_progress=True,
1097
+ )
1098
+ result += UpdateStatus(updated_cols=updated_cols)
1099
+ return result
1100
+
1067
1101
  def propagate_update(
1068
1102
  self,
1069
1103
  plan: Optional[exec.ExecNode],
@@ -1074,18 +1108,20 @@ class TableVersion:
1074
1108
  cascade: bool,
1075
1109
  show_progress: bool = True,
1076
1110
  ) -> UpdateStatus:
1077
- result = UpdateStatus()
1078
1111
  if plan is not None:
1079
1112
  # we're creating a new version
1080
1113
  self.version += 1
1081
- result.num_rows, result.num_excs, cols_with_excs = self.store_tbl.insert_rows(
1082
- plan, v_min=self.version, show_progress=show_progress
1114
+ cols_with_excs, status = self.store_tbl.insert_rows(plan, v_min=self.version, show_progress=show_progress)
1115
+ result = status.insert_to_update()
1116
+ result += UpdateStatus(
1117
+ cols_with_excs=[f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs]
1083
1118
  )
1084
- result.cols_with_excs = [f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs]
1085
1119
  self.store_tbl.delete_rows(
1086
1120
  self.version, base_versions=base_versions, match_on_vmin=True, where_clause=where_clause
1087
1121
  )
1088
1122
  self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False)
1123
+ else:
1124
+ result = UpdateStatus()
1089
1125
 
1090
1126
  if cascade:
1091
1127
  base_versions = [None if plan is None else self.version, *base_versions] # don't update in place
@@ -1100,17 +1136,17 @@ class TableVersion:
1100
1136
  status = view.get().propagate_update(
1101
1137
  plan, None, recomputed_view_cols, base_versions=base_versions, timestamp=timestamp, cascade=True
1102
1138
  )
1103
- result.num_rows += status.num_rows
1104
- result.num_excs += status.num_excs
1105
- result.cols_with_excs += status.cols_with_excs
1139
+ result += status.to_cascade()
1106
1140
 
1107
- result.cols_with_excs = list(dict.fromkeys(result.cols_with_excs).keys()) # remove duplicates
1108
1141
  return result
1109
1142
 
1110
1143
  def delete(self, where: Optional[exprs.Expr] = None) -> UpdateStatus:
1111
1144
  """Delete rows in this table.
1112
1145
  Args:
1113
1146
  where: a predicate to filter rows to delete.
1147
+
1148
+ Returns:
1149
+ UpdateStatus: an object containing the number of deleted rows and other statistics.
1114
1150
  """
1115
1151
  assert self.is_insertable
1116
1152
  from pixeltable.exprs import Expr
@@ -1126,14 +1162,12 @@ class TableVersion:
1126
1162
  raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
1127
1163
  sql_where_clause = analysis_info.sql_where_clause
1128
1164
 
1129
- num_rows = self.propagate_delete(sql_where_clause, base_versions=[], timestamp=time.time())
1130
-
1131
- status = UpdateStatus(num_rows=num_rows)
1165
+ status = self.propagate_delete(sql_where_clause, base_versions=[], timestamp=time.time())
1132
1166
  return status
1133
1167
 
1134
1168
  def propagate_delete(
1135
1169
  self, where: Optional[exprs.Expr], base_versions: list[Optional[int]], timestamp: float
1136
- ) -> int:
1170
+ ) -> UpdateStatus:
1137
1171
  """Delete rows in this table and propagate to views.
1138
1172
  Args:
1139
1173
  where: a predicate to filter rows to delete.
@@ -1149,18 +1183,21 @@ class TableVersion:
1149
1183
  # sql.sql.visitors.traverse(sql_where_clause, {}, {'column': collect_cols})
1150
1184
  # x = [f'{str(c)}:{hash(c)}:{id(c.table)}' for c in sql_cols]
1151
1185
  # print(f'where_clause cols: {x}')
1152
- num_rows = self.store_tbl.delete_rows(
1186
+ del_rows = self.store_tbl.delete_rows(
1153
1187
  self.version + 1, base_versions=base_versions, match_on_vmin=False, where_clause=sql_where_clause
1154
1188
  )
1155
- if num_rows > 0:
1189
+ row_counts = RowCountStats(del_rows=del_rows) # delete
1190
+ result = UpdateStatus(row_count_stats=row_counts)
1191
+ if del_rows > 0:
1156
1192
  # we're creating a new version
1157
1193
  self.version += 1
1158
1194
  self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False)
1159
1195
  for view in self.mutable_views:
1160
- num_rows += view.get().propagate_delete(
1196
+ status = view.get().propagate_delete(
1161
1197
  where=None, base_versions=[self.version, *base_versions], timestamp=timestamp
1162
1198
  )
1163
- return num_rows
1199
+ result += status.to_cascade()
1200
+ return result
1164
1201
 
1165
1202
  def revert(self) -> None:
1166
1203
  """Reverts the table to the previous version."""
@@ -1458,18 +1495,6 @@ class TableVersion:
1458
1495
  names = [c.name for c in self.cols_by_name.values() if c.is_computed]
1459
1496
  return names
1460
1497
 
1461
- def _record_refd_columns(self, col: Column) -> None:
1462
- """Update Column.dependent_cols for all cols referenced in col.value_expr."""
1463
- from pixeltable import exprs
1464
-
1465
- if col.value_expr_dict is not None:
1466
- # if we have a value_expr_dict, use that instead of instantiating the value_expr
1467
- refd_cols = exprs.Expr.get_refd_columns(col.value_expr_dict)
1468
- else:
1469
- refd_cols = [e.col for e in col.value_expr.subexprs(expr_class=exprs.ColumnRef)]
1470
- for refd_col in refd_cols:
1471
- refd_col.dependent_cols.add(col)
1472
-
1473
1498
  def get_idx_val_columns(self, cols: Iterable[Column]) -> set[Column]:
1474
1499
  result = {info.val_col for col in cols for info in col.get_idx_info().values()}
1475
1500
  return result
@@ -1478,7 +1503,8 @@ class TableVersion:
1478
1503
  """
1479
1504
  Return the set of columns that transitively depend on any of the given ones.
1480
1505
  """
1481
- result = {dependent_col for col in cols for dependent_col in col.dependent_cols}
1506
+ cat = pxt.catalog.Catalog.get()
1507
+ result = set().union(*[cat.get_column_dependents(col.tbl.id, col.id) for col in cols])
1482
1508
  if len(result) > 0:
1483
1509
  result.update(self.get_dependent_columns(result))
1484
1510
  return result
@@ -1,13 +1,16 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import logging
4
+ from dataclasses import dataclass
4
5
  from typing import TYPE_CHECKING, Optional
5
6
  from uuid import UUID
6
7
 
8
+ from pixeltable import exceptions as excs
9
+
7
10
  from .table_version import TableVersion
8
11
 
9
12
  if TYPE_CHECKING:
10
- pass
13
+ from pixeltable.catalog import Column
11
14
 
12
15
  _logger = logging.getLogger('pixeltable')
13
16
 
@@ -34,6 +37,10 @@ class TableVersionHandle:
34
37
  def __hash__(self) -> int:
35
38
  return hash((self.id, self.effective_version))
36
39
 
40
+ @property
41
+ def is_snapshot(self) -> bool:
42
+ return self.effective_version is not None
43
+
37
44
  @classmethod
38
45
  def create(cls, tbl_version: TableVersion) -> TableVersionHandle:
39
46
  return cls(tbl_version.id, tbl_version.effective_version, tbl_version)
@@ -53,7 +60,6 @@ class TableVersionHandle:
53
60
  else:
54
61
  self._tbl_version = Catalog.get().get_tbl_version(self.id, self.effective_version)
55
62
  if self.effective_version is None:
56
- # make sure we don't see a discarded instance of a live TableVersion
57
63
  tvs = list(Catalog.get()._tbl_versions.values())
58
64
  assert self._tbl_version in tvs
59
65
  return self._tbl_version
@@ -64,3 +70,25 @@ class TableVersionHandle:
64
70
  @classmethod
65
71
  def from_dict(cls, d: dict) -> TableVersionHandle:
66
72
  return cls(UUID(d['id']), d['effective_version'])
73
+
74
+
75
+ @dataclass(frozen=True)
76
+ class ColumnHandle:
77
+ tbl_version: TableVersionHandle
78
+ col_id: int
79
+
80
+ def get(self) -> 'Column':
81
+ if self.col_id not in self.tbl_version.get().cols_by_id:
82
+ schema_version_drop = self.tbl_version.get()._tbl_md.column_md[self.col_id].schema_version_drop
83
+ raise excs.Error(
84
+ f'Column has been dropped (no record for column ID {self.col_id} in table '
85
+ f'{self.tbl_version.get().versioned_name!r}; it was dropped in table version {schema_version_drop})'
86
+ )
87
+ return self.tbl_version.get().cols_by_id[self.col_id]
88
+
89
+ def as_dict(self) -> dict:
90
+ return {'tbl_version': self.tbl_version.as_dict(), 'col_id': self.col_id}
91
+
92
+ @classmethod
93
+ def from_dict(cls, d: dict) -> ColumnHandle:
94
+ return cls(tbl_version=TableVersionHandle.from_dict(d['tbl_version']), col_id=d['col_id'])
@@ -8,6 +8,7 @@ from pixeltable.env import Env
8
8
  from pixeltable.metadata import schema
9
9
 
10
10
  from .column import Column
11
+ from .globals import MediaValidation
11
12
  from .table_version import TableVersion
12
13
  from .table_version_handle import TableVersionHandle
13
14
 
@@ -83,6 +84,7 @@ class TableVersionPath:
83
84
  if self.base is not None:
84
85
  self.base.clear_cached_md()
85
86
 
87
+ @property
86
88
  def tbl_id(self) -> UUID:
87
89
  """Return the id of the table/view that this path represents"""
88
90
  return self.tbl_version.id
@@ -92,6 +94,11 @@ class TableVersionPath:
92
94
  self.refresh_cached_md()
93
95
  return self._cached_tbl_version.version
94
96
 
97
+ def schema_version(self) -> int:
98
+ """Return the schema version of the table/view that this path represents"""
99
+ self.refresh_cached_md()
100
+ return self._cached_tbl_version.schema_version
101
+
95
102
  def tbl_name(self) -> str:
96
103
  """Return the name of the table/view that this path represents"""
97
104
  self.refresh_cached_md()
@@ -103,10 +110,7 @@ class TableVersionPath:
103
110
 
104
111
  def is_snapshot(self) -> bool:
105
112
  """Return True if this is a path of snapshot versions"""
106
- self.refresh_cached_md()
107
- if not self._cached_tbl_version.is_snapshot:
108
- return False
109
- return self.base.is_snapshot() if self.base is not None else True
113
+ return self.tbl_version.is_snapshot
110
114
 
111
115
  def is_view(self) -> bool:
112
116
  self.refresh_cached_md()
@@ -116,10 +120,30 @@ class TableVersionPath:
116
120
  self.refresh_cached_md()
117
121
  return self._cached_tbl_version.is_component_view
118
122
 
123
+ def is_replica(self) -> bool:
124
+ self.refresh_cached_md()
125
+ return self._cached_tbl_version.is_replica
126
+
127
+ def is_mutable(self) -> bool:
128
+ self.refresh_cached_md()
129
+ return self._cached_tbl_version.is_mutable
130
+
119
131
  def is_insertable(self) -> bool:
120
132
  self.refresh_cached_md()
121
133
  return self._cached_tbl_version.is_insertable
122
134
 
135
+ def comment(self) -> str:
136
+ self.refresh_cached_md()
137
+ return self._cached_tbl_version.comment
138
+
139
+ def num_retained_versions(self) -> int:
140
+ self.refresh_cached_md()
141
+ return self._cached_tbl_version.num_retained_versions
142
+
143
+ def media_validation(self) -> MediaValidation:
144
+ self.refresh_cached_md()
145
+ return self._cached_tbl_version.media_validation
146
+
123
147
  def get_tbl_versions(self) -> list[TableVersionHandle]:
124
148
  """Return all tbl versions"""
125
149
  if self.base is None: