pixeltable 0.4.0rc3__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (58)
  1. pixeltable/__init__.py +1 -1
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +11 -2
  4. pixeltable/catalog/catalog.py +407 -119
  5. pixeltable/catalog/column.py +38 -26
  6. pixeltable/catalog/globals.py +130 -15
  7. pixeltable/catalog/insertable_table.py +10 -9
  8. pixeltable/catalog/schema_object.py +6 -0
  9. pixeltable/catalog/table.py +245 -119
  10. pixeltable/catalog/table_version.py +142 -116
  11. pixeltable/catalog/table_version_handle.py +30 -2
  12. pixeltable/catalog/table_version_path.py +28 -4
  13. pixeltable/catalog/view.py +14 -20
  14. pixeltable/config.py +4 -0
  15. pixeltable/dataframe.py +10 -9
  16. pixeltable/env.py +5 -11
  17. pixeltable/exceptions.py +6 -0
  18. pixeltable/exec/exec_node.py +2 -0
  19. pixeltable/exec/expr_eval/expr_eval_node.py +4 -4
  20. pixeltable/exec/sql_node.py +47 -30
  21. pixeltable/exprs/column_property_ref.py +2 -10
  22. pixeltable/exprs/column_ref.py +24 -21
  23. pixeltable/exprs/data_row.py +9 -0
  24. pixeltable/exprs/expr.py +4 -4
  25. pixeltable/exprs/row_builder.py +44 -13
  26. pixeltable/func/__init__.py +1 -0
  27. pixeltable/func/mcp.py +74 -0
  28. pixeltable/func/query_template_function.py +4 -2
  29. pixeltable/func/tools.py +12 -2
  30. pixeltable/func/udf.py +2 -2
  31. pixeltable/functions/__init__.py +1 -0
  32. pixeltable/functions/groq.py +108 -0
  33. pixeltable/functions/huggingface.py +8 -6
  34. pixeltable/functions/mistralai.py +2 -13
  35. pixeltable/functions/openai.py +1 -6
  36. pixeltable/functions/replicate.py +2 -2
  37. pixeltable/functions/util.py +6 -1
  38. pixeltable/globals.py +0 -2
  39. pixeltable/io/external_store.py +81 -54
  40. pixeltable/io/globals.py +1 -1
  41. pixeltable/io/label_studio.py +49 -45
  42. pixeltable/io/table_data_conduit.py +1 -1
  43. pixeltable/metadata/__init__.py +1 -1
  44. pixeltable/metadata/converters/convert_37.py +15 -0
  45. pixeltable/metadata/converters/convert_38.py +39 -0
  46. pixeltable/metadata/notes.py +2 -0
  47. pixeltable/metadata/schema.py +5 -0
  48. pixeltable/metadata/utils.py +78 -0
  49. pixeltable/plan.py +59 -139
  50. pixeltable/share/packager.py +2 -2
  51. pixeltable/store.py +114 -103
  52. pixeltable/type_system.py +30 -0
  53. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/METADATA +1 -1
  54. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/RECORD +57 -53
  55. pixeltable/utils/sample.py +0 -25
  56. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/LICENSE +0 -0
  57. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/WHEEL +0 -0
  58. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/entry_points.txt +0 -0
@@ -9,6 +9,7 @@ from pathlib import Path
9
9
  from typing import TYPE_CHECKING, Any, Iterable, Literal, Optional, Union, overload
10
10
 
11
11
  from typing import _GenericAlias # type: ignore[attr-defined] # isort: skip
12
+ import datetime
12
13
  from uuid import UUID
13
14
 
14
15
  import pandas as pd
@@ -17,6 +18,7 @@ import sqlalchemy as sql
17
18
  import pixeltable as pxt
18
19
  from pixeltable import catalog, env, exceptions as excs, exprs, index, type_system as ts
19
20
  from pixeltable.metadata import schema
21
+ from pixeltable.metadata.utils import MetadataUtils
20
22
 
21
23
  from ..exprs import ColumnRef
22
24
  from ..utils.description_helper import DescriptionHelper
@@ -48,21 +50,23 @@ class Table(SchemaObject):
48
50
  """
49
51
  A handle to a table, view, or snapshot. This class is the primary interface through which table operations
50
52
  (queries, insertions, updates, etc.) are performed in Pixeltable.
53
+
54
+ Every user-invoked operation that runs an ExecNode tree (directly or indirectly) needs to call
55
+ FileCache.emit_eviction_warnings() at the end of the operation.
51
56
  """
52
57
 
53
- # Every user-invoked operation that runs an ExecNode tree (directly or indirectly) needs to call
54
- # FileCache.emit_eviction_warnings() at the end of the operation.
58
+ # the chain of TableVersions needed to run queries and supply metadata (eg, schema)
59
+ _tbl_version_path: TableVersionPath
55
60
 
56
- _is_dropped: bool
57
- __tbl_version_path: TableVersionPath
61
+ # the physical TableVersion backing this Table; None for pure snapshots
62
+ _tbl_version: Optional[TableVersionHandle]
58
63
 
59
64
  def __init__(self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath):
60
65
  super().__init__(id, name, dir_id)
61
- self._is_dropped = False
62
- self.__tbl_version_path = tbl_version_path
66
+ self._tbl_version_path = tbl_version_path
67
+ self._tbl_version = None
63
68
 
64
69
  def _move(self, new_name: str, new_dir_id: UUID) -> None:
65
- self._check_is_dropped()
66
70
  super()._move(new_name, new_dir_id)
67
71
  conn = env.Env.get().conn
68
72
  stmt = sql.text(
@@ -75,6 +79,7 @@ class Table(SchemaObject):
75
79
  )
76
80
  conn.execute(stmt, {'new_dir_id': new_dir_id, 'new_name': json.dumps(new_name), 'id': self._id})
77
81
 
82
+ # this is duplicated from SchemaObject so that our API docs show the docstring for Table
78
83
  def get_metadata(self) -> dict[str, Any]:
79
84
  """
80
85
  Retrieves metadata associated with this table.
@@ -100,42 +105,27 @@ class Table(SchemaObject):
100
105
  }
101
106
  ```
102
107
  """
103
- from pixeltable.catalog import Catalog
104
-
105
- with Catalog.get().begin_xact(for_write=False):
106
- self._check_is_dropped()
107
- md = super().get_metadata()
108
- md['base'] = self._base_table._path() if self._base_table is not None else None
109
- md['schema'] = self._schema
110
- md['is_replica'] = self._tbl_version.get().is_replica
111
- md['version'] = self._version
112
- md['schema_version'] = self._tbl_version.get().schema_version
113
- md['comment'] = self._comment
114
- md['num_retained_versions'] = self._num_retained_versions
115
- md['media_validation'] = self._media_validation.name.lower()
116
- return md
117
-
118
- @property
119
- def _version(self) -> int:
108
+ return super().get_metadata()
109
+
110
def _get_metadata(self) -> dict[str, Any]:
    """Build the metadata dict for this table.

    Starts from the dict returned by the superclass and adds the table-specific entries
    `base`, `schema`, `is_replica`, `version`, `schema_version`, `comment`,
    `num_retained_versions` and `media_validation`.

    Returns:
        The combined metadata dict.
    """
    md = super()._get_metadata()
    base_tbl = self._get_base_table()
    path = self._tbl_version_path
    md.update(
        {
            # path of the base table, or None if this table has no base
            'base': None if base_tbl is None else base_tbl._path(),
            'schema': self._get_schema(),
            'is_replica': path.is_replica(),
            'version': self._get_version(),
            'schema_version': path.schema_version(),
            'comment': self._get_comment(),
            'num_retained_versions': self._get_num_retained_versions(),
            # reported as the lower-cased name of the media validation setting
            'media_validation': self._get_media_validation().name.lower(),
        }
    )
    return md
122
+
123
+ def _get_version(self) -> int:
120
124
  """Return the version of this table. Used by tests to ascertain version changes."""
121
- return self._tbl_version.get().version
122
-
123
- @property
124
- def _tbl_version(self) -> TableVersionHandle:
125
- """Return TableVersion for just this table."""
126
- return self._tbl_version_path.tbl_version
127
-
128
- @property
129
- def _tbl_version_path(self) -> TableVersionPath:
130
- self._check_is_dropped()
131
- return self.__tbl_version_path
125
+ return self._tbl_version_path.version()
132
126
 
133
127
  def __hash__(self) -> int:
134
- return hash(self._tbl_version.id)
135
-
136
- def _check_is_dropped(self) -> None:
137
- if self._is_dropped:
138
- raise excs.Error(f'{self._display_name()} {self._name} has been dropped')
128
+ return hash(self._tbl_version_path.tbl_id)
139
129
 
140
130
  def __getattr__(self, name: str) -> 'exprs.ColumnRef':
141
131
  """Return a ColumnRef for the given name."""
@@ -162,15 +152,18 @@ class Table(SchemaObject):
162
152
  from pixeltable.catalog import Catalog
163
153
 
164
154
  with Catalog.get().begin_xact(for_write=False):
165
- self._check_is_dropped()
166
155
  return [t._path() for t in self._get_views(recursive=recursive)]
167
156
 
168
def _get_views(self, *, recursive: bool = True, include_snapshots: bool = True) -> list['Table']:
    """Return the views defined on top of this table.

    Args:
        recursive: if True, also return the views of those views, transitively.
        include_snapshots: if False, omit views whose version path reports `is_snapshot()`; the same
            filter is applied at every level of the recursion.

    Returns:
        The direct views, followed (when `recursive` is True) by the transitive views of each direct view.
        Every view appears exactly once.
    """
    cat = catalog.Catalog.get()
    direct_views = [cat.get_table_by_id(view_id) for view_id in cat.get_view_ids(self._id)]
    if not include_snapshots:
        direct_views = [t for t in direct_views if not t._tbl_version_path.is_snapshot()]
    if not recursive:
        return direct_views

    # Collect the descendants into a separate list. Extending the list that is being iterated would make
    # the iteration revisit the freshly appended views and add their (already included) descendants again.
    descendants = [
        t for view in direct_views for t in view._get_views(recursive=True, include_snapshots=include_snapshots)
    ]
    return direct_views + descendants
175
168
 
176
169
  def _df(self) -> 'pxt.dataframe.DataFrame':
@@ -276,35 +269,32 @@ class Table(SchemaObject):
276
269
  """Return the number of rows in this table."""
277
270
  return self._df().count()
278
271
 
279
- @property
280
272
  def columns(self) -> list[str]:
281
273
  """Return the names of the columns in this table."""
282
274
  cols = self._tbl_version_path.columns()
283
275
  return [c.name for c in cols]
284
276
 
285
- @property
286
- def _schema(self) -> dict[str, ts.ColumnType]:
277
+ def _get_schema(self) -> dict[str, ts.ColumnType]:
287
278
  """Return the schema (column names and column types) of this table."""
288
279
  return {c.name: c.col_type for c in self._tbl_version_path.columns()}
289
280
 
290
- @property
291
- def base_table(self) -> Optional['Table']:
292
- with env.Env.get().begin_xact():
293
- return self._base_table
281
+ def get_base_table(self) -> Optional['Table']:
282
+ from pixeltable.catalog import Catalog
283
+
284
+ with Catalog.get().begin_xact(for_write=False):
285
+ return self._get_base_table()
294
286
 
295
- @property
296
287
  @abc.abstractmethod
297
- def _base_table(self) -> Optional['Table']:
298
- """The base's Table instance"""
288
+ def _get_base_table(self) -> Optional['Table']:
289
+ """The base's Table instance. Requires a transaction context"""
299
290
 
300
- @property
301
- def _base_tables(self) -> list['Table']:
302
- """The ancestor list of bases of this table, starting with its immediate base."""
303
- bases = []
304
- base = self._base_table
291
+ def _get_base_tables(self) -> list['Table']:
292
+ """The ancestor list of bases of this table, starting with its immediate base. Requires a transaction context"""
293
+ bases: list[Table] = []
294
+ base = self._get_base_table()
305
295
  while base is not None:
306
296
  bases.append(base)
307
- base = base._base_table
297
+ base = base._get_base_table()
308
298
  return bases
309
299
 
310
300
  @property
@@ -312,17 +302,14 @@ class Table(SchemaObject):
312
302
  def _effective_base_versions(self) -> list[Optional[int]]:
313
303
  """The effective versions of the ancestor bases, starting with its immediate base."""
314
304
 
315
- @property
316
- def _comment(self) -> str:
317
- return self._tbl_version.get().comment
305
+ def _get_comment(self) -> str:
306
+ return self._tbl_version_path.comment()
318
307
 
319
- @property
320
- def _num_retained_versions(self) -> int:
321
- return self._tbl_version.get().num_retained_versions
308
+ def _get_num_retained_versions(self) -> int:
309
+ return self._tbl_version_path.num_retained_versions()
322
310
 
323
- @property
324
- def _media_validation(self) -> MediaValidation:
325
- return self._tbl_version.get().media_validation
311
+ def _get_media_validation(self) -> MediaValidation:
312
+ return self._tbl_version_path.media_validation()
326
313
 
327
314
  def __repr__(self) -> str:
328
315
  return self._descriptors().to_string()
@@ -346,8 +333,8 @@ class Table(SchemaObject):
346
333
  stores = self._external_store_descriptor()
347
334
  if not stores.empty:
348
335
  helper.append(stores)
349
- if self._comment:
350
- helper.append(f'COMMENT: {self._comment}')
336
+ if self._get_comment():
337
+ helper.append(f'COMMENT: {self._get_comment()}')
351
338
  return helper
352
339
 
353
340
  def _col_descriptor(self, columns: Optional[list[str]] = None) -> pd.DataFrame:
@@ -364,6 +351,8 @@ class Table(SchemaObject):
364
351
  def _index_descriptor(self, columns: Optional[list[str]] = None) -> pd.DataFrame:
365
352
  from pixeltable import index
366
353
 
354
+ if self._tbl_version is None:
355
+ return pd.DataFrame([])
367
356
  pd_rows = []
368
357
  for name, info in self._tbl_version.get().idxs_by_name.items():
369
358
  if isinstance(info.idx, index.EmbeddingIndex) and (columns is None or info.col.name in columns):
@@ -383,7 +372,7 @@ class Table(SchemaObject):
383
372
 
384
373
  def _external_store_descriptor(self) -> pd.DataFrame:
385
374
  pd_rows = []
386
- for name, store in self._tbl_version.get().external_stores.items():
375
+ for name, store in self._tbl_version_path.tbl_version.get().external_stores.items():
387
376
  row = {'External Store': name, 'Type': type(store).__name__}
388
377
  pd_rows.append(row)
389
378
  return pd.DataFrame(pd_rows)
@@ -392,7 +381,6 @@ class Table(SchemaObject):
392
381
  """
393
382
  Print the table schema.
394
383
  """
395
- self._check_is_dropped()
396
384
  if getattr(builtins, '__IPYTHON__', False):
397
385
  from IPython.display import Markdown, display
398
386
 
@@ -400,11 +388,6 @@ class Table(SchemaObject):
400
388
  else:
401
389
  print(repr(self))
402
390
 
403
- def _drop(self) -> None:
404
- self._check_is_dropped()
405
- self._tbl_version.get().drop()
406
- self._is_dropped = True
407
-
408
391
  # TODO Factor this out into a separate module.
409
392
  # The return type is unresolvable, but torch can't be imported since it's an optional dependency.
410
393
  def to_pytorch_dataset(self, image_format: str = 'pt') -> 'torch.utils.data.IterableDataset':
@@ -422,9 +405,11 @@ class Table(SchemaObject):
422
405
  def _column_has_dependents(self, col: Column) -> bool:
423
406
  """Returns True if the column has dependents, False otherwise."""
424
407
  assert col is not None
425
- assert col.name in self._schema
426
- if any(c.name is not None for c in col.dependent_cols):
408
+ assert col.name in self._get_schema()
409
+ cat = catalog.Catalog.get()
410
+ if any(c.name is not None for c in cat.get_column_dependents(col.tbl.id, col.id)):
427
411
  return True
412
+ assert self._tbl_version is not None
428
413
  return any(
429
414
  col in store.get_local_columns()
430
415
  for view in (self, *self._get_views(recursive=True))
@@ -436,8 +421,8 @@ class Table(SchemaObject):
436
421
 
437
422
  If `if_exists='ignore'`, returns a list of existing columns, if any, in `new_col_names`.
438
423
  """
439
- assert not self.get_metadata()['is_snapshot']
440
- existing_col_names = set(self._schema.keys())
424
+ assert self._tbl_version is not None
425
+ existing_col_names = set(self._get_schema().keys())
441
426
  cols_to_ignore = []
442
427
  for new_col_name in new_col_names:
443
428
  if new_col_name in existing_col_names:
@@ -507,9 +492,9 @@ class Table(SchemaObject):
507
492
  """
508
493
  from pixeltable.catalog import Catalog
509
494
 
510
- with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
511
- self._check_is_dropped()
512
- if self.get_metadata()['is_snapshot']:
495
+ # lock_mutable_tree=True: we might end up having to drop existing columns, which requires locking the tree
496
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
497
+ if self._tbl_version_path.is_snapshot():
513
498
  raise excs.Error('Cannot add column to a snapshot.')
514
499
  col_schema = {
515
500
  col_name: {'type': ts.ColumnType.normalize_type(spec, nullable_default=True, allow_builtin_types=False)}
@@ -530,6 +515,7 @@ class Table(SchemaObject):
530
515
  new_cols = self._create_columns(col_schema)
531
516
  for new_col in new_cols:
532
517
  self._verify_column(new_col)
518
+ assert self._tbl_version is not None
533
519
  status = self._tbl_version.get().add_columns(new_cols, print_stats=False, on_error='abort')
534
520
  FileCache.get().emit_eviction_warnings()
535
521
  return status
@@ -570,10 +556,9 @@ class Table(SchemaObject):
570
556
  """
571
557
  from pixeltable.catalog import Catalog
572
558
 
573
- with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
574
- self._check_is_dropped()
559
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
575
560
  # verify kwargs
576
- if self._tbl_version.get().is_snapshot:
561
+ if self._tbl_version_path.is_snapshot():
577
562
  raise excs.Error('Cannot add column to a snapshot.')
578
563
  # verify kwargs and construct column schema dict
579
564
  if len(kwargs) != 1:
@@ -637,9 +622,8 @@ class Table(SchemaObject):
637
622
  """
638
623
  from pixeltable.catalog import Catalog
639
624
 
640
- with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
641
- self._check_is_dropped()
642
- if self.get_metadata()['is_snapshot']:
625
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
626
+ if self._tbl_version_path.is_snapshot():
643
627
  raise excs.Error('Cannot add column to a snapshot.')
644
628
  if len(kwargs) != 1:
645
629
  raise excs.Error(
@@ -676,6 +660,7 @@ class Table(SchemaObject):
676
660
 
677
661
  new_col = self._create_columns({col_name: col_schema})[0]
678
662
  self._verify_column(new_col)
663
+ assert self._tbl_version is not None
679
664
  status = self._tbl_version.get().add_columns([new_col], print_stats=print_stats, on_error=on_error)
680
665
  FileCache.get().emit_eviction_warnings()
681
666
  return status
@@ -822,8 +807,9 @@ class Table(SchemaObject):
822
807
  """
823
808
  from pixeltable.catalog import Catalog
824
809
 
825
- with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
826
- self._check_is_dropped()
810
+ cat = Catalog.get()
811
+ # lock_mutable_tree=True: we need to be able to see whether any transitive view has column dependents
812
+ with cat.begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
827
813
  if self._tbl_version_path.is_snapshot():
828
814
  raise excs.Error('Cannot drop column from a snapshot.')
829
815
  col: Column = None
@@ -846,20 +832,22 @@ class Table(SchemaObject):
846
832
  return
847
833
  col = column.col
848
834
 
849
- dependent_user_cols = [c for c in col.dependent_cols if c.name is not None]
835
+ dependent_user_cols = [c for c in cat.get_column_dependents(col.tbl.id, col.id) if c.name is not None]
850
836
  if len(dependent_user_cols) > 0:
851
837
  raise excs.Error(
852
838
  f'Cannot drop column `{col.name}` because the following columns depend on it:\n'
853
839
  f'{", ".join(c.name for c in dependent_user_cols)}'
854
840
  )
855
841
 
842
+ _ = self._get_views(recursive=True, include_snapshots=False)
856
843
  # See if this column has a dependent store. We need to look through all stores in all
857
844
  # (transitive) views of this table.
845
+ col_handle = col.handle
858
846
  dependent_stores = [
859
847
  (view, store)
860
- for view in (self, *self._get_views(recursive=True))
848
+ for view in (self, *self._get_views(recursive=True, include_snapshots=False))
861
849
  for store in view._tbl_version.get().external_stores.values()
862
- if col in store.get_local_columns()
850
+ if col_handle in store.get_local_columns()
863
851
  ]
864
852
  if len(dependent_stores) > 0:
865
853
  dependent_store_names = [
@@ -891,7 +879,7 @@ class Table(SchemaObject):
891
879
  """
892
880
  from pixeltable.catalog import Catalog
893
881
 
894
- with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
882
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=False):
895
883
  self._tbl_version.get().rename_column(old_name, new_name)
896
884
 
897
885
  def _list_index_info_for_test(self) -> list[dict[str, Any]]:
@@ -902,7 +890,6 @@ class Table(SchemaObject):
902
890
  A list of index information, each containing the index's
903
891
  id, name, and the name of the column it indexes.
904
892
  """
905
- assert not self._is_dropped
906
893
  index_info = []
907
894
  for idx_name, idx in self._tbl_version.get().idxs_by_name.items():
908
895
  index_info.append({'_id': idx.id, '_name': idx_name, '_column': idx.col.name})
@@ -1001,7 +988,7 @@ class Table(SchemaObject):
1001
988
  """
1002
989
  from pixeltable.catalog import Catalog
1003
990
 
1004
- with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
991
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
1005
992
  if self._tbl_version_path.is_snapshot():
1006
993
  raise excs.Error('Cannot add an index to a snapshot')
1007
994
  col = self._resolve_column_parameter(column)
@@ -1090,7 +1077,7 @@ class Table(SchemaObject):
1090
1077
  if (column is None) == (idx_name is None):
1091
1078
  raise excs.Error("Exactly one of 'column' or 'idx_name' must be provided")
1092
1079
 
1093
- with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
1080
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
1094
1081
  col: Column = None
1095
1082
  if idx_name is None:
1096
1083
  col = self._resolve_column_parameter(column)
@@ -1169,7 +1156,7 @@ class Table(SchemaObject):
1169
1156
  if (column is None) == (idx_name is None):
1170
1157
  raise excs.Error("Exactly one of 'column' or 'idx_name' must be provided")
1171
1158
 
1172
- with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
1159
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=False):
1173
1160
  col: Column = None
1174
1161
  if idx_name is None:
1175
1162
  col = self._resolve_column_parameter(column)
@@ -1185,6 +1172,8 @@ class Table(SchemaObject):
1185
1172
  _idx_class: Optional[type[index.IndexBase]] = None,
1186
1173
  if_not_exists: Literal['error', 'ignore'] = 'error',
1187
1174
  ) -> None:
1175
+ from pixeltable.catalog import Catalog
1176
+
1188
1177
  if self._tbl_version_path.is_snapshot():
1189
1178
  raise excs.Error('Cannot drop an index from a snapshot')
1190
1179
  assert (col is None) != (idx_name is None)
@@ -1216,7 +1205,10 @@ class Table(SchemaObject):
1216
1205
  idx_info = idx_info_list[0]
1217
1206
 
1218
1207
  # Find out if anything depends on this index
1219
- dependent_user_cols = [c for c in idx_info.val_col.dependent_cols if c.name is not None]
1208
+ val_col = idx_info.val_col
1209
+ dependent_user_cols = [
1210
+ c for c in Catalog.get().get_column_dependents(val_col.tbl.id, val_col.id) if c.name is not None
1211
+ ]
1220
1212
  if len(dependent_user_cols) > 0:
1221
1213
  raise excs.Error(
1222
1214
  f'Cannot drop index because the following columns depend on it:\n'
@@ -1332,6 +1324,9 @@ class Table(SchemaObject):
1332
1324
  where: a predicate to filter rows to update.
1333
1325
  cascade: if True, also update all computed columns that transitively depend on the updated columns.
1334
1326
 
1327
+ Returns:
1328
+ An [`UpdateStatus`][pixeltable.UpdateStatus] object containing information about the update.
1329
+
1335
1330
  Examples:
1336
1331
  Set column `int_col` to 1 for all rows:
1337
1332
 
@@ -1351,7 +1346,9 @@ class Table(SchemaObject):
1351
1346
  """
1352
1347
  from pixeltable.catalog import Catalog
1353
1348
 
1354
- with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
1349
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
1350
+ if self._tbl_version_path.is_snapshot():
1351
+ raise excs.Error('Cannot update a snapshot')
1355
1352
  status = self._tbl_version.get().update(value_spec, where, cascade)
1356
1353
  FileCache.get().emit_eviction_warnings()
1357
1354
  return status
@@ -1389,7 +1386,7 @@ class Table(SchemaObject):
1389
1386
  """
1390
1387
  from pixeltable.catalog import Catalog
1391
1388
 
1392
- with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
1389
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
1393
1390
  if self._tbl_version_path.is_snapshot():
1394
1391
  raise excs.Error('Cannot update a snapshot')
1395
1392
  rows = list(rows)
@@ -1428,6 +1425,69 @@ class Table(SchemaObject):
1428
1425
  FileCache.get().emit_eviction_warnings()
1429
1426
  return status
1430
1427
 
1428
def recompute_columns(
    self, *columns: Union[str, ColumnRef], errors_only: bool = False, cascade: bool = True
) -> UpdateStatus:
    """Recompute the values in one or more computed columns of this table.

    Args:
        columns: The names or references of the computed columns to recompute.
        errors_only: If True, only run the recomputation for rows that have errors in the column (ie, the column's
            `errortype` property is non-None). Only allowed for recomputing a single column.
        cascade: if True, also update all computed columns that transitively depend on the recomputed columns.

    Returns:
        The status object produced by the recomputation.

    Raises:
        excs.Error: if this table is a snapshot, if no column is given, if `errors_only=True` is combined with
            more than one column, or if a column is unknown, is not a computed column, belongs to a base of
            this table, or is neither a name nor a column reference.

    Examples:
        Recompute computed columns `c1` and `c2` for all rows in this table, and everything that transitively
        depends on them:

        >>> tbl.recompute_columns('c1', 'c2')

        Recompute computed columns `c1` and `c2` for all rows in this table, but don't recompute other columns
        that depend on them:

        >>> tbl.recompute_columns(tbl.c1, tbl.c2, cascade=False)

        Recompute column `c1` and its dependents, but only for rows that have errors in it:

        >>> tbl.recompute_columns('c1', errors_only=True)
    """
    from pixeltable.catalog import Catalog

    cat = Catalog.get()
    # lock_mutable_tree=True: we need to be able to see whether any transitive view has column dependents
    with cat.begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
        if self._tbl_version_path.is_snapshot():
            raise excs.Error('Cannot recompute columns of a snapshot.')
        if not columns:
            raise excs.Error('At least one column must be specified to recompute')
        if errors_only and len(columns) > 1:
            raise excs.Error('Cannot use errors_only=True with multiple columns')

        col_names: list[str] = []
        for column in columns:
            col_name: str
            col: Column
            if isinstance(column, str):
                col = self._tbl_version_path.get_column(column, include_bases=True)
                if col is None:
                    raise excs.Error(f'Unknown column: {column!r}')
                col_name = column
            elif isinstance(column, ColumnRef):
                col = column.col
                if not self._tbl_version_path.has_column(col, include_bases=True):
                    raise excs.Error(f'Unknown column: {col.name!r}')
                col_name = col.name
            else:
                # user input: report it as a regular error instead of relying on an assert (stripped under -O)
                raise excs.Error(f'Invalid column: {column!r} (expected a column name or a column reference)')
            if not col.is_computed:
                raise excs.Error(f'Column {col_name!r} is not a computed column')
            # the lookups above include base tables, but only columns owned by this table can be recomputed
            if col.tbl.id != self._tbl_version_path.tbl_id:
                raise excs.Error(f'Cannot recompute column of a base: {col_name!r}')
            col_names.append(col_name)

        status = self._tbl_version.get().recompute_columns(col_names, errors_only=errors_only, cascade=cascade)
        FileCache.get().emit_eviction_warnings()
        return status
1490
+
1431
1491
  def delete(self, where: Optional['exprs.Expr'] = None) -> UpdateStatus:
1432
1492
  """Delete rows in this table.
1433
1493
 
@@ -1453,14 +1513,13 @@ class Table(SchemaObject):
1453
1513
  """
1454
1514
  from pixeltable.catalog import Catalog
1455
1515
 
1456
- with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
1516
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
1457
1517
  if self._tbl_version_path.is_snapshot():
1458
1518
  raise excs.Error('Cannot revert a snapshot')
1459
1519
  self._tbl_version.get().revert()
1460
1520
  # remove cached md in order to force a reload on the next operation
1461
- self.__tbl_version_path.clear_cached_md()
1521
+ self._tbl_version_path.clear_cached_md()
1462
1522
 
1463
- @property
1464
1523
  def external_stores(self) -> list[str]:
1465
1524
  return list(self._tbl_version.get().external_stores.keys())
1466
1525
 
@@ -1470,10 +1529,10 @@ class Table(SchemaObject):
1470
1529
  """
1471
1530
  from pixeltable.catalog import Catalog
1472
1531
 
1473
- with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
1474
- if self._tbl_version.get().is_snapshot:
1532
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=False):
1533
+ if self._tbl_version_path.is_snapshot():
1475
1534
  raise excs.Error(f'Table `{self._name}` is a snapshot, so it cannot be linked to an external store.')
1476
- if store.name in self.external_stores:
1535
+ if store.name in self.external_stores():
1477
1536
  raise excs.Error(f'Table `{self._name}` already has an external store with that name: {store.name}')
1478
1537
  _logger.info(f'Linking external store `{store.name}` to table `{self._name}`')
1479
1538
 
@@ -1501,9 +1560,10 @@ class Table(SchemaObject):
1501
1560
  """
1502
1561
  from pixeltable.catalog import Catalog
1503
1562
 
1504
- with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
1505
- self._check_is_dropped()
1506
- all_stores = self.external_stores
1563
+ if self._tbl_version_path.is_snapshot():
1564
+ return
1565
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=False):
1566
+ all_stores = self.external_stores()
1507
1567
 
1508
1568
  if stores is None:
1509
1569
  stores = all_stores
@@ -1540,9 +1600,13 @@ class Table(SchemaObject):
1540
1600
  """
1541
1601
  from pixeltable.catalog import Catalog
1542
1602
 
1543
- with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
1544
- self._check_is_dropped()
1545
- all_stores = self.external_stores
1603
+ if self._tbl_version_path.is_snapshot():
1604
+ return pxt.io.SyncStatus()
1605
+ # we lock the entire tree starting at the root base table in order to ensure that all synced columns can
1606
+ # have their updates propagated down the tree
1607
+ base_tv = self._tbl_version_path.get_tbl_versions()[-1]
1608
+ with Catalog.get().begin_xact(tbl=TableVersionPath(base_tv), for_write=True, lock_mutable_tree=True):
1609
+ all_stores = self.external_stores()
1546
1610
 
1547
1611
  if stores is None:
1548
1612
  stores = all_stores
@@ -1553,16 +1617,78 @@ class Table(SchemaObject):
1553
1617
  if store not in all_stores:
1554
1618
  raise excs.Error(f'Table `{self._name}` has no external store with that name: {store}')
1555
1619
 
1556
- sync_status = pxt.io.SyncStatus.empty()
1620
+ sync_status = pxt.io.SyncStatus()
1557
1621
  for store in stores:
1558
1622
  store_obj = self._tbl_version.get().external_stores[store]
1559
1623
  store_sync_status = store_obj.sync(self, export_data=export_data, import_data=import_data)
1560
- sync_status = sync_status.combine(store_sync_status)
1624
+ sync_status += store_sync_status
1561
1625
 
1562
1626
  return sync_status
1563
1627
 
1564
1628
  def __dir__(self) -> list[str]:
1565
- return list(super().__dir__()) + list(self._schema.keys())
1629
+ return list(super().__dir__()) + list(self._get_schema().keys())
1566
1630
 
1567
1631
  def _ipython_key_completions_(self) -> list[str]:
1568
- return list(self._schema.keys())
1632
+ return list(self._get_schema().keys())
1633
+
1634
def history(self, n: Optional[int] = None) -> 'pxt.dataframe.DataFrameResultSet':
    """Returns rows of information about the versions of this table, most recent first.

    Args:
        n: a limit to the number of versions listed; if None, the limit is set to one billion

    Returns:
        A result set with one row per version and the columns `version`, `created_at`, `change`
        (`'schema'` if a schema change was recorded for that version, otherwise `'data'`) and
        `schema_change`, ordered from most recent to oldest version.

    Raises:
        excs.Error: if `n` is not an integer >= 1.

    Examples:
        Report history:

        >>> tbl.history()

        Report only the most recent 5 changes to the table:

        >>> tbl.history(n=5)
    """
    from pixeltable.catalog import Catalog

    if n is None:
        n = 1_000_000_000
    if not isinstance(n, int) or n < 1:
        raise excs.Error(f'Invalid value for n: {n}')

    # Retrieve the table history components from the catalog.
    # Collect an extra version, if available, to allow for computation of the first version's schema change
    vers_list = Catalog.get().collect_tbl_history(self._id, n + 1)

    # Construct the metadata change description dictionary
    md_list = [(vers_md.version_md.version, vers_md.schema_version_md.columns) for vers_md in vers_list]
    md_dict = MetadataUtils._create_md_change_dict(md_list)

    # Construct report lines; the extra version (if present) is used for the change computation above,
    # but is not reported
    assert len(vers_list) <= n + 1
    report_lines: list[list[Any]] = []
    for vers_md in vers_list[:n]:
        version = vers_md.version_md.version
        schema_change = md_dict.get(version, '')
        report_lines.append(
            [
                version,
                # NOTE(review): fromtimestamp() without a tz argument yields a naive local-time datetime;
                # confirm that this is the intended representation of created_at
                datetime.datetime.fromtimestamp(vers_md.version_md.created_at),
                'schema' if schema_change != '' else 'data',
                schema_change,
            ]
        )

    report_schema = {
        'version': ts.IntType(),
        'created_at': ts.TimestampType(),
        'change': ts.StringType(),
        'schema_change': ts.StringType(),
    }
    return pxt.dataframe.DataFrameResultSet(report_lines, report_schema)