pixeltable 0.3.11__py3-none-any.whl → 0.3.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (53)
  1. pixeltable/__init__.py +2 -27
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/catalog.py +309 -59
  4. pixeltable/catalog/globals.py +5 -5
  5. pixeltable/catalog/insertable_table.py +13 -1
  6. pixeltable/catalog/path.py +13 -6
  7. pixeltable/catalog/table.py +28 -41
  8. pixeltable/catalog/table_version.py +100 -72
  9. pixeltable/catalog/view.py +35 -9
  10. pixeltable/dataframe.py +2 -2
  11. pixeltable/exceptions.py +20 -2
  12. pixeltable/exec/expr_eval/evaluators.py +0 -4
  13. pixeltable/exec/expr_eval/expr_eval_node.py +0 -1
  14. pixeltable/exec/sql_node.py +3 -3
  15. pixeltable/exprs/json_path.py +1 -5
  16. pixeltable/func/__init__.py +1 -1
  17. pixeltable/func/aggregate_function.py +1 -1
  18. pixeltable/func/callable_function.py +1 -1
  19. pixeltable/func/expr_template_function.py +2 -2
  20. pixeltable/func/function.py +3 -4
  21. pixeltable/func/query_template_function.py +87 -4
  22. pixeltable/func/tools.py +1 -1
  23. pixeltable/func/udf.py +1 -1
  24. pixeltable/functions/__init__.py +1 -0
  25. pixeltable/functions/anthropic.py +1 -1
  26. pixeltable/functions/bedrock.py +130 -0
  27. pixeltable/functions/huggingface.py +7 -6
  28. pixeltable/functions/image.py +15 -16
  29. pixeltable/functions/mistralai.py +3 -2
  30. pixeltable/functions/openai.py +9 -8
  31. pixeltable/functions/together.py +4 -3
  32. pixeltable/globals.py +7 -2
  33. pixeltable/io/datarows.py +4 -3
  34. pixeltable/io/label_studio.py +17 -17
  35. pixeltable/io/pandas.py +13 -12
  36. pixeltable/io/table_data_conduit.py +8 -2
  37. pixeltable/metadata/__init__.py +1 -1
  38. pixeltable/metadata/converters/convert_19.py +2 -2
  39. pixeltable/metadata/converters/convert_31.py +11 -0
  40. pixeltable/metadata/converters/convert_32.py +15 -0
  41. pixeltable/metadata/converters/convert_33.py +17 -0
  42. pixeltable/metadata/notes.py +3 -0
  43. pixeltable/metadata/schema.py +26 -1
  44. pixeltable/plan.py +2 -3
  45. pixeltable/share/packager.py +9 -25
  46. pixeltable/share/publish.py +20 -9
  47. pixeltable/store.py +7 -4
  48. pixeltable/utils/exception_handler.py +59 -0
  49. {pixeltable-0.3.11.dist-info → pixeltable-0.3.13.dist-info}/METADATA +1 -1
  50. {pixeltable-0.3.11.dist-info → pixeltable-0.3.13.dist-info}/RECORD +53 -48
  51. {pixeltable-0.3.11.dist-info → pixeltable-0.3.13.dist-info}/WHEEL +1 -1
  52. {pixeltable-0.3.11.dist-info → pixeltable-0.3.13.dist-info}/LICENSE +0 -0
  53. {pixeltable-0.3.11.dist-info → pixeltable-0.3.13.dist-info}/entry_points.txt +0 -0
@@ -11,8 +11,8 @@ _logger = logging.getLogger('pixeltable')
11
11
 
12
12
 
13
13
  class Path:
14
- def __init__(self, path: str, empty_is_valid: bool = False):
15
- if not is_valid_path(path, empty_is_valid):
14
+ def __init__(self, path: str, empty_is_valid: bool = False, allow_system_paths: bool = False):
15
+ if not is_valid_path(path, empty_is_valid, allow_system_paths):
16
16
  raise excs.Error(f"Invalid path format: '{path}'")
17
17
  self.components = path.split('.')
18
18
 
@@ -29,21 +29,25 @@ class Path:
29
29
  def is_root(self) -> bool:
30
30
  return not self.components[0]
31
31
 
32
+ @property
33
+ def is_system_path(self) -> bool:
34
+ return self.components[0].startswith('_')
35
+
32
36
  @property
33
37
  def parent(self) -> Path:
34
38
  if len(self.components) == 1:
35
39
  if self.is_root:
36
40
  return self
37
41
  else:
38
- return Path('', empty_is_valid=True)
42
+ return Path('', empty_is_valid=True, allow_system_paths=True)
39
43
  else:
40
- return Path('.'.join(self.components[:-1]))
44
+ return Path('.'.join(self.components[:-1]), allow_system_paths=True)
41
45
 
42
46
  def append(self, name: str) -> Path:
43
47
  if self.is_root:
44
- return Path(name)
48
+ return Path(name, allow_system_paths=True)
45
49
  else:
46
- return Path(f'{self!s}.{name}')
50
+ return Path(f'{self}.{name}', allow_system_paths=True)
47
51
 
48
52
  def is_ancestor(self, other: Path, is_parent: bool = False) -> bool:
49
53
  """
@@ -67,6 +71,9 @@ class Path:
67
71
  for i in range(0, len(self.components)):
68
72
  yield Path('.'.join(self.components[0:i]), empty_is_valid=True)
69
73
 
74
+ def __repr__(self) -> str:
75
+ return repr(str(self))
76
+
70
77
  def __str__(self) -> str:
71
78
  return '.'.join(self.components)
72
79
 
@@ -95,6 +95,7 @@ class Table(SchemaObject):
95
95
  'col1': StringType(),
96
96
  'col2': IntType(),
97
97
  },
98
+ 'is_replica': False,
98
99
  'version': 22,
99
100
  'schema_version': 1,
100
101
  'comment': '',
@@ -108,8 +109,9 @@ class Table(SchemaObject):
108
109
  self._check_is_dropped()
109
110
  with env.Env.get().begin_xact():
110
111
  md = super().get_metadata()
111
- md['base'] = self._base._path() if self._base is not None else None
112
+ md['base'] = self._base_table._path() if self._base_table is not None else None
112
113
  md['schema'] = self._schema
114
+ md['is_replica'] = self._tbl_version.get().is_replica
113
115
  md['version'] = self._version
114
116
  md['schema_version'] = self._tbl_version.get().schema_version
115
117
  md['comment'] = self._comment
@@ -139,14 +141,14 @@ class Table(SchemaObject):
139
141
  if self._is_dropped:
140
142
  raise excs.Error(f'{self._display_name()} {self._name} has been dropped')
141
143
 
142
- def __getattr__(self, name: str) -> 'pxt.exprs.ColumnRef':
144
+ def __getattr__(self, name: str) -> 'exprs.ColumnRef':
143
145
  """Return a ColumnRef for the given name."""
144
146
  col = self._tbl_version_path.get_column(name)
145
147
  if col is None:
146
148
  raise AttributeError(f'Column {name!r} unknown')
147
149
  return ColumnRef(col)
148
150
 
149
- def __getitem__(self, name: str) -> 'pxt.exprs.ColumnRef':
151
+ def __getitem__(self, name: str) -> 'exprs.ColumnRef':
150
152
  """Return a ColumnRef for the given name."""
151
153
  return getattr(self, name)
152
154
 
@@ -253,28 +255,27 @@ class Table(SchemaObject):
253
255
  return {c.name: c.col_type for c in self._tbl_version_path.columns()}
254
256
 
255
257
  @property
256
- def _base(self) -> Optional['Table']:
257
- """
258
- The base table of this `Table`. If this table is a view, returns the `Table`
259
- from which it was derived. Otherwise, returns `None`.
260
- """
261
- if self._tbl_version_path.base is None:
262
- return None
263
- base_id = self._tbl_version_path.base.tbl_version.id
264
- return catalog.Catalog.get().get_table_by_id(base_id)
258
+ @abc.abstractmethod
259
+ def _base_table(self) -> Optional['Table']:
260
+ """The base's Table instance"""
261
+ ...
265
262
 
266
263
  @property
267
- def _bases(self) -> list['Table']:
268
- """
269
- The ancestor list of bases of this table, starting with its immediate base.
270
- """
264
+ def _base_tables(self) -> list['Table']:
265
+ """The ancestor list of bases of this table, starting with its immediate base."""
271
266
  bases = []
272
- base = self._base
267
+ base = self._base_table
273
268
  while base is not None:
274
269
  bases.append(base)
275
- base = base._base
270
+ base = base._base_table
276
271
  return bases
277
272
 
273
+ @property
274
+ @abc.abstractmethod
275
+ def _effective_base_versions(self) -> list[Optional[int]]:
276
+ """The effective versions of the ancestor bases, starting with its immediate base."""
277
+ ...
278
+
278
279
  @property
279
280
  def _comment(self) -> str:
280
281
  return self._tbl_version.get().comment
@@ -298,7 +299,7 @@ class Table(SchemaObject):
298
299
  Constructs a list of descriptors for this table that can be pretty-printed.
299
300
  """
300
301
  helper = DescriptionHelper()
301
- helper.append(self._title_descriptor())
302
+ helper.append(self._table_descriptor())
302
303
  helper.append(self._col_descriptor())
303
304
  idxs = self._index_descriptor()
304
305
  if not idxs.empty:
@@ -310,14 +311,8 @@ class Table(SchemaObject):
310
311
  helper.append(f'COMMENT: {self._comment}')
311
312
  return helper
312
313
 
313
- def _title_descriptor(self) -> str:
314
- title: str
315
- if self._base is None:
316
- title = f'Table\n{self._path()!r}'
317
- else:
318
- title = f'View\n{self._path()!r}'
319
- title += f'\n(of {self.__bases_to_desc()})'
320
- return title
314
+ @abc.abstractmethod
315
+ def _table_descriptor(self) -> str: ...
321
316
 
322
317
  def _col_descriptor(self, columns: Optional[list[str]] = None) -> pd.DataFrame:
323
318
  return pd.DataFrame(
@@ -330,14 +325,6 @@ class Table(SchemaObject):
330
325
  if columns is None or col.name in columns
331
326
  )
332
327
 
333
- def __bases_to_desc(self) -> str:
334
- bases = self._bases
335
- assert len(bases) >= 1
336
- if len(bases) <= 2:
337
- return ', '.join(repr(b._path()) for b in bases)
338
- else:
339
- return f'{bases[0]._path()!r}, ..., {bases[-1]._path()!r}'
340
-
341
328
  def _index_descriptor(self, columns: Optional[list[str]] = None) -> pd.DataFrame:
342
329
  from pixeltable import index
343
330
 
@@ -371,9 +358,9 @@ class Table(SchemaObject):
371
358
  """
372
359
  self._check_is_dropped()
373
360
  if getattr(builtins, '__IPYTHON__', False):
374
- from IPython.display import display
361
+ from IPython.display import Markdown, display
375
362
 
376
- display(self._repr_html_())
363
+ display(Markdown(self._repr_html_()))
377
364
  else:
378
365
  print(repr(self))
379
366
 
@@ -689,7 +676,7 @@ class Table(SchemaObject):
689
676
  for name, spec in schema.items():
690
677
  col_type: Optional[ts.ColumnType] = None
691
678
  value_expr: Optional[exprs.Expr] = None
692
- primary_key: Optional[bool] = None
679
+ primary_key: bool = False
693
680
  media_validation: Optional[catalog.MediaValidation] = None
694
681
  stored = True
695
682
 
@@ -711,7 +698,7 @@ class Table(SchemaObject):
711
698
  value_expr = value_expr.copy()
712
699
  value_expr.bind_rel_paths()
713
700
  stored = spec.get('stored', True)
714
- primary_key = spec.get('primary_key')
701
+ primary_key = spec.get('primary_key', False)
715
702
  media_validation_str = spec.get('media_validation')
716
703
  media_validation = (
717
704
  catalog.MediaValidation[media_validation_str.upper()] if media_validation_str is not None else None
@@ -1282,7 +1269,7 @@ class Table(SchemaObject):
1282
1269
  raise NotImplementedError
1283
1270
 
1284
1271
  def update(
1285
- self, value_spec: dict[str, Any], where: Optional['pxt.exprs.Expr'] = None, cascade: bool = True
1272
+ self, value_spec: dict[str, Any], where: Optional['exprs.Expr'] = None, cascade: bool = True
1286
1273
  ) -> UpdateStatus:
1287
1274
  """Update rows in this table.
1288
1275
 
@@ -1383,7 +1370,7 @@ class Table(SchemaObject):
1383
1370
  FileCache.get().emit_eviction_warnings()
1384
1371
  return status
1385
1372
 
1386
- def delete(self, where: Optional['pxt.exprs.Expr'] = None) -> UpdateStatus:
1373
+ def delete(self, where: Optional['exprs.Expr'] = None) -> UpdateStatus:
1387
1374
  """Delete rows in this table.
1388
1375
 
1389
1376
  Args:
@@ -5,7 +5,7 @@ import importlib
5
5
  import logging
6
6
  import time
7
7
  import uuid
8
- from typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal, Optional
8
+ from typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal, Optional, Tuple
9
9
  from uuid import UUID
10
10
 
11
11
  import jsonschema.exceptions
@@ -18,6 +18,7 @@ from pixeltable import exprs, index
18
18
  from pixeltable.env import Env
19
19
  from pixeltable.iterators import ComponentIterator
20
20
  from pixeltable.metadata import schema
21
+ from pixeltable.utils.exception_handler import run_cleanup_on_exception
21
22
  from pixeltable.utils.filecache import FileCache
22
23
  from pixeltable.utils.media_store import MediaStore
23
24
 
@@ -55,6 +56,7 @@ class TableVersion:
55
56
  name: str
56
57
  user: Optional[str]
57
58
  effective_version: Optional[int]
59
+ is_replica: bool
58
60
  version: int
59
61
  comment: str
60
62
  media_validation: MediaValidation
@@ -111,6 +113,7 @@ class TableVersion:
111
113
  self.user = tbl_md.user
112
114
  self.effective_version = effective_version
113
115
  self.version = tbl_md.current_version if effective_version is None else effective_version
116
+ self.is_replica = tbl_md.is_replica
114
117
  self.comment = schema_version_md.comment
115
118
  self.num_retained_versions = schema_version_md.num_retained_versions
116
119
  self.schema_version = schema_version_md.schema_version
@@ -232,6 +235,7 @@ class TableVersion:
232
235
  tbl_id=str(tbl_id),
233
236
  name=name,
234
237
  user=user,
238
+ is_replica=False,
235
239
  current_version=0,
236
240
  current_schema_version=0,
237
241
  next_col_id=len(cols),
@@ -310,24 +314,16 @@ class TableVersion:
310
314
  session.add(schema_version_record)
311
315
  return tbl_record.id, tbl_version
312
316
 
313
- @classmethod
314
- def delete_md(cls, tbl_id: UUID) -> None:
315
- conn = Env.get().conn
316
- conn.execute(sql.delete(schema.TableSchemaVersion.__table__).where(schema.TableSchemaVersion.tbl_id == tbl_id))
317
- conn.execute(sql.delete(schema.TableVersion.__table__).where(schema.TableVersion.tbl_id == tbl_id))
318
- conn.execute(sql.delete(schema.Table.__table__).where(schema.Table.id == tbl_id))
319
-
320
317
  def drop(self) -> None:
318
+ from .catalog import Catalog
319
+
320
+ cat = Catalog.get()
321
321
  # delete this table and all associated data
322
322
  MediaStore.delete(self.id)
323
323
  FileCache.get().clear(tbl_id=self.id)
324
- self.delete_md(self.id)
324
+ cat.delete_tbl_md(self.id)
325
325
  self.store_tbl.drop()
326
-
327
326
  # de-register table version from catalog
328
- from .catalog import Catalog
329
-
330
- cat = Catalog.get()
331
327
  cat.remove_tbl_version(self)
332
328
 
333
329
  def _init_schema(self, tbl_md: schema.TableMd, schema_version_md: schema.TableSchemaVersionMd) -> None:
@@ -381,7 +377,7 @@ class TableVersion:
381
377
 
382
378
  # make sure to traverse columns ordered by position = order in which cols were created;
383
379
  # this guarantees that references always point backwards
384
- if col_md.value_expr is not None:
380
+ if not self.is_snapshot and col_md.value_expr is not None:
385
381
  self._record_refd_columns(col)
386
382
 
387
383
  def _init_idxs(self, tbl_md: schema.TableMd) -> None:
@@ -437,29 +433,15 @@ class TableVersion:
437
433
  specified preceding schema version
438
434
  """
439
435
  assert update_tbl_version or preceding_schema_version is None
436
+ from pixeltable.catalog import Catalog
440
437
 
441
- conn = Env.get().conn
442
- conn.execute(
443
- sql.update(schema.Table.__table__)
444
- .values({schema.Table.md: dataclasses.asdict(self._create_tbl_md())})
445
- .where(schema.Table.id == self.id)
438
+ tbl_md = self._create_tbl_md()
439
+ version_md = self._create_version_md(timestamp) if update_tbl_version else None
440
+ schema_version_md = (
441
+ self._create_schema_version_md(preceding_schema_version) if preceding_schema_version is not None else None
446
442
  )
447
443
 
448
- if update_tbl_version:
449
- version_md = self._create_version_md(timestamp)
450
- conn.execute(
451
- sql.insert(schema.TableVersion.__table__).values(
452
- tbl_id=self.id, version=self.version, md=dataclasses.asdict(version_md)
453
- )
454
- )
455
-
456
- if preceding_schema_version is not None:
457
- schema_version_md = self._create_schema_version_md(preceding_schema_version)
458
- conn.execute(
459
- sql.insert(schema.TableSchemaVersion.__table__).values(
460
- tbl_id=self.id, schema_version=self.schema_version, md=dataclasses.asdict(schema_version_md)
461
- )
462
- )
444
+ Catalog.get().store_tbl_md(self.id, tbl_md, version_md, schema_version_md)
463
445
 
464
446
  def ensure_md_loaded(self) -> None:
465
447
  """Ensure that table metadata is loaded."""
@@ -480,33 +462,36 @@ class TableVersion:
480
462
  _logger.info(f'Added index {idx_name} on column {col.name} to table {self.name}')
481
463
  return status
482
464
 
483
- def _add_default_index(self, col: Column) -> Optional[UpdateStatus]:
484
- """Add a B-tree index on this column if it has a compatible type"""
465
+ def _is_btree_indexable(self, col: Column) -> bool:
485
466
  if not col.stored:
486
467
  # if the column is intentionally not stored, we want to avoid the overhead of an index
487
- return None
468
+ return False
488
469
  # Skip index for stored media columns produced by an iterator
489
470
  if col.col_type.is_media_type() and self.is_iterator_column(col):
490
- return None
471
+ return False
491
472
  if not col.col_type.is_scalar_type() and not (col.col_type.is_media_type() and not col.is_computed):
492
473
  # wrong type for a B-tree
493
- return None
494
- if col.col_type.is_bool_type():
474
+ return False
475
+ if col.col_type.is_bool_type(): # noqa : SIM103 Supress `Return the negated condition directly` check
495
476
  # B-trees on bools aren't useful
477
+ return False
478
+ return True
479
+
480
+ def _add_default_index(self, col: Column) -> Optional[UpdateStatus]:
481
+ """Add a B-tree index on this column if it has a compatible type"""
482
+ if not self._is_btree_indexable(col):
496
483
  return None
497
484
  status = self._add_index(col, idx_name=None, idx=index.BtreeIndex(col))
498
485
  return status
499
486
 
500
- def _add_index(self, col: Column, idx_name: Optional[str], idx: index.IndexBase) -> UpdateStatus:
487
+ def _create_index_columns(self, idx: index.IndexBase) -> Tuple[Column, Column]:
488
+ """Create value and undo columns for the given index.
489
+ Args:
490
+ idx: index for which columns will be created.
491
+ Returns:
492
+ A tuple containing the value column and the undo column.
493
+ """
501
494
  assert not self.is_snapshot
502
- idx_id = self.next_idx_id
503
- self.next_idx_id += 1
504
- if idx_name is None:
505
- idx_name = f'idx{idx_id}'
506
- else:
507
- assert is_valid_identifier(idx_name)
508
- assert idx_name not in [i.name for i in self.idx_md.values()]
509
-
510
495
  # add the index value and undo columns (which need to be nullable)
511
496
  val_col = Column(
512
497
  col_id=self.next_col_id,
@@ -535,7 +520,19 @@ class TableVersion:
535
520
  undo_col.tbl = self.create_handle()
536
521
  undo_col.col_type = undo_col.col_type.copy(nullable=True)
537
522
  self.next_col_id += 1
523
+ return val_col, undo_col
538
524
 
525
+ def _create_index(
526
+ self, col: Column, val_col: Column, undo_col: Column, idx_name: Optional[str], idx: index.IndexBase
527
+ ) -> None:
528
+ """Create the given index along with index md"""
529
+ idx_id = self.next_idx_id
530
+ self.next_idx_id += 1
531
+ if idx_name is None:
532
+ idx_name = f'idx{idx_id}'
533
+ else:
534
+ assert is_valid_identifier(idx_name)
535
+ assert idx_name not in [i.name for i in self.idx_md.values()]
539
536
  # create and register the index metadata
540
537
  idx_cls = type(idx)
541
538
  idx_md = schema.IndexMd(
@@ -553,14 +550,27 @@ class TableVersion:
553
550
  idx_info = self.IndexInfo(id=idx_id, name=idx_name, idx=idx, col=col, val_col=val_col, undo_col=undo_col)
554
551
  self.idx_md[idx_id] = idx_md
555
552
  self.idxs_by_name[idx_name] = idx_info
553
+ try:
554
+ idx.create_index(self._store_idx_name(idx_id), val_col)
555
+ finally:
556
556
 
557
+ def cleanup_index() -> None:
558
+ """Delete the newly added in-memory index structure"""
559
+ del self.idxs_by_name[idx_name]
560
+ del self.idx_md[idx_id]
561
+ self.next_idx_id = idx_id
562
+
563
+ # Run cleanup only if there has been an exception; otherwise, skip cleanup.
564
+ run_cleanup_on_exception(cleanup_index)
565
+
566
+ def _add_index(self, col: Column, idx_name: Optional[str], idx: index.IndexBase) -> UpdateStatus:
567
+ val_col, undo_vol = self._create_index_columns(idx)
557
568
  # add the columns and update the metadata
558
569
  # TODO support on_error='abort' for indices; it's tricky because of the way metadata changes are entangled
559
570
  # with the database operations
560
- status = self._add_columns([val_col, undo_col], print_stats=False, on_error='ignore')
571
+ status = self._add_columns([val_col, undo_vol], print_stats=False, on_error='ignore')
561
572
  # now create the index structure
562
- idx.create_index(self._store_idx_name(idx_id), val_col)
563
-
573
+ self._create_index(col, val_col, undo_vol, idx_name, idx)
564
574
  return status
565
575
 
566
576
  def drop_index(self, idx_id: int) -> None:
@@ -601,9 +611,21 @@ class TableVersion:
601
611
  self.version += 1
602
612
  preceding_schema_version = self.schema_version
603
613
  self.schema_version = self.version
604
- status = self._add_columns(cols, print_stats=print_stats, on_error=on_error)
614
+ index_cols: dict[Column, tuple[index.BtreeIndex, Column, Column]] = {}
615
+ all_cols: list[Column] = []
605
616
  for col in cols:
606
- _ = self._add_default_index(col)
617
+ all_cols.append(col)
618
+ if self._is_btree_indexable(col):
619
+ idx = index.BtreeIndex(col)
620
+ val_col, undo_col = self._create_index_columns(idx)
621
+ index_cols[col] = (idx, val_col, undo_col)
622
+ all_cols.append(val_col)
623
+ all_cols.append(undo_col)
624
+ # Add all columns
625
+ status = self._add_columns(all_cols, print_stats=print_stats, on_error=on_error)
626
+ # Create indices and their mds
627
+ for col, (idx, val_col, undo_col) in index_cols.items():
628
+ self._create_index(col, val_col, undo_col, idx_name=None, idx=idx)
607
629
  self._update_md(time.time(), preceding_schema_version=preceding_schema_version)
608
630
  _logger.info(f'Added columns {[col.name for col in cols]} to table {self.name}, new version: {self.version}')
609
631
 
@@ -619,9 +641,9 @@ class TableVersion:
619
641
  self, cols: Iterable[Column], print_stats: bool, on_error: Literal['abort', 'ignore']
620
642
  ) -> UpdateStatus:
621
643
  """Add and populate columns within the current transaction"""
622
- cols = list(cols)
644
+ cols_to_add = list(cols)
623
645
  row_count = self.store_tbl.count()
624
- for col in cols:
646
+ for col in cols_to_add:
625
647
  if not col.col_type.nullable and not col.is_computed and row_count > 0:
626
648
  raise excs.Error(
627
649
  f'Cannot add non-nullable column {col.name!r} to table {self.name!r} with existing rows'
@@ -629,7 +651,8 @@ class TableVersion:
629
651
 
630
652
  num_excs = 0
631
653
  cols_with_excs: list[Column] = []
632
- for col in cols:
654
+ for col in cols_to_add:
655
+ excs_per_col = 0
633
656
  col.schema_version_add = self.schema_version
634
657
  # add the column to the lookup structures now, rather than after the store changes executed successfully,
635
658
  # because it might be referenced by the next column's value_expr
@@ -652,29 +675,32 @@ class TableVersion:
652
675
 
653
676
  plan, value_expr_slot_idx = Planner.create_add_column_plan(self.path, col)
654
677
  plan.ctx.num_rows = row_count
655
-
656
678
  try:
657
679
  plan.open()
658
680
  try:
659
- num_excs = self.store_tbl.load_column(col, plan, value_expr_slot_idx, on_error)
681
+ excs_per_col = self.store_tbl.load_column(col, plan, value_expr_slot_idx, on_error)
660
682
  except sql.exc.DBAPIError as exc:
661
683
  # Wrap the DBAPIError in an excs.Error to unify processing in the subsequent except block
662
684
  raise excs.Error(f'SQL error during execution of computed column `{col.name}`:\n{exc}') from exc
663
- if num_excs > 0:
685
+ if excs_per_col > 0:
664
686
  cols_with_excs.append(col)
665
- except excs.Error as exc:
666
- self.cols.pop()
667
- for c in cols:
668
- # remove columns that we already added
669
- if c.id not in self.cols_by_id:
670
- continue
671
- if c.name is not None:
672
- del self.cols_by_name[c.name]
673
- del self.cols_by_id[c.id]
674
- # we need to re-initialize the sqlalchemy schema
675
- self.store_tbl.create_sa_tbl()
676
- raise exc
687
+ num_excs += excs_per_col
677
688
  finally:
689
+ # Ensure cleanup occurs if an exception or keyboard interruption happens during `load_column()`.
690
+ def cleanup_on_error() -> None:
691
+ """Delete columns that are added as part of current add_columns operation and re-initialize
692
+ the sqlalchemy schema"""
693
+ self.cols = [col for col in self.cols if col not in cols_to_add]
694
+ for col in cols_to_add:
695
+ # remove columns that we already added
696
+ if col.id in self.cols_by_id:
697
+ del self.cols_by_id[col.id]
698
+ if col.name is not None and col.name in self.cols_by_name:
699
+ del self.cols_by_name[col.name]
700
+ self.store_tbl.create_sa_tbl()
701
+
702
+ # Run cleanup only if there has been an exception; otherwise, skip cleanup.
703
+ run_cleanup_on_exception(cleanup_on_error)
678
704
  plan.close()
679
705
 
680
706
  if print_stats:
@@ -1298,6 +1324,7 @@ class TableVersion:
1298
1324
  column_md: dict[int, schema.ColumnMd] = {}
1299
1325
  for col in cols:
1300
1326
  value_expr_dict = col.value_expr.as_dict() if col.value_expr is not None else None
1327
+ assert col.is_pk is not None
1301
1328
  column_md[col.id] = schema.ColumnMd(
1302
1329
  id=col.id,
1303
1330
  col_type=col.col_type.as_dict(),
@@ -1320,6 +1347,7 @@ class TableVersion:
1320
1347
  tbl_id=str(self.id),
1321
1348
  name=self.name,
1322
1349
  user=self.user,
1350
+ is_replica=self.is_replica,
1323
1351
  current_version=self.version,
1324
1352
  current_schema_version=self.schema_version,
1325
1353
  next_col_id=self.next_col_id,
@@ -8,7 +8,7 @@ from uuid import UUID
8
8
  import pixeltable.exceptions as excs
9
9
  import pixeltable.metadata.schema as md_schema
10
10
  import pixeltable.type_system as ts
11
- from pixeltable import exprs, func
11
+ from pixeltable import catalog, exprs, func
12
12
  from pixeltable.env import Env
13
13
  from pixeltable.iterators import ComponentIterator
14
14
 
@@ -20,7 +20,7 @@ from .table_version_handle import TableVersionHandle
20
20
  from .table_version_path import TableVersionPath
21
21
 
22
22
  if TYPE_CHECKING:
23
- import pixeltable as pxt
23
+ from pixeltable.globals import TableDataSource
24
24
 
25
25
  _logger = logging.getLogger('pixeltable')
26
26
 
@@ -65,7 +65,7 @@ class View(Table):
65
65
  base: TableVersionPath,
66
66
  select_list: Optional[list[tuple[exprs.Expr, Optional[str]]]],
67
67
  additional_columns: dict[str, Any],
68
- predicate: Optional['pxt.exprs.Expr'],
68
+ predicate: Optional['exprs.Expr'],
69
69
  is_snapshot: bool,
70
70
  num_retained_versions: int,
71
71
  comment: str,
@@ -242,7 +242,7 @@ class View(Table):
242
242
  # there is not TableVersion to drop
243
243
  self._check_is_dropped()
244
244
  self.is_dropped = True
245
- TableVersion.delete_md(self._id)
245
+ catalog.Catalog.get().delete_tbl_md(self._id)
246
246
  else:
247
247
  super()._drop()
248
248
 
@@ -252,11 +252,6 @@ class View(Table):
252
252
  md['is_snapshot'] = self._tbl_version_path.is_snapshot()
253
253
  return md
254
254
 
255
- if TYPE_CHECKING:
256
- import datasets # type: ignore[import-untyped]
257
-
258
- from pixeltable.globals import RowData, TableDataSource
259
-
260
255
  def insert(
261
256
  self,
262
257
  source: Optional[TableDataSource] = None,
@@ -272,3 +267,34 @@ class View(Table):
272
267
 
273
268
  def delete(self, where: Optional[exprs.Expr] = None) -> UpdateStatus:
274
269
  raise excs.Error(f'{self._display_name()} {self._name!r}: cannot delete from view')
270
+
271
+ @property
272
+ def _base_table(self) -> Optional['Table']:
273
+ # if this is a pure snapshot, our tbl_version_path only reflects the base (there is no TableVersion instance
274
+ # for the snapshot itself)
275
+ base_id = self._tbl_version.id if self._snapshot_only else self._tbl_version_path.base.tbl_version.id
276
+ return catalog.Catalog.get().get_table_by_id(base_id)
277
+
278
+ @property
279
+ def _effective_base_versions(self) -> list[Optional[int]]:
280
+ effective_versions = [tv.effective_version for tv in self._tbl_version_path.get_tbl_versions()]
281
+ if self._snapshot_only:
282
+ return effective_versions
283
+ else:
284
+ return effective_versions[1:]
285
+
286
+ def _table_descriptor(self) -> str:
287
+ display_name = 'Snapshot' if self._snapshot_only else 'View'
288
+ result = [f'{display_name} {self._path()!r}']
289
+ bases_descrs: list[str] = []
290
+ for base, effective_version in zip(self._base_tables, self._effective_base_versions):
291
+ if effective_version is None:
292
+ bases_descrs.append(f'{base._path()!r}')
293
+ else:
294
+ base_descr = f'{base._path()}:{effective_version}'
295
+ bases_descrs.append(f'{base_descr!r}')
296
+ result.append(f' (of {", ".join(bases_descrs)})')
297
+
298
+ if self._tbl_version.get().predicate is not None:
299
+ result.append(f'\nWhere: {self._tbl_version.get().predicate!s}')
300
+ return ''.join(result)
pixeltable/dataframe.py CHANGED
@@ -513,9 +513,9 @@ class DataFrame:
513
513
  (select list, where clause, ...) vertically.
514
514
  """
515
515
  if getattr(builtins, '__IPYTHON__', False):
516
- from IPython.display import display
516
+ from IPython.display import Markdown, display
517
517
 
518
- display(self._repr_html_())
518
+ display(Markdown(self._repr_html_()))
519
519
  else:
520
520
  print(repr(self))
521
521
 
pixeltable/exceptions.py CHANGED
@@ -1,4 +1,3 @@
1
- from dataclasses import dataclass
2
1
  from types import TracebackType
3
2
  from typing import TYPE_CHECKING, Any
4
3
 
@@ -10,7 +9,6 @@ class Error(Exception):
10
9
  pass
11
10
 
12
11
 
13
- @dataclass
14
12
  class ExprEvalError(Exception):
15
13
  expr: 'exprs.Expr'
16
14
  expr_msg: str
@@ -19,6 +17,26 @@ class ExprEvalError(Exception):
19
17
  input_vals: list[Any]
20
18
  row_num: int
21
19
 
20
+ def __init__(
21
+ self,
22
+ expr: 'exprs.Expr',
23
+ expr_msg: str,
24
+ exc: Exception,
25
+ exc_tb: TracebackType,
26
+ input_vals: list[Any],
27
+ row_num: int,
28
+ ) -> None:
29
+ exct = type(exc)
30
+ super().__init__(
31
+ f'Expression evaluation failed with an error of type `{exct.__module__}.{exct.__qualname__}`:\n{expr}'
32
+ )
33
+ self.expr = expr
34
+ self.expr_msg = expr_msg
35
+ self.exc = exc
36
+ self.exc_tb = exc_tb
37
+ self.input_vals = input_vals
38
+ self.row_num = row_num
39
+
22
40
 
23
41
  class PixeltableWarning(Warning):
24
42
  pass