pixeltable 0.3.10__py3-none-any.whl → 0.3.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (119)
  1. pixeltable/__init__.py +2 -2
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +2 -1
  4. pixeltable/catalog/catalog.py +370 -93
  5. pixeltable/catalog/column.py +6 -4
  6. pixeltable/catalog/dir.py +5 -5
  7. pixeltable/catalog/globals.py +14 -16
  8. pixeltable/catalog/insertable_table.py +6 -8
  9. pixeltable/catalog/path.py +14 -7
  10. pixeltable/catalog/table.py +72 -62
  11. pixeltable/catalog/table_version.py +137 -107
  12. pixeltable/catalog/table_version_handle.py +3 -0
  13. pixeltable/catalog/table_version_path.py +1 -1
  14. pixeltable/catalog/view.py +10 -14
  15. pixeltable/dataframe.py +5 -3
  16. pixeltable/env.py +108 -42
  17. pixeltable/exec/__init__.py +2 -0
  18. pixeltable/exec/aggregation_node.py +6 -8
  19. pixeltable/exec/cache_prefetch_node.py +4 -7
  20. pixeltable/exec/component_iteration_node.py +1 -3
  21. pixeltable/exec/data_row_batch.py +1 -2
  22. pixeltable/exec/exec_context.py +1 -1
  23. pixeltable/exec/exec_node.py +1 -2
  24. pixeltable/exec/expr_eval/__init__.py +2 -0
  25. pixeltable/exec/expr_eval/evaluators.py +137 -20
  26. pixeltable/exec/expr_eval/expr_eval_node.py +43 -64
  27. pixeltable/exec/expr_eval/globals.py +68 -7
  28. pixeltable/exec/expr_eval/schedulers.py +25 -23
  29. pixeltable/exec/in_memory_data_node.py +8 -6
  30. pixeltable/exec/row_update_node.py +3 -4
  31. pixeltable/exec/sql_node.py +16 -18
  32. pixeltable/exprs/__init__.py +1 -1
  33. pixeltable/exprs/column_property_ref.py +1 -1
  34. pixeltable/exprs/column_ref.py +3 -3
  35. pixeltable/exprs/compound_predicate.py +1 -1
  36. pixeltable/exprs/data_row.py +17 -1
  37. pixeltable/exprs/expr.py +12 -12
  38. pixeltable/exprs/function_call.py +34 -2
  39. pixeltable/exprs/json_mapper.py +95 -48
  40. pixeltable/exprs/json_path.py +4 -9
  41. pixeltable/exprs/method_ref.py +2 -2
  42. pixeltable/exprs/object_ref.py +2 -2
  43. pixeltable/exprs/row_builder.py +33 -6
  44. pixeltable/exprs/similarity_expr.py +1 -1
  45. pixeltable/exprs/sql_element_cache.py +1 -1
  46. pixeltable/exprs/string_op.py +2 -2
  47. pixeltable/ext/__init__.py +1 -1
  48. pixeltable/ext/functions/__init__.py +1 -1
  49. pixeltable/ext/functions/whisperx.py +1 -1
  50. pixeltable/ext/functions/yolox.py +1 -1
  51. pixeltable/func/__init__.py +1 -1
  52. pixeltable/func/aggregate_function.py +2 -2
  53. pixeltable/func/callable_function.py +3 -6
  54. pixeltable/func/expr_template_function.py +24 -4
  55. pixeltable/func/function.py +7 -9
  56. pixeltable/func/function_registry.py +1 -1
  57. pixeltable/func/query_template_function.py +87 -4
  58. pixeltable/func/signature.py +1 -1
  59. pixeltable/func/tools.py +1 -1
  60. pixeltable/func/udf.py +2 -2
  61. pixeltable/functions/__init__.py +1 -1
  62. pixeltable/functions/anthropic.py +2 -2
  63. pixeltable/functions/audio.py +1 -1
  64. pixeltable/functions/deepseek.py +1 -1
  65. pixeltable/functions/fireworks.py +1 -1
  66. pixeltable/functions/globals.py +6 -6
  67. pixeltable/functions/huggingface.py +1 -1
  68. pixeltable/functions/image.py +1 -1
  69. pixeltable/functions/json.py +1 -1
  70. pixeltable/functions/llama_cpp.py +1 -1
  71. pixeltable/functions/math.py +1 -1
  72. pixeltable/functions/mistralai.py +1 -1
  73. pixeltable/functions/ollama.py +1 -1
  74. pixeltable/functions/openai.py +2 -2
  75. pixeltable/functions/replicate.py +1 -1
  76. pixeltable/functions/string.py +1 -1
  77. pixeltable/functions/timestamp.py +1 -1
  78. pixeltable/functions/together.py +1 -1
  79. pixeltable/functions/util.py +1 -1
  80. pixeltable/functions/video.py +2 -2
  81. pixeltable/functions/vision.py +2 -2
  82. pixeltable/globals.py +7 -2
  83. pixeltable/index/embedding_index.py +12 -1
  84. pixeltable/io/__init__.py +5 -3
  85. pixeltable/io/fiftyone.py +6 -7
  86. pixeltable/io/label_studio.py +21 -20
  87. pixeltable/io/pandas.py +6 -5
  88. pixeltable/iterators/__init__.py +1 -1
  89. pixeltable/metadata/__init__.py +6 -4
  90. pixeltable/metadata/converters/convert_24.py +3 -3
  91. pixeltable/metadata/converters/convert_25.py +1 -1
  92. pixeltable/metadata/converters/convert_29.py +1 -1
  93. pixeltable/metadata/converters/convert_31.py +11 -0
  94. pixeltable/metadata/converters/convert_32.py +15 -0
  95. pixeltable/metadata/converters/convert_33.py +17 -0
  96. pixeltable/metadata/notes.py +3 -0
  97. pixeltable/metadata/schema.py +26 -1
  98. pixeltable/plan.py +2 -3
  99. pixeltable/share/packager.py +8 -24
  100. pixeltable/share/publish.py +20 -9
  101. pixeltable/store.py +9 -6
  102. pixeltable/type_system.py +19 -7
  103. pixeltable/utils/console_output.py +3 -2
  104. pixeltable/utils/coroutine.py +3 -3
  105. pixeltable/utils/dbms.py +66 -0
  106. pixeltable/utils/documents.py +61 -67
  107. pixeltable/utils/exception_handler.py +59 -0
  108. pixeltable/utils/filecache.py +1 -1
  109. pixeltable/utils/http_server.py +3 -2
  110. pixeltable/utils/pytorch.py +1 -1
  111. pixeltable/utils/sql.py +1 -1
  112. pixeltable-0.3.12.dist-info/METADATA +436 -0
  113. pixeltable-0.3.12.dist-info/RECORD +183 -0
  114. pixeltable/catalog/path_dict.py +0 -169
  115. pixeltable-0.3.10.dist-info/METADATA +0 -382
  116. pixeltable-0.3.10.dist-info/RECORD +0 -179
  117. {pixeltable-0.3.10.dist-info → pixeltable-0.3.12.dist-info}/LICENSE +0 -0
  118. {pixeltable-0.3.10.dist-info → pixeltable-0.3.12.dist-info}/WHEEL +0 -0
  119. {pixeltable-0.3.10.dist-info → pixeltable-0.3.12.dist-info}/entry_points.txt +0 -0
@@ -5,7 +5,7 @@ import importlib
5
5
  import logging
6
6
  import time
7
7
  import uuid
8
- from typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal, Optional
8
+ from typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal, Optional, Tuple
9
9
  from uuid import UUID
10
10
 
11
11
  import jsonschema.exceptions
@@ -13,12 +13,12 @@ import sqlalchemy as sql
13
13
 
14
14
  import pixeltable as pxt
15
15
  import pixeltable.exceptions as excs
16
- import pixeltable.exprs as exprs
17
- import pixeltable.index as index
18
16
  import pixeltable.type_system as ts
17
+ from pixeltable import exprs, index
19
18
  from pixeltable.env import Env
20
19
  from pixeltable.iterators import ComponentIterator
21
20
  from pixeltable.metadata import schema
21
+ from pixeltable.utils.exception_handler import run_cleanup_on_exception
22
22
  from pixeltable.utils.filecache import FileCache
23
23
  from pixeltable.utils.media_store import MediaStore
24
24
 
@@ -54,7 +54,9 @@ class TableVersion:
54
54
 
55
55
  id: UUID
56
56
  name: str
57
+ user: Optional[str]
57
58
  effective_version: Optional[int]
59
+ is_replica: bool
58
60
  version: int
59
61
  comment: str
60
62
  media_validation: MediaValidation
@@ -108,8 +110,10 @@ class TableVersion:
108
110
  ):
109
111
  self.id = id
110
112
  self.name = tbl_md.name
113
+ self.user = tbl_md.user
111
114
  self.effective_version = effective_version
112
115
  self.version = tbl_md.current_version if effective_version is None else effective_version
116
+ self.is_replica = tbl_md.is_replica
113
117
  self.comment = schema_version_md.comment
114
118
  self.num_retained_versions = schema_version_md.num_retained_versions
115
119
  self.schema_version = schema_version_md.schema_version
@@ -211,6 +215,7 @@ class TableVersion:
211
215
  view_md: Optional[schema.ViewMd] = None,
212
216
  ) -> tuple[UUID, Optional[TableVersion]]:
213
217
  session = Env.get().session
218
+ user = Env.get().user
214
219
 
215
220
  # assign ids
216
221
  cols_by_name: dict[str, Column] = {}
@@ -229,7 +234,8 @@ class TableVersion:
229
234
  table_md = schema.TableMd(
230
235
  tbl_id=str(tbl_id),
231
236
  name=name,
232
- user=None,
237
+ user=user,
238
+ is_replica=False,
233
239
  current_version=0,
234
240
  current_schema_version=0,
235
241
  next_col_id=len(cols),
@@ -308,24 +314,16 @@ class TableVersion:
308
314
  session.add(schema_version_record)
309
315
  return tbl_record.id, tbl_version
310
316
 
311
- @classmethod
312
- def delete_md(cls, tbl_id: UUID) -> None:
313
- conn = Env.get().conn
314
- conn.execute(sql.delete(schema.TableSchemaVersion.__table__).where(schema.TableSchemaVersion.tbl_id == tbl_id))
315
- conn.execute(sql.delete(schema.TableVersion.__table__).where(schema.TableVersion.tbl_id == tbl_id))
316
- conn.execute(sql.delete(schema.Table.__table__).where(schema.Table.id == tbl_id))
317
-
318
317
  def drop(self) -> None:
318
+ from .catalog import Catalog
319
+
320
+ cat = Catalog.get()
319
321
  # delete this table and all associated data
320
322
  MediaStore.delete(self.id)
321
323
  FileCache.get().clear(tbl_id=self.id)
322
- self.delete_md(self.id)
324
+ cat.delete_tbl_md(self.id)
323
325
  self.store_tbl.drop()
324
-
325
326
  # de-register table version from catalog
326
- from .catalog import Catalog
327
-
328
- cat = Catalog.get()
329
327
  cat.remove_tbl_version(self)
330
328
 
331
329
  def _init_schema(self, tbl_md: schema.TableMd, schema_version_md: schema.TableSchemaVersionMd) -> None:
@@ -341,8 +339,11 @@ class TableVersion:
341
339
  self.cols = []
342
340
  self.cols_by_name = {}
343
341
  self.cols_by_id = {}
344
- for col_md in tbl_md.column_md.values():
345
- schema_col_md = schema_version_md.columns[col_md.id] if col_md.id in schema_version_md.columns else None
342
+ # Sort columns in column_md by the position specified in col_md.id to guarantee that all references
343
+ # point backward.
344
+ sorted_column_md = sorted(tbl_md.column_md.values(), key=lambda item: item.id)
345
+ for col_md in sorted_column_md:
346
+ schema_col_md = schema_version_md.columns.get(col_md.id)
346
347
  col_name = schema_col_md.name if schema_col_md is not None else None
347
348
  media_val = (
348
349
  MediaValidation[schema_col_md.media_validation.upper()]
@@ -376,7 +377,7 @@ class TableVersion:
376
377
 
377
378
  # make sure to traverse columns ordered by position = order in which cols were created;
378
379
  # this guarantees that references always point backwards
379
- if col_md.value_expr is not None:
380
+ if not self.is_snapshot and col_md.value_expr is not None:
380
381
  self._record_refd_columns(col)
381
382
 
382
383
  def _init_idxs(self, tbl_md: schema.TableMd) -> None:
@@ -385,10 +386,8 @@ class TableVersion:
385
386
  import pixeltable.index as index_module
386
387
 
387
388
  for md in tbl_md.index_md.values():
388
- if (
389
- md.schema_version_add > self.schema_version
390
- or md.schema_version_drop is not None
391
- and md.schema_version_drop <= self.schema_version
389
+ if md.schema_version_add > self.schema_version or (
390
+ md.schema_version_drop is not None and md.schema_version_drop <= self.schema_version
392
391
  ):
393
392
  # index not visible in this schema version
394
393
  continue
@@ -434,29 +433,15 @@ class TableVersion:
434
433
  specified preceding schema version
435
434
  """
436
435
  assert update_tbl_version or preceding_schema_version is None
436
+ from pixeltable.catalog import Catalog
437
437
 
438
- conn = Env.get().conn
439
- conn.execute(
440
- sql.update(schema.Table.__table__)
441
- .values({schema.Table.md: dataclasses.asdict(self._create_tbl_md())})
442
- .where(schema.Table.id == self.id)
438
+ tbl_md = self._create_tbl_md()
439
+ version_md = self._create_version_md(timestamp) if update_tbl_version else None
440
+ schema_version_md = (
441
+ self._create_schema_version_md(preceding_schema_version) if preceding_schema_version is not None else None
443
442
  )
444
443
 
445
- if update_tbl_version:
446
- version_md = self._create_version_md(timestamp)
447
- conn.execute(
448
- sql.insert(schema.TableVersion.__table__).values(
449
- tbl_id=self.id, version=self.version, md=dataclasses.asdict(version_md)
450
- )
451
- )
452
-
453
- if preceding_schema_version is not None:
454
- schema_version_md = self._create_schema_version_md(preceding_schema_version)
455
- conn.execute(
456
- sql.insert(schema.TableSchemaVersion.__table__).values(
457
- tbl_id=self.id, schema_version=self.schema_version, md=dataclasses.asdict(schema_version_md)
458
- )
459
- )
444
+ Catalog.get().store_tbl_md(self.id, tbl_md, version_md, schema_version_md)
460
445
 
461
446
  def ensure_md_loaded(self) -> None:
462
447
  """Ensure that table metadata is loaded."""
@@ -477,33 +462,36 @@ class TableVersion:
477
462
  _logger.info(f'Added index {idx_name} on column {col.name} to table {self.name}')
478
463
  return status
479
464
 
480
- def _add_default_index(self, col: Column) -> Optional[UpdateStatus]:
481
- """Add a B-tree index on this column if it has a compatible type"""
465
+ def _is_btree_indexable(self, col: Column) -> bool:
482
466
  if not col.stored:
483
467
  # if the column is intentionally not stored, we want to avoid the overhead of an index
484
- return None
468
+ return False
485
469
  # Skip index for stored media columns produced by an iterator
486
470
  if col.col_type.is_media_type() and self.is_iterator_column(col):
487
- return None
471
+ return False
488
472
  if not col.col_type.is_scalar_type() and not (col.col_type.is_media_type() and not col.is_computed):
489
473
  # wrong type for a B-tree
490
- return None
491
- if col.col_type.is_bool_type():
474
+ return False
475
+ if col.col_type.is_bool_type(): # noqa : SIM103 Supress `Return the negated condition directly` check
492
476
  # B-trees on bools aren't useful
477
+ return False
478
+ return True
479
+
480
+ def _add_default_index(self, col: Column) -> Optional[UpdateStatus]:
481
+ """Add a B-tree index on this column if it has a compatible type"""
482
+ if not self._is_btree_indexable(col):
493
483
  return None
494
484
  status = self._add_index(col, idx_name=None, idx=index.BtreeIndex(col))
495
485
  return status
496
486
 
497
- def _add_index(self, col: Column, idx_name: Optional[str], idx: index.IndexBase) -> UpdateStatus:
487
+ def _create_index_columns(self, idx: index.IndexBase) -> Tuple[Column, Column]:
488
+ """Create value and undo columns for the given index.
489
+ Args:
490
+ idx: index for which columns will be created.
491
+ Returns:
492
+ A tuple containing the value column and the undo column.
493
+ """
498
494
  assert not self.is_snapshot
499
- idx_id = self.next_idx_id
500
- self.next_idx_id += 1
501
- if idx_name is None:
502
- idx_name = f'idx{idx_id}'
503
- else:
504
- assert is_valid_identifier(idx_name)
505
- assert idx_name not in [i.name for i in self.idx_md.values()]
506
-
507
495
  # add the index value and undo columns (which need to be nullable)
508
496
  val_col = Column(
509
497
  col_id=self.next_col_id,
@@ -532,7 +520,19 @@ class TableVersion:
532
520
  undo_col.tbl = self.create_handle()
533
521
  undo_col.col_type = undo_col.col_type.copy(nullable=True)
534
522
  self.next_col_id += 1
523
+ return val_col, undo_col
535
524
 
525
+ def _create_index(
526
+ self, col: Column, val_col: Column, undo_col: Column, idx_name: Optional[str], idx: index.IndexBase
527
+ ) -> None:
528
+ """Create the given index along with index md"""
529
+ idx_id = self.next_idx_id
530
+ self.next_idx_id += 1
531
+ if idx_name is None:
532
+ idx_name = f'idx{idx_id}'
533
+ else:
534
+ assert is_valid_identifier(idx_name)
535
+ assert idx_name not in [i.name for i in self.idx_md.values()]
536
536
  # create and register the index metadata
537
537
  idx_cls = type(idx)
538
538
  idx_md = schema.IndexMd(
@@ -550,14 +550,27 @@ class TableVersion:
550
550
  idx_info = self.IndexInfo(id=idx_id, name=idx_name, idx=idx, col=col, val_col=val_col, undo_col=undo_col)
551
551
  self.idx_md[idx_id] = idx_md
552
552
  self.idxs_by_name[idx_name] = idx_info
553
+ try:
554
+ idx.create_index(self._store_idx_name(idx_id), val_col)
555
+ finally:
556
+
557
+ def cleanup_index() -> None:
558
+ """Delete the newly added in-memory index structure"""
559
+ del self.idxs_by_name[idx_name]
560
+ del self.idx_md[idx_id]
561
+ self.next_idx_id = idx_id
553
562
 
563
+ # Run cleanup only if there has been an exception; otherwise, skip cleanup.
564
+ run_cleanup_on_exception(cleanup_index)
565
+
566
+ def _add_index(self, col: Column, idx_name: Optional[str], idx: index.IndexBase) -> UpdateStatus:
567
+ val_col, undo_vol = self._create_index_columns(idx)
554
568
  # add the columns and update the metadata
555
569
  # TODO support on_error='abort' for indices; it's tricky because of the way metadata changes are entangled
556
570
  # with the database operations
557
- status = self._add_columns([val_col, undo_col], print_stats=False, on_error='ignore')
571
+ status = self._add_columns([val_col, undo_vol], print_stats=False, on_error='ignore')
558
572
  # now create the index structure
559
- idx.create_index(self._store_idx_name(idx_id), val_col)
560
-
573
+ self._create_index(col, val_col, undo_vol, idx_name, idx)
561
574
  return status
562
575
 
563
576
  def drop_index(self, idx_id: int) -> None:
@@ -598,9 +611,21 @@ class TableVersion:
598
611
  self.version += 1
599
612
  preceding_schema_version = self.schema_version
600
613
  self.schema_version = self.version
601
- status = self._add_columns(cols, print_stats=print_stats, on_error=on_error)
614
+ index_cols: dict[Column, tuple[index.BtreeIndex, Column, Column]] = {}
615
+ all_cols: list[Column] = []
602
616
  for col in cols:
603
- _ = self._add_default_index(col)
617
+ all_cols.append(col)
618
+ if self._is_btree_indexable(col):
619
+ idx = index.BtreeIndex(col)
620
+ val_col, undo_col = self._create_index_columns(idx)
621
+ index_cols[col] = (idx, val_col, undo_col)
622
+ all_cols.append(val_col)
623
+ all_cols.append(undo_col)
624
+ # Add all columns
625
+ status = self._add_columns(all_cols, print_stats=print_stats, on_error=on_error)
626
+ # Create indices and their mds
627
+ for col, (idx, val_col, undo_col) in index_cols.items():
628
+ self._create_index(col, val_col, undo_col, idx_name=None, idx=idx)
604
629
  self._update_md(time.time(), preceding_schema_version=preceding_schema_version)
605
630
  _logger.info(f'Added columns {[col.name for col in cols]} to table {self.name}, new version: {self.version}')
606
631
 
@@ -616,18 +641,18 @@ class TableVersion:
616
641
  self, cols: Iterable[Column], print_stats: bool, on_error: Literal['abort', 'ignore']
617
642
  ) -> UpdateStatus:
618
643
  """Add and populate columns within the current transaction"""
619
- cols = list(cols)
644
+ cols_to_add = list(cols)
620
645
  row_count = self.store_tbl.count()
621
- for col in cols:
622
- if not col.col_type.nullable and not col.is_computed:
623
- if row_count > 0:
624
- raise excs.Error(
625
- f'Cannot add non-nullable column "{col.name}" to table {self.name} with existing rows'
626
- )
646
+ for col in cols_to_add:
647
+ if not col.col_type.nullable and not col.is_computed and row_count > 0:
648
+ raise excs.Error(
649
+ f'Cannot add non-nullable column {col.name!r} to table {self.name!r} with existing rows'
650
+ )
627
651
 
628
652
  num_excs = 0
629
653
  cols_with_excs: list[Column] = []
630
- for col in cols:
654
+ for col in cols_to_add:
655
+ excs_per_col = 0
631
656
  col.schema_version_add = self.schema_version
632
657
  # add the column to the lookup structures now, rather than after the store changes executed successfully,
633
658
  # because it might be referenced by the next column's value_expr
@@ -650,29 +675,32 @@ class TableVersion:
650
675
 
651
676
  plan, value_expr_slot_idx = Planner.create_add_column_plan(self.path, col)
652
677
  plan.ctx.num_rows = row_count
653
-
654
678
  try:
655
679
  plan.open()
656
680
  try:
657
- num_excs = self.store_tbl.load_column(col, plan, value_expr_slot_idx, on_error)
681
+ excs_per_col = self.store_tbl.load_column(col, plan, value_expr_slot_idx, on_error)
658
682
  except sql.exc.DBAPIError as exc:
659
683
  # Wrap the DBAPIError in an excs.Error to unify processing in the subsequent except block
660
684
  raise excs.Error(f'SQL error during execution of computed column `{col.name}`:\n{exc}') from exc
661
- if num_excs > 0:
685
+ if excs_per_col > 0:
662
686
  cols_with_excs.append(col)
663
- except excs.Error as exc:
664
- self.cols.pop()
665
- for col in cols:
666
- # remove columns that we already added
667
- if col.id not in self.cols_by_id:
668
- continue
669
- if col.name is not None:
670
- del self.cols_by_name[col.name]
671
- del self.cols_by_id[col.id]
672
- # we need to re-initialize the sqlalchemy schema
673
- self.store_tbl.create_sa_tbl()
674
- raise exc
687
+ num_excs += excs_per_col
675
688
  finally:
689
+ # Ensure cleanup occurs if an exception or keyboard interruption happens during `load_column()`.
690
+ def cleanup_on_error() -> None:
691
+ """Delete columns that are added as part of current add_columns operation and re-initialize
692
+ the sqlalchemy schema"""
693
+ self.cols = [col for col in self.cols if col not in cols_to_add]
694
+ for col in cols_to_add:
695
+ # remove columns that we already added
696
+ if col.id in self.cols_by_id:
697
+ del self.cols_by_id[col.id]
698
+ if col.name is not None and col.name in self.cols_by_name:
699
+ del self.cols_by_name[col.name]
700
+ self.store_tbl.create_sa_tbl()
701
+
702
+ # Run cleanup only if there has been an exception; otherwise, skip cleanup.
703
+ run_cleanup_on_exception(cleanup_on_error)
676
704
  plan.close()
677
705
 
678
706
  if print_stats:
@@ -756,19 +784,20 @@ class TableVersion:
756
784
  self._update_md(time.time(), preceding_schema_version=preceding_schema_version)
757
785
  _logger.info(f'Renamed column {old_name} to {new_name} in table {self.name}, new version: {self.version}')
758
786
 
759
- def set_comment(self, new_comment: Optional[str]):
787
+ def set_comment(self, new_comment: Optional[str]) -> None:
760
788
  _logger.info(f'[{self.name}] Updating comment: {new_comment}')
761
789
  self.comment = new_comment
762
790
  self._create_schema_version()
763
791
 
764
- def set_num_retained_versions(self, new_num_retained_versions: int):
792
+ def set_num_retained_versions(self, new_num_retained_versions: int) -> None:
765
793
  _logger.info(
766
- f'[{self.name}] Updating num_retained_versions: {new_num_retained_versions} (was {self.num_retained_versions})'
794
+ f'[{self.name}] Updating num_retained_versions: {new_num_retained_versions} '
795
+ f'(was {self.num_retained_versions})'
767
796
  )
768
797
  self.num_retained_versions = new_num_retained_versions
769
798
  self._create_schema_version()
770
799
 
771
- def _create_schema_version(self):
800
+ def _create_schema_version(self) -> None:
772
801
  # we're creating a new schema version
773
802
  self.version += 1
774
803
  preceding_schema_version = self.schema_version
@@ -858,7 +887,7 @@ class TableVersion:
858
887
 
859
888
  from pixeltable.plan import Planner
860
889
 
861
- update_spec = self._validate_update_spec(value_spec, allow_pk=False, allow_exprs=True)
890
+ update_spec = self._validate_update_spec(value_spec, allow_pk=False, allow_exprs=True, allow_media=True)
862
891
  if where is not None:
863
892
  if not isinstance(where, exprs.Expr):
864
893
  raise excs.Error(f"'where' argument must be a predicate, got {type(where)}")
@@ -897,7 +926,6 @@ class TableVersion:
897
926
  """
898
927
  # if we do lookups of rowids, we must have one for each row in the batch
899
928
  assert len(rowids) == 0 or len(rowids) == len(batch)
900
- cols_with_excs: set[str] = set()
901
929
 
902
930
  from pixeltable.plan import Planner
903
931
 
@@ -919,7 +947,7 @@ class TableVersion:
919
947
  return result
920
948
 
921
949
  def _validate_update_spec(
922
- self, value_spec: dict[str, Any], allow_pk: bool, allow_exprs: bool
950
+ self, value_spec: dict[str, Any], allow_pk: bool, allow_exprs: bool, allow_media: bool
923
951
  ) -> dict[Column, exprs.Expr]:
924
952
  update_targets: dict[Column, exprs.Expr] = {}
925
953
  for col_name, val in value_spec.items():
@@ -939,27 +967,31 @@ class TableVersion:
939
967
  raise excs.Error(f'Column {col_name} is computed and cannot be updated')
940
968
  if col.is_pk and not allow_pk:
941
969
  raise excs.Error(f'Column {col_name} is a primary key column and cannot be updated')
970
+ if col.col_type.is_media_type() and not allow_media:
971
+ raise excs.Error(f'Column {col_name} is a media column and cannot be updated')
942
972
 
943
973
  # make sure that the value is compatible with the column type
944
974
  value_expr: exprs.Expr
945
975
  try:
946
976
  # check if this is a literal
947
977
  value_expr = exprs.Literal(val, col_type=col.col_type)
948
- except (TypeError, jsonschema.exceptions.ValidationError):
978
+ except (TypeError, jsonschema.exceptions.ValidationError) as exc:
949
979
  if not allow_exprs:
950
980
  raise excs.Error(
951
981
  f'Column {col_name}: value {val!r} is not a valid literal for this column '
952
982
  f'(expected {col.col_type})'
953
- )
983
+ ) from exc
954
984
  # it's not a literal, let's try to create an expr from it
955
985
  value_expr = exprs.Expr.from_object(val)
956
986
  if value_expr is None:
957
- raise excs.Error(f'Column {col_name}: value {val!r} is not a recognized literal or expression')
987
+ raise excs.Error(
988
+ f'Column {col_name}: value {val!r} is not a recognized literal or expression'
989
+ ) from exc
958
990
  if not col.col_type.is_supertype_of(value_expr.col_type, ignore_nullable=True):
959
991
  raise excs.Error(
960
992
  f'Type of value {val!r} ({value_expr.col_type}) is not compatible with the type of column '
961
993
  f'{col_name} ({col.col_type})'
962
- )
994
+ ) from exc
963
995
  update_targets[col] = value_expr
964
996
 
965
997
  return update_targets
@@ -988,7 +1020,7 @@ class TableVersion:
988
1020
  self._update_md(timestamp)
989
1021
 
990
1022
  if cascade:
991
- base_versions = [None if plan is None else self.version] + base_versions # don't update in place
1023
+ base_versions = [None if plan is None else self.version, *base_versions] # don't update in place
992
1024
  # propagate to views
993
1025
  for view in self.mutable_views:
994
1026
  recomputed_cols = [col for col in recomputed_view_cols if col.tbl == view]
@@ -1048,11 +1080,9 @@ class TableVersion:
1048
1080
  # we're creating a new version
1049
1081
  self.version += 1
1050
1082
  self._update_md(timestamp)
1051
- else:
1052
- pass
1053
1083
  for view in self.mutable_views:
1054
1084
  num_rows += view.get().propagate_delete(
1055
- where=None, base_versions=[self.version] + base_versions, timestamp=timestamp
1085
+ where=None, base_versions=[self.version, *base_versions], timestamp=timestamp
1056
1086
  )
1057
1087
  return num_rows
1058
1088
 
@@ -1232,9 +1262,7 @@ class TableVersion:
1232
1262
 
1233
1263
  def is_system_column(self, col: Column) -> bool:
1234
1264
  """Return True if column was created by Pixeltable"""
1235
- if col.name == _POS_COLUMN_NAME and self.is_component_view:
1236
- return True
1237
- return False
1265
+ return col.name == _POS_COLUMN_NAME and self.is_component_view
1238
1266
 
1239
1267
  def user_columns(self) -> list[Column]:
1240
1268
  """Return all non-system columns"""
@@ -1262,7 +1290,7 @@ class TableVersion:
1262
1290
 
1263
1291
  def _record_refd_columns(self, col: Column) -> None:
1264
1292
  """Update Column.dependent_cols for all cols referenced in col.value_expr."""
1265
- import pixeltable.exprs as exprs
1293
+ from pixeltable import exprs
1266
1294
 
1267
1295
  if col.value_expr_dict is not None:
1268
1296
  # if we have a value_expr_dict, use that instead of instantiating the value_expr
@@ -1296,6 +1324,7 @@ class TableVersion:
1296
1324
  column_md: dict[int, schema.ColumnMd] = {}
1297
1325
  for col in cols:
1298
1326
  value_expr_dict = col.value_expr.as_dict() if col.value_expr is not None else None
1327
+ assert col.is_pk is not None
1299
1328
  column_md[col.id] = schema.ColumnMd(
1300
1329
  id=col.id,
1301
1330
  col_type=col.col_type.as_dict(),
@@ -1317,7 +1346,8 @@ class TableVersion:
1317
1346
  return schema.TableMd(
1318
1347
  tbl_id=str(self.id),
1319
1348
  name=self.name,
1320
- user=None,
1349
+ user=self.user,
1350
+ is_replica=self.is_replica,
1321
1351
  current_version=self.version,
1322
1352
  current_schema_version=self.schema_version,
1323
1353
  next_col_id=self.next_col_id,
@@ -1364,7 +1394,7 @@ class TableVersion:
1364
1394
 
1365
1395
  @classmethod
1366
1396
  def from_dict(cls, d: dict) -> TableVersion:
1367
- import pixeltable.catalog as catalog
1397
+ from pixeltable import catalog
1368
1398
 
1369
1399
  id = UUID(d['id'])
1370
1400
  effective_version = d['effective_version']
@@ -31,6 +31,9 @@ class TableVersionHandle:
31
31
  return False
32
32
  return self.id == other.id and self.effective_version == other.effective_version
33
33
 
34
+ def __hash__(self) -> int:
35
+ return hash((self.id, self.effective_version))
36
+
34
37
  @classmethod
35
38
  def create(cls, tbl_version: TableVersion) -> TableVersionHandle:
36
39
  return cls(tbl_version.id, tbl_version.effective_version, tbl_version)
@@ -82,7 +82,7 @@ class TableVersionPath:
82
82
  """Return all tbl versions"""
83
83
  if self.base is None:
84
84
  return [self.tbl_version]
85
- return [self.tbl_version] + self.base.get_tbl_versions()
85
+ return [self.tbl_version, *self.base.get_tbl_versions()]
86
86
 
87
87
  def get_bases(self) -> list[TableVersionHandle]:
88
88
  """Return all tbl versions"""
@@ -2,7 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  import inspect
4
4
  import logging
5
- from typing import TYPE_CHECKING, Any, Iterable, List, Literal, Optional
5
+ from typing import TYPE_CHECKING, Any, List, Literal, Optional
6
6
  from uuid import UUID
7
7
 
8
8
  import pixeltable.exceptions as excs
@@ -20,7 +20,7 @@ from .table_version_handle import TableVersionHandle
20
20
  from .table_version_path import TableVersionPath
21
21
 
22
22
  if TYPE_CHECKING:
23
- import pixeltable as pxt
23
+ from pixeltable.globals import TableDataSource
24
24
 
25
25
  _logger = logging.getLogger('pixeltable')
26
26
 
@@ -65,7 +65,7 @@ class View(Table):
65
65
  base: TableVersionPath,
66
66
  select_list: Optional[list[tuple[exprs.Expr, Optional[str]]]],
67
67
  additional_columns: dict[str, Any],
68
- predicate: Optional['pxt.exprs.Expr'],
68
+ predicate: Optional['exprs.Expr'],
69
69
  is_snapshot: bool,
70
70
  num_retained_versions: int,
71
71
  comment: str,
@@ -98,7 +98,8 @@ class View(Table):
98
98
  # make sure that the value can be computed in the context of the base
99
99
  if col.value_expr is not None and not col.value_expr.is_bound_by([base]):
100
100
  raise excs.Error(
101
- f'Column {col.name}: value expression cannot be computed in the context of the base {base.tbl_name()}'
101
+ f'Column {col.name}: value expression cannot be computed in the context of the '
102
+ f'base {base.tbl_name()}'
102
103
  )
103
104
 
104
105
  if iterator_cls is not None:
@@ -111,8 +112,8 @@ class View(Table):
111
112
  bound_args: dict[str, Any]
112
113
  try:
113
114
  bound_args = py_signature.bind(None, **iterator_args).arguments # None: arg for self
114
- except TypeError as e:
115
- raise excs.Error(f'Invalid iterator arguments: {e}')
115
+ except TypeError as exc:
116
+ raise excs.Error(f'Invalid iterator arguments: {exc}') from exc
116
117
  # we ignore 'self'
117
118
  first_param_name = next(iter(py_signature.parameters)) # can't guarantee it's actually 'self'
118
119
  del bound_args[first_param_name]
@@ -203,8 +204,8 @@ class View(Table):
203
204
 
204
205
  from pixeltable.plan import Planner
205
206
 
206
- plan, num_values_per_row = Planner.create_view_load_plan(view._tbl_version_path)
207
- num_rows, num_excs, cols_with_excs = tbl_version.store_tbl.insert_rows(plan, v_min=tbl_version.version)
207
+ plan, _ = Planner.create_view_load_plan(view._tbl_version_path)
208
+ num_rows, num_excs, _ = tbl_version.store_tbl.insert_rows(plan, v_min=tbl_version.version)
208
209
  Env.get().console_logger.info(f'Created view `{name}` with {num_rows} rows, {num_excs} exceptions.')
209
210
 
210
211
  session.commit()
@@ -241,7 +242,7 @@ class View(Table):
241
242
  # there is no TableVersion to drop
242
243
  self._check_is_dropped()
243
244
  self.is_dropped = True
244
- TableVersion.delete_md(self._id)
245
+ catalog.Catalog.get().delete_tbl_md(self._id)
245
246
  else:
246
247
  super()._drop()
247
248
 
@@ -251,11 +252,6 @@ class View(Table):
251
252
  md['is_snapshot'] = self._tbl_version_path.is_snapshot()
252
253
  return md
253
254
 
254
- if TYPE_CHECKING:
255
- import datasets # type: ignore[import-untyped]
256
-
257
- from pixeltable.globals import RowData, TableDataSource
258
-
259
255
  def insert(
260
256
  self,
261
257
  source: Optional[TableDataSource] = None,
pixeltable/dataframe.py CHANGED
@@ -88,12 +88,12 @@ class DataFrameResultSet:
88
88
  def __iter__(self) -> Iterator[dict[str, Any]]:
89
89
  return (self._row_to_dict(i) for i in range(len(self)))
90
90
 
91
- def __eq__(self, other):
91
+ def __eq__(self, other: object) -> bool:
92
92
  if not isinstance(other, DataFrameResultSet):
93
93
  return False
94
94
  return self.to_pandas().equals(other.to_pandas())
95
95
 
96
- def __hash__(self):
96
+ def __hash__(self) -> int:
97
97
  return hash(self.to_pandas())
98
98
 
99
99
 
@@ -571,7 +571,7 @@ class DataFrame:
571
571
  expr = exprs.Expr.from_object(raw_expr)
572
572
  if expr is None:
573
573
  raise excs.Error(f'Invalid expression: {raw_expr}')
574
- if expr.col_type.is_invalid_type():
574
+ if expr.col_type.is_invalid_type() and not (isinstance(expr, exprs.Literal) and expr.val is None):
575
575
  raise excs.Error(f'Invalid type: {raw_expr}')
576
576
  if not expr.is_bound_by(self._from_clause.tbls):
577
577
  raise excs.Error(
@@ -624,6 +624,8 @@ class DataFrame:
624
624
 
625
625
  >>> df = person.where(t.age > 30)
626
626
  """
627
+ if self.where_clause is not None:
628
+ raise excs.Error('Where clause already specified')
627
629
  if not isinstance(pred, exprs.Expr):
628
630
  raise excs.Error(f'Where() requires a Pixeltable expression, but instead got {type(pred)}')
629
631
  if not pred.col_type.is_bool_type():