pixeltable 0.2.8__py3-none-any.whl → 0.2.10__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (77)
  1. pixeltable/__init__.py +15 -33
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/catalog.py +1 -1
  4. pixeltable/catalog/column.py +29 -11
  5. pixeltable/catalog/dir.py +2 -2
  6. pixeltable/catalog/insertable_table.py +5 -55
  7. pixeltable/catalog/named_function.py +2 -2
  8. pixeltable/catalog/schema_object.py +2 -7
  9. pixeltable/catalog/table.py +307 -186
  10. pixeltable/catalog/table_version.py +109 -63
  11. pixeltable/catalog/table_version_path.py +28 -5
  12. pixeltable/catalog/view.py +20 -10
  13. pixeltable/dataframe.py +129 -26
  14. pixeltable/env.py +29 -18
  15. pixeltable/exec/exec_context.py +5 -0
  16. pixeltable/exec/exec_node.py +1 -0
  17. pixeltable/exec/in_memory_data_node.py +29 -24
  18. pixeltable/exec/sql_scan_node.py +1 -1
  19. pixeltable/exprs/column_ref.py +13 -8
  20. pixeltable/exprs/data_row.py +4 -0
  21. pixeltable/exprs/expr.py +16 -1
  22. pixeltable/exprs/function_call.py +4 -4
  23. pixeltable/exprs/row_builder.py +29 -20
  24. pixeltable/exprs/similarity_expr.py +4 -3
  25. pixeltable/ext/functions/yolox.py +2 -1
  26. pixeltable/func/__init__.py +1 -0
  27. pixeltable/func/aggregate_function.py +14 -12
  28. pixeltable/func/callable_function.py +8 -6
  29. pixeltable/func/expr_template_function.py +13 -19
  30. pixeltable/func/function.py +3 -6
  31. pixeltable/func/query_template_function.py +84 -0
  32. pixeltable/func/signature.py +68 -23
  33. pixeltable/func/udf.py +13 -10
  34. pixeltable/functions/__init__.py +6 -91
  35. pixeltable/functions/eval.py +26 -14
  36. pixeltable/functions/fireworks.py +25 -23
  37. pixeltable/functions/globals.py +62 -0
  38. pixeltable/functions/huggingface.py +20 -16
  39. pixeltable/functions/image.py +170 -1
  40. pixeltable/functions/openai.py +95 -128
  41. pixeltable/functions/string.py +10 -2
  42. pixeltable/functions/together.py +95 -84
  43. pixeltable/functions/util.py +16 -0
  44. pixeltable/functions/video.py +94 -16
  45. pixeltable/functions/whisper.py +74 -0
  46. pixeltable/globals.py +1 -1
  47. pixeltable/io/__init__.py +10 -0
  48. pixeltable/io/external_store.py +370 -0
  49. pixeltable/io/globals.py +51 -22
  50. pixeltable/io/label_studio.py +639 -0
  51. pixeltable/io/parquet.py +1 -1
  52. pixeltable/iterators/__init__.py +9 -0
  53. pixeltable/iterators/string.py +40 -0
  54. pixeltable/metadata/__init__.py +6 -8
  55. pixeltable/metadata/converters/convert_10.py +2 -4
  56. pixeltable/metadata/converters/convert_12.py +7 -2
  57. pixeltable/metadata/converters/convert_13.py +6 -8
  58. pixeltable/metadata/converters/convert_14.py +2 -4
  59. pixeltable/metadata/converters/convert_15.py +44 -0
  60. pixeltable/metadata/converters/convert_16.py +18 -0
  61. pixeltable/metadata/converters/util.py +66 -0
  62. pixeltable/metadata/schema.py +3 -3
  63. pixeltable/plan.py +8 -7
  64. pixeltable/store.py +1 -1
  65. pixeltable/tool/create_test_db_dump.py +147 -54
  66. pixeltable/tool/embed_udf.py +9 -0
  67. pixeltable/type_system.py +1 -2
  68. pixeltable/utils/code.py +34 -0
  69. {pixeltable-0.2.8.dist-info → pixeltable-0.2.10.dist-info}/METADATA +1 -1
  70. pixeltable-0.2.10.dist-info/RECORD +131 -0
  71. pixeltable/datatransfer/__init__.py +0 -1
  72. pixeltable/datatransfer/label_studio.py +0 -452
  73. pixeltable/datatransfer/remote.py +0 -85
  74. pixeltable/functions/pil/image.py +0 -147
  75. pixeltable-0.2.8.dist-info/RECORD +0 -124
  76. {pixeltable-0.2.8.dist-info → pixeltable-0.2.10.dist-info}/LICENSE +0 -0
  77. {pixeltable-0.2.8.dist-info → pixeltable-0.2.10.dist-info}/WHEEL +0 -0
@@ -24,6 +24,7 @@ from pixeltable.utils.filecache import FileCache
24
24
  from pixeltable.utils.media_store import MediaStore
25
25
  from .column import Column
26
26
  from .globals import UpdateStatus, POS_COLUMN_NAME, is_valid_identifier
27
+ from ..func.globals import resolve_symbol
27
28
 
28
29
  _logger = logging.getLogger('pixeltable')
29
30
 
@@ -88,8 +89,6 @@ class TableVersion:
88
89
  self.next_idx_id = tbl_md.next_idx_id
89
90
  self.next_rowid = tbl_md.next_row_id
90
91
 
91
- self.remotes = dict(TableVersion._init_remote(remote_md) for remote_md in tbl_md.remotes)
92
-
93
92
  # view-specific initialization
94
93
  from pixeltable import exprs
95
94
  predicate_dict = None if not is_view or tbl_md.view_md.predicate is None else tbl_md.view_md.predicate
@@ -120,11 +119,16 @@ class TableVersion:
120
119
  # init schema after we determined whether we're a component view, and before we create the store table
121
120
  self.cols: list[Column] = [] # contains complete history of columns, incl dropped ones
122
121
  self.cols_by_name: dict[str, Column] = {} # contains only user-facing (named) columns visible in this version
123
- self.cols_by_id: dict[int, Column] = {} # contains only columns visible in this version
122
+ self.cols_by_id: dict[int, Column] = {} # contains only columns visible in this version, both system and user
124
123
  self.idx_md = tbl_md.index_md # needed for _create_tbl_md()
125
124
  self.idxs_by_name: dict[str, TableVersion.IndexInfo] = {} # contains only actively maintained indices
125
+ self.external_stores: dict[str, pixeltable.io.ExternalStore] = {}
126
+
126
127
  self._init_schema(tbl_md, schema_version_md)
127
128
 
129
+ # Init external stores (this needs to happen after the schema is created)
130
+ self._init_external_stores(tbl_md)
131
+
128
132
  def __hash__(self) -> int:
129
133
  return hash(self.id)
130
134
 
@@ -159,7 +163,7 @@ class TableVersion:
159
163
  column_md = cls._create_column_md(cols)
160
164
  table_md = schema.TableMd(
161
165
  name=name, current_version=0, current_schema_version=0, next_col_id=len(cols),
162
- next_idx_id=0, next_row_id=0, column_md=column_md, index_md={}, remotes=[], view_md=view_md)
166
+ next_idx_id=0, next_row_id=0, column_md=column_md, index_md={}, external_stores=[], view_md=view_md)
163
167
  # create a schema.Table here, we need it to call our c'tor;
164
168
  # don't add it to the session yet, we might add index metadata
165
169
  tbl_id = uuid.uuid4()
@@ -239,6 +243,8 @@ class TableVersion:
239
243
  def _init_cols(self, tbl_md: schema.TableMd, schema_version_md: schema.TableSchemaVersionMd) -> None:
240
244
  """Initialize self.cols with the columns visible in our effective version"""
241
245
  import pixeltable.exprs as exprs
246
+ from pixeltable.catalog import Catalog
247
+
242
248
  self.cols = []
243
249
  self.cols_by_name = {}
244
250
  self.cols_by_id = {}
@@ -247,7 +253,8 @@ class TableVersion:
247
253
  col = Column(
248
254
  col_id=col_md.id, name=col_name, col_type=ts.ColumnType.from_dict(col_md.col_type),
249
255
  is_pk=col_md.is_pk, stored=col_md.stored,
250
- schema_version_add=col_md.schema_version_add, schema_version_drop=col_md.schema_version_drop)
256
+ schema_version_add=col_md.schema_version_add, schema_version_drop=col_md.schema_version_drop,
257
+ value_expr_dict=col_md.value_expr)
251
258
  col.tbl = self
252
259
  self.cols.append(col)
253
260
 
@@ -265,8 +272,8 @@ class TableVersion:
265
272
  # make sure to traverse columns ordered by position = order in which cols were created;
266
273
  # this guarantees that references always point backwards
267
274
  if col_md.value_expr is not None:
268
- col.value_expr = exprs.Expr.from_dict(col_md.value_expr)
269
- self._record_value_expr(col)
275
+ refd_cols = exprs.Expr.get_refd_columns(col_md.value_expr)
276
+ self._record_refd_columns(col)
270
277
 
271
278
  def _init_idxs(self, tbl_md: schema.TableMd) -> None:
272
279
  self.idx_md = tbl_md.index_md
@@ -306,22 +313,30 @@ class TableVersion:
306
313
  self.store_tbl: StoreBase = StoreTable(self)
307
314
 
308
315
  def _update_md(
309
- self, timestamp: float, preceding_schema_version: Optional[int], conn: sql.engine.Connection
316
+ self, timestamp: float, conn: sql.engine.Connection, update_tbl_version: bool = True, preceding_schema_version: Optional[int] = None
310
317
  ) -> None:
311
- """Update all recorded metadata in response to a data or schema change.
318
+ """Writes table metadata to the database.
319
+
312
320
  Args:
313
321
  timestamp: timestamp of the change
314
- preceding_schema_version: last schema version if schema change, else None
322
+ conn: database connection to use
323
+ update_tbl_version: if `True`, will also write `TableVersion` metadata
324
+ preceding_schema_version: if specified, will also write `TableSchemaVersion` metadata, recording the
325
+ specified preceding schema version
315
326
  """
327
+ assert update_tbl_version or preceding_schema_version is None
328
+
316
329
  conn.execute(
317
330
  sql.update(schema.Table.__table__)
318
331
  .values({schema.Table.md: dataclasses.asdict(self._create_tbl_md())})
319
332
  .where(schema.Table.id == self.id))
320
333
 
321
- version_md = self._create_version_md(timestamp)
322
- conn.execute(
323
- sql.insert(schema.TableVersion.__table__)
324
- .values(tbl_id=self.id, version=self.version, md=dataclasses.asdict(version_md)))
334
+ if update_tbl_version:
335
+ version_md = self._create_version_md(timestamp)
336
+ conn.execute(
337
+ sql.insert(schema.TableVersion.__table__)
338
+ .values(tbl_id=self.id, version=self.version, md=dataclasses.asdict(version_md)))
339
+
325
340
  if preceding_schema_version is not None:
326
341
  schema_version_md = self._create_schema_version_md(preceding_schema_version)
327
342
  conn.execute(
@@ -341,7 +356,7 @@ class TableVersion:
341
356
  self.schema_version = self.version
342
357
  with Env.get().engine.begin() as conn:
343
358
  status = self._add_index(col, idx_name, idx, conn)
344
- self._update_md(time.time(), preceding_schema_version, conn)
359
+ self._update_md(time.time(), conn, preceding_schema_version=preceding_schema_version)
345
360
  _logger.info(f'Added index {idx_name} on column {col.name} to table {self.name}')
346
361
  return status
347
362
 
@@ -424,7 +439,7 @@ class TableVersion:
424
439
 
425
440
  with Env.get().engine.begin() as conn:
426
441
  self._drop_columns([idx_info.val_col, idx_info.undo_col])
427
- self._update_md(time.time(), preceding_schema_version, conn)
442
+ self._update_md(time.time(), conn, preceding_schema_version=preceding_schema_version)
428
443
  _logger.info(f'Dropped index {idx_md.name} on table {self.name}')
429
444
 
430
445
  def add_column(self, col: Column, print_stats: bool = False) -> UpdateStatus:
@@ -450,7 +465,7 @@ class TableVersion:
450
465
  status = self._add_columns([col], conn, print_stats=print_stats)
451
466
  _ = self._add_default_index(col, conn)
452
467
  # TODO: what to do about errors?
453
- self._update_md(time.time(), preceding_schema_version, conn)
468
+ self._update_md(time.time(), conn, preceding_schema_version=preceding_schema_version)
454
469
  _logger.info(f'Added column {col.name} to table {self.name}, new version: {self.version}')
455
470
 
456
471
  msg = (
@@ -461,8 +476,9 @@ class TableVersion:
461
476
  _logger.info(f'Column {col.name}: {msg}')
462
477
  return status
463
478
 
464
- def _add_columns(self, cols: List[Column], conn: sql.engine.Connection, print_stats: bool = False) -> UpdateStatus:
479
+ def _add_columns(self, cols: Iterable[Column], conn: sql.engine.Connection, print_stats: bool = False) -> UpdateStatus:
465
480
  """Add and populate columns within the current transaction"""
481
+ cols = list(cols)
466
482
  row_count = self.store_tbl.count(conn=conn)
467
483
  for col in cols:
468
484
  if not col.col_type.nullable and not col.is_computed:
@@ -482,7 +498,7 @@ class TableVersion:
482
498
  self.cols_by_id[col.id] = col
483
499
  if col.value_expr is not None:
484
500
  col.check_value_expr()
485
- self._record_value_expr(col)
501
+ self._record_refd_columns(col)
486
502
 
487
503
  if col.is_stored:
488
504
  self.store_tbl.add_column(col, conn)
@@ -496,7 +512,7 @@ class TableVersion:
496
512
  plan.ctx.num_rows = row_count
497
513
 
498
514
  try:
499
- plan.ctx.conn = conn
515
+ plan.ctx.set_conn(conn)
500
516
  plan.open()
501
517
  num_excs = self.store_tbl.load_column(col, plan, value_expr_slot_idx, conn)
502
518
  if num_excs > 0:
@@ -526,6 +542,8 @@ class TableVersion:
526
542
  def drop_column(self, name: str) -> None:
527
543
  """Drop a column from the table.
528
544
  """
545
+ from pixeltable.catalog import Catalog
546
+
529
547
  assert not self.is_snapshot
530
548
  if name not in self.cols_by_name:
531
549
  raise excs.Error(f'Unknown column: {name}')
@@ -533,8 +551,27 @@ class TableVersion:
533
551
  dependent_user_cols = [c for c in col.dependent_cols if c.name is not None]
534
552
  if len(dependent_user_cols) > 0:
535
553
  raise excs.Error(
536
- f'Cannot drop column {name} because the following columns depend on it:\n',
537
- f'{", ".join([c.name for c in dependent_user_cols])}')
554
+ f'Cannot drop column `{name}` because the following columns depend on it:\n'
555
+ f'{", ".join(c.name for c in dependent_user_cols)}'
556
+ )
557
+ # See if this column has a dependent store. We need to look through all stores in all
558
+ # (transitive) views of this table.
559
+ transitive_views = Catalog.get().tbls[self.id].get_views(recursive=True)
560
+ dependent_stores = [
561
+ (view, store)
562
+ for view in transitive_views
563
+ for store in view._tbl_version.external_stores.values()
564
+ if col in store.get_local_columns()
565
+ ]
566
+ if len(dependent_stores) > 0:
567
+ dependent_store_names = [
568
+ store.name if view._get_id() == self.id else f'{store.name} (in view `{view.get_name()}`)'
569
+ for view, store in dependent_stores
570
+ ]
571
+ raise excs.Error(
572
+ f'Cannot drop column `{name}` because the following external stores depend on it:\n'
573
+ f'{", ".join(dependent_store_names)}'
574
+ )
538
575
 
539
576
  # we're creating a new schema version
540
577
  self.version += 1
@@ -557,10 +594,10 @@ class TableVersion:
557
594
  for idx_name in dropped_idx_names:
558
595
  del self.idxs_by_name[idx_name]
559
596
  self._drop_columns(dropped_cols)
560
- self._update_md(time.time(), preceding_schema_version, conn)
597
+ self._update_md(time.time(), conn, preceding_schema_version=preceding_schema_version)
561
598
  _logger.info(f'Dropped column {name} from table {self.name}, new version: {self.version}')
562
599
 
563
- def _drop_columns(self, cols: list[Column]) -> None:
600
+ def _drop_columns(self, cols: Iterable[Column]) -> None:
564
601
  """Mark columns as dropped"""
565
602
  assert not self.is_snapshot
566
603
 
@@ -602,7 +639,7 @@ class TableVersion:
602
639
  self.schema_version = self.version
603
640
 
604
641
  with Env.get().engine.begin() as conn:
605
- self._update_md(time.time(), preceding_schema_version, conn)
642
+ self._update_md(time.time(), conn, preceding_schema_version=preceding_schema_version)
606
643
  _logger.info(f'Renamed column {old_name} to {new_name} in table {self.name}, new version: {self.version}')
607
644
 
608
645
  def set_comment(self, new_comment: Optional[str]):
@@ -621,7 +658,7 @@ class TableVersion:
621
658
  preceding_schema_version = self.schema_version
622
659
  self.schema_version = self.version
623
660
  with Env.get().engine.begin() as conn:
624
- self._update_md(time.time(), preceding_schema_version, conn)
661
+ self._update_md(time.time(), conn, preceding_schema_version=preceding_schema_version)
625
662
  _logger.info(f'[{self.name}] Updating table schema to version: {self.version}')
626
663
 
627
664
  def insert(
@@ -648,7 +685,7 @@ class TableVersion:
648
685
  result.num_excs = num_excs
649
686
  result.num_computed_values += exec_plan.ctx.num_computed_exprs * num_rows
650
687
  result.cols_with_excs = [f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs]
651
- self._update_md(timestamp, None, conn)
688
+ self._update_md(timestamp, conn)
652
689
 
653
690
  # update views
654
691
  for view in self.mutable_views:
@@ -762,7 +799,7 @@ class TableVersion:
762
799
  result.cols_with_excs = [f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs]
763
800
  self.store_tbl.delete_rows(
764
801
  self.version, base_versions=base_versions, match_on_vmin=True, where_clause=where_clause, conn=conn)
765
- self._update_md(timestamp, None, conn)
802
+ self._update_md(timestamp, conn)
766
803
 
767
804
  if cascade:
768
805
  base_versions = [None if plan is None else self.version] + base_versions # don't update in place
@@ -812,7 +849,7 @@ class TableVersion:
812
849
  if num_rows > 0:
813
850
  # we're creating a new version
814
851
  self.version += 1
815
- self._update_md(timestamp, None, conn)
852
+ self._update_md(timestamp, conn)
816
853
  else:
817
854
  pass
818
855
  for view in self.mutable_views:
@@ -943,31 +980,29 @@ class TableVersion:
943
980
  view._revert(session)
944
981
  _logger.info(f'TableVersion {self.name}: reverted to version {self.version}')
945
982
 
946
- @classmethod
947
- def _init_remote(cls, remote_md: dict[str, Any]) -> Tuple[pixeltable.datatransfer.Remote, dict[str, str]]:
948
- module = importlib.import_module(remote_md['module'])
949
- remote_cls = getattr(module, remote_md['class'])
950
- remote = remote_cls.from_dict(remote_md['remote_md'])
951
- col_mapping = remote_md['col_mapping']
952
- return remote, col_mapping
953
-
954
- def link(self, remote: pixeltable.datatransfer.Remote, col_mapping: dict[str, str]) -> None:
955
- timestamp = time.time()
956
- self.version += 1
957
- self.remotes[remote] = col_mapping
983
+ def _init_external_stores(self, tbl_md: schema.TableMd) -> None:
984
+ for store_md in tbl_md.external_stores:
985
+ store_cls = resolve_symbol(store_md['class'])
986
+ assert isinstance(store_cls, type) and issubclass(store_cls, pixeltable.io.ExternalStore)
987
+ store = store_cls.from_dict(store_md['md'])
988
+ self.external_stores[store.name] = store
989
+
990
+ def link_external_store(self, store: pixeltable.io.ExternalStore) -> None:
958
991
  with Env.get().engine.begin() as conn:
959
- self._update_md(timestamp, None, conn)
992
+ store.link(self, conn) # May result in additional metadata changes
993
+ self.external_stores[store.name] = store
994
+ self._update_md(time.time(), conn, update_tbl_version=False)
960
995
 
961
- def unlink(self, remote: pixeltable.datatransfer.Remote) -> None:
962
- assert remote in self.remotes
963
- timestamp = time.time()
964
- self.version += 1
965
- del self.remotes[remote]
996
+ def unlink_external_store(self, store_name: str, delete_external_data: bool) -> None:
997
+ assert store_name in self.external_stores
998
+ store = self.external_stores[store_name]
966
999
  with Env.get().engine.begin() as conn:
967
- self._update_md(timestamp, None, conn)
1000
+ store.unlink(self, conn) # May result in additional metadata changes
1001
+ del self.external_stores[store_name]
1002
+ self._update_md(time.time(), conn, update_tbl_version=False)
968
1003
 
969
- def get_remotes(self) -> dict[pixeltable.datatransfer.Remote, dict[str, str]]:
970
- return self.remotes
1004
+ if delete_external_data and isinstance(store, pixeltable.io.external_store.Project):
1005
+ store.delete()
971
1006
 
972
1007
  def is_view(self) -> bool:
973
1008
  return self.base is not None
@@ -1029,14 +1064,17 @@ class TableVersion:
1029
1064
  args.append(exprs.ColumnRef(param))
1030
1065
  fn = func.make_function(
1031
1066
  col.compute_func, return_type=col.col_type, param_types=[arg.col_type for arg in args])
1032
- col.value_expr = fn(*args)
1067
+ col.set_value_expr(fn(*args))
1033
1068
 
1034
- def _record_value_expr(self, col: Column) -> None:
1069
+ def _record_refd_columns(self, col: Column) -> None:
1035
1070
  """Update Column.dependent_cols for all cols referenced in col.value_expr.
1036
1071
  """
1037
- assert col.value_expr is not None
1038
- from pixeltable.exprs import ColumnRef
1039
- refd_cols = [e.col for e in col.value_expr.subexprs(expr_class=ColumnRef)]
1072
+ import pixeltable.exprs as exprs
1073
+ if col.value_expr_dict is not None:
1074
+ # if we have a value_expr_dict, use that instead of instantiating the value_expr
1075
+ refd_cols = exprs.Expr.get_refd_columns(col.value_expr_dict)
1076
+ else:
1077
+ refd_cols = [e.col for e in col.value_expr.subexprs(expr_class=exprs.ColumnRef)]
1040
1078
  for refd_col in refd_cols:
1041
1079
  refd_col.dependent_cols.add(col)
1042
1080
 
@@ -1064,7 +1102,7 @@ class TableVersion:
1064
1102
 
1065
1103
  @classmethod
1066
1104
  def _create_column_md(cls, cols: List[Column]) -> dict[int, schema.ColumnMd]:
1067
- column_md: Dict[int, schema.ColumnMd] = {}
1105
+ column_md: dict[int, schema.ColumnMd] = {}
1068
1106
  for col in cols:
1069
1107
  value_expr_dict = col.value_expr.as_dict() if col.value_expr is not None else None
1070
1108
  column_md[col.id] = schema.ColumnMd(
@@ -1074,15 +1112,13 @@ class TableVersion:
1074
1112
  return column_md
1075
1113
 
1076
1114
  @classmethod
1077
- def _create_remotes_md(cls, remotes: dict['pixeltable.datatransfer.Remote', dict[str, str]]) -> list[dict[str, Any]]:
1115
+ def _create_stores_md(cls, stores: Iterable['pixeltable.io.ExternalStore']) -> list[dict[str, Any]]:
1078
1116
  return [
1079
1117
  {
1080
- 'module': type(remote).__module__,
1081
- 'class': type(remote).__qualname__,
1082
- 'remote_md': remote.to_dict(),
1083
- 'col_mapping': col_mapping
1118
+ 'class': f'{type(store).__module__}.{type(store).__qualname__}',
1119
+ 'md': store.as_dict()
1084
1120
  }
1085
- for remote, col_mapping in remotes.items()
1121
+ for store in stores
1086
1122
  ]
1087
1123
 
1088
1124
  def _create_tbl_md(self) -> schema.TableMd:
@@ -1090,7 +1126,7 @@ class TableVersion:
1090
1126
  name=self.name, current_version=self.version, current_schema_version=self.schema_version,
1091
1127
  next_col_id=self.next_col_id, next_idx_id=self.next_idx_id, next_row_id=self.next_rowid,
1092
1128
  column_md=self._create_column_md(self.cols), index_md=self.idx_md,
1093
- remotes=self._create_remotes_md(self.remotes), view_md=self.view_md)
1129
+ external_stores=self._create_stores_md(self.external_stores.values()), view_md=self.view_md)
1094
1130
 
1095
1131
  def _create_version_md(self, timestamp: float) -> schema.TableVersionMd:
1096
1132
  return schema.TableVersionMd(created_at=timestamp, version=self.version, schema_version=self.schema_version)
@@ -1103,3 +1139,13 @@ class TableVersion:
1103
1139
  return schema.TableSchemaVersionMd(
1104
1140
  schema_version=self.schema_version, preceding_schema_version=preceding_schema_version,
1105
1141
  columns=column_md, num_retained_versions=self.num_retained_versions, comment=self.comment)
1142
+
1143
+ def as_dict(self) -> dict:
1144
+ return {'id': str(self.id), 'effective_version': self.effective_version}
1145
+
1146
+ @classmethod
1147
+ def from_dict(cls, d: dict) -> 'TableVersion':
1148
+ import pixeltable.catalog as catalog
1149
+ id = UUID(d['id'])
1150
+ effective_version = d['effective_version']
1151
+ return catalog.Catalog.get().tbl_versions[(id, effective_version)]
@@ -1,10 +1,11 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import logging
4
- from typing import Optional, List, Union
4
+ from typing import Optional, Union
5
5
  from uuid import UUID
6
6
 
7
7
  import pixeltable
8
+ import pixeltable.catalog as catalog
8
9
  from .column import Column
9
10
  from .globals import POS_COLUMN_NAME
10
11
  from .table_version import TableVersion
@@ -59,13 +60,13 @@ class TableVersionPath:
59
60
  def is_insertable(self) -> bool:
60
61
  return self.tbl_version.is_insertable()
61
62
 
62
- def get_tbl_versions(self) -> List[TableVersion]:
63
+ def get_tbl_versions(self) -> list[TableVersion]:
63
64
  """Return all tbl versions"""
64
65
  if self.base is None:
65
66
  return [self.tbl_version]
66
67
  return [self.tbl_version] + self.base.get_tbl_versions()
67
68
 
68
- def get_bases(self) -> List[TableVersion]:
69
+ def get_bases(self) -> list[TableVersion]:
69
70
  """Return all tbl versions"""
70
71
  if self.base is None:
71
72
  return []
@@ -100,15 +101,25 @@ class TableVersionPath:
100
101
  from pixeltable.dataframe import DataFrame
101
102
  return DataFrame(self).__getitem__(index)
102
103
 
103
- def columns(self) -> List[Column]:
104
+ def columns(self) -> list[Column]:
104
105
  """Return all user columns visible in this tbl version path, including columns from bases"""
105
106
  result = list(self.tbl_version.cols_by_name.values())
106
107
  if self.base is not None:
107
108
  base_cols = self.base.columns()
108
109
  # we only include base columns that don't conflict with one of our column names
109
- result.extend([c for c in base_cols if c.name not in self.tbl_version.cols_by_name])
110
+ result.extend(c for c in base_cols if c.name not in self.tbl_version.cols_by_name)
110
111
  return result
111
112
 
113
+ def cols_by_name(self) -> dict[str, Column]:
114
+ """Return a dict of all user columns visible in this tbl version path, including columns from bases"""
115
+ cols = self.columns()
116
+ return {col.name: col for col in cols}
117
+
118
+ def cols_by_id(self) -> dict[int, Column]:
119
+ """Return a dict of all user columns visible in this tbl version path, including columns from bases"""
120
+ cols = self.columns()
121
+ return {col.id: col for col in cols}
122
+
112
123
  def get_column(self, name: str, include_bases: bool = True) -> Optional[Column]:
113
124
  """Return the column with the given name, or None if not found"""
114
125
  col = self.tbl_version.cols_by_name.get(name)
@@ -131,3 +142,15 @@ class TableVersionPath:
131
142
  return self.base.has_column(col)
132
143
  else:
133
144
  return False
145
+
146
+ def as_dict(self) -> dict:
147
+ return {
148
+ 'tbl_version': self.tbl_version.as_dict(),
149
+ 'base': self.base.as_dict() if self.base is not None else None
150
+ }
151
+
152
+ @classmethod
153
+ def from_dict(cls, d: dict) -> TableVersionPath:
154
+ tbl_version = TableVersion.from_dict(d['tbl_version'])
155
+ base = TableVersionPath.from_dict(d['base']) if d['base'] is not None else None
156
+ return cls(tbl_version, base)
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
  import logging
3
- from typing import List, Optional, Type, Dict, Set, Any
3
+ from typing import List, Optional, Type, Dict, Set, Any, Iterable
4
4
  from uuid import UUID
5
5
  import inspect
6
6
 
@@ -11,7 +11,7 @@ from .table_version import TableVersion
11
11
  from .table_version_path import TableVersionPath
12
12
  from .column import Column
13
13
  from .catalog import Catalog
14
- from .globals import POS_COLUMN_NAME
14
+ from .globals import POS_COLUMN_NAME, UpdateStatus
15
15
  from pixeltable.env import Env
16
16
  from pixeltable.iterators import ComponentIterator
17
17
  from pixeltable.exceptions import Error
@@ -55,7 +55,7 @@ class View(Table):
55
55
 
56
56
  # verify that filter can be evaluated in the context of the base
57
57
  if predicate is not None:
58
- if not predicate.is_bound_by(base.tbl_version_path):
58
+ if not predicate.is_bound_by(base._tbl_version_path):
59
59
  raise excs.Error(f'Filter cannot be computed in the context of the base {base._name}')
60
60
  # create a copy that we can modify and store
61
61
  predicate = predicate.copy()
@@ -65,7 +65,7 @@ class View(Table):
65
65
  if not col.is_computed:
66
66
  continue
67
67
  # make sure that the value can be computed in the context of the base
68
- if col.value_expr is not None and not col.value_expr.is_bound_by(base.tbl_version_path):
68
+ if col.value_expr is not None and not col.value_expr.is_bound_by(base._tbl_version_path):
69
69
  raise excs.Error(
70
70
  f'Column {col.name}: value expression cannot be computed in the context of the base {base._name}')
71
71
 
@@ -83,7 +83,7 @@ class View(Table):
83
83
 
84
84
  # construct Signature and type-check bound_args
85
85
  params = [
86
- func.Parameter(param_name, param_type, inspect.Parameter.POSITIONAL_OR_KEYWORD)
86
+ func.Parameter(param_name, param_type, kind=inspect.Parameter.POSITIONAL_OR_KEYWORD)
87
87
  for param_name, param_type in iterator_cls.input_schema().items()
88
88
  ]
89
89
  sig = func.Signature(InvalidType(), params)
@@ -114,7 +114,7 @@ class View(Table):
114
114
  iterator_args_expr = InlineDict(iterator_args) if iterator_args is not None else None
115
115
  iterator_class_fqn = f'{iterator_cls.__module__}.{iterator_cls.__name__}' if iterator_cls is not None \
116
116
  else None
117
- base_version_path = cls._get_snapshot_path(base.tbl_version_path) if is_snapshot else base.tbl_version_path
117
+ base_version_path = cls._get_snapshot_path(base._tbl_version_path) if is_snapshot else base._tbl_version_path
118
118
  base_versions = [
119
119
  (tbl_version.id.hex, tbl_version.version if is_snapshot or tbl_version.is_snapshot else None)
120
120
  for tbl_version in base_version_path.get_tbl_versions()
@@ -127,7 +127,7 @@ class View(Table):
127
127
  if iterator_args_expr is not None else None
128
128
  for col in columns:
129
129
  if col.value_expr is not None:
130
- col.value_expr = col.value_expr.retarget(base_version_path)
130
+ col.set_value_expr(col.value_expr.retarget(base_version_path))
131
131
 
132
132
  view_md = md_schema.ViewMd(
133
133
  is_snapshot=is_snapshot, predicate=predicate.as_dict() if predicate is not None else None,
@@ -148,7 +148,7 @@ class View(Table):
148
148
  _logger.info(f'Created view `{name}`, id={tbl_version.id}')
149
149
 
150
150
  from pixeltable.plan import Planner
151
- plan, num_values_per_row = Planner.create_view_load_plan(view.tbl_version_path)
151
+ plan, num_values_per_row = Planner.create_view_load_plan(view._tbl_version_path)
152
152
  num_rows, num_excs, cols_with_excs = tbl_version.store_tbl.insert_rows(
153
153
  plan, session.connection(), v_min=tbl_version.version)
154
154
  print(f'Created view `{name}` with {num_rows} rows, {num_excs} exceptions.')
@@ -161,11 +161,13 @@ class View(Table):
161
161
  return view
162
162
 
163
163
  @classmethod
164
- def _verify_column(cls, col: Column, existing_column_names: Set[str]) -> None:
164
+ def _verify_column(
165
+ cls, col: Column, existing_column_names: Set[str], existing_query_names: Optional[Set[str]] = None
166
+ ) -> None:
165
167
  # make sure that columns are nullable or have a default
166
168
  if not col.col_type.nullable and not col.is_computed:
167
169
  raise Error(f'Column {col.name}: non-computed columns in views must be nullable')
168
- super()._verify_column(col, existing_column_names)
170
+ super()._verify_column(col, existing_column_names, existing_query_names)
169
171
 
170
172
  @classmethod
171
173
  def _get_snapshot_path(cls, tbl_version_path: TableVersionPath) -> TableVersionPath:
@@ -201,3 +203,11 @@ class View(Table):
201
203
  cat.tbl_dependents[self._base._id].remove(self)
202
204
  del cat.tbl_dependents[self._id]
203
205
 
206
+ def insert(
207
+ self, rows: Optional[Iterable[dict[str, Any]]] = None, /, *, print_stats: bool = False,
208
+ fail_on_exception: bool = True, **kwargs: Any
209
+ ) -> UpdateStatus:
210
+ raise excs.Error(f'{self.display_name()} {self._name!r}: cannot insert into view')
211
+
212
+ def delete(self, where: Optional['pixeltable.exprs.Predicate'] = None) -> UpdateStatus:
213
+ raise excs.Error(f'{self.display_name()} {self._name!r}: cannot delete from view')