pixeltable 0.2.7__py3-none-any.whl → 0.2.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic; see the registry's advisory for more details.

Files changed (76)
  1. pixeltable/__init__.py +15 -33
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/catalog.py +1 -1
  4. pixeltable/catalog/column.py +28 -16
  5. pixeltable/catalog/dir.py +2 -2
  6. pixeltable/catalog/insertable_table.py +5 -55
  7. pixeltable/catalog/named_function.py +2 -2
  8. pixeltable/catalog/schema_object.py +2 -7
  9. pixeltable/catalog/table.py +298 -204
  10. pixeltable/catalog/table_version.py +104 -139
  11. pixeltable/catalog/table_version_path.py +22 -4
  12. pixeltable/catalog/view.py +20 -10
  13. pixeltable/dataframe.py +128 -25
  14. pixeltable/env.py +21 -14
  15. pixeltable/exec/exec_context.py +5 -0
  16. pixeltable/exec/exec_node.py +1 -0
  17. pixeltable/exec/in_memory_data_node.py +29 -24
  18. pixeltable/exec/sql_scan_node.py +1 -1
  19. pixeltable/exprs/column_ref.py +13 -8
  20. pixeltable/exprs/data_row.py +4 -0
  21. pixeltable/exprs/expr.py +16 -1
  22. pixeltable/exprs/function_call.py +4 -4
  23. pixeltable/exprs/row_builder.py +29 -20
  24. pixeltable/exprs/similarity_expr.py +4 -3
  25. pixeltable/ext/functions/yolox.py +2 -1
  26. pixeltable/func/__init__.py +1 -0
  27. pixeltable/func/aggregate_function.py +14 -12
  28. pixeltable/func/callable_function.py +8 -6
  29. pixeltable/func/expr_template_function.py +13 -19
  30. pixeltable/func/function.py +3 -6
  31. pixeltable/func/query_template_function.py +84 -0
  32. pixeltable/func/signature.py +68 -23
  33. pixeltable/func/udf.py +13 -10
  34. pixeltable/functions/__init__.py +6 -91
  35. pixeltable/functions/eval.py +26 -14
  36. pixeltable/functions/fireworks.py +25 -23
  37. pixeltable/functions/globals.py +62 -0
  38. pixeltable/functions/huggingface.py +20 -16
  39. pixeltable/functions/image.py +170 -1
  40. pixeltable/functions/openai.py +95 -128
  41. pixeltable/functions/string.py +10 -2
  42. pixeltable/functions/together.py +95 -84
  43. pixeltable/functions/util.py +16 -0
  44. pixeltable/functions/video.py +94 -16
  45. pixeltable/functions/whisper.py +78 -0
  46. pixeltable/globals.py +1 -1
  47. pixeltable/io/__init__.py +10 -0
  48. pixeltable/io/external_store.py +370 -0
  49. pixeltable/io/globals.py +50 -22
  50. pixeltable/{datatransfer → io}/label_studio.py +279 -166
  51. pixeltable/io/parquet.py +1 -1
  52. pixeltable/iterators/__init__.py +9 -0
  53. pixeltable/iterators/string.py +40 -0
  54. pixeltable/metadata/__init__.py +6 -8
  55. pixeltable/metadata/converters/convert_10.py +2 -4
  56. pixeltable/metadata/converters/convert_12.py +7 -2
  57. pixeltable/metadata/converters/convert_13.py +6 -8
  58. pixeltable/metadata/converters/convert_14.py +2 -4
  59. pixeltable/metadata/converters/convert_15.py +40 -25
  60. pixeltable/metadata/converters/convert_16.py +18 -0
  61. pixeltable/metadata/converters/util.py +11 -8
  62. pixeltable/metadata/schema.py +3 -6
  63. pixeltable/plan.py +8 -7
  64. pixeltable/store.py +1 -1
  65. pixeltable/tool/create_test_db_dump.py +145 -54
  66. pixeltable/tool/embed_udf.py +9 -0
  67. pixeltable/type_system.py +1 -2
  68. pixeltable/utils/code.py +34 -0
  69. {pixeltable-0.2.7.dist-info → pixeltable-0.2.9.dist-info}/METADATA +2 -2
  70. pixeltable-0.2.9.dist-info/RECORD +131 -0
  71. pixeltable/datatransfer/__init__.py +0 -1
  72. pixeltable/datatransfer/remote.py +0 -113
  73. pixeltable/functions/pil/image.py +0 -147
  74. pixeltable-0.2.7.dist-info/RECORD +0 -126
  75. {pixeltable-0.2.7.dist-info → pixeltable-0.2.9.dist-info}/LICENSE +0 -0
  76. {pixeltable-0.2.7.dist-info → pixeltable-0.2.9.dist-info}/WHEEL +0 -0
@@ -89,8 +89,6 @@ class TableVersion:
89
89
  self.next_idx_id = tbl_md.next_idx_id
90
90
  self.next_rowid = tbl_md.next_row_id
91
91
 
92
- self.remotes = dict(TableVersion._init_remote(remote_md) for remote_md in tbl_md.remotes)
93
-
94
92
  # view-specific initialization
95
93
  from pixeltable import exprs
96
94
  predicate_dict = None if not is_view or tbl_md.view_md.predicate is None else tbl_md.view_md.predicate
@@ -124,8 +122,13 @@ class TableVersion:
124
122
  self.cols_by_id: dict[int, Column] = {} # contains only columns visible in this version, both system and user
125
123
  self.idx_md = tbl_md.index_md # needed for _create_tbl_md()
126
124
  self.idxs_by_name: dict[str, TableVersion.IndexInfo] = {} # contains only actively maintained indices
125
+ self.external_stores: dict[str, pixeltable.io.ExternalStore] = {}
126
+
127
127
  self._init_schema(tbl_md, schema_version_md)
128
128
 
129
+ # Init external stores (this needs to happen after the schema is created)
130
+ self._init_external_stores(tbl_md)
131
+
129
132
  def __hash__(self) -> int:
130
133
  return hash(self.id)
131
134
 
@@ -160,7 +163,7 @@ class TableVersion:
160
163
  column_md = cls._create_column_md(cols)
161
164
  table_md = schema.TableMd(
162
165
  name=name, current_version=0, current_schema_version=0, next_col_id=len(cols),
163
- next_idx_id=0, next_row_id=0, column_md=column_md, index_md={}, remotes=[], view_md=view_md)
166
+ next_idx_id=0, next_row_id=0, column_md=column_md, index_md={}, external_stores=[], view_md=view_md)
164
167
  # create a schema.Table here, we need it to call our c'tor;
165
168
  # don't add it to the session yet, we might add index metadata
166
169
  tbl_id = uuid.uuid4()
@@ -240,6 +243,8 @@ class TableVersion:
240
243
  def _init_cols(self, tbl_md: schema.TableMd, schema_version_md: schema.TableSchemaVersionMd) -> None:
241
244
  """Initialize self.cols with the columns visible in our effective version"""
242
245
  import pixeltable.exprs as exprs
246
+ from pixeltable.catalog import Catalog
247
+
243
248
  self.cols = []
244
249
  self.cols_by_name = {}
245
250
  self.cols_by_id = {}
@@ -248,7 +253,8 @@ class TableVersion:
248
253
  col = Column(
249
254
  col_id=col_md.id, name=col_name, col_type=ts.ColumnType.from_dict(col_md.col_type),
250
255
  is_pk=col_md.is_pk, stored=col_md.stored,
251
- schema_version_add=col_md.schema_version_add, schema_version_drop=col_md.schema_version_drop)
256
+ schema_version_add=col_md.schema_version_add, schema_version_drop=col_md.schema_version_drop,
257
+ value_expr_dict=col_md.value_expr)
252
258
  col.tbl = self
253
259
  self.cols.append(col)
254
260
 
@@ -266,18 +272,8 @@ class TableVersion:
266
272
  # make sure to traverse columns ordered by position = order in which cols were created;
267
273
  # this guarantees that references always point backwards
268
274
  if col_md.value_expr is not None:
269
- col.value_expr = exprs.Expr.from_dict(col_md.value_expr)
270
- self._record_value_expr(col)
271
-
272
- # if this is a stored proxy column, resolve the relationships with its proxy base.
273
- if col_md.proxy_base is not None:
274
- # proxy_base must have a strictly smaller id, so we must already have encountered it
275
- # in traversal order; and if the proxy column is active at this version, then the
276
- # proxy base must necessarily be active as well. This motivates the following assertion.
277
- assert col_md.proxy_base in self.cols_by_id
278
- base_col = self.cols_by_id[col_md.proxy_base]
279
- base_col.stored_proxy = col
280
- col.proxy_base = base_col
275
+ refd_cols = exprs.Expr.get_refd_columns(col_md.value_expr)
276
+ self._record_refd_columns(col)
281
277
 
282
278
  def _init_idxs(self, tbl_md: schema.TableMd) -> None:
283
279
  self.idx_md = tbl_md.index_md
@@ -317,22 +313,30 @@ class TableVersion:
317
313
  self.store_tbl: StoreBase = StoreTable(self)
318
314
 
319
315
  def _update_md(
320
- self, timestamp: float, preceding_schema_version: Optional[int], conn: sql.engine.Connection
316
+ self, timestamp: float, conn: sql.engine.Connection, update_tbl_version: bool = True, preceding_schema_version: Optional[int] = None
321
317
  ) -> None:
322
- """Update all recorded metadata in response to a data or schema change.
318
+ """Writes table metadata to the database.
319
+
323
320
  Args:
324
321
  timestamp: timestamp of the change
325
- preceding_schema_version: last schema version if schema change, else None
322
+ conn: database connection to use
323
+ update_tbl_version: if `True`, will also write `TableVersion` metadata
324
+ preceding_schema_version: if specified, will also write `TableSchemaVersion` metadata, recording the
325
+ specified preceding schema version
326
326
  """
327
+ assert update_tbl_version or preceding_schema_version is None
328
+
327
329
  conn.execute(
328
330
  sql.update(schema.Table.__table__)
329
331
  .values({schema.Table.md: dataclasses.asdict(self._create_tbl_md())})
330
332
  .where(schema.Table.id == self.id))
331
333
 
332
- version_md = self._create_version_md(timestamp)
333
- conn.execute(
334
- sql.insert(schema.TableVersion.__table__)
335
- .values(tbl_id=self.id, version=self.version, md=dataclasses.asdict(version_md)))
334
+ if update_tbl_version:
335
+ version_md = self._create_version_md(timestamp)
336
+ conn.execute(
337
+ sql.insert(schema.TableVersion.__table__)
338
+ .values(tbl_id=self.id, version=self.version, md=dataclasses.asdict(version_md)))
339
+
336
340
  if preceding_schema_version is not None:
337
341
  schema_version_md = self._create_schema_version_md(preceding_schema_version)
338
342
  conn.execute(
@@ -352,7 +356,7 @@ class TableVersion:
352
356
  self.schema_version = self.version
353
357
  with Env.get().engine.begin() as conn:
354
358
  status = self._add_index(col, idx_name, idx, conn)
355
- self._update_md(time.time(), preceding_schema_version, conn)
359
+ self._update_md(time.time(), conn, preceding_schema_version=preceding_schema_version)
356
360
  _logger.info(f'Added index {idx_name} on column {col.name} to table {self.name}')
357
361
  return status
358
362
 
@@ -435,7 +439,7 @@ class TableVersion:
435
439
 
436
440
  with Env.get().engine.begin() as conn:
437
441
  self._drop_columns([idx_info.val_col, idx_info.undo_col])
438
- self._update_md(time.time(), preceding_schema_version, conn)
442
+ self._update_md(time.time(), conn, preceding_schema_version=preceding_schema_version)
439
443
  _logger.info(f'Dropped index {idx_md.name} on table {self.name}')
440
444
 
441
445
  def add_column(self, col: Column, print_stats: bool = False) -> UpdateStatus:
@@ -461,7 +465,7 @@ class TableVersion:
461
465
  status = self._add_columns([col], conn, print_stats=print_stats)
462
466
  _ = self._add_default_index(col, conn)
463
467
  # TODO: what to do about errors?
464
- self._update_md(time.time(), preceding_schema_version, conn)
468
+ self._update_md(time.time(), conn, preceding_schema_version=preceding_schema_version)
465
469
  _logger.info(f'Added column {col.name} to table {self.name}, new version: {self.version}')
466
470
 
467
471
  msg = (
@@ -472,8 +476,9 @@ class TableVersion:
472
476
  _logger.info(f'Column {col.name}: {msg}')
473
477
  return status
474
478
 
475
- def _add_columns(self, cols: List[Column], conn: sql.engine.Connection, print_stats: bool = False) -> UpdateStatus:
479
+ def _add_columns(self, cols: Iterable[Column], conn: sql.engine.Connection, print_stats: bool = False) -> UpdateStatus:
476
480
  """Add and populate columns within the current transaction"""
481
+ cols = list(cols)
477
482
  row_count = self.store_tbl.count(conn=conn)
478
483
  for col in cols:
479
484
  if not col.col_type.nullable and not col.is_computed:
@@ -493,7 +498,7 @@ class TableVersion:
493
498
  self.cols_by_id[col.id] = col
494
499
  if col.value_expr is not None:
495
500
  col.check_value_expr()
496
- self._record_value_expr(col)
501
+ self._record_refd_columns(col)
497
502
 
498
503
  if col.is_stored:
499
504
  self.store_tbl.add_column(col, conn)
@@ -507,7 +512,7 @@ class TableVersion:
507
512
  plan.ctx.num_rows = row_count
508
513
 
509
514
  try:
510
- plan.ctx.conn = conn
515
+ plan.ctx.set_conn(conn)
511
516
  plan.open()
512
517
  num_excs = self.store_tbl.load_column(col, plan, value_expr_slot_idx, conn)
513
518
  if num_excs > 0:
@@ -537,6 +542,8 @@ class TableVersion:
537
542
  def drop_column(self, name: str) -> None:
538
543
  """Drop a column from the table.
539
544
  """
545
+ from pixeltable.catalog import Catalog
546
+
540
547
  assert not self.is_snapshot
541
548
  if name not in self.cols_by_name:
542
549
  raise excs.Error(f'Unknown column: {name}')
@@ -547,13 +554,24 @@ class TableVersion:
547
554
  f'Cannot drop column `{name}` because the following columns depend on it:\n'
548
555
  f'{", ".join(c.name for c in dependent_user_cols)}'
549
556
  )
550
- dependent_remotes = [remote for remote, col_mapping in self.remotes.items() if name in col_mapping]
551
- if len(dependent_remotes) > 0:
557
+ # See if this column has a dependent store. We need to look through all stores in all
558
+ # (transitive) views of this table.
559
+ transitive_views = Catalog.get().tbls[self.id].get_views(recursive=True)
560
+ dependent_stores = [
561
+ (view, store)
562
+ for view in transitive_views
563
+ for store in view._tbl_version.external_stores.values()
564
+ if col in store.get_local_columns()
565
+ ]
566
+ if len(dependent_stores) > 0:
567
+ dependent_store_names = [
568
+ store.name if view._get_id() == self.id else f'{store.name} (in view `{view.get_name()}`)'
569
+ for view, store in dependent_stores
570
+ ]
552
571
  raise excs.Error(
553
- f'Cannot drop column `{name}` because the following remotes depend on it:\n'
554
- f'{", ".join(str(r) for r in dependent_remotes)}'
572
+ f'Cannot drop column `{name}` because the following external stores depend on it:\n'
573
+ f'{", ".join(dependent_store_names)}'
555
574
  )
556
- assert col.stored_proxy is None # since there are no dependent remotes
557
575
 
558
576
  # we're creating a new schema version
559
577
  self.version += 1
@@ -576,10 +594,10 @@ class TableVersion:
576
594
  for idx_name in dropped_idx_names:
577
595
  del self.idxs_by_name[idx_name]
578
596
  self._drop_columns(dropped_cols)
579
- self._update_md(time.time(), preceding_schema_version, conn)
597
+ self._update_md(time.time(), conn, preceding_schema_version=preceding_schema_version)
580
598
  _logger.info(f'Dropped column {name} from table {self.name}, new version: {self.version}')
581
599
 
582
- def _drop_columns(self, cols: list[Column]) -> None:
600
+ def _drop_columns(self, cols: Iterable[Column]) -> None:
583
601
  """Mark columns as dropped"""
584
602
  assert not self.is_snapshot
585
603
 
@@ -621,7 +639,7 @@ class TableVersion:
621
639
  self.schema_version = self.version
622
640
 
623
641
  with Env.get().engine.begin() as conn:
624
- self._update_md(time.time(), preceding_schema_version, conn)
642
+ self._update_md(time.time(), conn, preceding_schema_version=preceding_schema_version)
625
643
  _logger.info(f'Renamed column {old_name} to {new_name} in table {self.name}, new version: {self.version}')
626
644
 
627
645
  def set_comment(self, new_comment: Optional[str]):
@@ -640,7 +658,7 @@ class TableVersion:
640
658
  preceding_schema_version = self.schema_version
641
659
  self.schema_version = self.version
642
660
  with Env.get().engine.begin() as conn:
643
- self._update_md(time.time(), preceding_schema_version, conn)
661
+ self._update_md(time.time(), conn, preceding_schema_version=preceding_schema_version)
644
662
  _logger.info(f'[{self.name}] Updating table schema to version: {self.version}')
645
663
 
646
664
  def insert(
@@ -667,7 +685,7 @@ class TableVersion:
667
685
  result.num_excs = num_excs
668
686
  result.num_computed_values += exec_plan.ctx.num_computed_exprs * num_rows
669
687
  result.cols_with_excs = [f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs]
670
- self._update_md(timestamp, None, conn)
688
+ self._update_md(timestamp, conn)
671
689
 
672
690
  # update views
673
691
  for view in self.mutable_views:
@@ -781,7 +799,7 @@ class TableVersion:
781
799
  result.cols_with_excs = [f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs]
782
800
  self.store_tbl.delete_rows(
783
801
  self.version, base_versions=base_versions, match_on_vmin=True, where_clause=where_clause, conn=conn)
784
- self._update_md(timestamp, None, conn)
802
+ self._update_md(timestamp, conn)
785
803
 
786
804
  if cascade:
787
805
  base_versions = [None if plan is None else self.version] + base_versions # don't update in place
@@ -831,7 +849,7 @@ class TableVersion:
831
849
  if num_rows > 0:
832
850
  # we're creating a new version
833
851
  self.version += 1
834
- self._update_md(timestamp, None, conn)
852
+ self._update_md(timestamp, conn)
835
853
  else:
836
854
  pass
837
855
  for view in self.mutable_views:
@@ -962,93 +980,29 @@ class TableVersion:
962
980
  view._revert(session)
963
981
  _logger.info(f'TableVersion {self.name}: reverted to version {self.version}')
964
982
 
965
- @classmethod
966
- def _init_remote(cls, remote_md: dict[str, Any]) -> Tuple[pixeltable.datatransfer.Remote, dict[str, str]]:
967
- remote_cls = resolve_symbol(remote_md['class'])
968
- assert isinstance(remote_cls, type) and issubclass(remote_cls, pixeltable.datatransfer.Remote)
969
- remote = remote_cls.from_dict(remote_md['remote_md'])
970
- col_mapping = remote_md['col_mapping']
971
- return remote, col_mapping
972
-
973
- def link(self, remote: pixeltable.datatransfer.Remote, col_mapping: dict[str, str]) -> None:
974
- # All of the media columns being linked need to either be stored, computed columns or have stored proxies.
975
- # This ensures that the media in those columns resides in the media cache, where it can be served.
976
- # First determine which columns (if any) need stored proxies, but don't have one yet.
977
- cols_by_name = self.path.cols_by_name() # Includes base columns
978
- stored_proxies_needed = []
979
- for col_name in col_mapping.keys():
980
- col = cols_by_name[col_name]
981
- if col.col_type.is_media_type() and not (col.is_stored and col.compute_func) and not col.stored_proxy:
982
- stored_proxies_needed.append(col)
983
- with Env.get().engine.begin() as conn:
984
- self.version += 1
985
- self.remotes[remote] = col_mapping
986
- preceding_schema_version = None
987
- if len(stored_proxies_needed) > 0:
988
- _logger.info(f'Creating stored proxies for columns: {[col.name for col in stored_proxies_needed]}')
989
- # Create stored proxies for columns that need one. Increment the schema version
990
- # accordingly.
991
- preceding_schema_version = self.schema_version
992
- self.schema_version = self.version
993
- proxy_cols = [self.create_stored_proxy(col) for col in stored_proxies_needed]
994
- # Add the columns; this will also update table metadata.
995
- # TODO Add to base tables
996
- self._add_columns(proxy_cols, conn)
997
- # We don't need to retain `UpdateStatus` since the stored proxies are intended to be
998
- # invisible to the user.
999
- self._update_md(time.time(), preceding_schema_version, conn)
1000
-
1001
- def create_stored_proxy(self, col: Column) -> Column:
1002
- from pixeltable import exprs
983
+ def _init_external_stores(self, tbl_md: schema.TableMd) -> None:
984
+ for store_md in tbl_md.external_stores:
985
+ store_cls = resolve_symbol(store_md['class'])
986
+ assert isinstance(store_cls, type) and issubclass(store_cls, pixeltable.io.ExternalStore)
987
+ store = store_cls.from_dict(store_md['md'])
988
+ self.external_stores[store.name] = store
1003
989
 
1004
- assert col.col_type.is_media_type() and not (col.is_stored and col.compute_func) and not col.stored_proxy
1005
- proxy_col = Column(
1006
- name=None,
1007
- computed_with=exprs.ColumnRef(col).apply(lambda x: x, col_type=col.col_type),
1008
- stored=True,
1009
- col_id=self.next_col_id,
1010
- sa_col_type=col.col_type.to_sa_type(),
1011
- schema_version_add=self.schema_version
1012
- )
1013
- proxy_col.tbl = self
1014
- self.next_col_id += 1
1015
- col.stored_proxy = proxy_col
1016
- proxy_col.proxy_base = col
1017
- return proxy_col
990
+ def link_external_store(self, store: pixeltable.io.ExternalStore) -> None:
991
+ with Env.get().engine.begin() as conn:
992
+ store.link(self, conn) # May result in additional metadata changes
993
+ self.external_stores[store.name] = store
994
+ self._update_md(time.time(), conn, update_tbl_version=False)
1018
995
 
1019
- def unlink(self, remote: pixeltable.datatransfer.Remote) -> None:
1020
- assert remote in self.remotes
1021
- timestamp = time.time()
1022
- this_remote_col_names = list(self.remotes[remote].keys())
1023
- other_remote_col_names = {
1024
- col_name
1025
- for other_remote, col_mapping in self.remotes.items() if other_remote != remote
1026
- for col_name in col_mapping.keys()
1027
- }
1028
- cols_by_name = self.path.cols_by_name() # Includes base columns
1029
- stored_proxy_deletions_needed = [
1030
- cols_by_name[col_name]
1031
- for col_name in this_remote_col_names
1032
- if col_name not in other_remote_col_names and cols_by_name[col_name].stored_proxy
1033
- ]
996
+ def unlink_external_store(self, store_name: str, delete_external_data: bool) -> None:
997
+ assert store_name in self.external_stores
998
+ store = self.external_stores[store_name]
1034
999
  with Env.get().engine.begin() as conn:
1035
- self.version += 1
1036
- del self.remotes[remote]
1037
- preceding_schema_version = None
1038
- if len(stored_proxy_deletions_needed) > 0:
1039
- preceding_schema_version = self.schema_version
1040
- self.schema_version = self.version
1041
- proxy_cols = [col.stored_proxy for col in stored_proxy_deletions_needed]
1042
- for col in stored_proxy_deletions_needed:
1043
- assert col.stored_proxy is not None and col.stored_proxy.proxy_base == col
1044
- col.stored_proxy.proxy_base = None
1045
- col.stored_proxy = None
1046
- # TODO Drop from base tables
1047
- self._drop_columns(proxy_cols)
1048
- self._update_md(timestamp, preceding_schema_version, conn)
1049
-
1050
- def get_remotes(self) -> dict[pixeltable.datatransfer.Remote, dict[str, str]]:
1051
- return self.remotes
1000
+ store.unlink(self, conn) # May result in additional metadata changes
1001
+ del self.external_stores[store_name]
1002
+ self._update_md(time.time(), conn, update_tbl_version=False)
1003
+
1004
+ if delete_external_data and isinstance(store, pixeltable.io.external_store.Project):
1005
+ store.delete()
1052
1006
 
1053
1007
  def is_view(self) -> bool:
1054
1008
  return self.base is not None
@@ -1110,14 +1064,17 @@ class TableVersion:
1110
1064
  args.append(exprs.ColumnRef(param))
1111
1065
  fn = func.make_function(
1112
1066
  col.compute_func, return_type=col.col_type, param_types=[arg.col_type for arg in args])
1113
- col.value_expr = fn(*args)
1067
+ col.set_value_expr(fn(*args))
1114
1068
 
1115
- def _record_value_expr(self, col: Column) -> None:
1069
+ def _record_refd_columns(self, col: Column) -> None:
1116
1070
  """Update Column.dependent_cols for all cols referenced in col.value_expr.
1117
1071
  """
1118
- assert col.value_expr is not None
1119
- from pixeltable.exprs import ColumnRef
1120
- refd_cols = [e.col for e in col.value_expr.subexprs(expr_class=ColumnRef)]
1072
+ import pixeltable.exprs as exprs
1073
+ if col.value_expr_dict is not None:
1074
+ # if we have a value_expr_dict, use that instead of instantiating the value_expr
1075
+ refd_cols = exprs.Expr.get_refd_columns(col.value_expr_dict)
1076
+ else:
1077
+ refd_cols = [e.col for e in col.value_expr.subexprs(expr_class=exprs.ColumnRef)]
1121
1078
  for refd_col in refd_cols:
1122
1079
  refd_col.dependent_cols.add(col)
1123
1080
 
@@ -1145,25 +1102,23 @@ class TableVersion:
1145
1102
 
1146
1103
  @classmethod
1147
1104
  def _create_column_md(cls, cols: List[Column]) -> dict[int, schema.ColumnMd]:
1148
- column_md: Dict[int, schema.ColumnMd] = {}
1105
+ column_md: dict[int, schema.ColumnMd] = {}
1149
1106
  for col in cols:
1150
1107
  value_expr_dict = col.value_expr.as_dict() if col.value_expr is not None else None
1151
1108
  column_md[col.id] = schema.ColumnMd(
1152
1109
  id=col.id, col_type=col.col_type.as_dict(), is_pk=col.is_pk,
1153
1110
  schema_version_add=col.schema_version_add, schema_version_drop=col.schema_version_drop,
1154
- value_expr=value_expr_dict, stored=col.stored,
1155
- proxy_base=col.proxy_base.id if col.proxy_base else None)
1111
+ value_expr=value_expr_dict, stored=col.stored)
1156
1112
  return column_md
1157
1113
 
1158
1114
  @classmethod
1159
- def _create_remotes_md(cls, remotes: dict['pixeltable.datatransfer.Remote', dict[str, str]]) -> list[dict[str, Any]]:
1115
+ def _create_stores_md(cls, stores: Iterable['pixeltable.io.ExternalStore']) -> list[dict[str, Any]]:
1160
1116
  return [
1161
1117
  {
1162
- 'class': f'{type(remote).__module__}.{type(remote).__qualname__}',
1163
- 'remote_md': remote.to_dict(),
1164
- 'col_mapping': col_mapping
1118
+ 'class': f'{type(store).__module__}.{type(store).__qualname__}',
1119
+ 'md': store.as_dict()
1165
1120
  }
1166
- for remote, col_mapping in remotes.items()
1121
+ for store in stores
1167
1122
  ]
1168
1123
 
1169
1124
  def _create_tbl_md(self) -> schema.TableMd:
@@ -1171,7 +1126,7 @@ class TableVersion:
1171
1126
  name=self.name, current_version=self.version, current_schema_version=self.schema_version,
1172
1127
  next_col_id=self.next_col_id, next_idx_id=self.next_idx_id, next_row_id=self.next_rowid,
1173
1128
  column_md=self._create_column_md(self.cols), index_md=self.idx_md,
1174
- remotes=self._create_remotes_md(self.remotes), view_md=self.view_md)
1129
+ external_stores=self._create_stores_md(self.external_stores.values()), view_md=self.view_md)
1175
1130
 
1176
1131
  def _create_version_md(self, timestamp: float) -> schema.TableVersionMd:
1177
1132
  return schema.TableVersionMd(created_at=timestamp, version=self.version, schema_version=self.schema_version)
@@ -1184,3 +1139,13 @@ class TableVersion:
1184
1139
  return schema.TableSchemaVersionMd(
1185
1140
  schema_version=self.schema_version, preceding_schema_version=preceding_schema_version,
1186
1141
  columns=column_md, num_retained_versions=self.num_retained_versions, comment=self.comment)
1142
+
1143
+ def as_dict(self) -> dict:
1144
+ return {'id': str(self.id), 'effective_version': self.effective_version}
1145
+
1146
+ @classmethod
1147
+ def from_dict(cls, d: dict) -> 'TableVersion':
1148
+ import pixeltable.catalog as catalog
1149
+ id = UUID(d['id'])
1150
+ effective_version = d['effective_version']
1151
+ return catalog.Catalog.get().tbl_versions[(id, effective_version)]
@@ -1,10 +1,11 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import logging
4
- from typing import Optional, List, Union
4
+ from typing import Optional, Union
5
5
  from uuid import UUID
6
6
 
7
7
  import pixeltable
8
+ import pixeltable.catalog as catalog
8
9
  from .column import Column
9
10
  from .globals import POS_COLUMN_NAME
10
11
  from .table_version import TableVersion
@@ -59,13 +60,13 @@ class TableVersionPath:
59
60
  def is_insertable(self) -> bool:
60
61
  return self.tbl_version.is_insertable()
61
62
 
62
- def get_tbl_versions(self) -> List[TableVersion]:
63
+ def get_tbl_versions(self) -> list[TableVersion]:
63
64
  """Return all tbl versions"""
64
65
  if self.base is None:
65
66
  return [self.tbl_version]
66
67
  return [self.tbl_version] + self.base.get_tbl_versions()
67
68
 
68
- def get_bases(self) -> List[TableVersion]:
69
+ def get_bases(self) -> list[TableVersion]:
69
70
  """Return all tbl versions"""
70
71
  if self.base is None:
71
72
  return []
@@ -100,7 +101,7 @@ class TableVersionPath:
100
101
  from pixeltable.dataframe import DataFrame
101
102
  return DataFrame(self).__getitem__(index)
102
103
 
103
- def columns(self) -> List[Column]:
104
+ def columns(self) -> list[Column]:
104
105
  """Return all user columns visible in this tbl version path, including columns from bases"""
105
106
  result = list(self.tbl_version.cols_by_name.values())
106
107
  if self.base is not None:
@@ -114,6 +115,11 @@ class TableVersionPath:
114
115
  cols = self.columns()
115
116
  return {col.name: col for col in cols}
116
117
 
118
+ def cols_by_id(self) -> dict[int, Column]:
119
+ """Return a dict of all user columns visible in this tbl version path, including columns from bases"""
120
+ cols = self.columns()
121
+ return {col.id: col for col in cols}
122
+
117
123
  def get_column(self, name: str, include_bases: bool = True) -> Optional[Column]:
118
124
  """Return the column with the given name, or None if not found"""
119
125
  col = self.tbl_version.cols_by_name.get(name)
@@ -136,3 +142,15 @@ class TableVersionPath:
136
142
  return self.base.has_column(col)
137
143
  else:
138
144
  return False
145
+
146
+ def as_dict(self) -> dict:
147
+ return {
148
+ 'tbl_version': self.tbl_version.as_dict(),
149
+ 'base': self.base.as_dict() if self.base is not None else None
150
+ }
151
+
152
+ @classmethod
153
+ def from_dict(cls, d: dict) -> TableVersionPath:
154
+ tbl_version = TableVersion.from_dict(d['tbl_version'])
155
+ base = TableVersionPath.from_dict(d['base']) if d['base'] is not None else None
156
+ return cls(tbl_version, base)
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
  import logging
3
- from typing import List, Optional, Type, Dict, Set, Any
3
+ from typing import List, Optional, Type, Dict, Set, Any, Iterable
4
4
  from uuid import UUID
5
5
  import inspect
6
6
 
@@ -11,7 +11,7 @@ from .table_version import TableVersion
11
11
  from .table_version_path import TableVersionPath
12
12
  from .column import Column
13
13
  from .catalog import Catalog
14
- from .globals import POS_COLUMN_NAME
14
+ from .globals import POS_COLUMN_NAME, UpdateStatus
15
15
  from pixeltable.env import Env
16
16
  from pixeltable.iterators import ComponentIterator
17
17
  from pixeltable.exceptions import Error
@@ -55,7 +55,7 @@ class View(Table):
55
55
 
56
56
  # verify that filter can be evaluated in the context of the base
57
57
  if predicate is not None:
58
- if not predicate.is_bound_by(base.tbl_version_path):
58
+ if not predicate.is_bound_by(base._tbl_version_path):
59
59
  raise excs.Error(f'Filter cannot be computed in the context of the base {base._name}')
60
60
  # create a copy that we can modify and store
61
61
  predicate = predicate.copy()
@@ -65,7 +65,7 @@ class View(Table):
65
65
  if not col.is_computed:
66
66
  continue
67
67
  # make sure that the value can be computed in the context of the base
68
- if col.value_expr is not None and not col.value_expr.is_bound_by(base.tbl_version_path):
68
+ if col.value_expr is not None and not col.value_expr.is_bound_by(base._tbl_version_path):
69
69
  raise excs.Error(
70
70
  f'Column {col.name}: value expression cannot be computed in the context of the base {base._name}')
71
71
 
@@ -83,7 +83,7 @@ class View(Table):
83
83
 
84
84
  # construct Signature and type-check bound_args
85
85
  params = [
86
- func.Parameter(param_name, param_type, inspect.Parameter.POSITIONAL_OR_KEYWORD)
86
+ func.Parameter(param_name, param_type, kind=inspect.Parameter.POSITIONAL_OR_KEYWORD)
87
87
  for param_name, param_type in iterator_cls.input_schema().items()
88
88
  ]
89
89
  sig = func.Signature(InvalidType(), params)
@@ -114,7 +114,7 @@ class View(Table):
114
114
  iterator_args_expr = InlineDict(iterator_args) if iterator_args is not None else None
115
115
  iterator_class_fqn = f'{iterator_cls.__module__}.{iterator_cls.__name__}' if iterator_cls is not None \
116
116
  else None
117
- base_version_path = cls._get_snapshot_path(base.tbl_version_path) if is_snapshot else base.tbl_version_path
117
+ base_version_path = cls._get_snapshot_path(base._tbl_version_path) if is_snapshot else base._tbl_version_path
118
118
  base_versions = [
119
119
  (tbl_version.id.hex, tbl_version.version if is_snapshot or tbl_version.is_snapshot else None)
120
120
  for tbl_version in base_version_path.get_tbl_versions()
@@ -127,7 +127,7 @@ class View(Table):
127
127
  if iterator_args_expr is not None else None
128
128
  for col in columns:
129
129
  if col.value_expr is not None:
130
- col.value_expr = col.value_expr.retarget(base_version_path)
130
+ col.set_value_expr(col.value_expr.retarget(base_version_path))
131
131
 
132
132
  view_md = md_schema.ViewMd(
133
133
  is_snapshot=is_snapshot, predicate=predicate.as_dict() if predicate is not None else None,
@@ -148,7 +148,7 @@ class View(Table):
148
148
  _logger.info(f'Created view `{name}`, id={tbl_version.id}')
149
149
 
150
150
  from pixeltable.plan import Planner
151
- plan, num_values_per_row = Planner.create_view_load_plan(view.tbl_version_path)
151
+ plan, num_values_per_row = Planner.create_view_load_plan(view._tbl_version_path)
152
152
  num_rows, num_excs, cols_with_excs = tbl_version.store_tbl.insert_rows(
153
153
  plan, session.connection(), v_min=tbl_version.version)
154
154
  print(f'Created view `{name}` with {num_rows} rows, {num_excs} exceptions.')
@@ -161,11 +161,13 @@ class View(Table):
161
161
  return view
162
162
 
163
163
  @classmethod
164
- def _verify_column(cls, col: Column, existing_column_names: Set[str]) -> None:
164
+ def _verify_column(
165
+ cls, col: Column, existing_column_names: Set[str], existing_query_names: Optional[Set[str]] = None
166
+ ) -> None:
165
167
  # make sure that columns are nullable or have a default
166
168
  if not col.col_type.nullable and not col.is_computed:
167
169
  raise Error(f'Column {col.name}: non-computed columns in views must be nullable')
168
- super()._verify_column(col, existing_column_names)
170
+ super()._verify_column(col, existing_column_names, existing_query_names)
169
171
 
170
172
  @classmethod
171
173
  def _get_snapshot_path(cls, tbl_version_path: TableVersionPath) -> TableVersionPath:
@@ -201,3 +203,11 @@ class View(Table):
201
203
  cat.tbl_dependents[self._base._id].remove(self)
202
204
  del cat.tbl_dependents[self._id]
203
205
 
206
+ def insert(
207
+ self, rows: Optional[Iterable[dict[str, Any]]] = None, /, *, print_stats: bool = False,
208
+ fail_on_exception: bool = True, **kwargs: Any
209
+ ) -> UpdateStatus:
210
+ raise excs.Error(f'{self.display_name()} {self._name!r}: cannot insert into view')
211
+
212
+ def delete(self, where: Optional['pixeltable.exprs.Predicate'] = None) -> UpdateStatus:
213
+ raise excs.Error(f'{self.display_name()} {self._name!r}: cannot delete from view')