pixeltable 0.2.10__py3-none-any.whl → 0.2.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

pixeltable/__init__.py CHANGED
@@ -3,8 +3,8 @@ from .dataframe import DataFrame
3
3
  from .exceptions import Error
4
4
  from .exprs import RELATIVE_PATH_ROOT
5
5
  from .func import Function, udf, Aggregator, uda, expr_udf
6
- from .globals import init, create_table, create_view, get_table, move, drop_table, list_tables, create_dir, rm_dir, \
7
- list_dirs, list_functions, get_path, configure_logging
6
+ from .globals import init, create_table, create_view, get_table, move, drop_table, list_tables, create_dir, drop_dir, \
7
+ list_dirs, list_functions, configure_logging
8
8
  from .type_system import (
9
9
  ColumnType,
10
10
  StringType,
pixeltable/__version__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  # These version placeholders will be replaced during build.
2
- __version__ = "0.2.10"
3
- __version_tuple__ = (0, 2, 10)
2
+ __version__ = "0.2.12"
3
+ __version_tuple__ = (0, 2, 12)
@@ -120,7 +120,7 @@ class Catalog:
120
120
  base = base_version
121
121
  assert base_path is not None
122
122
 
123
- base_tbl = self.tbls[base_path.tbl_version.id]
123
+ base_tbl_id = base_path.tbl_id()
124
124
  is_snapshot = view_md is not None and view_md.is_snapshot
125
125
  snapshot_only = is_snapshot and view_md.predicate is None and len(schema_version_md.columns) == 0
126
126
  if snapshot_only:
@@ -134,9 +134,9 @@ class Catalog:
134
134
  view_path = TableVersionPath(tbl_version, base=base_path)
135
135
 
136
136
  tbl = View(
137
- tbl_record.id, tbl_record.dir_id, tbl_md.name, view_path, base_tbl,
137
+ tbl_record.id, tbl_record.dir_id, tbl_md.name, view_path, base_tbl_id,
138
138
  snapshot_only=snapshot_only)
139
- self.tbl_dependents[base_tbl._id].append(tbl)
139
+ self.tbl_dependents[base_tbl_id].append(tbl)
140
140
 
141
141
  else:
142
142
  tbl_version = TableVersion(tbl_record.id, tbl_md, tbl_md.current_version, schema_version_md)
@@ -7,6 +7,8 @@ _logger = logging.getLogger('pixeltable')
7
7
 
8
8
  # name of the position column in a component view
9
9
  POS_COLUMN_NAME = 'pos'
10
+ _ROWID_COLUMN_NAME = '_rowid'
11
+
10
12
 
11
13
  @dataclasses.dataclass
12
14
  class UpdateStatus:
@@ -144,14 +144,4 @@ class InsertableTable(Table):
144
144
 
145
145
  >>> tbl.delete(tbl.a > 5)
146
146
  """
147
- from pixeltable.exprs import Predicate
148
- from pixeltable.plan import Planner
149
- if where is not None:
150
- if not isinstance(where, Predicate):
151
- raise excs.Error(f"'where' argument must be a Predicate, got {type(where)}")
152
- analysis_info = Planner.analyze(self._tbl_version_path, where)
153
- # for now we require that the updated rows can be identified via SQL, rather than via a Python filter
154
- if analysis_info.filter is not None:
155
- raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
156
-
157
- return self.tbl_version.delete(where)
147
+ return self._tbl_version.delete(where=where)
@@ -1,7 +1,10 @@
1
1
  from abc import abstractmethod
2
- from typing import Optional
2
+ from typing import TYPE_CHECKING, Optional
3
3
  from uuid import UUID
4
4
 
5
+ if TYPE_CHECKING:
6
+ from pixeltable import catalog
7
+
5
8
 
6
9
  class SchemaObject:
7
10
  """
@@ -17,9 +20,32 @@ class SchemaObject:
17
20
  def _get_id(self) -> UUID:
18
21
  return self._id
19
22
 
20
- def get_name(self) -> str:
23
+ @property
24
+ def name(self) -> str:
25
+ """Returns the name of this schema object."""
21
26
  return self._name
22
27
 
28
+ @property
29
+ def parent(self) -> Optional['catalog.Dir']:
30
+ """Returns the parent directory of this schema object."""
31
+ from pixeltable import catalog
32
+ if self._dir_id is None:
33
+ return None
34
+ dir = catalog.Catalog.get().paths.get_schema_obj(self._dir_id)
35
+ assert isinstance(dir, catalog.Dir)
36
+ return dir
37
+
38
+ @property
39
+ def path(self) -> str:
40
+ """Returns the path to this schema object."""
41
+ parent = self.parent
42
+ if parent is None or parent.parent is None:
43
+ # Either this is the root directory, with empty path, or its parent is the
44
+ # root directory. Either way, we return just the name.
45
+ return self.name
46
+ else:
47
+ return f'{parent.path}.{self.name}'
48
+
23
49
  @classmethod
24
50
  @abstractmethod
25
51
  def display_name(cls) -> str:
@@ -19,7 +19,7 @@ import pixeltable.index as index
19
19
  import pixeltable.metadata.schema as schema
20
20
  import pixeltable.type_system as ts
21
21
  from .column import Column
22
- from .globals import is_valid_identifier, is_system_column_name, UpdateStatus
22
+ from .globals import _ROWID_COLUMN_NAME, is_valid_identifier, is_system_column_name, UpdateStatus
23
23
  from .schema_object import SchemaObject
24
24
  from .table_version import TableVersion
25
25
  from .table_version_path import TableVersionPath
@@ -29,8 +29,6 @@ _logger = logging.getLogger('pixeltable')
29
29
  class Table(SchemaObject):
30
30
  """Base class for all tabular SchemaObjects."""
31
31
 
32
- __ROWID_COLUMN_NAME = '_rowid'
33
-
34
32
  def __init__(self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath):
35
33
  super().__init__(id, name, dir_id)
36
34
  self._is_dropped = False
@@ -84,16 +82,24 @@ class Table(SchemaObject):
84
82
  return self._queries[index]
85
83
  return self._tbl_version_path.__getitem__(index)
86
84
 
87
- def get_views(self, *, recursive: bool = False) -> list['Table']:
85
+ def list_views(self, *, recursive: bool = True) -> list[str]:
88
86
  """
89
- All views and snapshots of this `Table`.
87
+ Returns a list of all views and snapshots of this `Table`.
88
+
89
+ Args:
90
+ recursive: If `False`, returns only the immediate successor views of this `Table`. If `True`, returns
91
+ all sub-views (including views of views, etc.)
90
92
  """
93
+ return [t.path for t in self._get_views(recursive=recursive)]
94
+
95
+ def _get_views(self, *, recursive: bool = True) -> list['Table']:
96
+ dependents = catalog.Catalog.get().tbl_dependents[self._get_id()]
91
97
  if recursive:
92
- return [self] + [t for view in self.get_views(recursive=False) for t in view.get_views(recursive=True)]
98
+ return dependents + [t for view in dependents for t in view._get_views(recursive=True)]
93
99
  else:
94
- return catalog.Catalog.get().tbl_dependents[self._get_id()]
100
+ return dependents
95
101
 
96
- def df(self) -> 'pixeltable.dataframe.DataFrame':
102
+ def _df(self) -> 'pixeltable.dataframe.DataFrame':
97
103
  """Return a DataFrame for this table.
98
104
  """
99
105
  # local import: avoid circular imports
@@ -132,30 +138,30 @@ class Table(SchemaObject):
132
138
 
133
139
  def collect(self) -> 'pixeltable.dataframe.DataFrameResultSet':
134
140
  """Return rows from this table."""
135
- return self.df().collect()
141
+ return self._df().collect()
136
142
 
137
143
  def show(
138
144
  self, *args, **kwargs
139
145
  ) -> 'pixeltable.dataframe.DataFrameResultSet':
140
146
  """Return rows from this table.
141
147
  """
142
- return self.df().show(*args, **kwargs)
148
+ return self._df().show(*args, **kwargs)
143
149
 
144
150
  def head(
145
151
  self, *args, **kwargs
146
152
  ) -> 'pixeltable.dataframe.DataFrameResultSet':
147
153
  """Return the first n rows inserted into this table."""
148
- return self.df().head(*args, **kwargs)
154
+ return self._df().head(*args, **kwargs)
149
155
 
150
156
  def tail(
151
157
  self, *args, **kwargs
152
158
  ) -> 'pixeltable.dataframe.DataFrameResultSet':
153
159
  """Return the last n rows inserted into this table."""
154
- return self.df().tail(*args, **kwargs)
160
+ return self._df().tail(*args, **kwargs)
155
161
 
156
162
  def count(self) -> int:
157
163
  """Return the number of rows in this table."""
158
- return self.df().count()
164
+ return self._df().count()
159
165
 
160
166
  def column_names(self) -> list[str]:
161
167
  """Return the names of the columns in this table."""
@@ -502,7 +508,37 @@ class Table(SchemaObject):
502
508
  >>> tbl.drop_column('factorial')
503
509
  """
504
510
  self._check_is_dropped()
505
- self._tbl_version.drop_column(name)
511
+
512
+ if name not in self._tbl_version.cols_by_name:
513
+ raise excs.Error(f'Unknown column: {name}')
514
+ col = self._tbl_version.cols_by_name[name]
515
+
516
+ dependent_user_cols = [c for c in col.dependent_cols if c.name is not None]
517
+ if len(dependent_user_cols) > 0:
518
+ raise excs.Error(
519
+ f'Cannot drop column `{name}` because the following columns depend on it:\n'
520
+ f'{", ".join(c.name for c in dependent_user_cols)}'
521
+ )
522
+
523
+ # See if this column has a dependent store. We need to look through all stores in all
524
+ # (transitive) views of this table.
525
+ dependent_stores = [
526
+ (view, store)
527
+ for view in [self] + self._get_views(recursive=True)
528
+ for store in view._tbl_version.external_stores.values()
529
+ if col in store.get_local_columns()
530
+ ]
531
+ if len(dependent_stores) > 0:
532
+ dependent_store_names = [
533
+ store.name if view._get_id() == self._get_id() else f'{store.name} (in view `{view.name}`)'
534
+ for view, store in dependent_stores
535
+ ]
536
+ raise excs.Error(
537
+ f'Cannot drop column `{name}` because the following external stores depend on it:\n'
538
+ f'{", ".join(dependent_store_names)}'
539
+ )
540
+
541
+ self._tbl_version.drop_column(col)
506
542
 
507
543
  def rename_column(self, old_name: str, new_name: str) -> None:
508
544
  """Rename a column.
@@ -524,15 +560,15 @@ class Table(SchemaObject):
524
560
 
525
561
  def add_embedding_index(
526
562
  self, col_name: str, *, idx_name: Optional[str] = None,
527
- text_embed: Optional[pixeltable.Function] = None, img_embed: Optional[pixeltable.Function] = None,
563
+ string_embed: Optional[pixeltable.Function] = None, image_embed: Optional[pixeltable.Function] = None,
528
564
  metric: str = 'cosine'
529
565
  ) -> None:
530
566
  """Add an index to the table.
531
567
  Args:
532
568
  col_name: name of column to index
533
569
  idx_name: name of index, which needs to be unique for the table; if not provided, a name will be generated
534
- text_embed: function to embed text; required if the column is a text column
535
- img_embed: function to embed images; required if the column is an image column
570
+ string_embed: function to embed text; required if the column is a text column
571
+ image_embed: function to embed images; required if the column is an image column
536
572
  metric: distance metric to use for the index; one of 'cosine', 'ip', 'l2'; default is 'cosine'
537
573
 
538
574
  Raises:
@@ -541,13 +577,13 @@ class Table(SchemaObject):
541
577
  Examples:
542
578
  Add an index to the ``img`` column:
543
579
 
544
- >>> tbl.add_embedding_index('img', img_embed=...)
580
+ >>> tbl.add_embedding_index('img', image_embed=...)
545
581
 
546
582
  Add another index to the ``img`` column, using the inner product as the distance metric,
547
- and with a specific name; ``text_embed`` is also specified in order to search with text:
583
+ and with a specific name; ``string_embed`` is also specified in order to search with text:
548
584
 
549
585
  >>> tbl.add_embedding_index(
550
- 'img', idx_name='clip_idx', img_embed=..., text_embed=...text_embed..., metric='ip')
586
+ 'img', idx_name='clip_idx', image_embed=..., string_embed=..., metric='ip')
551
587
  """
552
588
  if self._tbl_version_path.is_snapshot():
553
589
  raise excs.Error('Cannot add an index to a snapshot')
@@ -559,7 +595,7 @@ class Table(SchemaObject):
559
595
  raise excs.Error(f'Duplicate index name: {idx_name}')
560
596
  from pixeltable.index import EmbeddingIndex
561
597
  # create the EmbeddingIndex instance to verify args
562
- idx = EmbeddingIndex(col, metric=metric, text_embed=text_embed, img_embed=img_embed)
598
+ idx = EmbeddingIndex(col, metric=metric, string_embed=string_embed, image_embed=image_embed)
563
599
  status = self._tbl_version.add_index(col, idx_name=idx_name, idx=idx)
564
600
  # TODO: how to deal with exceptions here? drop the index and raise?
565
601
 
@@ -606,26 +642,26 @@ class Table(SchemaObject):
606
642
  raise excs.Error('Cannot drop an index from a snapshot')
607
643
  self._check_is_dropped()
608
644
  if (column_name is None) == (idx_name is None):
609
- raise excs.Error('Exactly one of column_name or idx_name must be provided')
645
+ raise excs.Error("Exactly one of 'column_name' or 'idx_name' must be provided")
610
646
 
611
647
  if idx_name is not None:
612
648
  if idx_name not in self._tbl_version.idxs_by_name:
613
- raise excs.Error(f'Index {idx_name} does not exist')
649
+ raise excs.Error(f'Index {idx_name!r} does not exist')
614
650
  idx_id = self._tbl_version.idxs_by_name[idx_name].id
615
651
  else:
616
652
  col = self._tbl_version_path.get_column(column_name, include_bases=True)
617
653
  if col is None:
618
- raise excs.Error(f'Column {column_name} unknown')
654
+ raise excs.Error(f'Column {column_name!r} unknown')
619
655
  if col.tbl.id != self._tbl_version.id:
620
656
  raise excs.Error(
621
- f'Column {column_name}: cannot drop index from column that belongs to base ({col.tbl.name})')
657
+ f'Column {column_name!r}: cannot drop index from column that belongs to base ({col.tbl.name}!r)')
622
658
  idx_info = [info for info in self._tbl_version.idxs_by_name.values() if info.col.id == col.id]
623
659
  if _idx_class is not None:
624
660
  idx_info = [info for info in idx_info if isinstance(info.idx, _idx_class)]
625
661
  if len(idx_info) == 0:
626
- raise excs.Error(f'Column {column_name} does not have an index')
662
+ raise excs.Error(f'Column {column_name!r} does not have an index')
627
663
  if len(idx_info) > 1:
628
- raise excs.Error(f'Column {column_name} has multiple indices; specify idx_name instead')
664
+ raise excs.Error(f"Column {column_name!r} has multiple indices; specify 'idx_name' instead")
629
665
  idx_id = idx_info[0].id
630
666
  self._tbl_version.drop_index(idx_id)
631
667
 
@@ -706,21 +742,8 @@ class Table(SchemaObject):
706
742
 
707
743
  >>> tbl.update({'int_col': tbl.int_col + 1}, where=tbl.int_col == 0)
708
744
  """
709
- if self._tbl_version_path.is_snapshot():
710
- raise excs.Error('Cannot update a snapshot')
711
745
  self._check_is_dropped()
712
-
713
- update_spec = self._validate_update_spec(value_spec, allow_pk=False, allow_exprs=True)
714
- from pixeltable.plan import Planner
715
- if where is not None:
716
- if not isinstance(where, exprs.Predicate):
717
- raise excs.Error(f"'where' argument must be a Predicate, got {type(where)}")
718
- analysis_info = Planner.analyze(self._tbl_version_path, where)
719
- # for now we require that the updated rows can be identified via SQL, rather than via a Python filter
720
- if analysis_info.filter is not None:
721
- raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
722
-
723
- return self._tbl_version.update(update_spec, where, cascade)
746
+ return self._tbl_version.update(value_spec, where, cascade)
724
747
 
725
748
  def batch_update(self, rows: Iterable[dict[str, Any]], cascade: bool = True) -> UpdateStatus:
726
749
  """Update rows in this table.
@@ -738,22 +761,23 @@ class Table(SchemaObject):
738
761
  if self._tbl_version_path.is_snapshot():
739
762
  raise excs.Error('Cannot update a snapshot')
740
763
  self._check_is_dropped()
764
+ rows = list(rows)
741
765
 
742
766
  row_updates: list[dict[Column, exprs.Expr]] = []
743
767
  pk_col_names = set(c.name for c in self._tbl_version.primary_key_columns())
744
768
 
745
769
  # pseudo-column _rowid: contains the rowid of the row to update and can be used instead of the primary key
746
- has_rowid = self.__ROWID_COLUMN_NAME in rows[0]
770
+ has_rowid = _ROWID_COLUMN_NAME in rows[0]
747
771
  rowids: list[Tuple[int, ...]] = []
748
772
  if len(pk_col_names) == 0 and not has_rowid:
749
773
  raise excs.Error('Table must have primary key for batch update')
750
774
 
751
775
  for row_spec in rows:
752
- col_vals = self._validate_update_spec(row_spec, allow_pk=not has_rowid, allow_exprs=False)
776
+ col_vals = self._tbl_version._validate_update_spec(row_spec, allow_pk=not has_rowid, allow_exprs=False)
753
777
  if has_rowid:
754
778
  # we expect the _rowid column to be present for each row
755
- assert self.__ROWID_COLUMN_NAME in row_spec
756
- rowids.append(row_spec[self.__ROWID_COLUMN_NAME])
779
+ assert _ROWID_COLUMN_NAME in row_spec
780
+ rowids.append(row_spec[_ROWID_COLUMN_NAME])
757
781
  else:
758
782
  col_names = set(col.name for col in col_vals.keys())
759
783
  if any(pk_col_name not in col_names for pk_col_name in pk_col_names):
@@ -762,51 +786,6 @@ class Table(SchemaObject):
762
786
  row_updates.append(col_vals)
763
787
  return self._tbl_version.batch_update(row_updates, rowids, cascade)
764
788
 
765
- def _validate_update_spec(
766
- self, value_spec: dict[str, Any], allow_pk: bool, allow_exprs: bool
767
- ) -> dict[Column, 'pixeltable.exprs.Expr']:
768
- from pixeltable import exprs
769
- update_targets: dict[Column, exprs.Expr] = {}
770
- for col_name, val in value_spec.items():
771
- if not isinstance(col_name, str):
772
- raise excs.Error(f'Update specification: dict key must be column name, got {col_name!r}')
773
- if col_name == self.__ROWID_COLUMN_NAME:
774
- # ignore pseudo-column _rowid
775
- continue
776
- col = self._tbl_version_path.get_column(col_name, include_bases=False)
777
- if col is None:
778
- # TODO: return more informative error if this is trying to update a base column
779
- raise excs.Error(f'Column {col_name} unknown')
780
- if col.is_computed:
781
- raise excs.Error(f'Column {col_name} is computed and cannot be updated')
782
- if col.is_pk and not allow_pk:
783
- raise excs.Error(f'Column {col_name} is a primary key column and cannot be updated')
784
- if col.col_type.is_media_type():
785
- raise excs.Error(f'Column {col_name} has type image/video/audio/document and cannot be updated')
786
-
787
- # make sure that the value is compatible with the column type
788
- try:
789
- # check if this is a literal
790
- value_expr = exprs.Literal(val, col_type=col.col_type)
791
- except TypeError:
792
- if not allow_exprs:
793
- raise excs.Error(
794
- f'Column {col_name}: value {val!r} is not a valid literal for this column '
795
- f'(expected {col.col_type})')
796
- # it's not a literal, let's try to create an expr from it
797
- value_expr = exprs.Expr.from_object(val)
798
- if value_expr is None:
799
- raise excs.Error(f'Column {col_name}: value {val!r} is not a recognized literal or expression')
800
- if not col.col_type.matches(value_expr.col_type):
801
- raise excs.Error((
802
- f'Type of value {val!r} ({value_expr.col_type}) is not compatible with the type of column '
803
- f'{col_name} ({col.col_type})'
804
- ))
805
- update_targets[col] = value_expr
806
-
807
- return update_targets
808
-
809
- @abc.abstractmethod
810
789
  def delete(self, where: Optional['pixeltable.exprs.Predicate'] = None) -> UpdateStatus:
811
790
  """Delete rows in this table.
812
791
 
@@ -882,13 +861,13 @@ class Table(SchemaObject):
882
861
  Links the specified `ExternalStore` to this table.
883
862
  """
884
863
  if self._tbl_version.is_snapshot:
885
- raise excs.Error(f'Table `{self.get_name()}` is a snapshot, so it cannot be linked to an external store.')
864
+ raise excs.Error(f'Table `{self.name}` is a snapshot, so it cannot be linked to an external store.')
886
865
  self._check_is_dropped()
887
866
  if store.name in self.external_stores:
888
- raise excs.Error(f'Table `{self.get_name()}` already has an external store with that name: {store.name}')
889
- _logger.info(f'Linking external store `{store.name}` to table `{self.get_name()}`')
867
+ raise excs.Error(f'Table `{self.name}` already has an external store with that name: {store.name}')
868
+ _logger.info(f'Linking external store `{store.name}` to table `{self.name}`')
890
869
  self._tbl_version.link_external_store(store)
891
- print(f'Linked external store `{store.name}` to table `{self.get_name()}`.')
870
+ print(f'Linked external store `{store.name}` to table `{self.name}`.')
892
871
 
893
872
  def unlink_external_stores(
894
873
  self,
@@ -920,11 +899,11 @@ class Table(SchemaObject):
920
899
  if not ignore_errors:
921
900
  for store in stores:
922
901
  if store not in all_stores:
923
- raise excs.Error(f'Table `{self.get_name()}` has no external store with that name: {store}')
902
+ raise excs.Error(f'Table `{self.name}` has no external store with that name: {store}')
924
903
 
925
904
  for store in stores:
926
905
  self._tbl_version.unlink_external_store(store, delete_external_data=delete_external_data)
927
- print(f'Unlinked external store from table `{self.get_name()}`: {store}')
906
+ print(f'Unlinked external store from table `{self.name}`: {store}')
928
907
 
929
908
  def sync(
930
909
  self,
@@ -952,7 +931,7 @@ class Table(SchemaObject):
952
931
 
953
932
  for store in stores:
954
933
  if store not in all_stores:
955
- raise excs.Error(f'Table `{self.get_name()}` has no external store with that name: {store}')
934
+ raise excs.Error(f'Table `{self.name}` has no external store with that name: {store}')
956
935
 
957
936
  from pixeltable.io import SyncStatus
958
937