pixeltable 0.2.11__py3-none-any.whl → 0.2.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (48)
  1. pixeltable/__init__.py +2 -2
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/insertable_table.py +2 -2
  4. pixeltable/catalog/schema_object.py +28 -2
  5. pixeltable/catalog/table.py +68 -30
  6. pixeltable/catalog/table_version.py +14 -43
  7. pixeltable/catalog/view.py +2 -2
  8. pixeltable/dataframe.py +8 -7
  9. pixeltable/exec/expr_eval_node.py +8 -1
  10. pixeltable/exec/sql_scan_node.py +1 -1
  11. pixeltable/exprs/__init__.py +0 -1
  12. pixeltable/exprs/column_ref.py +2 -7
  13. pixeltable/exprs/comparison.py +5 -5
  14. pixeltable/exprs/compound_predicate.py +12 -12
  15. pixeltable/exprs/expr.py +32 -0
  16. pixeltable/exprs/in_predicate.py +3 -3
  17. pixeltable/exprs/is_null.py +5 -5
  18. pixeltable/exprs/similarity_expr.py +27 -16
  19. pixeltable/func/aggregate_function.py +10 -4
  20. pixeltable/func/callable_function.py +4 -0
  21. pixeltable/func/function_registry.py +2 -0
  22. pixeltable/functions/globals.py +36 -1
  23. pixeltable/functions/huggingface.py +62 -4
  24. pixeltable/functions/image.py +17 -0
  25. pixeltable/functions/openai.py +1 -1
  26. pixeltable/functions/string.py +622 -7
  27. pixeltable/functions/video.py +26 -8
  28. pixeltable/globals.py +54 -50
  29. pixeltable/index/embedding_index.py +28 -27
  30. pixeltable/io/external_store.py +2 -2
  31. pixeltable/io/globals.py +54 -5
  32. pixeltable/io/label_studio.py +45 -5
  33. pixeltable/io/pandas.py +18 -7
  34. pixeltable/metadata/__init__.py +1 -1
  35. pixeltable/metadata/converters/convert_17.py +26 -0
  36. pixeltable/plan.py +6 -6
  37. pixeltable/tool/create_test_db_dump.py +2 -2
  38. pixeltable/tool/doc_plugins/griffe.py +77 -0
  39. pixeltable/tool/doc_plugins/mkdocstrings.py +6 -0
  40. pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +135 -0
  41. pixeltable/utils/s3.py +1 -1
  42. pixeltable-0.2.13.dist-info/METADATA +206 -0
  43. {pixeltable-0.2.11.dist-info → pixeltable-0.2.13.dist-info}/RECORD +46 -42
  44. pixeltable-0.2.13.dist-info/entry_points.txt +3 -0
  45. pixeltable/exprs/predicate.py +0 -44
  46. pixeltable-0.2.11.dist-info/METADATA +0 -137
  47. {pixeltable-0.2.11.dist-info → pixeltable-0.2.13.dist-info}/LICENSE +0 -0
  48. {pixeltable-0.2.11.dist-info → pixeltable-0.2.13.dist-info}/WHEEL +0 -0
pixeltable/__init__.py CHANGED
@@ -3,8 +3,8 @@ from .dataframe import DataFrame
3
3
  from .exceptions import Error
4
4
  from .exprs import RELATIVE_PATH_ROOT
5
5
  from .func import Function, udf, Aggregator, uda, expr_udf
6
- from .globals import init, create_table, create_view, get_table, move, drop_table, list_tables, create_dir, rm_dir, \
7
- list_dirs, list_functions, get_path, configure_logging
6
+ from .globals import init, create_table, create_view, get_table, move, drop_table, list_tables, create_dir, drop_dir, \
7
+ list_dirs, list_functions, configure_logging
8
8
  from .type_system import (
9
9
  ColumnType,
10
10
  StringType,
pixeltable/__version__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  # These version placeholders will be replaced during build.
2
- __version__ = "0.2.11"
3
- __version_tuple__ = (0, 2, 11)
2
+ __version__ = "0.2.13"
3
+ __version_tuple__ = (0, 2, 13)
pixeltable/catalog/insertable_table.py CHANGED
@@ -129,11 +129,11 @@ class InsertableTable(Table):
129
129
  msg = str(e)
130
130
  raise excs.Error(f'Error in column {col.name}: {msg[0].lower() + msg[1:]}\nRow: {row}')
131
131
 
132
- def delete(self, where: Optional['pixeltable.exprs.Predicate'] = None) -> UpdateStatus:
132
+ def delete(self, where: Optional['pixeltable.exprs.Expr'] = None) -> UpdateStatus:
133
133
  """Delete rows in this table.
134
134
 
135
135
  Args:
136
- where: a Predicate to filter rows to delete.
136
+ where: a predicate to filter rows to delete.
137
137
 
138
138
  Examples:
139
139
  Delete all rows in a table:
pixeltable/catalog/schema_object.py CHANGED
@@ -1,7 +1,10 @@
1
1
  from abc import abstractmethod
2
- from typing import Optional
2
+ from typing import TYPE_CHECKING, Optional
3
3
  from uuid import UUID
4
4
 
5
+ if TYPE_CHECKING:
6
+ from pixeltable import catalog
7
+
5
8
 
6
9
  class SchemaObject:
7
10
  """
@@ -17,9 +20,32 @@ class SchemaObject:
17
20
  def _get_id(self) -> UUID:
18
21
  return self._id
19
22
 
20
- def get_name(self) -> str:
23
+ @property
24
+ def name(self) -> str:
25
+ """Returns the name of this schema object."""
21
26
  return self._name
22
27
 
28
+ @property
29
+ def parent(self) -> Optional['catalog.Dir']:
30
+ """Returns the parent directory of this schema object."""
31
+ from pixeltable import catalog
32
+ if self._dir_id is None:
33
+ return None
34
+ dir = catalog.Catalog.get().paths.get_schema_obj(self._dir_id)
35
+ assert isinstance(dir, catalog.Dir)
36
+ return dir
37
+
38
+ @property
39
+ def path(self) -> str:
40
+ """Returns the path to this schema object."""
41
+ parent = self.parent
42
+ if parent is None or parent.parent is None:
43
+ # Either this is the root directory, with empty path, or its parent is the
44
+ # root directory. Either way, we return just the name.
45
+ return self.name
46
+ else:
47
+ return f'{parent.path}.{self.name}'
48
+
23
49
  @classmethod
24
50
  @abstractmethod
25
51
  def display_name(cls) -> str:
pixeltable/catalog/table.py CHANGED
@@ -82,14 +82,22 @@ class Table(SchemaObject):
82
82
  return self._queries[index]
83
83
  return self._tbl_version_path.__getitem__(index)
84
84
 
85
- def get_views(self, *, recursive: bool = False) -> list['Table']:
85
+ def list_views(self, *, recursive: bool = True) -> list[str]:
86
86
  """
87
- All views and snapshots of this `Table`.
87
+ Returns a list of all views and snapshots of this `Table`.
88
+
89
+ Args:
90
+ recursive: If `False`, returns only the immediate successor views of this `Table`. If `True`, returns
91
+ all sub-views (including views of views, etc.)
88
92
  """
93
+ return [t.path for t in self._get_views(recursive=recursive)]
94
+
95
+ def _get_views(self, *, recursive: bool = True) -> list['Table']:
96
+ dependents = catalog.Catalog.get().tbl_dependents[self._get_id()]
89
97
  if recursive:
90
- return [self] + [t for view in self.get_views(recursive=False) for t in view.get_views(recursive=True)]
98
+ return dependents + [t for view in dependents for t in view._get_views(recursive=True)]
91
99
  else:
92
- return catalog.Catalog.get().tbl_dependents[self._get_id()]
100
+ return dependents
93
101
 
94
102
  def _df(self) -> 'pixeltable.dataframe.DataFrame':
95
103
  """Return a DataFrame for this table.
@@ -105,7 +113,7 @@ class Table(SchemaObject):
105
113
  from pixeltable.dataframe import DataFrame
106
114
  return DataFrame(self._tbl_version_path).select(*items, **named_items)
107
115
 
108
- def where(self, pred: 'exprs.Predicate') -> 'pixeltable.dataframe.DataFrame':
116
+ def where(self, pred: 'exprs.Expr') -> 'pixeltable.dataframe.DataFrame':
109
117
  """Return a DataFrame for this table.
110
118
  """
111
119
  # local import: avoid circular imports
@@ -500,7 +508,37 @@ class Table(SchemaObject):
500
508
  >>> tbl.drop_column('factorial')
501
509
  """
502
510
  self._check_is_dropped()
503
- self._tbl_version.drop_column(name)
511
+
512
+ if name not in self._tbl_version.cols_by_name:
513
+ raise excs.Error(f'Unknown column: {name}')
514
+ col = self._tbl_version.cols_by_name[name]
515
+
516
+ dependent_user_cols = [c for c in col.dependent_cols if c.name is not None]
517
+ if len(dependent_user_cols) > 0:
518
+ raise excs.Error(
519
+ f'Cannot drop column `{name}` because the following columns depend on it:\n'
520
+ f'{", ".join(c.name for c in dependent_user_cols)}'
521
+ )
522
+
523
+ # See if this column has a dependent store. We need to look through all stores in all
524
+ # (transitive) views of this table.
525
+ dependent_stores = [
526
+ (view, store)
527
+ for view in [self] + self._get_views(recursive=True)
528
+ for store in view._tbl_version.external_stores.values()
529
+ if col in store.get_local_columns()
530
+ ]
531
+ if len(dependent_stores) > 0:
532
+ dependent_store_names = [
533
+ store.name if view._get_id() == self._get_id() else f'{store.name} (in view `{view.name}`)'
534
+ for view, store in dependent_stores
535
+ ]
536
+ raise excs.Error(
537
+ f'Cannot drop column `{name}` because the following external stores depend on it:\n'
538
+ f'{", ".join(dependent_store_names)}'
539
+ )
540
+
541
+ self._tbl_version.drop_column(col)
504
542
 
505
543
  def rename_column(self, old_name: str, new_name: str) -> None:
506
544
  """Rename a column.
@@ -522,15 +560,15 @@ class Table(SchemaObject):
522
560
 
523
561
  def add_embedding_index(
524
562
  self, col_name: str, *, idx_name: Optional[str] = None,
525
- text_embed: Optional[pixeltable.Function] = None, img_embed: Optional[pixeltable.Function] = None,
563
+ string_embed: Optional[pixeltable.Function] = None, image_embed: Optional[pixeltable.Function] = None,
526
564
  metric: str = 'cosine'
527
565
  ) -> None:
528
566
  """Add an index to the table.
529
567
  Args:
530
568
  col_name: name of column to index
531
569
  idx_name: name of index, which needs to be unique for the table; if not provided, a name will be generated
532
- text_embed: function to embed text; required if the column is a text column
533
- img_embed: function to embed images; required if the column is an image column
570
+ string_embed: function to embed text; required if the column is a text column
571
+ image_embed: function to embed images; required if the column is an image column
534
572
  metric: distance metric to use for the index; one of 'cosine', 'ip', 'l2'; default is 'cosine'
535
573
 
536
574
  Raises:
@@ -539,13 +577,13 @@ class Table(SchemaObject):
539
577
  Examples:
540
578
  Add an index to the ``img`` column:
541
579
 
542
- >>> tbl.add_embedding_index('img', img_embed=...)
580
+ >>> tbl.add_embedding_index('img', image_embed=...)
543
581
 
544
582
  Add another index to the ``img`` column, using the inner product as the distance metric,
545
- and with a specific name; ``text_embed`` is also specified in order to search with text:
583
+ and with a specific name; ``string_embed`` is also specified in order to search with text:
546
584
 
547
585
  >>> tbl.add_embedding_index(
548
- 'img', idx_name='clip_idx', img_embed=..., text_embed=...text_embed..., metric='ip')
586
+ 'img', idx_name='clip_idx', image_embed=..., string_embed=..., metric='ip')
549
587
  """
550
588
  if self._tbl_version_path.is_snapshot():
551
589
  raise excs.Error('Cannot add an index to a snapshot')
@@ -557,7 +595,7 @@ class Table(SchemaObject):
557
595
  raise excs.Error(f'Duplicate index name: {idx_name}')
558
596
  from pixeltable.index import EmbeddingIndex
559
597
  # create the EmbeddingIndex instance to verify args
560
- idx = EmbeddingIndex(col, metric=metric, text_embed=text_embed, img_embed=img_embed)
598
+ idx = EmbeddingIndex(col, metric=metric, string_embed=string_embed, image_embed=image_embed)
561
599
  status = self._tbl_version.add_index(col, idx_name=idx_name, idx=idx)
562
600
  # TODO: how to deal with exceptions here? drop the index and raise?
563
601
 
@@ -604,26 +642,26 @@ class Table(SchemaObject):
604
642
  raise excs.Error('Cannot drop an index from a snapshot')
605
643
  self._check_is_dropped()
606
644
  if (column_name is None) == (idx_name is None):
607
- raise excs.Error('Exactly one of column_name or idx_name must be provided')
645
+ raise excs.Error("Exactly one of 'column_name' or 'idx_name' must be provided")
608
646
 
609
647
  if idx_name is not None:
610
648
  if idx_name not in self._tbl_version.idxs_by_name:
611
- raise excs.Error(f'Index {idx_name} does not exist')
649
+ raise excs.Error(f'Index {idx_name!r} does not exist')
612
650
  idx_id = self._tbl_version.idxs_by_name[idx_name].id
613
651
  else:
614
652
  col = self._tbl_version_path.get_column(column_name, include_bases=True)
615
653
  if col is None:
616
- raise excs.Error(f'Column {column_name} unknown')
654
+ raise excs.Error(f'Column {column_name!r} unknown')
617
655
  if col.tbl.id != self._tbl_version.id:
618
656
  raise excs.Error(
619
- f'Column {column_name}: cannot drop index from column that belongs to base ({col.tbl.name})')
657
+ f'Column {column_name!r}: cannot drop index from column that belongs to base ({col.tbl.name}!r)')
620
658
  idx_info = [info for info in self._tbl_version.idxs_by_name.values() if info.col.id == col.id]
621
659
  if _idx_class is not None:
622
660
  idx_info = [info for info in idx_info if isinstance(info.idx, _idx_class)]
623
661
  if len(idx_info) == 0:
624
- raise excs.Error(f'Column {column_name} does not have an index')
662
+ raise excs.Error(f'Column {column_name!r} does not have an index')
625
663
  if len(idx_info) > 1:
626
- raise excs.Error(f'Column {column_name} has multiple indices; specify idx_name instead')
664
+ raise excs.Error(f"Column {column_name!r} has multiple indices; specify 'idx_name' instead")
627
665
  idx_id = idx_info[0].id
628
666
  self._tbl_version.drop_index(idx_id)
629
667
 
@@ -678,13 +716,13 @@ class Table(SchemaObject):
678
716
  raise NotImplementedError
679
717
 
680
718
  def update(
681
- self, value_spec: dict[str, Any], where: Optional['pixeltable.exprs.Predicate'] = None, cascade: bool = True
719
+ self, value_spec: dict[str, Any], where: Optional['pixeltable.exprs.Expr'] = None, cascade: bool = True
682
720
  ) -> UpdateStatus:
683
721
  """Update rows in this table.
684
722
 
685
723
  Args:
686
724
  value_spec: a dictionary mapping column names to literal values or Pixeltable expressions.
687
- where: a Predicate to filter rows to update.
725
+ where: a predicate to filter rows to update.
688
726
  cascade: if True, also update all computed columns that transitively depend on the updated columns.
689
727
 
690
728
  Examples:
@@ -748,11 +786,11 @@ class Table(SchemaObject):
748
786
  row_updates.append(col_vals)
749
787
  return self._tbl_version.batch_update(row_updates, rowids, cascade)
750
788
 
751
- def delete(self, where: Optional['pixeltable.exprs.Predicate'] = None) -> UpdateStatus:
789
+ def delete(self, where: Optional['pixeltable.exprs.Expr'] = None) -> UpdateStatus:
752
790
  """Delete rows in this table.
753
791
 
754
792
  Args:
755
- where: a Predicate to filter rows to delete.
793
+ where: a predicate to filter rows to delete.
756
794
 
757
795
  Examples:
758
796
  Delete all rows in a table:
@@ -823,13 +861,13 @@ class Table(SchemaObject):
823
861
  Links the specified `ExternalStore` to this table.
824
862
  """
825
863
  if self._tbl_version.is_snapshot:
826
- raise excs.Error(f'Table `{self.get_name()}` is a snapshot, so it cannot be linked to an external store.')
864
+ raise excs.Error(f'Table `{self.name}` is a snapshot, so it cannot be linked to an external store.')
827
865
  self._check_is_dropped()
828
866
  if store.name in self.external_stores:
829
- raise excs.Error(f'Table `{self.get_name()}` already has an external store with that name: {store.name}')
830
- _logger.info(f'Linking external store `{store.name}` to table `{self.get_name()}`')
867
+ raise excs.Error(f'Table `{self.name}` already has an external store with that name: {store.name}')
868
+ _logger.info(f'Linking external store `{store.name}` to table `{self.name}`')
831
869
  self._tbl_version.link_external_store(store)
832
- print(f'Linked external store `{store.name}` to table `{self.get_name()}`.')
870
+ print(f'Linked external store `{store.name}` to table `{self.name}`.')
833
871
 
834
872
  def unlink_external_stores(
835
873
  self,
@@ -861,11 +899,11 @@ class Table(SchemaObject):
861
899
  if not ignore_errors:
862
900
  for store in stores:
863
901
  if store not in all_stores:
864
- raise excs.Error(f'Table `{self.get_name()}` has no external store with that name: {store}')
902
+ raise excs.Error(f'Table `{self.name}` has no external store with that name: {store}')
865
903
 
866
904
  for store in stores:
867
905
  self._tbl_version.unlink_external_store(store, delete_external_data=delete_external_data)
868
- print(f'Unlinked external store from table `{self.get_name()}`: {store}')
906
+ print(f'Unlinked external store from table `{self.name}`: {store}')
869
907
 
870
908
  def sync(
871
909
  self,
@@ -893,7 +931,7 @@ class Table(SchemaObject):
893
931
 
894
932
  for store in stores:
895
933
  if store not in all_stores:
896
- raise excs.Error(f'Table `{self.get_name()}` has no external store with that name: {store}')
934
+ raise excs.Error(f'Table `{self.name}` has no external store with that name: {store}')
897
935
 
898
936
  from pixeltable.io import SyncStatus
899
937
 
pixeltable/catalog/table_version.py CHANGED
@@ -540,39 +540,12 @@ class TableVersion:
540
540
  num_rows=row_count, num_computed_values=row_count, num_excs=num_excs,
541
541
  cols_with_excs=[f'{col.tbl.name}.{col.name}'for col in cols_with_excs if col.name is not None])
542
542
 
543
- def drop_column(self, name: str) -> None:
543
+ def drop_column(self, col: Column) -> None:
544
544
  """Drop a column from the table.
545
545
  """
546
546
  from pixeltable.catalog import Catalog
547
547
 
548
548
  assert not self.is_snapshot
549
- if name not in self.cols_by_name:
550
- raise excs.Error(f'Unknown column: {name}')
551
- col = self.cols_by_name[name]
552
- dependent_user_cols = [c for c in col.dependent_cols if c.name is not None]
553
- if len(dependent_user_cols) > 0:
554
- raise excs.Error(
555
- f'Cannot drop column `{name}` because the following columns depend on it:\n'
556
- f'{", ".join(c.name for c in dependent_user_cols)}'
557
- )
558
- # See if this column has a dependent store. We need to look through all stores in all
559
- # (transitive) views of this table.
560
- transitive_views = Catalog.get().tbls[self.id].get_views(recursive=True)
561
- dependent_stores = [
562
- (view, store)
563
- for view in transitive_views
564
- for store in view._tbl_version.external_stores.values()
565
- if col in store.get_local_columns()
566
- ]
567
- if len(dependent_stores) > 0:
568
- dependent_store_names = [
569
- store.name if view._get_id() == self.id else f'{store.name} (in view `{view.get_name()}`)'
570
- for view, store in dependent_stores
571
- ]
572
- raise excs.Error(
573
- f'Cannot drop column `{name}` because the following external stores depend on it:\n'
574
- f'{", ".join(dependent_store_names)}'
575
- )
576
549
 
577
550
  # we're creating a new schema version
578
551
  self.version += 1
@@ -596,7 +569,7 @@ class TableVersion:
596
569
  del self.idxs_by_name[idx_name]
597
570
  self._drop_columns(dropped_cols)
598
571
  self._update_md(time.time(), conn, preceding_schema_version=preceding_schema_version)
599
- _logger.info(f'Dropped column {name} from table {self.name}, new version: {self.version}')
572
+ _logger.info(f'Dropped column {col.name} from table {self.name}, new version: {self.version}')
600
573
 
601
574
  def _drop_columns(self, cols: Iterable[Column]) -> None:
602
575
  """Mark columns as dropped"""
@@ -705,12 +678,12 @@ class TableVersion:
705
678
  return result
706
679
 
707
680
  def update(
708
- self, value_spec: dict[str, Any], where: Optional['exprs.Predicate'] = None, cascade: bool = True
681
+ self, value_spec: dict[str, Any], where: Optional['exprs.Expr'] = None, cascade: bool = True
709
682
  ) -> UpdateStatus:
710
683
  """Update rows in this TableVersionPath.
711
684
  Args:
712
685
  value_spec: a list of (column, value) pairs specifying the columns to update and their new values.
713
- where: a Predicate to filter rows to update.
686
+ where: a predicate to filter rows to update.
714
687
  cascade: if True, also update all computed columns that transitively depend on the updated columns,
715
688
  including within views.
716
689
  """
@@ -721,8 +694,8 @@ class TableVersion:
721
694
 
722
695
  update_spec = self._validate_update_spec(value_spec, allow_pk=False, allow_exprs=True)
723
696
  if where is not None:
724
- if not isinstance(where, exprs.Predicate):
725
- raise excs.Error(f"'where' argument must be a Predicate, got {type(where)}")
697
+ if not isinstance(where, exprs.Expr):
698
+ raise excs.Error(f"'where' argument must be a predicate, got {type(where)}")
726
699
  analysis_info = Planner.analyze(self.path, where)
727
700
  # for now we require that the updated rows can be identified via SQL, rather than via a Python filter
728
701
  if analysis_info.filter is not None:
@@ -784,7 +757,7 @@ class TableVersion:
784
757
 
785
758
  def _update(
786
759
  self, conn: sql.engine.Connection, update_targets: dict[Column, 'pixeltable.exprs.Expr'],
787
- where_clause: Optional['pixeltable.exprs.Predicate'] = None, cascade: bool = True,
760
+ where_clause: Optional['pixeltable.exprs.Expr'] = None, cascade: bool = True,
788
761
  show_progress: bool = True
789
762
  ) -> UpdateStatus:
790
763
  from pixeltable.plan import Planner
@@ -816,8 +789,6 @@ class TableVersion:
816
789
  raise excs.Error(f'Column {col_name} is computed and cannot be updated')
817
790
  if col.is_pk and not allow_pk:
818
791
  raise excs.Error(f'Column {col_name} is a primary key column and cannot be updated')
819
- if col.col_type.is_media_type():
820
- raise excs.Error(f'Column {col_name} has type image/video/audio/document and cannot be updated')
821
792
 
822
793
  # make sure that the value is compatible with the column type
823
794
  try:
@@ -875,17 +846,17 @@ class TableVersion:
875
846
  result.cols_with_excs = list(dict.fromkeys(result.cols_with_excs).keys()) # remove duplicates
876
847
  return result
877
848
 
878
- def delete(self, where: Optional['exprs.Predicate'] = None) -> UpdateStatus:
849
+ def delete(self, where: Optional['exprs.Expr'] = None) -> UpdateStatus:
879
850
  """Delete rows in this table.
880
851
  Args:
881
- where: a Predicate to filter rows to delete.
852
+ where: a predicate to filter rows to delete.
882
853
  """
883
854
  assert self.is_insertable()
884
- from pixeltable.exprs import Predicate
855
+ from pixeltable.exprs import Expr
885
856
  from pixeltable.plan import Planner
886
857
  if where is not None:
887
- if not isinstance(where, Predicate):
888
- raise excs.Error(f"'where' argument must be a Predicate, got {type(where)}")
858
+ if not isinstance(where, Expr):
859
+ raise excs.Error(f"'where' argument must be a predicate, got {type(where)}")
889
860
  analysis_info = Planner.analyze(self.path, where)
890
861
  # for now we require that the updated rows can be identified via SQL, rather than via a Python filter
891
862
  if analysis_info.filter is not None:
@@ -899,11 +870,11 @@ class TableVersion:
899
870
  return status
900
871
 
901
872
  def propagate_delete(
902
- self, where: Optional['exprs.Predicate'], base_versions: List[Optional[int]],
873
+ self, where: Optional['exprs.Expr'], base_versions: List[Optional[int]],
903
874
  conn: sql.engine.Connection, timestamp: float) -> int:
904
875
  """Delete rows in this table and propagate to views.
905
876
  Args:
906
- where: a Predicate to filter rows to delete.
877
+ where: a predicate to filter rows to delete.
907
878
  Returns:
908
879
  number of deleted rows
909
880
  """
pixeltable/catalog/view.py CHANGED
@@ -51,7 +51,7 @@ class View(Table):
51
51
  @classmethod
52
52
  def create(
53
53
  cls, dir_id: UUID, name: str, base: TableVersionPath, schema: Dict[str, Any],
54
- predicate: 'pxt.exprs.Predicate', is_snapshot: bool, num_retained_versions: int, comment: str,
54
+ predicate: 'pxt.exprs.Expr', is_snapshot: bool, num_retained_versions: int, comment: str,
55
55
  iterator_cls: Optional[Type[ComponentIterator]], iterator_args: Optional[Dict]
56
56
  ) -> View:
57
57
  columns = cls._create_columns(schema)
@@ -213,5 +213,5 @@ class View(Table):
213
213
  ) -> UpdateStatus:
214
214
  raise excs.Error(f'{self.display_name()} {self._name!r}: cannot insert into view')
215
215
 
216
- def delete(self, where: Optional['pixeltable.exprs.Predicate'] = None) -> UpdateStatus:
216
+ def delete(self, where: Optional['pixeltable.exprs.Expr'] = None) -> UpdateStatus:
217
217
  raise excs.Error(f'{self.display_name()} {self._name!r}: cannot delete from view')
pixeltable/dataframe.py CHANGED
@@ -153,7 +153,7 @@ class DataFrame:
153
153
  self,
154
154
  tbl: catalog.TableVersionPath,
155
155
  select_list: Optional[List[Tuple[exprs.Expr, Optional[str]]]] = None,
156
- where_clause: Optional[exprs.Predicate] = None,
156
+ where_clause: Optional[exprs.Expr] = None,
157
157
  group_by_clause: Optional[List[exprs.Expr]] = None,
158
158
  grouping_tbl: Optional[catalog.TableVersion] = None,
159
159
  order_by_clause: Optional[List[Tuple[exprs.Expr, bool]]] = None, # List[(expr, asc)]
@@ -530,7 +530,11 @@ class DataFrame:
530
530
  limit=self.limit_val,
531
531
  )
532
532
 
533
- def where(self, pred: exprs.Predicate) -> DataFrame:
533
+ def where(self, pred: exprs.Expr) -> DataFrame:
534
+ if not isinstance(pred, exprs.Expr):
535
+ raise excs.Error(f'Where() requires a Pixeltable expression, but instead got {type(pred)}')
536
+ if not pred.col_type.is_bool_type():
537
+ raise excs.Error(f'Where(): expression needs to return bool, but instead returns {pred.col_type}')
534
538
  return DataFrame(
535
539
  self.tbl,
536
540
  select_list=self.select_list,
@@ -558,7 +562,7 @@ class DataFrame:
558
562
  # we need to make sure that the grouping table is a base of self.tbl
559
563
  base = self.tbl.find_tbl_version(item._tbl_version_path.tbl_id())
560
564
  if base is None or base.id == self.tbl.tbl_id():
561
- raise excs.Error(f'group_by(): {item.get_name()} is not a base table of {self.tbl.tbl_name()}')
565
+ raise excs.Error(f'group_by(): {item.name} is not a base table of {self.tbl.tbl_name()}')
562
566
  grouping_tbl = item._tbl_version_path.tbl_version
563
567
  break
564
568
  if not isinstance(item, exprs.Expr):
@@ -628,12 +632,9 @@ class DataFrame:
628
632
  def __getitem__(self, index: object) -> DataFrame:
629
633
  """
630
634
  Allowed:
631
- - [<Predicate>]: filter operation
632
635
  - [List[Expr]]/[Tuple[Expr]]: setting the select list
633
636
  - [Expr]: setting a single-col select list
634
637
  """
635
- if isinstance(index, exprs.Predicate):
636
- return self.where(index)
637
638
  if isinstance(index, tuple):
638
639
  index = list(index)
639
640
  if isinstance(index, exprs.Expr):
@@ -668,7 +669,7 @@ class DataFrame:
668
669
  tbl = catalog.TableVersionPath.from_dict(d['tbl'])
669
670
  select_list = [(exprs.Expr.from_dict(e), name) for e, name in d['select_list']] \
670
671
  if d['select_list'] is not None else None
671
- where_clause = exprs.Predicate.from_dict(d['where_clause']) \
672
+ where_clause = exprs.Expr.from_dict(d['where_clause']) \
672
673
  if d['where_clause'] is not None else None
673
674
  group_by_clause = [exprs.Expr.from_dict(e) for e in d['group_by_clause']] \
674
675
  if d['group_by_clause'] is not None else None
pixeltable/exec/expr_eval_node.py CHANGED
@@ -50,7 +50,14 @@ class ExprEvalNode(ExecNode):
50
50
 
51
51
  def _open(self) -> None:
52
52
  warnings.simplefilter("ignore", category=TqdmWarning)
53
- if self.ctx.show_pbar:
53
+ # This is a temporary hack. When B-tree indices on string columns were implemented (via computed columns
54
+ # that invoke the `BtreeIndex.str_filter` udf), it resulted in frivolous progress bars appearing on every
55
+ # insertion. This special-cases the `str_filter` call to suppress the corresponding progress bar.
56
+ # TODO(aaron-siegel) Remove this hack once we clean up progress bars more generally.
57
+ is_str_filter_node = all(
58
+ isinstance(expr, exprs.FunctionCall) and expr.fn.name == 'str_filter' for expr in self.output_exprs
59
+ )
60
+ if self.ctx.show_pbar and not is_str_filter_node:
54
61
  self.pbar = tqdm(
55
62
  total=len(self.target_exprs) * self.ctx.num_rows,
56
63
  desc='Computing cells',
pixeltable/exec/sql_scan_node.py CHANGED
@@ -19,7 +19,7 @@ class SqlScanNode(ExecNode):
19
19
  def __init__(
20
20
  self, tbl: catalog.TableVersionPath, row_builder: exprs.RowBuilder,
21
21
  select_list: Iterable[exprs.Expr],
22
- where_clause: Optional[exprs.Expr] = None, filter: Optional[exprs.Predicate] = None,
22
+ where_clause: Optional[exprs.Expr] = None, filter: Optional[exprs.Expr] = None,
23
23
  order_by_items: Optional[List[Tuple[exprs.Expr, bool]]] = None,
24
24
  limit: int = 0, set_pk: bool = False, exact_version_only: Optional[List[catalog.TableVersion]] = None
25
25
  ):
pixeltable/exprs/__init__.py CHANGED
@@ -17,7 +17,6 @@ from .json_mapper import JsonMapper
17
17
  from .json_path import RELATIVE_PATH_ROOT, JsonPath
18
18
  from .literal import Literal
19
19
  from .object_ref import ObjectRef
20
- from .predicate import Predicate
21
20
  from .row_builder import RowBuilder, ColumnSlotIdx, ExecProfile
22
21
  from .rowid_ref import RowidRef
23
22
  from .similarity_expr import SimilarityExpr
pixeltable/exprs/column_ref.py CHANGED
@@ -63,14 +63,9 @@ class ColumnRef(Expr):
63
63
 
64
64
  return super().__getattr__(name)
65
65
 
66
- def similarity(self, other: Any) -> Expr:
67
- # if isinstance(other, Expr):
68
- # raise excs.Error(f'similarity(): requires a string or a PIL.Image.Image object, not an expression')
69
- item = Expr.from_object(other)
70
- if item is None or not(item.col_type.is_string_type() or item.col_type.is_image_type()):
71
- raise excs.Error(f'similarity(): requires a string or a PIL.Image.Image object, not a {type(other)}')
66
+ def similarity(self, item: Any, *, idx: Optional[str] = None) -> Expr:
72
67
  from .similarity_expr import SimilarityExpr
73
- return SimilarityExpr(self, item)
68
+ return SimilarityExpr(self, item, idx_name=idx)
74
69
 
75
70
  def default_column_name(self) -> Optional[str]:
76
71
  return str(self)
pixeltable/exprs/comparison.py CHANGED
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Optional, List, Any, Dict, Tuple
3
+ from typing import Optional, List, Any, Dict
4
4
 
5
5
  import sqlalchemy as sql
6
6
 
@@ -9,15 +9,15 @@ from .data_row import DataRow
9
9
  from .expr import Expr
10
10
  from .globals import ComparisonOperator
11
11
  from .literal import Literal
12
- from .predicate import Predicate
13
12
  from .row_builder import RowBuilder
14
13
  import pixeltable.exceptions as excs
15
14
  import pixeltable.index as index
15
+ import pixeltable.type_system as ts
16
16
 
17
17
 
18
- class Comparison(Predicate):
18
+ class Comparison(Expr):
19
19
  def __init__(self, operator: ComparisonOperator, op1: Expr, op2: Expr):
20
- super().__init__()
20
+ super().__init__(ts.BoolType())
21
21
  self.operator = operator
22
22
 
23
23
  # if this is a comparison of a column to a literal (ie, could be used as a search argument in an index lookup),
@@ -50,7 +50,7 @@ class Comparison(Predicate):
50
50
  def _equals(self, other: Comparison) -> bool:
51
51
  return self.operator == other.operator
52
52
 
53
- def _id_attrs(self) -> List[Tuple[str, Any]]:
53
+ def _id_attrs(self) -> list[tuple[str, Any]]:
54
54
  return super()._id_attrs() + [('operator', self.operator.value)]
55
55
 
56
56
  @property
pixeltable/exprs/compound_predicate.py CHANGED
@@ -1,20 +1,20 @@
1
1
  from __future__ import annotations
2
- from typing import Optional, List, Any, Dict, Tuple, Callable
2
+
3
3
  import operator
4
+ from typing import Optional, List, Any, Dict, Callable
4
5
 
5
6
  import sqlalchemy as sql
6
7
 
8
+ from .data_row import DataRow
7
9
  from .expr import Expr
8
10
  from .globals import LogicalOperator
9
- from .predicate import Predicate
10
- from .data_row import DataRow
11
11
  from .row_builder import RowBuilder
12
- import pixeltable.catalog as catalog
12
+ import pixeltable.type_system as ts
13
13
 
14
14
 
15
- class CompoundPredicate(Predicate):
16
- def __init__(self, operator: LogicalOperator, operands: List[Predicate]):
17
- super().__init__()
15
+ class CompoundPredicate(Expr):
16
+ def __init__(self, operator: LogicalOperator, operands: List[Expr]):
17
+ super().__init__(ts.BoolType())
18
18
  self.operator = operator
19
19
  # operands are stored in self.components
20
20
  if self.operator == LogicalOperator.NOT:
@@ -22,7 +22,7 @@ class CompoundPredicate(Predicate):
22
22
  self.components = operands
23
23
  else:
24
24
  assert len(operands) > 1
25
- self.operands: List[Predicate] = []
25
+ self.operands: List[Expr] = []
26
26
  for operand in operands:
27
27
  self._merge_operand(operand)
28
28
 
@@ -34,14 +34,14 @@ class CompoundPredicate(Predicate):
34
34
  return f' {self.operator} '.join([f'({e})' for e in self.components])
35
35
 
36
36
  @classmethod
37
- def make_conjunction(cls, operands: List[Predicate]) -> Optional[Predicate]:
37
+ def make_conjunction(cls, operands: List[Expr]) -> Optional[Expr]:
38
38
  if len(operands) == 0:
39
39
  return None
40
40
  if len(operands) == 1:
41
41
  return operands[0]
42
42
  return CompoundPredicate(LogicalOperator.AND, operands)
43
43
 
44
- def _merge_operand(self, operand: Predicate) -> None:
44
+ def _merge_operand(self, operand: Expr) -> None:
45
45
  """
46
46
  Merge this operand, if possible, otherwise simply record it.
47
47
  """
@@ -55,11 +55,11 @@ class CompoundPredicate(Predicate):
55
55
  def _equals(self, other: CompoundPredicate) -> bool:
56
56
  return self.operator == other.operator
57
57
 
58
- def _id_attrs(self) -> List[Tuple[str, Any]]:
58
+ def _id_attrs(self) -> list[tuple[str, Any]]:
59
59
  return super()._id_attrs() + [('operator', self.operator.value)]
60
60
 
61
61
  def split_conjuncts(
62
- self, condition: Callable[[Predicate], bool]) -> Tuple[List[Predicate], Optional[Predicate]]:
62
+ self, condition: Callable[[Expr], bool]) -> tuple[list[Expr], Optional[Expr]]:
63
63
  if self.operator == LogicalOperator.OR or self.operator == LogicalOperator.NOT:
64
64
  return super().split_conjuncts(condition)
65
65
  matches = [op for op in self.components if condition(op)]