pixeltable 0.2.24__py3-none-any.whl → 0.2.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. pixeltable/__version__.py +2 -2
  2. pixeltable/catalog/table.py +247 -83
  3. pixeltable/catalog/view.py +5 -2
  4. pixeltable/dataframe.py +240 -92
  5. pixeltable/exec/__init__.py +1 -1
  6. pixeltable/exec/exec_node.py +6 -7
  7. pixeltable/exec/sql_node.py +91 -44
  8. pixeltable/exprs/__init__.py +1 -0
  9. pixeltable/exprs/arithmetic_expr.py +1 -1
  10. pixeltable/exprs/array_slice.py +1 -1
  11. pixeltable/exprs/column_property_ref.py +1 -1
  12. pixeltable/exprs/column_ref.py +29 -2
  13. pixeltable/exprs/comparison.py +1 -1
  14. pixeltable/exprs/compound_predicate.py +1 -1
  15. pixeltable/exprs/expr.py +11 -5
  16. pixeltable/exprs/expr_set.py +8 -0
  17. pixeltable/exprs/function_call.py +14 -11
  18. pixeltable/exprs/in_predicate.py +1 -1
  19. pixeltable/exprs/inline_expr.py +3 -3
  20. pixeltable/exprs/is_null.py +1 -1
  21. pixeltable/exprs/json_mapper.py +1 -1
  22. pixeltable/exprs/json_path.py +1 -1
  23. pixeltable/exprs/method_ref.py +1 -1
  24. pixeltable/exprs/rowid_ref.py +1 -1
  25. pixeltable/exprs/similarity_expr.py +4 -1
  26. pixeltable/exprs/sql_element_cache.py +4 -0
  27. pixeltable/exprs/type_cast.py +2 -2
  28. pixeltable/exprs/variable.py +3 -0
  29. pixeltable/func/expr_template_function.py +3 -0
  30. pixeltable/func/function.py +37 -1
  31. pixeltable/func/signature.py +1 -0
  32. pixeltable/functions/mistralai.py +0 -2
  33. pixeltable/functions/ollama.py +4 -4
  34. pixeltable/globals.py +32 -18
  35. pixeltable/index/embedding_index.py +6 -1
  36. pixeltable/io/__init__.py +1 -1
  37. pixeltable/io/parquet.py +39 -19
  38. pixeltable/iterators/__init__.py +1 -0
  39. pixeltable/iterators/image.py +100 -0
  40. pixeltable/iterators/video.py +7 -8
  41. pixeltable/metadata/__init__.py +1 -1
  42. pixeltable/metadata/converters/convert_22.py +17 -0
  43. pixeltable/metadata/notes.py +1 -0
  44. pixeltable/plan.py +129 -51
  45. pixeltable/store.py +1 -1
  46. pixeltable/tool/create_test_db_dump.py +4 -1
  47. pixeltable/type_system.py +1 -1
  48. pixeltable/utils/arrow.py +8 -3
  49. pixeltable/utils/description_helper.py +89 -0
  50. {pixeltable-0.2.24.dist-info → pixeltable-0.2.26.dist-info}/METADATA +28 -12
  51. {pixeltable-0.2.24.dist-info → pixeltable-0.2.26.dist-info}/RECORD +54 -51
  52. {pixeltable-0.2.24.dist-info → pixeltable-0.2.26.dist-info}/WHEEL +1 -1
  53. {pixeltable-0.2.24.dist-info → pixeltable-0.2.26.dist-info}/LICENSE +0 -0
  54. {pixeltable-0.2.24.dist-info → pixeltable-0.2.26.dist-info}/entry_points.txt +0 -0
pixeltable/__version__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  # These version placeholders will be replaced during build.
2
- __version__ = "0.2.24"
3
- __version_tuple__ = (0, 2, 24)
2
+ __version__ = "0.2.26"
3
+ __version_tuple__ = (0, 2, 26)
pixeltable/catalog/table.py CHANGED
@@ -10,7 +10,6 @@ from typing import TYPE_CHECKING, Any, Callable, Iterable, Literal, Optional, Se
10
10
  from uuid import UUID
11
11
 
12
12
  import pandas as pd
13
- import pandas.io.formats.style
14
13
  import sqlalchemy as sql
15
14
 
16
15
  import pixeltable as pxt
@@ -21,16 +20,19 @@ import pixeltable.exprs as exprs
21
20
  import pixeltable.index as index
22
21
  import pixeltable.metadata.schema as schema
23
22
  import pixeltable.type_system as ts
24
- from pixeltable.utils.filecache import FileCache
25
23
 
24
+ from ..exprs import ColumnRef
25
+ from ..utils.description_helper import DescriptionHelper
26
+ from ..utils.filecache import FileCache
26
27
  from .column import Column
27
- from .globals import _ROWID_COLUMN_NAME, UpdateStatus, is_system_column_name, is_valid_identifier, MediaValidation
28
+ from .globals import _ROWID_COLUMN_NAME, MediaValidation, UpdateStatus, is_system_column_name, is_valid_identifier
28
29
  from .schema_object import SchemaObject
29
30
  from .table_version import TableVersion
30
31
  from .table_version_path import TableVersionPath
31
32
 
32
33
  if TYPE_CHECKING:
33
34
  import torch.utils.data
35
+ import pixeltable.plan
34
36
 
35
37
  _logger = logging.getLogger('pixeltable')
36
38
 
@@ -45,7 +47,7 @@ class Table(SchemaObject):
45
47
  def __init__(self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath):
46
48
  super().__init__(id, name, dir_id)
47
49
  self._is_dropped = False
48
- self._tbl_version_path = tbl_version_path
50
+ self.__tbl_version_path = tbl_version_path
49
51
  self.__query_scope = self.QueryScope(self)
50
52
 
51
53
  class QueryScope:
@@ -62,6 +64,7 @@ class Table(SchemaObject):
62
64
  raise AttributeError(f'Table {self.__table._name!r} has no query with that name: {name!r}')
63
65
 
64
66
  def _move(self, new_name: str, new_dir_id: UUID) -> None:
67
+ self._check_is_dropped()
65
68
  super()._move(new_name, new_dir_id)
66
69
  with env.Env.get().engine.begin() as conn:
67
70
  stmt = sql.text((
@@ -95,6 +98,7 @@ class Table(SchemaObject):
95
98
  }
96
99
  ```
97
100
  """
101
+ self._check_is_dropped()
98
102
  md = super().get_metadata()
99
103
  md['base'] = self._base._path if self._base is not None else None
100
104
  md['schema'] = self._schema
@@ -115,6 +119,12 @@ class Table(SchemaObject):
115
119
  """Return TableVersion for just this table."""
116
120
  return self._tbl_version_path.tbl_version
117
121
 
122
+ @property
123
+ def _tbl_version_path(self) -> TableVersionPath:
124
+ """Return TableVersionPath for just this table."""
125
+ self._check_is_dropped()
126
+ return self.__tbl_version_path
127
+
118
128
  def __hash__(self) -> int:
119
129
  return hash(self._tbl_version.id)
120
130
 
@@ -152,6 +162,7 @@ class Table(SchemaObject):
152
162
  Returns:
153
163
  A list of view paths.
154
164
  """
165
+ self._check_is_dropped()
155
166
  return [t._path for t in self._get_views(recursive=recursive)]
156
167
 
157
168
  def _get_views(self, *, recursive: bool = True) -> list['Table']:
@@ -165,7 +176,8 @@ class Table(SchemaObject):
165
176
  """Return a DataFrame for this table.
166
177
  """
167
178
  # local import: avoid circular imports
168
- return pxt.DataFrame(self._tbl_version_path)
179
+ from pixeltable.plan import FromClause
180
+ return pxt.DataFrame(FromClause(tbls=[self._tbl_version_path]))
169
181
 
170
182
  @property
171
183
  def queries(self) -> 'Table.QueryScope':
@@ -179,6 +191,13 @@ class Table(SchemaObject):
179
191
  """Return a [`DataFrame`][pixeltable.DataFrame] for this table."""
180
192
  return self._df().where(pred)
181
193
 
194
+ def join(
195
+ self, other: 'Table', *, on: Optional['exprs.Expr'] = None,
196
+ how: 'pixeltable.plan.JoinType.LiteralType' = 'inner'
197
+ ) -> 'pxt.DataFrame':
198
+ """Return a [`DataFrame`][pixeltable.DataFrame] for this table."""
199
+ return self._df().join(other, on=on, how=how)
200
+
182
201
  def order_by(self, *items: 'exprs.Expr', asc: bool = True) -> 'pxt.DataFrame':
183
202
  """Return a [`DataFrame`][pixeltable.DataFrame] for this table."""
184
203
  return self._df().order_by(*items, asc=asc)
@@ -244,6 +263,18 @@ class Table(SchemaObject):
244
263
  base_id = self._tbl_version_path.base.tbl_version.id
245
264
  return catalog.Catalog.get().tbls[base_id]
246
265
 
266
+ @property
267
+ def _bases(self) -> list['Table']:
268
+ """
269
+ The ancestor list of bases of this table, starting with its immediate base.
270
+ """
271
+ bases = []
272
+ base = self._base
273
+ while base is not None:
274
+ bases.append(base)
275
+ base = base._base
276
+ return bases
277
+
247
278
  @property
248
279
  def _comment(self) -> str:
249
280
  return self._tbl_version.comment
@@ -256,48 +287,103 @@ class Table(SchemaObject):
256
287
  def _media_validation(self) -> MediaValidation:
257
288
  return self._tbl_version.media_validation
258
289
 
259
- def _description(self, cols: Optional[Iterable[Column]] = None) -> pd.DataFrame:
260
- cols = self._tbl_version_path.columns()
261
- df = pd.DataFrame({
262
- 'Column Name': [c.name for c in cols],
263
- 'Type': [c.col_type._to_str(as_schema=True) for c in cols],
264
- 'Computed With': [c.value_expr.display_str(inline=False) if c.value_expr is not None else '' for c in cols],
265
- })
266
- return df
267
-
268
- def _description_html(self, cols: Optional[Iterable[Column]] = None) -> pandas.io.formats.style.Styler:
269
- pd_df = self._description(cols)
270
- # white-space: pre-wrap: print \n as newline
271
- # th: center-align headings
272
- return (
273
- pd_df.style.set_properties(None, **{'white-space': 'pre-wrap', 'text-align': 'left'})
274
- .set_table_styles([dict(selector='th', props=[('text-align', 'center')])])
275
- .hide(axis='index')
290
+ def __repr__(self) -> str:
291
+ return self._descriptors().to_string()
292
+
293
+ def _repr_html_(self) -> str:
294
+ return self._descriptors().to_html()
295
+
296
+ def _descriptors(self) -> DescriptionHelper:
297
+ """
298
+ Constructs a list of descriptors for this table that can be pretty-printed.
299
+ """
300
+ helper = DescriptionHelper()
301
+ helper.append(self._title_descriptor())
302
+ helper.append(self._col_descriptor())
303
+ idxs = self._index_descriptor()
304
+ if not idxs.empty:
305
+ helper.append(idxs)
306
+ stores = self._external_store_descriptor()
307
+ if not stores.empty:
308
+ helper.append(stores)
309
+ if self._comment:
310
+ helper.append(f'COMMENT: {self._comment}')
311
+ return helper
312
+
313
+ def _title_descriptor(self) -> str:
314
+ title: str
315
+ if self._base is None:
316
+ title = f'Table\n{self._path!r}'
317
+ else:
318
+ title = f'View\n{self._path!r}'
319
+ title += f'\n(of {self.__bases_to_desc()})'
320
+ return title
321
+
322
+ def _col_descriptor(self, columns: Optional[list[str]] = None) -> pd.DataFrame:
323
+ return pd.DataFrame(
324
+ {
325
+ 'Column Name': col.name,
326
+ 'Type': col.col_type._to_str(as_schema=True),
327
+ 'Computed With': col.value_expr.display_str(inline=False) if col.value_expr is not None else ''
328
+ }
329
+ for col in self.__tbl_version_path.columns()
330
+ if columns is None or col.name in columns
276
331
  )
277
332
 
333
+ def __bases_to_desc(self) -> str:
334
+ bases = self._bases
335
+ assert len(bases) >= 1
336
+ if len(bases) <= 2:
337
+ return ', '.join(repr(b._path) for b in bases)
338
+ else:
339
+ return f'{bases[0]._path!r}, ..., {bases[-1]._path!r}'
340
+
341
+ def _index_descriptor(self, columns: Optional[list[str]] = None) -> pd.DataFrame:
342
+ from pixeltable import index
343
+
344
+ pd_rows = []
345
+ for name, info in self._tbl_version.idxs_by_name.items():
346
+ if isinstance(info.idx, index.EmbeddingIndex) and (columns is None or info.col.name in columns):
347
+ display_embed = info.idx.string_embed if info.col.col_type.is_string_type() else info.idx.image_embed
348
+ if info.idx.string_embed is not None and info.idx.image_embed is not None:
349
+ embed_str = f'{display_embed} (+1)'
350
+ else:
351
+ embed_str = str(display_embed)
352
+ row = {
353
+ 'Index Name': name,
354
+ 'Column': info.col.name,
355
+ 'Metric': str(info.idx.metric.name.lower()),
356
+ 'Embedding': embed_str,
357
+ }
358
+ pd_rows.append(row)
359
+ return pd.DataFrame(pd_rows)
360
+
361
+ def _external_store_descriptor(self) -> pd.DataFrame:
362
+ pd_rows = []
363
+ for name, store in self._tbl_version.external_stores.items():
364
+ row = {
365
+ 'External Store': name,
366
+ 'Type': type(store).__name__,
367
+ }
368
+ pd_rows.append(row)
369
+ return pd.DataFrame(pd_rows)
370
+
278
371
  def describe(self) -> None:
279
372
  """
280
373
  Print the table schema.
281
374
  """
375
+ self._check_is_dropped()
282
376
  if getattr(builtins, '__IPYTHON__', False):
283
377
  from IPython.display import display
284
- display(self._description_html())
378
+ display(self._repr_html_())
285
379
  else:
286
380
  print(repr(self))
287
381
 
288
- # TODO: Display comments in _repr_html()
289
- def __repr__(self) -> str:
290
- description_str = self._description().to_string(index=False)
291
- if self._comment is None:
292
- comment = ''
293
- else:
294
- comment = f'{self._comment}\n'
295
- return f'{self._display_name()} \'{self._name}\'\n{comment}{description_str}'
296
-
297
- def _repr_html_(self) -> str:
298
- return self._description_html()._repr_html_() # type: ignore[attr-defined]
299
-
300
382
  def _drop(self) -> None:
383
+ cat = catalog.Catalog.get()
384
+ # verify all dependents are deleted by now
385
+ for dep in cat.tbl_dependents[self._id]:
386
+ assert dep._is_dropped
301
387
  self._check_is_dropped()
302
388
  self._tbl_version.drop()
303
389
  self._is_dropped = True
@@ -331,6 +417,7 @@ class Table(SchemaObject):
331
417
 
332
418
  For details, see the documentation for [`add_column()`][pixeltable.catalog.Table.add_column].
333
419
  """
420
+ self._check_is_dropped()
334
421
  if not isinstance(col_name, str):
335
422
  raise excs.Error(f'Column name must be a string, got {type(col_name)}')
336
423
  if not isinstance(spec, (ts.ColumnType, exprs.Expr, type, _GenericAlias)):
@@ -598,31 +685,49 @@ class Table(SchemaObject):
598
685
  cls._verify_column(col, column_names)
599
686
  column_names.add(col.name)
600
687
 
601
- def drop_column(self, name: str) -> None:
688
+ def __check_column_name_exists(self, column_name: str, include_bases: bool = False) -> None:
689
+ col = self._tbl_version_path.get_column(column_name, include_bases)
690
+ if col is None:
691
+ raise excs.Error(f'Column {column_name!r} unknown')
692
+
693
+ def __check_column_ref_exists(self, col_ref: ColumnRef, include_bases: bool = False) -> None:
694
+ exists = self._tbl_version_path.has_column(col_ref.col, include_bases)
695
+ if not exists:
696
+ raise excs.Error(f'Unknown column: {col_ref.col.qualified_name}')
697
+
698
+ def drop_column(self, column: Union[str, ColumnRef]) -> None:
602
699
  """Drop a column from the table.
603
700
 
604
701
  Args:
605
- name: The name of the column to drop.
702
+ column: The name or reference of the column to drop.
606
703
 
607
704
  Raises:
608
705
  Error: If the column does not exist or if it is referenced by a dependent computed column.
609
706
 
610
707
  Examples:
611
- Drop the column `col` from the table `my_table`:
708
+ Drop the column `col` from the table `my_table` by column name:
612
709
 
613
710
  >>> tbl = pxt.get_table('my_table')
614
711
  ... tbl.drop_column('col')
712
+
713
+ Drop the column `col` from the table `my_table` by column reference:
714
+
715
+ >>> tbl = pxt.get_table('my_table')
716
+ ... tbl.drop_column(tbl.col)
615
717
  """
616
718
  self._check_is_dropped()
617
-
618
- if name not in self._tbl_version.cols_by_name:
619
- raise excs.Error(f'Unknown column: {name}')
620
- col = self._tbl_version.cols_by_name[name]
719
+ col: Column = None
720
+ if isinstance(column, str):
721
+ self.__check_column_name_exists(column)
722
+ col = self._tbl_version.cols_by_name[column]
723
+ else:
724
+ self.__check_column_ref_exists(column)
725
+ col = column.col
621
726
 
622
727
  dependent_user_cols = [c for c in col.dependent_cols if c.name is not None]
623
728
  if len(dependent_user_cols) > 0:
624
729
  raise excs.Error(
625
- f'Cannot drop column `{name}` because the following columns depend on it:\n'
730
+ f'Cannot drop column `{col.name}` because the following columns depend on it:\n'
626
731
  f'{", ".join(c.name for c in dependent_user_cols)}'
627
732
  )
628
733
 
@@ -640,7 +745,7 @@ class Table(SchemaObject):
640
745
  for view, store in dependent_stores
641
746
  ]
642
747
  raise excs.Error(
643
- f'Cannot drop column `{name}` because the following external stores depend on it:\n'
748
+ f'Cannot drop column `{col.name}` because the following external stores depend on it:\n'
644
749
  f'{", ".join(dependent_store_names)}'
645
750
  )
646
751
 
@@ -662,11 +767,10 @@ class Table(SchemaObject):
662
767
  >>> tbl = pxt.get_table('my_table')
663
768
  ... tbl.rename_column('col1', 'col2')
664
769
  """
665
- self._check_is_dropped()
666
770
  self._tbl_version.rename_column(old_name, new_name)
667
771
 
668
772
  def add_embedding_index(
669
- self, col_name: str, *, idx_name: Optional[str] = None,
773
+ self, column: Union[str, ColumnRef], *, idx_name: Optional[str] = None,
670
774
  string_embed: Optional[pxt.Function] = None, image_embed: Optional[pxt.Function] = None,
671
775
  metric: str = 'cosine'
672
776
  ) -> None:
@@ -680,7 +784,7 @@ class Table(SchemaObject):
680
784
  of text over an image column.
681
785
 
682
786
  Args:
683
- col_name: The name of column to index; must be a `String` or `Image` column.
787
+ column: The name of, or reference to, the column to index; must be a `String` or `Image` column.
684
788
  idx_name: The name of index. If not specified, a name such as `'idx0'` will be generated automatically.
685
789
  If specified, the name must be unique for this table.
686
790
  string_embed: A function to embed text; required if the column is a `String` column.
@@ -692,11 +796,15 @@ class Table(SchemaObject):
692
796
  Error: If an index with that name already exists for the table, or if the specified column does not exist.
693
797
 
694
798
  Examples:
695
- Add an index to the `img` column of the table `my_table`:
799
+ Add an index to the `img` column of the table `my_table` by column name:
696
800
 
697
801
  >>> tbl = pxt.get_table('my_table')
698
802
  ... tbl.add_embedding_index('img', image_embed=my_image_func)
699
803
 
804
+ Add an index to the `img` column of the table `my_table` by column reference:
805
+ >>> tbl = pxt.get_table('my_table')
806
+ ... tbl.add_embedding_index(tbl.img, image_embed=my_image_func)
807
+
700
808
  Add another index to the `img` column, using the inner product as the distance metric,
701
809
  and with a specific name; `string_embed` is also specified in order to search with text:
702
810
 
@@ -707,13 +815,27 @@ class Table(SchemaObject):
707
815
  ... string_embed=my_string_func,
708
816
  ... metric='ip'
709
817
  ... )
818
+
819
+ Alternatively:
820
+
821
+ >>> tbl.add_embedding_index(
822
+ ... tbl.img,
823
+ ... idx_name='clip_idx',
824
+ ... image_embed=my_image_func,
825
+ ... string_embed=my_string_func,
826
+ ... metric='ip'
827
+ ... )
710
828
  """
711
829
  if self._tbl_version_path.is_snapshot():
712
830
  raise excs.Error('Cannot add an index to a snapshot')
713
- self._check_is_dropped()
714
- col = self._tbl_version_path.get_column(col_name, include_bases=True)
715
- if col is None:
716
- raise excs.Error(f'Column {col_name} unknown')
831
+ col: Column
832
+ if isinstance(column, str):
833
+ self.__check_column_name_exists(column, include_bases=True)
834
+ col = self._tbl_version_path.get_column(column, include_bases=True)
835
+ else:
836
+ self.__check_column_ref_exists(column, include_bases=True)
837
+ col = column.col
838
+
717
839
  if idx_name is not None and idx_name in self._tbl_version.idxs_by_name:
718
840
  raise excs.Error(f'Duplicate index name: {idx_name}')
719
841
  from pixeltable.index import EmbeddingIndex
@@ -724,82 +846,128 @@ class Table(SchemaObject):
724
846
  # TODO: how to deal with exceptions here? drop the index and raise?
725
847
  FileCache.get().emit_eviction_warnings()
726
848
 
727
- def drop_embedding_index(self, *, column_name: Optional[str] = None, idx_name: Optional[str] = None) -> None:
849
+ def drop_embedding_index(
850
+ self, *,
851
+ column: Union[str, ColumnRef, None] = None,
852
+ idx_name: Optional[str] = None) -> None:
728
853
  """
729
854
  Drop an embedding index from the table. Either a column name or an index name (but not both) must be
730
- specified. If a column name is specified, it must be a column containing exactly one embedding index;
731
- otherwise the specific index name must be provided instead.
855
+ specified. If a column name or reference is specified, it must be a column containing exactly one
856
+ embedding index; otherwise the specific index name must be provided instead.
732
857
 
733
858
  Args:
734
- column_name: The name of the column from which to drop the index. Invalid if the column has multiple
735
- embedding indices.
859
+ column: The name of, or reference to, the column from which to drop the index.
860
+ The column must have only one embedding index.
736
861
  idx_name: The name of the index to drop.
737
862
 
738
863
  Raises:
739
- Error: If `column_name` is specified, but the column does not exist, or it contains no embedding
864
+ Error: If `column` is specified, but the column does not exist, or it contains no embedding
740
865
  indices or multiple embedding indices.
741
866
  Error: If `idx_name` is specified, but the index does not exist or is not an embedding index.
742
867
 
743
868
  Examples:
744
- Drop the embedding index on the `img` column of the table `my_table`:
869
+ Drop the embedding index on the `img` column of the table `my_table` by column name:
745
870
 
746
871
  >>> tbl = pxt.get_table('my_table')
747
- ... tbl.drop_embedding_index(column_name='img')
748
- """
749
- self._drop_index(column_name=column_name, idx_name=idx_name, _idx_class=index.EmbeddingIndex)
872
+ ... tbl.drop_embedding_index(column='img')
873
+
874
+ Drop the embedding index on the `img` column of the table `my_table` by column reference:
875
+
876
+ >>> tbl = pxt.get_table('my_table')
877
+ ... tbl.drop_embedding_index(column=tbl.img)
878
+
879
+ Drop the embedding index `idx1` of the table `my_table` by index name:
880
+ >>> tbl = pxt.get_table('my_table')
881
+ ... tbl.drop_embedding_index(idx_name='idx1')
750
882
 
751
- def drop_index(self, *, column_name: Optional[str] = None, idx_name: Optional[str] = None) -> None:
883
+ """
884
+ if (column is None) == (idx_name is None):
885
+ raise excs.Error("Exactly one of 'column' or 'idx_name' must be provided")
886
+
887
+ col: Column = None
888
+ if idx_name is None:
889
+ if isinstance(column, str):
890
+ self.__check_column_name_exists(column, include_bases=True)
891
+ col = self._tbl_version_path.get_column(column, include_bases=True)
892
+ else:
893
+ self.__check_column_ref_exists(column, include_bases=True)
894
+ col = column.col
895
+ assert col is not None
896
+ self._drop_index(col=col, idx_name=idx_name, _idx_class=index.EmbeddingIndex)
897
+
898
+ def drop_index(
899
+ self, *,
900
+ column: Union[str, ColumnRef, None] = None,
901
+ idx_name: Optional[str] = None) -> None:
752
902
  """
753
903
  Drop an index from the table. Either a column name or an index name (but not both) must be
754
- specified. If a column name is specified, it must be a column containing exactly one index;
904
+ specified. If a column name or reference is specified, it must be a column containing exactly one index;
755
905
  otherwise the specific index name must be provided instead.
756
906
 
757
907
  Args:
758
- column_name: The name of the column from which to drop the index. Invalid if the column has multiple
759
- indices.
908
+ column: The name of, or reference to, the column from which to drop the index.
909
+ The column must have only one embedding index.
760
910
  idx_name: The name of the index to drop.
761
911
 
762
912
  Raises:
763
- Error: If `column_name` is specified, but the column does not exist, or it contains no
913
+ Error: If `column` is specified, but the column does not exist, or it contains no
764
914
  indices or multiple indices.
765
915
  Error: If `idx_name` is specified, but the index does not exist.
766
916
 
767
917
  Examples:
768
- Drop the index on the `img` column of the table `my_table`:
918
+ Drop the index on the `img` column of the table `my_table` by column name:
769
919
 
770
920
  >>> tbl = pxt.get_table('my_table')
771
921
  ... tbl.drop_index(column_name='img')
922
+
923
+ Drop the index on the `img` column of the table `my_table` by column reference:
924
+
925
+ >>> tbl = pxt.get_table('my_table')
926
+ ... tbl.drop_index(tbl.img)
927
+
928
+ Drop the index `idx1` of the table `my_table` by index name:
929
+ >>> tbl = pxt.get_table('my_table')
930
+ ... tbl.drop_index(idx_name='idx1')
931
+
772
932
  """
773
- self._drop_index(column_name=column_name, idx_name=idx_name)
933
+ if (column is None) == (idx_name is None):
934
+ raise excs.Error("Exactly one of 'column' or 'idx_name' must be provided")
935
+
936
+ col: Column = None
937
+ if idx_name is None:
938
+ if isinstance(column, str):
939
+ self.__check_column_name_exists(column, include_bases=True)
940
+ col = self._tbl_version_path.get_column(column, include_bases=True)
941
+ else:
942
+ self.__check_column_ref_exists(column, include_bases=True)
943
+ col = column.col
944
+ assert col is not None
945
+ self._drop_index(col=col, idx_name=idx_name)
774
946
 
775
947
  def _drop_index(
776
- self, *, column_name: Optional[str] = None, idx_name: Optional[str] = None,
948
+ self, *, col: Optional[Column] = None,
949
+ idx_name: Optional[str] = None,
777
950
  _idx_class: Optional[type[index.IndexBase]] = None
778
951
  ) -> None:
779
952
  if self._tbl_version_path.is_snapshot():
780
953
  raise excs.Error('Cannot drop an index from a snapshot')
781
- self._check_is_dropped()
782
- if (column_name is None) == (idx_name is None):
783
- raise excs.Error("Exactly one of 'column_name' or 'idx_name' must be provided")
954
+ assert (col is None) != (idx_name is None)
784
955
 
785
956
  if idx_name is not None:
786
957
  if idx_name not in self._tbl_version.idxs_by_name:
787
958
  raise excs.Error(f'Index {idx_name!r} does not exist')
788
959
  idx_id = self._tbl_version.idxs_by_name[idx_name].id
789
960
  else:
790
- col = self._tbl_version_path.get_column(column_name, include_bases=True)
791
- if col is None:
792
- raise excs.Error(f'Column {column_name!r} unknown')
793
961
  if col.tbl.id != self._tbl_version.id:
794
962
  raise excs.Error(
795
- f'Column {column_name!r}: cannot drop index from column that belongs to base ({col.tbl.name}!r)')
963
+ f'Column {col.name!r}: cannot drop index from column that belongs to base ({col.tbl.name}!r)')
796
964
  idx_info = [info for info in self._tbl_version.idxs_by_name.values() if info.col.id == col.id]
797
965
  if _idx_class is not None:
798
966
  idx_info = [info for info in idx_info if isinstance(info.idx, _idx_class)]
799
967
  if len(idx_info) == 0:
800
- raise excs.Error(f'Column {column_name!r} does not have an index')
968
+ raise excs.Error(f'Column {col.name!r} does not have an index')
801
969
  if len(idx_info) > 1:
802
- raise excs.Error(f"Column {column_name!r} has multiple indices; specify 'idx_name' instead")
970
+ raise excs.Error(f"Column {col.name!r} has multiple indices; specify 'idx_name' instead")
803
971
  idx_id = idx_info[0].id
804
972
  self._tbl_version.drop_index(idx_id)
805
973
 
@@ -919,7 +1087,6 @@ class Table(SchemaObject):
919
1087
 
920
1088
  >>> tbl.update({'int_col': tbl.int_col + 1}, where=tbl.int_col == 0)
921
1089
  """
922
- self._check_is_dropped()
923
1090
  status = self._tbl_version.update(value_spec, where, cascade)
924
1091
  FileCache.get().emit_eviction_warnings()
925
1092
  return status
@@ -955,7 +1122,6 @@ class Table(SchemaObject):
955
1122
  """
956
1123
  if self._tbl_version_path.is_snapshot():
957
1124
  raise excs.Error('Cannot update a snapshot')
958
- self._check_is_dropped()
959
1125
  rows = list(rows)
960
1126
 
961
1127
  row_updates: list[dict[Column, exprs.Expr]] = []
@@ -1010,7 +1176,6 @@ class Table(SchemaObject):
1010
1176
  """
1011
1177
  if self._tbl_version_path.is_snapshot():
1012
1178
  raise excs.Error('Cannot revert a snapshot')
1013
- self._check_is_dropped()
1014
1179
  self._tbl_version.revert()
1015
1180
 
1016
1181
  @overload
@@ -1060,7 +1225,6 @@ class Table(SchemaObject):
1060
1225
  """
1061
1226
  if self._tbl_version.is_snapshot:
1062
1227
  raise excs.Error(f'Table `{self._name}` is a snapshot, so it cannot be linked to an external store.')
1063
- self._check_is_dropped()
1064
1228
  if store.name in self.external_stores:
1065
1229
  raise excs.Error(f'Table `{self._name}` already has an external store with that name: {store.name}')
1066
1230
  _logger.info(f'Linking external store `{store.name}` to table `{self._name}`')
pixeltable/catalog/view.py CHANGED
@@ -59,7 +59,7 @@ class View(Table):
59
59
 
60
60
  # verify that filter can be evaluated in the context of the base
61
61
  if predicate is not None:
62
- if not predicate.is_bound_by(base):
62
+ if not predicate.is_bound_by([base]):
63
63
  raise excs.Error(f'Filter cannot be computed in the context of the base {base.tbl_name()}')
64
64
  # create a copy that we can modify and store
65
65
  predicate = predicate.copy()
@@ -69,7 +69,7 @@ class View(Table):
69
69
  if not col.is_computed:
70
70
  continue
71
71
  # make sure that the value can be computed in the context of the base
72
- if col.value_expr is not None and not col.value_expr.is_bound_by(base):
72
+ if col.value_expr is not None and not col.value_expr.is_bound_by([base]):
73
73
  raise excs.Error(
74
74
  f'Column {col.name}: value expression cannot be computed in the context of the base {base.tbl_name()}')
75
75
 
@@ -194,6 +194,9 @@ class View(Table):
194
194
 
195
195
  def _drop(self) -> None:
196
196
  cat = catalog.Catalog.get()
197
+ # verify all dependents are deleted by now
198
+ for dep in cat.tbl_dependents[self._id]:
199
+ assert dep._is_dropped
197
200
  if self._snapshot_only:
198
201
  # there is not TableVersion to drop
199
202
  self._check_is_dropped()