pixeltable 0.2.24__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (101) hide show
  1. pixeltable/__init__.py +2 -2
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +1 -1
  4. pixeltable/catalog/dir.py +6 -0
  5. pixeltable/catalog/globals.py +25 -0
  6. pixeltable/catalog/named_function.py +4 -0
  7. pixeltable/catalog/path_dict.py +37 -11
  8. pixeltable/catalog/schema_object.py +6 -0
  9. pixeltable/catalog/table.py +531 -251
  10. pixeltable/catalog/table_version.py +22 -8
  11. pixeltable/catalog/view.py +8 -7
  12. pixeltable/dataframe.py +439 -105
  13. pixeltable/env.py +19 -5
  14. pixeltable/exec/__init__.py +1 -1
  15. pixeltable/exec/exec_node.py +6 -7
  16. pixeltable/exec/expr_eval_node.py +1 -1
  17. pixeltable/exec/sql_node.py +92 -45
  18. pixeltable/exprs/__init__.py +1 -0
  19. pixeltable/exprs/arithmetic_expr.py +1 -1
  20. pixeltable/exprs/array_slice.py +1 -1
  21. pixeltable/exprs/column_property_ref.py +1 -1
  22. pixeltable/exprs/column_ref.py +29 -2
  23. pixeltable/exprs/comparison.py +1 -1
  24. pixeltable/exprs/compound_predicate.py +1 -1
  25. pixeltable/exprs/expr.py +12 -5
  26. pixeltable/exprs/expr_set.py +8 -0
  27. pixeltable/exprs/function_call.py +147 -39
  28. pixeltable/exprs/in_predicate.py +1 -1
  29. pixeltable/exprs/inline_expr.py +25 -5
  30. pixeltable/exprs/is_null.py +1 -1
  31. pixeltable/exprs/json_mapper.py +1 -1
  32. pixeltable/exprs/json_path.py +1 -1
  33. pixeltable/exprs/method_ref.py +1 -1
  34. pixeltable/exprs/row_builder.py +1 -1
  35. pixeltable/exprs/rowid_ref.py +1 -1
  36. pixeltable/exprs/similarity_expr.py +17 -7
  37. pixeltable/exprs/sql_element_cache.py +4 -0
  38. pixeltable/exprs/type_cast.py +2 -2
  39. pixeltable/exprs/variable.py +3 -0
  40. pixeltable/func/__init__.py +5 -4
  41. pixeltable/func/aggregate_function.py +151 -68
  42. pixeltable/func/callable_function.py +48 -16
  43. pixeltable/func/expr_template_function.py +64 -23
  44. pixeltable/func/function.py +227 -23
  45. pixeltable/func/function_registry.py +2 -1
  46. pixeltable/func/query_template_function.py +51 -9
  47. pixeltable/func/signature.py +65 -7
  48. pixeltable/func/tools.py +153 -0
  49. pixeltable/func/udf.py +57 -35
  50. pixeltable/functions/__init__.py +2 -2
  51. pixeltable/functions/anthropic.py +51 -4
  52. pixeltable/functions/gemini.py +85 -0
  53. pixeltable/functions/globals.py +54 -34
  54. pixeltable/functions/huggingface.py +10 -28
  55. pixeltable/functions/json.py +3 -8
  56. pixeltable/functions/math.py +67 -0
  57. pixeltable/functions/mistralai.py +0 -2
  58. pixeltable/functions/ollama.py +8 -8
  59. pixeltable/functions/openai.py +51 -4
  60. pixeltable/functions/timestamp.py +1 -1
  61. pixeltable/functions/video.py +3 -9
  62. pixeltable/functions/vision.py +1 -1
  63. pixeltable/globals.py +374 -89
  64. pixeltable/index/embedding_index.py +106 -29
  65. pixeltable/io/__init__.py +1 -1
  66. pixeltable/io/label_studio.py +1 -1
  67. pixeltable/io/parquet.py +39 -19
  68. pixeltable/iterators/__init__.py +1 -0
  69. pixeltable/iterators/document.py +12 -0
  70. pixeltable/iterators/image.py +100 -0
  71. pixeltable/iterators/video.py +7 -8
  72. pixeltable/metadata/__init__.py +1 -1
  73. pixeltable/metadata/converters/convert_16.py +2 -1
  74. pixeltable/metadata/converters/convert_17.py +2 -1
  75. pixeltable/metadata/converters/convert_22.py +17 -0
  76. pixeltable/metadata/converters/convert_23.py +35 -0
  77. pixeltable/metadata/converters/convert_24.py +56 -0
  78. pixeltable/metadata/converters/convert_25.py +19 -0
  79. pixeltable/metadata/converters/util.py +4 -2
  80. pixeltable/metadata/notes.py +4 -0
  81. pixeltable/metadata/schema.py +1 -0
  82. pixeltable/plan.py +129 -51
  83. pixeltable/store.py +1 -1
  84. pixeltable/type_system.py +196 -54
  85. pixeltable/utils/arrow.py +8 -3
  86. pixeltable/utils/description_helper.py +89 -0
  87. pixeltable/utils/documents.py +14 -0
  88. {pixeltable-0.2.24.dist-info → pixeltable-0.3.0.dist-info}/METADATA +32 -22
  89. pixeltable-0.3.0.dist-info/RECORD +155 -0
  90. {pixeltable-0.2.24.dist-info → pixeltable-0.3.0.dist-info}/WHEEL +1 -1
  91. pixeltable-0.3.0.dist-info/entry_points.txt +3 -0
  92. pixeltable/tool/create_test_db_dump.py +0 -308
  93. pixeltable/tool/create_test_video.py +0 -81
  94. pixeltable/tool/doc_plugins/griffe.py +0 -50
  95. pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
  96. pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
  97. pixeltable/tool/embed_udf.py +0 -9
  98. pixeltable/tool/mypy_plugin.py +0 -55
  99. pixeltable-0.2.24.dist-info/RECORD +0 -153
  100. pixeltable-0.2.24.dist-info/entry_points.txt +0 -3
  101. {pixeltable-0.2.24.dist-info → pixeltable-0.3.0.dist-info}/LICENSE +0 -0
@@ -10,7 +10,6 @@ from typing import TYPE_CHECKING, Any, Callable, Iterable, Literal, Optional, Se
10
10
  from uuid import UUID
11
11
 
12
12
  import pandas as pd
13
- import pandas.io.formats.style
14
13
  import sqlalchemy as sql
15
14
 
16
15
  import pixeltable as pxt
@@ -21,10 +20,13 @@ import pixeltable.exprs as exprs
21
20
  import pixeltable.index as index
22
21
  import pixeltable.metadata.schema as schema
23
22
  import pixeltable.type_system as ts
24
- from pixeltable.utils.filecache import FileCache
25
23
 
24
+ from ..exprs import ColumnRef
25
+ from ..utils.description_helper import DescriptionHelper
26
+ from ..utils.filecache import FileCache
26
27
  from .column import Column
27
- from .globals import _ROWID_COLUMN_NAME, UpdateStatus, is_system_column_name, is_valid_identifier, MediaValidation
28
+ from .globals import (_ROWID_COLUMN_NAME, IfExistsParam, IfNotExistsParam, MediaValidation, UpdateStatus,
29
+ is_system_column_name, is_valid_identifier)
28
30
  from .schema_object import SchemaObject
29
31
  from .table_version import TableVersion
30
32
  from .table_version_path import TableVersionPath
@@ -32,6 +34,8 @@ from .table_version_path import TableVersionPath
32
34
  if TYPE_CHECKING:
33
35
  import torch.utils.data
34
36
 
37
+ import pixeltable.plan
38
+
35
39
  _logger = logging.getLogger('pixeltable')
36
40
 
37
41
  class Table(SchemaObject):
@@ -45,23 +49,15 @@ class Table(SchemaObject):
45
49
  def __init__(self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath):
46
50
  super().__init__(id, name, dir_id)
47
51
  self._is_dropped = False
48
- self._tbl_version_path = tbl_version_path
49
- self.__query_scope = self.QueryScope(self)
50
-
51
- class QueryScope:
52
- __table: 'Table'
53
- _queries: dict[str, pxt.func.QueryTemplateFunction]
54
-
55
- def __init__(self, table: 'Table') -> None:
56
- self.__table = table
57
- self._queries = {}
52
+ self.__tbl_version_path = tbl_version_path
58
53
 
59
- def __getattr__(self, name: str) -> pxt.func.QueryTemplateFunction:
60
- if name in self._queries:
61
- return self._queries[name]
62
- raise AttributeError(f'Table {self.__table._name!r} has no query with that name: {name!r}')
54
+ @property
55
+ def _has_dependents(self) -> bool:
56
+ """Returns True if this table has any dependent views, or snapshots."""
57
+ return len(self._get_views(recursive=False)) > 0
63
58
 
64
59
  def _move(self, new_name: str, new_dir_id: UUID) -> None:
60
+ self._check_is_dropped()
65
61
  super()._move(new_name, new_dir_id)
66
62
  with env.Env.get().engine.begin() as conn:
67
63
  stmt = sql.text((
@@ -95,6 +91,7 @@ class Table(SchemaObject):
95
91
  }
96
92
  ```
97
93
  """
94
+ self._check_is_dropped()
98
95
  md = super().get_metadata()
99
96
  md['base'] = self._base._path if self._base is not None else None
100
97
  md['schema'] = self._schema
@@ -115,6 +112,12 @@ class Table(SchemaObject):
115
112
  """Return TableVersion for just this table."""
116
113
  return self._tbl_version_path.tbl_version
117
114
 
115
+ @property
116
+ def _tbl_version_path(self) -> TableVersionPath:
117
+ """Return TableVersionPath for just this table."""
118
+ self._check_is_dropped()
119
+ return self.__tbl_version_path
120
+
118
121
  def __hash__(self) -> int:
119
122
  return hash(self._tbl_version.id)
120
123
 
@@ -123,23 +126,12 @@ class Table(SchemaObject):
123
126
  raise excs.Error(f'{self._display_name()} {self._name} has been dropped')
124
127
 
125
128
  def __getattr__(self, name: str) -> 'pxt.exprs.ColumnRef':
126
- """Return a ColumnRef for the given name.
127
- """
129
+ """Return a ColumnRef for the given name."""
128
130
  return self._tbl_version_path.get_column_ref(name)
129
131
 
130
- @overload
131
- def __getitem__(self, name: str) -> 'pxt.exprs.ColumnRef': ...
132
-
133
- @overload
134
- def __getitem__(self, index: Union[exprs.Expr, Sequence[exprs.Expr]]) -> 'pxt.DataFrame': ...
135
-
136
- def __getitem__(self, index):
137
- """Return a ColumnRef or QueryTemplateFunction for the given name, or a DataFrame for the given slice.
138
- """
139
- if isinstance(index, str):
140
- return getattr(self, index)
141
- else:
142
- return self._df()[index]
132
+ def __getitem__(self, name: str) -> 'pxt.exprs.ColumnRef':
133
+ """Return a ColumnRef for the given name."""
134
+ return getattr(self, name)
143
135
 
144
136
  def list_views(self, *, recursive: bool = True) -> list[str]:
145
137
  """
@@ -152,6 +144,7 @@ class Table(SchemaObject):
152
144
  Returns:
153
145
  A list of view paths.
154
146
  """
147
+ self._check_is_dropped()
155
148
  return [t._path for t in self._get_views(recursive=recursive)]
156
149
 
157
150
  def _get_views(self, *, recursive: bool = True) -> list['Table']:
@@ -165,26 +158,42 @@ class Table(SchemaObject):
165
158
  """Return a DataFrame for this table.
166
159
  """
167
160
  # local import: avoid circular imports
168
- return pxt.DataFrame(self._tbl_version_path)
169
-
170
- @property
171
- def queries(self) -> 'Table.QueryScope':
172
- return self.__query_scope
161
+ from pixeltable.plan import FromClause
162
+ return pxt.DataFrame(FromClause(tbls=[self._tbl_version_path]))
173
163
 
174
164
  def select(self, *items: Any, **named_items: Any) -> 'pxt.DataFrame':
175
- """Return a [`DataFrame`][pixeltable.DataFrame] for this table."""
165
+ """ Select columns or expressions from this table.
166
+
167
+ See [`DataFrame.select`][pixeltable.DataFrame.select] for more details.
168
+ """
176
169
  return self._df().select(*items, **named_items)
177
170
 
178
171
  def where(self, pred: 'exprs.Expr') -> 'pxt.DataFrame':
179
- """Return a [`DataFrame`][pixeltable.DataFrame] for this table."""
172
+ """Filter rows from this table based on the expression.
173
+
174
+ See [`DataFrame.where`][pixeltable.DataFrame.where] for more details.
175
+ """
180
176
  return self._df().where(pred)
181
177
 
178
+ def join(
179
+ self, other: 'Table', *, on: Optional['exprs.Expr'] = None,
180
+ how: 'pixeltable.plan.JoinType.LiteralType' = 'inner'
181
+ ) -> 'pxt.DataFrame':
182
+ """Join this table with another table."""
183
+ return self._df().join(other, on=on, how=how)
184
+
182
185
  def order_by(self, *items: 'exprs.Expr', asc: bool = True) -> 'pxt.DataFrame':
183
- """Return a [`DataFrame`][pixeltable.DataFrame] for this table."""
186
+ """Order the rows of this table based on the expression.
187
+
188
+ See [`DataFrame.order_by`][pixeltable.DataFrame.order_by] for more details.
189
+ """
184
190
  return self._df().order_by(*items, asc=asc)
185
191
 
186
192
  def group_by(self, *items: 'exprs.Expr') -> 'pxt.DataFrame':
187
- """Return a [`DataFrame`][pixeltable.DataFrame] for this table."""
193
+ """Group the rows of this table based on the expression.
194
+
195
+ See [`DataFrame.group_by`][pixeltable.DataFrame.group_by] for more details.
196
+ """
188
197
  return self._df().group_by(*items)
189
198
 
190
199
  def limit(self, n: int) -> 'pxt.DataFrame':
@@ -228,11 +237,6 @@ class Table(SchemaObject):
228
237
  """Return the schema (column names and column types) of this table."""
229
238
  return {c.name: c.col_type for c in self._tbl_version_path.columns()}
230
239
 
231
- @property
232
- def _query_names(self) -> list[str]:
233
- """Return the names of the registered queries for this table."""
234
- return list(self.__query_scope._queries.keys())
235
-
236
240
  @property
237
241
  def _base(self) -> Optional['Table']:
238
242
  """
@@ -244,6 +248,18 @@ class Table(SchemaObject):
244
248
  base_id = self._tbl_version_path.base.tbl_version.id
245
249
  return catalog.Catalog.get().tbls[base_id]
246
250
 
251
+ @property
252
+ def _bases(self) -> list['Table']:
253
+ """
254
+ The ancestor list of bases of this table, starting with its immediate base.
255
+ """
256
+ bases = []
257
+ base = self._base
258
+ while base is not None:
259
+ bases.append(base)
260
+ base = base._base
261
+ return bases
262
+
247
263
  @property
248
264
  def _comment(self) -> str:
249
265
  return self._tbl_version.comment
@@ -256,48 +272,103 @@ class Table(SchemaObject):
256
272
  def _media_validation(self) -> MediaValidation:
257
273
  return self._tbl_version.media_validation
258
274
 
259
- def _description(self, cols: Optional[Iterable[Column]] = None) -> pd.DataFrame:
260
- cols = self._tbl_version_path.columns()
261
- df = pd.DataFrame({
262
- 'Column Name': [c.name for c in cols],
263
- 'Type': [c.col_type._to_str(as_schema=True) for c in cols],
264
- 'Computed With': [c.value_expr.display_str(inline=False) if c.value_expr is not None else '' for c in cols],
265
- })
266
- return df
267
-
268
- def _description_html(self, cols: Optional[Iterable[Column]] = None) -> pandas.io.formats.style.Styler:
269
- pd_df = self._description(cols)
270
- # white-space: pre-wrap: print \n as newline
271
- # th: center-align headings
272
- return (
273
- pd_df.style.set_properties(None, **{'white-space': 'pre-wrap', 'text-align': 'left'})
274
- .set_table_styles([dict(selector='th', props=[('text-align', 'center')])])
275
- .hide(axis='index')
275
+ def __repr__(self) -> str:
276
+ return self._descriptors().to_string()
277
+
278
+ def _repr_html_(self) -> str:
279
+ return self._descriptors().to_html()
280
+
281
+ def _descriptors(self) -> DescriptionHelper:
282
+ """
283
+ Constructs a list of descriptors for this table that can be pretty-printed.
284
+ """
285
+ helper = DescriptionHelper()
286
+ helper.append(self._title_descriptor())
287
+ helper.append(self._col_descriptor())
288
+ idxs = self._index_descriptor()
289
+ if not idxs.empty:
290
+ helper.append(idxs)
291
+ stores = self._external_store_descriptor()
292
+ if not stores.empty:
293
+ helper.append(stores)
294
+ if self._comment:
295
+ helper.append(f'COMMENT: {self._comment}')
296
+ return helper
297
+
298
+ def _title_descriptor(self) -> str:
299
+ title: str
300
+ if self._base is None:
301
+ title = f'Table\n{self._path!r}'
302
+ else:
303
+ title = f'View\n{self._path!r}'
304
+ title += f'\n(of {self.__bases_to_desc()})'
305
+ return title
306
+
307
+ def _col_descriptor(self, columns: Optional[list[str]] = None) -> pd.DataFrame:
308
+ return pd.DataFrame(
309
+ {
310
+ 'Column Name': col.name,
311
+ 'Type': col.col_type._to_str(as_schema=True),
312
+ 'Computed With': col.value_expr.display_str(inline=False) if col.value_expr is not None else ''
313
+ }
314
+ for col in self.__tbl_version_path.columns()
315
+ if columns is None or col.name in columns
276
316
  )
277
317
 
318
+ def __bases_to_desc(self) -> str:
319
+ bases = self._bases
320
+ assert len(bases) >= 1
321
+ if len(bases) <= 2:
322
+ return ', '.join(repr(b._path) for b in bases)
323
+ else:
324
+ return f'{bases[0]._path!r}, ..., {bases[-1]._path!r}'
325
+
326
+ def _index_descriptor(self, columns: Optional[list[str]] = None) -> pd.DataFrame:
327
+ from pixeltable import index
328
+
329
+ pd_rows = []
330
+ for name, info in self._tbl_version.idxs_by_name.items():
331
+ if isinstance(info.idx, index.EmbeddingIndex) and (columns is None or info.col.name in columns):
332
+ display_embed = info.idx.string_embed if info.col.col_type.is_string_type() else info.idx.image_embed
333
+ if info.idx.string_embed is not None and info.idx.image_embed is not None:
334
+ embed_str = f'{display_embed} (+1)'
335
+ else:
336
+ embed_str = str(display_embed)
337
+ row = {
338
+ 'Index Name': name,
339
+ 'Column': info.col.name,
340
+ 'Metric': str(info.idx.metric.name.lower()),
341
+ 'Embedding': embed_str,
342
+ }
343
+ pd_rows.append(row)
344
+ return pd.DataFrame(pd_rows)
345
+
346
+ def _external_store_descriptor(self) -> pd.DataFrame:
347
+ pd_rows = []
348
+ for name, store in self._tbl_version.external_stores.items():
349
+ row = {
350
+ 'External Store': name,
351
+ 'Type': type(store).__name__,
352
+ }
353
+ pd_rows.append(row)
354
+ return pd.DataFrame(pd_rows)
355
+
278
356
  def describe(self) -> None:
279
357
  """
280
358
  Print the table schema.
281
359
  """
360
+ self._check_is_dropped()
282
361
  if getattr(builtins, '__IPYTHON__', False):
283
362
  from IPython.display import display
284
- display(self._description_html())
363
+ display(self._repr_html_())
285
364
  else:
286
365
  print(repr(self))
287
366
 
288
- # TODO: Display comments in _repr_html()
289
- def __repr__(self) -> str:
290
- description_str = self._description().to_string(index=False)
291
- if self._comment is None:
292
- comment = ''
293
- else:
294
- comment = f'{self._comment}\n'
295
- return f'{self._display_name()} \'{self._name}\'\n{comment}{description_str}'
296
-
297
- def _repr_html_(self) -> str:
298
- return self._description_html()._repr_html_() # type: ignore[attr-defined]
299
-
300
367
  def _drop(self) -> None:
368
+ cat = catalog.Catalog.get()
369
+ # verify all dependents are deleted by now
370
+ for dep in cat.tbl_dependents[self._id]:
371
+ assert dep._is_dropped
301
372
  self._check_is_dropped()
302
373
  self._tbl_version.drop()
303
374
  self._is_dropped = True
@@ -319,27 +390,54 @@ class Table(SchemaObject):
319
390
  """
320
391
  return self._df().to_coco_dataset()
321
392
 
322
- def __setitem__(self, col_name: str, spec: Union[ts.ColumnType, exprs.Expr]) -> None:
323
- """
324
- Adds a column to the table. This is an alternate syntax for `add_column()`; the meaning of
325
-
326
- >>> tbl['new_col'] = pxt.Int
327
-
328
- is exactly equivalent to
393
+ def _column_has_dependents(self, col: Column) -> bool:
394
+ """Returns True if the column has dependents, False otherwise."""
395
+ assert col is not None
396
+ assert col.name in self._schema.keys()
397
+ if any(c.name is not None for c in col.dependent_cols):
398
+ return True
399
+ return any(
400
+ col in store.get_local_columns()
401
+ for view in [self] + self._get_views(recursive=True)
402
+ for store in view._tbl_version.external_stores.values())
329
403
 
330
- >>> tbl.add_column(new_col=pxt.Int)
404
+ def _ignore_or_drop_existing_columns(self, new_col_names: list[str], if_exists: IfExistsParam) -> list[str]:
405
+ """ Check and handle existing columns in the new column specification based on the if_exists parameter.
331
406
 
332
- For details, see the documentation for [`add_column()`][pixeltable.catalog.Table.add_column].
407
+ If `if_exists='ignore'`, returns a list of existing columns, if any, in `new_col_names`.
333
408
  """
334
- if not isinstance(col_name, str):
335
- raise excs.Error(f'Column name must be a string, got {type(col_name)}')
336
- if not isinstance(spec, (ts.ColumnType, exprs.Expr, type, _GenericAlias)):
337
- raise excs.Error(f'Column spec must be a ColumnType, Expr, or type, got {type(spec)}')
338
- self.add_column(stored=None, print_stats=False, on_error='abort', **{col_name: spec})
409
+ assert not self.get_metadata()['is_snapshot']
410
+ existing_col_names = set(self._schema.keys())
411
+ cols_to_ignore = []
412
+ for new_col_name in new_col_names:
413
+ if new_col_name in existing_col_names:
414
+ if if_exists == IfExistsParam.ERROR:
415
+ raise excs.Error(f'Duplicate column name: {new_col_name!r}')
416
+ elif if_exists == IfExistsParam.IGNORE:
417
+ cols_to_ignore.append(new_col_name)
418
+ elif if_exists == IfExistsParam.REPLACE or if_exists == IfExistsParam.REPLACE_FORCE:
419
+ if new_col_name not in self._tbl_version.cols_by_name:
420
+ # for views, it is possible that the existing column
421
+ # is a base table column; in that case, we should not
422
+ # drop/replace that column. Continue to raise error.
423
+ raise excs.Error(
424
+ f'Column {new_col_name!r} is a base table column. Cannot replace it.'
425
+ )
426
+ col = self._tbl_version.cols_by_name[new_col_name]
427
+ # cannot drop a column with dependents; so reject
428
+ # replace directive if column has dependents.
429
+ if self._column_has_dependents(col):
430
+ raise excs.Error(
431
+ f'Column {new_col_name!r} already exists and has dependents. Cannot {if_exists.name.lower()} it.'
432
+ )
433
+ self.drop_column(new_col_name)
434
+ assert new_col_name not in self._tbl_version.cols_by_name
435
+ return cols_to_ignore
339
436
 
340
437
  def add_columns(
341
438
  self,
342
- schema: dict[str, Union[ts.ColumnType, builtins.type, _GenericAlias]]
439
+ schema: dict[str, Union[ts.ColumnType, builtins.type, _GenericAlias]],
440
+ if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error'
343
441
  ) -> UpdateStatus:
344
442
  """
345
443
  Adds multiple columns to the table. The columns must be concrete (non-computed) columns; to add computed columns,
@@ -350,12 +448,21 @@ class Table(SchemaObject):
350
448
 
351
449
  Args:
352
450
  schema: A dictionary mapping column names to types.
451
+ if_exists: Determines the behavior if a column already exists. Must be one of the following:
452
+
453
+ - `'error'`: an exception will be raised.
454
+ - `'ignore'`: do nothing and return.
455
+ - `'replace' or 'replace_force'`: drop the existing column and add the new column, if it has no dependents.
456
+
457
+ Note that the `if_exists` parameter is applied to all columns in the schema.
458
+ To apply different behaviors to different columns, please use [`add_column()`][pixeltable.Table.add_column] for each column.
353
459
 
354
460
  Returns:
355
461
  Information about the execution status of the operation.
356
462
 
357
463
  Raises:
358
- Error: If any column name is invalid or already exists.
464
+ Error: If any column name is invalid, or already exists and `if_exists='error'`,
465
+ or `if_exists='replace*'` but the column has dependents or is a basetable column.
359
466
 
360
467
  Examples:
361
468
  Add multiple columns to the table `my_table`:
@@ -368,49 +475,51 @@ class Table(SchemaObject):
368
475
  ... tbl.add_columns(schema)
369
476
  """
370
477
  self._check_is_dropped()
478
+ if self.get_metadata()['is_snapshot']:
479
+ raise excs.Error('Cannot add column to a snapshot.')
371
480
  col_schema = {
372
481
  col_name: {'type': ts.ColumnType.normalize_type(spec, nullable_default=True, allow_builtin_types=False)}
373
482
  for col_name, spec in schema.items()
374
483
  }
484
+ # handle existing columns based on if_exists parameter
485
+ cols_to_ignore = self._ignore_or_drop_existing_columns(list(col_schema.keys()), IfExistsParam.validated(if_exists, 'if_exists'))
486
+ # if all columns to be added already exist and user asked to ignore
487
+ # existing columns, there's nothing to do.
488
+ for cname in cols_to_ignore:
489
+ assert cname in col_schema
490
+ del col_schema[cname]
491
+ if len(col_schema) == 0:
492
+ return UpdateStatus()
375
493
  new_cols = self._create_columns(col_schema)
376
494
  for new_col in new_cols:
377
- self._verify_column(new_col, set(self._schema.keys()), set(self._query_names))
495
+ self._verify_column(new_col)
378
496
  status = self._tbl_version.add_columns(new_cols, print_stats=False, on_error='abort')
379
497
  FileCache.get().emit_eviction_warnings()
380
498
  return status
381
499
 
382
- # TODO: add_column() still supports computed columns for backward-compatibility. In the future, computed columns
383
- # will be supported only through add_computed_column(). At that point, we can remove the `stored`,
384
- # `print_stats`, and `on_error` parameters, and change the method body to simply call self.add_columns(kwargs),
385
- # simplifying the code. For the time being, there's some obvious code duplication.
386
500
  def add_column(
387
501
  self,
388
502
  *,
389
- stored: Optional[bool] = None,
390
- print_stats: bool = False,
391
- on_error: Literal['abort', 'ignore'] = 'abort',
503
+ if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
392
504
  **kwargs: Union[ts.ColumnType, builtins.type, _GenericAlias, exprs.Expr]
393
505
  ) -> UpdateStatus:
394
506
  """
395
- Adds a column to the table.
507
+ Adds an ordinary (non-computed) column to the table.
396
508
 
397
509
  Args:
398
510
  kwargs: Exactly one keyword argument of the form `col_name=col_type`.
399
- stored: Whether the column is materialized and stored or computed on demand. Only valid for image columns.
400
- print_stats: If `True`, print execution metrics during evaluation.
401
- on_error: Determines the behavior if an error occurs while evaluating the column expression for at least one
402
- row.
511
+ if_exists: Determines the behavior if the column already exists. Must be one of the following:
403
512
 
404
- - `'abort'`: an exception will be raised and the column will not be added.
405
- - `'ignore'`: execution will continue and the column will be added. Any rows
406
- with errors will have a `None` value for the column, with information about the error stored in the
407
- corresponding `tbl.col_name.errortype` and `tbl.col_name.errormsg` fields.
513
+ - `'error'`: an exception will be raised.
514
+ - `'ignore'`: do nothing and return.
515
+ - `'replace' or 'replace_force'`: drop the existing column and add the new column, if it has no dependents.
408
516
 
409
517
  Returns:
410
518
  Information about the execution status of the operation.
411
519
 
412
520
  Raises:
413
- Error: If the column name is invalid or already exists.
521
+ Error: If the column name is invalid, or already exists and `if_exists='error'`,
522
+ or `if_exists='replace*'` but the column has dependents or is a basetable column.
414
523
 
415
524
  Examples:
416
525
  Add an int column:
@@ -422,29 +531,22 @@ class Table(SchemaObject):
422
531
  >>> tbl['new_col'] = pxt.Int
423
532
  """
424
533
  self._check_is_dropped()
534
+ # columns cannot be added to a snapshot
535
+ if self._tbl_version.is_snapshot:
536
+ raise excs.Error('Cannot add column to a snapshot.')
425
537
  # verify kwargs and construct column schema dict
426
538
  if len(kwargs) != 1:
427
539
  raise excs.Error(
428
540
  f'add_column() requires exactly one keyword argument of the form "col_name=col_type"; '
429
- f'got {len(kwargs)} instead ({", ".join(list(kwargs.keys()))})'
541
+ f'got {len(kwargs)} instead ({", ".join(kwargs.keys())})'
430
542
  )
431
- col_name, spec = next(iter(kwargs.items()))
432
- if not is_valid_identifier(col_name):
433
- raise excs.Error(f'Invalid column name: {col_name!r}')
434
-
435
- col_schema: dict[str, Any] = {}
436
- if isinstance(spec, (ts.ColumnType, builtins.type, _GenericAlias)):
437
- col_schema['type'] = ts.ColumnType.normalize_type(spec, nullable_default=True, allow_builtin_types=False)
438
- else:
439
- col_schema['value'] = spec
440
- if stored is not None:
441
- col_schema['stored'] = stored
543
+ col_type = next(iter(kwargs.values()))
544
+ if not isinstance(col_type, (ts.ColumnType, type, _GenericAlias)):
545
+ raise excs.Error(
546
+ f'The argument to add_column() must be a type; did you intend to use add_computed_column() instead?'
547
+ )
548
+ return self.add_columns(kwargs, if_exists=if_exists)
442
549
 
443
- new_col = self._create_columns({col_name: col_schema})[0]
444
- self._verify_column(new_col, set(self._schema.keys()), set(self._query_names))
445
- status = self._tbl_version.add_columns([new_col], print_stats=print_stats, on_error=on_error)
446
- FileCache.get().emit_eviction_warnings()
447
- return status
448
550
 
449
551
  def add_computed_column(
450
552
  self,
@@ -452,6 +554,7 @@ class Table(SchemaObject):
452
554
  stored: Optional[bool] = None,
453
555
  print_stats: bool = False,
454
556
  on_error: Literal['abort', 'ignore'] = 'abort',
557
+ if_exists: Literal['error', 'ignore', 'replace'] = 'error',
455
558
  **kwargs: exprs.Expr
456
559
  ) -> UpdateStatus:
457
560
  """
@@ -459,12 +562,27 @@ class Table(SchemaObject):
459
562
 
460
563
  Args:
461
564
  kwargs: Exactly one keyword argument of the form `col_name=expression`.
565
+ stored: Whether the column is materialized and stored or computed on demand. Only valid for image columns.
566
+ print_stats: If `True`, print execution metrics during evaluation.
567
+ on_error: Determines the behavior if an error occurs while evaluating the column expression for at least one
568
+ row.
569
+
570
+ - `'abort'`: an exception will be raised and the column will not be added.
571
+ - `'ignore'`: execution will continue and the column will be added. Any rows
572
+ with errors will have a `None` value for the column, with information about the error stored in the
573
+ corresponding `tbl.col_name.errortype` and `tbl.col_name.errormsg` fields.
574
+ if_exists: Determines the behavior if the column already exists. Must be one of the following:
575
+
576
+ - `'error'`: an exception will be raised.
577
+ - `'ignore'`: do nothing and return.
578
+ - `'replace' or 'replace_force'`: drop the existing column and add the new column, iff it has no dependents.
462
579
 
463
580
  Returns:
464
581
  Information about the execution status of the operation.
465
582
 
466
583
  Raises:
467
- Error: If the column name is invalid or already exists.
584
+ Error: If the column name is invalid or already exists and `if_exists='error'`,
585
+ or `if_exists='replace*'` but the column has dependents or is a basetable column.
468
586
 
469
587
  Examples:
470
588
  For a table with an image column `frame`, add an image column `rotated` that rotates the image by
@@ -477,6 +595,8 @@ class Table(SchemaObject):
477
595
  >>> tbl.add_computed_column(rotated=tbl.frame.rotate(90), stored=False)
478
596
  """
479
597
  self._check_is_dropped()
598
+ if self.get_metadata()['is_snapshot']:
599
+ raise excs.Error('Cannot add column to a snapshot.')
480
600
  if len(kwargs) != 1:
481
601
  raise excs.Error(
482
602
  f'add_computed_column() requires exactly one keyword argument of the form "column-name=type|value-expression"; '
@@ -490,8 +610,16 @@ class Table(SchemaObject):
490
610
  if stored is not None:
491
611
  col_schema['stored'] = stored
492
612
 
613
+ # handle existing columns based on if_exists parameter
614
+ cols_to_ignore = self._ignore_or_drop_existing_columns([col_name], IfExistsParam.validated(if_exists, 'if_exists'))
615
+ # if the column to add already exists and user asked to ignore
616
+ # existing column, there's nothing to do.
617
+ if len(cols_to_ignore) != 0:
618
+ assert cols_to_ignore[0] == col_name
619
+ return UpdateStatus()
620
+
493
621
  new_col = self._create_columns({col_name: col_schema})[0]
494
- self._verify_column(new_col, set(self._schema.keys()), set(self._query_names))
622
+ self._verify_column(new_col)
495
623
  status = self._tbl_version.add_columns([new_col], print_stats=print_stats, on_error=on_error)
496
624
  FileCache.get().emit_eviction_warnings()
497
625
  return status
@@ -571,18 +699,12 @@ class Table(SchemaObject):
571
699
  return columns
572
700
 
573
701
  @classmethod
574
- def _verify_column(
575
- cls, col: Column, existing_column_names: set[str], existing_query_names: Optional[set[str]] = None
576
- ) -> None:
702
+ def _verify_column(cls, col: Column) -> None:
577
703
  """Check integrity of user-supplied Column and supply defaults"""
578
704
  if is_system_column_name(col.name):
579
705
  raise excs.Error(f'{col.name!r} is a reserved name in Pixeltable; please choose a different column name.')
580
706
  if not is_valid_identifier(col.name):
581
707
  raise excs.Error(f"Invalid column name: {col.name!r}")
582
- if col.name in existing_column_names:
583
- raise excs.Error(f'Duplicate column name: {col.name!r}')
584
- if existing_query_names is not None and col.name in existing_query_names:
585
- raise excs.Error(f'Column name conflicts with a registered query: {col.name!r}')
586
708
  if col.stored is False and not (col.is_computed and col.col_type.is_image_type()):
587
709
  raise excs.Error(f'Column {col.name!r}: stored={col.stored} only applies to computed image columns')
588
710
  if col.stored is False and col.has_window_fn_call():
@@ -595,34 +717,75 @@ class Table(SchemaObject):
595
717
  """Check integrity of user-supplied schema and set defaults"""
596
718
  column_names: set[str] = set()
597
719
  for col in schema:
598
- cls._verify_column(col, column_names)
720
+ cls._verify_column(col)
599
721
  column_names.add(col.name)
600
722
 
601
- def drop_column(self, name: str) -> None:
723
+ def __check_column_name_exists(self, column_name: str, include_bases: bool = False) -> None:
724
+ col = self._tbl_version_path.get_column(column_name, include_bases)
725
+ if col is None:
726
+ raise excs.Error(f'Column {column_name!r} unknown')
727
+
728
+ def __check_column_ref_exists(self, col_ref: ColumnRef, include_bases: bool = False) -> None:
729
+ exists = self._tbl_version_path.has_column(col_ref.col, include_bases)
730
+ if not exists:
731
+ raise excs.Error(f'Unknown column: {col_ref.col.qualified_name}')
732
+
733
+ def drop_column(self, column: Union[str, ColumnRef], if_not_exists: Literal['error', 'ignore'] = 'error') -> None:
602
734
  """Drop a column from the table.
603
735
 
604
736
  Args:
605
- name: The name of the column to drop.
737
+ column: The name or reference of the column to drop.
738
+ if_not_exists: Directive for handling a non-existent column. Must be one of the following:
739
+
740
+ - `'error'`: raise an error if the column does not exist.
741
+ - `'ignore'`: do nothing if the column does not exist.
606
742
 
607
743
  Raises:
608
- Error: If the column does not exist or if it is referenced by a dependent computed column.
744
+ Error: If the column does not exist and `if_not_exists='error'`,
745
+ or if it is referenced by a dependent computed column.
609
746
 
610
747
  Examples:
611
- Drop the column `col` from the table `my_table`:
748
+ Drop the column `col` from the table `my_table` by column name:
612
749
 
613
750
  >>> tbl = pxt.get_table('my_table')
614
751
  ... tbl.drop_column('col')
752
+
753
+ Drop the column `col` from the table `my_table` by column reference:
754
+
755
+ >>> tbl = pxt.get_table('my_table')
756
+ ... tbl.drop_column(tbl.col)
757
+
758
+ Drop the column `col` from the table `my_table` if it exists, otherwise do nothing:
759
+
760
+ >>> tbl = pxt.get_table('my_table')
761
+ ... tbl.drop_column(tbl.col, if_not_exists='ignore')
615
762
  """
616
763
  self._check_is_dropped()
617
-
618
- if name not in self._tbl_version.cols_by_name:
619
- raise excs.Error(f'Unknown column: {name}')
620
- col = self._tbl_version.cols_by_name[name]
764
+ if self._tbl_version_path.is_snapshot():
765
+ raise excs.Error('Cannot drop column from a snapshot.')
766
+ col: Column = None
767
+ _if_not_exists = IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
768
+ if isinstance(column, str):
769
+ col = self._tbl_version_path.get_column(column, include_bases=False)
770
+ if col is None:
771
+ if _if_not_exists == IfNotExistsParam.ERROR:
772
+ raise excs.Error(f'Column {column!r} unknown')
773
+ assert _if_not_exists == IfNotExistsParam.IGNORE
774
+ return
775
+ col = self._tbl_version.cols_by_name[column]
776
+ else:
777
+ exists = self._tbl_version_path.has_column(column.col, include_bases=False)
778
+ if not exists:
779
+ if _if_not_exists == IfNotExistsParam.ERROR:
780
+ raise excs.Error(f'Unknown column: {column.col.qualified_name}')
781
+ assert _if_not_exists == IfNotExistsParam.IGNORE
782
+ return
783
+ col = column.col
621
784
 
622
785
  dependent_user_cols = [c for c in col.dependent_cols if c.name is not None]
623
786
  if len(dependent_user_cols) > 0:
624
787
  raise excs.Error(
625
- f'Cannot drop column `{name}` because the following columns depend on it:\n'
788
+ f'Cannot drop column `{col.name}` because the following columns depend on it:\n'
626
789
  f'{", ".join(c.name for c in dependent_user_cols)}'
627
790
  )
628
791
 
@@ -640,7 +803,7 @@ class Table(SchemaObject):
640
803
  for view, store in dependent_stores
641
804
  ]
642
805
  raise excs.Error(
643
- f'Cannot drop column `{name}` because the following external stores depend on it:\n'
806
+ f'Cannot drop column `{col.name}` because the following external stores depend on it:\n'
644
807
  f'{", ".join(dependent_store_names)}'
645
808
  )
646
809
 
@@ -662,144 +825,302 @@ class Table(SchemaObject):
662
825
  >>> tbl = pxt.get_table('my_table')
663
826
  ... tbl.rename_column('col1', 'col2')
664
827
  """
665
- self._check_is_dropped()
666
828
  self._tbl_version.rename_column(old_name, new_name)
667
829
 
830
+ def _list_index_info_for_test(self) -> list[dict[str, Any]]:
831
+ """
832
+ Returns list of all the indexes on this table. Used for testing.
833
+
834
+ Returns:
835
+ A list of index information, each containing the index's
836
+ id, name, and the name of the column it indexes.
837
+ """
838
+ assert not self._is_dropped
839
+ index_info = []
840
+ for idx_name, idx in self._tbl_version.idxs_by_name.items():
841
+ index_info.append({
842
+ '_id': idx.id,
843
+ '_name': idx_name,
844
+ '_column': idx.col.name
845
+ })
846
+ return index_info
847
+
668
848
  def add_embedding_index(
669
- self, col_name: str, *, idx_name: Optional[str] = None,
849
+ self, column: Union[str, ColumnRef], *, idx_name: Optional[str] = None,
850
+ embedding: Optional[pxt.Function] = None,
670
851
  string_embed: Optional[pxt.Function] = None, image_embed: Optional[pxt.Function] = None,
671
- metric: str = 'cosine'
852
+ metric: str = 'cosine',
853
+ if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error'
672
854
  ) -> None:
673
855
  """
674
- Add an embedding index to the table. Once the index is added, it will be automatically kept up to data as new
856
+ Add an embedding index to the table. Once the index is created, it will be automatically kept up-to-date as new
675
857
  rows are inserted into the table.
676
858
 
677
- Indices are currently supported only for `String` and `Image` columns. The index must specify, at
678
- minimum, an embedding of the appropriate type (string or image). It may optionally specify _both_ a string
679
- and image embedding (into the same vector space); in particular, this can be used to provide similarity search
680
- of text over an image column.
859
+ To add an embedding index, one must specify, at minimum, the column to be indexed and an embedding UDF.
860
+ Only `String` and `Image` columns are currently supported. Here's an example that uses a
861
+ [CLIP embedding][pixeltable.functions.huggingface.clip] to index an image column:
862
+
863
+ >>> from pixeltable.functions.huggingface import clip
864
+ ... embedding_fn = clip.using(model_id='openai/clip-vit-base-patch32')
865
+ ... tbl.add_embedding_index(tbl.img, embedding=embedding_fn)
866
+
867
+ Once the index is created, similarity lookups can be performed using the `similarity` pseudo-function.
868
+
869
+ >>> reference_img = PIL.Image.open('my_image.jpg')
870
+ ... sim = tbl.img.similarity(reference_img)
871
+ ... tbl.select(tbl.img, sim).order_by(sim, asc=False).limit(5)
872
+
873
+ If the embedding UDF is a multimodal embedding (supporting more than one data type), then lookups may be
874
+ performed using any of its supported types. In our example, CLIP supports both text and images, so we can
875
+ also search for images using a text description:
876
+
877
+ >>> sim = tbl.img.similarity('a picture of a train')
878
+ ... tbl.select(tbl.img, sim).order_by(sim, asc=False).limit(5)
681
879
 
682
880
  Args:
683
- col_name: The name of column to index; must be a `String` or `Image` column.
684
- idx_name: The name of index. If not specified, a name such as `'idx0'` will be generated automatically.
685
- If specified, the name must be unique for this table.
686
- string_embed: A function to embed text; required if the column is a `String` column.
687
- image_embed: A function to embed images; required if the column is an `Image` column.
688
- metric: Distance metric to use for the index; one of `'cosine'`, `'ip'`, or `'l2'`;
689
- the default is `'cosine'`.
881
+ column: The name of, or reference to, the column to be indexed; must be a `String` or `Image` column.
882
+ idx_name: An optional name for the index. If not specified, a name such as `'idx0'` will be generated
883
+ automatically. If specified, the name must be unique for this table.
884
+ embedding: The UDF to use for the embedding. Must be a UDF that accepts a single argument of type `String`
885
+ or `Image` (as appropriate for the column being indexed) and returns a fixed-size 1-dimensional
886
+ array of floats.
887
+ string_embed: An optional UDF to use for the string embedding component of this index.
888
+ Can be used in conjunction with `image_embed` to construct multimodal embeddings manually, by
889
+ specifying different embedding functions for different data types.
890
+ image_embed: An optional UDF to use for the image embedding component of this index.
891
+ Can be used in conjunction with `string_embed` to construct multimodal embeddings manually, by
892
+ specifying different embedding functions for different data types.
893
+ metric: Distance metric to use for the index; one of `'cosine'`, `'ip'`, or `'l2'`.
894
+ The default is `'cosine'`.
895
+ if_exists: Directive for handling an existing index with the same name. Must be one of the following:
896
+
897
+ - `'error'`: raise an error if an index with the same name already exists.
898
+ - `'ignore'`: do nothing if an index with the same name already exists.
899
+ - `'replace'` or `'replace_force'`: replace the existing index with the new one.
690
900
 
691
901
  Raises:
692
- Error: If an index with that name already exists for the table, or if the specified column does not exist.
902
+ Error: If an index with the specified name already exists for the table and `if_exists='error'`, or if the specified column does not exist.
693
903
 
694
904
  Examples:
695
905
  Add an index to the `img` column of the table `my_table`:
696
906
 
697
- >>> tbl = pxt.get_table('my_table')
698
- ... tbl.add_embedding_index('img', image_embed=my_image_func)
907
+ >>> from pixeltable.functions.huggingface import clip
908
+ ... tbl = pxt.get_table('my_table')
909
+ ... embedding_fn = clip.using(model_id='openai/clip-vit-base-patch32')
910
+ ... tbl.add_embedding_index(tbl.img, embedding=embedding_fn)
911
+
912
+ Alternatively, the `img` column may be specified by name:
699
913
 
700
- Add another index to the `img` column, using the inner product as the distance metric,
701
- and with a specific name; `string_embed` is also specified in order to search with text:
914
+ >>> tbl.add_embedding_index('img', embedding=embedding_fn)
915
+
916
+ Add a second index to the `img` column, using the inner product as the distance metric,
917
+ and with a specific name:
702
918
 
703
919
  >>> tbl.add_embedding_index(
704
- ... 'img',
705
- ... idx_name='clip_idx',
706
- ... image_embed=my_image_func,
707
- ... string_embed=my_string_func,
920
+ ... tbl.img,
921
+ ... idx_name='ip_idx',
922
+ ... embedding=embedding_fn,
708
923
  ... metric='ip'
709
924
  ... )
925
+
926
+ Add an index using separately specified string and image embeddings:
927
+
928
+ >>> tbl.add_embedding_index(
929
+ ... tbl.img,
930
+ ... string_embed=string_embedding_fn,
931
+ ... image_embed=image_embedding_fn
932
+ ... )
710
933
  """
711
934
  if self._tbl_version_path.is_snapshot():
712
935
  raise excs.Error('Cannot add an index to a snapshot')
713
- self._check_is_dropped()
714
- col = self._tbl_version_path.get_column(col_name, include_bases=True)
715
- if col is None:
716
- raise excs.Error(f'Column {col_name} unknown')
936
+ col: Column
937
+ if isinstance(column, str):
938
+ self.__check_column_name_exists(column, include_bases=True)
939
+ col = self._tbl_version_path.get_column(column, include_bases=True)
940
+ else:
941
+ self.__check_column_ref_exists(column, include_bases=True)
942
+ col = column.col
943
+
717
944
  if idx_name is not None and idx_name in self._tbl_version.idxs_by_name:
718
- raise excs.Error(f'Duplicate index name: {idx_name}')
945
+ _if_exists = IfExistsParam.validated(if_exists, 'if_exists')
946
+ # An index with the same name already exists.
947
+ # Handle it according to if_exists.
948
+ if _if_exists == IfExistsParam.ERROR:
949
+ raise excs.Error(f'Duplicate index name: {idx_name}')
950
+ if not isinstance(self._tbl_version.idxs_by_name[idx_name].idx, index.EmbeddingIndex):
951
+ raise excs.Error(f'Index `{idx_name}` is not an embedding index. Cannot {_if_exists.name.lower()} it.')
952
+ if _if_exists == IfExistsParam.IGNORE:
953
+ return
954
+ assert _if_exists == IfExistsParam.REPLACE or _if_exists == IfExistsParam.REPLACE_FORCE
955
+ self.drop_index(idx_name=idx_name)
956
+ assert idx_name not in self._tbl_version.idxs_by_name
719
957
  from pixeltable.index import EmbeddingIndex
720
958
 
721
959
  # create the EmbeddingIndex instance to verify args
722
- idx = EmbeddingIndex(col, metric=metric, string_embed=string_embed, image_embed=image_embed)
960
+ idx = EmbeddingIndex(col, metric=metric, embed=embedding, string_embed=string_embed, image_embed=image_embed)
723
961
  status = self._tbl_version.add_index(col, idx_name=idx_name, idx=idx)
724
962
  # TODO: how to deal with exceptions here? drop the index and raise?
725
963
  FileCache.get().emit_eviction_warnings()
726
964
 
727
- def drop_embedding_index(self, *, column_name: Optional[str] = None, idx_name: Optional[str] = None) -> None:
965
+ def drop_embedding_index(
966
+ self, *,
967
+ column: Union[str, ColumnRef, None] = None,
968
+ idx_name: Optional[str] = None,
969
+ if_not_exists: Literal['error', 'ignore'] = 'error'
970
+ ) -> None:
728
971
  """
729
972
  Drop an embedding index from the table. Either a column name or an index name (but not both) must be
730
- specified. If a column name is specified, it must be a column containing exactly one embedding index;
731
- otherwise the specific index name must be provided instead.
973
+ specified. If a column name or reference is specified, it must be a column containing exactly one
974
+ embedding index; otherwise the specific index name must be provided instead.
732
975
 
733
976
  Args:
734
- column_name: The name of the column from which to drop the index. Invalid if the column has multiple
735
- embedding indices.
977
+ column: The name of, or reference to, the column from which to drop the index.
978
+ The column must have only one embedding index.
736
979
  idx_name: The name of the index to drop.
980
+ if_not_exists: Directive for handling a non-existent index. Must be one of the following:
981
+
982
+ - `'error'`: raise an error if the index does not exist.
983
+ - `'ignore'`: do nothing if the index does not exist.
984
+
985
+ Note that `if_not_exists` parameter is only applicable when an `idx_name` is specified
986
+ and it does not exist, or when `column` is specified and it has no index.
987
+ `if_not_exists` does not apply to a non-existing column.
737
988
 
738
989
  Raises:
739
- Error: If `column_name` is specified, but the column does not exist, or it contains no embedding
740
- indices or multiple embedding indices.
741
- Error: If `idx_name` is specified, but the index does not exist or is not an embedding index.
990
+ Error: If `column` is specified, but the column does not exist, or it contains no embedding
991
+ indices and `if_not_exists='error'`, or the column has multiple embedding indices.
992
+ Error: If `idx_name` is specified, but the index is not an embedding index, or
993
+ the index does not exist and `if_not_exists='error'`.
742
994
 
743
995
  Examples:
744
- Drop the embedding index on the `img` column of the table `my_table`:
996
+ Drop the embedding index on the `img` column of the table `my_table` by column name:
745
997
 
746
998
  >>> tbl = pxt.get_table('my_table')
747
- ... tbl.drop_embedding_index(column_name='img')
748
- """
749
- self._drop_index(column_name=column_name, idx_name=idx_name, _idx_class=index.EmbeddingIndex)
999
+ ... tbl.drop_embedding_index(column='img')
750
1000
 
751
- def drop_index(self, *, column_name: Optional[str] = None, idx_name: Optional[str] = None) -> None:
1001
+ Drop the embedding index on the `img` column of the table `my_table` by column reference:
1002
+
1003
+ >>> tbl = pxt.get_table('my_table')
1004
+ ... tbl.drop_embedding_index(column=tbl.img)
1005
+
1006
+ Drop the embedding index `idx1` of the table `my_table` by index name:
1007
+ >>> tbl = pxt.get_table('my_table')
1008
+ ... tbl.drop_embedding_index(idx_name='idx1')
1009
+
1010
+ Drop the embedding index `idx1` of the table `my_table` by index name, if it exists, otherwise do nothing:
1011
+ >>> tbl = pxt.get_table('my_table')
1012
+ ... tbl.drop_embedding_index(idx_name='idx1', if_not_exists='ignore')
1013
+ """
1014
+ if (column is None) == (idx_name is None):
1015
+ raise excs.Error("Exactly one of 'column' or 'idx_name' must be provided")
1016
+
1017
+ col: Column = None
1018
+ if idx_name is None:
1019
+ if isinstance(column, str):
1020
+ self.__check_column_name_exists(column, include_bases=True)
1021
+ col = self._tbl_version_path.get_column(column, include_bases=True)
1022
+ else:
1023
+ self.__check_column_ref_exists(column, include_bases=True)
1024
+ col = column.col
1025
+ assert col is not None
1026
+ self._drop_index(col=col, idx_name=idx_name, _idx_class=index.EmbeddingIndex, if_not_exists=if_not_exists)
1027
+
1028
+ def drop_index(
1029
+ self, *,
1030
+ column: Union[str, ColumnRef, None] = None,
1031
+ idx_name: Optional[str] = None,
1032
+ if_not_exists: Literal['error', 'ignore'] = 'error'
1033
+ ) -> None:
752
1034
  """
753
1035
  Drop an index from the table. Either a column name or an index name (but not both) must be
754
- specified. If a column name is specified, it must be a column containing exactly one index;
1036
+ specified. If a column name or reference is specified, it must be a column containing exactly one index;
755
1037
  otherwise the specific index name must be provided instead.
756
1038
 
757
1039
  Args:
758
- column_name: The name of the column from which to drop the index. Invalid if the column has multiple
759
- indices.
1040
+ column: The name of, or reference to, the column from which to drop the index.
1041
+ The column must have only one index.
760
1042
  idx_name: The name of the index to drop.
1043
+ if_not_exists: Directive for handling a non-existent index. Must be one of the following:
1044
+
1045
+ - `'error'`: raise an error if the index does not exist.
1046
+ - `'ignore'`: do nothing if the index does not exist.
1047
+
1048
+ Note that `if_not_exists` parameter is only applicable when an `idx_name` is specified
1049
+ and it does not exist, or when `column` is specified and it has no index.
1050
+ `if_not_exists` does not apply to a non-existing column.
761
1051
 
762
1052
  Raises:
763
- Error: If `column_name` is specified, but the column does not exist, or it contains no
1053
+ Error: If `column` is specified, but the column does not exist, or it contains no
764
1054
  indices or multiple indices.
765
1055
  Error: If `idx_name` is specified, but the index does not exist.
766
1056
 
767
1057
  Examples:
768
- Drop the index on the `img` column of the table `my_table`:
1058
+ Drop the index on the `img` column of the table `my_table` by column name:
769
1059
 
770
1060
  >>> tbl = pxt.get_table('my_table')
771
1061
  ... tbl.drop_index(column='img')
1062
+
1063
+ Drop the index on the `img` column of the table `my_table` by column reference:
1064
+
1065
+ >>> tbl = pxt.get_table('my_table')
1066
+ ... tbl.drop_index(column=tbl.img)
1067
+
1068
+ Drop the index `idx1` of the table `my_table` by index name:
1069
+ >>> tbl = pxt.get_table('my_table')
1070
+ ... tbl.drop_index(idx_name='idx1')
1071
+
1072
+ Drop the index `idx1` of the table `my_table` by index name, if it exists, otherwise do nothing:
1073
+ >>> tbl = pxt.get_table('my_table')
1074
+ ... tbl.drop_index(idx_name='idx1', if_not_exists='ignore')
1075
+
772
1076
  """
773
- self._drop_index(column_name=column_name, idx_name=idx_name)
1077
+ if (column is None) == (idx_name is None):
1078
+ raise excs.Error("Exactly one of 'column' or 'idx_name' must be provided")
1079
+
1080
+ col: Column = None
1081
+ if idx_name is None:
1082
+ if isinstance(column, str):
1083
+ self.__check_column_name_exists(column, include_bases=True)
1084
+ col = self._tbl_version_path.get_column(column, include_bases=True)
1085
+ else:
1086
+ self.__check_column_ref_exists(column, include_bases=True)
1087
+ col = column.col
1088
+ assert col is not None
1089
+ self._drop_index(col=col, idx_name=idx_name, if_not_exists=if_not_exists)
774
1090
 
775
1091
  def _drop_index(
776
- self, *, column_name: Optional[str] = None, idx_name: Optional[str] = None,
777
- _idx_class: Optional[type[index.IndexBase]] = None
1092
+ self, *, col: Optional[Column] = None,
1093
+ idx_name: Optional[str] = None,
1094
+ _idx_class: Optional[type[index.IndexBase]] = None,
1095
+ if_not_exists: Literal['error', 'ignore'] = 'error'
778
1096
  ) -> None:
779
1097
  if self._tbl_version_path.is_snapshot():
780
1098
  raise excs.Error('Cannot drop an index from a snapshot')
781
- self._check_is_dropped()
782
- if (column_name is None) == (idx_name is None):
783
- raise excs.Error("Exactly one of 'column_name' or 'idx_name' must be provided")
1099
+ assert (col is None) != (idx_name is None)
784
1100
 
785
1101
  if idx_name is not None:
1102
+ _if_not_exists = IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
786
1103
  if idx_name not in self._tbl_version.idxs_by_name:
787
- raise excs.Error(f'Index {idx_name!r} does not exist')
1104
+ if _if_not_exists == IfNotExistsParam.ERROR:
1105
+ raise excs.Error(f'Index {idx_name!r} does not exist')
1106
+ assert _if_not_exists == IfNotExistsParam.IGNORE
1107
+ return
788
1108
  idx_id = self._tbl_version.idxs_by_name[idx_name].id
789
1109
  else:
790
- col = self._tbl_version_path.get_column(column_name, include_bases=True)
791
- if col is None:
792
- raise excs.Error(f'Column {column_name!r} unknown')
793
1110
  if col.tbl.id != self._tbl_version.id:
794
1111
  raise excs.Error(
795
- f'Column {column_name!r}: cannot drop index from column that belongs to base ({col.tbl.name}!r)')
1112
+ f'Column {col.name!r}: cannot drop index from column that belongs to base ({col.tbl.name}!r)')
796
1113
  idx_info = [info for info in self._tbl_version.idxs_by_name.values() if info.col.id == col.id]
797
1114
  if _idx_class is not None:
798
1115
  idx_info = [info for info in idx_info if isinstance(info.idx, _idx_class)]
799
1116
  if len(idx_info) == 0:
800
- raise excs.Error(f'Column {column_name!r} does not have an index')
1117
+ _if_not_exists = IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
1118
+ if _if_not_exists == IfNotExistsParam.ERROR:
1119
+ raise excs.Error(f'Column {col.name!r} does not have an index')
1120
+ assert _if_not_exists == IfNotExistsParam.IGNORE
1121
+ return
801
1122
  if len(idx_info) > 1:
802
- raise excs.Error(f"Column {column_name!r} has multiple indices; specify 'idx_name' instead")
1123
+ raise excs.Error(f"Column {col.name!r} has multiple indices; specify 'idx_name' instead")
803
1124
  idx_id = idx_info[0].id
804
1125
  self._tbl_version.drop_index(idx_id)
805
1126
 
@@ -919,7 +1240,6 @@ class Table(SchemaObject):
919
1240
 
920
1241
  >>> tbl.update({'int_col': tbl.int_col + 1}, where=tbl.int_col == 0)
921
1242
  """
922
- self._check_is_dropped()
923
1243
  status = self._tbl_version.update(value_spec, where, cascade)
924
1244
  FileCache.get().emit_eviction_warnings()
925
1245
  return status
@@ -955,7 +1275,6 @@ class Table(SchemaObject):
955
1275
  """
956
1276
  if self._tbl_version_path.is_snapshot():
957
1277
  raise excs.Error('Cannot update a snapshot')
958
- self._check_is_dropped()
959
1278
  rows = list(rows)
960
1279
 
961
1280
  row_updates: list[dict[Column, exprs.Expr]] = []
@@ -1010,46 +1329,8 @@ class Table(SchemaObject):
1010
1329
  """
1011
1330
  if self._tbl_version_path.is_snapshot():
1012
1331
  raise excs.Error('Cannot revert a snapshot')
1013
- self._check_is_dropped()
1014
1332
  self._tbl_version.revert()
1015
1333
 
1016
- @overload
1017
- def query(self, py_fn: Callable) -> 'pxt.func.QueryTemplateFunction': ...
1018
-
1019
- @overload
1020
- def query(
1021
- self, *, param_types: Optional[list[ts.ColumnType]] = None
1022
- ) -> Callable[[Callable], 'pxt.func.QueryTemplateFunction']: ...
1023
-
1024
- def query(self, *args: Any, **kwargs: Any) -> Any:
1025
- def make_query_template(
1026
- py_fn: Callable, param_types: Optional[list[ts.ColumnType]]
1027
- ) -> 'pxt.func.QueryTemplateFunction':
1028
- if py_fn.__module__ != '__main__' and py_fn.__name__.isidentifier():
1029
- # this is a named function in a module
1030
- function_path = f'{py_fn.__module__}.{py_fn.__qualname__}'
1031
- else:
1032
- function_path = None
1033
- query_name = py_fn.__name__
1034
- if query_name in self._schema.keys():
1035
- raise excs.Error(f'Query name {query_name!r} conflicts with existing column')
1036
- if query_name in self.__query_scope._queries and function_path is not None:
1037
- raise excs.Error(f'Duplicate query name: {query_name!r}')
1038
- query_fn = pxt.func.QueryTemplateFunction.create(
1039
- py_fn, param_types=param_types, path=function_path, name=query_name)
1040
- self.__query_scope._queries[query_name] = query_fn
1041
- return query_fn
1042
-
1043
- # TODO: verify that the inferred return type matches that of the template
1044
- # TODO: verify that the signature doesn't contain batched parameters
1045
-
1046
- if len(args) == 1:
1047
- assert len(kwargs) == 0 and callable(args[0])
1048
- return make_query_template(args[0], None)
1049
- else:
1050
- assert len(args) == 0 and len(kwargs) == 1 and 'param_types' in kwargs
1051
- return lambda py_fn: make_query_template(py_fn, kwargs['param_types'])
1052
-
1053
1334
  @property
1054
1335
  def external_stores(self) -> list[str]:
1055
1336
  return list(self._tbl_version.external_stores.keys())
@@ -1060,7 +1341,6 @@ class Table(SchemaObject):
1060
1341
  """
1061
1342
  if self._tbl_version.is_snapshot:
1062
1343
  raise excs.Error(f'Table `{self._name}` is a snapshot, so it cannot be linked to an external store.')
1063
- self._check_is_dropped()
1064
1344
  if store.name in self.external_stores:
1065
1345
  raise excs.Error(f'Table `{self._name}` already has an external store with that name: {store.name}')
1066
1346
  _logger.info(f'Linking external store `{store.name}` to table `{self._name}`')
@@ -1140,7 +1420,7 @@ class Table(SchemaObject):
1140
1420
  return sync_status
1141
1421
 
1142
1422
  def __dir__(self) -> list[str]:
1143
- return list(super().__dir__()) + list(self._schema.keys()) + self._query_names
1423
+ return list(super().__dir__()) + list(self._schema.keys())
1144
1424
 
1145
1425
  def _ipython_key_completions_(self) -> list[str]:
1146
- return list(self._schema.keys()) + self._query_names
1426
+ return list(self._schema.keys())