pixeltable 0.2.25__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (97)
  1. pixeltable/__init__.py +2 -2
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +1 -1
  4. pixeltable/catalog/dir.py +6 -0
  5. pixeltable/catalog/globals.py +25 -0
  6. pixeltable/catalog/named_function.py +4 -0
  7. pixeltable/catalog/path_dict.py +37 -11
  8. pixeltable/catalog/schema_object.py +6 -0
  9. pixeltable/catalog/table.py +421 -231
  10. pixeltable/catalog/table_version.py +22 -8
  11. pixeltable/catalog/view.py +5 -7
  12. pixeltable/dataframe.py +439 -105
  13. pixeltable/env.py +19 -5
  14. pixeltable/exec/__init__.py +1 -1
  15. pixeltable/exec/exec_node.py +6 -7
  16. pixeltable/exec/expr_eval_node.py +1 -1
  17. pixeltable/exec/sql_node.py +92 -45
  18. pixeltable/exprs/__init__.py +1 -0
  19. pixeltable/exprs/arithmetic_expr.py +1 -1
  20. pixeltable/exprs/array_slice.py +1 -1
  21. pixeltable/exprs/column_property_ref.py +1 -1
  22. pixeltable/exprs/column_ref.py +29 -2
  23. pixeltable/exprs/comparison.py +1 -1
  24. pixeltable/exprs/compound_predicate.py +1 -1
  25. pixeltable/exprs/expr.py +12 -5
  26. pixeltable/exprs/expr_set.py +8 -0
  27. pixeltable/exprs/function_call.py +147 -39
  28. pixeltable/exprs/in_predicate.py +1 -1
  29. pixeltable/exprs/inline_expr.py +25 -5
  30. pixeltable/exprs/is_null.py +1 -1
  31. pixeltable/exprs/json_mapper.py +1 -1
  32. pixeltable/exprs/json_path.py +1 -1
  33. pixeltable/exprs/method_ref.py +1 -1
  34. pixeltable/exprs/row_builder.py +1 -1
  35. pixeltable/exprs/rowid_ref.py +1 -1
  36. pixeltable/exprs/similarity_expr.py +14 -7
  37. pixeltable/exprs/sql_element_cache.py +4 -0
  38. pixeltable/exprs/type_cast.py +2 -2
  39. pixeltable/exprs/variable.py +3 -0
  40. pixeltable/func/__init__.py +5 -4
  41. pixeltable/func/aggregate_function.py +151 -68
  42. pixeltable/func/callable_function.py +48 -16
  43. pixeltable/func/expr_template_function.py +64 -23
  44. pixeltable/func/function.py +195 -27
  45. pixeltable/func/function_registry.py +2 -1
  46. pixeltable/func/query_template_function.py +51 -9
  47. pixeltable/func/signature.py +64 -7
  48. pixeltable/func/tools.py +153 -0
  49. pixeltable/func/udf.py +57 -35
  50. pixeltable/functions/__init__.py +2 -2
  51. pixeltable/functions/anthropic.py +51 -4
  52. pixeltable/functions/gemini.py +85 -0
  53. pixeltable/functions/globals.py +54 -34
  54. pixeltable/functions/huggingface.py +10 -28
  55. pixeltable/functions/json.py +3 -8
  56. pixeltable/functions/math.py +67 -0
  57. pixeltable/functions/ollama.py +8 -8
  58. pixeltable/functions/openai.py +51 -4
  59. pixeltable/functions/timestamp.py +1 -1
  60. pixeltable/functions/video.py +3 -9
  61. pixeltable/functions/vision.py +1 -1
  62. pixeltable/globals.py +354 -80
  63. pixeltable/index/embedding_index.py +106 -34
  64. pixeltable/io/__init__.py +1 -1
  65. pixeltable/io/label_studio.py +1 -1
  66. pixeltable/io/parquet.py +39 -19
  67. pixeltable/iterators/document.py +12 -0
  68. pixeltable/metadata/__init__.py +1 -1
  69. pixeltable/metadata/converters/convert_16.py +2 -1
  70. pixeltable/metadata/converters/convert_17.py +2 -1
  71. pixeltable/metadata/converters/convert_22.py +17 -0
  72. pixeltable/metadata/converters/convert_23.py +35 -0
  73. pixeltable/metadata/converters/convert_24.py +56 -0
  74. pixeltable/metadata/converters/convert_25.py +19 -0
  75. pixeltable/metadata/converters/util.py +4 -2
  76. pixeltable/metadata/notes.py +4 -0
  77. pixeltable/metadata/schema.py +1 -0
  78. pixeltable/plan.py +128 -50
  79. pixeltable/store.py +1 -1
  80. pixeltable/type_system.py +196 -54
  81. pixeltable/utils/arrow.py +8 -3
  82. pixeltable/utils/description_helper.py +89 -0
  83. pixeltable/utils/documents.py +14 -0
  84. {pixeltable-0.2.25.dist-info → pixeltable-0.3.0.dist-info}/METADATA +30 -20
  85. pixeltable-0.3.0.dist-info/RECORD +155 -0
  86. {pixeltable-0.2.25.dist-info → pixeltable-0.3.0.dist-info}/WHEEL +1 -1
  87. pixeltable-0.3.0.dist-info/entry_points.txt +3 -0
  88. pixeltable/tool/create_test_db_dump.py +0 -311
  89. pixeltable/tool/create_test_video.py +0 -81
  90. pixeltable/tool/doc_plugins/griffe.py +0 -50
  91. pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
  92. pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
  93. pixeltable/tool/embed_udf.py +0 -9
  94. pixeltable/tool/mypy_plugin.py +0 -55
  95. pixeltable-0.2.25.dist-info/RECORD +0 -154
  96. pixeltable-0.2.25.dist-info/entry_points.txt +0 -3
  97. {pixeltable-0.2.25.dist-info → pixeltable-0.3.0.dist-info}/LICENSE +0 -0
@@ -10,7 +10,6 @@ from typing import TYPE_CHECKING, Any, Callable, Iterable, Literal, Optional, Se
10
10
  from uuid import UUID
11
11
 
12
12
  import pandas as pd
13
- import pandas.io.formats.style
14
13
  import sqlalchemy as sql
15
14
 
16
15
  import pixeltable as pxt
@@ -21,18 +20,22 @@ import pixeltable.exprs as exprs
21
20
  import pixeltable.index as index
22
21
  import pixeltable.metadata.schema as schema
23
22
  import pixeltable.type_system as ts
24
- from pixeltable.utils.filecache import FileCache
25
23
 
24
+ from ..exprs import ColumnRef
25
+ from ..utils.description_helper import DescriptionHelper
26
+ from ..utils.filecache import FileCache
26
27
  from .column import Column
27
- from .globals import _ROWID_COLUMN_NAME, UpdateStatus, is_system_column_name, is_valid_identifier, MediaValidation
28
+ from .globals import (_ROWID_COLUMN_NAME, IfExistsParam, IfNotExistsParam, MediaValidation, UpdateStatus,
29
+ is_system_column_name, is_valid_identifier)
28
30
  from .schema_object import SchemaObject
29
31
  from .table_version import TableVersion
30
32
  from .table_version_path import TableVersionPath
31
- from ..exprs import ColumnRef
32
33
 
33
34
  if TYPE_CHECKING:
34
35
  import torch.utils.data
35
36
 
37
+ import pixeltable.plan
38
+
36
39
  _logger = logging.getLogger('pixeltable')
37
40
 
38
41
  class Table(SchemaObject):
@@ -46,23 +49,15 @@ class Table(SchemaObject):
46
49
  def __init__(self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath):
47
50
  super().__init__(id, name, dir_id)
48
51
  self._is_dropped = False
49
- self._tbl_version_path = tbl_version_path
50
- self.__query_scope = self.QueryScope(self)
51
-
52
- class QueryScope:
53
- __table: 'Table'
54
- _queries: dict[str, pxt.func.QueryTemplateFunction]
55
-
56
- def __init__(self, table: 'Table') -> None:
57
- self.__table = table
58
- self._queries = {}
52
+ self.__tbl_version_path = tbl_version_path
59
53
 
60
- def __getattr__(self, name: str) -> pxt.func.QueryTemplateFunction:
61
- if name in self._queries:
62
- return self._queries[name]
63
- raise AttributeError(f'Table {self.__table._name!r} has no query with that name: {name!r}')
54
+ @property
55
+ def _has_dependents(self) -> bool:
56
+ """Returns True if this table has any dependent views or snapshots."""
57
+ return len(self._get_views(recursive=False)) > 0
64
58
 
65
59
  def _move(self, new_name: str, new_dir_id: UUID) -> None:
60
+ self._check_is_dropped()
66
61
  super()._move(new_name, new_dir_id)
67
62
  with env.Env.get().engine.begin() as conn:
68
63
  stmt = sql.text((
@@ -96,6 +91,7 @@ class Table(SchemaObject):
96
91
  }
97
92
  ```
98
93
  """
94
+ self._check_is_dropped()
99
95
  md = super().get_metadata()
100
96
  md['base'] = self._base._path if self._base is not None else None
101
97
  md['schema'] = self._schema
@@ -116,6 +112,12 @@ class Table(SchemaObject):
116
112
  """Return TableVersion for just this table."""
117
113
  return self._tbl_version_path.tbl_version
118
114
 
115
+ @property
116
+ def _tbl_version_path(self) -> TableVersionPath:
117
+ """Return TableVersionPath for just this table."""
118
+ self._check_is_dropped()
119
+ return self.__tbl_version_path
120
+
119
121
  def __hash__(self) -> int:
120
122
  return hash(self._tbl_version.id)
121
123
 
@@ -124,23 +126,12 @@ class Table(SchemaObject):
124
126
  raise excs.Error(f'{self._display_name()} {self._name} has been dropped')
125
127
 
126
128
  def __getattr__(self, name: str) -> 'pxt.exprs.ColumnRef':
127
- """Return a ColumnRef for the given name.
128
- """
129
+ """Return a ColumnRef for the given name."""
129
130
  return self._tbl_version_path.get_column_ref(name)
130
131
 
131
- @overload
132
- def __getitem__(self, name: str) -> 'pxt.exprs.ColumnRef': ...
133
-
134
- @overload
135
- def __getitem__(self, index: Union[exprs.Expr, Sequence[exprs.Expr]]) -> 'pxt.DataFrame': ...
136
-
137
- def __getitem__(self, index):
138
- """Return a ColumnRef or QueryTemplateFunction for the given name, or a DataFrame for the given slice.
139
- """
140
- if isinstance(index, str):
141
- return getattr(self, index)
142
- else:
143
- return self._df()[index]
132
+ def __getitem__(self, name: str) -> 'pxt.exprs.ColumnRef':
133
+ """Return a ColumnRef for the given name."""
134
+ return getattr(self, name)
144
135
 
145
136
  def list_views(self, *, recursive: bool = True) -> list[str]:
146
137
  """
@@ -153,6 +144,7 @@ class Table(SchemaObject):
153
144
  Returns:
154
145
  A list of view paths.
155
146
  """
147
+ self._check_is_dropped()
156
148
  return [t._path for t in self._get_views(recursive=recursive)]
157
149
 
158
150
  def _get_views(self, *, recursive: bool = True) -> list['Table']:
@@ -166,26 +158,42 @@ class Table(SchemaObject):
166
158
  """Return a DataFrame for this table.
167
159
  """
168
160
  # local import: avoid circular imports
169
- return pxt.DataFrame(self._tbl_version_path)
170
-
171
- @property
172
- def queries(self) -> 'Table.QueryScope':
173
- return self.__query_scope
161
+ from pixeltable.plan import FromClause
162
+ return pxt.DataFrame(FromClause(tbls=[self._tbl_version_path]))
174
163
 
175
164
  def select(self, *items: Any, **named_items: Any) -> 'pxt.DataFrame':
176
- """Return a [`DataFrame`][pixeltable.DataFrame] for this table."""
165
+ """ Select columns or expressions from this table.
166
+
167
+ See [`DataFrame.select`][pixeltable.DataFrame.select] for more details.
168
+ """
177
169
  return self._df().select(*items, **named_items)
178
170
 
179
171
  def where(self, pred: 'exprs.Expr') -> 'pxt.DataFrame':
180
- """Return a [`DataFrame`][pixeltable.DataFrame] for this table."""
172
+ """Filter rows from this table based on the expression.
173
+
174
+ See [`DataFrame.where`][pixeltable.DataFrame.where] for more details.
175
+ """
181
176
  return self._df().where(pred)
182
177
 
178
+ def join(
179
+ self, other: 'Table', *, on: Optional['exprs.Expr'] = None,
180
+ how: 'pixeltable.plan.JoinType.LiteralType' = 'inner'
181
+ ) -> 'pxt.DataFrame':
182
+ """Join this table with another table."""
183
+ return self._df().join(other, on=on, how=how)
184
+
183
185
  def order_by(self, *items: 'exprs.Expr', asc: bool = True) -> 'pxt.DataFrame':
184
- """Return a [`DataFrame`][pixeltable.DataFrame] for this table."""
186
+ """Order the rows of this table based on the expression.
187
+
188
+ See [`DataFrame.order_by`][pixeltable.DataFrame.order_by] for more details.
189
+ """
185
190
  return self._df().order_by(*items, asc=asc)
186
191
 
187
192
  def group_by(self, *items: 'exprs.Expr') -> 'pxt.DataFrame':
188
- """Return a [`DataFrame`][pixeltable.DataFrame] for this table."""
193
+ """Group the rows of this table based on the expression.
194
+
195
+ See [`DataFrame.group_by`][pixeltable.DataFrame.group_by] for more details.
196
+ """
189
197
  return self._df().group_by(*items)
190
198
 
191
199
  def limit(self, n: int) -> 'pxt.DataFrame':
@@ -200,7 +208,6 @@ class Table(SchemaObject):
200
208
  ) -> 'pxt.dataframe.DataFrameResultSet':
201
209
  """Return rows from this table.
202
210
  """
203
- self._check_is_dropped()
204
211
  return self._df().show(*args, **kwargs)
205
212
 
206
213
  def head(
@@ -230,11 +237,6 @@ class Table(SchemaObject):
230
237
  """Return the schema (column names and column types) of this table."""
231
238
  return {c.name: c.col_type for c in self._tbl_version_path.columns()}
232
239
 
233
- @property
234
- def _query_names(self) -> list[str]:
235
- """Return the names of the registered queries for this table."""
236
- return list(self.__query_scope._queries.keys())
237
-
238
240
  @property
239
241
  def _base(self) -> Optional['Table']:
240
242
  """
@@ -246,6 +248,18 @@ class Table(SchemaObject):
246
248
  base_id = self._tbl_version_path.base.tbl_version.id
247
249
  return catalog.Catalog.get().tbls[base_id]
248
250
 
251
+ @property
252
+ def _bases(self) -> list['Table']:
253
+ """
254
+ The ancestor list of bases of this table, starting with its immediate base.
255
+ """
256
+ bases = []
257
+ base = self._base
258
+ while base is not None:
259
+ bases.append(base)
260
+ base = base._base
261
+ return bases
262
+
249
263
  @property
250
264
  def _comment(self) -> str:
251
265
  return self._tbl_version.comment
@@ -258,47 +272,98 @@ class Table(SchemaObject):
258
272
  def _media_validation(self) -> MediaValidation:
259
273
  return self._tbl_version.media_validation
260
274
 
261
- def _description(self, cols: Optional[Iterable[Column]] = None) -> pd.DataFrame:
262
- cols = self._tbl_version_path.columns()
263
- df = pd.DataFrame({
264
- 'Column Name': [c.name for c in cols],
265
- 'Type': [c.col_type._to_str(as_schema=True) for c in cols],
266
- 'Computed With': [c.value_expr.display_str(inline=False) if c.value_expr is not None else '' for c in cols],
267
- })
268
- return df
269
-
270
- def _description_html(self, cols: Optional[Iterable[Column]] = None) -> pandas.io.formats.style.Styler:
271
- pd_df = self._description(cols)
272
- # white-space: pre-wrap: print \n as newline
273
- # th: center-align headings
274
- return (
275
- pd_df.style.set_properties(None, **{'white-space': 'pre-wrap', 'text-align': 'left'})
276
- .set_table_styles([dict(selector='th', props=[('text-align', 'center')])])
277
- .hide(axis='index')
275
+ def __repr__(self) -> str:
276
+ return self._descriptors().to_string()
277
+
278
+ def _repr_html_(self) -> str:
279
+ return self._descriptors().to_html()
280
+
281
+ def _descriptors(self) -> DescriptionHelper:
282
+ """
283
+ Constructs a list of descriptors for this table that can be pretty-printed.
284
+ """
285
+ helper = DescriptionHelper()
286
+ helper.append(self._title_descriptor())
287
+ helper.append(self._col_descriptor())
288
+ idxs = self._index_descriptor()
289
+ if not idxs.empty:
290
+ helper.append(idxs)
291
+ stores = self._external_store_descriptor()
292
+ if not stores.empty:
293
+ helper.append(stores)
294
+ if self._comment:
295
+ helper.append(f'COMMENT: {self._comment}')
296
+ return helper
297
+
298
+ def _title_descriptor(self) -> str:
299
+ title: str
300
+ if self._base is None:
301
+ title = f'Table\n{self._path!r}'
302
+ else:
303
+ title = f'View\n{self._path!r}'
304
+ title += f'\n(of {self.__bases_to_desc()})'
305
+ return title
306
+
307
+ def _col_descriptor(self, columns: Optional[list[str]] = None) -> pd.DataFrame:
308
+ return pd.DataFrame(
309
+ {
310
+ 'Column Name': col.name,
311
+ 'Type': col.col_type._to_str(as_schema=True),
312
+ 'Computed With': col.value_expr.display_str(inline=False) if col.value_expr is not None else ''
313
+ }
314
+ for col in self.__tbl_version_path.columns()
315
+ if columns is None or col.name in columns
278
316
  )
279
317
 
318
+ def __bases_to_desc(self) -> str:
319
+ bases = self._bases
320
+ assert len(bases) >= 1
321
+ if len(bases) <= 2:
322
+ return ', '.join(repr(b._path) for b in bases)
323
+ else:
324
+ return f'{bases[0]._path!r}, ..., {bases[-1]._path!r}'
325
+
326
+ def _index_descriptor(self, columns: Optional[list[str]] = None) -> pd.DataFrame:
327
+ from pixeltable import index
328
+
329
+ pd_rows = []
330
+ for name, info in self._tbl_version.idxs_by_name.items():
331
+ if isinstance(info.idx, index.EmbeddingIndex) and (columns is None or info.col.name in columns):
332
+ display_embed = info.idx.string_embed if info.col.col_type.is_string_type() else info.idx.image_embed
333
+ if info.idx.string_embed is not None and info.idx.image_embed is not None:
334
+ embed_str = f'{display_embed} (+1)'
335
+ else:
336
+ embed_str = str(display_embed)
337
+ row = {
338
+ 'Index Name': name,
339
+ 'Column': info.col.name,
340
+ 'Metric': str(info.idx.metric.name.lower()),
341
+ 'Embedding': embed_str,
342
+ }
343
+ pd_rows.append(row)
344
+ return pd.DataFrame(pd_rows)
345
+
346
+ def _external_store_descriptor(self) -> pd.DataFrame:
347
+ pd_rows = []
348
+ for name, store in self._tbl_version.external_stores.items():
349
+ row = {
350
+ 'External Store': name,
351
+ 'Type': type(store).__name__,
352
+ }
353
+ pd_rows.append(row)
354
+ return pd.DataFrame(pd_rows)
355
+
280
356
  def describe(self) -> None:
281
357
  """
282
358
  Print the table schema.
283
359
  """
360
+ self._check_is_dropped()
284
361
  if getattr(builtins, '__IPYTHON__', False):
285
362
  from IPython.display import display
286
- display(self._description_html())
363
+ display(self._repr_html_())
287
364
  else:
288
365
  print(repr(self))
289
366
 
290
- # TODO: Display comments in _repr_html()
291
- def __repr__(self) -> str:
292
- description_str = self._description().to_string(index=False)
293
- if self._comment is None:
294
- comment = ''
295
- else:
296
- comment = f'{self._comment}\n'
297
- return f'{self._display_name()} \'{self._name}\'\n{comment}{description_str}'
298
-
299
- def _repr_html_(self) -> str:
300
- return self._description_html()._repr_html_() # type: ignore[attr-defined]
301
-
302
367
  def _drop(self) -> None:
303
368
  cat = catalog.Catalog.get()
304
369
  # verify all dependents are deleted by now
@@ -325,27 +390,54 @@ class Table(SchemaObject):
325
390
  """
326
391
  return self._df().to_coco_dataset()
327
392
 
328
- def __setitem__(self, col_name: str, spec: Union[ts.ColumnType, exprs.Expr]) -> None:
329
- """
330
- Adds a column to the table. This is an alternate syntax for `add_column()`; the meaning of
331
-
332
- >>> tbl['new_col'] = pxt.Int
333
-
334
- is exactly equivalent to
393
+ def _column_has_dependents(self, col: Column) -> bool:
394
+ """Returns True if the column has dependents, False otherwise."""
395
+ assert col is not None
396
+ assert col.name in self._schema.keys()
397
+ if any(c.name is not None for c in col.dependent_cols):
398
+ return True
399
+ return any(
400
+ col in store.get_local_columns()
401
+ for view in [self] + self._get_views(recursive=True)
402
+ for store in view._tbl_version.external_stores.values())
335
403
 
336
- >>> tbl.add_column(new_col=pxt.Int)
404
+ def _ignore_or_drop_existing_columns(self, new_col_names: list[str], if_exists: IfExistsParam) -> list[str]:
405
+ """ Check and handle existing columns in the new column specification based on the if_exists parameter.
337
406
 
338
- For details, see the documentation for [`add_column()`][pixeltable.catalog.Table.add_column].
407
+ If `if_exists='ignore'`, returns a list of existing columns, if any, in `new_col_names`.
339
408
  """
340
- if not isinstance(col_name, str):
341
- raise excs.Error(f'Column name must be a string, got {type(col_name)}')
342
- if not isinstance(spec, (ts.ColumnType, exprs.Expr, type, _GenericAlias)):
343
- raise excs.Error(f'Column spec must be a ColumnType, Expr, or type, got {type(spec)}')
344
- self.add_column(stored=None, print_stats=False, on_error='abort', **{col_name: spec})
409
+ assert not self.get_metadata()['is_snapshot']
410
+ existing_col_names = set(self._schema.keys())
411
+ cols_to_ignore = []
412
+ for new_col_name in new_col_names:
413
+ if new_col_name in existing_col_names:
414
+ if if_exists == IfExistsParam.ERROR:
415
+ raise excs.Error(f'Duplicate column name: {new_col_name!r}')
416
+ elif if_exists == IfExistsParam.IGNORE:
417
+ cols_to_ignore.append(new_col_name)
418
+ elif if_exists == IfExistsParam.REPLACE or if_exists == IfExistsParam.REPLACE_FORCE:
419
+ if new_col_name not in self._tbl_version.cols_by_name:
420
+ # for views, it is possible that the existing column
421
+ # is a base table column; in that case, we should not
422
+ # drop/replace that column. Continue to raise error.
423
+ raise excs.Error(
424
+ f'Column {new_col_name!r} is a base table column. Cannot replace it.'
425
+ )
426
+ col = self._tbl_version.cols_by_name[new_col_name]
427
+ # cannot drop a column with dependents; so reject
428
+ # replace directive if column has dependents.
429
+ if self._column_has_dependents(col):
430
+ raise excs.Error(
431
+ f'Column {new_col_name!r} already exists and has dependents. Cannot {if_exists.name.lower()} it.'
432
+ )
433
+ self.drop_column(new_col_name)
434
+ assert new_col_name not in self._tbl_version.cols_by_name
435
+ return cols_to_ignore
345
436
 
346
437
  def add_columns(
347
438
  self,
348
- schema: dict[str, Union[ts.ColumnType, builtins.type, _GenericAlias]]
439
+ schema: dict[str, Union[ts.ColumnType, builtins.type, _GenericAlias]],
440
+ if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error'
349
441
  ) -> UpdateStatus:
350
442
  """
351
443
  Adds multiple columns to the table. The columns must be concrete (non-computed) columns; to add computed columns,
@@ -356,12 +448,21 @@ class Table(SchemaObject):
356
448
 
357
449
  Args:
358
450
  schema: A dictionary mapping column names to types.
451
+ if_exists: Determines the behavior if a column already exists. Must be one of the following:
452
+
453
+ - `'error'`: an exception will be raised.
454
+ - `'ignore'`: do nothing and return.
455
+ - `'replace' or 'replace_force'`: drop the existing column and add the new column, if it has no dependents.
456
+
457
+ Note that the `if_exists` parameter is applied to all columns in the schema.
458
+ To apply different behaviors to different columns, please use [`add_column()`][pixeltable.Table.add_column] for each column.
359
459
 
360
460
  Returns:
361
461
  Information about the execution status of the operation.
362
462
 
363
463
  Raises:
364
- Error: If any column name is invalid or already exists.
464
+ Error: If any column name is invalid, or already exists and `if_exists='error'`,
465
+ or `if_exists='replace*'` but the column has dependents or is a basetable column.
365
466
 
366
467
  Examples:
367
468
  Add multiple columns to the table `my_table`:
@@ -374,49 +475,51 @@ class Table(SchemaObject):
374
475
  ... tbl.add_columns(schema)
375
476
  """
376
477
  self._check_is_dropped()
478
+ if self.get_metadata()['is_snapshot']:
479
+ raise excs.Error('Cannot add column to a snapshot.')
377
480
  col_schema = {
378
481
  col_name: {'type': ts.ColumnType.normalize_type(spec, nullable_default=True, allow_builtin_types=False)}
379
482
  for col_name, spec in schema.items()
380
483
  }
484
+ # handle existing columns based on if_exists parameter
485
+ cols_to_ignore = self._ignore_or_drop_existing_columns(list(col_schema.keys()), IfExistsParam.validated(if_exists, 'if_exists'))
486
+ # if all columns to be added already exist and user asked to ignore
487
+ # existing columns, there's nothing to do.
488
+ for cname in cols_to_ignore:
489
+ assert cname in col_schema
490
+ del col_schema[cname]
491
+ if len(col_schema) == 0:
492
+ return UpdateStatus()
381
493
  new_cols = self._create_columns(col_schema)
382
494
  for new_col in new_cols:
383
- self._verify_column(new_col, set(self._schema.keys()), set(self._query_names))
495
+ self._verify_column(new_col)
384
496
  status = self._tbl_version.add_columns(new_cols, print_stats=False, on_error='abort')
385
497
  FileCache.get().emit_eviction_warnings()
386
498
  return status
387
499
 
388
- # TODO: add_column() still supports computed columns for backward-compatibility. In the future, computed columns
389
- # will be supported only through add_computed_column(). At that point, we can remove the `stored`,
390
- # `print_stats`, and `on_error` parameters, and change the method body to simply call self.add_columns(kwargs),
391
- # simplifying the code. For the time being, there's some obvious code duplication.
392
500
  def add_column(
393
501
  self,
394
502
  *,
395
- stored: Optional[bool] = None,
396
- print_stats: bool = False,
397
- on_error: Literal['abort', 'ignore'] = 'abort',
503
+ if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
398
504
  **kwargs: Union[ts.ColumnType, builtins.type, _GenericAlias, exprs.Expr]
399
505
  ) -> UpdateStatus:
400
506
  """
401
- Adds a column to the table.
507
+ Adds an ordinary (non-computed) column to the table.
402
508
 
403
509
  Args:
404
510
  kwargs: Exactly one keyword argument of the form `col_name=col_type`.
405
- stored: Whether the column is materialized and stored or computed on demand. Only valid for image columns.
406
- print_stats: If `True`, print execution metrics during evaluation.
407
- on_error: Determines the behavior if an error occurs while evaluating the column expression for at least one
408
- row.
511
+ if_exists: Determines the behavior if the column already exists. Must be one of the following:
409
512
 
410
- - `'abort'`: an exception will be raised and the column will not be added.
411
- - `'ignore'`: execution will continue and the column will be added. Any rows
412
- with errors will have a `None` value for the column, with information about the error stored in the
413
- corresponding `tbl.col_name.errortype` and `tbl.col_name.errormsg` fields.
513
+ - `'error'`: an exception will be raised.
514
+ - `'ignore'`: do nothing and return.
515
+ - `'replace' or 'replace_force'`: drop the existing column and add the new column, if it has no dependents.
414
516
 
415
517
  Returns:
416
518
  Information about the execution status of the operation.
417
519
 
418
520
  Raises:
419
- Error: If the column name is invalid or already exists.
521
+ Error: If the column name is invalid, or already exists and `if_exists='error'`,
522
+ or `if_exists='replace*'` but the column has dependents or is a basetable column.
420
523
 
421
524
  Examples:
422
525
  Add an int column:
@@ -428,29 +531,22 @@ class Table(SchemaObject):
428
531
  >>> tbl['new_col'] = pxt.Int
429
532
  """
430
533
  self._check_is_dropped()
534
+ # verify kwargs
535
+ if self._tbl_version.is_snapshot:
536
+ raise excs.Error('Cannot add column to a snapshot.')
431
537
  # verify kwargs and construct column schema dict
432
538
  if len(kwargs) != 1:
433
539
  raise excs.Error(
434
540
  f'add_column() requires exactly one keyword argument of the form "col_name=col_type"; '
435
- f'got {len(kwargs)} instead ({", ".join(list(kwargs.keys()))})'
541
+ f'got {len(kwargs)} instead ({", ".join(kwargs.keys())})'
436
542
  )
437
- col_name, spec = next(iter(kwargs.items()))
438
- if not is_valid_identifier(col_name):
439
- raise excs.Error(f'Invalid column name: {col_name!r}')
440
-
441
- col_schema: dict[str, Any] = {}
442
- if isinstance(spec, (ts.ColumnType, builtins.type, _GenericAlias)):
443
- col_schema['type'] = ts.ColumnType.normalize_type(spec, nullable_default=True, allow_builtin_types=False)
444
- else:
445
- col_schema['value'] = spec
446
- if stored is not None:
447
- col_schema['stored'] = stored
543
+ col_type = next(iter(kwargs.values()))
544
+ if not isinstance(col_type, (ts.ColumnType, type, _GenericAlias)):
545
+ raise excs.Error(
546
+ f'The argument to add_column() must be a type; did you intend to use add_computed_column() instead?'
547
+ )
548
+ return self.add_columns(kwargs, if_exists=if_exists)
448
549
 
449
- new_col = self._create_columns({col_name: col_schema})[0]
450
- self._verify_column(new_col, set(self._schema.keys()), set(self._query_names))
451
- status = self._tbl_version.add_columns([new_col], print_stats=print_stats, on_error=on_error)
452
- FileCache.get().emit_eviction_warnings()
453
- return status
454
550
 
455
551
  def add_computed_column(
456
552
  self,
@@ -458,6 +554,7 @@ class Table(SchemaObject):
458
554
  stored: Optional[bool] = None,
459
555
  print_stats: bool = False,
460
556
  on_error: Literal['abort', 'ignore'] = 'abort',
557
+ if_exists: Literal['error', 'ignore', 'replace'] = 'error',
461
558
  **kwargs: exprs.Expr
462
559
  ) -> UpdateStatus:
463
560
  """
@@ -465,12 +562,27 @@ class Table(SchemaObject):
465
562
 
466
563
  Args:
467
564
  kwargs: Exactly one keyword argument of the form `col_name=expression`.
565
+ stored: Whether the column is materialized and stored or computed on demand. Only valid for image columns.
566
+ print_stats: If `True`, print execution metrics during evaluation.
567
+ on_error: Determines the behavior if an error occurs while evaluating the column expression for at least one
568
+ row.
569
+
570
+ - `'abort'`: an exception will be raised and the column will not be added.
571
+ - `'ignore'`: execution will continue and the column will be added. Any rows
572
+ with errors will have a `None` value for the column, with information about the error stored in the
573
+ corresponding `tbl.col_name.errortype` and `tbl.col_name.errormsg` fields.
574
+ if_exists: Determines the behavior if the column already exists. Must be one of the following:
575
+
576
+ - `'error'`: an exception will be raised.
577
+ - `'ignore'`: do nothing and return.
578
+ - `'replace' or 'replace_force'`: drop the existing column and add the new column, iff it has no dependents.
468
579
 
469
580
  Returns:
470
581
  Information about the execution status of the operation.
471
582
 
472
583
  Raises:
473
- Error: If the column name is invalid or already exists.
584
+ Error: If the column name is invalid or already exists and `if_exists='error'`,
585
+ or `if_exists='replace*'` but the column has dependents or is a basetable column.
474
586
 
475
587
  Examples:
476
588
  For a table with an image column `frame`, add an image column `rotated` that rotates the image by
@@ -483,6 +595,8 @@ class Table(SchemaObject):
483
595
  >>> tbl.add_computed_column(rotated=tbl.frame.rotate(90), stored=False)
484
596
  """
485
597
  self._check_is_dropped()
598
+ if self.get_metadata()['is_snapshot']:
599
+ raise excs.Error('Cannot add column to a snapshot.')
486
600
  if len(kwargs) != 1:
487
601
  raise excs.Error(
488
602
  f'add_computed_column() requires exactly one keyword argument of the form "column-name=type|value-expression"; '
@@ -496,8 +610,16 @@ class Table(SchemaObject):
496
610
  if stored is not None:
497
611
  col_schema['stored'] = stored
498
612
 
613
+ # handle existing columns based on if_exists parameter
614
+ cols_to_ignore = self._ignore_or_drop_existing_columns([col_name], IfExistsParam.validated(if_exists, 'if_exists'))
615
+ # if the column to add already exists and user asked to ignore
616
+ # existing column, there's nothing to do.
617
+ if len(cols_to_ignore) != 0:
618
+ assert cols_to_ignore[0] == col_name
619
+ return UpdateStatus()
620
+
499
621
  new_col = self._create_columns({col_name: col_schema})[0]
500
- self._verify_column(new_col, set(self._schema.keys()), set(self._query_names))
622
+ self._verify_column(new_col)
501
623
  status = self._tbl_version.add_columns([new_col], print_stats=print_stats, on_error=on_error)
502
624
  FileCache.get().emit_eviction_warnings()
503
625
  return status
@@ -577,18 +699,12 @@ class Table(SchemaObject):
577
699
  return columns
578
700
 
579
701
  @classmethod
580
- def _verify_column(
581
- cls, col: Column, existing_column_names: set[str], existing_query_names: Optional[set[str]] = None
582
- ) -> None:
702
+ def _verify_column(cls, col: Column) -> None:
583
703
  """Check integrity of user-supplied Column and supply defaults"""
584
704
  if is_system_column_name(col.name):
585
705
  raise excs.Error(f'{col.name!r} is a reserved name in Pixeltable; please choose a different column name.')
586
706
  if not is_valid_identifier(col.name):
587
707
  raise excs.Error(f"Invalid column name: {col.name!r}")
588
- if col.name in existing_column_names:
589
- raise excs.Error(f'Duplicate column name: {col.name!r}')
590
- if existing_query_names is not None and col.name in existing_query_names:
591
- raise excs.Error(f'Column name conflicts with a registered query: {col.name!r}')
592
708
  if col.stored is False and not (col.is_computed and col.col_type.is_image_type()):
593
709
  raise excs.Error(f'Column {col.name!r}: stored={col.stored} only applies to computed image columns')
594
710
  if col.stored is False and col.has_window_fn_call():
@@ -601,7 +717,7 @@ class Table(SchemaObject):
601
717
  """Check integrity of user-supplied schema and set defaults"""
602
718
  column_names: set[str] = set()
603
719
  for col in schema:
604
- cls._verify_column(col, column_names)
720
+ cls._verify_column(col)
605
721
  column_names.add(col.name)
606
722
 
607
723
  def __check_column_name_exists(self, column_name: str, include_bases: bool = False) -> None:
@@ -614,14 +730,19 @@ class Table(SchemaObject):
614
730
  if not exists:
615
731
  raise excs.Error(f'Unknown column: {col_ref.col.qualified_name}')
616
732
 
617
- def drop_column(self, column: Union[str, ColumnRef]) -> None:
733
+ def drop_column(self, column: Union[str, ColumnRef], if_not_exists: Literal['error', 'ignore'] = 'error') -> None:
618
734
  """Drop a column from the table.
619
735
 
620
736
  Args:
621
737
  column: The name or reference of the column to drop.
738
+ if_not_exists: Directive for handling a non-existent column. Must be one of the following:
739
+
740
+ - `'error'`: raise an error if the column does not exist.
741
+ - `'ignore'`: do nothing if the column does not exist.
622
742
 
623
743
  Raises:
624
- Error: If the column does not exist or if it is referenced by a dependent computed column.
744
+ Error: If the column does not exist and `if_not_exists='error'`,
745
+ or if it is referenced by a dependent computed column.
625
746
 
626
747
  Examples:
627
748
  Drop the column `col` from the table `my_table` by column name:
@@ -633,14 +754,32 @@ class Table(SchemaObject):
633
754
 
634
755
  >>> tbl = pxt.get_table('my_table')
635
756
  ... tbl.drop_column(tbl.col)
757
+
758
+ Drop the column `col` from the table `my_table` if it exists, otherwise do nothing:
759
+
760
+ >>> tbl = pxt.get_table('my_table')
761
+ ... tbl.drop_column(tbl.col, if_not_exists='ignore')
636
762
  """
637
763
  self._check_is_dropped()
764
+ if self._tbl_version_path.is_snapshot():
765
+ raise excs.Error('Cannot drop column from a snapshot.')
638
766
  col: Column = None
767
+ _if_not_exists = IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
639
768
  if isinstance(column, str):
640
- self.__check_column_name_exists(column)
769
+ col = self._tbl_version_path.get_column(column, include_bases=False)
770
+ if col is None:
771
+ if _if_not_exists == IfNotExistsParam.ERROR:
772
+ raise excs.Error(f'Column {column!r} unknown')
773
+ assert _if_not_exists == IfNotExistsParam.IGNORE
774
+ return
641
775
  col = self._tbl_version.cols_by_name[column]
642
776
  else:
643
- self.__check_column_ref_exists(column)
777
+ exists = self._tbl_version_path.has_column(column.col, include_bases=False)
778
+ if not exists:
779
+ if _if_not_exists == IfNotExistsParam.ERROR:
780
+ raise excs.Error(f'Unknown column: {column.col.qualified_name}')
781
+ assert _if_not_exists == IfNotExistsParam.IGNORE
782
+ return
644
783
  col = column.col
645
784
 
646
785
  dependent_user_cols = [c for c in col.dependent_cols if c.name is not None]
@@ -686,69 +825,114 @@ class Table(SchemaObject):
686
825
  >>> tbl = pxt.get_table('my_table')
687
826
  ... tbl.rename_column('col1', 'col2')
688
827
  """
689
- self._check_is_dropped()
690
828
  self._tbl_version.rename_column(old_name, new_name)
691
829
 
830
+ def _list_index_info_for_test(self) -> list[dict[str, Any]]:
831
+ """
832
+ Returns list of all the indexes on this table. Used for testing.
833
+
834
+ Returns:
835
+ A list of index information, each containing the index's
836
+ id, name, and the name of the column it indexes.
837
+ """
838
+ assert not self._is_dropped
839
+ index_info = []
840
+ for idx_name, idx in self._tbl_version.idxs_by_name.items():
841
+ index_info.append({
842
+ '_id': idx.id,
843
+ '_name': idx_name,
844
+ '_column': idx.col.name
845
+ })
846
+ return index_info
847
+
692
848
  def add_embedding_index(
693
849
  self, column: Union[str, ColumnRef], *, idx_name: Optional[str] = None,
850
+ embedding: Optional[pxt.Function] = None,
694
851
  string_embed: Optional[pxt.Function] = None, image_embed: Optional[pxt.Function] = None,
695
- metric: str = 'cosine'
852
+ metric: str = 'cosine',
853
+ if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error'
696
854
  ) -> None:
697
855
  """
698
- Add an embedding index to the table. Once the index is added, it will be automatically kept up to data as new
856
+ Add an embedding index to the table. Once the index is created, it will be automatically kept up-to-date as new
699
857
  rows are inserted into the table.
700
858
 
701
- Indices are currently supported only for `String` and `Image` columns. The index must specify, at
702
- minimum, an embedding of the appropriate type (string or image). It may optionally specify _both_ a string
703
- and image embedding (into the same vector space); in particular, this can be used to provide similarity search
704
- of text over an image column.
859
+ To add an embedding index, one must specify, at minimum, the column to be indexed and an embedding UDF.
860
+ Only `String` and `Image` columns are currently supported. Here's an example that uses a
861
+ [CLIP embedding][pixeltable.functions.huggingface.clip] to index an image column:
862
+
863
+ >>> from pixeltable.functions.huggingface import clip
864
+ ... embedding_fn = clip.using(model_id='openai/clip-vit-base-patch32')
865
+ ... tbl.add_embedding_index(tbl.img, embedding=embedding_fn)
866
+
867
+ Once the index is created, similarity lookups can be performed using the `similarity` pseudo-function.
868
+
869
+ >>> reference_img = PIL.Image.open('my_image.jpg')
870
+ ... sim = tbl.img.similarity(reference_img)
871
+ ... tbl.select(tbl.img, sim).order_by(sim, asc=False).limit(5)
872
+
873
+ If the embedding UDF is a multimodal embedding (supporting more than one data type), then lookups may be
874
+ performed using any of its supported types. In our example, CLIP supports both text and images, so we can
875
+ also search for images using a text description:
876
+
877
+ >>> sim = tbl.img.similarity('a picture of a train')
878
+ ... tbl.select(tbl.img, sim).order_by(sim, asc=False).limit(5)
705
879
 
706
880
  Args:
707
- column: The name of, or reference to, the column to index; must be a `String` or `Image` column.
708
- idx_name: The name of index. If not specified, a name such as `'idx0'` will be generated automatically.
709
- If specified, the name must be unique for this table.
710
- string_embed: A function to embed text; required if the column is a `String` column.
711
- image_embed: A function to embed images; required if the column is an `Image` column.
712
- metric: Distance metric to use for the index; one of `'cosine'`, `'ip'`, or `'l2'`;
713
- the default is `'cosine'`.
881
+ column: The name of, or reference to, the column to be indexed; must be a `String` or `Image` column.
882
+ idx_name: An optional name for the index. If not specified, a name such as `'idx0'` will be generated
883
+ automatically. If specified, the name must be unique for this table.
884
+ embedding: The UDF to use for the embedding. Must be a UDF that accepts a single argument of type `String`
885
+ or `Image` (as appropriate for the column being indexed) and returns a fixed-size 1-dimensional
886
+ array of floats.
887
+ string_embed: An optional UDF to use for the string embedding component of this index.
888
+ Can be used in conjunction with `image_embed` to construct multimodal embeddings manually, by
889
+ specifying different embedding functions for different data types.
890
+ image_embed: An optional UDF to use for the image embedding component of this index.
891
+ Can be used in conjunction with `string_embed` to construct multimodal embeddings manually, by
892
+ specifying different embedding functions for different data types.
893
+ metric: Distance metric to use for the index; one of `'cosine'`, `'ip'`, or `'l2'`.
894
+ The default is `'cosine'`.
895
+ if_exists: Directive for handling an existing index with the same name. Must be one of the following:
896
+
897
+ - `'error'`: raise an error if an index with the same name already exists.
898
+ - `'ignore'`: do nothing if an index with the same name already exists.
899
+ - `'replace'` or `'replace_force'`: replace the existing index with the new one.
714
900
 
715
901
  Raises:
716
- Error: If an index with that name already exists for the table, or if the specified column does not exist.
902
+ Error: If an index with the specified name already exists for the table and `if_exists='error'`, or if the specified column does not exist.
717
903
 
718
904
  Examples:
719
- Add an index to the `img` column of the table `my_table` by column name:
905
+ Add an index to the `img` column of the table `my_table`:
720
906
 
721
- >>> tbl = pxt.get_table('my_table')
722
- ... tbl.add_embedding_index('img', image_embed=my_image_func)
907
+ >>> from pixeltable.functions.huggingface import clip
908
+ ... tbl = pxt.get_table('my_table')
909
+ ... embedding_fn = clip.using(model_id='openai/clip-vit-base-patch32')
910
+ ... tbl.add_embedding_index(tbl.img, embedding=embedding_fn)
723
911
 
724
- Add an index to the `img` column of the table `my_table` by column reference:
725
- >>> tbl = pxt.get_table('my_table')
726
- ... tbl.add_embedding_index(tbl.img, image_embed=my_image_func)
912
+ Alternatively, the `img` column may be specified by name:
913
+
914
+ >>> tbl.add_embedding_index('img', embedding=embedding_fn)
727
915
 
728
- Add another index to the `img` column, using the inner product as the distance metric,
729
- and with a specific name; `string_embed` is also specified in order to search with text:
916
+ Add a second index to the `img` column, using the inner product as the distance metric,
917
+ and with a specific name:
730
918
 
731
919
  >>> tbl.add_embedding_index(
732
- ... 'img',
733
- ... idx_name='clip_idx',
734
- ... image_embed=my_image_func,
735
- ... string_embed=my_string_func,
920
+ ... tbl.img,
921
+ ... idx_name='ip_idx',
922
+ ... embedding=embedding_fn,
736
923
  ... metric='ip'
737
924
  ... )
738
925
 
739
- Alternatively:
926
+ Add an index using separately specified string and image embeddings:
740
927
 
741
928
  >>> tbl.add_embedding_index(
742
929
  ... tbl.img,
743
- ... idx_name='clip_idx',
744
- ... image_embed=my_image_func,
745
- ... string_embed=my_string_func,
746
- ... metric='ip'
930
+ ... string_embed=string_embedding_fn,
931
+ ... image_embed=image_embedding_fn
747
932
  ... )
748
933
  """
749
934
  if self._tbl_version_path.is_snapshot():
750
935
  raise excs.Error('Cannot add an index to a snapshot')
751
- self._check_is_dropped()
752
936
  col: Column
753
937
  if isinstance(column, str):
754
938
  self.__check_column_name_exists(column, include_bases=True)
@@ -758,11 +942,22 @@ class Table(SchemaObject):
758
942
  col = column.col
759
943
 
760
944
  if idx_name is not None and idx_name in self._tbl_version.idxs_by_name:
761
- raise excs.Error(f'Duplicate index name: {idx_name}')
945
+ _if_exists = IfExistsParam.validated(if_exists, 'if_exists')
946
+ # An index with the same name already exists.
947
+ # Handle it according to if_exists.
948
+ if _if_exists == IfExistsParam.ERROR:
949
+ raise excs.Error(f'Duplicate index name: {idx_name}')
950
+ if not isinstance(self._tbl_version.idxs_by_name[idx_name].idx, index.EmbeddingIndex):
951
+ raise excs.Error(f'Index `{idx_name}` is not an embedding index. Cannot {_if_exists.name.lower()} it.')
952
+ if _if_exists == IfExistsParam.IGNORE:
953
+ return
954
+ assert _if_exists == IfExistsParam.REPLACE or _if_exists == IfExistsParam.REPLACE_FORCE
955
+ self.drop_index(idx_name=idx_name)
956
+ assert idx_name not in self._tbl_version.idxs_by_name
762
957
  from pixeltable.index import EmbeddingIndex
763
958
 
764
959
  # create the EmbeddingIndex instance to verify args
765
- idx = EmbeddingIndex(col, metric=metric, string_embed=string_embed, image_embed=image_embed)
960
+ idx = EmbeddingIndex(col, metric=metric, embed=embedding, string_embed=string_embed, image_embed=image_embed)
766
961
  status = self._tbl_version.add_index(col, idx_name=idx_name, idx=idx)
767
962
  # TODO: how to deal with exceptions here? drop the index and raise?
768
963
  FileCache.get().emit_eviction_warnings()
@@ -770,7 +965,9 @@ class Table(SchemaObject):
770
965
  def drop_embedding_index(
771
966
  self, *,
772
967
  column: Union[str, ColumnRef, None] = None,
773
- idx_name: Optional[str] = None) -> None:
968
+ idx_name: Optional[str] = None,
969
+ if_not_exists: Literal['error', 'ignore'] = 'error'
970
+ ) -> None:
774
971
  """
775
972
  Drop an embedding index from the table. Either a column name or an index name (but not both) must be
776
973
  specified. If a column name or reference is specified, it must be a column containing exactly one
@@ -780,11 +977,20 @@ class Table(SchemaObject):
780
977
  column: The name of, or reference to, the column from which to drop the index.
781
978
  The column must have only one embedding index.
782
979
  idx_name: The name of the index to drop.
980
+ if_not_exists: Directive for handling a non-existent index. Must be one of the following:
981
+
982
+ - `'error'`: raise an error if the index does not exist.
983
+ - `'ignore'`: do nothing if the index does not exist.
984
+
985
+ Note that the `if_not_exists` parameter is only applicable when an `idx_name` is specified
986
+ and it does not exist, or when `column` is specified and it has no index.
987
+ `if_not_exists` does not apply to a non-existent column.
783
988
 
784
989
  Raises:
785
990
  Error: If `column` is specified, but the column does not exist, or it contains no embedding
786
- indices or multiple embedding indices.
787
- Error: If `idx_name` is specified, but the index does not exist or is not an embedding index.
991
+ indices and `if_not_exists='error'`, or the column has multiple embedding indices.
992
+ Error: If `idx_name` is specified, but the index is not an embedding index, or
993
+ the index does not exist and `if_not_exists='error'`.
788
994
 
789
995
  Examples:
790
996
  Drop the embedding index on the `img` column of the table `my_table` by column name:
@@ -801,6 +1007,9 @@ class Table(SchemaObject):
801
1007
  >>> tbl = pxt.get_table('my_table')
802
1008
  ... tbl.drop_embedding_index(idx_name='idx1')
803
1009
 
1010
+ Drop the embedding index `idx1` of the table `my_table` by index name, if it exists, otherwise do nothing:
1011
+ >>> tbl = pxt.get_table('my_table')
1012
+ ... tbl.drop_embedding_index(idx_name='idx1', if_not_exists='ignore')
804
1013
  """
805
1014
  if (column is None) == (idx_name is None):
806
1015
  raise excs.Error("Exactly one of 'column' or 'idx_name' must be provided")
@@ -814,12 +1023,14 @@ class Table(SchemaObject):
814
1023
  self.__check_column_ref_exists(column, include_bases=True)
815
1024
  col = column.col
816
1025
  assert col is not None
817
- self._drop_index(col=col, idx_name=idx_name, _idx_class=index.EmbeddingIndex)
1026
+ self._drop_index(col=col, idx_name=idx_name, _idx_class=index.EmbeddingIndex, if_not_exists=if_not_exists)
818
1027
 
819
1028
  def drop_index(
820
1029
  self, *,
821
1030
  column: Union[str, ColumnRef, None] = None,
822
- idx_name: Optional[str] = None) -> None:
1031
+ idx_name: Optional[str] = None,
1032
+ if_not_exists: Literal['error', 'ignore'] = 'error'
1033
+ ) -> None:
823
1034
  """
824
1035
  Drop an index from the table. Either a column name or an index name (but not both) must be
825
1036
  specified. If a column name or reference is specified, it must be a column containing exactly one index;
@@ -829,6 +1040,14 @@ class Table(SchemaObject):
829
1040
  column: The name of, or reference to, the column from which to drop the index.
830
1041
  The column must have only one embedding index.
831
1042
  idx_name: The name of the index to drop.
1043
+ if_not_exists: Directive for handling a non-existent index. Must be one of the following:
1044
+
1045
+ - `'error'`: raise an error if the index does not exist.
1046
+ - `'ignore'`: do nothing if the index does not exist.
1047
+
1048
+ Note that the `if_not_exists` parameter is only applicable when an `idx_name` is specified
1049
+ and it does not exist, or when `column` is specified and it has no index.
1050
+ `if_not_exists` does not apply to a non-existent column.
832
1051
 
833
1052
  Raises:
834
1053
  Error: If `column` is specified, but the column does not exist, or it contains no
@@ -850,6 +1069,10 @@ class Table(SchemaObject):
850
1069
  >>> tbl = pxt.get_table('my_table')
851
1070
  ... tbl.drop_index(idx_name='idx1')
852
1071
 
1072
+ Drop the index `idx1` of the table `my_table` by index name, if it exists, otherwise do nothing:
1073
+ >>> tbl = pxt.get_table('my_table')
1074
+ ... tbl.drop_index(idx_name='idx1', if_not_exists='ignore')
1075
+
853
1076
  """
854
1077
  if (column is None) == (idx_name is None):
855
1078
  raise excs.Error("Exactly one of 'column' or 'idx_name' must be provided")
@@ -863,21 +1086,25 @@ class Table(SchemaObject):
863
1086
  self.__check_column_ref_exists(column, include_bases=True)
864
1087
  col = column.col
865
1088
  assert col is not None
866
- self._drop_index(col=col, idx_name=idx_name)
1089
+ self._drop_index(col=col, idx_name=idx_name, if_not_exists=if_not_exists)
867
1090
 
868
1091
  def _drop_index(
869
1092
  self, *, col: Optional[Column] = None,
870
1093
  idx_name: Optional[str] = None,
871
- _idx_class: Optional[type[index.IndexBase]] = None
1094
+ _idx_class: Optional[type[index.IndexBase]] = None,
1095
+ if_not_exists: Literal['error', 'ignore'] = 'error'
872
1096
  ) -> None:
873
1097
  if self._tbl_version_path.is_snapshot():
874
1098
  raise excs.Error('Cannot drop an index from a snapshot')
875
- self._check_is_dropped()
876
1099
  assert (col is None) != (idx_name is None)
877
1100
 
878
1101
  if idx_name is not None:
1102
+ _if_not_exists = IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
879
1103
  if idx_name not in self._tbl_version.idxs_by_name:
880
- raise excs.Error(f'Index {idx_name!r} does not exist')
1104
+ if _if_not_exists == IfNotExistsParam.ERROR:
1105
+ raise excs.Error(f'Index {idx_name!r} does not exist')
1106
+ assert _if_not_exists == IfNotExistsParam.IGNORE
1107
+ return
881
1108
  idx_id = self._tbl_version.idxs_by_name[idx_name].id
882
1109
  else:
883
1110
  if col.tbl.id != self._tbl_version.id:
@@ -887,7 +1114,11 @@ class Table(SchemaObject):
887
1114
  if _idx_class is not None:
888
1115
  idx_info = [info for info in idx_info if isinstance(info.idx, _idx_class)]
889
1116
  if len(idx_info) == 0:
890
- raise excs.Error(f'Column {col.name!r} does not have an index')
1117
+ _if_not_exists = IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
1118
+ if _if_not_exists == IfNotExistsParam.ERROR:
1119
+ raise excs.Error(f'Column {col.name!r} does not have an index')
1120
+ assert _if_not_exists == IfNotExistsParam.IGNORE
1121
+ return
891
1122
  if len(idx_info) > 1:
892
1123
  raise excs.Error(f"Column {col.name!r} has multiple indices; specify 'idx_name' instead")
893
1124
  idx_id = idx_info[0].id
@@ -1009,7 +1240,6 @@ class Table(SchemaObject):
1009
1240
 
1010
1241
  >>> tbl.update({'int_col': tbl.int_col + 1}, where=tbl.int_col == 0)
1011
1242
  """
1012
- self._check_is_dropped()
1013
1243
  status = self._tbl_version.update(value_spec, where, cascade)
1014
1244
  FileCache.get().emit_eviction_warnings()
1015
1245
  return status
@@ -1045,7 +1275,6 @@ class Table(SchemaObject):
1045
1275
  """
1046
1276
  if self._tbl_version_path.is_snapshot():
1047
1277
  raise excs.Error('Cannot update a snapshot')
1048
- self._check_is_dropped()
1049
1278
  rows = list(rows)
1050
1279
 
1051
1280
  row_updates: list[dict[Column, exprs.Expr]] = []
@@ -1100,46 +1329,8 @@ class Table(SchemaObject):
1100
1329
  """
1101
1330
  if self._tbl_version_path.is_snapshot():
1102
1331
  raise excs.Error('Cannot revert a snapshot')
1103
- self._check_is_dropped()
1104
1332
  self._tbl_version.revert()
1105
1333
 
1106
- @overload
1107
- def query(self, py_fn: Callable) -> 'pxt.func.QueryTemplateFunction': ...
1108
-
1109
- @overload
1110
- def query(
1111
- self, *, param_types: Optional[list[ts.ColumnType]] = None
1112
- ) -> Callable[[Callable], 'pxt.func.QueryTemplateFunction']: ...
1113
-
1114
- def query(self, *args: Any, **kwargs: Any) -> Any:
1115
- def make_query_template(
1116
- py_fn: Callable, param_types: Optional[list[ts.ColumnType]]
1117
- ) -> 'pxt.func.QueryTemplateFunction':
1118
- if py_fn.__module__ != '__main__' and py_fn.__name__.isidentifier():
1119
- # this is a named function in a module
1120
- function_path = f'{py_fn.__module__}.{py_fn.__qualname__}'
1121
- else:
1122
- function_path = None
1123
- query_name = py_fn.__name__
1124
- if query_name in self._schema.keys():
1125
- raise excs.Error(f'Query name {query_name!r} conflicts with existing column')
1126
- if query_name in self.__query_scope._queries and function_path is not None:
1127
- raise excs.Error(f'Duplicate query name: {query_name!r}')
1128
- query_fn = pxt.func.QueryTemplateFunction.create(
1129
- py_fn, param_types=param_types, path=function_path, name=query_name)
1130
- self.__query_scope._queries[query_name] = query_fn
1131
- return query_fn
1132
-
1133
- # TODO: verify that the inferred return type matches that of the template
1134
- # TODO: verify that the signature doesn't contain batched parameters
1135
-
1136
- if len(args) == 1:
1137
- assert len(kwargs) == 0 and callable(args[0])
1138
- return make_query_template(args[0], None)
1139
- else:
1140
- assert len(args) == 0 and len(kwargs) == 1 and 'param_types' in kwargs
1141
- return lambda py_fn: make_query_template(py_fn, kwargs['param_types'])
1142
-
1143
1334
  @property
1144
1335
  def external_stores(self) -> list[str]:
1145
1336
  return list(self._tbl_version.external_stores.keys())
@@ -1150,7 +1341,6 @@ class Table(SchemaObject):
1150
1341
  """
1151
1342
  if self._tbl_version.is_snapshot:
1152
1343
  raise excs.Error(f'Table `{self._name}` is a snapshot, so it cannot be linked to an external store.')
1153
- self._check_is_dropped()
1154
1344
  if store.name in self.external_stores:
1155
1345
  raise excs.Error(f'Table `{self._name}` already has an external store with that name: {store.name}')
1156
1346
  _logger.info(f'Linking external store `{store.name}` to table `{self._name}`')
@@ -1230,7 +1420,7 @@ class Table(SchemaObject):
1230
1420
  return sync_status
1231
1421
 
1232
1422
  def __dir__(self) -> list[str]:
1233
- return list(super().__dir__()) + list(self._schema.keys()) + self._query_names
1423
+ return list(super().__dir__()) + list(self._schema.keys())
1234
1424
 
1235
1425
  def _ipython_key_completions_(self) -> list[str]:
1236
- return list(self._schema.keys()) + self._query_names
1426
+ return list(self._schema.keys())