pixeltable 0.2.19__py3-none-any.whl → 0.2.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (88) hide show
  1. pixeltable/__init__.py +7 -19
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +7 -7
  4. pixeltable/catalog/globals.py +3 -0
  5. pixeltable/catalog/insertable_table.py +9 -7
  6. pixeltable/catalog/table.py +220 -143
  7. pixeltable/catalog/table_version.py +36 -18
  8. pixeltable/catalog/table_version_path.py +0 -8
  9. pixeltable/catalog/view.py +3 -3
  10. pixeltable/dataframe.py +9 -24
  11. pixeltable/env.py +107 -36
  12. pixeltable/exceptions.py +7 -4
  13. pixeltable/exec/__init__.py +1 -1
  14. pixeltable/exec/aggregation_node.py +22 -15
  15. pixeltable/exec/component_iteration_node.py +62 -41
  16. pixeltable/exec/data_row_batch.py +7 -7
  17. pixeltable/exec/exec_node.py +35 -7
  18. pixeltable/exec/expr_eval_node.py +2 -1
  19. pixeltable/exec/in_memory_data_node.py +9 -9
  20. pixeltable/exec/sql_node.py +265 -136
  21. pixeltable/exprs/__init__.py +1 -0
  22. pixeltable/exprs/data_row.py +30 -19
  23. pixeltable/exprs/expr.py +15 -14
  24. pixeltable/exprs/expr_dict.py +55 -0
  25. pixeltable/exprs/expr_set.py +21 -15
  26. pixeltable/exprs/function_call.py +21 -8
  27. pixeltable/exprs/json_path.py +3 -6
  28. pixeltable/exprs/rowid_ref.py +2 -2
  29. pixeltable/exprs/sql_element_cache.py +5 -1
  30. pixeltable/ext/functions/whisperx.py +7 -2
  31. pixeltable/func/callable_function.py +2 -2
  32. pixeltable/func/function_registry.py +6 -7
  33. pixeltable/func/query_template_function.py +11 -12
  34. pixeltable/func/signature.py +17 -15
  35. pixeltable/func/udf.py +0 -4
  36. pixeltable/functions/__init__.py +1 -1
  37. pixeltable/functions/audio.py +4 -6
  38. pixeltable/functions/globals.py +86 -42
  39. pixeltable/functions/huggingface.py +12 -14
  40. pixeltable/functions/image.py +59 -45
  41. pixeltable/functions/json.py +0 -1
  42. pixeltable/functions/mistralai.py +2 -2
  43. pixeltable/functions/openai.py +22 -25
  44. pixeltable/functions/string.py +50 -50
  45. pixeltable/functions/timestamp.py +20 -20
  46. pixeltable/functions/together.py +26 -12
  47. pixeltable/functions/video.py +11 -20
  48. pixeltable/functions/whisper.py +2 -20
  49. pixeltable/globals.py +57 -56
  50. pixeltable/index/base.py +2 -2
  51. pixeltable/index/btree.py +7 -7
  52. pixeltable/index/embedding_index.py +8 -10
  53. pixeltable/io/external_store.py +11 -5
  54. pixeltable/io/globals.py +3 -1
  55. pixeltable/io/hf_datasets.py +4 -4
  56. pixeltable/io/label_studio.py +6 -6
  57. pixeltable/io/parquet.py +14 -13
  58. pixeltable/iterators/document.py +10 -8
  59. pixeltable/iterators/video.py +10 -1
  60. pixeltable/metadata/__init__.py +3 -2
  61. pixeltable/metadata/converters/convert_14.py +4 -2
  62. pixeltable/metadata/converters/convert_15.py +1 -1
  63. pixeltable/metadata/converters/convert_19.py +1 -0
  64. pixeltable/metadata/converters/convert_20.py +1 -1
  65. pixeltable/metadata/converters/util.py +9 -8
  66. pixeltable/metadata/schema.py +32 -21
  67. pixeltable/plan.py +136 -154
  68. pixeltable/store.py +51 -36
  69. pixeltable/tool/create_test_db_dump.py +7 -7
  70. pixeltable/tool/doc_plugins/griffe.py +3 -34
  71. pixeltable/tool/mypy_plugin.py +32 -0
  72. pixeltable/type_system.py +243 -60
  73. pixeltable/utils/arrow.py +10 -9
  74. pixeltable/utils/coco.py +4 -4
  75. pixeltable/utils/documents.py +1 -1
  76. pixeltable/utils/filecache.py +131 -84
  77. pixeltable/utils/formatter.py +1 -1
  78. pixeltable/utils/http_server.py +2 -5
  79. pixeltable/utils/media_store.py +6 -6
  80. pixeltable/utils/pytorch.py +10 -11
  81. pixeltable/utils/sql.py +2 -1
  82. {pixeltable-0.2.19.dist-info → pixeltable-0.2.21.dist-info}/METADATA +16 -7
  83. pixeltable-0.2.21.dist-info/RECORD +148 -0
  84. pixeltable/utils/help.py +0 -11
  85. pixeltable-0.2.19.dist-info/RECORD +0 -147
  86. {pixeltable-0.2.19.dist-info → pixeltable-0.2.21.dist-info}/LICENSE +0 -0
  87. {pixeltable-0.2.19.dist-info → pixeltable-0.2.21.dist-info}/WHEEL +0 -0
  88. {pixeltable-0.2.19.dist-info → pixeltable-0.2.21.dist-info}/entry_points.txt +0 -0
@@ -5,14 +5,15 @@ import builtins
5
5
  import json
6
6
  import logging
7
7
  from pathlib import Path
8
- from typing import TYPE_CHECKING, Any, Callable, Iterable, Literal, Optional, Set, Tuple, Type, Union, overload
8
+ from typing import _GenericAlias # type: ignore[attr-defined]
9
+ from typing import TYPE_CHECKING, Any, Callable, Iterable, Literal, Optional, Set, Sequence, Tuple, Type, Union, overload
9
10
  from uuid import UUID
10
11
 
11
12
  import pandas as pd
12
13
  import pandas.io.formats.style
13
14
  import sqlalchemy as sql
14
15
 
15
- import pixeltable
16
+ import pixeltable as pxt
16
17
  import pixeltable.catalog as catalog
17
18
  import pixeltable.env as env
18
19
  import pixeltable.exceptions as excs
@@ -20,6 +21,7 @@ import pixeltable.exprs as exprs
20
21
  import pixeltable.index as index
21
22
  import pixeltable.metadata.schema as schema
22
23
  import pixeltable.type_system as ts
24
+ from pixeltable.utils.filecache import FileCache
23
25
 
24
26
  from .column import Column
25
27
  from .globals import _ROWID_COLUMN_NAME, UpdateStatus, is_system_column_name, is_valid_identifier
@@ -33,14 +35,31 @@ if TYPE_CHECKING:
33
35
  _logger = logging.getLogger('pixeltable')
34
36
 
35
37
  class Table(SchemaObject):
36
- """Base class for table objects (base tables, views, snapshots)."""
38
+ """
39
+ A handle to a table, view, or snapshot. This class is the primary interface through which table operations
40
+ (queries, insertions, updates, etc.) are performed in Pixeltable.
41
+ """
42
+ # Every user-invoked operation that runs an ExecNode tree (directly or indirectly) needs to call
43
+ # FileCache.emit_eviction_warnings() at the end of the operation.
37
44
 
38
45
  def __init__(self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath):
39
46
  super().__init__(id, name, dir_id)
40
47
  self._is_dropped = False
41
48
  self._tbl_version_path = tbl_version_path
42
- from pixeltable.func import QueryTemplateFunction
43
- self._queries: dict[str, QueryTemplateFunction] = {}
49
+ self.__query_scope = self.QueryScope(self)
50
+
51
+ class QueryScope:
52
+ __table: 'Table'
53
+ _queries: dict[str, pxt.func.QueryTemplateFunction]
54
+
55
+ def __init__(self, table: 'Table') -> None:
56
+ self.__table = table
57
+ self._queries = {}
58
+
59
+ def __getattr__(self, name: str) -> pxt.func.QueryTemplateFunction:
60
+ if name in self._queries:
61
+ return self._queries[name]
62
+ raise AttributeError(f'Table {self.__table._name!r} has no query with that name: {name!r}')
44
63
 
45
64
  def _move(self, new_name: str, new_dir_id: UUID) -> None:
46
65
  super()._move(new_name, new_dir_id)
@@ -53,6 +72,28 @@ class Table(SchemaObject):
53
72
  conn.execute(stmt, {'new_dir_id': new_dir_id, 'new_name': json.dumps(new_name), 'id': self._id})
54
73
 
55
74
  def get_metadata(self) -> dict[str, Any]:
75
+ """
76
+ Retrieves metadata associated with this table.
77
+
78
+ Returns:
79
+ A dictionary containing the metadata, in the following format:
80
+
81
+ ```python
82
+ {
83
+ 'base': None, # If this is a view or snapshot, will contain the name of its base table
84
+ 'schema': {
85
+ 'col1': StringType(),
86
+ 'col2': IntType(),
87
+ },
88
+ 'version': 22,
89
+ 'schema_version': 1,
90
+ 'comment': '',
91
+ 'num_retained_versions': 10,
92
+ 'is_view': False,
93
+ 'is_snapshot': False,
94
+ }
95
+ ```
96
+ """
56
97
  md = super().get_metadata()
57
98
  md['base'] = self._base._path if self._base is not None else None
58
99
  md['schema'] = self._schema
@@ -79,25 +120,24 @@ class Table(SchemaObject):
79
120
  if self._is_dropped:
80
121
  raise excs.Error(f'{self._display_name()} {self._name} has been dropped')
81
122
 
82
- def __getattr__(
83
- self, name: str
84
- ) -> Union['pixeltable.exprs.ColumnRef', 'pixeltable.func.QueryTemplateFunction']:
85
- """Return a ColumnRef or QueryTemplateFunction for the given name.
123
+ def __getattr__(self, name: str) -> 'pxt.exprs.ColumnRef':
124
+ """Return a ColumnRef for the given name.
86
125
  """
87
- if name in self._queries:
88
- return self._queries[name]
89
126
  return getattr(self._tbl_version_path, name)
90
127
 
91
- def __getitem__(
92
- self, index: object
93
- ) -> Union[
94
- 'pixeltable.func.QueryTemplateFunction', 'pixeltable.exprs.ColumnRef', 'pixeltable.DataFrame'
95
- ]:
128
+ @overload
129
+ def __getitem__(self, name: str) -> 'pxt.exprs.ColumnRef': ...
130
+
131
+ @overload
132
+ def __getitem__(self, index: Union[exprs.Expr, Sequence[exprs.Expr]]) -> 'pxt.DataFrame': ...
133
+
134
+ def __getitem__(self, index):
96
135
  """Return a ColumnRef or QueryTemplateFunction for the given name, or a DataFrame for the given slice.
97
136
  """
98
- if isinstance(index, str) and index in self._queries:
99
- return self._queries[index]
100
- return self._tbl_version_path.__getitem__(index)
137
+ if isinstance(index, str):
138
+ return getattr(self, index)
139
+ else:
140
+ return self._df()[index]
101
141
 
102
142
  def list_views(self, *, recursive: bool = True) -> list[str]:
103
143
  """
@@ -106,6 +146,9 @@ class Table(SchemaObject):
106
146
  Args:
107
147
  recursive: If `False`, returns only the immediate successor views of this `Table`. If `True`, returns
108
148
  all sub-views (including views of views, etc.)
149
+
150
+ Returns:
151
+ A list of view paths.
109
152
  """
110
153
  return [t._path for t in self._get_views(recursive=recursive)]
111
154
 
@@ -116,60 +159,55 @@ class Table(SchemaObject):
116
159
  else:
117
160
  return dependents
118
161
 
119
- def _df(self) -> 'pixeltable.dataframe.DataFrame':
162
+ def _df(self) -> 'pxt.dataframe.DataFrame':
120
163
  """Return a DataFrame for this table.
121
164
  """
122
165
  # local import: avoid circular imports
123
- from pixeltable.dataframe import DataFrame
124
- return DataFrame(self._tbl_version_path)
166
+ return pxt.DataFrame(self._tbl_version_path)
167
+
168
+ @property
169
+ def queries(self) -> 'Table.QueryScope':
170
+ return self.__query_scope
125
171
 
126
- def select(self, *items: Any, **named_items: Any) -> 'pixeltable.DataFrame':
172
+ def select(self, *items: Any, **named_items: Any) -> 'pxt.DataFrame':
127
173
  """Return a [`DataFrame`][pixeltable.DataFrame] for this table."""
128
- # local import: avoid circular imports
129
- from pixeltable.dataframe import DataFrame
130
- return DataFrame(self._tbl_version_path).select(*items, **named_items)
174
+ return self._df().select(*items, **named_items)
131
175
 
132
- def where(self, pred: 'exprs.Expr') -> 'pixeltable.DataFrame':
176
+ def where(self, pred: 'exprs.Expr') -> 'pxt.DataFrame':
133
177
  """Return a [`DataFrame`][pixeltable.DataFrame] for this table."""
134
- # local import: avoid circular imports
135
- from pixeltable.dataframe import DataFrame
136
- return DataFrame(self._tbl_version_path).where(pred)
178
+ return self._df().where(pred)
137
179
 
138
- def order_by(self, *items: 'exprs.Expr', asc: bool = True) -> 'pixeltable.DataFrame':
180
+ def order_by(self, *items: 'exprs.Expr', asc: bool = True) -> 'pxt.DataFrame':
139
181
  """Return a [`DataFrame`][pixeltable.DataFrame] for this table."""
140
- # local import: avoid circular imports
141
- from pixeltable.dataframe import DataFrame
142
- return DataFrame(self._tbl_version_path).order_by(*items, asc=asc)
182
+ return self._df().order_by(*items, asc=asc)
143
183
 
144
- def group_by(self, *items: 'exprs.Expr') -> 'pixeltable.DataFrame':
184
+ def group_by(self, *items: 'exprs.Expr') -> 'pxt.DataFrame':
145
185
  """Return a [`DataFrame`][pixeltable.DataFrame] for this table."""
146
- from pixeltable.dataframe import DataFrame
147
- return DataFrame(self._tbl_version_path).group_by(*items)
186
+ return self._df().group_by(*items)
148
187
 
149
- def limit(self, n: int) -> 'pixeltable.DataFrame':
150
- from pixeltable.dataframe import DataFrame
151
- return DataFrame(self._tbl_version_path).limit(n)
188
+ def limit(self, n: int) -> 'pxt.DataFrame':
189
+ return self._df().limit(n)
152
190
 
153
- def collect(self) -> 'pixeltable.dataframe.DataFrameResultSet':
191
+ def collect(self) -> 'pxt.dataframe.DataFrameResultSet':
154
192
  """Return rows from this table."""
155
193
  return self._df().collect()
156
194
 
157
195
  def show(
158
196
  self, *args, **kwargs
159
- ) -> 'pixeltable.dataframe.DataFrameResultSet':
197
+ ) -> 'pxt.dataframe.DataFrameResultSet':
160
198
  """Return rows from this table.
161
199
  """
162
200
  return self._df().show(*args, **kwargs)
163
201
 
164
202
  def head(
165
203
  self, *args, **kwargs
166
- ) -> 'pixeltable.dataframe.DataFrameResultSet':
204
+ ) -> 'pxt.dataframe.DataFrameResultSet':
167
205
  """Return the first n rows inserted into this table."""
168
206
  return self._df().head(*args, **kwargs)
169
207
 
170
208
  def tail(
171
209
  self, *args, **kwargs
172
- ) -> 'pixeltable.dataframe.DataFrameResultSet':
210
+ ) -> 'pxt.dataframe.DataFrameResultSet':
173
211
  """Return the last n rows inserted into this table."""
174
212
  return self._df().tail(*args, **kwargs)
175
213
 
@@ -185,7 +223,7 @@ class Table(SchemaObject):
185
223
  @property
186
224
  def _query_names(self) -> list[str]:
187
225
  """Return the names of the registered queries for this table."""
188
- return list(self._queries.keys())
226
+ return list(self.__query_scope._queries.keys())
189
227
 
190
228
  @property
191
229
  def _base(self) -> Optional['Table']:
@@ -210,7 +248,7 @@ class Table(SchemaObject):
210
248
  cols = self._tbl_version_path.columns()
211
249
  df = pd.DataFrame({
212
250
  'Column Name': [c.name for c in cols],
213
- 'Type': [str(c.col_type) for c in cols],
251
+ 'Type': [c.col_type._to_str(as_schema=True) for c in cols],
214
252
  'Computed With': [c.value_expr.display_str(inline=False) if c.value_expr is not None else '' for c in cols],
215
253
  })
216
254
  return df
@@ -233,7 +271,7 @@ class Table(SchemaObject):
233
271
  from IPython.display import display
234
272
  display(self._description_html())
235
273
  else:
236
- print(self.__repr__())
274
+ print(repr(self))
237
275
 
238
276
  # TODO: Display comments in _repr_html()
239
277
  def __repr__(self) -> str:
@@ -261,75 +299,77 @@ class Table(SchemaObject):
261
299
  """Return a PyTorch Dataset for this table.
262
300
  See DataFrame.to_pytorch_dataset()
263
301
  """
264
- from pixeltable.dataframe import DataFrame
265
- return DataFrame(self._tbl_version_path).to_pytorch_dataset(image_format=image_format)
302
+ return self._df().to_pytorch_dataset(image_format=image_format)
266
303
 
267
304
  def to_coco_dataset(self) -> Path:
268
305
  """Return the path to a COCO json file for this table.
269
306
  See DataFrame.to_coco_dataset()
270
307
  """
271
- from pixeltable.dataframe import DataFrame
272
- return DataFrame(self._tbl_version_path).to_coco_dataset()
308
+ return self._df().to_coco_dataset()
273
309
 
274
310
  def __setitem__(self, col_name: str, spec: Union[ts.ColumnType, exprs.Expr]) -> None:
275
311
  """
276
312
  Adds a column to the table. This is an alternate syntax for `add_column()`; the meaning of
277
313
 
278
- >>> tbl['new_col'] = IntType()
314
+ >>> tbl['new_col'] = pxt.Int
279
315
 
280
316
  is exactly equivalent to
281
317
 
282
- >>> tbl.add_column(new_col=IntType())
318
+ >>> tbl.add_column(new_col=pxt.Int)
283
319
 
284
320
  For details, see the documentation for [`add_column()`][pixeltable.catalog.Table.add_column].
285
321
  """
286
322
  if not isinstance(col_name, str):
287
323
  raise excs.Error(f'Column name must be a string, got {type(col_name)}')
288
- if not isinstance(spec, (ts.ColumnType, exprs.Expr)):
289
- raise excs.Error(f'Column spec must be a ColumnType or an Expr, got {type(spec)}')
290
- self.add_column(type=None, stored=None, print_stats=False, **{col_name: spec})
324
+ if not isinstance(spec, (ts.ColumnType, exprs.Expr, type, _GenericAlias)):
325
+ raise excs.Error(f'Column spec must be a ColumnType, Expr, or type, got {type(spec)}')
326
+ self.add_column(type=None, stored=None, print_stats=False, on_error='abort', **{col_name: spec})
291
327
 
292
328
  def add_column(
293
329
  self,
294
330
  *,
295
- type: Optional[ts.ColumnType] = None,
331
+ type: Union[ts.ColumnType, builtins.type, _GenericAlias, None] = None,
296
332
  stored: Optional[bool] = None,
297
333
  print_stats: bool = False,
298
- **kwargs: Union[ts.ColumnType, exprs.Expr, Callable]
334
+ on_error: Literal['abort', 'ignore'] = 'abort',
335
+ **kwargs: Union[ts.ColumnType, builtins.type, _GenericAlias, exprs.Expr, Callable]
299
336
  ) -> UpdateStatus:
300
337
  """
301
338
  Adds a column to the table.
302
339
 
303
340
  Args:
304
- kwargs: Exactly one keyword argument of the form ``column-name=type|value-expression``.
305
- type: The type of the column. Only valid and required if ``value-expression`` is a Callable.
341
+ kwargs: Exactly one keyword argument of the form `column_name=type` or `column_name=expression`.
342
+ type: The type of the column. Only valid and required if `expression` is a Callable.
306
343
  stored: Whether the column is materialized and stored or computed on demand. Only valid for image columns.
307
- print_stats: If ``True``, print execution metrics.
344
+ print_stats: If `True`, print execution metrics during evaluation.
345
+ on_error: Determines the behavior if an error occurs while evaluating the column expression for at least one
346
+ row.
347
+
348
+ - If `on_error='abort'`, then an exception will be raised and the column will not be added.
349
+ - If `on_error='ignore'`, then execution will continue and the column will be added. Any rows
350
+ with errors will have a `None` value for the column, with information about the error stored in the
351
+ corresponding `tbl.col_name.errortype` and `tbl.col_name.errormsg` fields.
308
352
 
309
353
  Returns:
310
- execution status
354
+ Information about the execution status of the operation.
311
355
 
312
356
  Raises:
313
357
  Error: If the column name is invalid or already exists.
314
358
 
315
359
  Examples:
316
- Add an int column with ``None`` values:
360
+ Add an int column:
317
361
 
318
- >>> tbl.add_column(new_col=IntType())
362
+ >>> tbl.add_column(new_col=pxt.Int)
319
363
 
320
364
  Alternatively, this can also be expressed as:
321
365
 
322
- >>> tbl['new_col'] = IntType()
366
+ >>> tbl['new_col'] = pxt.Int
323
367
 
324
- For a table with int column ``int_col``, add a column that is the factorial of ``int_col``. The names of
368
+ For a table with int column `int_col`, add a column that is the factorial of `int_col`. The names of
325
369
  the parameters of the Callable must correspond to existing column names (the column values are then passed
326
370
  as arguments to the Callable). In this case, the column type needs to be specified explicitly:
327
371
 
328
- >>> tbl.add_column(factorial=lambda int_col: math.factorial(int_col), type=IntType())
329
-
330
- Alternatively, this can also be expressed as:
331
-
332
- >>> tbl['factorial'] = {'value': lambda int_col: math.factorial(int_col), 'type': IntType()}
372
+ >>> tbl.add_column(factorial=lambda int_col: math.factorial(int_col), type=pxt.Int)
333
373
 
334
374
  For a table with an image column ``frame``, add an image column ``rotated`` that rotates the image by
335
375
  90 degrees. In this case, the column type is inferred from the expression. Also, the column is not stored
@@ -341,13 +381,9 @@ class Table(SchemaObject):
341
381
 
342
382
  >>> tbl['rotated'] = tbl.frame.rotate(90)
343
383
 
344
- Do the same, but now the column is stored:
345
-
346
- >>> tbl.add_column(rotated=tbl.frame.rotate(90), stored=True)
347
-
348
- Alternatively, this can also be expressed as:
384
+ Do the same, but now the column is unstored:
349
385
 
350
- >>> tbl['rotated'] = {'value': tbl.frame.rotate(90), 'stored': True}
386
+ >>> tbl.add_column(rotated=tbl.frame.rotate(90), stored=False)
351
387
  """
352
388
  self._check_is_dropped()
353
389
  # verify kwargs and construct column schema dict
@@ -359,22 +395,24 @@ class Table(SchemaObject):
359
395
  col_name, spec = next(iter(kwargs.items()))
360
396
  if not is_valid_identifier(col_name):
361
397
  raise excs.Error(f'Invalid column name: {col_name!r}')
362
- if isinstance(spec, (ts.ColumnType, exprs.Expr)) and type is not None:
398
+ if isinstance(spec, (ts.ColumnType, builtins.type, _GenericAlias, exprs.Expr)) and type is not None:
363
399
  raise excs.Error(f'add_column(): keyword argument "type" is redundant')
364
400
 
365
401
  col_schema: dict[str, Any] = {}
366
- if isinstance(spec, ts.ColumnType):
367
- col_schema['type'] = spec
402
+ if isinstance(spec, (ts.ColumnType, builtins.type, _GenericAlias)):
403
+ col_schema['type'] = ts.ColumnType.normalize_type(spec, nullable_default=True)
368
404
  else:
369
405
  col_schema['value'] = spec
370
406
  if type is not None:
371
- col_schema['type'] = type
407
+ col_schema['type'] = ts.ColumnType.normalize_type(type, nullable_default=True)
372
408
  if stored is not None:
373
409
  col_schema['stored'] = stored
374
410
 
375
411
  new_col = self._create_columns({col_name: col_schema})[0]
376
412
  self._verify_column(new_col, set(self._schema.keys()), set(self._query_names))
377
- return self._tbl_version.add_column(new_col, print_stats=print_stats)
413
+ status = self._tbl_version.add_column(new_col, print_stats=print_stats, on_error=on_error)
414
+ FileCache.get().emit_eviction_warnings()
415
+ return status
378
416
 
379
417
  @classmethod
380
418
  def _validate_column_spec(cls, name: str, spec: dict[str, Any]) -> None:
@@ -392,8 +430,8 @@ class Table(SchemaObject):
392
430
 
393
431
  if 'type' in spec:
394
432
  has_type = True
395
- if not isinstance(spec['type'], ts.ColumnType):
396
- raise excs.Error(f'Column {name}: "type" must be a ColumnType, got {spec["type"]}')
433
+ if not isinstance(spec['type'], (ts.ColumnType, type, _GenericAlias)):
434
+ raise excs.Error(f'Column {name}: "type" must be a type or ColumnType, got {spec["type"]}')
397
435
 
398
436
  if 'value' in spec:
399
437
  value_spec = spec['value']
@@ -426,20 +464,20 @@ class Table(SchemaObject):
426
464
  primary_key: Optional[bool] = None
427
465
  stored = True
428
466
 
429
- if isinstance(spec, ts.ColumnType):
430
- # TODO: create copy
431
- col_type = spec
467
+ if isinstance(spec, (ts.ColumnType, type, _GenericAlias)):
468
+ col_type = ts.ColumnType.normalize_type(spec, nullable_default=True)
432
469
  elif isinstance(spec, exprs.Expr):
433
470
  # create copy so we can modify it
434
471
  value_expr = spec.copy()
435
472
  elif callable(spec):
436
- raise excs.Error((
473
+ raise excs.Error(
437
474
  f'Column {name} computed with a Callable: specify using a dictionary with '
438
- f'the "value" and "type" keys (e.g., "{name}": {{"value": <Callable>, "type": IntType()}})'
439
- ))
475
+ f'the "value" and "type" keys (e.g., "{name}": {{"value": <Callable>, "type": pxt.Int}})'
476
+ )
440
477
  elif isinstance(spec, dict):
441
478
  cls._validate_column_spec(name, spec)
442
- col_type = spec.get('type')
479
+ if 'type' in spec:
480
+ col_type = ts.ColumnType.normalize_type(spec['type'], nullable_default=True)
443
481
  value_expr = spec.get('value')
444
482
  if value_expr is not None and isinstance(value_expr, exprs.Expr):
445
483
  # create copy so we can modify it
@@ -487,12 +525,13 @@ class Table(SchemaObject):
487
525
  name: The name of the column to drop.
488
526
 
489
527
  Raises:
490
- Error: If the column does not exist or if it is referenced by a computed column.
528
+ Error: If the column does not exist or if it is referenced by a dependent computed column.
491
529
 
492
530
  Examples:
493
- Drop column ``factorial``:
531
+ Drop the column `col` from the table `my_table`:
494
532
 
495
- >>> tbl.drop_column('factorial')
533
+ >>> tbl = pxt.get_table('my_table')
534
+ ... tbl.drop_column('col')
496
535
  """
497
536
  self._check_is_dropped()
498
537
 
@@ -535,43 +574,59 @@ class Table(SchemaObject):
535
574
  new_name: The new name of the column.
536
575
 
537
576
  Raises:
538
- Error: If the column does not exist or if the new name is invalid or already exists.
577
+ Error: If the column does not exist, or if the new name is invalid or already exists.
539
578
 
540
579
  Examples:
541
- Rename column ``factorial`` to ``fac``:
580
+ Rename the column `col1` to `col2` of the table `my_table`:
542
581
 
543
- >>> tbl.rename_column('factorial', 'fac')
582
+ >>> tbl = pxt.get_table('my_table')
583
+ ... tbl.rename_column('col1', 'col2')
544
584
  """
545
585
  self._check_is_dropped()
546
586
  self._tbl_version.rename_column(old_name, new_name)
547
587
 
548
588
  def add_embedding_index(
549
589
  self, col_name: str, *, idx_name: Optional[str] = None,
550
- string_embed: Optional[pixeltable.Function] = None, image_embed: Optional[pixeltable.Function] = None,
590
+ string_embed: Optional[pxt.Function] = None, image_embed: Optional[pxt.Function] = None,
551
591
  metric: str = 'cosine'
552
592
  ) -> None:
553
- """Add an index to the table.
593
+ """
594
+ Add an embedding index to the table. Once the index is added, it will be automatically kept up to date as new
595
+ rows are inserted into the table.
596
+
597
+ Indices are currently supported only for `String` and `Image` columns. The index must specify, at
598
+ minimum, an embedding of the appropriate type (string or image). It may optionally specify _both_ a string
599
+ and image embedding (into the same vector space); in particular, this can be used to provide similarity search
600
+ of text over an image column.
554
601
 
555
602
  Args:
556
- col_name: name of column to index
557
- idx_name: name of index, which needs to be unique for the table; if not provided, a name will be generated
558
- string_embed: function to embed text; required if the column is a text column
559
- image_embed: function to embed images; required if the column is an image column
560
- metric: distance metric to use for the index; one of 'cosine', 'ip', 'l2'; default is 'cosine'
603
+ col_name: The name of the column to index; must be a `String` or `Image` column.
604
+ idx_name: The name of the index. If not specified, a name such as `'idx0'` will be generated automatically.
605
+ If specified, the name must be unique for this table.
606
+ string_embed: A function to embed text; required if the column is a `String` column.
607
+ image_embed: A function to embed images; required if the column is an `Image` column.
608
+ metric: Distance metric to use for the index; one of `'cosine'`, `'ip'`, or `'l2'`;
609
+ the default is `'cosine'`.
561
610
 
562
611
  Raises:
563
- Error: If an index with that name already exists for the table or if the column does not exist.
612
+ Error: If an index with that name already exists for the table, or if the specified column does not exist.
564
613
 
565
614
  Examples:
566
- Add an index to the ``img`` column:
615
+ Add an index to the `img` column of the table `my_table`:
567
616
 
568
- >>> tbl.add_embedding_index('img', image_embed=...)
617
+ >>> tbl = pxt.get_table('my_table')
618
+ ... tbl.add_embedding_index('img', image_embed=my_image_func)
569
619
 
570
- Add another index to the ``img`` column, using the inner product as the distance metric,
571
- and with a specific name; ``string_embed`` is also specified in order to search with text:
620
+ Add another index to the `img` column, using the inner product as the distance metric,
621
+ and with a specific name; `string_embed` is also specified in order to search with text:
572
622
 
573
623
  >>> tbl.add_embedding_index(
574
- 'img', idx_name='clip_idx', image_embed=..., string_embed=..., metric='ip')
624
+ ... 'img',
625
+ ... idx_name='clip_idx',
626
+ ... image_embed=my_image_func,
627
+ ... string_embed=my_string_func,
628
+ ... metric='ip'
629
+ ... )
575
630
  """
576
631
  if self._tbl_version_path.is_snapshot():
577
632
  raise excs.Error('Cannot add an index to a snapshot')
@@ -587,39 +642,53 @@ class Table(SchemaObject):
587
642
  idx = EmbeddingIndex(col, metric=metric, string_embed=string_embed, image_embed=image_embed)
588
643
  status = self._tbl_version.add_index(col, idx_name=idx_name, idx=idx)
589
644
  # TODO: how to deal with exceptions here? drop the index and raise?
645
+ FileCache.get().emit_eviction_warnings()
590
646
 
591
647
  def drop_embedding_index(self, *, column_name: Optional[str] = None, idx_name: Optional[str] = None) -> None:
592
- """Drop an embedding index from the table.
648
+ """
649
+ Drop an embedding index from the table. Either a column name or an index name (but not both) must be
650
+ specified. If a column name is specified, it must be a column containing exactly one embedding index;
651
+ otherwise the specific index name must be provided instead.
593
652
 
594
653
  Args:
595
- column_name: The name of the column whose embedding index to drop. Invalid if the column has multiple
654
+ column_name: The name of the column from which to drop the index. Invalid if the column has multiple
596
655
  embedding indices.
597
656
  idx_name: The name of the index to drop.
598
657
 
599
658
  Raises:
600
- Error: If the index does not exist.
659
+ Error: If `column_name` is specified, but the column does not exist, or it contains no embedding
660
+ indices or multiple embedding indices.
661
+ Error: If `idx_name` is specified, but the index does not exist or is not an embedding index.
601
662
 
602
663
  Examples:
603
- Drop embedding index on the ``img`` column:
664
+ Drop the embedding index on the `img` column of the table `my_table`:
604
665
 
605
- >>> tbl.drop_embedding_index(column_name='img')
666
+ >>> tbl = pxt.get_table('my_table')
667
+ ... tbl.drop_embedding_index(column_name='img')
606
668
  """
607
669
  self._drop_index(column_name=column_name, idx_name=idx_name, _idx_class=index.EmbeddingIndex)
608
670
 
609
671
  def drop_index(self, *, column_name: Optional[str] = None, idx_name: Optional[str] = None) -> None:
610
- """Drop an index from the table.
672
+ """
673
+ Drop an index from the table. Either a column name or an index name (but not both) must be
674
+ specified. If a column name is specified, it must be a column containing exactly one index;
675
+ otherwise the specific index name must be provided instead.
611
676
 
612
677
  Args:
613
- column_name: The name of the column whose index to drop. Invalid if the column has multiple indices.
678
+ column_name: The name of the column from which to drop the index. Invalid if the column has multiple
679
+ indices.
614
680
  idx_name: The name of the index to drop.
615
681
 
616
682
  Raises:
617
- Error: If the index does not exist.
683
+ Error: If `column_name` is specified, but the column does not exist, or it contains no
684
+ indices or multiple indices.
685
+ Error: If `idx_name` is specified, but the index does not exist.
618
686
 
619
687
  Examples:
620
- Drop index on the ``img`` column:
688
+ Drop the index on the `img` column of the table `my_table`:
621
689
 
622
- >>> tbl.drop_index(column_name='img')
690
+ >>> tbl = pxt.get_table('my_table')
691
+ ... tbl.drop_index(column_name='img')
623
692
  """
624
693
  self._drop_index(column_name=column_name, idx_name=idx_name)
625
694
 
@@ -672,7 +741,7 @@ class Table(SchemaObject):
672
741
  To insert multiple rows at a time:
673
742
  ``insert(rows: Iterable[dict[str, Any]], /, *, print_stats: bool = False, fail_on_exception: bool = True)``
674
743
 
675
- To insert just a single row, you can use the more convenient syntax:
744
+ To insert just a single row, you can use the more concise syntax:
676
745
  ``insert(*, print_stats: bool = False, fail_on_exception: bool = True, **kwargs: Any)``
677
746
 
678
747
  Args:
@@ -688,24 +757,31 @@ class Table(SchemaObject):
688
757
  If ``True``, raise an exception that aborts the insert.
689
758
 
690
759
  Returns:
691
- execution status
760
+ An [`UpdateStatus`][pixeltable.UpdateStatus] object containing information about the update.
692
761
 
693
762
  Raises:
694
- Error: if a row does not match the table schema or contains values for computed columns
763
+ Error: If one of the following conditions occurs:
764
+
765
+ - The table is a view or snapshot.
766
+ - The table has been dropped.
767
+ - One of the rows being inserted does not conform to the table schema.
768
+ - An error occurs during processing of computed columns, and `fail_on_exception=True`.
695
769
 
696
770
  Examples:
697
- Insert two rows into a table with three int columns ``a``, ``b``, and ``c``. Column ``c`` is nullable.
771
+ Insert two rows into the table `my_table` with three int columns ``a``, ``b``, and ``c``.
772
+ Column ``c`` is nullable:
698
773
 
699
- >>> tbl.insert([{'a': 1, 'b': 1, 'c': 1}, {'a': 2, 'b': 2}])
774
+ >>> tbl = pxt.get_table('my_table')
775
+ >>> tbl.insert([{'a': 1, 'b': 1, 'c': 1}, {'a': 2, 'b': 2}])
700
776
 
701
- Insert a single row into a table with three int columns ``a``, ``b``, and ``c``.
777
+ Insert a single row using the alternative syntax:
702
778
 
703
- >>> tbl.insert(a=1, b=1, c=1)
779
+ >>> tbl.insert(a=3, b=3, c=3)
704
780
  """
705
781
  raise NotImplementedError
706
782
 
707
783
  def update(
708
- self, value_spec: dict[str, Any], where: Optional['pixeltable.exprs.Expr'] = None, cascade: bool = True
784
+ self, value_spec: dict[str, Any], where: Optional['pxt.exprs.Expr'] = None, cascade: bool = True
709
785
  ) -> UpdateStatus:
710
786
  """Update rows in this table.
711
787
 
@@ -732,7 +808,9 @@ class Table(SchemaObject):
732
808
  >>> tbl.update({'int_col': tbl.int_col + 1}, where=tbl.int_col == 0)
733
809
  """
734
810
  self._check_is_dropped()
735
- return self._tbl_version.update(value_spec, where, cascade)
811
+ status = self._tbl_version.update(value_spec, where, cascade)
812
+ FileCache.get().emit_eviction_warnings()
813
+ return status
736
814
 
737
815
  def batch_update(
738
816
  self, rows: Iterable[dict[str, Any]], cascade: bool = True,
@@ -789,11 +867,13 @@ class Table(SchemaObject):
789
867
  missing_cols = pk_col_names - set(col.name for col in col_vals.keys())
790
868
  raise excs.Error(f'Primary key columns ({", ".join(missing_cols)}) missing in {row_spec}')
791
869
  row_updates.append(col_vals)
792
- return self._tbl_version.batch_update(
870
+ status = self._tbl_version.batch_update(
793
871
  row_updates, rowids, error_if_not_exists=if_not_exists == 'error',
794
872
  insert_if_not_exists=if_not_exists == 'insert', cascade=cascade)
873
+ FileCache.get().emit_eviction_warnings()
874
+ return status
795
875
 
796
- def delete(self, where: Optional['pixeltable.exprs.Expr'] = None) -> UpdateStatus:
876
+ def delete(self, where: Optional['pxt.exprs.Expr'] = None) -> UpdateStatus:
797
877
  """Delete rows in this table.
798
878
 
799
879
  Args:
@@ -822,17 +902,17 @@ class Table(SchemaObject):
822
902
  self._tbl_version.revert()
823
903
 
824
904
  @overload
825
- def query(self, py_fn: Callable) -> 'pixeltable.func.QueryTemplateFunction': ...
905
+ def query(self, py_fn: Callable) -> 'pxt.func.QueryTemplateFunction': ...
826
906
 
827
907
  @overload
828
908
  def query(
829
909
  self, *, param_types: Optional[list[ts.ColumnType]] = None
830
- ) -> Callable[[Callable], 'pixeltable.func.QueryTemplateFunction']: ...
910
+ ) -> Callable[[Callable], 'pxt.func.QueryTemplateFunction']: ...
831
911
 
832
912
  def query(self, *args: Any, **kwargs: Any) -> Any:
833
913
  def make_query_template(
834
914
  py_fn: Callable, param_types: Optional[list[ts.ColumnType]]
835
- ) -> 'pixeltable.func.QueryTemplateFunction':
915
+ ) -> 'pxt.func.QueryTemplateFunction':
836
916
  if py_fn.__module__ != '__main__' and py_fn.__name__.isidentifier():
837
917
  # this is a named function in a module
838
918
  function_path = f'{py_fn.__module__}.{py_fn.__qualname__}'
@@ -841,12 +921,11 @@ class Table(SchemaObject):
841
921
  query_name = py_fn.__name__
842
922
  if query_name in self._schema.keys():
843
923
  raise excs.Error(f'Query name {query_name!r} conflicts with existing column')
844
- if query_name in self._queries:
924
+ if query_name in self.__query_scope._queries and function_path is not None:
845
925
  raise excs.Error(f'Duplicate query name: {query_name!r}')
846
- import pixeltable.func as func
847
- query_fn = func.QueryTemplateFunction.create(
926
+ query_fn = pxt.func.QueryTemplateFunction.create(
848
927
  py_fn, param_types=param_types, path=function_path, name=query_name)
849
- self._queries[query_name] = query_fn
928
+ self.__query_scope._queries[query_name] = query_fn
850
929
  return query_fn
851
930
 
852
931
  # TODO: verify that the inferred return type matches that of the template
@@ -863,7 +942,7 @@ class Table(SchemaObject):
863
942
  def external_stores(self) -> list[str]:
864
943
  return list(self._tbl_version.external_stores.keys())
865
944
 
866
- def _link_external_store(self, store: 'pixeltable.io.ExternalStore') -> None:
945
+ def _link_external_store(self, store: 'pxt.io.ExternalStore') -> None:
867
946
  """
868
947
  Links the specified `ExternalStore` to this table.
869
948
  """
@@ -918,7 +997,7 @@ class Table(SchemaObject):
918
997
  *,
919
998
  export_data: bool = True,
920
999
  import_data: bool = True
921
- ) -> 'pixeltable.io.SyncStatus':
1000
+ ) -> 'pxt.io.SyncStatus':
922
1001
  """
923
1002
  Synchronizes this table with its linked external stores.
924
1003
 
@@ -940,9 +1019,7 @@ class Table(SchemaObject):
940
1019
  if store not in all_stores:
941
1020
  raise excs.Error(f'Table `{self._name}` has no external store with that name: {store}')
942
1021
 
943
- from pixeltable.io import SyncStatus
944
-
945
- sync_status = SyncStatus.empty()
1022
+ sync_status = pxt.io.SyncStatus.empty()
946
1023
  for store in stores:
947
1024
  store_obj = self._tbl_version.external_stores[store]
948
1025
  store_sync_status = store_obj.sync(self, export_data=export_data, import_data=import_data)