pixeltable 0.2.20__py3-none-any.whl → 0.2.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (84) hide show
  1. pixeltable/__init__.py +7 -19
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +7 -7
  4. pixeltable/catalog/globals.py +3 -0
  5. pixeltable/catalog/table.py +208 -145
  6. pixeltable/catalog/table_version.py +36 -18
  7. pixeltable/catalog/table_version_path.py +0 -8
  8. pixeltable/catalog/view.py +3 -3
  9. pixeltable/dataframe.py +9 -24
  10. pixeltable/env.py +1 -1
  11. pixeltable/exec/__init__.py +1 -1
  12. pixeltable/exec/aggregation_node.py +22 -15
  13. pixeltable/exec/data_row_batch.py +7 -7
  14. pixeltable/exec/exec_node.py +35 -7
  15. pixeltable/exec/expr_eval_node.py +2 -1
  16. pixeltable/exec/in_memory_data_node.py +9 -9
  17. pixeltable/exec/sql_node.py +265 -136
  18. pixeltable/exprs/__init__.py +1 -0
  19. pixeltable/exprs/data_row.py +30 -19
  20. pixeltable/exprs/expr.py +15 -14
  21. pixeltable/exprs/expr_dict.py +55 -0
  22. pixeltable/exprs/expr_set.py +21 -15
  23. pixeltable/exprs/function_call.py +21 -8
  24. pixeltable/exprs/rowid_ref.py +2 -2
  25. pixeltable/exprs/sql_element_cache.py +5 -1
  26. pixeltable/ext/functions/whisperx.py +7 -2
  27. pixeltable/func/callable_function.py +2 -2
  28. pixeltable/func/function_registry.py +6 -7
  29. pixeltable/func/query_template_function.py +11 -12
  30. pixeltable/func/signature.py +17 -15
  31. pixeltable/func/udf.py +0 -4
  32. pixeltable/functions/__init__.py +1 -1
  33. pixeltable/functions/audio.py +4 -6
  34. pixeltable/functions/globals.py +86 -42
  35. pixeltable/functions/huggingface.py +12 -14
  36. pixeltable/functions/image.py +59 -45
  37. pixeltable/functions/json.py +0 -1
  38. pixeltable/functions/mistralai.py +2 -2
  39. pixeltable/functions/openai.py +22 -25
  40. pixeltable/functions/string.py +50 -50
  41. pixeltable/functions/timestamp.py +20 -20
  42. pixeltable/functions/together.py +2 -2
  43. pixeltable/functions/video.py +11 -20
  44. pixeltable/functions/whisper.py +2 -20
  45. pixeltable/globals.py +55 -56
  46. pixeltable/index/base.py +2 -2
  47. pixeltable/index/btree.py +7 -7
  48. pixeltable/index/embedding_index.py +8 -10
  49. pixeltable/io/external_store.py +11 -5
  50. pixeltable/io/globals.py +2 -0
  51. pixeltable/io/hf_datasets.py +1 -1
  52. pixeltable/io/label_studio.py +6 -6
  53. pixeltable/io/parquet.py +14 -13
  54. pixeltable/iterators/document.py +9 -7
  55. pixeltable/iterators/video.py +10 -1
  56. pixeltable/metadata/__init__.py +3 -2
  57. pixeltable/metadata/converters/convert_14.py +4 -2
  58. pixeltable/metadata/converters/convert_15.py +1 -1
  59. pixeltable/metadata/converters/convert_19.py +1 -0
  60. pixeltable/metadata/converters/convert_20.py +1 -1
  61. pixeltable/metadata/converters/util.py +9 -8
  62. pixeltable/metadata/schema.py +32 -21
  63. pixeltable/plan.py +136 -154
  64. pixeltable/store.py +51 -36
  65. pixeltable/tool/create_test_db_dump.py +6 -6
  66. pixeltable/tool/doc_plugins/griffe.py +3 -34
  67. pixeltable/tool/mypy_plugin.py +32 -0
  68. pixeltable/type_system.py +243 -60
  69. pixeltable/utils/arrow.py +10 -9
  70. pixeltable/utils/coco.py +4 -4
  71. pixeltable/utils/documents.py +1 -1
  72. pixeltable/utils/filecache.py +9 -9
  73. pixeltable/utils/formatter.py +1 -1
  74. pixeltable/utils/http_server.py +2 -5
  75. pixeltable/utils/media_store.py +6 -6
  76. pixeltable/utils/pytorch.py +10 -11
  77. pixeltable/utils/sql.py +2 -1
  78. {pixeltable-0.2.20.dist-info → pixeltable-0.2.21.dist-info}/METADATA +6 -5
  79. pixeltable-0.2.21.dist-info/RECORD +148 -0
  80. pixeltable/utils/help.py +0 -11
  81. pixeltable-0.2.20.dist-info/RECORD +0 -147
  82. {pixeltable-0.2.20.dist-info → pixeltable-0.2.21.dist-info}/LICENSE +0 -0
  83. {pixeltable-0.2.20.dist-info → pixeltable-0.2.21.dist-info}/WHEEL +0 -0
  84. {pixeltable-0.2.20.dist-info → pixeltable-0.2.21.dist-info}/entry_points.txt +0 -0
@@ -5,14 +5,15 @@ import builtins
5
5
  import json
6
6
  import logging
7
7
  from pathlib import Path
8
- from typing import TYPE_CHECKING, Any, Callable, Iterable, Literal, Optional, Set, Tuple, Type, Union, overload
8
+ from typing import _GenericAlias # type: ignore[attr-defined]
9
+ from typing import TYPE_CHECKING, Any, Callable, Iterable, Literal, Optional, Set, Sequence, Tuple, Type, Union, overload
9
10
  from uuid import UUID
10
11
 
11
12
  import pandas as pd
12
13
  import pandas.io.formats.style
13
14
  import sqlalchemy as sql
14
15
 
15
- import pixeltable
16
+ import pixeltable as pxt
16
17
  import pixeltable.catalog as catalog
17
18
  import pixeltable.env as env
18
19
  import pixeltable.exceptions as excs
@@ -35,18 +36,30 @@ _logger = logging.getLogger('pixeltable')
35
36
 
36
37
  class Table(SchemaObject):
37
38
  """
38
- Base class for table objects (base tables, views, snapshots).
39
-
40
- Every user-invoked operation that runs an ExecNode tree (directly or indirectly) needs to call
41
- FileCache.emit_eviction_warnings() at the end of the operation.
39
+ A handle to a table, view, or snapshot. This class is the primary interface through which table operations
40
+ (queries, insertions, updates, etc.) are performed in Pixeltable.
42
41
  """
42
+ # Every user-invoked operation that runs an ExecNode tree (directly or indirectly) needs to call
43
+ # FileCache.emit_eviction_warnings() at the end of the operation.
43
44
 
44
45
  def __init__(self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath):
45
46
  super().__init__(id, name, dir_id)
46
47
  self._is_dropped = False
47
48
  self._tbl_version_path = tbl_version_path
48
- from pixeltable.func import QueryTemplateFunction
49
- self._queries: dict[str, QueryTemplateFunction] = {}
49
+ self.__query_scope = self.QueryScope(self)
50
+
51
+ class QueryScope:
52
+ __table: 'Table'
53
+ _queries: dict[str, pxt.func.QueryTemplateFunction]
54
+
55
+ def __init__(self, table: 'Table') -> None:
56
+ self.__table = table
57
+ self._queries = {}
58
+
59
+ def __getattr__(self, name: str) -> pxt.func.QueryTemplateFunction:
60
+ if name in self._queries:
61
+ return self._queries[name]
62
+ raise AttributeError(f'Table {self.__table._name!r} has no query with that name: {name!r}')
50
63
 
51
64
  def _move(self, new_name: str, new_dir_id: UUID) -> None:
52
65
  super()._move(new_name, new_dir_id)
@@ -59,6 +72,28 @@ class Table(SchemaObject):
59
72
  conn.execute(stmt, {'new_dir_id': new_dir_id, 'new_name': json.dumps(new_name), 'id': self._id})
60
73
 
61
74
  def get_metadata(self) -> dict[str, Any]:
75
+ """
76
+ Retrieves metadata associated with this table.
77
+
78
+ Returns:
79
+ A dictionary containing the metadata, in the following format:
80
+
81
+ ```python
82
+ {
83
+ 'base': None, # If this is a view or snapshot, will contain the name of its base table
84
+ 'schema': {
85
+ 'col1': StringType(),
86
+ 'col2': IntType(),
87
+ },
88
+ 'version': 22,
89
+ 'schema_version': 1,
90
+ 'comment': '',
91
+ 'num_retained_versions': 10,
92
+ 'is_view': False,
93
+ 'is_snapshot': False,
94
+ }
95
+ ```
96
+ """
62
97
  md = super().get_metadata()
63
98
  md['base'] = self._base._path if self._base is not None else None
64
99
  md['schema'] = self._schema
@@ -85,25 +120,24 @@ class Table(SchemaObject):
85
120
  if self._is_dropped:
86
121
  raise excs.Error(f'{self._display_name()} {self._name} has been dropped')
87
122
 
88
- def __getattr__(
89
- self, name: str
90
- ) -> Union['pixeltable.exprs.ColumnRef', 'pixeltable.func.QueryTemplateFunction']:
91
- """Return a ColumnRef or QueryTemplateFunction for the given name.
123
+ def __getattr__(self, name: str) -> 'pxt.exprs.ColumnRef':
124
+ """Return a ColumnRef for the given name.
92
125
  """
93
- if name in self._queries:
94
- return self._queries[name]
95
126
  return getattr(self._tbl_version_path, name)
96
127
 
97
- def __getitem__(
98
- self, index: object
99
- ) -> Union[
100
- 'pixeltable.func.QueryTemplateFunction', 'pixeltable.exprs.ColumnRef', 'pixeltable.DataFrame'
101
- ]:
128
+ @overload
129
+ def __getitem__(self, name: str) -> 'pxt.exprs.ColumnRef': ...
130
+
131
+ @overload
132
+ def __getitem__(self, index: Union[exprs.Expr, Sequence[exprs.Expr]]) -> 'pxt.DataFrame': ...
133
+
134
+ def __getitem__(self, index):
102
135
  """Return a ColumnRef or QueryTemplateFunction for the given name, or a DataFrame for the given slice.
103
136
  """
104
- if isinstance(index, str) and index in self._queries:
105
- return self._queries[index]
106
- return self._tbl_version_path.__getitem__(index)
137
+ if isinstance(index, str):
138
+ return getattr(self, index)
139
+ else:
140
+ return self._df()[index]
107
141
 
108
142
  def list_views(self, *, recursive: bool = True) -> list[str]:
109
143
  """
@@ -112,6 +146,9 @@ class Table(SchemaObject):
112
146
  Args:
113
147
  recursive: If `False`, returns only the immediate successor views of this `Table`. If `True`, returns
114
148
  all sub-views (including views of views, etc.)
149
+
150
+ Returns:
151
+ A list of view paths.
115
152
  """
116
153
  return [t._path for t in self._get_views(recursive=recursive)]
117
154
 
@@ -122,60 +159,55 @@ class Table(SchemaObject):
122
159
  else:
123
160
  return dependents
124
161
 
125
- def _df(self) -> 'pixeltable.dataframe.DataFrame':
162
+ def _df(self) -> 'pxt.dataframe.DataFrame':
126
163
  """Return a DataFrame for this table.
127
164
  """
128
165
  # local import: avoid circular imports
129
- from pixeltable.dataframe import DataFrame
130
- return DataFrame(self._tbl_version_path)
166
+ return pxt.DataFrame(self._tbl_version_path)
167
+
168
+ @property
169
+ def queries(self) -> 'Table.QueryScope':
170
+ return self.__query_scope
131
171
 
132
- def select(self, *items: Any, **named_items: Any) -> 'pixeltable.DataFrame':
172
+ def select(self, *items: Any, **named_items: Any) -> 'pxt.DataFrame':
133
173
  """Return a [`DataFrame`][pixeltable.DataFrame] for this table."""
134
- # local import: avoid circular imports
135
- from pixeltable.dataframe import DataFrame
136
- return DataFrame(self._tbl_version_path).select(*items, **named_items)
174
+ return self._df().select(*items, **named_items)
137
175
 
138
- def where(self, pred: 'exprs.Expr') -> 'pixeltable.DataFrame':
176
+ def where(self, pred: 'exprs.Expr') -> 'pxt.DataFrame':
139
177
  """Return a [`DataFrame`][pixeltable.DataFrame] for this table."""
140
- # local import: avoid circular imports
141
- from pixeltable.dataframe import DataFrame
142
- return DataFrame(self._tbl_version_path).where(pred)
178
+ return self._df().where(pred)
143
179
 
144
- def order_by(self, *items: 'exprs.Expr', asc: bool = True) -> 'pixeltable.DataFrame':
180
+ def order_by(self, *items: 'exprs.Expr', asc: bool = True) -> 'pxt.DataFrame':
145
181
  """Return a [`DataFrame`][pixeltable.DataFrame] for this table."""
146
- # local import: avoid circular imports
147
- from pixeltable.dataframe import DataFrame
148
- return DataFrame(self._tbl_version_path).order_by(*items, asc=asc)
182
+ return self._df().order_by(*items, asc=asc)
149
183
 
150
- def group_by(self, *items: 'exprs.Expr') -> 'pixeltable.DataFrame':
184
+ def group_by(self, *items: 'exprs.Expr') -> 'pxt.DataFrame':
151
185
  """Return a [`DataFrame`][pixeltable.DataFrame] for this table."""
152
- from pixeltable.dataframe import DataFrame
153
- return DataFrame(self._tbl_version_path).group_by(*items)
186
+ return self._df().group_by(*items)
154
187
 
155
- def limit(self, n: int) -> 'pixeltable.DataFrame':
156
- from pixeltable.dataframe import DataFrame
157
- return DataFrame(self._tbl_version_path).limit(n)
188
+ def limit(self, n: int) -> 'pxt.DataFrame':
189
+ return self._df().limit(n)
158
190
 
159
- def collect(self) -> 'pixeltable.dataframe.DataFrameResultSet':
191
+ def collect(self) -> 'pxt.dataframe.DataFrameResultSet':
160
192
  """Return rows from this table."""
161
193
  return self._df().collect()
162
194
 
163
195
  def show(
164
196
  self, *args, **kwargs
165
- ) -> 'pixeltable.dataframe.DataFrameResultSet':
197
+ ) -> 'pxt.dataframe.DataFrameResultSet':
166
198
  """Return rows from this table.
167
199
  """
168
200
  return self._df().show(*args, **kwargs)
169
201
 
170
202
  def head(
171
203
  self, *args, **kwargs
172
- ) -> 'pixeltable.dataframe.DataFrameResultSet':
204
+ ) -> 'pxt.dataframe.DataFrameResultSet':
173
205
  """Return the first n rows inserted into this table."""
174
206
  return self._df().head(*args, **kwargs)
175
207
 
176
208
  def tail(
177
209
  self, *args, **kwargs
178
- ) -> 'pixeltable.dataframe.DataFrameResultSet':
210
+ ) -> 'pxt.dataframe.DataFrameResultSet':
179
211
  """Return the last n rows inserted into this table."""
180
212
  return self._df().tail(*args, **kwargs)
181
213
 
@@ -191,7 +223,7 @@ class Table(SchemaObject):
191
223
  @property
192
224
  def _query_names(self) -> list[str]:
193
225
  """Return the names of the registered queries for this table."""
194
- return list(self._queries.keys())
226
+ return list(self.__query_scope._queries.keys())
195
227
 
196
228
  @property
197
229
  def _base(self) -> Optional['Table']:
@@ -216,7 +248,7 @@ class Table(SchemaObject):
216
248
  cols = self._tbl_version_path.columns()
217
249
  df = pd.DataFrame({
218
250
  'Column Name': [c.name for c in cols],
219
- 'Type': [str(c.col_type) for c in cols],
251
+ 'Type': [c.col_type._to_str(as_schema=True) for c in cols],
220
252
  'Computed With': [c.value_expr.display_str(inline=False) if c.value_expr is not None else '' for c in cols],
221
253
  })
222
254
  return df
@@ -239,7 +271,7 @@ class Table(SchemaObject):
239
271
  from IPython.display import display
240
272
  display(self._description_html())
241
273
  else:
242
- print(self.__repr__())
274
+ print(repr(self))
243
275
 
244
276
  # TODO: Display comments in _repr_html()
245
277
  def __repr__(self) -> str:
@@ -267,75 +299,77 @@ class Table(SchemaObject):
267
299
  """Return a PyTorch Dataset for this table.
268
300
  See DataFrame.to_pytorch_dataset()
269
301
  """
270
- from pixeltable.dataframe import DataFrame
271
- return DataFrame(self._tbl_version_path).to_pytorch_dataset(image_format=image_format)
302
+ return self._df().to_pytorch_dataset(image_format=image_format)
272
303
 
273
304
  def to_coco_dataset(self) -> Path:
274
305
  """Return the path to a COCO json file for this table.
275
306
  See DataFrame.to_coco_dataset()
276
307
  """
277
- from pixeltable.dataframe import DataFrame
278
- return DataFrame(self._tbl_version_path).to_coco_dataset()
308
+ return self._df().to_coco_dataset()
279
309
 
280
310
  def __setitem__(self, col_name: str, spec: Union[ts.ColumnType, exprs.Expr]) -> None:
281
311
  """
282
312
  Adds a column to the table. This is an alternate syntax for `add_column()`; the meaning of
283
313
 
284
- >>> tbl['new_col'] = IntType()
314
+ >>> tbl['new_col'] = pxt.Int
285
315
 
286
316
  is exactly equivalent to
287
317
 
288
- >>> tbl.add_column(new_col=IntType())
318
+ >>> tbl.add_column(new_col=pxt.Int)
289
319
 
290
320
  For details, see the documentation for [`add_column()`][pixeltable.catalog.Table.add_column].
291
321
  """
292
322
  if not isinstance(col_name, str):
293
323
  raise excs.Error(f'Column name must be a string, got {type(col_name)}')
294
- if not isinstance(spec, (ts.ColumnType, exprs.Expr)):
295
- raise excs.Error(f'Column spec must be a ColumnType or an Expr, got {type(spec)}')
296
- self.add_column(type=None, stored=None, print_stats=False, **{col_name: spec})
324
+ if not isinstance(spec, (ts.ColumnType, exprs.Expr, type, _GenericAlias)):
325
+ raise excs.Error(f'Column spec must be a ColumnType, Expr, or type, got {type(spec)}')
326
+ self.add_column(type=None, stored=None, print_stats=False, on_error='abort', **{col_name: spec})
297
327
 
298
328
  def add_column(
299
329
  self,
300
330
  *,
301
- type: Optional[ts.ColumnType] = None,
331
+ type: Union[ts.ColumnType, builtins.type, _GenericAlias, None] = None,
302
332
  stored: Optional[bool] = None,
303
333
  print_stats: bool = False,
304
- **kwargs: Union[ts.ColumnType, exprs.Expr, Callable]
334
+ on_error: Literal['abort', 'ignore'] = 'abort',
335
+ **kwargs: Union[ts.ColumnType, builtins.type, _GenericAlias, exprs.Expr, Callable]
305
336
  ) -> UpdateStatus:
306
337
  """
307
338
  Adds a column to the table.
308
339
 
309
340
  Args:
310
- kwargs: Exactly one keyword argument of the form ``column-name=type|value-expression``.
311
- type: The type of the column. Only valid and required if ``value-expression`` is a Callable.
341
+ kwargs: Exactly one keyword argument of the form `column_name=type` or `column_name=expression`.
342
+ type: The type of the column. Only valid and required if `expression` is a Callable.
312
343
  stored: Whether the column is materialized and stored or computed on demand. Only valid for image columns.
313
- print_stats: If ``True``, print execution metrics.
344
+ print_stats: If `True`, print execution metrics during evaluation.
345
+ on_error: Determines the behavior if an error occurs while evaluating the column expression for at least one
346
+ row.
347
+
348
+ - If `on_error='abort'`, then an exception will be raised and the column will not be added.
349
+ - If `on_error='ignore'`, then execution will continue and the column will be added. Any rows
350
+ with errors will have a `None` value for the column, with information about the error stored in the
351
+ corresponding `tbl.col_name.errortype` and `tbl.col_name.errormsg` fields.
314
352
 
315
353
  Returns:
316
- execution status
354
+ Information about the execution status of the operation.
317
355
 
318
356
  Raises:
319
357
  Error: If the column name is invalid or already exists.
320
358
 
321
359
  Examples:
322
- Add an int column with ``None`` values:
360
+ Add an int column:
323
361
 
324
- >>> tbl.add_column(new_col=IntType())
362
+ >>> tbl.add_column(new_col=pxt.Int)
325
363
 
326
364
  Alternatively, this can also be expressed as:
327
365
 
328
- >>> tbl['new_col'] = IntType()
366
+ >>> tbl['new_col'] = pxt.Int
329
367
 
330
- For a table with int column ``int_col``, add a column that is the factorial of ``int_col``. The names of
368
+ For a table with int column `int_col`, add a column that is the factorial of `int_col`. The names of
331
369
  the parameters of the Callable must correspond to existing column names (the column values are then passed
332
370
  as arguments to the Callable). In this case, the column type needs to be specified explicitly:
333
371
 
334
- >>> tbl.add_column(factorial=lambda int_col: math.factorial(int_col), type=IntType())
335
-
336
- Alternatively, this can also be expressed as:
337
-
338
- >>> tbl['factorial'] = {'value': lambda int_col: math.factorial(int_col), 'type': IntType()}
372
+ >>> tbl.add_column(factorial=lambda int_col: math.factorial(int_col), type=pxt.Int)
339
373
 
340
374
  For a table with an image column ``frame``, add an image column ``rotated`` that rotates the image by
341
375
  90 degrees. In this case, the column type is inferred from the expression. Also, the column is not stored
@@ -347,13 +381,9 @@ class Table(SchemaObject):
347
381
 
348
382
  >>> tbl['rotated'] = tbl.frame.rotate(90)
349
383
 
350
- Do the same, but now the column is stored:
351
-
352
- >>> tbl.add_column(rotated=tbl.frame.rotate(90), stored=True)
353
-
354
- Alternatively, this can also be expressed as:
384
+ Do the same, but now the column is unstored:
355
385
 
356
- >>> tbl['rotated'] = {'value': tbl.frame.rotate(90), 'stored': True}
386
+ >>> tbl.add_column(rotated=tbl.frame.rotate(90), stored=False)
357
387
  """
358
388
  self._check_is_dropped()
359
389
  # verify kwargs and construct column schema dict
@@ -365,26 +395,25 @@ class Table(SchemaObject):
365
395
  col_name, spec = next(iter(kwargs.items()))
366
396
  if not is_valid_identifier(col_name):
367
397
  raise excs.Error(f'Invalid column name: {col_name!r}')
368
- if isinstance(spec, (ts.ColumnType, exprs.Expr)) and type is not None:
398
+ if isinstance(spec, (ts.ColumnType, builtins.type, _GenericAlias, exprs.Expr)) and type is not None:
369
399
  raise excs.Error(f'add_column(): keyword argument "type" is redundant')
370
400
 
371
401
  col_schema: dict[str, Any] = {}
372
- if isinstance(spec, ts.ColumnType):
373
- col_schema['type'] = spec
402
+ if isinstance(spec, (ts.ColumnType, builtins.type, _GenericAlias)):
403
+ col_schema['type'] = ts.ColumnType.normalize_type(spec, nullable_default=True)
374
404
  else:
375
405
  col_schema['value'] = spec
376
406
  if type is not None:
377
- col_schema['type'] = type
407
+ col_schema['type'] = ts.ColumnType.normalize_type(type, nullable_default=True)
378
408
  if stored is not None:
379
409
  col_schema['stored'] = stored
380
410
 
381
411
  new_col = self._create_columns({col_name: col_schema})[0]
382
412
  self._verify_column(new_col, set(self._schema.keys()), set(self._query_names))
383
- status = self._tbl_version.add_column(new_col, print_stats=print_stats)
413
+ status = self._tbl_version.add_column(new_col, print_stats=print_stats, on_error=on_error)
384
414
  FileCache.get().emit_eviction_warnings()
385
415
  return status
386
416
 
387
-
388
417
  @classmethod
389
418
  def _validate_column_spec(cls, name: str, spec: dict[str, Any]) -> None:
390
419
  """Check integrity of user-supplied Column spec
@@ -401,8 +430,8 @@ class Table(SchemaObject):
401
430
 
402
431
  if 'type' in spec:
403
432
  has_type = True
404
- if not isinstance(spec['type'], ts.ColumnType):
405
- raise excs.Error(f'Column {name}: "type" must be a ColumnType, got {spec["type"]}')
433
+ if not isinstance(spec['type'], (ts.ColumnType, type, _GenericAlias)):
434
+ raise excs.Error(f'Column {name}: "type" must be a type or ColumnType, got {spec["type"]}')
406
435
 
407
436
  if 'value' in spec:
408
437
  value_spec = spec['value']
@@ -435,20 +464,20 @@ class Table(SchemaObject):
435
464
  primary_key: Optional[bool] = None
436
465
  stored = True
437
466
 
438
- if isinstance(spec, ts.ColumnType):
439
- # TODO: create copy
440
- col_type = spec
467
+ if isinstance(spec, (ts.ColumnType, type, _GenericAlias)):
468
+ col_type = ts.ColumnType.normalize_type(spec, nullable_default=True)
441
469
  elif isinstance(spec, exprs.Expr):
442
470
  # create copy so we can modify it
443
471
  value_expr = spec.copy()
444
472
  elif callable(spec):
445
- raise excs.Error((
473
+ raise excs.Error(
446
474
  f'Column {name} computed with a Callable: specify using a dictionary with '
447
- f'the "value" and "type" keys (e.g., "{name}": {{"value": <Callable>, "type": IntType()}})'
448
- ))
475
+ f'the "value" and "type" keys (e.g., "{name}": {{"value": <Callable>, "type": pxt.Int}})'
476
+ )
449
477
  elif isinstance(spec, dict):
450
478
  cls._validate_column_spec(name, spec)
451
- col_type = spec.get('type')
479
+ if 'type' in spec:
480
+ col_type = ts.ColumnType.normalize_type(spec['type'], nullable_default=True)
452
481
  value_expr = spec.get('value')
453
482
  if value_expr is not None and isinstance(value_expr, exprs.Expr):
454
483
  # create copy so we can modify it
@@ -496,12 +525,13 @@ class Table(SchemaObject):
496
525
  name: The name of the column to drop.
497
526
 
498
527
  Raises:
499
- Error: If the column does not exist or if it is referenced by a computed column.
528
+ Error: If the column does not exist or if it is referenced by a dependent computed column.
500
529
 
501
530
  Examples:
502
- Drop column ``factorial``:
531
+ Drop the column `col` from the table `my_table`:
503
532
 
504
- >>> tbl.drop_column('factorial')
533
+ >>> tbl = pxt.get_table('my_table')
534
+ ... tbl.drop_column('col')
505
535
  """
506
536
  self._check_is_dropped()
507
537
 
@@ -544,43 +574,59 @@ class Table(SchemaObject):
544
574
  new_name: The new name of the column.
545
575
 
546
576
  Raises:
547
- Error: If the column does not exist or if the new name is invalid or already exists.
577
+ Error: If the column does not exist, or if the new name is invalid or already exists.
548
578
 
549
579
  Examples:
550
- Rename column ``factorial`` to ``fac``:
580
+ Rename the column `col1` to `col2` of the table `my_table`:
551
581
 
552
- >>> tbl.rename_column('factorial', 'fac')
582
+ >>> tbl = pxt.get_table('my_table')
583
+ ... tbl.rename_column('col1', 'col2')
553
584
  """
554
585
  self._check_is_dropped()
555
586
  self._tbl_version.rename_column(old_name, new_name)
556
587
 
557
588
  def add_embedding_index(
558
589
  self, col_name: str, *, idx_name: Optional[str] = None,
559
- string_embed: Optional[pixeltable.Function] = None, image_embed: Optional[pixeltable.Function] = None,
590
+ string_embed: Optional[pxt.Function] = None, image_embed: Optional[pxt.Function] = None,
560
591
  metric: str = 'cosine'
561
592
  ) -> None:
562
- """Add an index to the table.
593
+ """
594
+ Add an embedding index to the table. Once the index is added, it will be automatically kept up to date as new
595
+ rows are inserted into the table.
596
+
597
+ Indices are currently supported only for `String` and `Image` columns. The index must specify, at
598
+ minimum, an embedding of the appropriate type (string or image). It may optionally specify _both_ a string
599
+ and image embedding (into the same vector space); in particular, this can be used to provide similarity search
600
+ of text over an image column.
563
601
 
564
602
  Args:
565
- col_name: name of column to index
566
- idx_name: name of index, which needs to be unique for the table; if not provided, a name will be generated
567
- string_embed: function to embed text; required if the column is a text column
568
- image_embed: function to embed images; required if the column is an image column
569
- metric: distance metric to use for the index; one of 'cosine', 'ip', 'l2'; default is 'cosine'
603
+ col_name: The name of the column to index; must be a `String` or `Image` column.
604
+ idx_name: The name of the index. If not specified, a name such as `'idx0'` will be generated automatically.
605
+ If specified, the name must be unique for this table.
606
+ string_embed: A function to embed text; required if the column is a `String` column.
607
+ image_embed: A function to embed images; required if the column is an `Image` column.
608
+ metric: Distance metric to use for the index; one of `'cosine'`, `'ip'`, or `'l2'`;
609
+ the default is `'cosine'`.
570
610
 
571
611
  Raises:
572
- Error: If an index with that name already exists for the table or if the column does not exist.
612
+ Error: If an index with that name already exists for the table, or if the specified column does not exist.
573
613
 
574
614
  Examples:
575
- Add an index to the ``img`` column:
615
+ Add an index to the `img` column of the table `my_table`:
576
616
 
577
- >>> tbl.add_embedding_index('img', image_embed=...)
617
+ >>> tbl = pxt.get_table('my_table')
618
+ ... tbl.add_embedding_index('img', image_embed=my_image_func)
578
619
 
579
- Add another index to the ``img`` column, using the inner product as the distance metric,
580
- and with a specific name; ``string_embed`` is also specified in order to search with text:
620
+ Add another index to the `img` column, using the inner product as the distance metric,
621
+ and with a specific name; `string_embed` is also specified in order to search with text:
581
622
 
582
623
  >>> tbl.add_embedding_index(
583
- 'img', idx_name='clip_idx', image_embed=..., string_embed=..., metric='ip')
624
+ ... 'img',
625
+ ... idx_name='clip_idx',
626
+ ... image_embed=my_image_func,
627
+ ... string_embed=my_string_func,
628
+ ... metric='ip'
629
+ ... )
584
630
  """
585
631
  if self._tbl_version_path.is_snapshot():
586
632
  raise excs.Error('Cannot add an index to a snapshot')
@@ -599,37 +645,50 @@ class Table(SchemaObject):
599
645
  FileCache.get().emit_eviction_warnings()
600
646
 
601
647
  def drop_embedding_index(self, *, column_name: Optional[str] = None, idx_name: Optional[str] = None) -> None:
602
- """Drop an embedding index from the table.
648
+ """
649
+ Drop an embedding index from the table. Either a column name or an index name (but not both) must be
650
+ specified. If a column name is specified, it must be a column containing exactly one embedding index;
651
+ otherwise the specific index name must be provided instead.
603
652
 
604
653
  Args:
605
- column_name: The name of the column whose embedding index to drop. Invalid if the column has multiple
654
+ column_name: The name of the column from which to drop the index. Invalid if the column has multiple
606
655
  embedding indices.
607
656
  idx_name: The name of the index to drop.
608
657
 
609
658
  Raises:
610
- Error: If the index does not exist.
659
+ Error: If `column_name` is specified, but the column does not exist, or it contains no embedding
660
+ indices or multiple embedding indices.
661
+ Error: If `idx_name` is specified, but the index does not exist or is not an embedding index.
611
662
 
612
663
  Examples:
613
- Drop embedding index on the ``img`` column:
664
+ Drop the embedding index on the `img` column of the table `my_table`:
614
665
 
615
- >>> tbl.drop_embedding_index(column_name='img')
666
+ >>> tbl = pxt.get_table('my_table')
667
+ ... tbl.drop_embedding_index(column_name='img')
616
668
  """
617
669
  self._drop_index(column_name=column_name, idx_name=idx_name, _idx_class=index.EmbeddingIndex)
618
670
 
619
671
  def drop_index(self, *, column_name: Optional[str] = None, idx_name: Optional[str] = None) -> None:
620
- """Drop an index from the table.
672
+ """
673
+ Drop an index from the table. Either a column name or an index name (but not both) must be
674
+ specified. If a column name is specified, it must be a column containing exactly one index;
675
+ otherwise the specific index name must be provided instead.
621
676
 
622
677
  Args:
623
- column_name: The name of the column whose index to drop. Invalid if the column has multiple indices.
678
+ column_name: The name of the column from which to drop the index. Invalid if the column has multiple
679
+ indices.
624
680
  idx_name: The name of the index to drop.
625
681
 
626
682
  Raises:
627
- Error: If the index does not exist.
683
+ Error: If `column_name` is specified, but the column does not exist, or it contains no
684
+ indices or multiple indices.
685
+ Error: If `idx_name` is specified, but the index does not exist.
628
686
 
629
687
  Examples:
630
- Drop index on the ``img`` column:
688
+ Drop the index on the `img` column of the table `my_table`:
631
689
 
632
- >>> tbl.drop_index(column_name='img')
690
+ >>> tbl = pxt.get_table('my_table')
691
+ ... tbl.drop_index(column_name='img')
633
692
  """
634
693
  self._drop_index(column_name=column_name, idx_name=idx_name)
635
694
 
@@ -682,7 +741,7 @@ class Table(SchemaObject):
682
741
  To insert multiple rows at a time:
683
742
  ``insert(rows: Iterable[dict[str, Any]], /, *, print_stats: bool = False, fail_on_exception: bool = True)``
684
743
 
685
- To insert just a single row, you can use the more convenient syntax:
744
+ To insert just a single row, you can use the more concise syntax:
686
745
  ``insert(*, print_stats: bool = False, fail_on_exception: bool = True, **kwargs: Any)``
687
746
 
688
747
  Args:
@@ -698,24 +757,31 @@ class Table(SchemaObject):
698
757
  If ``True``, raise an exception that aborts the insert.
699
758
 
700
759
  Returns:
701
- execution status
760
+ An [`UpdateStatus`][pixeltable.UpdateStatus] object containing information about the update.
702
761
 
703
762
  Raises:
704
- Error: if a row does not match the table schema or contains values for computed columns
763
+ Error: If one of the following conditions occurs:
764
+
765
+ - The table is a view or snapshot.
766
+ - The table has been dropped.
767
+ - One of the rows being inserted does not conform to the table schema.
768
+ - An error occurs during processing of computed columns, and `fail_on_exception=True`.
705
769
 
706
770
  Examples:
707
- Insert two rows into a table with three int columns ``a``, ``b``, and ``c``. Column ``c`` is nullable.
771
+ Insert two rows into the table `my_table` with three int columns ``a``, ``b``, and ``c``.
772
+ Column ``c`` is nullable:
708
773
 
709
- >>> tbl.insert([{'a': 1, 'b': 1, 'c': 1}, {'a': 2, 'b': 2}])
774
+ >>> tbl = pxt.get_table('my_table')
775
+ ... tbl.insert([{'a': 1, 'b': 1, 'c': 1}, {'a': 2, 'b': 2}])
710
776
 
711
- Insert a single row into a table with three int columns ``a``, ``b``, and ``c``.
777
+ Insert a single row using the alternative syntax:
712
778
 
713
- >>> tbl.insert(a=1, b=1, c=1)
779
+ >>> tbl.insert(a=3, b=3, c=3)
714
780
  """
715
781
  raise NotImplementedError
716
782
 
717
783
  def update(
718
- self, value_spec: dict[str, Any], where: Optional['pixeltable.exprs.Expr'] = None, cascade: bool = True
784
+ self, value_spec: dict[str, Any], where: Optional['pxt.exprs.Expr'] = None, cascade: bool = True
719
785
  ) -> UpdateStatus:
720
786
  """Update rows in this table.
721
787
 
@@ -807,7 +873,7 @@ class Table(SchemaObject):
807
873
  FileCache.get().emit_eviction_warnings()
808
874
  return status
809
875
 
810
- def delete(self, where: Optional['pixeltable.exprs.Expr'] = None) -> UpdateStatus:
876
+ def delete(self, where: Optional['pxt.exprs.Expr'] = None) -> UpdateStatus:
811
877
  """Delete rows in this table.
812
878
 
813
879
  Args:
@@ -836,17 +902,17 @@ class Table(SchemaObject):
836
902
  self._tbl_version.revert()
837
903
 
838
904
  @overload
839
- def query(self, py_fn: Callable) -> 'pixeltable.func.QueryTemplateFunction': ...
905
+ def query(self, py_fn: Callable) -> 'pxt.func.QueryTemplateFunction': ...
840
906
 
841
907
  @overload
842
908
  def query(
843
909
  self, *, param_types: Optional[list[ts.ColumnType]] = None
844
- ) -> Callable[[Callable], 'pixeltable.func.QueryTemplateFunction']: ...
910
+ ) -> Callable[[Callable], 'pxt.func.QueryTemplateFunction']: ...
845
911
 
846
912
  def query(self, *args: Any, **kwargs: Any) -> Any:
847
913
  def make_query_template(
848
914
  py_fn: Callable, param_types: Optional[list[ts.ColumnType]]
849
- ) -> 'pixeltable.func.QueryTemplateFunction':
915
+ ) -> 'pxt.func.QueryTemplateFunction':
850
916
  if py_fn.__module__ != '__main__' and py_fn.__name__.isidentifier():
851
917
  # this is a named function in a module
852
918
  function_path = f'{py_fn.__module__}.{py_fn.__qualname__}'
@@ -855,12 +921,11 @@ class Table(SchemaObject):
855
921
  query_name = py_fn.__name__
856
922
  if query_name in self._schema.keys():
857
923
  raise excs.Error(f'Query name {query_name!r} conflicts with existing column')
858
- if query_name in self._queries:
924
+ if query_name in self.__query_scope._queries and function_path is not None:
859
925
  raise excs.Error(f'Duplicate query name: {query_name!r}')
860
- import pixeltable.func as func
861
- query_fn = func.QueryTemplateFunction.create(
926
+ query_fn = pxt.func.QueryTemplateFunction.create(
862
927
  py_fn, param_types=param_types, path=function_path, name=query_name)
863
- self._queries[query_name] = query_fn
928
+ self.__query_scope._queries[query_name] = query_fn
864
929
  return query_fn
865
930
 
866
931
  # TODO: verify that the inferred return type matches that of the template
@@ -877,7 +942,7 @@ class Table(SchemaObject):
877
942
  def external_stores(self) -> list[str]:
878
943
  return list(self._tbl_version.external_stores.keys())
879
944
 
880
- def _link_external_store(self, store: 'pixeltable.io.ExternalStore') -> None:
945
+ def _link_external_store(self, store: 'pxt.io.ExternalStore') -> None:
881
946
  """
882
947
  Links the specified `ExternalStore` to this table.
883
948
  """
@@ -932,7 +997,7 @@ class Table(SchemaObject):
932
997
  *,
933
998
  export_data: bool = True,
934
999
  import_data: bool = True
935
- ) -> 'pixeltable.io.SyncStatus':
1000
+ ) -> 'pxt.io.SyncStatus':
936
1001
  """
937
1002
  Synchronizes this table with its linked external stores.
938
1003
 
@@ -954,9 +1019,7 @@ class Table(SchemaObject):
954
1019
  if store not in all_stores:
955
1020
  raise excs.Error(f'Table `{self._name}` has no external store with that name: {store}')
956
1021
 
957
- from pixeltable.io import SyncStatus
958
-
959
- sync_status = SyncStatus.empty()
1022
+ sync_status = pxt.io.SyncStatus.empty()
960
1023
  for store in stores:
961
1024
  store_obj = self._tbl_version.external_stores[store]
962
1025
  store_sync_status = store_obj.sync(self, export_data=export_data, import_data=import_data)