pixeltable 0.2.20__py3-none-any.whl → 0.2.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (120)
  1. pixeltable/__init__.py +7 -19
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +7 -7
  4. pixeltable/catalog/column.py +37 -11
  5. pixeltable/catalog/globals.py +21 -0
  6. pixeltable/catalog/insertable_table.py +6 -4
  7. pixeltable/catalog/table.py +227 -148
  8. pixeltable/catalog/table_version.py +66 -28
  9. pixeltable/catalog/table_version_path.py +0 -8
  10. pixeltable/catalog/view.py +18 -19
  11. pixeltable/dataframe.py +16 -32
  12. pixeltable/env.py +6 -1
  13. pixeltable/exec/__init__.py +1 -2
  14. pixeltable/exec/aggregation_node.py +27 -17
  15. pixeltable/exec/cache_prefetch_node.py +1 -1
  16. pixeltable/exec/data_row_batch.py +9 -26
  17. pixeltable/exec/exec_node.py +36 -7
  18. pixeltable/exec/expr_eval_node.py +19 -11
  19. pixeltable/exec/in_memory_data_node.py +14 -11
  20. pixeltable/exec/sql_node.py +266 -138
  21. pixeltable/exprs/__init__.py +1 -0
  22. pixeltable/exprs/arithmetic_expr.py +3 -1
  23. pixeltable/exprs/array_slice.py +7 -7
  24. pixeltable/exprs/column_property_ref.py +37 -10
  25. pixeltable/exprs/column_ref.py +93 -14
  26. pixeltable/exprs/comparison.py +5 -5
  27. pixeltable/exprs/compound_predicate.py +8 -7
  28. pixeltable/exprs/data_row.py +56 -36
  29. pixeltable/exprs/expr.py +65 -63
  30. pixeltable/exprs/expr_dict.py +55 -0
  31. pixeltable/exprs/expr_set.py +26 -15
  32. pixeltable/exprs/function_call.py +53 -24
  33. pixeltable/exprs/globals.py +4 -1
  34. pixeltable/exprs/in_predicate.py +8 -7
  35. pixeltable/exprs/inline_expr.py +4 -4
  36. pixeltable/exprs/is_null.py +4 -4
  37. pixeltable/exprs/json_mapper.py +11 -12
  38. pixeltable/exprs/json_path.py +5 -10
  39. pixeltable/exprs/literal.py +5 -5
  40. pixeltable/exprs/method_ref.py +5 -4
  41. pixeltable/exprs/object_ref.py +2 -1
  42. pixeltable/exprs/row_builder.py +88 -36
  43. pixeltable/exprs/rowid_ref.py +14 -13
  44. pixeltable/exprs/similarity_expr.py +12 -7
  45. pixeltable/exprs/sql_element_cache.py +12 -6
  46. pixeltable/exprs/type_cast.py +8 -6
  47. pixeltable/exprs/variable.py +5 -4
  48. pixeltable/ext/functions/whisperx.py +7 -2
  49. pixeltable/func/aggregate_function.py +1 -1
  50. pixeltable/func/callable_function.py +2 -2
  51. pixeltable/func/function.py +11 -10
  52. pixeltable/func/function_registry.py +6 -7
  53. pixeltable/func/query_template_function.py +11 -12
  54. pixeltable/func/signature.py +17 -15
  55. pixeltable/func/udf.py +0 -4
  56. pixeltable/functions/__init__.py +2 -2
  57. pixeltable/functions/audio.py +4 -6
  58. pixeltable/functions/globals.py +84 -42
  59. pixeltable/functions/huggingface.py +31 -34
  60. pixeltable/functions/image.py +59 -45
  61. pixeltable/functions/json.py +0 -1
  62. pixeltable/functions/llama_cpp.py +106 -0
  63. pixeltable/functions/mistralai.py +2 -2
  64. pixeltable/functions/ollama.py +147 -0
  65. pixeltable/functions/openai.py +22 -25
  66. pixeltable/functions/replicate.py +72 -0
  67. pixeltable/functions/string.py +59 -50
  68. pixeltable/functions/timestamp.py +20 -20
  69. pixeltable/functions/together.py +2 -2
  70. pixeltable/functions/video.py +11 -20
  71. pixeltable/functions/whisper.py +2 -20
  72. pixeltable/globals.py +65 -74
  73. pixeltable/index/base.py +2 -2
  74. pixeltable/index/btree.py +20 -7
  75. pixeltable/index/embedding_index.py +12 -14
  76. pixeltable/io/__init__.py +1 -2
  77. pixeltable/io/external_store.py +11 -5
  78. pixeltable/io/fiftyone.py +178 -0
  79. pixeltable/io/globals.py +98 -2
  80. pixeltable/io/hf_datasets.py +1 -1
  81. pixeltable/io/label_studio.py +6 -6
  82. pixeltable/io/parquet.py +14 -13
  83. pixeltable/iterators/base.py +3 -2
  84. pixeltable/iterators/document.py +10 -8
  85. pixeltable/iterators/video.py +126 -60
  86. pixeltable/metadata/__init__.py +4 -3
  87. pixeltable/metadata/converters/convert_14.py +4 -2
  88. pixeltable/metadata/converters/convert_15.py +1 -1
  89. pixeltable/metadata/converters/convert_19.py +1 -0
  90. pixeltable/metadata/converters/convert_20.py +1 -1
  91. pixeltable/metadata/converters/convert_21.py +34 -0
  92. pixeltable/metadata/converters/util.py +54 -12
  93. pixeltable/metadata/notes.py +1 -0
  94. pixeltable/metadata/schema.py +40 -21
  95. pixeltable/plan.py +149 -165
  96. pixeltable/py.typed +0 -0
  97. pixeltable/store.py +57 -37
  98. pixeltable/tool/create_test_db_dump.py +6 -6
  99. pixeltable/tool/create_test_video.py +1 -1
  100. pixeltable/tool/doc_plugins/griffe.py +3 -34
  101. pixeltable/tool/embed_udf.py +1 -1
  102. pixeltable/tool/mypy_plugin.py +55 -0
  103. pixeltable/type_system.py +260 -61
  104. pixeltable/utils/arrow.py +10 -9
  105. pixeltable/utils/coco.py +4 -4
  106. pixeltable/utils/documents.py +16 -2
  107. pixeltable/utils/filecache.py +9 -9
  108. pixeltable/utils/formatter.py +10 -11
  109. pixeltable/utils/http_server.py +2 -5
  110. pixeltable/utils/media_store.py +6 -6
  111. pixeltable/utils/pytorch.py +10 -11
  112. pixeltable/utils/sql.py +2 -1
  113. {pixeltable-0.2.20.dist-info → pixeltable-0.2.22.dist-info}/METADATA +50 -13
  114. pixeltable-0.2.22.dist-info/RECORD +153 -0
  115. pixeltable/exec/media_validation_node.py +0 -43
  116. pixeltable/utils/help.py +0 -11
  117. pixeltable-0.2.20.dist-info/RECORD +0 -147
  118. {pixeltable-0.2.20.dist-info → pixeltable-0.2.22.dist-info}/LICENSE +0 -0
  119. {pixeltable-0.2.20.dist-info → pixeltable-0.2.22.dist-info}/WHEEL +0 -0
  120. {pixeltable-0.2.20.dist-info → pixeltable-0.2.22.dist-info}/entry_points.txt +0 -0
@@ -5,14 +5,15 @@ import builtins
5
5
  import json
6
6
  import logging
7
7
  from pathlib import Path
8
- from typing import TYPE_CHECKING, Any, Callable, Iterable, Literal, Optional, Set, Tuple, Type, Union, overload
8
+ from typing import _GenericAlias # type: ignore[attr-defined]
9
+ from typing import TYPE_CHECKING, Any, Callable, Iterable, Literal, Optional, Set, Sequence, Tuple, Type, Union, overload
9
10
  from uuid import UUID
10
11
 
11
12
  import pandas as pd
12
13
  import pandas.io.formats.style
13
14
  import sqlalchemy as sql
14
15
 
15
- import pixeltable
16
+ import pixeltable as pxt
16
17
  import pixeltable.catalog as catalog
17
18
  import pixeltable.env as env
18
19
  import pixeltable.exceptions as excs
@@ -23,7 +24,7 @@ import pixeltable.type_system as ts
23
24
  from pixeltable.utils.filecache import FileCache
24
25
 
25
26
  from .column import Column
26
- from .globals import _ROWID_COLUMN_NAME, UpdateStatus, is_system_column_name, is_valid_identifier
27
+ from .globals import _ROWID_COLUMN_NAME, UpdateStatus, is_system_column_name, is_valid_identifier, MediaValidation
27
28
  from .schema_object import SchemaObject
28
29
  from .table_version import TableVersion
29
30
  from .table_version_path import TableVersionPath
@@ -35,18 +36,30 @@ _logger = logging.getLogger('pixeltable')
35
36
 
36
37
  class Table(SchemaObject):
37
38
  """
38
- Base class for table objects (base tables, views, snapshots).
39
-
40
- Every user-invoked operation that runs an ExecNode tree (directly or indirectly) needs to call
41
- FileCache.emit_eviction_warnings() at the end of the operation.
39
+ A handle to a table, view, or snapshot. This class is the primary interface through which table operations
40
+ (queries, insertions, updates, etc.) are performed in Pixeltable.
42
41
  """
42
+ # Every user-invoked operation that runs an ExecNode tree (directly or indirectly) needs to call
43
+ # FileCache.emit_eviction_warnings() at the end of the operation.
43
44
 
44
45
  def __init__(self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath):
45
46
  super().__init__(id, name, dir_id)
46
47
  self._is_dropped = False
47
48
  self._tbl_version_path = tbl_version_path
48
- from pixeltable.func import QueryTemplateFunction
49
- self._queries: dict[str, QueryTemplateFunction] = {}
49
+ self.__query_scope = self.QueryScope(self)
50
+
51
+ class QueryScope:
52
+ __table: 'Table'
53
+ _queries: dict[str, pxt.func.QueryTemplateFunction]
54
+
55
+ def __init__(self, table: 'Table') -> None:
56
+ self.__table = table
57
+ self._queries = {}
58
+
59
+ def __getattr__(self, name: str) -> pxt.func.QueryTemplateFunction:
60
+ if name in self._queries:
61
+ return self._queries[name]
62
+ raise AttributeError(f'Table {self.__table._name!r} has no query with that name: {name!r}')
50
63
 
51
64
  def _move(self, new_name: str, new_dir_id: UUID) -> None:
52
65
  super()._move(new_name, new_dir_id)
@@ -59,6 +72,29 @@ class Table(SchemaObject):
59
72
  conn.execute(stmt, {'new_dir_id': new_dir_id, 'new_name': json.dumps(new_name), 'id': self._id})
60
73
 
61
74
  def get_metadata(self) -> dict[str, Any]:
75
+ """
76
+ Retrieves metadata associated with this table.
77
+
78
+ Returns:
79
+ A dictionary containing the metadata, in the following format:
80
+
81
+ ```python
82
+ {
83
+ 'base': None, # If this is a view or snapshot, will contain the name of its base table
84
+ 'schema': {
85
+ 'col1': StringType(),
86
+ 'col2': IntType(),
87
+ },
88
+ 'version': 22,
89
+ 'schema_version': 1,
90
+ 'comment': '',
91
+ 'num_retained_versions': 10,
92
+ 'is_view': False,
93
+ 'is_snapshot': False,
94
+ 'media_validation': 'on_write',
95
+ }
96
+ ```
97
+ """
62
98
  md = super().get_metadata()
63
99
  md['base'] = self._base._path if self._base is not None else None
64
100
  md['schema'] = self._schema
@@ -66,6 +102,7 @@ class Table(SchemaObject):
66
102
  md['schema_version'] = self._tbl_version.schema_version
67
103
  md['comment'] = self._comment
68
104
  md['num_retained_versions'] = self._num_retained_versions
105
+ md['media_validation'] = self._media_validation.name.lower()
69
106
  return md
70
107
 
71
108
  @property
@@ -85,25 +122,24 @@ class Table(SchemaObject):
85
122
  if self._is_dropped:
86
123
  raise excs.Error(f'{self._display_name()} {self._name} has been dropped')
87
124
 
88
- def __getattr__(
89
- self, name: str
90
- ) -> Union['pixeltable.exprs.ColumnRef', 'pixeltable.func.QueryTemplateFunction']:
91
- """Return a ColumnRef or QueryTemplateFunction for the given name.
125
+ def __getattr__(self, name: str) -> 'pxt.exprs.ColumnRef':
126
+ """Return a ColumnRef for the given name.
92
127
  """
93
- if name in self._queries:
94
- return self._queries[name]
95
128
  return getattr(self._tbl_version_path, name)
96
129
 
97
- def __getitem__(
98
- self, index: object
99
- ) -> Union[
100
- 'pixeltable.func.QueryTemplateFunction', 'pixeltable.exprs.ColumnRef', 'pixeltable.DataFrame'
101
- ]:
130
+ @overload
131
+ def __getitem__(self, name: str) -> 'pxt.exprs.ColumnRef': ...
132
+
133
+ @overload
134
+ def __getitem__(self, index: Union[exprs.Expr, Sequence[exprs.Expr]]) -> 'pxt.DataFrame': ...
135
+
136
+ def __getitem__(self, index):
102
137
  """Return a ColumnRef or QueryTemplateFunction for the given name, or a DataFrame for the given slice.
103
138
  """
104
- if isinstance(index, str) and index in self._queries:
105
- return self._queries[index]
106
- return self._tbl_version_path.__getitem__(index)
139
+ if isinstance(index, str):
140
+ return getattr(self, index)
141
+ else:
142
+ return self._df()[index]
107
143
 
108
144
  def list_views(self, *, recursive: bool = True) -> list[str]:
109
145
  """
@@ -112,6 +148,9 @@ class Table(SchemaObject):
112
148
  Args:
113
149
  recursive: If `False`, returns only the immediate successor views of this `Table`. If `True`, returns
114
150
  all sub-views (including views of views, etc.)
151
+
152
+ Returns:
153
+ A list of view paths.
115
154
  """
116
155
  return [t._path for t in self._get_views(recursive=recursive)]
117
156
 
@@ -122,60 +161,55 @@ class Table(SchemaObject):
122
161
  else:
123
162
  return dependents
124
163
 
125
- def _df(self) -> 'pixeltable.dataframe.DataFrame':
164
+ def _df(self) -> 'pxt.dataframe.DataFrame':
126
165
  """Return a DataFrame for this table.
127
166
  """
128
167
  # local import: avoid circular imports
129
- from pixeltable.dataframe import DataFrame
130
- return DataFrame(self._tbl_version_path)
168
+ return pxt.DataFrame(self._tbl_version_path)
131
169
 
132
- def select(self, *items: Any, **named_items: Any) -> 'pixeltable.DataFrame':
170
+ @property
171
+ def queries(self) -> 'Table.QueryScope':
172
+ return self.__query_scope
173
+
174
+ def select(self, *items: Any, **named_items: Any) -> 'pxt.DataFrame':
133
175
  """Return a [`DataFrame`][pixeltable.DataFrame] for this table."""
134
- # local import: avoid circular imports
135
- from pixeltable.dataframe import DataFrame
136
- return DataFrame(self._tbl_version_path).select(*items, **named_items)
176
+ return self._df().select(*items, **named_items)
137
177
 
138
- def where(self, pred: 'exprs.Expr') -> 'pixeltable.DataFrame':
178
+ def where(self, pred: 'exprs.Expr') -> 'pxt.DataFrame':
139
179
  """Return a [`DataFrame`][pixeltable.DataFrame] for this table."""
140
- # local import: avoid circular imports
141
- from pixeltable.dataframe import DataFrame
142
- return DataFrame(self._tbl_version_path).where(pred)
180
+ return self._df().where(pred)
143
181
 
144
- def order_by(self, *items: 'exprs.Expr', asc: bool = True) -> 'pixeltable.DataFrame':
182
+ def order_by(self, *items: 'exprs.Expr', asc: bool = True) -> 'pxt.DataFrame':
145
183
  """Return a [`DataFrame`][pixeltable.DataFrame] for this table."""
146
- # local import: avoid circular imports
147
- from pixeltable.dataframe import DataFrame
148
- return DataFrame(self._tbl_version_path).order_by(*items, asc=asc)
184
+ return self._df().order_by(*items, asc=asc)
149
185
 
150
- def group_by(self, *items: 'exprs.Expr') -> 'pixeltable.DataFrame':
186
+ def group_by(self, *items: 'exprs.Expr') -> 'pxt.DataFrame':
151
187
  """Return a [`DataFrame`][pixeltable.DataFrame] for this table."""
152
- from pixeltable.dataframe import DataFrame
153
- return DataFrame(self._tbl_version_path).group_by(*items)
188
+ return self._df().group_by(*items)
154
189
 
155
- def limit(self, n: int) -> 'pixeltable.DataFrame':
156
- from pixeltable.dataframe import DataFrame
157
- return DataFrame(self._tbl_version_path).limit(n)
190
+ def limit(self, n: int) -> 'pxt.DataFrame':
191
+ return self._df().limit(n)
158
192
 
159
- def collect(self) -> 'pixeltable.dataframe.DataFrameResultSet':
193
+ def collect(self) -> 'pxt.dataframe.DataFrameResultSet':
160
194
  """Return rows from this table."""
161
195
  return self._df().collect()
162
196
 
163
197
  def show(
164
198
  self, *args, **kwargs
165
- ) -> 'pixeltable.dataframe.DataFrameResultSet':
199
+ ) -> 'pxt.dataframe.DataFrameResultSet':
166
200
  """Return rows from this table.
167
201
  """
168
202
  return self._df().show(*args, **kwargs)
169
203
 
170
204
  def head(
171
205
  self, *args, **kwargs
172
- ) -> 'pixeltable.dataframe.DataFrameResultSet':
206
+ ) -> 'pxt.dataframe.DataFrameResultSet':
173
207
  """Return the first n rows inserted into this table."""
174
208
  return self._df().head(*args, **kwargs)
175
209
 
176
210
  def tail(
177
211
  self, *args, **kwargs
178
- ) -> 'pixeltable.dataframe.DataFrameResultSet':
212
+ ) -> 'pxt.dataframe.DataFrameResultSet':
179
213
  """Return the last n rows inserted into this table."""
180
214
  return self._df().tail(*args, **kwargs)
181
215
 
@@ -191,7 +225,7 @@ class Table(SchemaObject):
191
225
  @property
192
226
  def _query_names(self) -> list[str]:
193
227
  """Return the names of the registered queries for this table."""
194
- return list(self._queries.keys())
228
+ return list(self.__query_scope._queries.keys())
195
229
 
196
230
  @property
197
231
  def _base(self) -> Optional['Table']:
@@ -212,11 +246,15 @@ class Table(SchemaObject):
212
246
  def _num_retained_versions(self):
213
247
  return self._tbl_version.num_retained_versions
214
248
 
249
+ @property
250
+ def _media_validation(self) -> MediaValidation:
251
+ return self._tbl_version.media_validation
252
+
215
253
  def _description(self) -> pd.DataFrame:
216
254
  cols = self._tbl_version_path.columns()
217
255
  df = pd.DataFrame({
218
256
  'Column Name': [c.name for c in cols],
219
- 'Type': [str(c.col_type) for c in cols],
257
+ 'Type': [c.col_type._to_str(as_schema=True) for c in cols],
220
258
  'Computed With': [c.value_expr.display_str(inline=False) if c.value_expr is not None else '' for c in cols],
221
259
  })
222
260
  return df
@@ -239,7 +277,7 @@ class Table(SchemaObject):
239
277
  from IPython.display import display
240
278
  display(self._description_html())
241
279
  else:
242
- print(self.__repr__())
280
+ print(repr(self))
243
281
 
244
282
  # TODO: Display comments in _repr_html()
245
283
  def __repr__(self) -> str:
@@ -267,75 +305,77 @@ class Table(SchemaObject):
267
305
  """Return a PyTorch Dataset for this table.
268
306
  See DataFrame.to_pytorch_dataset()
269
307
  """
270
- from pixeltable.dataframe import DataFrame
271
- return DataFrame(self._tbl_version_path).to_pytorch_dataset(image_format=image_format)
308
+ return self._df().to_pytorch_dataset(image_format=image_format)
272
309
 
273
310
  def to_coco_dataset(self) -> Path:
274
311
  """Return the path to a COCO json file for this table.
275
312
  See DataFrame.to_coco_dataset()
276
313
  """
277
- from pixeltable.dataframe import DataFrame
278
- return DataFrame(self._tbl_version_path).to_coco_dataset()
314
+ return self._df().to_coco_dataset()
279
315
 
280
316
  def __setitem__(self, col_name: str, spec: Union[ts.ColumnType, exprs.Expr]) -> None:
281
317
  """
282
318
  Adds a column to the table. This is an alternate syntax for `add_column()`; the meaning of
283
319
 
284
- >>> tbl['new_col'] = IntType()
320
+ >>> tbl['new_col'] = pxt.Int
285
321
 
286
322
  is exactly equivalent to
287
323
 
288
- >>> tbl.add_column(new_col=IntType())
324
+ >>> tbl.add_column(new_col=pxt.Int)
289
325
 
290
326
  For details, see the documentation for [`add_column()`][pixeltable.catalog.Table.add_column].
291
327
  """
292
328
  if not isinstance(col_name, str):
293
329
  raise excs.Error(f'Column name must be a string, got {type(col_name)}')
294
- if not isinstance(spec, (ts.ColumnType, exprs.Expr)):
295
- raise excs.Error(f'Column spec must be a ColumnType or an Expr, got {type(spec)}')
296
- self.add_column(type=None, stored=None, print_stats=False, **{col_name: spec})
330
+ if not isinstance(spec, (ts.ColumnType, exprs.Expr, type, _GenericAlias)):
331
+ raise excs.Error(f'Column spec must be a ColumnType, Expr, or type, got {type(spec)}')
332
+ self.add_column(type=None, stored=None, print_stats=False, on_error='abort', **{col_name: spec})
297
333
 
298
334
  def add_column(
299
335
  self,
300
336
  *,
301
- type: Optional[ts.ColumnType] = None,
337
+ type: Union[ts.ColumnType, builtins.type, _GenericAlias, None] = None,
302
338
  stored: Optional[bool] = None,
303
339
  print_stats: bool = False,
304
- **kwargs: Union[ts.ColumnType, exprs.Expr, Callable]
340
+ on_error: Literal['abort', 'ignore'] = 'abort',
341
+ **kwargs: Union[ts.ColumnType, builtins.type, _GenericAlias, exprs.Expr, Callable]
305
342
  ) -> UpdateStatus:
306
343
  """
307
344
  Adds a column to the table.
308
345
 
309
346
  Args:
310
- kwargs: Exactly one keyword argument of the form ``column-name=type|value-expression``.
311
- type: The type of the column. Only valid and required if ``value-expression`` is a Callable.
347
+ kwargs: Exactly one keyword argument of the form `column_name=type` or `column_name=expression`.
348
+ type: The type of the column. Only valid and required if the column value is specified as a Callable.
312
349
  stored: Whether the column is materialized and stored or computed on demand. Only valid for image columns.
313
- print_stats: If ``True``, print execution metrics.
350
+ print_stats: If `True`, print execution metrics during evaluation.
351
+ on_error: Determines the behavior if an error occurs while evaluating the column expression for at least one
352
+ row.
353
+
354
+ - If `on_error='abort'`, then an exception will be raised and the column will not be added.
355
+ - If `on_error='ignore'`, then execution will continue and the column will be added. Any rows
356
+ with errors will have a `None` value for the column, with information about the error stored in the
357
+ corresponding `tbl.col_name.errortype` and `tbl.col_name.errormsg` fields.
314
358
 
315
359
  Returns:
316
- execution status
360
+ Information about the execution status of the operation.
317
361
 
318
362
  Raises:
319
363
  Error: If the column name is invalid or already exists.
320
364
 
321
365
  Examples:
322
- Add an int column with ``None`` values:
366
+ Add an int column:
323
367
 
324
- >>> tbl.add_column(new_col=IntType())
368
+ >>> tbl.add_column(new_col=pxt.Int)
325
369
 
326
370
  Alternatively, this can also be expressed as:
327
371
 
328
- >>> tbl['new_col'] = IntType()
372
+ >>> tbl['new_col'] = pxt.Int
329
373
 
330
- For a table with int column ``int_col``, add a column that is the factorial of ``int_col``. The names of
374
+ For a table with int column `int_col`, add a column that is the factorial of `int_col`. The names of
331
375
  the parameters of the Callable must correspond to existing column names (the column values are then passed
332
376
  as arguments to the Callable). In this case, the column type needs to be specified explicitly:
333
377
 
334
- >>> tbl.add_column(factorial=lambda int_col: math.factorial(int_col), type=IntType())
335
-
336
- Alternatively, this can also be expressed as:
337
-
338
- >>> tbl['factorial'] = {'value': lambda int_col: math.factorial(int_col), 'type': IntType()}
378
+ >>> tbl.add_column(factorial=lambda int_col: math.factorial(int_col), type=pxt.Int)
339
379
 
340
380
  For a table with an image column ``frame``, add an image column ``rotated`` that rotates the image by
341
381
  90 degrees. In this case, the column type is inferred from the expression. Also, the column is not stored
@@ -347,13 +387,9 @@ class Table(SchemaObject):
347
387
 
348
388
  >>> tbl['rotated'] = tbl.frame.rotate(90)
349
389
 
350
- Do the same, but now the column is stored:
351
-
352
- >>> tbl.add_column(rotated=tbl.frame.rotate(90), stored=True)
390
+ Do the same, but now the column is unstored:
353
391
 
354
- Alternatively, this can also be expressed as:
355
-
356
- >>> tbl['rotated'] = {'value': tbl.frame.rotate(90), 'stored': True}
392
+ >>> tbl.add_column(rotated=tbl.frame.rotate(90), stored=False)
357
393
  """
358
394
  self._check_is_dropped()
359
395
  # verify kwargs and construct column schema dict
@@ -365,26 +401,25 @@ class Table(SchemaObject):
365
401
  col_name, spec = next(iter(kwargs.items()))
366
402
  if not is_valid_identifier(col_name):
367
403
  raise excs.Error(f'Invalid column name: {col_name!r}')
368
- if isinstance(spec, (ts.ColumnType, exprs.Expr)) and type is not None:
404
+ if isinstance(spec, (ts.ColumnType, builtins.type, _GenericAlias, exprs.Expr)) and type is not None:
369
405
  raise excs.Error(f'add_column(): keyword argument "type" is redundant')
370
406
 
371
407
  col_schema: dict[str, Any] = {}
372
- if isinstance(spec, ts.ColumnType):
373
- col_schema['type'] = spec
408
+ if isinstance(spec, (ts.ColumnType, builtins.type, _GenericAlias)):
409
+ col_schema['type'] = ts.ColumnType.normalize_type(spec, nullable_default=True)
374
410
  else:
375
411
  col_schema['value'] = spec
376
412
  if type is not None:
377
- col_schema['type'] = type
413
+ col_schema['type'] = ts.ColumnType.normalize_type(type, nullable_default=True)
378
414
  if stored is not None:
379
415
  col_schema['stored'] = stored
380
416
 
381
417
  new_col = self._create_columns({col_name: col_schema})[0]
382
418
  self._verify_column(new_col, set(self._schema.keys()), set(self._query_names))
383
- status = self._tbl_version.add_column(new_col, print_stats=print_stats)
419
+ status = self._tbl_version.add_column(new_col, print_stats=print_stats, on_error=on_error)
384
420
  FileCache.get().emit_eviction_warnings()
385
421
  return status
386
422
 
387
-
388
423
  @classmethod
389
424
  def _validate_column_spec(cls, name: str, spec: dict[str, Any]) -> None:
390
425
  """Check integrity of user-supplied Column spec
@@ -393,7 +428,7 @@ class Table(SchemaObject):
393
428
  (on account of containing Python Callables or Exprs).
394
429
  """
395
430
  assert isinstance(spec, dict)
396
- valid_keys = {'type', 'value', 'stored'}
431
+ valid_keys = {'type', 'value', 'stored', 'media_validation'}
397
432
  has_type = False
398
433
  for k in spec.keys():
399
434
  if k not in valid_keys:
@@ -401,8 +436,8 @@ class Table(SchemaObject):
401
436
 
402
437
  if 'type' in spec:
403
438
  has_type = True
404
- if not isinstance(spec['type'], ts.ColumnType):
405
- raise excs.Error(f'Column {name}: "type" must be a ColumnType, got {spec["type"]}')
439
+ if not isinstance(spec['type'], (ts.ColumnType, type, _GenericAlias)):
440
+ raise excs.Error(f'Column {name}: "type" must be a type or ColumnType, got {spec["type"]}')
406
441
 
407
442
  if 'value' in spec:
408
443
  value_spec = spec['value']
@@ -420,6 +455,9 @@ class Table(SchemaObject):
420
455
  if 'type' in spec:
421
456
  raise excs.Error(f'Column {name}: "type" is redundant if value is a Pixeltable expression')
422
457
 
458
+ if 'media_validation' in spec:
459
+ _ = catalog.MediaValidation.validated(spec['media_validation'], f'Column {name}: media_validation')
460
+
423
461
  if 'stored' in spec and not isinstance(spec['stored'], bool):
424
462
  raise excs.Error(f'Column {name}: "stored" must be a bool, got {spec["stored"]}')
425
463
  if not has_type:
@@ -433,31 +471,38 @@ class Table(SchemaObject):
433
471
  col_type: Optional[ts.ColumnType] = None
434
472
  value_expr: Optional[exprs.Expr] = None
435
473
  primary_key: Optional[bool] = None
474
+ media_validation: Optional[catalog.MediaValidation] = None
436
475
  stored = True
437
476
 
438
- if isinstance(spec, ts.ColumnType):
439
- # TODO: create copy
440
- col_type = spec
477
+ if isinstance(spec, (ts.ColumnType, type, _GenericAlias)):
478
+ col_type = ts.ColumnType.normalize_type(spec, nullable_default=True)
441
479
  elif isinstance(spec, exprs.Expr):
442
480
  # create copy so we can modify it
443
481
  value_expr = spec.copy()
444
482
  elif callable(spec):
445
- raise excs.Error((
483
+ raise excs.Error(
446
484
  f'Column {name} computed with a Callable: specify using a dictionary with '
447
- f'the "value" and "type" keys (e.g., "{name}": {{"value": <Callable>, "type": IntType()}})'
448
- ))
485
+ f'the "value" and "type" keys (e.g., "{name}": {{"value": <Callable>, "type": pxt.Int}})'
486
+ )
449
487
  elif isinstance(spec, dict):
450
488
  cls._validate_column_spec(name, spec)
451
- col_type = spec.get('type')
489
+ if 'type' in spec:
490
+ col_type = ts.ColumnType.normalize_type(spec['type'], nullable_default=True)
452
491
  value_expr = spec.get('value')
453
492
  if value_expr is not None and isinstance(value_expr, exprs.Expr):
454
493
  # create copy so we can modify it
455
494
  value_expr = value_expr.copy()
456
495
  stored = spec.get('stored', True)
457
496
  primary_key = spec.get('primary_key')
497
+ media_validation_str = spec.get('media_validation')
498
+ media_validation = (
499
+ catalog.MediaValidation[media_validation_str.upper()] if media_validation_str is not None
500
+ else None
501
+ )
458
502
 
459
503
  column = Column(
460
- name, col_type=col_type, computed_with=value_expr, stored=stored, is_pk=primary_key)
504
+ name, col_type=col_type, computed_with=value_expr, stored=stored, is_pk=primary_key,
505
+ media_validation=media_validation)
461
506
  columns.append(column)
462
507
  return columns
463
508
 
@@ -496,12 +541,13 @@ class Table(SchemaObject):
496
541
  name: The name of the column to drop.
497
542
 
498
543
  Raises:
499
- Error: If the column does not exist or if it is referenced by a computed column.
544
+ Error: If the column does not exist or if it is referenced by a dependent computed column.
500
545
 
501
546
  Examples:
502
- Drop column ``factorial``:
547
+ Drop the column `col` from the table `my_table`:
503
548
 
504
- >>> tbl.drop_column('factorial')
549
+ >>> tbl = pxt.get_table('my_table')
550
+ ... tbl.drop_column('col')
505
551
  """
506
552
  self._check_is_dropped()
507
553
 
@@ -544,43 +590,59 @@ class Table(SchemaObject):
544
590
  new_name: The new name of the column.
545
591
 
546
592
  Raises:
547
- Error: If the column does not exist or if the new name is invalid or already exists.
593
+ Error: If the column does not exist, or if the new name is invalid or already exists.
548
594
 
549
595
  Examples:
550
- Rename column ``factorial`` to ``fac``:
596
+ Rename the column `col1` to `col2` of the table `my_table`:
551
597
 
552
- >>> tbl.rename_column('factorial', 'fac')
598
+ >>> tbl = pxt.get_table('my_table')
599
+ ... tbl.rename_column('col1', 'col2')
553
600
  """
554
601
  self._check_is_dropped()
555
602
  self._tbl_version.rename_column(old_name, new_name)
556
603
 
557
604
  def add_embedding_index(
558
605
  self, col_name: str, *, idx_name: Optional[str] = None,
559
- string_embed: Optional[pixeltable.Function] = None, image_embed: Optional[pixeltable.Function] = None,
606
+ string_embed: Optional[pxt.Function] = None, image_embed: Optional[pxt.Function] = None,
560
607
  metric: str = 'cosine'
561
608
  ) -> None:
562
- """Add an index to the table.
609
+ """
610
+ Add an embedding index to the table. Once the index is added, it will be automatically kept up to date as new
611
+ rows are inserted into the table.
612
+
613
+ Indices are currently supported only for `String` and `Image` columns. The index must specify, at
614
+ minimum, an embedding of the appropriate type (string or image). It may optionally specify _both_ a string
615
+ and image embedding (into the same vector space); in particular, this can be used to provide similarity search
616
+ of text over an image column.
563
617
 
564
618
  Args:
565
- col_name: name of column to index
566
- idx_name: name of index, which needs to be unique for the table; if not provided, a name will be generated
567
- string_embed: function to embed text; required if the column is a text column
568
- image_embed: function to embed images; required if the column is an image column
569
- metric: distance metric to use for the index; one of 'cosine', 'ip', 'l2'; default is 'cosine'
619
+ col_name: The name of the column to index; must be a `String` or `Image` column.
620
+ idx_name: The name of the index. If not specified, a name such as `'idx0'` will be generated automatically.
621
+ If specified, the name must be unique for this table.
622
+ string_embed: A function to embed text; required if the column is a `String` column.
623
+ image_embed: A function to embed images; required if the column is an `Image` column.
624
+ metric: Distance metric to use for the index; one of `'cosine'`, `'ip'`, or `'l2'`;
625
+ the default is `'cosine'`.
570
626
 
571
627
  Raises:
572
- Error: If an index with that name already exists for the table or if the column does not exist.
628
+ Error: If an index with that name already exists for the table, or if the specified column does not exist.
573
629
 
574
630
  Examples:
575
- Add an index to the ``img`` column:
631
+ Add an index to the `img` column of the table `my_table`:
576
632
 
577
- >>> tbl.add_embedding_index('img', image_embed=...)
633
+ >>> tbl = pxt.get_table('my_table')
634
+ ... tbl.add_embedding_index('img', image_embed=my_image_func)
578
635
 
579
- Add another index to the ``img`` column, using the inner product as the distance metric,
580
- and with a specific name; ``string_embed`` is also specified in order to search with text:
636
+ Add another index to the `img` column, using the inner product as the distance metric,
637
+ and with a specific name; `string_embed` is also specified in order to search with text:
581
638
 
582
639
  >>> tbl.add_embedding_index(
583
- 'img', idx_name='clip_idx', image_embed=..., string_embed=..., metric='ip')
640
+ ... 'img',
641
+ ... idx_name='clip_idx',
642
+ ... image_embed=my_image_func,
643
+ ... string_embed=my_string_func,
644
+ ... metric='ip'
645
+ ... )
584
646
  """
585
647
  if self._tbl_version_path.is_snapshot():
586
648
  raise excs.Error('Cannot add an index to a snapshot')
@@ -599,37 +661,50 @@ class Table(SchemaObject):
599
661
  FileCache.get().emit_eviction_warnings()
600
662
 
601
663
  def drop_embedding_index(self, *, column_name: Optional[str] = None, idx_name: Optional[str] = None) -> None:
602
- """Drop an embedding index from the table.
664
+ """
665
+ Drop an embedding index from the table. Either a column name or an index name (but not both) must be
666
+ specified. If a column name is specified, it must be a column containing exactly one embedding index;
667
+ otherwise the specific index name must be provided instead.
603
668
 
604
669
  Args:
605
- column_name: The name of the column whose embedding index to drop. Invalid if the column has multiple
670
+ column_name: The name of the column from which to drop the index. Invalid if the column has multiple
606
671
  embedding indices.
607
672
  idx_name: The name of the index to drop.
608
673
 
609
674
  Raises:
610
- Error: If the index does not exist.
675
+ Error: If `column_name` is specified, but the column does not exist, or it contains no embedding
676
+ indices or multiple embedding indices.
677
+ Error: If `idx_name` is specified, but the index does not exist or is not an embedding index.
611
678
 
612
679
  Examples:
613
- Drop embedding index on the ``img`` column:
680
+ Drop the embedding index on the `img` column of the table `my_table`:
614
681
 
615
- >>> tbl.drop_embedding_index(column_name='img')
682
+ >>> tbl = pxt.get_table('my_table')
683
+ ... tbl.drop_embedding_index(column_name='img')
616
684
  """
617
685
  self._drop_index(column_name=column_name, idx_name=idx_name, _idx_class=index.EmbeddingIndex)
618
686
 
619
687
  def drop_index(self, *, column_name: Optional[str] = None, idx_name: Optional[str] = None) -> None:
620
- """Drop an index from the table.
688
+ """
689
+ Drop an index from the table. Either a column name or an index name (but not both) must be
690
+ specified. If a column name is specified, it must be a column containing exactly one index;
691
+ otherwise the specific index name must be provided instead.
621
692
 
622
693
  Args:
623
- column_name: The name of the column whose index to drop. Invalid if the column has multiple indices.
694
+ column_name: The name of the column from which to drop the index. Invalid if the column has multiple
695
+ indices.
624
696
  idx_name: The name of the index to drop.
625
697
 
626
698
  Raises:
627
- Error: If the index does not exist.
699
+ Error: If `column_name` is specified, but the column does not exist, or it contains no
700
+ indices or multiple indices.
701
+ Error: If `idx_name` is specified, but the index does not exist.
628
702
 
629
703
  Examples:
630
- Drop index on the ``img`` column:
704
+ Drop the index on the `img` column of the table `my_table`:
631
705
 
632
- >>> tbl.drop_index(column_name='img')
706
+ >>> tbl = pxt.get_table('my_table')
707
+ ... tbl.drop_index(column_name='img')
633
708
  """
634
709
  self._drop_index(column_name=column_name, idx_name=idx_name)
635
710
 
@@ -682,7 +757,7 @@ class Table(SchemaObject):
682
757
  To insert multiple rows at a time:
683
758
  ``insert(rows: Iterable[dict[str, Any]], /, *, print_stats: bool = False, fail_on_exception: bool = True)``
684
759
 
685
- To insert just a single row, you can use the more convenient syntax:
760
+ To insert just a single row, you can use the more concise syntax:
686
761
  ``insert(*, print_stats: bool = False, fail_on_exception: bool = True, **kwargs: Any)``
687
762
 
688
763
  Args:
@@ -698,24 +773,31 @@ class Table(SchemaObject):
698
773
  If ``True``, raise an exception that aborts the insert.
699
774
 
700
775
  Returns:
701
- execution status
776
+ An [`UpdateStatus`][pixeltable.UpdateStatus] object containing information about the update.
702
777
 
703
778
  Raises:
704
- Error: if a row does not match the table schema or contains values for computed columns
779
+ Error: If one of the following conditions occurs:
780
+
781
+ - The table is a view or snapshot.
782
+ - The table has been dropped.
783
+ - One of the rows being inserted does not conform to the table schema.
784
+ - An error occurs during processing of computed columns, and `fail_on_exception=True`.
705
785
 
706
786
  Examples:
707
- Insert two rows into a table with three int columns ``a``, ``b``, and ``c``. Column ``c`` is nullable.
787
+ Insert two rows into the table `my_table` with three int columns ``a``, ``b``, and ``c``.
788
+ Column ``c`` is nullable:
708
789
 
709
- >>> tbl.insert([{'a': 1, 'b': 1, 'c': 1}, {'a': 2, 'b': 2}])
790
+ >>> tbl = pxt.get_table('my_table')
791
+ ... tbl.insert([{'a': 1, 'b': 1, 'c': 1}, {'a': 2, 'b': 2}])
710
792
 
711
- Insert a single row into a table with three int columns ``a``, ``b``, and ``c``.
793
+ Insert a single row using the alternative syntax:
712
794
 
713
- >>> tbl.insert(a=1, b=1, c=1)
795
+ >>> tbl.insert(a=3, b=3, c=3)
714
796
  """
715
797
  raise NotImplementedError
716
798
 
717
799
  def update(
718
- self, value_spec: dict[str, Any], where: Optional['pixeltable.exprs.Expr'] = None, cascade: bool = True
800
+ self, value_spec: dict[str, Any], where: Optional['pxt.exprs.Expr'] = None, cascade: bool = True
719
801
  ) -> UpdateStatus:
720
802
  """Update rows in this table.
721
803
 
@@ -807,7 +889,7 @@ class Table(SchemaObject):
807
889
  FileCache.get().emit_eviction_warnings()
808
890
  return status
809
891
 
810
- def delete(self, where: Optional['pixeltable.exprs.Expr'] = None) -> UpdateStatus:
892
+ def delete(self, where: Optional['pxt.exprs.Expr'] = None) -> UpdateStatus:
811
893
  """Delete rows in this table.
812
894
 
813
895
  Args:
@@ -836,17 +918,17 @@ class Table(SchemaObject):
836
918
  self._tbl_version.revert()
837
919
 
838
920
  @overload
839
- def query(self, py_fn: Callable) -> 'pixeltable.func.QueryTemplateFunction': ...
921
+ def query(self, py_fn: Callable) -> 'pxt.func.QueryTemplateFunction': ...
840
922
 
841
923
  @overload
842
924
  def query(
843
925
  self, *, param_types: Optional[list[ts.ColumnType]] = None
844
- ) -> Callable[[Callable], 'pixeltable.func.QueryTemplateFunction']: ...
926
+ ) -> Callable[[Callable], 'pxt.func.QueryTemplateFunction']: ...
845
927
 
846
928
  def query(self, *args: Any, **kwargs: Any) -> Any:
847
929
  def make_query_template(
848
930
  py_fn: Callable, param_types: Optional[list[ts.ColumnType]]
849
- ) -> 'pixeltable.func.QueryTemplateFunction':
931
+ ) -> 'pxt.func.QueryTemplateFunction':
850
932
  if py_fn.__module__ != '__main__' and py_fn.__name__.isidentifier():
851
933
  # this is a named function in a module
852
934
  function_path = f'{py_fn.__module__}.{py_fn.__qualname__}'
@@ -855,12 +937,11 @@ class Table(SchemaObject):
855
937
  query_name = py_fn.__name__
856
938
  if query_name in self._schema.keys():
857
939
  raise excs.Error(f'Query name {query_name!r} conflicts with existing column')
858
- if query_name in self._queries:
940
+ if query_name in self.__query_scope._queries and function_path is not None:
859
941
  raise excs.Error(f'Duplicate query name: {query_name!r}')
860
- import pixeltable.func as func
861
- query_fn = func.QueryTemplateFunction.create(
942
+ query_fn = pxt.func.QueryTemplateFunction.create(
862
943
  py_fn, param_types=param_types, path=function_path, name=query_name)
863
- self._queries[query_name] = query_fn
944
+ self.__query_scope._queries[query_name] = query_fn
864
945
  return query_fn
865
946
 
866
947
  # TODO: verify that the inferred return type matches that of the template
@@ -877,7 +958,7 @@ class Table(SchemaObject):
877
958
  def external_stores(self) -> list[str]:
878
959
  return list(self._tbl_version.external_stores.keys())
879
960
 
880
- def _link_external_store(self, store: 'pixeltable.io.ExternalStore') -> None:
961
+ def _link_external_store(self, store: 'pxt.io.ExternalStore') -> None:
881
962
  """
882
963
  Links the specified `ExternalStore` to this table.
883
964
  """
@@ -932,7 +1013,7 @@ class Table(SchemaObject):
932
1013
  *,
933
1014
  export_data: bool = True,
934
1015
  import_data: bool = True
935
- ) -> 'pixeltable.io.SyncStatus':
1016
+ ) -> 'pxt.io.SyncStatus':
936
1017
  """
937
1018
  Synchronizes this table with its linked external stores.
938
1019
 
@@ -954,9 +1035,7 @@ class Table(SchemaObject):
954
1035
  if store not in all_stores:
955
1036
  raise excs.Error(f'Table `{self._name}` has no external store with that name: {store}')
956
1037
 
957
- from pixeltable.io import SyncStatus
958
-
959
- sync_status = SyncStatus.empty()
1038
+ sync_status = pxt.io.SyncStatus.empty()
960
1039
  for store in stores:
961
1040
  store_obj = self._tbl_version.external_stores[store]
962
1041
  store_sync_status = store_obj.sync(self, export_data=export_data, import_data=import_data)