pixeltable-0.2.22-py3-none-any.whl → pixeltable-0.2.24-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (40)
  1. pixeltable/__init__.py +2 -2
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/column.py +8 -22
  4. pixeltable/catalog/insertable_table.py +26 -8
  5. pixeltable/catalog/table.py +179 -83
  6. pixeltable/catalog/table_version.py +13 -39
  7. pixeltable/catalog/table_version_path.py +2 -2
  8. pixeltable/catalog/view.py +2 -2
  9. pixeltable/dataframe.py +20 -28
  10. pixeltable/env.py +2 -0
  11. pixeltable/exec/cache_prefetch_node.py +189 -43
  12. pixeltable/exec/data_row_batch.py +3 -3
  13. pixeltable/exec/exec_context.py +2 -2
  14. pixeltable/exec/exec_node.py +2 -2
  15. pixeltable/exec/expr_eval_node.py +8 -8
  16. pixeltable/exprs/arithmetic_expr.py +9 -4
  17. pixeltable/exprs/column_ref.py +4 -0
  18. pixeltable/exprs/comparison.py +5 -0
  19. pixeltable/exprs/json_path.py +1 -1
  20. pixeltable/func/aggregate_function.py +8 -8
  21. pixeltable/func/expr_template_function.py +6 -5
  22. pixeltable/func/udf.py +6 -11
  23. pixeltable/functions/huggingface.py +145 -25
  24. pixeltable/functions/llama_cpp.py +3 -2
  25. pixeltable/functions/mistralai.py +1 -1
  26. pixeltable/functions/openai.py +1 -1
  27. pixeltable/functions/together.py +1 -1
  28. pixeltable/functions/util.py +5 -2
  29. pixeltable/globals.py +55 -6
  30. pixeltable/plan.py +1 -1
  31. pixeltable/tool/create_test_db_dump.py +1 -1
  32. pixeltable/type_system.py +83 -35
  33. pixeltable/utils/coco.py +5 -5
  34. pixeltable/utils/formatter.py +3 -3
  35. pixeltable/utils/s3.py +6 -3
  36. {pixeltable-0.2.22.dist-info → pixeltable-0.2.24.dist-info}/METADATA +119 -46
  37. {pixeltable-0.2.22.dist-info → pixeltable-0.2.24.dist-info}/RECORD +40 -40
  38. {pixeltable-0.2.22.dist-info → pixeltable-0.2.24.dist-info}/LICENSE +0 -0
  39. {pixeltable-0.2.22.dist-info → pixeltable-0.2.24.dist-info}/WHEEL +0 -0
  40. {pixeltable-0.2.22.dist-info → pixeltable-0.2.24.dist-info}/entry_points.txt +0 -0
pixeltable/__init__.py CHANGED
@@ -3,8 +3,8 @@ from .dataframe import DataFrame
3
3
  from .exceptions import Error
4
4
  from .exprs import RELATIVE_PATH_ROOT
5
5
  from .func import Aggregator, Function, expr_udf, uda, udf
6
- from .globals import (array, configure_logging, create_dir, create_table, create_view, drop_dir, drop_table, get_table,
7
- init, list_dirs, list_functions, list_tables, move)
6
+ from .globals import (array, configure_logging, create_dir, create_snapshot, create_table, create_view, drop_dir,
7
+ drop_table, get_table, init, list_dirs, list_functions, list_tables, move)
8
8
  from .type_system import (Array, ArrayType, Audio, AudioType, Bool, BoolType, ColumnType, Document, DocumentType, Float,
9
9
  FloatType, Image, ImageType, Int, IntType, Json, JsonType, Required, String, StringType,
10
10
  Timestamp, TimestampType, Video, VideoType)
pixeltable/__version__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  # These version placeholders will be replaced during build.
2
- __version__ = "0.2.22"
3
- __version_tuple__ = (0, 2, 22)
2
+ __version__ = "0.2.24"
3
+ __version_tuple__ = (0, 2, 24)
pixeltable/catalog/column.py CHANGED
@@ -1,7 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import logging
4
- from typing import TYPE_CHECKING, Any, Callable, Optional, Union
4
+ from typing import TYPE_CHECKING, Any, Optional
5
5
 
6
6
  import sqlalchemy as sql
7
7
 
@@ -35,7 +35,6 @@ class Column:
35
35
  sa_col_type: Optional[sql.sqltypes.TypeEngine]
36
36
  sa_errormsg_col: Optional[sql.schema.Column]
37
37
  sa_errortype_col: Optional[sql.schema.Column]
38
- compute_func: Optional[Callable]
39
38
  _value_expr: Optional[exprs.Expr]
40
39
  value_expr_dict: Optional[dict[str, Any]]
41
40
  dependent_cols: set[Column]
@@ -43,7 +42,7 @@ class Column:
43
42
 
44
43
  def __init__(
45
44
  self, name: Optional[str], col_type: Optional[ts.ColumnType] = None,
46
- computed_with: Optional[Union[exprs.Expr, Callable]] = None,
45
+ computed_with: Optional[exprs.Expr] = None,
47
46
  is_pk: bool = False, stored: bool = True, media_validation: Optional[MediaValidation] = None,
48
47
  col_id: Optional[int] = None, schema_version_add: Optional[int] = None,
49
48
  schema_version_drop: Optional[int] = None, sa_col_type: Optional[sql.sqltypes.TypeEngine] = None,
@@ -54,7 +53,7 @@ class Column:
54
53
  Args:
55
54
  name: column name; None for system columns (eg, index columns)
56
55
  col_type: column type; can be None if the type can be derived from ``computed_with``
57
- computed_with: a callable or an Expr object that computes the column value
56
+ computed_with: an Expr that computes the column value
58
57
  is_pk: if True, this column is part of the primary key
59
58
  stored: determines whether a computed column is present in the stored table or recomputed on demand
60
59
  col_id: column ID (only used internally)
@@ -64,11 +63,6 @@ class Column:
64
63
  col_type is None
65
64
  - when loaded from md store: ``computed_with`` is set and col_type is set
66
65
 
67
- ``computed_with`` is a Callable:
68
- - the callable's parameter names must correspond to existing columns in the table for which this Column
69
- is being used
70
- - ``col_type`` needs to be set to the callable's return type
71
-
72
66
  ``stored`` (only valid for computed image columns):
73
67
  - if True: the column is present in the stored table
74
68
  - if False: the column is not present in the stored table and recomputed during a query
@@ -80,22 +74,14 @@ class Column:
80
74
  if col_type is None and computed_with is None:
81
75
  raise excs.Error(f'Column `{name}`: col_type is required if computed_with is not specified')
82
76
 
83
- self._value_expr = None
84
- self.compute_func = None
77
+ self._value_expr: Optional[exprs.Expr] = None
85
78
  self.value_expr_dict = value_expr_dict
86
79
  if computed_with is not None:
87
80
  value_expr = exprs.Expr.from_object(computed_with)
88
81
  if value_expr is None:
89
- # computed_with needs to be a Callable
90
- if not callable(computed_with):
91
- raise excs.Error(
92
- f'Column {name}: computed_with needs to be either a Pixeltable expression or a Callable, '
93
- f'but it is a {type(computed_with)}')
94
- if col_type is None:
95
- raise excs.Error(f'Column {name}: col_type is required if computed_with is a Callable')
96
- # we need to turn the computed_with function into an Expr, but this requires resolving
97
- # column name references and for that we need to wait until we're assigned to a Table
98
- self.compute_func = computed_with
82
+ raise excs.Error(
83
+ f'Column {name}: computed_with needs to be a valid Pixeltable expression, '
84
+ f'but it is a {type(computed_with)}')
99
85
  else:
100
86
  self._value_expr = value_expr.copy()
101
87
  self.col_type = self._value_expr.col_type
@@ -158,7 +144,7 @@ class Column:
158
144
 
159
145
  @property
160
146
  def is_computed(self) -> bool:
161
- return self.compute_func is not None or self._value_expr is not None or self.value_expr_dict is not None
147
+ return self._value_expr is not None or self.value_expr_dict is not None
162
148
 
163
149
  @property
164
150
  def is_stored(self) -> bool:
pixeltable/catalog/insertable_table.py CHANGED
@@ -1,7 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import logging
4
- from typing import Any, Dict, Iterable, List, Optional, overload
4
+ from typing import Any, Iterable, Literal, Optional, overload
5
5
  from uuid import UUID
6
6
 
7
7
  import sqlalchemy.orm as orm
@@ -13,7 +13,7 @@ from pixeltable.env import Env
13
13
  from pixeltable.utils.filecache import FileCache
14
14
 
15
15
  from .catalog import Catalog
16
- from .globals import UpdateStatus, MediaValidation
16
+ from .globals import MediaValidation, UpdateStatus
17
17
  from .table import Table
18
18
  from .table_version import TableVersion
19
19
  from .table_version_path import TableVersionPath
@@ -36,7 +36,7 @@ class InsertableTable(Table):
36
36
  @classmethod
37
37
  def _create(
38
38
  cls, dir_id: UUID, name: str, schema: dict[str, ts.ColumnType], df: Optional[pxt.DataFrame],
39
- primary_key: List[str], num_retained_versions: int, comment: str, media_validation: MediaValidation
39
+ primary_key: list[str], num_retained_versions: int, comment: str, media_validation: MediaValidation
40
40
  ) -> InsertableTable:
41
41
  columns = cls._create_columns(schema)
42
42
  cls._verify_schema(columns)
@@ -79,15 +79,31 @@ class InsertableTable(Table):
79
79
 
80
80
  @overload
81
81
  def insert(
82
- self, rows: Iterable[Dict[str, Any]], /, *, print_stats: bool = False, fail_on_exception: bool = True
82
+ self,
83
+ rows: Iterable[dict[str, Any]],
84
+ /,
85
+ *,
86
+ print_stats: bool = False,
87
+ on_error: Literal['abort', 'ignore'] = 'abort'
83
88
  ) -> UpdateStatus: ...
84
89
 
85
90
  @overload
86
- def insert(self, *, print_stats: bool = False, fail_on_exception: bool = True, **kwargs: Any) -> UpdateStatus: ...
91
+ def insert(
92
+ self,
93
+ *,
94
+ print_stats: bool = False,
95
+ on_error: Literal['abort', 'ignore'] = 'abort',
96
+ **kwargs: Any
97
+ ) -> UpdateStatus: ...
87
98
 
88
99
  def insert( # type: ignore[misc]
89
- self, rows: Optional[Iterable[dict[str, Any]]] = None, /, *, print_stats: bool = False,
90
- fail_on_exception: bool = True, **kwargs: Any
100
+ self,
101
+ rows: Optional[Iterable[dict[str, Any]]] = None,
102
+ /,
103
+ *,
104
+ print_stats: bool = False,
105
+ on_error: Literal['abort', 'ignore'] = 'abort',
106
+ **kwargs: Any
91
107
  ) -> UpdateStatus:
92
108
  if rows is None:
93
109
  rows = [kwargs]
@@ -96,6 +112,8 @@ class InsertableTable(Table):
96
112
  if len(kwargs) > 0:
97
113
  raise excs.Error('`kwargs` cannot be specified unless `rows is None`.')
98
114
 
115
+ fail_on_exception = on_error == 'abort'
116
+
99
117
  if not isinstance(rows, list):
100
118
  raise excs.Error('rows must be a list of dictionaries')
101
119
  if len(rows) == 0:
@@ -121,7 +139,7 @@ class InsertableTable(Table):
121
139
  FileCache.get().emit_eviction_warnings()
122
140
  return status
123
141
 
124
- def _validate_input_rows(self, rows: List[Dict[str, Any]]) -> None:
142
+ def _validate_input_rows(self, rows: list[dict[str, Any]]) -> None:
125
143
  """Verify that the input rows match the table schema"""
126
144
  valid_col_names = set(self._schema.keys())
127
145
  reqd_col_names = set(self._tbl_version_path.tbl_version.get_required_col_names())
pixeltable/catalog/table.py CHANGED
@@ -6,7 +6,7 @@ import json
6
6
  import logging
7
7
  from pathlib import Path
8
8
  from typing import _GenericAlias # type: ignore[attr-defined]
9
- from typing import TYPE_CHECKING, Any, Callable, Iterable, Literal, Optional, Set, Sequence, Tuple, Type, Union, overload
9
+ from typing import TYPE_CHECKING, Any, Callable, Iterable, Literal, Optional, Sequence, Union, overload
10
10
  from uuid import UUID
11
11
 
12
12
  import pandas as pd
@@ -125,7 +125,7 @@ class Table(SchemaObject):
125
125
  def __getattr__(self, name: str) -> 'pxt.exprs.ColumnRef':
126
126
  """Return a ColumnRef for the given name.
127
127
  """
128
- return getattr(self._tbl_version_path, name)
128
+ return self._tbl_version_path.get_column_ref(name)
129
129
 
130
130
  @overload
131
131
  def __getitem__(self, name: str) -> 'pxt.exprs.ColumnRef': ...
@@ -217,6 +217,12 @@ class Table(SchemaObject):
217
217
  """Return the number of rows in this table."""
218
218
  return self._df().count()
219
219
 
220
+ @property
221
+ def columns(self) -> list[str]:
222
+ """Return the names of the columns in this table. """
223
+ cols = self._tbl_version_path.columns()
224
+ return [c.name for c in cols]
225
+
220
226
  @property
221
227
  def _schema(self) -> dict[str, ts.ColumnType]:
222
228
  """Return the schema (column names and column types) of this table."""
@@ -250,7 +256,7 @@ class Table(SchemaObject):
250
256
  def _media_validation(self) -> MediaValidation:
251
257
  return self._tbl_version.media_validation
252
258
 
253
- def _description(self) -> pd.DataFrame:
259
+ def _description(self, cols: Optional[Iterable[Column]] = None) -> pd.DataFrame:
254
260
  cols = self._tbl_version_path.columns()
255
261
  df = pd.DataFrame({
256
262
  'Column Name': [c.name for c in cols],
@@ -259,8 +265,8 @@ class Table(SchemaObject):
259
265
  })
260
266
  return df
261
267
 
262
- def _description_html(self) -> pandas.io.formats.style.Styler:
263
- pd_df = self._description()
268
+ def _description_html(self, cols: Optional[Iterable[Column]] = None) -> pandas.io.formats.style.Styler:
269
+ pd_df = self._description(cols)
264
270
  # white-space: pre-wrap: print \n as newline
265
271
  # th: center-align headings
266
272
  return (
@@ -329,30 +335,74 @@ class Table(SchemaObject):
329
335
  raise excs.Error(f'Column name must be a string, got {type(col_name)}')
330
336
  if not isinstance(spec, (ts.ColumnType, exprs.Expr, type, _GenericAlias)):
331
337
  raise excs.Error(f'Column spec must be a ColumnType, Expr, or type, got {type(spec)}')
332
- self.add_column(type=None, stored=None, print_stats=False, on_error='abort', **{col_name: spec})
338
+ self.add_column(stored=None, print_stats=False, on_error='abort', **{col_name: spec})
339
+
340
+ def add_columns(
341
+ self,
342
+ schema: dict[str, Union[ts.ColumnType, builtins.type, _GenericAlias]]
343
+ ) -> UpdateStatus:
344
+ """
345
+ Adds multiple columns to the table. The columns must be concrete (non-computed) columns; to add computed columns,
346
+ use [`add_computed_column()`][pixeltable.catalog.Table.add_computed_column] instead.
347
+
348
+ The format of the `schema` argument is identical to the format of the schema in a call to
349
+ [`create_table()`][pixeltable.globals.create_table].
350
+
351
+ Args:
352
+ schema: A dictionary mapping column names to types.
353
+
354
+ Returns:
355
+ Information about the execution status of the operation.
356
+
357
+ Raises:
358
+ Error: If any column name is invalid or already exists.
359
+
360
+ Examples:
361
+ Add multiple columns to the table `my_table`:
362
+
363
+ >>> tbl = pxt.get_table('my_table')
364
+ ... schema = {
365
+ ... 'new_col_1': pxt.Int,
366
+ ... 'new_col_2': pxt.String,
367
+ ... }
368
+ ... tbl.add_columns(schema)
369
+ """
370
+ self._check_is_dropped()
371
+ col_schema = {
372
+ col_name: {'type': ts.ColumnType.normalize_type(spec, nullable_default=True, allow_builtin_types=False)}
373
+ for col_name, spec in schema.items()
374
+ }
375
+ new_cols = self._create_columns(col_schema)
376
+ for new_col in new_cols:
377
+ self._verify_column(new_col, set(self._schema.keys()), set(self._query_names))
378
+ status = self._tbl_version.add_columns(new_cols, print_stats=False, on_error='abort')
379
+ FileCache.get().emit_eviction_warnings()
380
+ return status
333
381
 
382
+ # TODO: add_column() still supports computed columns for backward-compatibility. In the future, computed columns
383
+ # will be supported only through add_computed_column(). At that point, we can remove the `stored`,
384
+ # `print_stats`, and `on_error` parameters, and change the method body to simply call self.add_columns(kwargs),
385
+ # simplifying the code. For the time being, there's some obvious code duplication.
334
386
  def add_column(
335
- self,
336
- *,
337
- type: Union[ts.ColumnType, builtins.type, _GenericAlias, None] = None,
338
- stored: Optional[bool] = None,
339
- print_stats: bool = False,
340
- on_error: Literal['abort', 'ignore'] = 'abort',
341
- **kwargs: Union[ts.ColumnType, builtins.type, _GenericAlias, exprs.Expr, Callable]
387
+ self,
388
+ *,
389
+ stored: Optional[bool] = None,
390
+ print_stats: bool = False,
391
+ on_error: Literal['abort', 'ignore'] = 'abort',
392
+ **kwargs: Union[ts.ColumnType, builtins.type, _GenericAlias, exprs.Expr]
342
393
  ) -> UpdateStatus:
343
394
  """
344
395
  Adds a column to the table.
345
396
 
346
397
  Args:
347
- kwargs: Exactly one keyword argument of the form `column_name=type` or `column_name=expression`.
348
- type: The type of the column. Only valid and required if `value-expression` is a Callable.
398
+ kwargs: Exactly one keyword argument of the form `col_name=col_type`.
349
399
  stored: Whether the column is materialized and stored or computed on demand. Only valid for image columns.
350
400
  print_stats: If `True`, print execution metrics during evaluation.
351
401
  on_error: Determines the behavior if an error occurs while evaluating the column expression for at least one
352
402
  row.
353
403
 
354
- - If `on_error='abort'`, then an exception will be raised and the column will not be added.
355
- - If `on_error='ignore'`, then execution will continue and the column will be added. Any rows
404
+ - `'abort'`: an exception will be raised and the column will not be added.
405
+ - `'ignore'`: execution will continue and the column will be added. Any rows
356
406
  with errors will have a `None` value for the column, with information about the error stored in the
357
407
  corresponding `tbl.col_name.errortype` and `tbl.col_name.errormsg` fields.
358
408
 
@@ -370,53 +420,79 @@ class Table(SchemaObject):
370
420
  Alternatively, this can also be expressed as:
371
421
 
372
422
  >>> tbl['new_col'] = pxt.Int
423
+ """
424
+ self._check_is_dropped()
425
+ # verify kwargs and construct column schema dict
426
+ if len(kwargs) != 1:
427
+ raise excs.Error(
428
+ f'add_column() requires exactly one keyword argument of the form "col_name=col_type"; '
429
+ f'got {len(kwargs)} instead ({", ".join(list(kwargs.keys()))})'
430
+ )
431
+ col_name, spec = next(iter(kwargs.items()))
432
+ if not is_valid_identifier(col_name):
433
+ raise excs.Error(f'Invalid column name: {col_name!r}')
434
+
435
+ col_schema: dict[str, Any] = {}
436
+ if isinstance(spec, (ts.ColumnType, builtins.type, _GenericAlias)):
437
+ col_schema['type'] = ts.ColumnType.normalize_type(spec, nullable_default=True, allow_builtin_types=False)
438
+ else:
439
+ col_schema['value'] = spec
440
+ if stored is not None:
441
+ col_schema['stored'] = stored
442
+
443
+ new_col = self._create_columns({col_name: col_schema})[0]
444
+ self._verify_column(new_col, set(self._schema.keys()), set(self._query_names))
445
+ status = self._tbl_version.add_columns([new_col], print_stats=print_stats, on_error=on_error)
446
+ FileCache.get().emit_eviction_warnings()
447
+ return status
373
448
 
374
- For a table with int column `int_col`, add a column that is the factorial of ``int_col``. The names of
375
- the parameters of the Callable must correspond to existing column names (the column values are then passed
376
- as arguments to the Callable). In this case, the column type needs to be specified explicitly:
449
+ def add_computed_column(
450
+ self,
451
+ *,
452
+ stored: Optional[bool] = None,
453
+ print_stats: bool = False,
454
+ on_error: Literal['abort', 'ignore'] = 'abort',
455
+ **kwargs: exprs.Expr
456
+ ) -> UpdateStatus:
457
+ """
458
+ Adds a computed column to the table.
377
459
 
378
- >>> tbl.add_column(factorial=lambda int_col: math.factorial(int_col), type=pxt.Int)
460
+ Args:
461
+ kwargs: Exactly one keyword argument of the form `col_name=expression`.
379
462
 
380
- For a table with an image column ``frame``, add an image column ``rotated`` that rotates the image by
381
- 90 degrees. In this case, the column type is inferred from the expression. Also, the column is not stored
382
- (by default, computed image columns are not stored but recomputed on demand):
463
+ Returns:
464
+ Information about the execution status of the operation.
383
465
 
384
- >>> tbl.add_column(rotated=tbl.frame.rotate(90))
466
+ Raises:
467
+ Error: If the column name is invalid or already exists.
385
468
 
386
- Alternatively, this can also be expressed as:
469
+ Examples:
470
+ For a table with an image column `frame`, add an image column `rotated` that rotates the image by
471
+ 90 degrees:
387
472
 
388
- >>> tbl['rotated'] = tbl.frame.rotate(90)
473
+ >>> tbl.add_computed_column(rotated=tbl.frame.rotate(90))
389
474
 
390
475
  Do the same, but now the column is unstored:
391
476
 
392
- >>> tbl.add_column(rotated=tbl.frame.rotate(90), stored=False)
477
+ >>> tbl.add_computed_column(rotated=tbl.frame.rotate(90), stored=False)
393
478
  """
394
479
  self._check_is_dropped()
395
- # verify kwargs and construct column schema dict
396
480
  if len(kwargs) != 1:
397
481
  raise excs.Error(
398
- f'add_column() requires exactly one keyword argument of the form "column-name=type|value-expression"; '
399
- f'got {len(kwargs)} instead ({", ".join(list(kwargs.keys()))})'
482
+ f'add_computed_column() requires exactly one keyword argument of the form "column-name=type|value-expression"; '
483
+ f'got {len(kwargs)} arguments instead ({", ".join(list(kwargs.keys()))})'
400
484
  )
401
485
  col_name, spec = next(iter(kwargs.items()))
402
486
  if not is_valid_identifier(col_name):
403
487
  raise excs.Error(f'Invalid column name: {col_name!r}')
404
- if isinstance(spec, (ts.ColumnType, builtins.type, _GenericAlias, exprs.Expr)) and type is not None:
405
- raise excs.Error(f'add_column(): keyword argument "type" is redundant')
406
488
 
407
- col_schema: dict[str, Any] = {}
408
- if isinstance(spec, (ts.ColumnType, builtins.type, _GenericAlias)):
409
- col_schema['type'] = ts.ColumnType.normalize_type(spec, nullable_default=True)
410
- else:
411
- col_schema['value'] = spec
412
- if type is not None:
413
- col_schema['type'] = ts.ColumnType.normalize_type(type, nullable_default=True)
489
+ col_schema: dict[str, Any] = {'value': spec}
414
490
  if stored is not None:
415
491
  col_schema['stored'] = stored
416
492
 
417
493
  new_col = self._create_columns({col_name: col_schema})[0]
418
494
  self._verify_column(new_col, set(self._schema.keys()), set(self._query_names))
419
- status = self._tbl_version.add_column(new_col, print_stats=print_stats, on_error=on_error)
495
+ status = self._tbl_version.add_columns([new_col], print_stats=print_stats, on_error=on_error)
420
496
  FileCache.get().emit_eviction_warnings()
421
497
  return status
422
498
 
@@ -429,39 +505,29 @@ class Table(SchemaObject):
429
505
  """
430
506
  assert isinstance(spec, dict)
431
507
  valid_keys = {'type', 'value', 'stored', 'media_validation'}
432
- has_type = False
433
508
  for k in spec.keys():
434
509
  if k not in valid_keys:
435
510
  raise excs.Error(f'Column {name}: invalid key {k!r}')
436
511
 
512
+ if 'type' not in spec and 'value' not in spec:
513
+ raise excs.Error(f"Column {name}: 'type' or 'value' must be specified")
514
+
437
515
  if 'type' in spec:
438
- has_type = True
439
516
  if not isinstance(spec['type'], (ts.ColumnType, type, _GenericAlias)):
440
517
  raise excs.Error(f'Column {name}: "type" must be a type or ColumnType, got {spec["type"]}')
441
518
 
442
519
  if 'value' in spec:
443
- value_spec = spec['value']
444
- value_expr = exprs.Expr.from_object(value_spec)
520
+ value_expr = exprs.Expr.from_object(spec['value'])
445
521
  if value_expr is None:
446
- # needs to be a Callable
447
- if not callable(value_spec):
448
- raise excs.Error(
449
- f'Column {name}: value needs to be either a Pixeltable expression or a Callable, '
450
- f'but it is a {type(value_spec)}')
451
- if 'type' not in spec:
452
- raise excs.Error(f'Column {name}: "type" is required if value is a Callable')
453
- else:
454
- has_type = True
455
- if 'type' in spec:
456
- raise excs.Error(f'Column {name}: "type" is redundant if value is a Pixeltable expression')
522
+ raise excs.Error(f'Column {name}: value must be a Pixeltable expression.')
523
+ if 'type' in spec:
524
+ raise excs.Error(f"Column {name}: 'type' is redundant if 'value' is specified")
457
525
 
458
526
  if 'media_validation' in spec:
459
527
  _ = catalog.MediaValidation.validated(spec['media_validation'], f'Column {name}: media_validation')
460
528
 
461
529
  if 'stored' in spec and not isinstance(spec['stored'], bool):
462
530
  raise excs.Error(f'Column {name}: "stored" must be a bool, got {spec["stored"]}')
463
- if not has_type:
464
- raise excs.Error(f'Column {name}: "type" is required')
465
531
 
466
532
  @classmethod
467
533
  def _create_columns(cls, schema: dict[str, Any]) -> list[Column]:
@@ -475,19 +541,15 @@ class Table(SchemaObject):
475
541
  stored = True
476
542
 
477
543
  if isinstance(spec, (ts.ColumnType, type, _GenericAlias)):
478
- col_type = ts.ColumnType.normalize_type(spec, nullable_default=True)
544
+ col_type = ts.ColumnType.normalize_type(spec, nullable_default=True, allow_builtin_types=False)
479
545
  elif isinstance(spec, exprs.Expr):
480
546
  # create copy so we can modify it
481
547
  value_expr = spec.copy()
482
- elif callable(spec):
483
- raise excs.Error(
484
- f'Column {name} computed with a Callable: specify using a dictionary with '
485
- f'the "value" and "type" keys (e.g., "{name}": {{"value": <Callable>, "type": pxt.Int}})'
486
- )
487
548
  elif isinstance(spec, dict):
488
549
  cls._validate_column_spec(name, spec)
489
550
  if 'type' in spec:
490
- col_type = ts.ColumnType.normalize_type(spec['type'], nullable_default=True)
551
+ col_type = ts.ColumnType.normalize_type(
552
+ spec['type'], nullable_default=True, allow_builtin_types=False)
491
553
  value_expr = spec.get('value')
492
554
  if value_expr is not None and isinstance(value_expr, exprs.Expr):
493
555
  # create copy so we can modify it
@@ -499,6 +561,8 @@ class Table(SchemaObject):
499
561
  catalog.MediaValidation[media_validation_str.upper()] if media_validation_str is not None
500
562
  else None
501
563
  )
564
+ else:
565
+ raise excs.Error(f'Invalid value for column {name!r}')
502
566
 
503
567
  column = Column(
504
568
  name, col_type=col_type, computed_with=value_expr, stored=stored, is_pk=primary_key,
@@ -508,7 +572,7 @@ class Table(SchemaObject):
508
572
 
509
573
  @classmethod
510
574
  def _verify_column(
511
- cls, col: Column, existing_column_names: Set[str], existing_query_names: Optional[Set[str]] = None
575
+ cls, col: Column, existing_column_names: set[str], existing_query_names: Optional[set[str]] = None
512
576
  ) -> None:
513
577
  """Check integrity of user-supplied Column and supply defaults"""
514
578
  if is_system_column_name(col.name):
@@ -529,7 +593,7 @@ class Table(SchemaObject):
529
593
  @classmethod
530
594
  def _verify_schema(cls, schema: list[Column]) -> None:
531
595
  """Check integrity of user-supplied schema and set defaults"""
532
- column_names: Set[str] = set()
596
+ column_names: set[str] = set()
533
597
  for col in schema:
534
598
  cls._verify_column(col, column_names)
535
599
  column_names.add(col.name)
@@ -710,7 +774,7 @@ class Table(SchemaObject):
710
774
 
711
775
  def _drop_index(
712
776
  self, *, column_name: Optional[str] = None, idx_name: Optional[str] = None,
713
- _idx_class: Optional[Type[index.IndexBase]] = None
777
+ _idx_class: Optional[type[index.IndexBase]] = None
714
778
  ) -> None:
715
779
  if self._tbl_version_path.is_snapshot():
716
780
  raise excs.Error('Cannot drop an index from a snapshot')
@@ -741,36 +805,68 @@ class Table(SchemaObject):
741
805
 
742
806
  @overload
743
807
  def insert(
744
- self, rows: Iterable[dict[str, Any]], /, *, print_stats: bool = False, fail_on_exception: bool = True
808
+ self,
809
+ rows: Iterable[dict[str, Any]],
810
+ /,
811
+ *,
812
+ print_stats: bool = False,
813
+ on_error: Literal['abort', 'ignore'] = 'abort'
745
814
  ) -> UpdateStatus: ...
746
815
 
747
816
  @overload
748
- def insert(self, *, print_stats: bool = False, fail_on_exception: bool = True, **kwargs: Any) -> UpdateStatus: ...
817
+ def insert(
818
+ self,
819
+ *,
820
+ print_stats: bool = False,
821
+ on_error: Literal['abort', 'ignore'] = 'abort',
822
+ **kwargs: Any
823
+ ) -> UpdateStatus: ...
749
824
 
750
825
  @abc.abstractmethod # type: ignore[misc]
751
826
  def insert(
752
- self, rows: Optional[Iterable[dict[str, Any]]] = None, /, *, print_stats: bool = False,
753
- fail_on_exception: bool = True, **kwargs: Any
827
+ self,
828
+ rows: Optional[Iterable[dict[str, Any]]] = None,
829
+ /,
830
+ *,
831
+ print_stats: bool = False,
832
+ on_error: Literal['abort', 'ignore'] = 'abort',
833
+ **kwargs: Any
754
834
  ) -> UpdateStatus:
755
835
  """Inserts rows into this table. There are two mutually exclusive call patterns:
756
836
 
757
837
  To insert multiple rows at a time:
758
- ``insert(rows: Iterable[dict[str, Any]], /, *, print_stats: bool = False, fail_on_exception: bool = True)``
838
+
839
+ ```python
840
+ insert(
841
+ rows: Iterable[dict[str, Any]],
842
+ /,
843
+ *,
844
+ print_stats: bool = False,
845
+ on_error: Literal['abort', 'ignore'] = 'abort'
846
+ )```
759
847
 
760
848
  To insert just a single row, you can use the more concise syntax:
761
- ``insert(*, print_stats: bool = False, fail_on_exception: bool = True, **kwargs: Any)``
849
+
850
+ ```python
851
+ insert(
852
+ *,
853
+ print_stats: bool = False,
854
+ on_error: Literal['abort', 'ignore'] = 'abort',
855
+ **kwargs: Any
856
+ )```
762
857
 
763
858
  Args:
764
859
  rows: (if inserting multiple rows) A list of rows to insert, each of which is a dictionary mapping column
765
860
  names to values.
766
861
  kwargs: (if inserting a single row) Keyword-argument pairs representing column names and values.
767
- print_stats: If ``True``, print statistics about the cost of computed columns.
768
- fail_on_exception:
769
- Determines how exceptions in computed columns and invalid media files (e.g., corrupt images)
770
- are handled.
771
- If ``False``, store error information (accessible as column properties 'errortype' and 'errormsg')
772
- for those cases, but continue inserting rows.
773
- If ``True``, raise an exception that aborts the insert.
862
+ print_stats: If `True`, print statistics about the cost of computed columns.
863
+ on_error: Determines the behavior if an error occurs while evaluating a computed column or detecting an
864
+ invalid media file (such as a corrupt image) for one of the inserted rows.
865
+
866
+ - If `on_error='abort'`, then an exception will be raised and the rows will not be inserted.
867
+ - If `on_error='ignore'`, then execution will continue and the rows will be inserted. Any cells
868
+ with errors will have a `None` value for that cell, with information about the error stored in the
869
+ corresponding `tbl.col_name.errortype` and `tbl.col_name.errormsg` fields.
774
870
 
775
871
  Returns:
776
872
  An [`UpdateStatus`][pixeltable.UpdateStatus] object containing information about the update.
@@ -781,7 +877,7 @@ class Table(SchemaObject):
781
877
  - The table is a view or snapshot.
782
878
  - The table has been dropped.
783
879
  - One of the rows being inserted does not conform to the table schema.
784
- - An error occurs during processing of computed columns, and `fail_on_exception=True`.
880
+ - An error occurs during processing of computed columns, and `on_error='abort'`.
785
881
 
786
882
  Examples:
787
883
  Insert two rows into the table `my_table` with three int columns ``a``, ``b``, and ``c``.
@@ -867,7 +963,7 @@ class Table(SchemaObject):
867
963
 
868
964
  # pseudo-column _rowid: contains the rowid of the row to update and can be used instead of the primary key
869
965
  has_rowid = _ROWID_COLUMN_NAME in rows[0]
870
- rowids: list[Tuple[int, ...]] = []
966
+ rowids: list[tuple[int, ...]] = []
871
967
  if len(pk_col_names) == 0 and not has_rowid:
872
968
  raise excs.Error('Table must have primary key for batch update')
873
969