pixeltable-0.2.21-py3-none-any.whl → pixeltable-0.2.23-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. pixeltable/__init__.py +2 -2
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +1 -1
  4. pixeltable/catalog/column.py +41 -29
  5. pixeltable/catalog/globals.py +18 -0
  6. pixeltable/catalog/insertable_table.py +30 -10
  7. pixeltable/catalog/table.py +198 -86
  8. pixeltable/catalog/table_version.py +47 -53
  9. pixeltable/catalog/table_version_path.py +2 -2
  10. pixeltable/catalog/view.py +17 -18
  11. pixeltable/dataframe.py +27 -36
  12. pixeltable/env.py +7 -0
  13. pixeltable/exec/__init__.py +0 -1
  14. pixeltable/exec/aggregation_node.py +6 -3
  15. pixeltable/exec/cache_prefetch_node.py +189 -43
  16. pixeltable/exec/data_row_batch.py +5 -22
  17. pixeltable/exec/exec_context.py +2 -2
  18. pixeltable/exec/exec_node.py +3 -2
  19. pixeltable/exec/expr_eval_node.py +23 -16
  20. pixeltable/exec/in_memory_data_node.py +6 -3
  21. pixeltable/exec/sql_node.py +24 -25
  22. pixeltable/exprs/arithmetic_expr.py +12 -5
  23. pixeltable/exprs/array_slice.py +7 -7
  24. pixeltable/exprs/column_property_ref.py +37 -10
  25. pixeltable/exprs/column_ref.py +97 -14
  26. pixeltable/exprs/comparison.py +10 -5
  27. pixeltable/exprs/compound_predicate.py +8 -7
  28. pixeltable/exprs/data_row.py +27 -18
  29. pixeltable/exprs/expr.py +53 -52
  30. pixeltable/exprs/expr_set.py +5 -0
  31. pixeltable/exprs/function_call.py +32 -16
  32. pixeltable/exprs/globals.py +4 -1
  33. pixeltable/exprs/in_predicate.py +8 -7
  34. pixeltable/exprs/inline_expr.py +4 -4
  35. pixeltable/exprs/is_null.py +4 -4
  36. pixeltable/exprs/json_mapper.py +11 -12
  37. pixeltable/exprs/json_path.py +6 -11
  38. pixeltable/exprs/literal.py +5 -5
  39. pixeltable/exprs/method_ref.py +5 -4
  40. pixeltable/exprs/object_ref.py +2 -1
  41. pixeltable/exprs/row_builder.py +88 -36
  42. pixeltable/exprs/rowid_ref.py +12 -11
  43. pixeltable/exprs/similarity_expr.py +12 -7
  44. pixeltable/exprs/sql_element_cache.py +7 -5
  45. pixeltable/exprs/type_cast.py +8 -6
  46. pixeltable/exprs/variable.py +5 -4
  47. pixeltable/func/aggregate_function.py +9 -9
  48. pixeltable/func/expr_template_function.py +6 -5
  49. pixeltable/func/function.py +11 -10
  50. pixeltable/func/udf.py +6 -11
  51. pixeltable/functions/__init__.py +2 -2
  52. pixeltable/functions/globals.py +5 -7
  53. pixeltable/functions/huggingface.py +155 -45
  54. pixeltable/functions/llama_cpp.py +107 -0
  55. pixeltable/functions/mistralai.py +1 -1
  56. pixeltable/functions/ollama.py +147 -0
  57. pixeltable/functions/openai.py +1 -1
  58. pixeltable/functions/replicate.py +72 -0
  59. pixeltable/functions/string.py +9 -0
  60. pixeltable/functions/together.py +1 -1
  61. pixeltable/functions/util.py +5 -2
  62. pixeltable/globals.py +67 -26
  63. pixeltable/index/btree.py +16 -3
  64. pixeltable/index/embedding_index.py +4 -4
  65. pixeltable/io/__init__.py +1 -2
  66. pixeltable/io/fiftyone.py +178 -0
  67. pixeltable/io/globals.py +96 -2
  68. pixeltable/iterators/base.py +3 -2
  69. pixeltable/iterators/document.py +1 -1
  70. pixeltable/iterators/video.py +120 -63
  71. pixeltable/metadata/__init__.py +1 -1
  72. pixeltable/metadata/converters/convert_21.py +34 -0
  73. pixeltable/metadata/converters/util.py +45 -4
  74. pixeltable/metadata/notes.py +1 -0
  75. pixeltable/metadata/schema.py +8 -0
  76. pixeltable/plan.py +17 -15
  77. pixeltable/py.typed +0 -0
  78. pixeltable/store.py +7 -2
  79. pixeltable/tool/create_test_db_dump.py +1 -1
  80. pixeltable/tool/create_test_video.py +1 -1
  81. pixeltable/tool/embed_udf.py +1 -1
  82. pixeltable/tool/mypy_plugin.py +28 -5
  83. pixeltable/type_system.py +100 -36
  84. pixeltable/utils/coco.py +5 -5
  85. pixeltable/utils/documents.py +15 -1
  86. pixeltable/utils/formatter.py +12 -13
  87. pixeltable/utils/s3.py +6 -3
  88. {pixeltable-0.2.21.dist-info → pixeltable-0.2.23.dist-info}/METADATA +158 -49
  89. pixeltable-0.2.23.dist-info/RECORD +153 -0
  90. pixeltable/exec/media_validation_node.py +0 -43
  91. pixeltable-0.2.21.dist-info/RECORD +0 -148
  92. {pixeltable-0.2.21.dist-info → pixeltable-0.2.23.dist-info}/LICENSE +0 -0
  93. {pixeltable-0.2.21.dist-info → pixeltable-0.2.23.dist-info}/WHEEL +0 -0
  94. {pixeltable-0.2.21.dist-info → pixeltable-0.2.23.dist-info}/entry_points.txt +0 -0
@@ -6,7 +6,7 @@ import json
6
6
  import logging
7
7
  from pathlib import Path
8
8
  from typing import _GenericAlias # type: ignore[attr-defined]
9
- from typing import TYPE_CHECKING, Any, Callable, Iterable, Literal, Optional, Set, Sequence, Tuple, Type, Union, overload
9
+ from typing import TYPE_CHECKING, Any, Callable, Iterable, Literal, Optional, Sequence, Union, overload
10
10
  from uuid import UUID
11
11
 
12
12
  import pandas as pd
@@ -24,7 +24,7 @@ import pixeltable.type_system as ts
24
24
  from pixeltable.utils.filecache import FileCache
25
25
 
26
26
  from .column import Column
27
- from .globals import _ROWID_COLUMN_NAME, UpdateStatus, is_system_column_name, is_valid_identifier
27
+ from .globals import _ROWID_COLUMN_NAME, UpdateStatus, is_system_column_name, is_valid_identifier, MediaValidation
28
28
  from .schema_object import SchemaObject
29
29
  from .table_version import TableVersion
30
30
  from .table_version_path import TableVersionPath
@@ -91,6 +91,7 @@ class Table(SchemaObject):
91
91
  'num_retained_versions': 10,
92
92
  'is_view': False,
93
93
  'is_snapshot': False,
94
+ 'media_validation': 'on_write',
94
95
  }
95
96
  ```
96
97
  """
@@ -101,6 +102,7 @@ class Table(SchemaObject):
101
102
  md['schema_version'] = self._tbl_version.schema_version
102
103
  md['comment'] = self._comment
103
104
  md['num_retained_versions'] = self._num_retained_versions
105
+ md['media_validation'] = self._media_validation.name.lower()
104
106
  return md
105
107
 
106
108
  @property
@@ -123,7 +125,7 @@ class Table(SchemaObject):
123
125
  def __getattr__(self, name: str) -> 'pxt.exprs.ColumnRef':
124
126
  """Return a ColumnRef for the given name.
125
127
  """
126
- return getattr(self._tbl_version_path, name)
128
+ return self._tbl_version_path.get_column_ref(name)
127
129
 
128
130
  @overload
129
131
  def __getitem__(self, name: str) -> 'pxt.exprs.ColumnRef': ...
@@ -215,6 +217,12 @@ class Table(SchemaObject):
215
217
  """Return the number of rows in this table."""
216
218
  return self._df().count()
217
219
 
220
+ @property
221
+ def columns(self) -> list[str]:
222
+ """Return the names of the columns in this table. """
223
+ cols = self._tbl_version_path.columns()
224
+ return [c.name for c in cols]
225
+
218
226
  @property
219
227
  def _schema(self) -> dict[str, ts.ColumnType]:
220
228
  """Return the schema (column names and column types) of this table."""
@@ -244,7 +252,11 @@ class Table(SchemaObject):
244
252
  def _num_retained_versions(self):
245
253
  return self._tbl_version.num_retained_versions
246
254
 
247
- def _description(self) -> pd.DataFrame:
255
+ @property
256
+ def _media_validation(self) -> MediaValidation:
257
+ return self._tbl_version.media_validation
258
+
259
+ def _description(self, cols: Optional[Iterable[Column]] = None) -> pd.DataFrame:
248
260
  cols = self._tbl_version_path.columns()
249
261
  df = pd.DataFrame({
250
262
  'Column Name': [c.name for c in cols],
@@ -253,8 +265,8 @@ class Table(SchemaObject):
253
265
  })
254
266
  return df
255
267
 
256
- def _description_html(self) -> pandas.io.formats.style.Styler:
257
- pd_df = self._description()
268
+ def _description_html(self, cols: Optional[Iterable[Column]] = None) -> pandas.io.formats.style.Styler:
269
+ pd_df = self._description(cols)
258
270
  # white-space: pre-wrap: print \n as newline
259
271
  # th: center-align headings
260
272
  return (
@@ -323,30 +335,74 @@ class Table(SchemaObject):
323
335
  raise excs.Error(f'Column name must be a string, got {type(col_name)}')
324
336
  if not isinstance(spec, (ts.ColumnType, exprs.Expr, type, _GenericAlias)):
325
337
  raise excs.Error(f'Column spec must be a ColumnType, Expr, or type, got {type(spec)}')
326
- self.add_column(type=None, stored=None, print_stats=False, on_error='abort', **{col_name: spec})
338
+ self.add_column(stored=None, print_stats=False, on_error='abort', **{col_name: spec})
339
+
340
+ def add_columns(
341
+ self,
342
+ schema: dict[str, Union[ts.ColumnType, builtins.type, _GenericAlias]]
343
+ ) -> UpdateStatus:
344
+ """
345
+ Adds multiple columns to the table. The columns must be concrete (non-computed) columns; to add computed columns,
346
+ use [`add_computed_column()`][pixeltable.catalog.Table.add_computed_column] instead.
347
+
348
+ The format of the `schema` argument is identical to the format of the schema in a call to
349
+ [`create_table()`][pixeltable.globals.create_table].
350
+
351
+ Args:
352
+ schema: A dictionary mapping column names to types.
353
+
354
+ Returns:
355
+ Information about the execution status of the operation.
356
+
357
+ Raises:
358
+ Error: If any column name is invalid or already exists.
359
+
360
+ Examples:
361
+ Add multiple columns to the table `my_table`:
362
+
363
+ >>> tbl = pxt.get_table('my_table')
364
+ ... schema = {
365
+ ... 'new_col_1': pxt.Int,
366
+ ... 'new_col_2': pxt.String,
367
+ ... }
368
+ ... tbl.add_columns(schema)
369
+ """
370
+ self._check_is_dropped()
371
+ col_schema = {
372
+ col_name: {'type': ts.ColumnType.normalize_type(spec, nullable_default=True, allow_builtin_types=False)}
373
+ for col_name, spec in schema.items()
374
+ }
375
+ new_cols = self._create_columns(col_schema)
376
+ for new_col in new_cols:
377
+ self._verify_column(new_col, set(self._schema.keys()), set(self._query_names))
378
+ status = self._tbl_version.add_columns(new_cols, print_stats=False, on_error='abort')
379
+ FileCache.get().emit_eviction_warnings()
380
+ return status
327
381
 
382
+ # TODO: add_column() still supports computed columns for backward-compatibility. In the future, computed columns
383
+ # will be supported only through add_computed_column(). At that point, we can remove the `stored`,
384
+ # `print_stats`, and `on_error` parameters, and change the method body to simply call self.add_columns(kwargs),
385
+ # simplifying the code. For the time being, there's some obvious code duplication.
328
386
  def add_column(
329
- self,
330
- *,
331
- type: Union[ts.ColumnType, builtins.type, _GenericAlias, None] = None,
332
- stored: Optional[bool] = None,
333
- print_stats: bool = False,
334
- on_error: Literal['abort', 'ignore'] = 'abort',
335
- **kwargs: Union[ts.ColumnType, builtins.type, _GenericAlias, exprs.Expr, Callable]
387
+ self,
388
+ *,
389
+ stored: Optional[bool] = None,
390
+ print_stats: bool = False,
391
+ on_error: Literal['abort', 'ignore'] = 'abort',
392
+ **kwargs: Union[ts.ColumnType, builtins.type, _GenericAlias, exprs.Expr]
336
393
  ) -> UpdateStatus:
337
394
  """
338
395
  Adds a column to the table.
339
396
 
340
397
  Args:
341
- kwargs: Exactly one keyword argument of the form `column_name=type` or `column_name=expression`.
342
- type: The type of the column. Only valid and required if `value-expression` is a Callable.
398
+ kwargs: Exactly one keyword argument of the form `col_name=col_type`.
343
399
  stored: Whether the column is materialized and stored or computed on demand. Only valid for image columns.
344
400
  print_stats: If `True`, print execution metrics during evaluation.
345
401
  on_error: Determines the behavior if an error occurs while evaluating the column expression for at least one
346
402
  row.
347
403
 
348
- - If `on_error='abort'`, then an exception will be raised and the column will not be added.
349
- - If `on_error='ignore'`, then execution will continue and the column will be added. Any rows
404
+ - `'abort'`: an exception will be raised and the column will not be added.
405
+ - `'ignore'`: execution will continue and the column will be added. Any rows
350
406
  with errors will have a `None` value for the column, with information about the error stored in the
351
407
  corresponding `tbl.col_name.errortype` and `tbl.col_name.errormsg` fields.
352
408
 
@@ -364,53 +420,79 @@ class Table(SchemaObject):
364
420
  Alternatively, this can also be expressed as:
365
421
 
366
422
  >>> tbl['new_col'] = pxt.Int
423
+ """
424
+ self._check_is_dropped()
425
+ # verify kwargs and construct column schema dict
426
+ if len(kwargs) != 1:
427
+ raise excs.Error(
428
+ f'add_column() requires exactly one keyword argument of the form "col_name=col_type"; '
429
+ f'got {len(kwargs)} instead ({", ".join(list(kwargs.keys()))})'
430
+ )
431
+ col_name, spec = next(iter(kwargs.items()))
432
+ if not is_valid_identifier(col_name):
433
+ raise excs.Error(f'Invalid column name: {col_name!r}')
434
+
435
+ col_schema: dict[str, Any] = {}
436
+ if isinstance(spec, (ts.ColumnType, builtins.type, _GenericAlias)):
437
+ col_schema['type'] = ts.ColumnType.normalize_type(spec, nullable_default=True, allow_builtin_types=False)
438
+ else:
439
+ col_schema['value'] = spec
440
+ if stored is not None:
441
+ col_schema['stored'] = stored
367
442
 
368
- For a table with int column `int_col`, add a column that is the factorial of ``int_col``. The names of
369
- the parameters of the Callable must correspond to existing column names (the column values are then passed
370
- as arguments to the Callable). In this case, the column type needs to be specified explicitly:
443
+ new_col = self._create_columns({col_name: col_schema})[0]
444
+ self._verify_column(new_col, set(self._schema.keys()), set(self._query_names))
445
+ status = self._tbl_version.add_columns([new_col], print_stats=print_stats, on_error=on_error)
446
+ FileCache.get().emit_eviction_warnings()
447
+ return status
371
448
 
372
- >>> tbl.add_column(factorial=lambda int_col: math.factorial(int_col), type=pxt.Int)
449
+ def add_computed_column(
450
+ self,
451
+ *,
452
+ stored: Optional[bool] = None,
453
+ print_stats: bool = False,
454
+ on_error: Literal['abort', 'ignore'] = 'abort',
455
+ **kwargs: exprs.Expr
456
+ ) -> UpdateStatus:
457
+ """
458
+ Adds a computed column to the table.
373
459
 
374
- For a table with an image column ``frame``, add an image column ``rotated`` that rotates the image by
375
- 90 degrees. In this case, the column type is inferred from the expression. Also, the column is not stored
376
- (by default, computed image columns are not stored but recomputed on demand):
460
+ Args:
461
+ kwargs: Exactly one keyword argument of the form `col_name=expression`.
377
462
 
378
- >>> tbl.add_column(rotated=tbl.frame.rotate(90))
463
+ Returns:
464
+ Information about the execution status of the operation.
379
465
 
380
- Alternatively, this can also be expressed as:
466
+ Raises:
467
+ Error: If the column name is invalid or already exists.
468
+
469
+ Examples:
470
+ For a table with an image column `frame`, add an image column `rotated` that rotates the image by
471
+ 90 degrees:
381
472
 
382
- >>> tbl['rotated'] = tbl.frame.rotate(90)
473
+ >>> tbl.add_computed_column(rotated=tbl.frame.rotate(90))
383
474
 
384
475
  Do the same, but now the column is unstored:
385
476
 
386
- >>> tbl.add_column(rotated=tbl.frame.rotate(90), stored=False)
477
+ >>> tbl.add_computed_column(rotated=tbl.frame.rotate(90), stored=False)
387
478
  """
388
479
  self._check_is_dropped()
389
- # verify kwargs and construct column schema dict
390
480
  if len(kwargs) != 1:
391
481
  raise excs.Error(
392
- f'add_column() requires exactly one keyword argument of the form "column-name=type|value-expression"; '
393
- f'got {len(kwargs)} instead ({", ".join(list(kwargs.keys()))})'
482
+ f'add_computed_column() requires exactly one keyword argument of the form "column-name=type|value-expression"; '
483
+ f'got {len(kwargs)} arguments instead ({", ".join(list(kwargs.keys()))})'
394
484
  )
395
485
  col_name, spec = next(iter(kwargs.items()))
396
486
  if not is_valid_identifier(col_name):
397
487
  raise excs.Error(f'Invalid column name: {col_name!r}')
398
- if isinstance(spec, (ts.ColumnType, builtins.type, _GenericAlias, exprs.Expr)) and type is not None:
399
- raise excs.Error(f'add_column(): keyword argument "type" is redundant')
400
488
 
401
- col_schema: dict[str, Any] = {}
402
- if isinstance(spec, (ts.ColumnType, builtins.type, _GenericAlias)):
403
- col_schema['type'] = ts.ColumnType.normalize_type(spec, nullable_default=True)
404
- else:
405
- col_schema['value'] = spec
406
- if type is not None:
407
- col_schema['type'] = ts.ColumnType.normalize_type(type, nullable_default=True)
489
+ col_schema: dict[str, Any] = {'value': spec}
408
490
  if stored is not None:
409
491
  col_schema['stored'] = stored
410
492
 
411
493
  new_col = self._create_columns({col_name: col_schema})[0]
412
494
  self._verify_column(new_col, set(self._schema.keys()), set(self._query_names))
413
- status = self._tbl_version.add_column(new_col, print_stats=print_stats, on_error=on_error)
495
+ status = self._tbl_version.add_columns([new_col], print_stats=print_stats, on_error=on_error)
414
496
  FileCache.get().emit_eviction_warnings()
415
497
  return status
416
498
 
@@ -422,37 +504,30 @@ class Table(SchemaObject):
422
504
  (on account of containing Python Callables or Exprs).
423
505
  """
424
506
  assert isinstance(spec, dict)
425
- valid_keys = {'type', 'value', 'stored'}
426
- has_type = False
507
+ valid_keys = {'type', 'value', 'stored', 'media_validation'}
427
508
  for k in spec.keys():
428
509
  if k not in valid_keys:
429
510
  raise excs.Error(f'Column {name}: invalid key {k!r}')
430
511
 
512
+ if 'type' not in spec and 'value' not in spec:
513
+ raise excs.Error(f"Column {name}: 'type' or 'value' must be specified")
514
+
431
515
  if 'type' in spec:
432
- has_type = True
433
516
  if not isinstance(spec['type'], (ts.ColumnType, type, _GenericAlias)):
434
517
  raise excs.Error(f'Column {name}: "type" must be a type or ColumnType, got {spec["type"]}')
435
518
 
436
519
  if 'value' in spec:
437
- value_spec = spec['value']
438
- value_expr = exprs.Expr.from_object(value_spec)
520
+ value_expr = exprs.Expr.from_object(spec['value'])
439
521
  if value_expr is None:
440
- # needs to be a Callable
441
- if not callable(value_spec):
442
- raise excs.Error(
443
- f'Column {name}: value needs to be either a Pixeltable expression or a Callable, '
444
- f'but it is a {type(value_spec)}')
445
- if 'type' not in spec:
446
- raise excs.Error(f'Column {name}: "type" is required if value is a Callable')
447
- else:
448
- has_type = True
449
- if 'type' in spec:
450
- raise excs.Error(f'Column {name}: "type" is redundant if value is a Pixeltable expression')
522
+ raise excs.Error(f'Column {name}: value must be a Pixeltable expression.')
523
+ if 'type' in spec:
524
+ raise excs.Error(f"Column {name}: 'type' is redundant if 'value' is specified")
525
+
526
+ if 'media_validation' in spec:
527
+ _ = catalog.MediaValidation.validated(spec['media_validation'], f'Column {name}: media_validation')
451
528
 
452
529
  if 'stored' in spec and not isinstance(spec['stored'], bool):
453
530
  raise excs.Error(f'Column {name}: "stored" must be a bool, got {spec["stored"]}')
454
- if not has_type:
455
- raise excs.Error(f'Column {name}: "type" is required')
456
531
 
457
532
  @classmethod
458
533
  def _create_columns(cls, schema: dict[str, Any]) -> list[Column]:
@@ -462,37 +537,42 @@ class Table(SchemaObject):
462
537
  col_type: Optional[ts.ColumnType] = None
463
538
  value_expr: Optional[exprs.Expr] = None
464
539
  primary_key: Optional[bool] = None
540
+ media_validation: Optional[catalog.MediaValidation] = None
465
541
  stored = True
466
542
 
467
543
  if isinstance(spec, (ts.ColumnType, type, _GenericAlias)):
468
- col_type = ts.ColumnType.normalize_type(spec, nullable_default=True)
544
+ col_type = ts.ColumnType.normalize_type(spec, nullable_default=True, allow_builtin_types=False)
469
545
  elif isinstance(spec, exprs.Expr):
470
546
  # create copy so we can modify it
471
547
  value_expr = spec.copy()
472
- elif callable(spec):
473
- raise excs.Error(
474
- f'Column {name} computed with a Callable: specify using a dictionary with '
475
- f'the "value" and "type" keys (e.g., "{name}": {{"value": <Callable>, "type": pxt.Int}})'
476
- )
477
548
  elif isinstance(spec, dict):
478
549
  cls._validate_column_spec(name, spec)
479
550
  if 'type' in spec:
480
- col_type = ts.ColumnType.normalize_type(spec['type'], nullable_default=True)
551
+ col_type = ts.ColumnType.normalize_type(
552
+ spec['type'], nullable_default=True, allow_builtin_types=False)
481
553
  value_expr = spec.get('value')
482
554
  if value_expr is not None and isinstance(value_expr, exprs.Expr):
483
555
  # create copy so we can modify it
484
556
  value_expr = value_expr.copy()
485
557
  stored = spec.get('stored', True)
486
558
  primary_key = spec.get('primary_key')
559
+ media_validation_str = spec.get('media_validation')
560
+ media_validation = (
561
+ catalog.MediaValidation[media_validation_str.upper()] if media_validation_str is not None
562
+ else None
563
+ )
564
+ else:
565
+ raise excs.Error(f'Invalid value for column {name!r}')
487
566
 
488
567
  column = Column(
489
- name, col_type=col_type, computed_with=value_expr, stored=stored, is_pk=primary_key)
568
+ name, col_type=col_type, computed_with=value_expr, stored=stored, is_pk=primary_key,
569
+ media_validation=media_validation)
490
570
  columns.append(column)
491
571
  return columns
492
572
 
493
573
  @classmethod
494
574
  def _verify_column(
495
- cls, col: Column, existing_column_names: Set[str], existing_query_names: Optional[Set[str]] = None
575
+ cls, col: Column, existing_column_names: set[str], existing_query_names: Optional[set[str]] = None
496
576
  ) -> None:
497
577
  """Check integrity of user-supplied Column and supply defaults"""
498
578
  if is_system_column_name(col.name):
@@ -513,7 +593,7 @@ class Table(SchemaObject):
513
593
  @classmethod
514
594
  def _verify_schema(cls, schema: list[Column]) -> None:
515
595
  """Check integrity of user-supplied schema and set defaults"""
516
- column_names: Set[str] = set()
596
+ column_names: set[str] = set()
517
597
  for col in schema:
518
598
  cls._verify_column(col, column_names)
519
599
  column_names.add(col.name)
@@ -694,7 +774,7 @@ class Table(SchemaObject):
694
774
 
695
775
  def _drop_index(
696
776
  self, *, column_name: Optional[str] = None, idx_name: Optional[str] = None,
697
- _idx_class: Optional[Type[index.IndexBase]] = None
777
+ _idx_class: Optional[type[index.IndexBase]] = None
698
778
  ) -> None:
699
779
  if self._tbl_version_path.is_snapshot():
700
780
  raise excs.Error('Cannot drop an index from a snapshot')
@@ -725,36 +805,68 @@ class Table(SchemaObject):
725
805
 
726
806
  @overload
727
807
  def insert(
728
- self, rows: Iterable[dict[str, Any]], /, *, print_stats: bool = False, fail_on_exception: bool = True
808
+ self,
809
+ rows: Iterable[dict[str, Any]],
810
+ /,
811
+ *,
812
+ print_stats: bool = False,
813
+ on_error: Literal['abort', 'ignore'] = 'abort'
729
814
  ) -> UpdateStatus: ...
730
815
 
731
816
  @overload
732
- def insert(self, *, print_stats: bool = False, fail_on_exception: bool = True, **kwargs: Any) -> UpdateStatus: ...
817
+ def insert(
818
+ self,
819
+ *,
820
+ print_stats: bool = False,
821
+ on_error: Literal['abort', 'ignore'] = 'abort',
822
+ **kwargs: Any
823
+ ) -> UpdateStatus: ...
733
824
 
734
825
  @abc.abstractmethod # type: ignore[misc]
735
826
  def insert(
736
- self, rows: Optional[Iterable[dict[str, Any]]] = None, /, *, print_stats: bool = False,
737
- fail_on_exception: bool = True, **kwargs: Any
827
+ self,
828
+ rows: Optional[Iterable[dict[str, Any]]] = None,
829
+ /,
830
+ *,
831
+ print_stats: bool = False,
832
+ on_error: Literal['abort', 'ignore'] = 'abort',
833
+ **kwargs: Any
738
834
  ) -> UpdateStatus:
739
835
  """Inserts rows into this table. There are two mutually exclusive call patterns:
740
836
 
741
837
  To insert multiple rows at a time:
742
- ``insert(rows: Iterable[dict[str, Any]], /, *, print_stats: bool = False, fail_on_exception: bool = True)``
838
+
839
+ ```python
840
+ insert(
841
+ rows: Iterable[dict[str, Any]],
842
+ /,
843
+ *,
844
+ print_stats: bool = False,
845
+ on_error: Literal['abort', 'ignore'] = 'abort'
846
+ )```
743
847
 
744
848
  To insert just a single row, you can use the more concise syntax:
745
- ``insert(*, print_stats: bool = False, fail_on_exception: bool = True, **kwargs: Any)``
849
+
850
+ ```python
851
+ insert(
852
+ *,
853
+ print_stats: bool = False,
854
+ on_error: Literal['abort', 'ignore'] = 'abort',
855
+ **kwargs: Any
856
+ )```
746
857
 
747
858
  Args:
748
859
  rows: (if inserting multiple rows) A list of rows to insert, each of which is a dictionary mapping column
749
860
  names to values.
750
861
  kwargs: (if inserting a single row) Keyword-argument pairs representing column names and values.
751
- print_stats: If ``True``, print statistics about the cost of computed columns.
752
- fail_on_exception:
753
- Determines how exceptions in computed columns and invalid media files (e.g., corrupt images)
754
- are handled.
755
- If ``False``, store error information (accessible as column properties 'errortype' and 'errormsg')
756
- for those cases, but continue inserting rows.
757
- If ``True``, raise an exception that aborts the insert.
862
+ print_stats: If `True`, print statistics about the cost of computed columns.
863
+ on_error: Determines the behavior if an error occurs while evaluating a computed column or detecting an
864
+ invalid media file (such as a corrupt image) for one of the inserted rows.
865
+
866
+ - If `on_error='abort'`, then an exception will be raised and the rows will not be inserted.
867
+ - If `on_error='ignore'`, then execution will continue and the rows will be inserted. Any cells
868
+ with errors will have a `None` value for that cell, with information about the error stored in the
869
+ corresponding `tbl.col_name.errortype` and `tbl.col_name.errormsg` fields.
758
870
 
759
871
  Returns:
760
872
  An [`UpdateStatus`][pixeltable.UpdateStatus] object containing information about the update.
@@ -765,7 +877,7 @@ class Table(SchemaObject):
765
877
  - The table is a view or snapshot.
766
878
  - The table has been dropped.
767
879
  - One of the rows being inserted does not conform to the table schema.
768
- - An error occurs during processing of computed columns, and `fail_on_exception=True`.
880
+ - An error occurs during processing of computed columns, and `on_error='abort'`.
769
881
 
770
882
  Examples:
771
883
  Insert two rows into the table `my_table` with three int columns ``a``, ``b``, and ``c``.
@@ -851,7 +963,7 @@ class Table(SchemaObject):
851
963
 
852
964
  # pseudo-column _rowid: contains the rowid of the row to update and can be used instead of the primary key
853
965
  has_rowid = _ROWID_COLUMN_NAME in rows[0]
854
- rowids: list[Tuple[int, ...]] = []
966
+ rowids: list[tuple[int, ...]] = []
855
967
  if len(pk_col_names) == 0 and not has_rowid:
856
968
  raise excs.Error('Table must have primary key for batch update')
857
969