pixeltable 0.2.21__py3-none-any.whl → 0.2.23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +2 -2
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +1 -1
- pixeltable/catalog/column.py +41 -29
- pixeltable/catalog/globals.py +18 -0
- pixeltable/catalog/insertable_table.py +30 -10
- pixeltable/catalog/table.py +198 -86
- pixeltable/catalog/table_version.py +47 -53
- pixeltable/catalog/table_version_path.py +2 -2
- pixeltable/catalog/view.py +17 -18
- pixeltable/dataframe.py +27 -36
- pixeltable/env.py +7 -0
- pixeltable/exec/__init__.py +0 -1
- pixeltable/exec/aggregation_node.py +6 -3
- pixeltable/exec/cache_prefetch_node.py +189 -43
- pixeltable/exec/data_row_batch.py +5 -22
- pixeltable/exec/exec_context.py +2 -2
- pixeltable/exec/exec_node.py +3 -2
- pixeltable/exec/expr_eval_node.py +23 -16
- pixeltable/exec/in_memory_data_node.py +6 -3
- pixeltable/exec/sql_node.py +24 -25
- pixeltable/exprs/arithmetic_expr.py +12 -5
- pixeltable/exprs/array_slice.py +7 -7
- pixeltable/exprs/column_property_ref.py +37 -10
- pixeltable/exprs/column_ref.py +97 -14
- pixeltable/exprs/comparison.py +10 -5
- pixeltable/exprs/compound_predicate.py +8 -7
- pixeltable/exprs/data_row.py +27 -18
- pixeltable/exprs/expr.py +53 -52
- pixeltable/exprs/expr_set.py +5 -0
- pixeltable/exprs/function_call.py +32 -16
- pixeltable/exprs/globals.py +4 -1
- pixeltable/exprs/in_predicate.py +8 -7
- pixeltable/exprs/inline_expr.py +4 -4
- pixeltable/exprs/is_null.py +4 -4
- pixeltable/exprs/json_mapper.py +11 -12
- pixeltable/exprs/json_path.py +6 -11
- pixeltable/exprs/literal.py +5 -5
- pixeltable/exprs/method_ref.py +5 -4
- pixeltable/exprs/object_ref.py +2 -1
- pixeltable/exprs/row_builder.py +88 -36
- pixeltable/exprs/rowid_ref.py +12 -11
- pixeltable/exprs/similarity_expr.py +12 -7
- pixeltable/exprs/sql_element_cache.py +7 -5
- pixeltable/exprs/type_cast.py +8 -6
- pixeltable/exprs/variable.py +5 -4
- pixeltable/func/aggregate_function.py +9 -9
- pixeltable/func/expr_template_function.py +6 -5
- pixeltable/func/function.py +11 -10
- pixeltable/func/udf.py +6 -11
- pixeltable/functions/__init__.py +2 -2
- pixeltable/functions/globals.py +5 -7
- pixeltable/functions/huggingface.py +155 -45
- pixeltable/functions/llama_cpp.py +107 -0
- pixeltable/functions/mistralai.py +1 -1
- pixeltable/functions/ollama.py +147 -0
- pixeltable/functions/openai.py +1 -1
- pixeltable/functions/replicate.py +72 -0
- pixeltable/functions/string.py +9 -0
- pixeltable/functions/together.py +1 -1
- pixeltable/functions/util.py +5 -2
- pixeltable/globals.py +67 -26
- pixeltable/index/btree.py +16 -3
- pixeltable/index/embedding_index.py +4 -4
- pixeltable/io/__init__.py +1 -2
- pixeltable/io/fiftyone.py +178 -0
- pixeltable/io/globals.py +96 -2
- pixeltable/iterators/base.py +3 -2
- pixeltable/iterators/document.py +1 -1
- pixeltable/iterators/video.py +120 -63
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_21.py +34 -0
- pixeltable/metadata/converters/util.py +45 -4
- pixeltable/metadata/notes.py +1 -0
- pixeltable/metadata/schema.py +8 -0
- pixeltable/plan.py +17 -15
- pixeltable/py.typed +0 -0
- pixeltable/store.py +7 -2
- pixeltable/tool/create_test_db_dump.py +1 -1
- pixeltable/tool/create_test_video.py +1 -1
- pixeltable/tool/embed_udf.py +1 -1
- pixeltable/tool/mypy_plugin.py +28 -5
- pixeltable/type_system.py +100 -36
- pixeltable/utils/coco.py +5 -5
- pixeltable/utils/documents.py +15 -1
- pixeltable/utils/formatter.py +12 -13
- pixeltable/utils/s3.py +6 -3
- {pixeltable-0.2.21.dist-info → pixeltable-0.2.23.dist-info}/METADATA +158 -49
- pixeltable-0.2.23.dist-info/RECORD +153 -0
- pixeltable/exec/media_validation_node.py +0 -43
- pixeltable-0.2.21.dist-info/RECORD +0 -148
- {pixeltable-0.2.21.dist-info → pixeltable-0.2.23.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.21.dist-info → pixeltable-0.2.23.dist-info}/WHEEL +0 -0
- {pixeltable-0.2.21.dist-info → pixeltable-0.2.23.dist-info}/entry_points.txt +0 -0
pixeltable/catalog/table.py
CHANGED
|
@@ -6,7 +6,7 @@ import json
|
|
|
6
6
|
import logging
|
|
7
7
|
from pathlib import Path
|
|
8
8
|
from typing import _GenericAlias # type: ignore[attr-defined]
|
|
9
|
-
from typing import TYPE_CHECKING, Any, Callable, Iterable, Literal, Optional,
|
|
9
|
+
from typing import TYPE_CHECKING, Any, Callable, Iterable, Literal, Optional, Sequence, Union, overload
|
|
10
10
|
from uuid import UUID
|
|
11
11
|
|
|
12
12
|
import pandas as pd
|
|
@@ -24,7 +24,7 @@ import pixeltable.type_system as ts
|
|
|
24
24
|
from pixeltable.utils.filecache import FileCache
|
|
25
25
|
|
|
26
26
|
from .column import Column
|
|
27
|
-
from .globals import _ROWID_COLUMN_NAME, UpdateStatus, is_system_column_name, is_valid_identifier
|
|
27
|
+
from .globals import _ROWID_COLUMN_NAME, UpdateStatus, is_system_column_name, is_valid_identifier, MediaValidation
|
|
28
28
|
from .schema_object import SchemaObject
|
|
29
29
|
from .table_version import TableVersion
|
|
30
30
|
from .table_version_path import TableVersionPath
|
|
@@ -91,6 +91,7 @@ class Table(SchemaObject):
|
|
|
91
91
|
'num_retained_versions': 10,
|
|
92
92
|
'is_view': False,
|
|
93
93
|
'is_snapshot': False,
|
|
94
|
+
'media_validation': 'on_write',
|
|
94
95
|
}
|
|
95
96
|
```
|
|
96
97
|
"""
|
|
@@ -101,6 +102,7 @@ class Table(SchemaObject):
|
|
|
101
102
|
md['schema_version'] = self._tbl_version.schema_version
|
|
102
103
|
md['comment'] = self._comment
|
|
103
104
|
md['num_retained_versions'] = self._num_retained_versions
|
|
105
|
+
md['media_validation'] = self._media_validation.name.lower()
|
|
104
106
|
return md
|
|
105
107
|
|
|
106
108
|
@property
|
|
@@ -123,7 +125,7 @@ class Table(SchemaObject):
|
|
|
123
125
|
def __getattr__(self, name: str) -> 'pxt.exprs.ColumnRef':
|
|
124
126
|
"""Return a ColumnRef for the given name.
|
|
125
127
|
"""
|
|
126
|
-
return
|
|
128
|
+
return self._tbl_version_path.get_column_ref(name)
|
|
127
129
|
|
|
128
130
|
@overload
|
|
129
131
|
def __getitem__(self, name: str) -> 'pxt.exprs.ColumnRef': ...
|
|
@@ -215,6 +217,12 @@ class Table(SchemaObject):
|
|
|
215
217
|
"""Return the number of rows in this table."""
|
|
216
218
|
return self._df().count()
|
|
217
219
|
|
|
220
|
+
@property
|
|
221
|
+
def columns(self) -> list[str]:
|
|
222
|
+
"""Return the names of the columns in this table. """
|
|
223
|
+
cols = self._tbl_version_path.columns()
|
|
224
|
+
return [c.name for c in cols]
|
|
225
|
+
|
|
218
226
|
@property
|
|
219
227
|
def _schema(self) -> dict[str, ts.ColumnType]:
|
|
220
228
|
"""Return the schema (column names and column types) of this table."""
|
|
@@ -244,7 +252,11 @@ class Table(SchemaObject):
|
|
|
244
252
|
def _num_retained_versions(self):
|
|
245
253
|
return self._tbl_version.num_retained_versions
|
|
246
254
|
|
|
247
|
-
|
|
255
|
+
@property
|
|
256
|
+
def _media_validation(self) -> MediaValidation:
|
|
257
|
+
return self._tbl_version.media_validation
|
|
258
|
+
|
|
259
|
+
def _description(self, cols: Optional[Iterable[Column]] = None) -> pd.DataFrame:
|
|
248
260
|
cols = self._tbl_version_path.columns()
|
|
249
261
|
df = pd.DataFrame({
|
|
250
262
|
'Column Name': [c.name for c in cols],
|
|
@@ -253,8 +265,8 @@ class Table(SchemaObject):
|
|
|
253
265
|
})
|
|
254
266
|
return df
|
|
255
267
|
|
|
256
|
-
def _description_html(self) -> pandas.io.formats.style.Styler:
|
|
257
|
-
pd_df = self._description()
|
|
268
|
+
def _description_html(self, cols: Optional[Iterable[Column]] = None) -> pandas.io.formats.style.Styler:
|
|
269
|
+
pd_df = self._description(cols)
|
|
258
270
|
# white-space: pre-wrap: print \n as newline
|
|
259
271
|
# th: center-align headings
|
|
260
272
|
return (
|
|
@@ -323,30 +335,74 @@ class Table(SchemaObject):
|
|
|
323
335
|
raise excs.Error(f'Column name must be a string, got {type(col_name)}')
|
|
324
336
|
if not isinstance(spec, (ts.ColumnType, exprs.Expr, type, _GenericAlias)):
|
|
325
337
|
raise excs.Error(f'Column spec must be a ColumnType, Expr, or type, got {type(spec)}')
|
|
326
|
-
self.add_column(
|
|
338
|
+
self.add_column(stored=None, print_stats=False, on_error='abort', **{col_name: spec})
|
|
339
|
+
|
|
340
|
+
def add_columns(
|
|
341
|
+
self,
|
|
342
|
+
schema: dict[str, Union[ts.ColumnType, builtins.type, _GenericAlias]]
|
|
343
|
+
) -> UpdateStatus:
|
|
344
|
+
"""
|
|
345
|
+
Adds multiple columns to the table. The columns must be concrete (non-computed) columns; to add computed columns,
|
|
346
|
+
use [`add_computed_column()`][pixeltable.catalog.Table.add_computed_column] instead.
|
|
347
|
+
|
|
348
|
+
The format of the `schema` argument is identical to the format of the schema in a call to
|
|
349
|
+
[`create_table()`][pixeltable.globals.create_table].
|
|
350
|
+
|
|
351
|
+
Args:
|
|
352
|
+
schema: A dictionary mapping column names to types.
|
|
353
|
+
|
|
354
|
+
Returns:
|
|
355
|
+
Information about the execution status of the operation.
|
|
356
|
+
|
|
357
|
+
Raises:
|
|
358
|
+
Error: If any column name is invalid or already exists.
|
|
359
|
+
|
|
360
|
+
Examples:
|
|
361
|
+
Add multiple columns to the table `my_table`:
|
|
362
|
+
|
|
363
|
+
>>> tbl = pxt.get_table('my_table')
|
|
364
|
+
... schema = {
|
|
365
|
+
... 'new_col_1': pxt.Int,
|
|
366
|
+
... 'new_col_2': pxt.String,
|
|
367
|
+
... }
|
|
368
|
+
... tbl.add_columns(schema)
|
|
369
|
+
"""
|
|
370
|
+
self._check_is_dropped()
|
|
371
|
+
col_schema = {
|
|
372
|
+
col_name: {'type': ts.ColumnType.normalize_type(spec, nullable_default=True, allow_builtin_types=False)}
|
|
373
|
+
for col_name, spec in schema.items()
|
|
374
|
+
}
|
|
375
|
+
new_cols = self._create_columns(col_schema)
|
|
376
|
+
for new_col in new_cols:
|
|
377
|
+
self._verify_column(new_col, set(self._schema.keys()), set(self._query_names))
|
|
378
|
+
status = self._tbl_version.add_columns(new_cols, print_stats=False, on_error='abort')
|
|
379
|
+
FileCache.get().emit_eviction_warnings()
|
|
380
|
+
return status
|
|
327
381
|
|
|
382
|
+
# TODO: add_column() still supports computed columns for backward-compatibility. In the future, computed columns
|
|
383
|
+
# will be supported only through add_computed_column(). At that point, we can remove the `stored`,
|
|
384
|
+
# `print_stats`, and `on_error` parameters, and change the method body to simply call self.add_columns(kwargs),
|
|
385
|
+
# simplifying the code. For the time being, there's some obvious code duplication.
|
|
328
386
|
def add_column(
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
**kwargs: Union[ts.ColumnType, builtins.type, _GenericAlias, exprs.Expr, Callable]
|
|
387
|
+
self,
|
|
388
|
+
*,
|
|
389
|
+
stored: Optional[bool] = None,
|
|
390
|
+
print_stats: bool = False,
|
|
391
|
+
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
392
|
+
**kwargs: Union[ts.ColumnType, builtins.type, _GenericAlias, exprs.Expr]
|
|
336
393
|
) -> UpdateStatus:
|
|
337
394
|
"""
|
|
338
395
|
Adds a column to the table.
|
|
339
396
|
|
|
340
397
|
Args:
|
|
341
|
-
kwargs: Exactly one keyword argument of the form `
|
|
342
|
-
type: The type of the column. Only valid and required if `value-expression` is a Callable.
|
|
398
|
+
kwargs: Exactly one keyword argument of the form `col_name=col_type`.
|
|
343
399
|
stored: Whether the column is materialized and stored or computed on demand. Only valid for image columns.
|
|
344
400
|
print_stats: If `True`, print execution metrics during evaluation.
|
|
345
401
|
on_error: Determines the behavior if an error occurs while evaluating the column expression for at least one
|
|
346
402
|
row.
|
|
347
403
|
|
|
348
|
-
-
|
|
349
|
-
-
|
|
404
|
+
- `'abort'`: an exception will be raised and the column will not be added.
|
|
405
|
+
- `'ignore'`: execution will continue and the column will be added. Any rows
|
|
350
406
|
with errors will have a `None` value for the column, with information about the error stored in the
|
|
351
407
|
corresponding `tbl.col_name.errortype` and `tbl.col_name.errormsg` fields.
|
|
352
408
|
|
|
@@ -364,53 +420,79 @@ class Table(SchemaObject):
|
|
|
364
420
|
Alternatively, this can also be expressed as:
|
|
365
421
|
|
|
366
422
|
>>> tbl['new_col'] = pxt.Int
|
|
423
|
+
"""
|
|
424
|
+
self._check_is_dropped()
|
|
425
|
+
# verify kwargs and construct column schema dict
|
|
426
|
+
if len(kwargs) != 1:
|
|
427
|
+
raise excs.Error(
|
|
428
|
+
f'add_column() requires exactly one keyword argument of the form "col_name=col_type"; '
|
|
429
|
+
f'got {len(kwargs)} instead ({", ".join(list(kwargs.keys()))})'
|
|
430
|
+
)
|
|
431
|
+
col_name, spec = next(iter(kwargs.items()))
|
|
432
|
+
if not is_valid_identifier(col_name):
|
|
433
|
+
raise excs.Error(f'Invalid column name: {col_name!r}')
|
|
434
|
+
|
|
435
|
+
col_schema: dict[str, Any] = {}
|
|
436
|
+
if isinstance(spec, (ts.ColumnType, builtins.type, _GenericAlias)):
|
|
437
|
+
col_schema['type'] = ts.ColumnType.normalize_type(spec, nullable_default=True, allow_builtin_types=False)
|
|
438
|
+
else:
|
|
439
|
+
col_schema['value'] = spec
|
|
440
|
+
if stored is not None:
|
|
441
|
+
col_schema['stored'] = stored
|
|
367
442
|
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
443
|
+
new_col = self._create_columns({col_name: col_schema})[0]
|
|
444
|
+
self._verify_column(new_col, set(self._schema.keys()), set(self._query_names))
|
|
445
|
+
status = self._tbl_version.add_columns([new_col], print_stats=print_stats, on_error=on_error)
|
|
446
|
+
FileCache.get().emit_eviction_warnings()
|
|
447
|
+
return status
|
|
371
448
|
|
|
372
|
-
|
|
449
|
+
def add_computed_column(
|
|
450
|
+
self,
|
|
451
|
+
*,
|
|
452
|
+
stored: Optional[bool] = None,
|
|
453
|
+
print_stats: bool = False,
|
|
454
|
+
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
455
|
+
**kwargs: exprs.Expr
|
|
456
|
+
) -> UpdateStatus:
|
|
457
|
+
"""
|
|
458
|
+
Adds a computed column to the table.
|
|
373
459
|
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
(by default, computed image columns are not stored but recomputed on demand):
|
|
460
|
+
Args:
|
|
461
|
+
kwargs: Exactly one keyword argument of the form `col_name=expression`.
|
|
377
462
|
|
|
378
|
-
|
|
463
|
+
Returns:
|
|
464
|
+
Information about the execution status of the operation.
|
|
379
465
|
|
|
380
|
-
|
|
466
|
+
Raises:
|
|
467
|
+
Error: If the column name is invalid or already exists.
|
|
468
|
+
|
|
469
|
+
Examples:
|
|
470
|
+
For a table with an image column `frame`, add an image column `rotated` that rotates the image by
|
|
471
|
+
90 degrees:
|
|
381
472
|
|
|
382
|
-
>>> tbl
|
|
473
|
+
>>> tbl.add_computed_column(rotated=tbl.frame.rotate(90))
|
|
383
474
|
|
|
384
475
|
Do the same, but now the column is unstored:
|
|
385
476
|
|
|
386
|
-
>>> tbl.
|
|
477
|
+
>>> tbl.add_computed_column(rotated=tbl.frame.rotate(90), stored=False)
|
|
387
478
|
"""
|
|
388
479
|
self._check_is_dropped()
|
|
389
|
-
# verify kwargs and construct column schema dict
|
|
390
480
|
if len(kwargs) != 1:
|
|
391
481
|
raise excs.Error(
|
|
392
|
-
f'
|
|
393
|
-
f'got {len(kwargs)} instead ({", ".join(list(kwargs.keys()))})'
|
|
482
|
+
f'add_computed_column() requires exactly one keyword argument of the form "column-name=type|value-expression"; '
|
|
483
|
+
f'got {len(kwargs)} arguments instead ({", ".join(list(kwargs.keys()))})'
|
|
394
484
|
)
|
|
395
485
|
col_name, spec = next(iter(kwargs.items()))
|
|
396
486
|
if not is_valid_identifier(col_name):
|
|
397
487
|
raise excs.Error(f'Invalid column name: {col_name!r}')
|
|
398
|
-
if isinstance(spec, (ts.ColumnType, builtins.type, _GenericAlias, exprs.Expr)) and type is not None:
|
|
399
|
-
raise excs.Error(f'add_column(): keyword argument "type" is redundant')
|
|
400
488
|
|
|
401
|
-
col_schema: dict[str, Any] = {}
|
|
402
|
-
if isinstance(spec, (ts.ColumnType, builtins.type, _GenericAlias)):
|
|
403
|
-
col_schema['type'] = ts.ColumnType.normalize_type(spec, nullable_default=True)
|
|
404
|
-
else:
|
|
405
|
-
col_schema['value'] = spec
|
|
406
|
-
if type is not None:
|
|
407
|
-
col_schema['type'] = ts.ColumnType.normalize_type(type, nullable_default=True)
|
|
489
|
+
col_schema: dict[str, Any] = {'value': spec}
|
|
408
490
|
if stored is not None:
|
|
409
491
|
col_schema['stored'] = stored
|
|
410
492
|
|
|
411
493
|
new_col = self._create_columns({col_name: col_schema})[0]
|
|
412
494
|
self._verify_column(new_col, set(self._schema.keys()), set(self._query_names))
|
|
413
|
-
status = self._tbl_version.
|
|
495
|
+
status = self._tbl_version.add_columns([new_col], print_stats=print_stats, on_error=on_error)
|
|
414
496
|
FileCache.get().emit_eviction_warnings()
|
|
415
497
|
return status
|
|
416
498
|
|
|
@@ -422,37 +504,30 @@ class Table(SchemaObject):
|
|
|
422
504
|
(on account of containing Python Callables or Exprs).
|
|
423
505
|
"""
|
|
424
506
|
assert isinstance(spec, dict)
|
|
425
|
-
valid_keys = {'type', 'value', 'stored'}
|
|
426
|
-
has_type = False
|
|
507
|
+
valid_keys = {'type', 'value', 'stored', 'media_validation'}
|
|
427
508
|
for k in spec.keys():
|
|
428
509
|
if k not in valid_keys:
|
|
429
510
|
raise excs.Error(f'Column {name}: invalid key {k!r}')
|
|
430
511
|
|
|
512
|
+
if 'type' not in spec and 'value' not in spec:
|
|
513
|
+
raise excs.Error(f"Column {name}: 'type' or 'value' must be specified")
|
|
514
|
+
|
|
431
515
|
if 'type' in spec:
|
|
432
|
-
has_type = True
|
|
433
516
|
if not isinstance(spec['type'], (ts.ColumnType, type, _GenericAlias)):
|
|
434
517
|
raise excs.Error(f'Column {name}: "type" must be a type or ColumnType, got {spec["type"]}')
|
|
435
518
|
|
|
436
519
|
if 'value' in spec:
|
|
437
|
-
|
|
438
|
-
value_expr = exprs.Expr.from_object(value_spec)
|
|
520
|
+
value_expr = exprs.Expr.from_object(spec['value'])
|
|
439
521
|
if value_expr is None:
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
raise excs.Error(f'Column {name}: "type" is required if value is a Callable')
|
|
447
|
-
else:
|
|
448
|
-
has_type = True
|
|
449
|
-
if 'type' in spec:
|
|
450
|
-
raise excs.Error(f'Column {name}: "type" is redundant if value is a Pixeltable expression')
|
|
522
|
+
raise excs.Error(f'Column {name}: value must be a Pixeltable expression.')
|
|
523
|
+
if 'type' in spec:
|
|
524
|
+
raise excs.Error(f"Column {name}: 'type' is redundant if 'value' is specified")
|
|
525
|
+
|
|
526
|
+
if 'media_validation' in spec:
|
|
527
|
+
_ = catalog.MediaValidation.validated(spec['media_validation'], f'Column {name}: media_validation')
|
|
451
528
|
|
|
452
529
|
if 'stored' in spec and not isinstance(spec['stored'], bool):
|
|
453
530
|
raise excs.Error(f'Column {name}: "stored" must be a bool, got {spec["stored"]}')
|
|
454
|
-
if not has_type:
|
|
455
|
-
raise excs.Error(f'Column {name}: "type" is required')
|
|
456
531
|
|
|
457
532
|
@classmethod
|
|
458
533
|
def _create_columns(cls, schema: dict[str, Any]) -> list[Column]:
|
|
@@ -462,37 +537,42 @@ class Table(SchemaObject):
|
|
|
462
537
|
col_type: Optional[ts.ColumnType] = None
|
|
463
538
|
value_expr: Optional[exprs.Expr] = None
|
|
464
539
|
primary_key: Optional[bool] = None
|
|
540
|
+
media_validation: Optional[catalog.MediaValidation] = None
|
|
465
541
|
stored = True
|
|
466
542
|
|
|
467
543
|
if isinstance(spec, (ts.ColumnType, type, _GenericAlias)):
|
|
468
|
-
col_type = ts.ColumnType.normalize_type(spec, nullable_default=True)
|
|
544
|
+
col_type = ts.ColumnType.normalize_type(spec, nullable_default=True, allow_builtin_types=False)
|
|
469
545
|
elif isinstance(spec, exprs.Expr):
|
|
470
546
|
# create copy so we can modify it
|
|
471
547
|
value_expr = spec.copy()
|
|
472
|
-
elif callable(spec):
|
|
473
|
-
raise excs.Error(
|
|
474
|
-
f'Column {name} computed with a Callable: specify using a dictionary with '
|
|
475
|
-
f'the "value" and "type" keys (e.g., "{name}": {{"value": <Callable>, "type": pxt.Int}})'
|
|
476
|
-
)
|
|
477
548
|
elif isinstance(spec, dict):
|
|
478
549
|
cls._validate_column_spec(name, spec)
|
|
479
550
|
if 'type' in spec:
|
|
480
|
-
col_type = ts.ColumnType.normalize_type(
|
|
551
|
+
col_type = ts.ColumnType.normalize_type(
|
|
552
|
+
spec['type'], nullable_default=True, allow_builtin_types=False)
|
|
481
553
|
value_expr = spec.get('value')
|
|
482
554
|
if value_expr is not None and isinstance(value_expr, exprs.Expr):
|
|
483
555
|
# create copy so we can modify it
|
|
484
556
|
value_expr = value_expr.copy()
|
|
485
557
|
stored = spec.get('stored', True)
|
|
486
558
|
primary_key = spec.get('primary_key')
|
|
559
|
+
media_validation_str = spec.get('media_validation')
|
|
560
|
+
media_validation = (
|
|
561
|
+
catalog.MediaValidation[media_validation_str.upper()] if media_validation_str is not None
|
|
562
|
+
else None
|
|
563
|
+
)
|
|
564
|
+
else:
|
|
565
|
+
raise excs.Error(f'Invalid value for column {name!r}')
|
|
487
566
|
|
|
488
567
|
column = Column(
|
|
489
|
-
name, col_type=col_type, computed_with=value_expr, stored=stored, is_pk=primary_key
|
|
568
|
+
name, col_type=col_type, computed_with=value_expr, stored=stored, is_pk=primary_key,
|
|
569
|
+
media_validation=media_validation)
|
|
490
570
|
columns.append(column)
|
|
491
571
|
return columns
|
|
492
572
|
|
|
493
573
|
@classmethod
|
|
494
574
|
def _verify_column(
|
|
495
|
-
cls, col: Column, existing_column_names:
|
|
575
|
+
cls, col: Column, existing_column_names: set[str], existing_query_names: Optional[set[str]] = None
|
|
496
576
|
) -> None:
|
|
497
577
|
"""Check integrity of user-supplied Column and supply defaults"""
|
|
498
578
|
if is_system_column_name(col.name):
|
|
@@ -513,7 +593,7 @@ class Table(SchemaObject):
|
|
|
513
593
|
@classmethod
|
|
514
594
|
def _verify_schema(cls, schema: list[Column]) -> None:
|
|
515
595
|
"""Check integrity of user-supplied schema and set defaults"""
|
|
516
|
-
column_names:
|
|
596
|
+
column_names: set[str] = set()
|
|
517
597
|
for col in schema:
|
|
518
598
|
cls._verify_column(col, column_names)
|
|
519
599
|
column_names.add(col.name)
|
|
@@ -694,7 +774,7 @@ class Table(SchemaObject):
|
|
|
694
774
|
|
|
695
775
|
def _drop_index(
|
|
696
776
|
self, *, column_name: Optional[str] = None, idx_name: Optional[str] = None,
|
|
697
|
-
_idx_class: Optional[
|
|
777
|
+
_idx_class: Optional[type[index.IndexBase]] = None
|
|
698
778
|
) -> None:
|
|
699
779
|
if self._tbl_version_path.is_snapshot():
|
|
700
780
|
raise excs.Error('Cannot drop an index from a snapshot')
|
|
@@ -725,36 +805,68 @@ class Table(SchemaObject):
|
|
|
725
805
|
|
|
726
806
|
@overload
|
|
727
807
|
def insert(
|
|
728
|
-
|
|
808
|
+
self,
|
|
809
|
+
rows: Iterable[dict[str, Any]],
|
|
810
|
+
/,
|
|
811
|
+
*,
|
|
812
|
+
print_stats: bool = False,
|
|
813
|
+
on_error: Literal['abort', 'ignore'] = 'abort'
|
|
729
814
|
) -> UpdateStatus: ...
|
|
730
815
|
|
|
731
816
|
@overload
|
|
732
|
-
def insert(
|
|
817
|
+
def insert(
|
|
818
|
+
self,
|
|
819
|
+
*,
|
|
820
|
+
print_stats: bool = False,
|
|
821
|
+
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
822
|
+
**kwargs: Any
|
|
823
|
+
) -> UpdateStatus: ...
|
|
733
824
|
|
|
734
825
|
@abc.abstractmethod # type: ignore[misc]
|
|
735
826
|
def insert(
|
|
736
|
-
|
|
737
|
-
|
|
827
|
+
self,
|
|
828
|
+
rows: Optional[Iterable[dict[str, Any]]] = None,
|
|
829
|
+
/,
|
|
830
|
+
*,
|
|
831
|
+
print_stats: bool = False,
|
|
832
|
+
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
833
|
+
**kwargs: Any
|
|
738
834
|
) -> UpdateStatus:
|
|
739
835
|
"""Inserts rows into this table. There are two mutually exclusive call patterns:
|
|
740
836
|
|
|
741
837
|
To insert multiple rows at a time:
|
|
742
|
-
|
|
838
|
+
|
|
839
|
+
```python
|
|
840
|
+
insert(
|
|
841
|
+
rows: Iterable[dict[str, Any]],
|
|
842
|
+
/,
|
|
843
|
+
*,
|
|
844
|
+
print_stats: bool = False,
|
|
845
|
+
on_error: Literal['abort', 'ignore'] = 'abort'
|
|
846
|
+
)```
|
|
743
847
|
|
|
744
848
|
To insert just a single row, you can use the more concise syntax:
|
|
745
|
-
|
|
849
|
+
|
|
850
|
+
```python
|
|
851
|
+
insert(
|
|
852
|
+
*,
|
|
853
|
+
print_stats: bool = False,
|
|
854
|
+
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
855
|
+
**kwargs: Any
|
|
856
|
+
)```
|
|
746
857
|
|
|
747
858
|
Args:
|
|
748
859
|
rows: (if inserting multiple rows) A list of rows to insert, each of which is a dictionary mapping column
|
|
749
860
|
names to values.
|
|
750
861
|
kwargs: (if inserting a single row) Keyword-argument pairs representing column names and values.
|
|
751
|
-
print_stats: If
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
If
|
|
756
|
-
|
|
757
|
-
|
|
862
|
+
print_stats: If `True`, print statistics about the cost of computed columns.
|
|
863
|
+
on_error: Determines the behavior if an error occurs while evaluating a computed column or detecting an
|
|
864
|
+
invalid media file (such as a corrupt image) for one of the inserted rows.
|
|
865
|
+
|
|
866
|
+
- If `on_error='abort'`, then an exception will be raised and the rows will not be inserted.
|
|
867
|
+
- If `on_error='ignore'`, then execution will continue and the rows will be inserted. Any cells
|
|
868
|
+
with errors will have a `None` value for that cell, with information about the error stored in the
|
|
869
|
+
corresponding `tbl.col_name.errortype` and `tbl.col_name.errormsg` fields.
|
|
758
870
|
|
|
759
871
|
Returns:
|
|
760
872
|
An [`UpdateStatus`][pixeltable.UpdateStatus] object containing information about the update.
|
|
@@ -765,7 +877,7 @@ class Table(SchemaObject):
|
|
|
765
877
|
- The table is a view or snapshot.
|
|
766
878
|
- The table has been dropped.
|
|
767
879
|
- One of the rows being inserted does not conform to the table schema.
|
|
768
|
-
- An error occurs during processing of computed columns, and `
|
|
880
|
+
- An error occurs during processing of computed columns, and `on_error='ignore'`.
|
|
769
881
|
|
|
770
882
|
Examples:
|
|
771
883
|
Insert two rows into the table `my_table` with three int columns ``a``, ``b``, and ``c``.
|
|
@@ -851,7 +963,7 @@ class Table(SchemaObject):
|
|
|
851
963
|
|
|
852
964
|
# pseudo-column _rowid: contains the rowid of the row to update and can be used instead of the primary key
|
|
853
965
|
has_rowid = _ROWID_COLUMN_NAME in rows[0]
|
|
854
|
-
rowids: list[
|
|
966
|
+
rowids: list[tuple[int, ...]] = []
|
|
855
967
|
if len(pk_col_names) == 0 and not has_rowid:
|
|
856
968
|
raise excs.Error('Table must have primary key for batch update')
|
|
857
969
|
|