pixeltable 0.2.22__py3-none-any.whl → 0.2.23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +2 -2
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/column.py +8 -22
- pixeltable/catalog/insertable_table.py +26 -8
- pixeltable/catalog/table.py +179 -83
- pixeltable/catalog/table_version.py +13 -39
- pixeltable/catalog/table_version_path.py +2 -2
- pixeltable/catalog/view.py +2 -2
- pixeltable/dataframe.py +20 -28
- pixeltable/env.py +2 -0
- pixeltable/exec/cache_prefetch_node.py +189 -43
- pixeltable/exec/data_row_batch.py +3 -3
- pixeltable/exec/exec_context.py +2 -2
- pixeltable/exec/exec_node.py +2 -2
- pixeltable/exec/expr_eval_node.py +8 -8
- pixeltable/exprs/arithmetic_expr.py +9 -4
- pixeltable/exprs/column_ref.py +4 -0
- pixeltable/exprs/comparison.py +5 -0
- pixeltable/exprs/json_path.py +1 -1
- pixeltable/func/aggregate_function.py +8 -8
- pixeltable/func/expr_template_function.py +6 -5
- pixeltable/func/udf.py +6 -11
- pixeltable/functions/huggingface.py +136 -25
- pixeltable/functions/llama_cpp.py +3 -2
- pixeltable/functions/mistralai.py +1 -1
- pixeltable/functions/openai.py +1 -1
- pixeltable/functions/together.py +1 -1
- pixeltable/functions/util.py +5 -2
- pixeltable/globals.py +55 -6
- pixeltable/plan.py +1 -1
- pixeltable/tool/create_test_db_dump.py +1 -1
- pixeltable/type_system.py +83 -35
- pixeltable/utils/coco.py +5 -5
- pixeltable/utils/formatter.py +3 -3
- pixeltable/utils/s3.py +6 -3
- {pixeltable-0.2.22.dist-info → pixeltable-0.2.23.dist-info}/METADATA +119 -46
- {pixeltable-0.2.22.dist-info → pixeltable-0.2.23.dist-info}/RECORD +40 -40
- {pixeltable-0.2.22.dist-info → pixeltable-0.2.23.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.22.dist-info → pixeltable-0.2.23.dist-info}/WHEEL +0 -0
- {pixeltable-0.2.22.dist-info → pixeltable-0.2.23.dist-info}/entry_points.txt +0 -0
pixeltable/__init__.py
CHANGED
|
@@ -3,8 +3,8 @@ from .dataframe import DataFrame
|
|
|
3
3
|
from .exceptions import Error
|
|
4
4
|
from .exprs import RELATIVE_PATH_ROOT
|
|
5
5
|
from .func import Aggregator, Function, expr_udf, uda, udf
|
|
6
|
-
from .globals import (array, configure_logging, create_dir, create_table, create_view, drop_dir,
|
|
7
|
-
init, list_dirs, list_functions, list_tables, move)
|
|
6
|
+
from .globals import (array, configure_logging, create_dir, create_snapshot, create_table, create_view, drop_dir,
|
|
7
|
+
drop_table, get_table, init, list_dirs, list_functions, list_tables, move)
|
|
8
8
|
from .type_system import (Array, ArrayType, Audio, AudioType, Bool, BoolType, ColumnType, Document, DocumentType, Float,
|
|
9
9
|
FloatType, Image, ImageType, Int, IntType, Json, JsonType, Required, String, StringType,
|
|
10
10
|
Timestamp, TimestampType, Video, VideoType)
|
pixeltable/__version__.py
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
# These version placeholders will be replaced during build.
|
|
2
|
-
__version__ = "0.2.22"
|
|
3
|
-
__version_tuple__ = (0, 2, 22)
|
|
2
|
+
__version__ = "0.2.23"
|
|
3
|
+
__version_tuple__ = (0, 2, 23)
|
pixeltable/catalog/column.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
|
-
from typing import TYPE_CHECKING, Any,
|
|
4
|
+
from typing import TYPE_CHECKING, Any, Optional
|
|
5
5
|
|
|
6
6
|
import sqlalchemy as sql
|
|
7
7
|
|
|
@@ -35,7 +35,6 @@ class Column:
|
|
|
35
35
|
sa_col_type: Optional[sql.sqltypes.TypeEngine]
|
|
36
36
|
sa_errormsg_col: Optional[sql.schema.Column]
|
|
37
37
|
sa_errortype_col: Optional[sql.schema.Column]
|
|
38
|
-
compute_func: Optional[Callable]
|
|
39
38
|
_value_expr: Optional[exprs.Expr]
|
|
40
39
|
value_expr_dict: Optional[dict[str, Any]]
|
|
41
40
|
dependent_cols: set[Column]
|
|
@@ -43,7 +42,7 @@ class Column:
|
|
|
43
42
|
|
|
44
43
|
def __init__(
|
|
45
44
|
self, name: Optional[str], col_type: Optional[ts.ColumnType] = None,
|
|
46
|
-
computed_with: Optional[
|
|
45
|
+
computed_with: Optional[exprs.Expr] = None,
|
|
47
46
|
is_pk: bool = False, stored: bool = True, media_validation: Optional[MediaValidation] = None,
|
|
48
47
|
col_id: Optional[int] = None, schema_version_add: Optional[int] = None,
|
|
49
48
|
schema_version_drop: Optional[int] = None, sa_col_type: Optional[sql.sqltypes.TypeEngine] = None,
|
|
@@ -54,7 +53,7 @@ class Column:
|
|
|
54
53
|
Args:
|
|
55
54
|
name: column name; None for system columns (eg, index columns)
|
|
56
55
|
col_type: column type; can be None if the type can be derived from ``computed_with``
|
|
57
|
-
computed_with:
|
|
56
|
+
computed_with: an Expr that computes the column value
|
|
58
57
|
is_pk: if True, this column is part of the primary key
|
|
59
58
|
stored: determines whether a computed column is present in the stored table or recomputed on demand
|
|
60
59
|
col_id: column ID (only used internally)
|
|
@@ -64,11 +63,6 @@ class Column:
|
|
|
64
63
|
col_type is None
|
|
65
64
|
- when loaded from md store: ``computed_with`` is set and col_type is set
|
|
66
65
|
|
|
67
|
-
``computed_with`` is a Callable:
|
|
68
|
-
- the callable's parameter names must correspond to existing columns in the table for which this Column
|
|
69
|
-
is being used
|
|
70
|
-
- ``col_type`` needs to be set to the callable's return type
|
|
71
|
-
|
|
72
66
|
``stored`` (only valid for computed image columns):
|
|
73
67
|
- if True: the column is present in the stored table
|
|
74
68
|
- if False: the column is not present in the stored table and recomputed during a query
|
|
@@ -80,22 +74,14 @@ class Column:
|
|
|
80
74
|
if col_type is None and computed_with is None:
|
|
81
75
|
raise excs.Error(f'Column `{name}`: col_type is required if computed_with is not specified')
|
|
82
76
|
|
|
83
|
-
self._value_expr = None
|
|
84
|
-
self.compute_func = None
|
|
77
|
+
self._value_expr: Optional[exprs.Expr] = None
|
|
85
78
|
self.value_expr_dict = value_expr_dict
|
|
86
79
|
if computed_with is not None:
|
|
87
80
|
value_expr = exprs.Expr.from_object(computed_with)
|
|
88
81
|
if value_expr is None:
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
f'Column {name}: computed_with needs to be either a Pixeltable expression or a Callable, '
|
|
93
|
-
f'but it is a {type(computed_with)}')
|
|
94
|
-
if col_type is None:
|
|
95
|
-
raise excs.Error(f'Column {name}: col_type is required if computed_with is a Callable')
|
|
96
|
-
# we need to turn the computed_with function into an Expr, but this requires resolving
|
|
97
|
-
# column name references and for that we need to wait until we're assigned to a Table
|
|
98
|
-
self.compute_func = computed_with
|
|
82
|
+
raise excs.Error(
|
|
83
|
+
f'Column {name}: computed_with needs to be a valid Pixeltable expression, '
|
|
84
|
+
f'but it is a {type(computed_with)}')
|
|
99
85
|
else:
|
|
100
86
|
self._value_expr = value_expr.copy()
|
|
101
87
|
self.col_type = self._value_expr.col_type
|
|
@@ -158,7 +144,7 @@ class Column:
|
|
|
158
144
|
|
|
159
145
|
@property
|
|
160
146
|
def is_computed(self) -> bool:
|
|
161
|
-
return self.
|
|
147
|
+
return self._value_expr is not None or self.value_expr_dict is not None
|
|
162
148
|
|
|
163
149
|
@property
|
|
164
150
|
def is_stored(self) -> bool:
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
|
-
from typing import Any,
|
|
4
|
+
from typing import Any, Iterable, Literal, Optional, overload
|
|
5
5
|
from uuid import UUID
|
|
6
6
|
|
|
7
7
|
import sqlalchemy.orm as orm
|
|
@@ -13,7 +13,7 @@ from pixeltable.env import Env
|
|
|
13
13
|
from pixeltable.utils.filecache import FileCache
|
|
14
14
|
|
|
15
15
|
from .catalog import Catalog
|
|
16
|
-
from .globals import
|
|
16
|
+
from .globals import MediaValidation, UpdateStatus
|
|
17
17
|
from .table import Table
|
|
18
18
|
from .table_version import TableVersion
|
|
19
19
|
from .table_version_path import TableVersionPath
|
|
@@ -36,7 +36,7 @@ class InsertableTable(Table):
|
|
|
36
36
|
@classmethod
|
|
37
37
|
def _create(
|
|
38
38
|
cls, dir_id: UUID, name: str, schema: dict[str, ts.ColumnType], df: Optional[pxt.DataFrame],
|
|
39
|
-
primary_key:
|
|
39
|
+
primary_key: list[str], num_retained_versions: int, comment: str, media_validation: MediaValidation
|
|
40
40
|
) -> InsertableTable:
|
|
41
41
|
columns = cls._create_columns(schema)
|
|
42
42
|
cls._verify_schema(columns)
|
|
@@ -79,15 +79,31 @@ class InsertableTable(Table):
|
|
|
79
79
|
|
|
80
80
|
@overload
|
|
81
81
|
def insert(
|
|
82
|
-
|
|
82
|
+
self,
|
|
83
|
+
rows: Iterable[dict[str, Any]],
|
|
84
|
+
/,
|
|
85
|
+
*,
|
|
86
|
+
print_stats: bool = False,
|
|
87
|
+
on_error: Literal['abort', 'ignore'] = 'abort'
|
|
83
88
|
) -> UpdateStatus: ...
|
|
84
89
|
|
|
85
90
|
@overload
|
|
86
|
-
def insert(
|
|
91
|
+
def insert(
|
|
92
|
+
self,
|
|
93
|
+
*,
|
|
94
|
+
print_stats: bool = False,
|
|
95
|
+
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
96
|
+
**kwargs: Any
|
|
97
|
+
) -> UpdateStatus: ...
|
|
87
98
|
|
|
88
99
|
def insert( # type: ignore[misc]
|
|
89
|
-
|
|
90
|
-
|
|
100
|
+
self,
|
|
101
|
+
rows: Optional[Iterable[dict[str, Any]]] = None,
|
|
102
|
+
/,
|
|
103
|
+
*,
|
|
104
|
+
print_stats: bool = False,
|
|
105
|
+
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
106
|
+
**kwargs: Any
|
|
91
107
|
) -> UpdateStatus:
|
|
92
108
|
if rows is None:
|
|
93
109
|
rows = [kwargs]
|
|
@@ -96,6 +112,8 @@ class InsertableTable(Table):
|
|
|
96
112
|
if len(kwargs) > 0:
|
|
97
113
|
raise excs.Error('`kwargs` cannot be specified unless `rows is None`.')
|
|
98
114
|
|
|
115
|
+
fail_on_exception = on_error == 'abort'
|
|
116
|
+
|
|
99
117
|
if not isinstance(rows, list):
|
|
100
118
|
raise excs.Error('rows must be a list of dictionaries')
|
|
101
119
|
if len(rows) == 0:
|
|
@@ -121,7 +139,7 @@ class InsertableTable(Table):
|
|
|
121
139
|
FileCache.get().emit_eviction_warnings()
|
|
122
140
|
return status
|
|
123
141
|
|
|
124
|
-
def _validate_input_rows(self, rows:
|
|
142
|
+
def _validate_input_rows(self, rows: list[dict[str, Any]]) -> None:
|
|
125
143
|
"""Verify that the input rows match the table schema"""
|
|
126
144
|
valid_col_names = set(self._schema.keys())
|
|
127
145
|
reqd_col_names = set(self._tbl_version_path.tbl_version.get_required_col_names())
|
pixeltable/catalog/table.py
CHANGED
|
@@ -6,7 +6,7 @@ import json
|
|
|
6
6
|
import logging
|
|
7
7
|
from pathlib import Path
|
|
8
8
|
from typing import _GenericAlias # type: ignore[attr-defined]
|
|
9
|
-
from typing import TYPE_CHECKING, Any, Callable, Iterable, Literal, Optional,
|
|
9
|
+
from typing import TYPE_CHECKING, Any, Callable, Iterable, Literal, Optional, Sequence, Union, overload
|
|
10
10
|
from uuid import UUID
|
|
11
11
|
|
|
12
12
|
import pandas as pd
|
|
@@ -125,7 +125,7 @@ class Table(SchemaObject):
|
|
|
125
125
|
def __getattr__(self, name: str) -> 'pxt.exprs.ColumnRef':
|
|
126
126
|
"""Return a ColumnRef for the given name.
|
|
127
127
|
"""
|
|
128
|
-
return
|
|
128
|
+
return self._tbl_version_path.get_column_ref(name)
|
|
129
129
|
|
|
130
130
|
@overload
|
|
131
131
|
def __getitem__(self, name: str) -> 'pxt.exprs.ColumnRef': ...
|
|
@@ -217,6 +217,12 @@ class Table(SchemaObject):
|
|
|
217
217
|
"""Return the number of rows in this table."""
|
|
218
218
|
return self._df().count()
|
|
219
219
|
|
|
220
|
+
@property
|
|
221
|
+
def columns(self) -> list[str]:
|
|
222
|
+
"""Return the names of the columns in this table. """
|
|
223
|
+
cols = self._tbl_version_path.columns()
|
|
224
|
+
return [c.name for c in cols]
|
|
225
|
+
|
|
220
226
|
@property
|
|
221
227
|
def _schema(self) -> dict[str, ts.ColumnType]:
|
|
222
228
|
"""Return the schema (column names and column types) of this table."""
|
|
@@ -250,7 +256,7 @@ class Table(SchemaObject):
|
|
|
250
256
|
def _media_validation(self) -> MediaValidation:
|
|
251
257
|
return self._tbl_version.media_validation
|
|
252
258
|
|
|
253
|
-
def _description(self) -> pd.DataFrame:
|
|
259
|
+
def _description(self, cols: Optional[Iterable[Column]] = None) -> pd.DataFrame:
|
|
254
260
|
cols = self._tbl_version_path.columns()
|
|
255
261
|
df = pd.DataFrame({
|
|
256
262
|
'Column Name': [c.name for c in cols],
|
|
@@ -259,8 +265,8 @@ class Table(SchemaObject):
|
|
|
259
265
|
})
|
|
260
266
|
return df
|
|
261
267
|
|
|
262
|
-
def _description_html(self) -> pandas.io.formats.style.Styler:
|
|
263
|
-
pd_df = self._description()
|
|
268
|
+
def _description_html(self, cols: Optional[Iterable[Column]] = None) -> pandas.io.formats.style.Styler:
|
|
269
|
+
pd_df = self._description(cols)
|
|
264
270
|
# white-space: pre-wrap: print \n as newline
|
|
265
271
|
# th: center-align headings
|
|
266
272
|
return (
|
|
@@ -329,30 +335,74 @@ class Table(SchemaObject):
|
|
|
329
335
|
raise excs.Error(f'Column name must be a string, got {type(col_name)}')
|
|
330
336
|
if not isinstance(spec, (ts.ColumnType, exprs.Expr, type, _GenericAlias)):
|
|
331
337
|
raise excs.Error(f'Column spec must be a ColumnType, Expr, or type, got {type(spec)}')
|
|
332
|
-
self.add_column(
|
|
338
|
+
self.add_column(stored=None, print_stats=False, on_error='abort', **{col_name: spec})
|
|
339
|
+
|
|
340
|
+
def add_columns(
|
|
341
|
+
self,
|
|
342
|
+
schema: dict[str, Union[ts.ColumnType, builtins.type, _GenericAlias]]
|
|
343
|
+
) -> UpdateStatus:
|
|
344
|
+
"""
|
|
345
|
+
Adds multiple columns to the table. The columns must be concrete (non-computed) columns; to add computed columns,
|
|
346
|
+
use [`add_computed_column()`][pixeltable.catalog.Table.add_computed_column] instead.
|
|
347
|
+
|
|
348
|
+
The format of the `schema` argument is identical to the format of the schema in a call to
|
|
349
|
+
[`create_table()`][pixeltable.globals.create_table].
|
|
350
|
+
|
|
351
|
+
Args:
|
|
352
|
+
schema: A dictionary mapping column names to types.
|
|
353
|
+
|
|
354
|
+
Returns:
|
|
355
|
+
Information about the execution status of the operation.
|
|
356
|
+
|
|
357
|
+
Raises:
|
|
358
|
+
Error: If any column name is invalid or already exists.
|
|
359
|
+
|
|
360
|
+
Examples:
|
|
361
|
+
Add multiple columns to the table `my_table`:
|
|
362
|
+
|
|
363
|
+
>>> tbl = pxt.get_table('my_table')
|
|
364
|
+
... schema = {
|
|
365
|
+
... 'new_col_1': pxt.Int,
|
|
366
|
+
... 'new_col_2': pxt.String,
|
|
367
|
+
... }
|
|
368
|
+
... tbl.add_columns(schema)
|
|
369
|
+
"""
|
|
370
|
+
self._check_is_dropped()
|
|
371
|
+
col_schema = {
|
|
372
|
+
col_name: {'type': ts.ColumnType.normalize_type(spec, nullable_default=True, allow_builtin_types=False)}
|
|
373
|
+
for col_name, spec in schema.items()
|
|
374
|
+
}
|
|
375
|
+
new_cols = self._create_columns(col_schema)
|
|
376
|
+
for new_col in new_cols:
|
|
377
|
+
self._verify_column(new_col, set(self._schema.keys()), set(self._query_names))
|
|
378
|
+
status = self._tbl_version.add_columns(new_cols, print_stats=False, on_error='abort')
|
|
379
|
+
FileCache.get().emit_eviction_warnings()
|
|
380
|
+
return status
|
|
333
381
|
|
|
382
|
+
# TODO: add_column() still supports computed columns for backward-compatibility. In the future, computed columns
|
|
383
|
+
# will be supported only through add_computed_column(). At that point, we can remove the `stored`,
|
|
384
|
+
# `print_stats`, and `on_error` parameters, and change the method body to simply call self.add_columns(kwargs),
|
|
385
|
+
# simplifying the code. For the time being, there's some obvious code duplication.
|
|
334
386
|
def add_column(
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
**kwargs: Union[ts.ColumnType, builtins.type, _GenericAlias, exprs.Expr, Callable]
|
|
387
|
+
self,
|
|
388
|
+
*,
|
|
389
|
+
stored: Optional[bool] = None,
|
|
390
|
+
print_stats: bool = False,
|
|
391
|
+
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
392
|
+
**kwargs: Union[ts.ColumnType, builtins.type, _GenericAlias, exprs.Expr]
|
|
342
393
|
) -> UpdateStatus:
|
|
343
394
|
"""
|
|
344
395
|
Adds a column to the table.
|
|
345
396
|
|
|
346
397
|
Args:
|
|
347
|
-
kwargs: Exactly one keyword argument of the form `
|
|
348
|
-
type: The type of the column. Only valid and required if `value-expression` is a Callable.
|
|
398
|
+
kwargs: Exactly one keyword argument of the form `col_name=col_type`.
|
|
349
399
|
stored: Whether the column is materialized and stored or computed on demand. Only valid for image columns.
|
|
350
400
|
print_stats: If `True`, print execution metrics during evaluation.
|
|
351
401
|
on_error: Determines the behavior if an error occurs while evaluating the column expression for at least one
|
|
352
402
|
row.
|
|
353
403
|
|
|
354
|
-
-
|
|
355
|
-
-
|
|
404
|
+
- `'abort'`: an exception will be raised and the column will not be added.
|
|
405
|
+
- `'ignore'`: execution will continue and the column will be added. Any rows
|
|
356
406
|
with errors will have a `None` value for the column, with information about the error stored in the
|
|
357
407
|
corresponding `tbl.col_name.errortype` and `tbl.col_name.errormsg` fields.
|
|
358
408
|
|
|
@@ -370,53 +420,79 @@ class Table(SchemaObject):
|
|
|
370
420
|
Alternatively, this can also be expressed as:
|
|
371
421
|
|
|
372
422
|
>>> tbl['new_col'] = pxt.Int
|
|
423
|
+
"""
|
|
424
|
+
self._check_is_dropped()
|
|
425
|
+
# verify kwargs and construct column schema dict
|
|
426
|
+
if len(kwargs) != 1:
|
|
427
|
+
raise excs.Error(
|
|
428
|
+
f'add_column() requires exactly one keyword argument of the form "col_name=col_type"; '
|
|
429
|
+
f'got {len(kwargs)} instead ({", ".join(list(kwargs.keys()))})'
|
|
430
|
+
)
|
|
431
|
+
col_name, spec = next(iter(kwargs.items()))
|
|
432
|
+
if not is_valid_identifier(col_name):
|
|
433
|
+
raise excs.Error(f'Invalid column name: {col_name!r}')
|
|
434
|
+
|
|
435
|
+
col_schema: dict[str, Any] = {}
|
|
436
|
+
if isinstance(spec, (ts.ColumnType, builtins.type, _GenericAlias)):
|
|
437
|
+
col_schema['type'] = ts.ColumnType.normalize_type(spec, nullable_default=True, allow_builtin_types=False)
|
|
438
|
+
else:
|
|
439
|
+
col_schema['value'] = spec
|
|
440
|
+
if stored is not None:
|
|
441
|
+
col_schema['stored'] = stored
|
|
442
|
+
|
|
443
|
+
new_col = self._create_columns({col_name: col_schema})[0]
|
|
444
|
+
self._verify_column(new_col, set(self._schema.keys()), set(self._query_names))
|
|
445
|
+
status = self._tbl_version.add_columns([new_col], print_stats=print_stats, on_error=on_error)
|
|
446
|
+
FileCache.get().emit_eviction_warnings()
|
|
447
|
+
return status
|
|
373
448
|
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
449
|
+
def add_computed_column(
|
|
450
|
+
self,
|
|
451
|
+
*,
|
|
452
|
+
stored: Optional[bool] = None,
|
|
453
|
+
print_stats: bool = False,
|
|
454
|
+
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
455
|
+
**kwargs: exprs.Expr
|
|
456
|
+
) -> UpdateStatus:
|
|
457
|
+
"""
|
|
458
|
+
Adds a computed column to the table.
|
|
377
459
|
|
|
378
|
-
|
|
460
|
+
Args:
|
|
461
|
+
kwargs: Exactly one keyword argument of the form `col_name=expression`.
|
|
379
462
|
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
(by default, computed image columns are not stored but recomputed on demand):
|
|
463
|
+
Returns:
|
|
464
|
+
Information about the execution status of the operation.
|
|
383
465
|
|
|
384
|
-
|
|
466
|
+
Raises:
|
|
467
|
+
Error: If the column name is invalid or already exists.
|
|
385
468
|
|
|
386
|
-
|
|
469
|
+
Examples:
|
|
470
|
+
For a table with an image column `frame`, add an image column `rotated` that rotates the image by
|
|
471
|
+
90 degrees:
|
|
387
472
|
|
|
388
|
-
>>> tbl
|
|
473
|
+
>>> tbl.add_computed_column(rotated=tbl.frame.rotate(90))
|
|
389
474
|
|
|
390
475
|
Do the same, but now the column is unstored:
|
|
391
476
|
|
|
392
|
-
>>> tbl.
|
|
477
|
+
>>> tbl.add_computed_column(rotated=tbl.frame.rotate(90), stored=False)
|
|
393
478
|
"""
|
|
394
479
|
self._check_is_dropped()
|
|
395
|
-
# verify kwargs and construct column schema dict
|
|
396
480
|
if len(kwargs) != 1:
|
|
397
481
|
raise excs.Error(
|
|
398
|
-
f'
|
|
399
|
-
f'got {len(kwargs)} instead ({", ".join(list(kwargs.keys()))})'
|
|
482
|
+
f'add_computed_column() requires exactly one keyword argument of the form "column-name=type|value-expression"; '
|
|
483
|
+
f'got {len(kwargs)} arguments instead ({", ".join(list(kwargs.keys()))})'
|
|
400
484
|
)
|
|
401
485
|
col_name, spec = next(iter(kwargs.items()))
|
|
402
486
|
if not is_valid_identifier(col_name):
|
|
403
487
|
raise excs.Error(f'Invalid column name: {col_name!r}')
|
|
404
|
-
if isinstance(spec, (ts.ColumnType, builtins.type, _GenericAlias, exprs.Expr)) and type is not None:
|
|
405
|
-
raise excs.Error(f'add_column(): keyword argument "type" is redundant')
|
|
406
488
|
|
|
407
|
-
col_schema: dict[str, Any] = {}
|
|
408
|
-
if isinstance(spec, (ts.ColumnType, builtins.type, _GenericAlias)):
|
|
409
|
-
col_schema['type'] = ts.ColumnType.normalize_type(spec, nullable_default=True)
|
|
410
|
-
else:
|
|
411
|
-
col_schema['value'] = spec
|
|
412
|
-
if type is not None:
|
|
413
|
-
col_schema['type'] = ts.ColumnType.normalize_type(type, nullable_default=True)
|
|
489
|
+
col_schema: dict[str, Any] = {'value': spec}
|
|
414
490
|
if stored is not None:
|
|
415
491
|
col_schema['stored'] = stored
|
|
416
492
|
|
|
417
493
|
new_col = self._create_columns({col_name: col_schema})[0]
|
|
418
494
|
self._verify_column(new_col, set(self._schema.keys()), set(self._query_names))
|
|
419
|
-
status = self._tbl_version.
|
|
495
|
+
status = self._tbl_version.add_columns([new_col], print_stats=print_stats, on_error=on_error)
|
|
420
496
|
FileCache.get().emit_eviction_warnings()
|
|
421
497
|
return status
|
|
422
498
|
|
|
@@ -429,39 +505,29 @@ class Table(SchemaObject):
|
|
|
429
505
|
"""
|
|
430
506
|
assert isinstance(spec, dict)
|
|
431
507
|
valid_keys = {'type', 'value', 'stored', 'media_validation'}
|
|
432
|
-
has_type = False
|
|
433
508
|
for k in spec.keys():
|
|
434
509
|
if k not in valid_keys:
|
|
435
510
|
raise excs.Error(f'Column {name}: invalid key {k!r}')
|
|
436
511
|
|
|
512
|
+
if 'type' not in spec and 'value' not in spec:
|
|
513
|
+
raise excs.Error(f"Column {name}: 'type' or 'value' must be specified")
|
|
514
|
+
|
|
437
515
|
if 'type' in spec:
|
|
438
|
-
has_type = True
|
|
439
516
|
if not isinstance(spec['type'], (ts.ColumnType, type, _GenericAlias)):
|
|
440
517
|
raise excs.Error(f'Column {name}: "type" must be a type or ColumnType, got {spec["type"]}')
|
|
441
518
|
|
|
442
519
|
if 'value' in spec:
|
|
443
|
-
|
|
444
|
-
value_expr = exprs.Expr.from_object(value_spec)
|
|
520
|
+
value_expr = exprs.Expr.from_object(spec['value'])
|
|
445
521
|
if value_expr is None:
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
f'Column {name}: value needs to be either a Pixeltable expression or a Callable, '
|
|
450
|
-
f'but it is a {type(value_spec)}')
|
|
451
|
-
if 'type' not in spec:
|
|
452
|
-
raise excs.Error(f'Column {name}: "type" is required if value is a Callable')
|
|
453
|
-
else:
|
|
454
|
-
has_type = True
|
|
455
|
-
if 'type' in spec:
|
|
456
|
-
raise excs.Error(f'Column {name}: "type" is redundant if value is a Pixeltable expression')
|
|
522
|
+
raise excs.Error(f'Column {name}: value must be a Pixeltable expression.')
|
|
523
|
+
if 'type' in spec:
|
|
524
|
+
raise excs.Error(f"Column {name}: 'type' is redundant if 'value' is specified")
|
|
457
525
|
|
|
458
526
|
if 'media_validation' in spec:
|
|
459
527
|
_ = catalog.MediaValidation.validated(spec['media_validation'], f'Column {name}: media_validation')
|
|
460
528
|
|
|
461
529
|
if 'stored' in spec and not isinstance(spec['stored'], bool):
|
|
462
530
|
raise excs.Error(f'Column {name}: "stored" must be a bool, got {spec["stored"]}')
|
|
463
|
-
if not has_type:
|
|
464
|
-
raise excs.Error(f'Column {name}: "type" is required')
|
|
465
531
|
|
|
466
532
|
@classmethod
|
|
467
533
|
def _create_columns(cls, schema: dict[str, Any]) -> list[Column]:
|
|
@@ -475,19 +541,15 @@ class Table(SchemaObject):
|
|
|
475
541
|
stored = True
|
|
476
542
|
|
|
477
543
|
if isinstance(spec, (ts.ColumnType, type, _GenericAlias)):
|
|
478
|
-
col_type = ts.ColumnType.normalize_type(spec, nullable_default=True)
|
|
544
|
+
col_type = ts.ColumnType.normalize_type(spec, nullable_default=True, allow_builtin_types=False)
|
|
479
545
|
elif isinstance(spec, exprs.Expr):
|
|
480
546
|
# create copy so we can modify it
|
|
481
547
|
value_expr = spec.copy()
|
|
482
|
-
elif callable(spec):
|
|
483
|
-
raise excs.Error(
|
|
484
|
-
f'Column {name} computed with a Callable: specify using a dictionary with '
|
|
485
|
-
f'the "value" and "type" keys (e.g., "{name}": {{"value": <Callable>, "type": pxt.Int}})'
|
|
486
|
-
)
|
|
487
548
|
elif isinstance(spec, dict):
|
|
488
549
|
cls._validate_column_spec(name, spec)
|
|
489
550
|
if 'type' in spec:
|
|
490
|
-
col_type = ts.ColumnType.normalize_type(
|
|
551
|
+
col_type = ts.ColumnType.normalize_type(
|
|
552
|
+
spec['type'], nullable_default=True, allow_builtin_types=False)
|
|
491
553
|
value_expr = spec.get('value')
|
|
492
554
|
if value_expr is not None and isinstance(value_expr, exprs.Expr):
|
|
493
555
|
# create copy so we can modify it
|
|
@@ -499,6 +561,8 @@ class Table(SchemaObject):
|
|
|
499
561
|
catalog.MediaValidation[media_validation_str.upper()] if media_validation_str is not None
|
|
500
562
|
else None
|
|
501
563
|
)
|
|
564
|
+
else:
|
|
565
|
+
raise excs.Error(f'Invalid value for column {name!r}')
|
|
502
566
|
|
|
503
567
|
column = Column(
|
|
504
568
|
name, col_type=col_type, computed_with=value_expr, stored=stored, is_pk=primary_key,
|
|
@@ -508,7 +572,7 @@ class Table(SchemaObject):
|
|
|
508
572
|
|
|
509
573
|
@classmethod
|
|
510
574
|
def _verify_column(
|
|
511
|
-
cls, col: Column, existing_column_names:
|
|
575
|
+
cls, col: Column, existing_column_names: set[str], existing_query_names: Optional[set[str]] = None
|
|
512
576
|
) -> None:
|
|
513
577
|
"""Check integrity of user-supplied Column and supply defaults"""
|
|
514
578
|
if is_system_column_name(col.name):
|
|
@@ -529,7 +593,7 @@ class Table(SchemaObject):
|
|
|
529
593
|
@classmethod
|
|
530
594
|
def _verify_schema(cls, schema: list[Column]) -> None:
|
|
531
595
|
"""Check integrity of user-supplied schema and set defaults"""
|
|
532
|
-
column_names:
|
|
596
|
+
column_names: set[str] = set()
|
|
533
597
|
for col in schema:
|
|
534
598
|
cls._verify_column(col, column_names)
|
|
535
599
|
column_names.add(col.name)
|
|
@@ -710,7 +774,7 @@ class Table(SchemaObject):
|
|
|
710
774
|
|
|
711
775
|
def _drop_index(
|
|
712
776
|
self, *, column_name: Optional[str] = None, idx_name: Optional[str] = None,
|
|
713
|
-
_idx_class: Optional[
|
|
777
|
+
_idx_class: Optional[type[index.IndexBase]] = None
|
|
714
778
|
) -> None:
|
|
715
779
|
if self._tbl_version_path.is_snapshot():
|
|
716
780
|
raise excs.Error('Cannot drop an index from a snapshot')
|
|
@@ -741,36 +805,68 @@ class Table(SchemaObject):
|
|
|
741
805
|
|
|
742
806
|
@overload
|
|
743
807
|
def insert(
|
|
744
|
-
|
|
808
|
+
self,
|
|
809
|
+
rows: Iterable[dict[str, Any]],
|
|
810
|
+
/,
|
|
811
|
+
*,
|
|
812
|
+
print_stats: bool = False,
|
|
813
|
+
on_error: Literal['abort', 'ignore'] = 'abort'
|
|
745
814
|
) -> UpdateStatus: ...
|
|
746
815
|
|
|
747
816
|
@overload
|
|
748
|
-
def insert(
|
|
817
|
+
def insert(
|
|
818
|
+
self,
|
|
819
|
+
*,
|
|
820
|
+
print_stats: bool = False,
|
|
821
|
+
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
822
|
+
**kwargs: Any
|
|
823
|
+
) -> UpdateStatus: ...
|
|
749
824
|
|
|
750
825
|
@abc.abstractmethod # type: ignore[misc]
|
|
751
826
|
def insert(
|
|
752
|
-
|
|
753
|
-
|
|
827
|
+
self,
|
|
828
|
+
rows: Optional[Iterable[dict[str, Any]]] = None,
|
|
829
|
+
/,
|
|
830
|
+
*,
|
|
831
|
+
print_stats: bool = False,
|
|
832
|
+
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
833
|
+
**kwargs: Any
|
|
754
834
|
) -> UpdateStatus:
|
|
755
835
|
"""Inserts rows into this table. There are two mutually exclusive call patterns:
|
|
756
836
|
|
|
757
837
|
To insert multiple rows at a time:
|
|
758
|
-
|
|
838
|
+
|
|
839
|
+
```python
|
|
840
|
+
insert(
|
|
841
|
+
rows: Iterable[dict[str, Any]],
|
|
842
|
+
/,
|
|
843
|
+
*,
|
|
844
|
+
print_stats: bool = False,
|
|
845
|
+
on_error: Literal['abort', 'ignore'] = 'abort'
|
|
846
|
+
)```
|
|
759
847
|
|
|
760
848
|
To insert just a single row, you can use the more concise syntax:
|
|
761
|
-
|
|
849
|
+
|
|
850
|
+
```python
|
|
851
|
+
insert(
|
|
852
|
+
*,
|
|
853
|
+
print_stats: bool = False,
|
|
854
|
+
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
855
|
+
**kwargs: Any
|
|
856
|
+
)```
|
|
762
857
|
|
|
763
858
|
Args:
|
|
764
859
|
rows: (if inserting multiple rows) A list of rows to insert, each of which is a dictionary mapping column
|
|
765
860
|
names to values.
|
|
766
861
|
kwargs: (if inserting a single row) Keyword-argument pairs representing column names and values.
|
|
767
|
-
print_stats: If
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
If
|
|
772
|
-
|
|
773
|
-
|
|
862
|
+
print_stats: If `True`, print statistics about the cost of computed columns.
|
|
863
|
+
on_error: Determines the behavior if an error occurs while evaluating a computed column or detecting an
|
|
864
|
+
invalid media file (such as a corrupt image) for one of the inserted rows.
|
|
865
|
+
|
|
866
|
+
- If `on_error='abort'`, then an exception will be raised and the rows will not be inserted.
|
|
867
|
+
- If `on_error='ignore'`, then execution will continue and the rows will be inserted. Any cells
|
|
868
|
+
with errors will have a `None` value for that cell, with information about the error stored in the
|
|
869
|
+
corresponding `tbl.col_name.errortype` and `tbl.col_name.errormsg` fields.
|
|
774
870
|
|
|
775
871
|
Returns:
|
|
776
872
|
An [`UpdateStatus`][pixeltable.UpdateStatus] object containing information about the update.
|
|
@@ -781,7 +877,7 @@ class Table(SchemaObject):
|
|
|
781
877
|
- The table is a view or snapshot.
|
|
782
878
|
- The table has been dropped.
|
|
783
879
|
- One of the rows being inserted does not conform to the table schema.
|
|
784
|
-
- An error occurs during processing of computed columns, and `
|
|
880
|
+
- An error occurs during processing of computed columns, and `on_error='abort'`.
|
|
785
881
|
|
|
786
882
|
Examples:
|
|
787
883
|
Insert two rows into the table `my_table` with three int columns ``a``, ``b``, and ``c``.
|
|
@@ -867,7 +963,7 @@ class Table(SchemaObject):
|
|
|
867
963
|
|
|
868
964
|
# pseudo-column _rowid: contains the rowid of the row to update and can be used instead of the primary key
|
|
869
965
|
has_rowid = _ROWID_COLUMN_NAME in rows[0]
|
|
870
|
-
rowids: list[
|
|
966
|
+
rowids: list[tuple[int, ...]] = []
|
|
871
967
|
if len(pk_col_names) == 0 and not has_rowid:
|
|
872
968
|
raise excs.Error('Table must have primary key for batch update')
|
|
873
969
|
|