pixeltable 0.3.10__py3-none-any.whl → 0.3.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +2 -2
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +2 -1
- pixeltable/catalog/catalog.py +370 -93
- pixeltable/catalog/column.py +6 -4
- pixeltable/catalog/dir.py +5 -5
- pixeltable/catalog/globals.py +14 -16
- pixeltable/catalog/insertable_table.py +6 -8
- pixeltable/catalog/path.py +14 -7
- pixeltable/catalog/table.py +72 -62
- pixeltable/catalog/table_version.py +137 -107
- pixeltable/catalog/table_version_handle.py +3 -0
- pixeltable/catalog/table_version_path.py +1 -1
- pixeltable/catalog/view.py +10 -14
- pixeltable/dataframe.py +5 -3
- pixeltable/env.py +108 -42
- pixeltable/exec/__init__.py +2 -0
- pixeltable/exec/aggregation_node.py +6 -8
- pixeltable/exec/cache_prefetch_node.py +4 -7
- pixeltable/exec/component_iteration_node.py +1 -3
- pixeltable/exec/data_row_batch.py +1 -2
- pixeltable/exec/exec_context.py +1 -1
- pixeltable/exec/exec_node.py +1 -2
- pixeltable/exec/expr_eval/__init__.py +2 -0
- pixeltable/exec/expr_eval/evaluators.py +137 -20
- pixeltable/exec/expr_eval/expr_eval_node.py +43 -64
- pixeltable/exec/expr_eval/globals.py +68 -7
- pixeltable/exec/expr_eval/schedulers.py +25 -23
- pixeltable/exec/in_memory_data_node.py +8 -6
- pixeltable/exec/row_update_node.py +3 -4
- pixeltable/exec/sql_node.py +16 -18
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/column_property_ref.py +1 -1
- pixeltable/exprs/column_ref.py +3 -3
- pixeltable/exprs/compound_predicate.py +1 -1
- pixeltable/exprs/data_row.py +17 -1
- pixeltable/exprs/expr.py +12 -12
- pixeltable/exprs/function_call.py +34 -2
- pixeltable/exprs/json_mapper.py +95 -48
- pixeltable/exprs/json_path.py +4 -9
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +33 -6
- pixeltable/exprs/similarity_expr.py +1 -1
- pixeltable/exprs/sql_element_cache.py +1 -1
- pixeltable/exprs/string_op.py +2 -2
- pixeltable/ext/__init__.py +1 -1
- pixeltable/ext/functions/__init__.py +1 -1
- pixeltable/ext/functions/whisperx.py +1 -1
- pixeltable/ext/functions/yolox.py +1 -1
- pixeltable/func/__init__.py +1 -1
- pixeltable/func/aggregate_function.py +2 -2
- pixeltable/func/callable_function.py +3 -6
- pixeltable/func/expr_template_function.py +24 -4
- pixeltable/func/function.py +7 -9
- pixeltable/func/function_registry.py +1 -1
- pixeltable/func/query_template_function.py +87 -4
- pixeltable/func/signature.py +1 -1
- pixeltable/func/tools.py +1 -1
- pixeltable/func/udf.py +2 -2
- pixeltable/functions/__init__.py +1 -1
- pixeltable/functions/anthropic.py +2 -2
- pixeltable/functions/audio.py +1 -1
- pixeltable/functions/deepseek.py +1 -1
- pixeltable/functions/fireworks.py +1 -1
- pixeltable/functions/globals.py +6 -6
- pixeltable/functions/huggingface.py +1 -1
- pixeltable/functions/image.py +1 -1
- pixeltable/functions/json.py +1 -1
- pixeltable/functions/llama_cpp.py +1 -1
- pixeltable/functions/math.py +1 -1
- pixeltable/functions/mistralai.py +1 -1
- pixeltable/functions/ollama.py +1 -1
- pixeltable/functions/openai.py +2 -2
- pixeltable/functions/replicate.py +1 -1
- pixeltable/functions/string.py +1 -1
- pixeltable/functions/timestamp.py +1 -1
- pixeltable/functions/together.py +1 -1
- pixeltable/functions/util.py +1 -1
- pixeltable/functions/video.py +2 -2
- pixeltable/functions/vision.py +2 -2
- pixeltable/globals.py +7 -2
- pixeltable/index/embedding_index.py +12 -1
- pixeltable/io/__init__.py +5 -3
- pixeltable/io/fiftyone.py +6 -7
- pixeltable/io/label_studio.py +21 -20
- pixeltable/io/pandas.py +6 -5
- pixeltable/iterators/__init__.py +1 -1
- pixeltable/metadata/__init__.py +6 -4
- pixeltable/metadata/converters/convert_24.py +3 -3
- pixeltable/metadata/converters/convert_25.py +1 -1
- pixeltable/metadata/converters/convert_29.py +1 -1
- pixeltable/metadata/converters/convert_31.py +11 -0
- pixeltable/metadata/converters/convert_32.py +15 -0
- pixeltable/metadata/converters/convert_33.py +17 -0
- pixeltable/metadata/notes.py +3 -0
- pixeltable/metadata/schema.py +26 -1
- pixeltable/plan.py +2 -3
- pixeltable/share/packager.py +8 -24
- pixeltable/share/publish.py +20 -9
- pixeltable/store.py +9 -6
- pixeltable/type_system.py +19 -7
- pixeltable/utils/console_output.py +3 -2
- pixeltable/utils/coroutine.py +3 -3
- pixeltable/utils/dbms.py +66 -0
- pixeltable/utils/documents.py +61 -67
- pixeltable/utils/exception_handler.py +59 -0
- pixeltable/utils/filecache.py +1 -1
- pixeltable/utils/http_server.py +3 -2
- pixeltable/utils/pytorch.py +1 -1
- pixeltable/utils/sql.py +1 -1
- pixeltable-0.3.12.dist-info/METADATA +436 -0
- pixeltable-0.3.12.dist-info/RECORD +183 -0
- pixeltable/catalog/path_dict.py +0 -169
- pixeltable-0.3.10.dist-info/METADATA +0 -382
- pixeltable-0.3.10.dist-info/RECORD +0 -179
- {pixeltable-0.3.10.dist-info → pixeltable-0.3.12.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.10.dist-info → pixeltable-0.3.12.dist-info}/WHEEL +0 -0
- {pixeltable-0.3.10.dist-info → pixeltable-0.3.12.dist-info}/entry_points.txt +0 -0
pixeltable/catalog/column.py
CHANGED
|
@@ -140,12 +140,12 @@ class Column:
|
|
|
140
140
|
The computed column {self.name!r} in table {self.tbl.get().name!r} is no longer valid.
|
|
141
141
|
{{validation_error}}
|
|
142
142
|
You can continue to query existing data from this column, but evaluating it on new data will raise an error.
|
|
143
|
-
"""
|
|
143
|
+
""" # noqa: E501
|
|
144
144
|
)
|
|
145
145
|
.strip()
|
|
146
146
|
.format(validation_error=self._value_expr.validation_error)
|
|
147
147
|
)
|
|
148
|
-
warnings.warn(message, category=excs.PixeltableWarning)
|
|
148
|
+
warnings.warn(message, category=excs.PixeltableWarning, stacklevel=2)
|
|
149
149
|
return self._value_expr
|
|
150
150
|
|
|
151
151
|
def set_value_expr(self, value_expr: exprs.Expr) -> None:
|
|
@@ -165,8 +165,10 @@ class Column:
|
|
|
165
165
|
return False
|
|
166
166
|
from pixeltable import exprs
|
|
167
167
|
|
|
168
|
-
|
|
169
|
-
|
|
168
|
+
window_fn_calls = list(
|
|
169
|
+
self.value_expr.subexprs(filter=lambda e: isinstance(e, exprs.FunctionCall) and e.is_window_fn_call)
|
|
170
|
+
)
|
|
171
|
+
return len(window_fn_calls) > 0
|
|
170
172
|
|
|
171
173
|
def get_idx_info(self) -> dict[str, 'TableVersion.IndexInfo']:
|
|
172
174
|
assert self.tbl is not None
|
pixeltable/catalog/dir.py
CHANGED
|
@@ -1,13 +1,11 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import dataclasses
|
|
4
|
-
import datetime
|
|
5
4
|
import json
|
|
6
5
|
import logging
|
|
7
6
|
from uuid import UUID
|
|
8
7
|
|
|
9
8
|
import sqlalchemy as sql
|
|
10
|
-
from sqlalchemy.dialects.postgresql import JSONB
|
|
11
9
|
|
|
12
10
|
from pixeltable.env import Env
|
|
13
11
|
from pixeltable.metadata import schema
|
|
@@ -24,8 +22,9 @@ class Dir(SchemaObject):
|
|
|
24
22
|
@classmethod
|
|
25
23
|
def _create(cls, parent_id: UUID, name: str) -> Dir:
|
|
26
24
|
session = Env.get().session
|
|
25
|
+
user = Env.get().user
|
|
27
26
|
assert session is not None
|
|
28
|
-
dir_md = schema.DirMd(name=name, user=
|
|
27
|
+
dir_md = schema.DirMd(name=name, user=user, additional_md={})
|
|
29
28
|
dir_record = schema.Dir(parent_id=parent_id, md=dataclasses.asdict(dir_md))
|
|
30
29
|
session.add(dir_record)
|
|
31
30
|
session.flush()
|
|
@@ -48,14 +47,15 @@ class Dir(SchemaObject):
|
|
|
48
47
|
|
|
49
48
|
def _move(self, new_name: str, new_dir_id: UUID) -> None:
|
|
50
49
|
# print(
|
|
51
|
-
# f'{datetime.datetime.now()} move dir name={self._name} parent={self._dir_id}
|
|
50
|
+
# f'{datetime.datetime.now()} move dir name={self._name} parent={self._dir_id} '
|
|
51
|
+
# f'new_name={new_name} new_dir_id={new_dir_id}'
|
|
52
52
|
# )
|
|
53
53
|
super()._move(new_name, new_dir_id)
|
|
54
54
|
stmt = sql.text(
|
|
55
55
|
(
|
|
56
56
|
f'UPDATE {schema.Dir.__table__} '
|
|
57
57
|
f'SET {schema.Dir.parent_id.name} = :new_dir_id, '
|
|
58
|
-
f" {schema.Dir.md.name}
|
|
58
|
+
f" {schema.Dir.md.name} = jsonb_set({schema.Dir.md.name}, '{{name}}', (:new_name)::jsonb) "
|
|
59
59
|
f'WHERE {schema.Dir.id.name} = :id'
|
|
60
60
|
)
|
|
61
61
|
)
|
pixeltable/catalog/globals.py
CHANGED
|
@@ -6,6 +6,8 @@ import itertools
|
|
|
6
6
|
import logging
|
|
7
7
|
from typing import Optional
|
|
8
8
|
|
|
9
|
+
from typing_extensions import Self
|
|
10
|
+
|
|
9
11
|
import pixeltable.exceptions as excs
|
|
10
12
|
|
|
11
13
|
_logger = logging.getLogger('pixeltable')
|
|
@@ -32,7 +34,7 @@ class UpdateStatus:
|
|
|
32
34
|
updated_cols: list[str] = dataclasses.field(default_factory=list)
|
|
33
35
|
cols_with_excs: list[str] = dataclasses.field(default_factory=list)
|
|
34
36
|
|
|
35
|
-
def __iadd__(self, other: 'UpdateStatus') ->
|
|
37
|
+
def __iadd__(self, other: 'UpdateStatus') -> Self:
|
|
36
38
|
self.num_rows += other.num_rows
|
|
37
39
|
self.num_computed_values += other.num_computed_values
|
|
38
40
|
self.num_excs += other.num_excs
|
|
@@ -66,8 +68,8 @@ class MediaValidation(enum.Enum):
|
|
|
66
68
|
try:
|
|
67
69
|
return cls[name.upper()]
|
|
68
70
|
except KeyError:
|
|
69
|
-
val_strs = ', '.join(f'{s.lower()!r}' for s in cls.__members__
|
|
70
|
-
raise excs.Error(f'{error_prefix} must be one of: [{val_strs}]')
|
|
71
|
+
val_strs = ', '.join(f'{s.lower()!r}' for s in cls.__members__)
|
|
72
|
+
raise excs.Error(f'{error_prefix} must be one of: [{val_strs}]') from None
|
|
71
73
|
|
|
72
74
|
|
|
73
75
|
class IfExistsParam(enum.Enum):
|
|
@@ -81,8 +83,8 @@ class IfExistsParam(enum.Enum):
|
|
|
81
83
|
try:
|
|
82
84
|
return cls[param_val.upper()]
|
|
83
85
|
except KeyError:
|
|
84
|
-
val_strs = ', '.join(f'{s.lower()!r}' for s in cls.__members__
|
|
85
|
-
raise excs.Error(f'{param_name} must be one of: [{val_strs}]')
|
|
86
|
+
val_strs = ', '.join(f'{s.lower()!r}' for s in cls.__members__)
|
|
87
|
+
raise excs.Error(f'{param_name} must be one of: [{val_strs}]') from None
|
|
86
88
|
|
|
87
89
|
|
|
88
90
|
class IfNotExistsParam(enum.Enum):
|
|
@@ -94,28 +96,24 @@ class IfNotExistsParam(enum.Enum):
|
|
|
94
96
|
try:
|
|
95
97
|
return cls[param_val.upper()]
|
|
96
98
|
except KeyError:
|
|
97
|
-
val_strs = ', '.join(f'{s.lower()!r}' for s in cls.__members__
|
|
98
|
-
raise excs.Error(f'{param_name} must be one of: [{val_strs}]')
|
|
99
|
+
val_strs = ', '.join(f'{s.lower()!r}' for s in cls.__members__)
|
|
100
|
+
raise excs.Error(f'{param_name} must be one of: [{val_strs}]') from None
|
|
99
101
|
|
|
100
102
|
|
|
101
|
-
def is_valid_identifier(name: str) -> bool:
|
|
102
|
-
return name.isidentifier() and not name.startswith('_')
|
|
103
|
+
def is_valid_identifier(name: str, allow_system_identifiers: bool = False) -> bool:
|
|
104
|
+
return name.isidentifier() and (allow_system_identifiers or not name.startswith('_'))
|
|
103
105
|
|
|
104
106
|
|
|
105
|
-
def is_valid_path(path: str, empty_is_valid: bool) -> bool:
|
|
107
|
+
def is_valid_path(path: str, empty_is_valid: bool, allow_system_paths: bool = False) -> bool:
|
|
106
108
|
if path == '':
|
|
107
109
|
return empty_is_valid
|
|
108
|
-
|
|
109
|
-
for part in path.split('.'):
|
|
110
|
-
if not is_valid_identifier(part):
|
|
111
|
-
return False
|
|
112
|
-
return True
|
|
110
|
+
return all(is_valid_identifier(part, allow_system_paths) for part in path.split('.'))
|
|
113
111
|
|
|
114
112
|
|
|
115
113
|
def is_system_column_name(name: str) -> bool:
|
|
116
114
|
from pixeltable.catalog import InsertableTable, View
|
|
117
115
|
|
|
118
|
-
global _PREDEF_SYMBOLS
|
|
116
|
+
global _PREDEF_SYMBOLS # noqa: PLW0603
|
|
119
117
|
if _PREDEF_SYMBOLS is None:
|
|
120
118
|
_PREDEF_SYMBOLS = set(itertools.chain(dir(InsertableTable), dir(View)))
|
|
121
119
|
return name == _POS_COLUMN_NAME or name in _PREDEF_SYMBOLS
|
|
@@ -2,12 +2,11 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import enum
|
|
4
4
|
import logging
|
|
5
|
-
from typing import TYPE_CHECKING, Any,
|
|
5
|
+
from typing import TYPE_CHECKING, Any, Literal, Optional, overload
|
|
6
6
|
from uuid import UUID
|
|
7
7
|
|
|
8
8
|
import pixeltable as pxt
|
|
9
|
-
import
|
|
10
|
-
from pixeltable import exceptions as excs
|
|
9
|
+
from pixeltable import exceptions as excs, type_system as ts
|
|
11
10
|
from pixeltable.env import Env
|
|
12
11
|
from pixeltable.utils.filecache import FileCache
|
|
13
12
|
|
|
@@ -18,9 +17,8 @@ from .table_version_handle import TableVersionHandle
|
|
|
18
17
|
from .table_version_path import TableVersionPath
|
|
19
18
|
|
|
20
19
|
if TYPE_CHECKING:
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
from pixeltable.globals import RowData, TableDataSource
|
|
20
|
+
from pixeltable import exprs
|
|
21
|
+
from pixeltable.globals import TableDataSource
|
|
24
22
|
from pixeltable.io.table_data_conduit import TableDataConduit
|
|
25
23
|
|
|
26
24
|
_logger = logging.getLogger('pixeltable')
|
|
@@ -167,7 +165,7 @@ class InsertableTable(Table):
|
|
|
167
165
|
self, data_source: TableDataConduit, fail_on_exception: bool, print_stats: bool = False
|
|
168
166
|
) -> pxt.UpdateStatus:
|
|
169
167
|
"""Insert row batches into this table from a `TableDataConduit`."""
|
|
170
|
-
from pixeltable.io.table_data_conduit import DFTableDataConduit
|
|
168
|
+
from pixeltable.io.table_data_conduit import DFTableDataConduit
|
|
171
169
|
|
|
172
170
|
status = pxt.UpdateStatus()
|
|
173
171
|
with Env.get().begin_xact():
|
|
@@ -213,7 +211,7 @@ class InsertableTable(Table):
|
|
|
213
211
|
msg = str(e)
|
|
214
212
|
raise excs.Error(f'Error in column {col.name}: {msg[0].lower() + msg[1:]}\nRow: {row}') from e
|
|
215
213
|
|
|
216
|
-
def delete(self, where: Optional['
|
|
214
|
+
def delete(self, where: Optional['exprs.Expr'] = None) -> UpdateStatus:
|
|
217
215
|
"""Delete rows in this table.
|
|
218
216
|
|
|
219
217
|
Args:
|
pixeltable/catalog/path.py
CHANGED
|
@@ -11,8 +11,8 @@ _logger = logging.getLogger('pixeltable')
|
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
class Path:
|
|
14
|
-
def __init__(self, path: str, empty_is_valid: bool = False):
|
|
15
|
-
if not is_valid_path(path, empty_is_valid):
|
|
14
|
+
def __init__(self, path: str, empty_is_valid: bool = False, allow_system_paths: bool = False):
|
|
15
|
+
if not is_valid_path(path, empty_is_valid, allow_system_paths):
|
|
16
16
|
raise excs.Error(f"Invalid path format: '{path}'")
|
|
17
17
|
self.components = path.split('.')
|
|
18
18
|
|
|
@@ -27,7 +27,11 @@ class Path:
|
|
|
27
27
|
|
|
28
28
|
@property
|
|
29
29
|
def is_root(self) -> bool:
|
|
30
|
-
return self.components[0]
|
|
30
|
+
return not self.components[0]
|
|
31
|
+
|
|
32
|
+
@property
|
|
33
|
+
def is_system_path(self) -> bool:
|
|
34
|
+
return self.components[0].startswith('_')
|
|
31
35
|
|
|
32
36
|
@property
|
|
33
37
|
def parent(self) -> Path:
|
|
@@ -35,15 +39,15 @@ class Path:
|
|
|
35
39
|
if self.is_root:
|
|
36
40
|
return self
|
|
37
41
|
else:
|
|
38
|
-
return Path('', empty_is_valid=True)
|
|
42
|
+
return Path('', empty_is_valid=True, allow_system_paths=True)
|
|
39
43
|
else:
|
|
40
|
-
return Path('.'.join(self.components[:-1]))
|
|
44
|
+
return Path('.'.join(self.components[:-1]), allow_system_paths=True)
|
|
41
45
|
|
|
42
46
|
def append(self, name: str) -> Path:
|
|
43
47
|
if self.is_root:
|
|
44
|
-
return Path(name)
|
|
48
|
+
return Path(name, allow_system_paths=True)
|
|
45
49
|
else:
|
|
46
|
-
return Path(f'{
|
|
50
|
+
return Path(f'{self}.{name}', allow_system_paths=True)
|
|
47
51
|
|
|
48
52
|
def is_ancestor(self, other: Path, is_parent: bool = False) -> bool:
|
|
49
53
|
"""
|
|
@@ -67,6 +71,9 @@ class Path:
|
|
|
67
71
|
for i in range(0, len(self.components)):
|
|
68
72
|
yield Path('.'.join(self.components[0:i]), empty_is_valid=True)
|
|
69
73
|
|
|
74
|
+
def __repr__(self) -> str:
|
|
75
|
+
return repr(str(self))
|
|
76
|
+
|
|
70
77
|
def __str__(self) -> str:
|
|
71
78
|
return '.'.join(self.components)
|
|
72
79
|
|
pixeltable/catalog/table.py
CHANGED
|
@@ -15,14 +15,9 @@ import pandas as pd
|
|
|
15
15
|
import sqlalchemy as sql
|
|
16
16
|
|
|
17
17
|
import pixeltable as pxt
|
|
18
|
-
import
|
|
19
|
-
import pixeltable.env as env
|
|
20
|
-
import pixeltable.exceptions as excs
|
|
21
|
-
import pixeltable.exprs as exprs
|
|
22
|
-
import pixeltable.index as index
|
|
23
|
-
import pixeltable.metadata.schema as schema
|
|
24
|
-
import pixeltable.type_system as ts
|
|
18
|
+
from pixeltable import catalog, env, exceptions as excs, exprs, index, type_system as ts
|
|
25
19
|
from pixeltable.env import Env
|
|
20
|
+
from pixeltable.metadata import schema
|
|
26
21
|
|
|
27
22
|
from ..exprs import ColumnRef
|
|
28
23
|
from ..utils.description_helper import DescriptionHelper
|
|
@@ -38,16 +33,14 @@ from .globals import (
|
|
|
38
33
|
is_valid_identifier,
|
|
39
34
|
)
|
|
40
35
|
from .schema_object import SchemaObject
|
|
41
|
-
from .table_version import TableVersion
|
|
42
36
|
from .table_version_handle import TableVersionHandle
|
|
43
37
|
from .table_version_path import TableVersionPath
|
|
44
38
|
|
|
45
39
|
if TYPE_CHECKING:
|
|
46
|
-
import datasets # type: ignore[import-untyped]
|
|
47
40
|
import torch.utils.data
|
|
48
41
|
|
|
49
42
|
import pixeltable.plan
|
|
50
|
-
from pixeltable.globals import
|
|
43
|
+
from pixeltable.globals import TableDataSource
|
|
51
44
|
|
|
52
45
|
_logger = logging.getLogger('pixeltable')
|
|
53
46
|
|
|
@@ -82,7 +75,7 @@ class Table(SchemaObject):
|
|
|
82
75
|
(
|
|
83
76
|
f'UPDATE {schema.Table.__table__} '
|
|
84
77
|
f'SET {schema.Table.dir_id.name} = :new_dir_id, '
|
|
85
|
-
f" {schema.Table.md.name}
|
|
78
|
+
f" {schema.Table.md.name} = jsonb_set({schema.Table.md.name}, '{{name}}', (:new_name)::jsonb) "
|
|
86
79
|
f'WHERE {schema.Table.id.name} = :id'
|
|
87
80
|
)
|
|
88
81
|
)
|
|
@@ -102,6 +95,7 @@ class Table(SchemaObject):
|
|
|
102
95
|
'col1': StringType(),
|
|
103
96
|
'col2': IntType(),
|
|
104
97
|
},
|
|
98
|
+
'is_replica': False,
|
|
105
99
|
'version': 22,
|
|
106
100
|
'schema_version': 1,
|
|
107
101
|
'comment': '',
|
|
@@ -117,6 +111,7 @@ class Table(SchemaObject):
|
|
|
117
111
|
md = super().get_metadata()
|
|
118
112
|
md['base'] = self._base._path() if self._base is not None else None
|
|
119
113
|
md['schema'] = self._schema
|
|
114
|
+
md['is_replica'] = self._tbl_version.get().is_replica
|
|
120
115
|
md['version'] = self._version
|
|
121
116
|
md['schema_version'] = self._tbl_version.get().schema_version
|
|
122
117
|
md['comment'] = self._comment
|
|
@@ -146,14 +141,14 @@ class Table(SchemaObject):
|
|
|
146
141
|
if self._is_dropped:
|
|
147
142
|
raise excs.Error(f'{self._display_name()} {self._name} has been dropped')
|
|
148
143
|
|
|
149
|
-
def __getattr__(self, name: str) -> '
|
|
144
|
+
def __getattr__(self, name: str) -> 'exprs.ColumnRef':
|
|
150
145
|
"""Return a ColumnRef for the given name."""
|
|
151
146
|
col = self._tbl_version_path.get_column(name)
|
|
152
147
|
if col is None:
|
|
153
148
|
raise AttributeError(f'Column {name!r} unknown')
|
|
154
149
|
return ColumnRef(col)
|
|
155
150
|
|
|
156
|
-
def __getitem__(self, name: str) -> '
|
|
151
|
+
def __getitem__(self, name: str) -> 'exprs.ColumnRef':
|
|
157
152
|
"""Return a ColumnRef for the given name."""
|
|
158
153
|
return getattr(self, name)
|
|
159
154
|
|
|
@@ -232,15 +227,15 @@ class Table(SchemaObject):
|
|
|
232
227
|
"""Return rows from this table."""
|
|
233
228
|
return self._df().collect()
|
|
234
229
|
|
|
235
|
-
def show(self, *args, **kwargs) -> 'pxt.dataframe.DataFrameResultSet':
|
|
230
|
+
def show(self, *args: Any, **kwargs: Any) -> 'pxt.dataframe.DataFrameResultSet':
|
|
236
231
|
"""Return rows from this table."""
|
|
237
232
|
return self._df().show(*args, **kwargs)
|
|
238
233
|
|
|
239
|
-
def head(self, *args, **kwargs) -> 'pxt.dataframe.DataFrameResultSet':
|
|
234
|
+
def head(self, *args: Any, **kwargs: Any) -> 'pxt.dataframe.DataFrameResultSet':
|
|
240
235
|
"""Return the first n rows inserted into this table."""
|
|
241
236
|
return self._df().head(*args, **kwargs)
|
|
242
237
|
|
|
243
|
-
def tail(self, *args, **kwargs) -> 'pxt.dataframe.DataFrameResultSet':
|
|
238
|
+
def tail(self, *args: Any, **kwargs: Any) -> 'pxt.dataframe.DataFrameResultSet':
|
|
244
239
|
"""Return the last n rows inserted into this table."""
|
|
245
240
|
return self._df().tail(*args, **kwargs)
|
|
246
241
|
|
|
@@ -287,7 +282,7 @@ class Table(SchemaObject):
|
|
|
287
282
|
return self._tbl_version.get().comment
|
|
288
283
|
|
|
289
284
|
@property
|
|
290
|
-
def _num_retained_versions(self):
|
|
285
|
+
def _num_retained_versions(self) -> int:
|
|
291
286
|
return self._tbl_version.get().num_retained_versions
|
|
292
287
|
|
|
293
288
|
@property
|
|
@@ -406,12 +401,12 @@ class Table(SchemaObject):
|
|
|
406
401
|
def _column_has_dependents(self, col: Column) -> bool:
|
|
407
402
|
"""Returns True if the column has dependents, False otherwise."""
|
|
408
403
|
assert col is not None
|
|
409
|
-
assert col.name in self._schema
|
|
404
|
+
assert col.name in self._schema
|
|
410
405
|
if any(c.name is not None for c in col.dependent_cols):
|
|
411
406
|
return True
|
|
412
407
|
return any(
|
|
413
408
|
col in store.get_local_columns()
|
|
414
|
-
for view in
|
|
409
|
+
for view in (self, *self._get_views(recursive=True))
|
|
415
410
|
for store in view._tbl_version.get().external_stores.values()
|
|
416
411
|
)
|
|
417
412
|
|
|
@@ -429,7 +424,7 @@ class Table(SchemaObject):
|
|
|
429
424
|
raise excs.Error(f'Duplicate column name: {new_col_name!r}')
|
|
430
425
|
elif if_exists == IfExistsParam.IGNORE:
|
|
431
426
|
cols_to_ignore.append(new_col_name)
|
|
432
|
-
elif if_exists
|
|
427
|
+
elif if_exists in (IfExistsParam.REPLACE, IfExistsParam.REPLACE_FORCE):
|
|
433
428
|
if new_col_name not in self._tbl_version.get().cols_by_name:
|
|
434
429
|
# for views, it is possible that the existing column
|
|
435
430
|
# is a base table column; in that case, we should not
|
|
@@ -440,7 +435,8 @@ class Table(SchemaObject):
|
|
|
440
435
|
# replace directive if column has dependents.
|
|
441
436
|
if self._column_has_dependents(col):
|
|
442
437
|
raise excs.Error(
|
|
443
|
-
f'Column {new_col_name!r} already exists and has dependents.
|
|
438
|
+
f'Column {new_col_name!r} already exists and has dependents. '
|
|
439
|
+
f'Cannot {if_exists.name.lower()} it.'
|
|
444
440
|
)
|
|
445
441
|
self.drop_column(new_col_name)
|
|
446
442
|
assert new_col_name not in self._tbl_version.get().cols_by_name
|
|
@@ -452,8 +448,8 @@ class Table(SchemaObject):
|
|
|
452
448
|
if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
|
|
453
449
|
) -> UpdateStatus:
|
|
454
450
|
"""
|
|
455
|
-
Adds multiple columns to the table. The columns must be concrete (non-computed) columns; to add computed
|
|
456
|
-
use [`add_computed_column()`][pixeltable.catalog.Table.add_computed_column] instead.
|
|
451
|
+
Adds multiple columns to the table. The columns must be concrete (non-computed) columns; to add computed
|
|
452
|
+
columns, use [`add_computed_column()`][pixeltable.catalog.Table.add_computed_column] instead.
|
|
457
453
|
|
|
458
454
|
The format of the `schema` argument is identical to the format of the schema in a call to
|
|
459
455
|
[`create_table()`][pixeltable.globals.create_table].
|
|
@@ -464,10 +460,12 @@ class Table(SchemaObject):
|
|
|
464
460
|
|
|
465
461
|
- `'error'`: an exception will be raised.
|
|
466
462
|
- `'ignore'`: do nothing and return.
|
|
467
|
-
- `'replace' or 'replace_force'`: drop the existing column and add the new column, if it has no
|
|
463
|
+
- `'replace' or 'replace_force'`: drop the existing column and add the new column, if it has no
|
|
464
|
+
dependents.
|
|
468
465
|
|
|
469
466
|
Note that the `if_exists` parameter is applied to all columns in the schema.
|
|
470
|
-
To apply different behaviors to different columns, please use
|
|
467
|
+
To apply different behaviors to different columns, please use
|
|
468
|
+
[`add_column()`][pixeltable.Table.add_column] for each column.
|
|
471
469
|
|
|
472
470
|
Returns:
|
|
473
471
|
Information about the execution status of the operation.
|
|
@@ -528,7 +526,8 @@ class Table(SchemaObject):
|
|
|
528
526
|
|
|
529
527
|
- `'error'`: an exception will be raised.
|
|
530
528
|
- `'ignore'`: do nothing and return.
|
|
531
|
-
- `'replace' or 'replace_force'`: drop the existing column and add the new column, if it has
|
|
529
|
+
- `'replace' or 'replace_force'`: drop the existing column and add the new column, if it has
|
|
530
|
+
no dependents.
|
|
532
531
|
|
|
533
532
|
Returns:
|
|
534
533
|
Information about the execution status of the operation.
|
|
@@ -559,7 +558,7 @@ class Table(SchemaObject):
|
|
|
559
558
|
col_type = next(iter(kwargs.values()))
|
|
560
559
|
if not isinstance(col_type, (ts.ColumnType, type, _GenericAlias)):
|
|
561
560
|
raise excs.Error(
|
|
562
|
-
|
|
561
|
+
'The argument to add_column() must be a type; did you intend to use add_computed_column() instead?'
|
|
563
562
|
)
|
|
564
563
|
return self.add_columns(kwargs, if_exists=if_exists)
|
|
565
564
|
|
|
@@ -590,7 +589,8 @@ class Table(SchemaObject):
|
|
|
590
589
|
|
|
591
590
|
- `'error'`: an exception will be raised.
|
|
592
591
|
- `'ignore'`: do nothing and return.
|
|
593
|
-
- `'replace' or 'replace_force'`: drop the existing column and add the new column, iff it has
|
|
592
|
+
- `'replace' or 'replace_force'`: drop the existing column and add the new column, iff it has
|
|
593
|
+
no dependents.
|
|
594
594
|
|
|
595
595
|
Returns:
|
|
596
596
|
Information about the execution status of the operation.
|
|
@@ -614,7 +614,8 @@ class Table(SchemaObject):
|
|
|
614
614
|
raise excs.Error('Cannot add column to a snapshot.')
|
|
615
615
|
if len(kwargs) != 1:
|
|
616
616
|
raise excs.Error(
|
|
617
|
-
f'add_computed_column() requires exactly one keyword argument of the form
|
|
617
|
+
f'add_computed_column() requires exactly one keyword argument of the form '
|
|
618
|
+
'"column-name=type|value-expression"; '
|
|
618
619
|
f'got {len(kwargs)} arguments instead ({", ".join(list(kwargs.keys()))})'
|
|
619
620
|
)
|
|
620
621
|
col_name, spec = next(iter(kwargs.items()))
|
|
@@ -625,6 +626,15 @@ class Table(SchemaObject):
|
|
|
625
626
|
if stored is not None:
|
|
626
627
|
col_schema['stored'] = stored
|
|
627
628
|
|
|
629
|
+
# Raise an error if the column expression refers to a column error property
|
|
630
|
+
if isinstance(spec, exprs.Expr):
|
|
631
|
+
for e in spec.subexprs(expr_class=exprs.ColumnPropertyRef, traverse_matches=False):
|
|
632
|
+
if e.is_error_prop():
|
|
633
|
+
raise excs.Error(
|
|
634
|
+
'Use of a reference to an error property of another column is not allowed in a computed '
|
|
635
|
+
f'column. The specified computation for this column contains this reference: `{e!r}`'
|
|
636
|
+
)
|
|
637
|
+
|
|
628
638
|
with Env.get().begin_xact():
|
|
629
639
|
# handle existing columns based on if_exists parameter
|
|
630
640
|
cols_to_ignore = self._ignore_or_drop_existing_columns(
|
|
@@ -651,16 +661,15 @@ class Table(SchemaObject):
|
|
|
651
661
|
"""
|
|
652
662
|
assert isinstance(spec, dict)
|
|
653
663
|
valid_keys = {'type', 'value', 'stored', 'media_validation'}
|
|
654
|
-
for k in spec
|
|
664
|
+
for k in spec:
|
|
655
665
|
if k not in valid_keys:
|
|
656
666
|
raise excs.Error(f'Column {name}: invalid key {k!r}')
|
|
657
667
|
|
|
658
668
|
if 'type' not in spec and 'value' not in spec:
|
|
659
669
|
raise excs.Error(f"Column {name}: 'type' or 'value' must be specified")
|
|
660
670
|
|
|
661
|
-
if 'type' in spec:
|
|
662
|
-
|
|
663
|
-
raise excs.Error(f'Column {name}: "type" must be a type or ColumnType, got {spec["type"]}')
|
|
671
|
+
if 'type' in spec and not isinstance(spec['type'], (ts.ColumnType, type, _GenericAlias)):
|
|
672
|
+
raise excs.Error(f'Column {name}: "type" must be a type or ColumnType, got {spec["type"]}')
|
|
664
673
|
|
|
665
674
|
if 'value' in spec:
|
|
666
675
|
value_expr = exprs.Expr.from_object(spec['value'])
|
|
@@ -682,7 +691,7 @@ class Table(SchemaObject):
|
|
|
682
691
|
for name, spec in schema.items():
|
|
683
692
|
col_type: Optional[ts.ColumnType] = None
|
|
684
693
|
value_expr: Optional[exprs.Expr] = None
|
|
685
|
-
primary_key:
|
|
694
|
+
primary_key: bool = False
|
|
686
695
|
media_validation: Optional[catalog.MediaValidation] = None
|
|
687
696
|
stored = True
|
|
688
697
|
|
|
@@ -704,7 +713,7 @@ class Table(SchemaObject):
|
|
|
704
713
|
value_expr = value_expr.copy()
|
|
705
714
|
value_expr.bind_rel_paths()
|
|
706
715
|
stored = spec.get('stored', True)
|
|
707
|
-
primary_key = spec.get('primary_key')
|
|
716
|
+
primary_key = spec.get('primary_key', False)
|
|
708
717
|
media_validation_str = spec.get('media_validation')
|
|
709
718
|
media_validation = (
|
|
710
719
|
catalog.MediaValidation[media_validation_str.upper()] if media_validation_str is not None else None
|
|
@@ -740,8 +749,8 @@ class Table(SchemaObject):
|
|
|
740
749
|
if col.stored is False and col.has_window_fn_call():
|
|
741
750
|
raise excs.Error(
|
|
742
751
|
(
|
|
743
|
-
f'Column {col.name!r}: stored={col.stored} is not valid for image columns computed with a
|
|
744
|
-
f'function'
|
|
752
|
+
f'Column {col.name!r}: stored={col.stored} is not valid for image columns computed with a '
|
|
753
|
+
f'streaming function'
|
|
745
754
|
)
|
|
746
755
|
)
|
|
747
756
|
|
|
@@ -787,21 +796,21 @@ class Table(SchemaObject):
|
|
|
787
796
|
if self._tbl_version_path.is_snapshot():
|
|
788
797
|
raise excs.Error('Cannot drop column from a snapshot.')
|
|
789
798
|
col: Column = None
|
|
790
|
-
|
|
799
|
+
if_not_exists_ = IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
|
|
791
800
|
if isinstance(column, str):
|
|
792
801
|
col = self._tbl_version_path.get_column(column, include_bases=False)
|
|
793
802
|
if col is None:
|
|
794
|
-
if
|
|
803
|
+
if if_not_exists_ == IfNotExistsParam.ERROR:
|
|
795
804
|
raise excs.Error(f'Column {column!r} unknown')
|
|
796
|
-
assert
|
|
805
|
+
assert if_not_exists_ == IfNotExistsParam.IGNORE
|
|
797
806
|
return
|
|
798
807
|
col = self._tbl_version.get().cols_by_name[column]
|
|
799
808
|
else:
|
|
800
809
|
exists = self._tbl_version_path.has_column(column.col, include_bases=False)
|
|
801
810
|
if not exists:
|
|
802
|
-
if
|
|
811
|
+
if if_not_exists_ == IfNotExistsParam.ERROR:
|
|
803
812
|
raise excs.Error(f'Unknown column: {column.col.qualified_name}')
|
|
804
|
-
assert
|
|
813
|
+
assert if_not_exists_ == IfNotExistsParam.IGNORE
|
|
805
814
|
return
|
|
806
815
|
col = column.col
|
|
807
816
|
|
|
@@ -817,7 +826,7 @@ class Table(SchemaObject):
|
|
|
817
826
|
# (transitive) views of this table.
|
|
818
827
|
dependent_stores = [
|
|
819
828
|
(view, store)
|
|
820
|
-
for view in
|
|
829
|
+
for view in (self, *self._get_views(recursive=True))
|
|
821
830
|
for store in view._tbl_version.get().external_stores.values()
|
|
822
831
|
if col in store.get_local_columns()
|
|
823
832
|
]
|
|
@@ -924,7 +933,8 @@ class Table(SchemaObject):
|
|
|
924
933
|
- `'replace'` or `'replace_force'`: replace the existing index with the new one.
|
|
925
934
|
|
|
926
935
|
Raises:
|
|
927
|
-
Error: If an index with the specified name already exists for the table and `if_exists='error'`, or if
|
|
936
|
+
Error: If an index with the specified name already exists for the table and `if_exists='error'`, or if
|
|
937
|
+
the specified column does not exist.
|
|
928
938
|
|
|
929
939
|
Examples:
|
|
930
940
|
Add an index to the `img` column of the table `my_table`:
|
|
@@ -962,18 +972,18 @@ class Table(SchemaObject):
|
|
|
962
972
|
|
|
963
973
|
with Env.get().begin_xact():
|
|
964
974
|
if idx_name is not None and idx_name in self._tbl_version.get().idxs_by_name:
|
|
965
|
-
|
|
975
|
+
if_exists_ = IfExistsParam.validated(if_exists, 'if_exists')
|
|
966
976
|
# An index with the same name already exists.
|
|
967
977
|
# Handle it according to if_exists.
|
|
968
|
-
if
|
|
978
|
+
if if_exists_ == IfExistsParam.ERROR:
|
|
969
979
|
raise excs.Error(f'Duplicate index name: {idx_name}')
|
|
970
980
|
if not isinstance(self._tbl_version.get().idxs_by_name[idx_name].idx, index.EmbeddingIndex):
|
|
971
981
|
raise excs.Error(
|
|
972
|
-
f'Index `{idx_name}` is not an embedding index. Cannot {
|
|
982
|
+
f'Index `{idx_name}` is not an embedding index. Cannot {if_exists_.name.lower()} it.'
|
|
973
983
|
)
|
|
974
|
-
if
|
|
984
|
+
if if_exists_ == IfExistsParam.IGNORE:
|
|
975
985
|
return
|
|
976
|
-
assert
|
|
986
|
+
assert if_exists_ in (IfExistsParam.REPLACE, IfExistsParam.REPLACE_FORCE)
|
|
977
987
|
self.drop_index(idx_name=idx_name)
|
|
978
988
|
assert idx_name not in self._tbl_version.get().idxs_by_name
|
|
979
989
|
from pixeltable.index import EmbeddingIndex
|
|
@@ -986,7 +996,7 @@ class Table(SchemaObject):
|
|
|
986
996
|
idx = EmbeddingIndex(
|
|
987
997
|
col, metric=metric, embed=embedding, string_embed=string_embed, image_embed=image_embed
|
|
988
998
|
)
|
|
989
|
-
|
|
999
|
+
_ = self._tbl_version.get().add_index(col, idx_name=idx_name, idx=idx)
|
|
990
1000
|
# TODO: how to deal with exceptions here? drop the index and raise?
|
|
991
1001
|
FileCache.get().emit_eviction_warnings()
|
|
992
1002
|
|
|
@@ -1141,11 +1151,11 @@ class Table(SchemaObject):
|
|
|
1141
1151
|
assert (col is None) != (idx_name is None)
|
|
1142
1152
|
|
|
1143
1153
|
if idx_name is not None:
|
|
1144
|
-
|
|
1154
|
+
if_not_exists_ = IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
|
|
1145
1155
|
if idx_name not in self._tbl_version.get().idxs_by_name:
|
|
1146
|
-
if
|
|
1156
|
+
if if_not_exists_ == IfNotExistsParam.ERROR:
|
|
1147
1157
|
raise excs.Error(f'Index {idx_name!r} does not exist')
|
|
1148
|
-
assert
|
|
1158
|
+
assert if_not_exists_ == IfNotExistsParam.IGNORE
|
|
1149
1159
|
return
|
|
1150
1160
|
idx_info = self._tbl_version.get().idxs_by_name[idx_name]
|
|
1151
1161
|
else:
|
|
@@ -1157,10 +1167,10 @@ class Table(SchemaObject):
|
|
|
1157
1167
|
if _idx_class is not None:
|
|
1158
1168
|
idx_info_list = [info for info in idx_info_list if isinstance(info.idx, _idx_class)]
|
|
1159
1169
|
if len(idx_info_list) == 0:
|
|
1160
|
-
|
|
1161
|
-
if
|
|
1170
|
+
if_not_exists_ = IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
|
|
1171
|
+
if if_not_exists_ == IfNotExistsParam.ERROR:
|
|
1162
1172
|
raise excs.Error(f'Column {col.name!r} does not have an index')
|
|
1163
|
-
assert
|
|
1173
|
+
assert if_not_exists_ == IfNotExistsParam.IGNORE
|
|
1164
1174
|
return
|
|
1165
1175
|
if len(idx_info_list) > 1:
|
|
1166
1176
|
raise excs.Error(f"Column {col.name!r} has multiple indices; specify 'idx_name' instead")
|
|
@@ -1274,7 +1284,7 @@ class Table(SchemaObject):
|
|
|
1274
1284
|
raise NotImplementedError
|
|
1275
1285
|
|
|
1276
1286
|
def update(
|
|
1277
|
-
self, value_spec: dict[str, Any], where: Optional['
|
|
1287
|
+
self, value_spec: dict[str, Any], where: Optional['exprs.Expr'] = None, cascade: bool = True
|
|
1278
1288
|
) -> UpdateStatus:
|
|
1279
1289
|
"""Update rows in this table.
|
|
1280
1290
|
|
|
@@ -1341,7 +1351,7 @@ class Table(SchemaObject):
|
|
|
1341
1351
|
rows = list(rows)
|
|
1342
1352
|
|
|
1343
1353
|
row_updates: list[dict[Column, exprs.Expr]] = []
|
|
1344
|
-
pk_col_names =
|
|
1354
|
+
pk_col_names = {c.name for c in self._tbl_version.get().primary_key_columns()}
|
|
1345
1355
|
|
|
1346
1356
|
# pseudo-column _rowid: contains the rowid of the row to update and can be used instead of the primary key
|
|
1347
1357
|
has_rowid = _ROWID_COLUMN_NAME in rows[0]
|
|
@@ -1351,16 +1361,16 @@ class Table(SchemaObject):
|
|
|
1351
1361
|
|
|
1352
1362
|
for row_spec in rows:
|
|
1353
1363
|
col_vals = self._tbl_version.get()._validate_update_spec(
|
|
1354
|
-
row_spec, allow_pk=not has_rowid, allow_exprs=False
|
|
1364
|
+
row_spec, allow_pk=not has_rowid, allow_exprs=False, allow_media=False
|
|
1355
1365
|
)
|
|
1356
1366
|
if has_rowid:
|
|
1357
1367
|
# we expect the _rowid column to be present for each row
|
|
1358
1368
|
assert _ROWID_COLUMN_NAME in row_spec
|
|
1359
1369
|
rowids.append(row_spec[_ROWID_COLUMN_NAME])
|
|
1360
1370
|
else:
|
|
1361
|
-
col_names =
|
|
1371
|
+
col_names = {col.name for col in col_vals}
|
|
1362
1372
|
if any(pk_col_name not in col_names for pk_col_name in pk_col_names):
|
|
1363
|
-
missing_cols = pk_col_names -
|
|
1373
|
+
missing_cols = pk_col_names - {col.name for col in col_vals}
|
|
1364
1374
|
raise excs.Error(f'Primary key columns ({", ".join(missing_cols)}) missing in {row_spec}')
|
|
1365
1375
|
row_updates.append(col_vals)
|
|
1366
1376
|
|
|
@@ -1375,7 +1385,7 @@ class Table(SchemaObject):
|
|
|
1375
1385
|
FileCache.get().emit_eviction_warnings()
|
|
1376
1386
|
return status
|
|
1377
1387
|
|
|
1378
|
-
def delete(self, where: Optional['
|
|
1388
|
+
def delete(self, where: Optional['exprs.Expr'] = None) -> UpdateStatus:
|
|
1379
1389
|
"""Delete rows in this table.
|
|
1380
1390
|
|
|
1381
1391
|
Args:
|