pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +83 -19
- pixeltable/_query.py +1444 -0
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +7 -4
- pixeltable/catalog/catalog.py +2394 -119
- pixeltable/catalog/column.py +225 -104
- pixeltable/catalog/dir.py +38 -9
- pixeltable/catalog/globals.py +53 -34
- pixeltable/catalog/insertable_table.py +265 -115
- pixeltable/catalog/path.py +80 -17
- pixeltable/catalog/schema_object.py +28 -43
- pixeltable/catalog/table.py +1270 -677
- pixeltable/catalog/table_metadata.py +103 -0
- pixeltable/catalog/table_version.py +1270 -751
- pixeltable/catalog/table_version_handle.py +109 -0
- pixeltable/catalog/table_version_path.py +137 -42
- pixeltable/catalog/tbl_ops.py +53 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +251 -134
- pixeltable/config.py +215 -0
- pixeltable/env.py +736 -285
- pixeltable/exceptions.py +26 -2
- pixeltable/exec/__init__.py +7 -2
- pixeltable/exec/aggregation_node.py +39 -21
- pixeltable/exec/cache_prefetch_node.py +87 -109
- pixeltable/exec/cell_materialization_node.py +268 -0
- pixeltable/exec/cell_reconstruction_node.py +168 -0
- pixeltable/exec/component_iteration_node.py +25 -28
- pixeltable/exec/data_row_batch.py +11 -46
- pixeltable/exec/exec_context.py +26 -11
- pixeltable/exec/exec_node.py +35 -27
- pixeltable/exec/expr_eval/__init__.py +3 -0
- pixeltable/exec/expr_eval/evaluators.py +365 -0
- pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
- pixeltable/exec/expr_eval/globals.py +200 -0
- pixeltable/exec/expr_eval/row_buffer.py +74 -0
- pixeltable/exec/expr_eval/schedulers.py +413 -0
- pixeltable/exec/globals.py +35 -0
- pixeltable/exec/in_memory_data_node.py +35 -27
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +44 -29
- pixeltable/exec/sql_node.py +414 -115
- pixeltable/exprs/__init__.py +8 -5
- pixeltable/exprs/arithmetic_expr.py +79 -45
- pixeltable/exprs/array_slice.py +5 -5
- pixeltable/exprs/column_property_ref.py +40 -26
- pixeltable/exprs/column_ref.py +254 -61
- pixeltable/exprs/comparison.py +14 -9
- pixeltable/exprs/compound_predicate.py +9 -10
- pixeltable/exprs/data_row.py +213 -72
- pixeltable/exprs/expr.py +270 -104
- pixeltable/exprs/expr_dict.py +6 -5
- pixeltable/exprs/expr_set.py +20 -11
- pixeltable/exprs/function_call.py +383 -284
- pixeltable/exprs/globals.py +18 -5
- pixeltable/exprs/in_predicate.py +7 -7
- pixeltable/exprs/inline_expr.py +37 -37
- pixeltable/exprs/is_null.py +8 -4
- pixeltable/exprs/json_mapper.py +120 -54
- pixeltable/exprs/json_path.py +90 -60
- pixeltable/exprs/literal.py +61 -16
- pixeltable/exprs/method_ref.py +7 -6
- pixeltable/exprs/object_ref.py +19 -8
- pixeltable/exprs/row_builder.py +238 -75
- pixeltable/exprs/rowid_ref.py +53 -15
- pixeltable/exprs/similarity_expr.py +65 -50
- pixeltable/exprs/sql_element_cache.py +5 -5
- pixeltable/exprs/string_op.py +107 -0
- pixeltable/exprs/type_cast.py +25 -13
- pixeltable/exprs/variable.py +2 -2
- pixeltable/func/__init__.py +9 -5
- pixeltable/func/aggregate_function.py +197 -92
- pixeltable/func/callable_function.py +119 -35
- pixeltable/func/expr_template_function.py +101 -48
- pixeltable/func/function.py +375 -62
- pixeltable/func/function_registry.py +20 -19
- pixeltable/func/globals.py +6 -5
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +151 -35
- pixeltable/func/signature.py +178 -49
- pixeltable/func/tools.py +164 -0
- pixeltable/func/udf.py +176 -53
- pixeltable/functions/__init__.py +44 -4
- pixeltable/functions/anthropic.py +226 -47
- pixeltable/functions/audio.py +148 -11
- pixeltable/functions/bedrock.py +137 -0
- pixeltable/functions/date.py +188 -0
- pixeltable/functions/deepseek.py +113 -0
- pixeltable/functions/document.py +81 -0
- pixeltable/functions/fal.py +76 -0
- pixeltable/functions/fireworks.py +72 -20
- pixeltable/functions/gemini.py +249 -0
- pixeltable/functions/globals.py +208 -53
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1088 -95
- pixeltable/functions/image.py +155 -84
- pixeltable/functions/json.py +8 -11
- pixeltable/functions/llama_cpp.py +31 -19
- pixeltable/functions/math.py +169 -0
- pixeltable/functions/mistralai.py +50 -75
- pixeltable/functions/net.py +70 -0
- pixeltable/functions/ollama.py +29 -36
- pixeltable/functions/openai.py +548 -160
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +15 -14
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +310 -85
- pixeltable/functions/timestamp.py +37 -19
- pixeltable/functions/together.py +77 -120
- pixeltable/functions/twelvelabs.py +188 -0
- pixeltable/functions/util.py +7 -2
- pixeltable/functions/uuid.py +30 -0
- pixeltable/functions/video.py +1528 -117
- pixeltable/functions/vision.py +26 -26
- pixeltable/functions/voyageai.py +289 -0
- pixeltable/functions/whisper.py +19 -10
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/functions/yolox.py +112 -0
- pixeltable/globals.py +716 -236
- pixeltable/index/__init__.py +3 -1
- pixeltable/index/base.py +17 -21
- pixeltable/index/btree.py +32 -22
- pixeltable/index/embedding_index.py +155 -92
- pixeltable/io/__init__.py +12 -7
- pixeltable/io/datarows.py +140 -0
- pixeltable/io/external_store.py +83 -125
- pixeltable/io/fiftyone.py +24 -33
- pixeltable/io/globals.py +47 -182
- pixeltable/io/hf_datasets.py +96 -127
- pixeltable/io/label_studio.py +171 -156
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +136 -115
- pixeltable/io/parquet.py +40 -153
- pixeltable/io/table_data_conduit.py +702 -0
- pixeltable/io/utils.py +100 -0
- pixeltable/iterators/__init__.py +8 -4
- pixeltable/iterators/audio.py +207 -0
- pixeltable/iterators/base.py +9 -3
- pixeltable/iterators/document.py +144 -87
- pixeltable/iterators/image.py +17 -38
- pixeltable/iterators/string.py +15 -12
- pixeltable/iterators/video.py +523 -127
- pixeltable/metadata/__init__.py +33 -8
- pixeltable/metadata/converters/convert_10.py +2 -3
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_15.py +15 -11
- pixeltable/metadata/converters/convert_16.py +4 -5
- pixeltable/metadata/converters/convert_17.py +4 -5
- pixeltable/metadata/converters/convert_18.py +4 -6
- pixeltable/metadata/converters/convert_19.py +6 -9
- pixeltable/metadata/converters/convert_20.py +3 -6
- pixeltable/metadata/converters/convert_21.py +6 -8
- pixeltable/metadata/converters/convert_22.py +3 -2
- pixeltable/metadata/converters/convert_23.py +33 -0
- pixeltable/metadata/converters/convert_24.py +55 -0
- pixeltable/metadata/converters/convert_25.py +19 -0
- pixeltable/metadata/converters/convert_26.py +23 -0
- pixeltable/metadata/converters/convert_27.py +29 -0
- pixeltable/metadata/converters/convert_28.py +13 -0
- pixeltable/metadata/converters/convert_29.py +110 -0
- pixeltable/metadata/converters/convert_30.py +63 -0
- pixeltable/metadata/converters/convert_31.py +11 -0
- pixeltable/metadata/converters/convert_32.py +15 -0
- pixeltable/metadata/converters/convert_33.py +17 -0
- pixeltable/metadata/converters/convert_34.py +21 -0
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/convert_41.py +12 -0
- pixeltable/metadata/converters/convert_42.py +9 -0
- pixeltable/metadata/converters/convert_43.py +44 -0
- pixeltable/metadata/converters/util.py +44 -18
- pixeltable/metadata/notes.py +21 -0
- pixeltable/metadata/schema.py +185 -42
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +616 -225
- pixeltable/share/__init__.py +3 -0
- pixeltable/share/packager.py +797 -0
- pixeltable/share/protocol/__init__.py +33 -0
- pixeltable/share/protocol/common.py +165 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +119 -0
- pixeltable/share/publish.py +349 -0
- pixeltable/store.py +398 -232
- pixeltable/type_system.py +730 -267
- pixeltable/utils/__init__.py +40 -0
- pixeltable/utils/arrow.py +201 -29
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +346 -0
- pixeltable/utils/coco.py +26 -27
- pixeltable/utils/code.py +4 -4
- pixeltable/utils/console_output.py +46 -0
- pixeltable/utils/coroutine.py +24 -0
- pixeltable/utils/dbms.py +92 -0
- pixeltable/utils/description_helper.py +11 -12
- pixeltable/utils/documents.py +60 -61
- pixeltable/utils/exception_handler.py +36 -0
- pixeltable/utils/filecache.py +38 -22
- pixeltable/utils/formatter.py +88 -51
- pixeltable/utils/gcs_store.py +295 -0
- pixeltable/utils/http.py +133 -0
- pixeltable/utils/http_server.py +14 -13
- pixeltable/utils/iceberg.py +13 -0
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +90 -0
- pixeltable/utils/local_store.py +322 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +573 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +20 -20
- pixeltable/utils/s3_store.py +527 -0
- pixeltable/utils/sql.py +32 -5
- pixeltable/utils/system.py +30 -0
- pixeltable/utils/transactional_directory.py +4 -3
- pixeltable-0.5.7.dist-info/METADATA +579 -0
- pixeltable-0.5.7.dist-info/RECORD +227 -0
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
- pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/catalog/named_function.py +0 -36
- pixeltable/catalog/path_dict.py +0 -141
- pixeltable/dataframe.py +0 -894
- pixeltable/exec/expr_eval_node.py +0 -232
- pixeltable/ext/__init__.py +0 -14
- pixeltable/ext/functions/__init__.py +0 -8
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/ext/functions/yolox.py +0 -157
- pixeltable/tool/create_test_db_dump.py +0 -311
- pixeltable/tool/create_test_video.py +0 -81
- pixeltable/tool/doc_plugins/griffe.py +0 -50
- pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
- pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
- pixeltable/tool/embed_udf.py +0 -9
- pixeltable/tool/mypy_plugin.py +0 -55
- pixeltable/utils/media_store.py +0 -76
- pixeltable/utils/s3.py +0 -16
- pixeltable-0.2.26.dist-info/METADATA +0 -400
- pixeltable-0.2.26.dist-info/RECORD +0 -156
- pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
pixeltable/catalog/table.py
CHANGED
|
@@ -2,154 +2,192 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import abc
|
|
4
4
|
import builtins
|
|
5
|
+
import datetime
|
|
5
6
|
import json
|
|
6
7
|
import logging
|
|
8
|
+
from keyword import iskeyword as is_python_keyword
|
|
7
9
|
from pathlib import Path
|
|
8
|
-
from typing import
|
|
9
|
-
from typing import TYPE_CHECKING, Any, Callable, Iterable, Literal, Optional, Sequence, Union, overload
|
|
10
|
+
from typing import TYPE_CHECKING, Any, Iterable, Literal
|
|
10
11
|
from uuid import UUID
|
|
11
12
|
|
|
12
13
|
import pandas as pd
|
|
13
14
|
import sqlalchemy as sql
|
|
15
|
+
from typing_extensions import overload
|
|
14
16
|
|
|
15
17
|
import pixeltable as pxt
|
|
16
|
-
import
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
18
|
+
from pixeltable import catalog, env, exceptions as excs, exprs, index, type_system as ts
|
|
19
|
+
from pixeltable.catalog.table_metadata import (
|
|
20
|
+
ColumnMetadata,
|
|
21
|
+
EmbeddingIndexParams,
|
|
22
|
+
IndexMetadata,
|
|
23
|
+
TableMetadata,
|
|
24
|
+
VersionMetadata,
|
|
25
|
+
)
|
|
26
|
+
from pixeltable.metadata import schema
|
|
27
|
+
from pixeltable.metadata.utils import MetadataUtils
|
|
28
|
+
from pixeltable.utils.object_stores import ObjectOps
|
|
23
29
|
|
|
24
30
|
from ..exprs import ColumnRef
|
|
25
31
|
from ..utils.description_helper import DescriptionHelper
|
|
26
32
|
from ..utils.filecache import FileCache
|
|
27
33
|
from .column import Column
|
|
28
|
-
from .globals import
|
|
34
|
+
from .globals import (
|
|
35
|
+
_ROWID_COLUMN_NAME,
|
|
36
|
+
IfExistsParam,
|
|
37
|
+
IfNotExistsParam,
|
|
38
|
+
MediaValidation,
|
|
39
|
+
is_system_column_name,
|
|
40
|
+
is_valid_identifier,
|
|
41
|
+
)
|
|
29
42
|
from .schema_object import SchemaObject
|
|
30
|
-
from .
|
|
43
|
+
from .table_version_handle import TableVersionHandle
|
|
31
44
|
from .table_version_path import TableVersionPath
|
|
45
|
+
from .update_status import UpdateStatus
|
|
46
|
+
|
|
47
|
+
from typing import _GenericAlias # type: ignore[attr-defined] # isort: skip
|
|
48
|
+
|
|
32
49
|
|
|
33
50
|
if TYPE_CHECKING:
|
|
34
51
|
import torch.utils.data
|
|
52
|
+
|
|
35
53
|
import pixeltable.plan
|
|
54
|
+
from pixeltable.globals import TableDataSource
|
|
55
|
+
|
|
36
56
|
|
|
37
57
|
_logger = logging.getLogger('pixeltable')
|
|
38
58
|
|
|
59
|
+
|
|
39
60
|
class Table(SchemaObject):
|
|
40
61
|
"""
|
|
41
62
|
A handle to a table, view, or snapshot. This class is the primary interface through which table operations
|
|
42
63
|
(queries, insertions, updates, etc.) are performed in Pixeltable.
|
|
64
|
+
|
|
65
|
+
Every user-invoked operation that runs an ExecNode tree (directly or indirectly) needs to call
|
|
66
|
+
FileCache.emit_eviction_warnings() at the end of the operation.
|
|
43
67
|
"""
|
|
44
|
-
|
|
45
|
-
#
|
|
68
|
+
|
|
69
|
+
# the chain of TableVersions needed to run queries and supply metadata (eg, schema)
|
|
70
|
+
_tbl_version_path: TableVersionPath
|
|
71
|
+
|
|
72
|
+
# the physical TableVersion backing this Table; None for pure snapshots
|
|
73
|
+
_tbl_version: TableVersionHandle | None
|
|
46
74
|
|
|
47
75
|
def __init__(self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath):
|
|
48
76
|
super().__init__(id, name, dir_id)
|
|
49
|
-
self.
|
|
50
|
-
self.
|
|
51
|
-
self.__query_scope = self.QueryScope(self)
|
|
77
|
+
self._tbl_version_path = tbl_version_path
|
|
78
|
+
self._tbl_version = None
|
|
52
79
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
80
|
+
def _move(self, new_name: str, new_dir_id: UUID) -> None:
|
|
81
|
+
old_name = self._name
|
|
82
|
+
old_dir_id = self._dir_id
|
|
56
83
|
|
|
57
|
-
|
|
58
|
-
self.__table = table
|
|
59
|
-
self._queries = {}
|
|
84
|
+
cat = catalog.Catalog.get()
|
|
60
85
|
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
86
|
+
@cat.register_undo_action
|
|
87
|
+
def _() -> None:
|
|
88
|
+
# TODO: We should really be invalidating the Table instance and forcing a reload.
|
|
89
|
+
self._name = old_name
|
|
90
|
+
self._dir_id = old_dir_id
|
|
65
91
|
|
|
66
|
-
def _move(self, new_name: str, new_dir_id: UUID) -> None:
|
|
67
|
-
self._check_is_dropped()
|
|
68
92
|
super()._move(new_name, new_dir_id)
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
f
|
|
73
|
-
f
|
|
74
|
-
f"
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
93
|
+
conn = env.Env.get().conn
|
|
94
|
+
stmt = sql.text(
|
|
95
|
+
(
|
|
96
|
+
f'UPDATE {schema.Table.__table__} '
|
|
97
|
+
f'SET {schema.Table.dir_id.name} = :new_dir_id, '
|
|
98
|
+
f" {schema.Table.md.name} = jsonb_set({schema.Table.md.name}, '{{name}}', (:new_name)::jsonb) "
|
|
99
|
+
f'WHERE {schema.Table.id.name} = :id'
|
|
100
|
+
)
|
|
101
|
+
)
|
|
102
|
+
conn.execute(stmt, {'new_dir_id': new_dir_id, 'new_name': json.dumps(new_name), 'id': self._id})
|
|
103
|
+
|
|
104
|
+
# this is duplicated from SchemaObject so that our API docs show the docstring for Table
|
|
105
|
+
def get_metadata(self) -> 'TableMetadata':
|
|
78
106
|
"""
|
|
79
107
|
Retrieves metadata associated with this table.
|
|
80
108
|
|
|
81
109
|
Returns:
|
|
82
|
-
A
|
|
83
|
-
|
|
84
|
-
```python
|
|
85
|
-
{
|
|
86
|
-
'base': None, # If this is a view or snapshot, will contain the name of its base table
|
|
87
|
-
'schema': {
|
|
88
|
-
'col1': StringType(),
|
|
89
|
-
'col2': IntType(),
|
|
90
|
-
},
|
|
91
|
-
'version': 22,
|
|
92
|
-
'schema_version': 1,
|
|
93
|
-
'comment': '',
|
|
94
|
-
'num_retained_versions': 10,
|
|
95
|
-
'is_view': False,
|
|
96
|
-
'is_snapshot': False,
|
|
97
|
-
'media_validation': 'on_write',
|
|
98
|
-
}
|
|
99
|
-
```
|
|
110
|
+
A [TableMetadata][pixeltable.TableMetadata] instance containing this table's metadata.
|
|
100
111
|
"""
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
112
|
+
from pixeltable.catalog import retry_loop
|
|
113
|
+
|
|
114
|
+
@retry_loop(for_write=False)
|
|
115
|
+
def op() -> 'TableMetadata':
|
|
116
|
+
return self._get_metadata()
|
|
117
|
+
|
|
118
|
+
return op()
|
|
119
|
+
|
|
120
|
+
def _get_metadata(self) -> TableMetadata:
|
|
121
|
+
tvp = self._tbl_version_path
|
|
122
|
+
tv = tvp.tbl_version.get()
|
|
123
|
+
columns = tvp.columns()
|
|
124
|
+
column_info: dict[str, ColumnMetadata] = {}
|
|
125
|
+
for col in columns:
|
|
126
|
+
column_info[col.name] = ColumnMetadata(
|
|
127
|
+
name=col.name,
|
|
128
|
+
type_=col.col_type._to_str(as_schema=True),
|
|
129
|
+
version_added=col.schema_version_add,
|
|
130
|
+
is_stored=col.is_stored,
|
|
131
|
+
is_primary_key=col.is_pk,
|
|
132
|
+
media_validation=col.media_validation.name.lower() if col.media_validation is not None else None, # type: ignore[typeddict-item]
|
|
133
|
+
computed_with=col.value_expr.display_str(inline=False) if col.value_expr is not None else None,
|
|
134
|
+
defined_in=col.get_tbl().name,
|
|
135
|
+
)
|
|
121
136
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
137
|
+
indices = tv.idxs_by_name.values()
|
|
138
|
+
index_info: dict[str, IndexMetadata] = {}
|
|
139
|
+
for info in indices:
|
|
140
|
+
if isinstance(info.idx, index.EmbeddingIndex):
|
|
141
|
+
col_ref = ColumnRef(info.col)
|
|
142
|
+
embedding = info.idx.embeddings[info.col.col_type._type](col_ref)
|
|
143
|
+
index_info[info.name] = IndexMetadata(
|
|
144
|
+
name=info.name,
|
|
145
|
+
columns=[info.col.name],
|
|
146
|
+
index_type='embedding',
|
|
147
|
+
parameters=EmbeddingIndexParams(
|
|
148
|
+
metric=info.idx.metric.name.lower(), # type: ignore[typeddict-item]
|
|
149
|
+
embedding=str(embedding),
|
|
150
|
+
embedding_functions=[str(fn) for fn in info.idx.embeddings.values()],
|
|
151
|
+
),
|
|
152
|
+
)
|
|
127
153
|
|
|
128
|
-
|
|
129
|
-
|
|
154
|
+
return TableMetadata(
|
|
155
|
+
name=self._name,
|
|
156
|
+
path=self._path(),
|
|
157
|
+
columns=column_info,
|
|
158
|
+
indices=index_info,
|
|
159
|
+
is_replica=tv.is_replica,
|
|
160
|
+
is_view=False,
|
|
161
|
+
is_snapshot=False,
|
|
162
|
+
version=self._get_version(),
|
|
163
|
+
version_created=datetime.datetime.fromtimestamp(tv.created_at, tz=datetime.timezone.utc),
|
|
164
|
+
schema_version=tvp.schema_version(),
|
|
165
|
+
comment=self._get_comment(),
|
|
166
|
+
media_validation=self._get_media_validation().name.lower(), # type: ignore[typeddict-item]
|
|
167
|
+
base=None,
|
|
168
|
+
)
|
|
130
169
|
|
|
131
|
-
def
|
|
132
|
-
|
|
133
|
-
|
|
170
|
+
def _get_version(self) -> int:
|
|
171
|
+
"""Return the version of this table. Used by tests to ascertain version changes."""
|
|
172
|
+
return self._tbl_version_path.version()
|
|
134
173
|
|
|
135
|
-
def
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
return self._tbl_version_path.get_column_ref(name)
|
|
174
|
+
def _get_pxt_uri(self) -> str | None:
|
|
175
|
+
with catalog.Catalog.get().begin_xact(tbl_id=self._id):
|
|
176
|
+
return catalog.Catalog.get().get_additional_md(self._id).get('pxt_uri')
|
|
139
177
|
|
|
140
|
-
|
|
141
|
-
|
|
178
|
+
def __hash__(self) -> int:
|
|
179
|
+
return hash(self._tbl_version_path.tbl_id)
|
|
142
180
|
|
|
143
|
-
|
|
144
|
-
|
|
181
|
+
def __getattr__(self, name: str) -> 'exprs.ColumnRef':
|
|
182
|
+
"""Return a ColumnRef for the given name."""
|
|
183
|
+
col = self._tbl_version_path.get_column(name)
|
|
184
|
+
if col is None:
|
|
185
|
+
raise AttributeError(f'Unknown column: {name}')
|
|
186
|
+
return ColumnRef(col, reference_tbl=self._tbl_version_path)
|
|
145
187
|
|
|
146
|
-
def __getitem__(self,
|
|
147
|
-
"""Return a ColumnRef
|
|
148
|
-
|
|
149
|
-
if isinstance(index, str):
|
|
150
|
-
return getattr(self, index)
|
|
151
|
-
else:
|
|
152
|
-
return self._df()[index]
|
|
188
|
+
def __getitem__(self, name: str) -> 'exprs.ColumnRef':
|
|
189
|
+
"""Return a ColumnRef for the given name."""
|
|
190
|
+
return getattr(self, name)
|
|
153
191
|
|
|
154
192
|
def list_views(self, *, recursive: bool = True) -> list[str]:
|
|
155
193
|
"""
|
|
@@ -162,130 +200,160 @@ class Table(SchemaObject):
|
|
|
162
200
|
Returns:
|
|
163
201
|
A list of view paths.
|
|
164
202
|
"""
|
|
165
|
-
|
|
166
|
-
return [t._path for t in self._get_views(recursive=recursive)]
|
|
203
|
+
from pixeltable.catalog import retry_loop
|
|
167
204
|
|
|
168
|
-
|
|
169
|
-
|
|
205
|
+
# we need retry_loop() here, because we end up loading Tables for the views
|
|
206
|
+
@retry_loop(tbl=self._tbl_version_path, for_write=False)
|
|
207
|
+
def op() -> list[str]:
|
|
208
|
+
return [t._path() for t in self._get_views(recursive=recursive)]
|
|
209
|
+
|
|
210
|
+
return op()
|
|
211
|
+
|
|
212
|
+
def _get_views(self, *, recursive: bool = True, mutable_only: bool = False) -> list['Table']:
|
|
213
|
+
cat = catalog.Catalog.get()
|
|
214
|
+
view_ids = cat.get_view_ids(self._id)
|
|
215
|
+
views = [cat.get_table_by_id(id) for id in view_ids]
|
|
216
|
+
if mutable_only:
|
|
217
|
+
views = [t for t in views if t._tbl_version_path.is_mutable()]
|
|
170
218
|
if recursive:
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
219
|
+
views.extend(t for view in views for t in view._get_views(recursive=True, mutable_only=mutable_only))
|
|
220
|
+
return views
|
|
221
|
+
|
|
222
|
+
def select(self, *items: Any, **named_items: Any) -> 'pxt.Query':
|
|
223
|
+
"""Select columns or expressions from this table.
|
|
174
224
|
|
|
175
|
-
|
|
176
|
-
"""Return a DataFrame for this table.
|
|
225
|
+
See [`Query.select`][pixeltable.Query.select] for more details.
|
|
177
226
|
"""
|
|
178
|
-
|
|
227
|
+
from pixeltable.catalog import Catalog
|
|
179
228
|
from pixeltable.plan import FromClause
|
|
180
|
-
return pxt.DataFrame(FromClause(tbls=[self._tbl_version_path]))
|
|
181
229
|
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
230
|
+
query = pxt.Query(FromClause(tbls=[self._tbl_version_path]))
|
|
231
|
+
if len(items) == 0 and len(named_items) == 0:
|
|
232
|
+
return query # Select(*); no further processing is necessary
|
|
185
233
|
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
return self._df().select(*items, **named_items)
|
|
234
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=False):
|
|
235
|
+
return query.select(*items, **named_items)
|
|
189
236
|
|
|
190
|
-
def where(self, pred: 'exprs.Expr') -> 'pxt.
|
|
191
|
-
"""
|
|
192
|
-
|
|
237
|
+
def where(self, pred: 'exprs.Expr') -> 'pxt.Query':
|
|
238
|
+
"""Filter rows from this table based on the expression.
|
|
239
|
+
|
|
240
|
+
See [`Query.where`][pixeltable.Query.where] for more details.
|
|
241
|
+
"""
|
|
242
|
+
from pixeltable.catalog import Catalog
|
|
243
|
+
|
|
244
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=False):
|
|
245
|
+
return self.select().where(pred)
|
|
193
246
|
|
|
194
247
|
def join(
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
return self._df().join(other, on=on, how=how)
|
|
248
|
+
self, other: 'Table', *, on: 'exprs.Expr' | None = None, how: 'pixeltable.plan.JoinType.LiteralType' = 'inner'
|
|
249
|
+
) -> 'pxt.Query':
|
|
250
|
+
"""Join this table with another table."""
|
|
251
|
+
from pixeltable.catalog import Catalog
|
|
200
252
|
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
return self._df().order_by(*items, asc=asc)
|
|
253
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=False):
|
|
254
|
+
return self.select().join(other, on=on, how=how)
|
|
204
255
|
|
|
205
|
-
def
|
|
206
|
-
"""
|
|
207
|
-
return self._df().group_by(*items)
|
|
256
|
+
def order_by(self, *items: 'exprs.Expr', asc: bool = True) -> 'pxt.Query':
|
|
257
|
+
"""Order the rows of this table based on the expression.
|
|
208
258
|
|
|
209
|
-
|
|
210
|
-
|
|
259
|
+
See [`Query.order_by`][pixeltable.Query.order_by] for more details.
|
|
260
|
+
"""
|
|
261
|
+
from pixeltable.catalog import Catalog
|
|
211
262
|
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
return self._df().collect()
|
|
263
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=False):
|
|
264
|
+
return self.select().order_by(*items, asc=asc)
|
|
215
265
|
|
|
216
|
-
def
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
266
|
+
def group_by(self, *items: 'exprs.Expr') -> 'pxt.Query':
|
|
267
|
+
"""Group the rows of this table based on the expression.
|
|
268
|
+
|
|
269
|
+
See [`Query.group_by`][pixeltable.Query.group_by] for more details.
|
|
220
270
|
"""
|
|
221
|
-
|
|
271
|
+
from pixeltable.catalog import Catalog
|
|
272
|
+
|
|
273
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=False):
|
|
274
|
+
return self.select().group_by(*items)
|
|
275
|
+
|
|
276
|
+
def distinct(self) -> 'pxt.Query':
|
|
277
|
+
"""Remove duplicate rows from table."""
|
|
278
|
+
return self.select().distinct()
|
|
279
|
+
|
|
280
|
+
def limit(self, n: int) -> 'pxt.Query':
|
|
281
|
+
return self.select().limit(n)
|
|
282
|
+
|
|
283
|
+
def sample(
|
|
284
|
+
self,
|
|
285
|
+
n: int | None = None,
|
|
286
|
+
n_per_stratum: int | None = None,
|
|
287
|
+
fraction: float | None = None,
|
|
288
|
+
seed: int | None = None,
|
|
289
|
+
stratify_by: Any = None,
|
|
290
|
+
) -> pxt.Query:
|
|
291
|
+
"""Choose a shuffled sample of rows
|
|
292
|
+
|
|
293
|
+
See [`Query.sample`][pixeltable.Query.sample] for more details.
|
|
294
|
+
"""
|
|
295
|
+
return self.select().sample(
|
|
296
|
+
n=n, n_per_stratum=n_per_stratum, fraction=fraction, seed=seed, stratify_by=stratify_by
|
|
297
|
+
)
|
|
298
|
+
|
|
299
|
+
def collect(self) -> 'pxt._query.ResultSet':
|
|
300
|
+
"""Return rows from this table."""
|
|
301
|
+
return self.select().collect()
|
|
302
|
+
|
|
303
|
+
def show(self, *args: Any, **kwargs: Any) -> 'pxt._query.ResultSet':
|
|
304
|
+
"""Return rows from this table."""
|
|
305
|
+
return self.select().show(*args, **kwargs)
|
|
222
306
|
|
|
223
|
-
def head(
|
|
224
|
-
self, *args, **kwargs
|
|
225
|
-
) -> 'pxt.dataframe.DataFrameResultSet':
|
|
307
|
+
def head(self, *args: Any, **kwargs: Any) -> 'pxt._query.ResultSet':
|
|
226
308
|
"""Return the first n rows inserted into this table."""
|
|
227
|
-
return self.
|
|
309
|
+
return self.select().head(*args, **kwargs)
|
|
228
310
|
|
|
229
|
-
def tail(
|
|
230
|
-
self, *args, **kwargs
|
|
231
|
-
) -> 'pxt.dataframe.DataFrameResultSet':
|
|
311
|
+
def tail(self, *args: Any, **kwargs: Any) -> 'pxt._query.ResultSet':
|
|
232
312
|
"""Return the last n rows inserted into this table."""
|
|
233
|
-
return self.
|
|
313
|
+
return self.select().tail(*args, **kwargs)
|
|
234
314
|
|
|
235
315
|
def count(self) -> int:
|
|
236
316
|
"""Return the number of rows in this table."""
|
|
237
|
-
return self.
|
|
317
|
+
return self.select().count()
|
|
238
318
|
|
|
239
|
-
@property
|
|
240
319
|
def columns(self) -> list[str]:
|
|
241
|
-
"""Return the names of the columns in this table.
|
|
320
|
+
"""Return the names of the columns in this table."""
|
|
242
321
|
cols = self._tbl_version_path.columns()
|
|
243
322
|
return [c.name for c in cols]
|
|
244
323
|
|
|
245
|
-
|
|
246
|
-
def _schema(self) -> dict[str, ts.ColumnType]:
|
|
324
|
+
def _get_schema(self) -> dict[str, ts.ColumnType]:
|
|
247
325
|
"""Return the schema (column names and column types) of this table."""
|
|
248
326
|
return {c.name: c.col_type for c in self._tbl_version_path.columns()}
|
|
249
327
|
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
"""Return the names of the registered queries for this table."""
|
|
253
|
-
return list(self.__query_scope._queries.keys())
|
|
328
|
+
def get_base_table(self) -> 'Table' | None:
|
|
329
|
+
return self._get_base_table()
|
|
254
330
|
|
|
255
|
-
@
|
|
256
|
-
def
|
|
257
|
-
"""
|
|
258
|
-
The base table of this `Table`. If this table is a view, returns the `Table`
|
|
259
|
-
from which it was derived. Otherwise, returns `None`.
|
|
260
|
-
"""
|
|
261
|
-
if self._tbl_version_path.base is None:
|
|
262
|
-
return None
|
|
263
|
-
base_id = self._tbl_version_path.base.tbl_version.id
|
|
264
|
-
return catalog.Catalog.get().tbls[base_id]
|
|
331
|
+
@abc.abstractmethod
|
|
332
|
+
def _get_base_table(self) -> 'Table' | None:
|
|
333
|
+
"""The base's Table instance. Requires a transaction context"""
|
|
265
334
|
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
"""
|
|
271
|
-
bases = []
|
|
272
|
-
base = self._base
|
|
335
|
+
def _get_base_tables(self) -> list['Table']:
|
|
336
|
+
"""The ancestor list of bases of this table, starting with its immediate base. Requires a transaction context"""
|
|
337
|
+
bases: list[Table] = []
|
|
338
|
+
base = self._get_base_table()
|
|
273
339
|
while base is not None:
|
|
274
340
|
bases.append(base)
|
|
275
|
-
base = base.
|
|
341
|
+
base = base._get_base_table()
|
|
276
342
|
return bases
|
|
277
343
|
|
|
278
344
|
@property
|
|
279
|
-
|
|
280
|
-
|
|
345
|
+
@abc.abstractmethod
|
|
346
|
+
def _effective_base_versions(self) -> list[int | None]:
|
|
347
|
+
"""The effective versions of the ancestor bases, starting with its immediate base."""
|
|
281
348
|
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
return self._tbl_version.num_retained_versions
|
|
349
|
+
def _get_comment(self) -> str:
|
|
350
|
+
return self._tbl_version_path.comment()
|
|
285
351
|
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
352
|
+
def _get_num_retained_versions(self) -> int:
|
|
353
|
+
return self._tbl_version_path.num_retained_versions()
|
|
354
|
+
|
|
355
|
+
def _get_media_validation(self) -> MediaValidation:
|
|
356
|
+
return self._tbl_version_path.media_validation()
|
|
289
357
|
|
|
290
358
|
def __repr__(self) -> str:
|
|
291
359
|
return self._descriptors().to_string()
|
|
@@ -297,74 +365,56 @@ class Table(SchemaObject):
|
|
|
297
365
|
"""
|
|
298
366
|
Constructs a list of descriptors for this table that can be pretty-printed.
|
|
299
367
|
"""
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
helper.append(
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
else:
|
|
318
|
-
title = f'View\n{self._path!r}'
|
|
319
|
-
title += f'\n(of {self.__bases_to_desc()})'
|
|
320
|
-
return title
|
|
321
|
-
|
|
322
|
-
def _col_descriptor(self, columns: Optional[list[str]] = None) -> pd.DataFrame:
|
|
368
|
+
from pixeltable.catalog import Catalog
|
|
369
|
+
|
|
370
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=False):
|
|
371
|
+
helper = DescriptionHelper()
|
|
372
|
+
helper.append(self._table_descriptor())
|
|
373
|
+
helper.append(self._col_descriptor())
|
|
374
|
+
idxs = self._index_descriptor()
|
|
375
|
+
if not idxs.empty:
|
|
376
|
+
helper.append(idxs)
|
|
377
|
+
stores = self._external_store_descriptor()
|
|
378
|
+
if not stores.empty:
|
|
379
|
+
helper.append(stores)
|
|
380
|
+
if self._get_comment():
|
|
381
|
+
helper.append(f'COMMENT: {self._get_comment()}')
|
|
382
|
+
return helper
|
|
383
|
+
|
|
384
|
+
def _col_descriptor(self, columns: list[str] | None = None) -> pd.DataFrame:
|
|
323
385
|
return pd.DataFrame(
|
|
324
386
|
{
|
|
325
387
|
'Column Name': col.name,
|
|
326
388
|
'Type': col.col_type._to_str(as_schema=True),
|
|
327
|
-
'Computed With': col.value_expr.display_str(inline=False) if col.value_expr is not None else ''
|
|
389
|
+
'Computed With': col.value_expr.display_str(inline=False) if col.value_expr is not None else '',
|
|
328
390
|
}
|
|
329
|
-
for col in self.
|
|
391
|
+
for col in self._tbl_version_path.columns()
|
|
330
392
|
if columns is None or col.name in columns
|
|
331
393
|
)
|
|
332
394
|
|
|
333
|
-
def
|
|
334
|
-
bases = self._bases
|
|
335
|
-
assert len(bases) >= 1
|
|
336
|
-
if len(bases) <= 2:
|
|
337
|
-
return ', '.join(repr(b._path) for b in bases)
|
|
338
|
-
else:
|
|
339
|
-
return f'{bases[0]._path!r}, ..., {bases[-1]._path!r}'
|
|
340
|
-
|
|
341
|
-
def _index_descriptor(self, columns: Optional[list[str]] = None) -> pd.DataFrame:
|
|
395
|
+
def _index_descriptor(self, columns: list[str] | None = None) -> pd.DataFrame:
|
|
342
396
|
from pixeltable import index
|
|
343
397
|
|
|
398
|
+
if self._tbl_version is None:
|
|
399
|
+
return pd.DataFrame([])
|
|
344
400
|
pd_rows = []
|
|
345
|
-
for name, info in self._tbl_version.idxs_by_name.items():
|
|
401
|
+
for name, info in self._tbl_version.get().idxs_by_name.items():
|
|
346
402
|
if isinstance(info.idx, index.EmbeddingIndex) and (columns is None or info.col.name in columns):
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
embed_str = f'{display_embed} (+1)'
|
|
350
|
-
else:
|
|
351
|
-
embed_str = str(display_embed)
|
|
403
|
+
col_ref = ColumnRef(info.col)
|
|
404
|
+
embedding = info.idx.embeddings[info.col.col_type._type](col_ref)
|
|
352
405
|
row = {
|
|
353
406
|
'Index Name': name,
|
|
354
407
|
'Column': info.col.name,
|
|
355
408
|
'Metric': str(info.idx.metric.name.lower()),
|
|
356
|
-
'Embedding':
|
|
409
|
+
'Embedding': str(embedding),
|
|
357
410
|
}
|
|
358
411
|
pd_rows.append(row)
|
|
359
412
|
return pd.DataFrame(pd_rows)
|
|
360
413
|
|
|
361
414
|
def _external_store_descriptor(self) -> pd.DataFrame:
|
|
362
415
|
pd_rows = []
|
|
363
|
-
for name, store in self.
|
|
364
|
-
row = {
|
|
365
|
-
'External Store': name,
|
|
366
|
-
'Type': type(store).__name__,
|
|
367
|
-
}
|
|
416
|
+
for name, store in self._tbl_version_path.tbl_version.get().external_stores.items():
|
|
417
|
+
row = {'External Store': name, 'Type': type(store).__name__}
|
|
368
418
|
pd_rows.append(row)
|
|
369
419
|
return pd.DataFrame(pd_rows)
|
|
370
420
|
|
|
@@ -372,77 +422,103 @@ class Table(SchemaObject):
|
|
|
372
422
|
"""
|
|
373
423
|
Print the table schema.
|
|
374
424
|
"""
|
|
375
|
-
self._check_is_dropped()
|
|
376
425
|
if getattr(builtins, '__IPYTHON__', False):
|
|
377
|
-
from IPython.display import display
|
|
378
|
-
|
|
426
|
+
from IPython.display import Markdown, display
|
|
427
|
+
|
|
428
|
+
display(Markdown(self._repr_html_()))
|
|
379
429
|
else:
|
|
380
430
|
print(repr(self))
|
|
381
431
|
|
|
382
|
-
def _drop(self) -> None:
|
|
383
|
-
cat = catalog.Catalog.get()
|
|
384
|
-
# verify all dependents are deleted by now
|
|
385
|
-
for dep in cat.tbl_dependents[self._id]:
|
|
386
|
-
assert dep._is_dropped
|
|
387
|
-
self._check_is_dropped()
|
|
388
|
-
self._tbl_version.drop()
|
|
389
|
-
self._is_dropped = True
|
|
390
|
-
# update catalog
|
|
391
|
-
cat = catalog.Catalog.get()
|
|
392
|
-
del cat.tbls[self._id]
|
|
393
|
-
|
|
394
432
|
# TODO Factor this out into a separate module.
|
|
395
433
|
# The return type is unresolvable, but torch can't be imported since it's an optional dependency.
|
|
396
|
-
def to_pytorch_dataset(self, image_format
|
|
434
|
+
def to_pytorch_dataset(self, image_format: str = 'pt') -> 'torch.utils.data.IterableDataset':
|
|
397
435
|
"""Return a PyTorch Dataset for this table.
|
|
398
|
-
|
|
436
|
+
See Query.to_pytorch_dataset()
|
|
399
437
|
"""
|
|
400
|
-
return self.
|
|
438
|
+
return self.select().to_pytorch_dataset(image_format=image_format)
|
|
401
439
|
|
|
402
440
|
def to_coco_dataset(self) -> Path:
|
|
403
441
|
"""Return the path to a COCO json file for this table.
|
|
404
|
-
|
|
405
|
-
"""
|
|
406
|
-
return self._df().to_coco_dataset()
|
|
407
|
-
|
|
408
|
-
def __setitem__(self, col_name: str, spec: Union[ts.ColumnType, exprs.Expr]) -> None:
|
|
442
|
+
See Query.to_coco_dataset()
|
|
409
443
|
"""
|
|
410
|
-
|
|
444
|
+
return self.select().to_coco_dataset()
|
|
411
445
|
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
is
|
|
446
|
+
def _column_has_dependents(self, col: Column) -> bool:
|
|
447
|
+
"""Returns True if the column has dependents, False otherwise."""
|
|
448
|
+
assert col is not None
|
|
449
|
+
assert col.name in self._get_schema()
|
|
450
|
+
cat = catalog.Catalog.get()
|
|
451
|
+
if any(c.name is not None for c in cat.get_column_dependents(col.get_tbl().id, col.id)):
|
|
452
|
+
return True
|
|
453
|
+
assert self._tbl_version is not None
|
|
454
|
+
return any(
|
|
455
|
+
col in store.get_local_columns()
|
|
456
|
+
for view in (self, *self._get_views(recursive=True))
|
|
457
|
+
for store in view._tbl_version.get().external_stores.values()
|
|
458
|
+
)
|
|
415
459
|
|
|
416
|
-
|
|
460
|
+
def _ignore_or_drop_existing_columns(self, new_col_names: list[str], if_exists: IfExistsParam) -> list[str]:
|
|
461
|
+
"""Check and handle existing columns in the new column specification based on the if_exists parameter.
|
|
417
462
|
|
|
418
|
-
|
|
463
|
+
If `if_exists='ignore'`, returns a list of existing columns, if any, in `new_col_names`.
|
|
419
464
|
"""
|
|
420
|
-
self.
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
465
|
+
assert self._tbl_version is not None
|
|
466
|
+
existing_col_names = set(self._get_schema().keys())
|
|
467
|
+
cols_to_ignore = []
|
|
468
|
+
for new_col_name in new_col_names:
|
|
469
|
+
if new_col_name in existing_col_names:
|
|
470
|
+
if if_exists == IfExistsParam.ERROR:
|
|
471
|
+
raise excs.Error(f'Duplicate column name: {new_col_name}')
|
|
472
|
+
elif if_exists == IfExistsParam.IGNORE:
|
|
473
|
+
cols_to_ignore.append(new_col_name)
|
|
474
|
+
elif if_exists in (IfExistsParam.REPLACE, IfExistsParam.REPLACE_FORCE):
|
|
475
|
+
if new_col_name not in self._tbl_version.get().cols_by_name:
|
|
476
|
+
# for views, it is possible that the existing column
|
|
477
|
+
# is a base table column; in that case, we should not
|
|
478
|
+
# drop/replace that column. Continue to raise error.
|
|
479
|
+
raise excs.Error(f'Column {new_col_name!r} is a base table column. Cannot replace it.')
|
|
480
|
+
col = self._tbl_version.get().cols_by_name[new_col_name]
|
|
481
|
+
# cannot drop a column with dependents; so reject
|
|
482
|
+
# replace directive if column has dependents.
|
|
483
|
+
if self._column_has_dependents(col):
|
|
484
|
+
raise excs.Error(
|
|
485
|
+
f'Column {new_col_name!r} already exists and has dependents. '
|
|
486
|
+
f'Cannot {if_exists.name.lower()} it.'
|
|
487
|
+
)
|
|
488
|
+
self.drop_column(new_col_name)
|
|
489
|
+
assert new_col_name not in self._tbl_version.get().cols_by_name
|
|
490
|
+
return cols_to_ignore
|
|
426
491
|
|
|
427
492
|
def add_columns(
|
|
428
493
|
self,
|
|
429
|
-
schema: dict[str,
|
|
494
|
+
schema: dict[str, ts.ColumnType | builtins.type | _GenericAlias],
|
|
495
|
+
if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
|
|
430
496
|
) -> UpdateStatus:
|
|
431
497
|
"""
|
|
432
|
-
Adds multiple columns to the table. The columns must be concrete (non-computed) columns; to add computed
|
|
433
|
-
use [`add_computed_column()`][pixeltable.catalog.Table.add_computed_column] instead.
|
|
498
|
+
Adds multiple columns to the table. The columns must be concrete (non-computed) columns; to add computed
|
|
499
|
+
columns, use [`add_computed_column()`][pixeltable.catalog.Table.add_computed_column] instead.
|
|
434
500
|
|
|
435
|
-
The format of the `schema` argument is
|
|
436
|
-
[`create_table()`][pixeltable.globals.create_table].
|
|
501
|
+
The format of the `schema` argument is a dict mapping column names to their types.
|
|
437
502
|
|
|
438
503
|
Args:
|
|
439
504
|
schema: A dictionary mapping column names to types.
|
|
505
|
+
if_exists: Determines the behavior if a column already exists. Must be one of the following:
|
|
506
|
+
|
|
507
|
+
- `'error'`: an exception will be raised.
|
|
508
|
+
- `'ignore'`: do nothing and return.
|
|
509
|
+
- `'replace'` or `'replace_force'`: drop the existing column and add the new column, if it has no
|
|
510
|
+
dependents.
|
|
511
|
+
|
|
512
|
+
Note that the `if_exists` parameter is applied to all columns in the schema.
|
|
513
|
+
To apply different behaviors to different columns, please use
|
|
514
|
+
[`add_column()`][pixeltable.Table.add_column] for each column.
|
|
440
515
|
|
|
441
516
|
Returns:
|
|
442
517
|
Information about the execution status of the operation.
|
|
443
518
|
|
|
444
519
|
Raises:
|
|
445
|
-
Error: If any column name is invalid or already exists
|
|
520
|
+
Error: If any column name is invalid, or already exists and `if_exists='error'`,
|
|
521
|
+
or `if_exists='replace*'` but the column has dependents or is a base table column.
|
|
446
522
|
|
|
447
523
|
Examples:
|
|
448
524
|
Add multiple columns to the table `my_table`:
|
|
@@ -454,50 +530,60 @@ class Table(SchemaObject):
|
|
|
454
530
|
... }
|
|
455
531
|
... tbl.add_columns(schema)
|
|
456
532
|
"""
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
533
|
+
from pixeltable.catalog import Catalog
|
|
534
|
+
|
|
535
|
+
# lock_mutable_tree=True: we might end up having to drop existing columns, which requires locking the tree
|
|
536
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
537
|
+
self.__check_mutable('add columns to')
|
|
538
|
+
col_schema = {
|
|
539
|
+
col_name: {'type': ts.ColumnType.normalize_type(spec, nullable_default=True, allow_builtin_types=False)}
|
|
540
|
+
for col_name, spec in schema.items()
|
|
541
|
+
}
|
|
542
|
+
|
|
543
|
+
# handle existing columns based on if_exists parameter
|
|
544
|
+
cols_to_ignore = self._ignore_or_drop_existing_columns(
|
|
545
|
+
list(col_schema.keys()), IfExistsParam.validated(if_exists, 'if_exists')
|
|
546
|
+
)
|
|
547
|
+
# if all columns to be added already exist and user asked to ignore
|
|
548
|
+
# existing columns, there's nothing to do.
|
|
549
|
+
for cname in cols_to_ignore:
|
|
550
|
+
assert cname in col_schema
|
|
551
|
+
del col_schema[cname]
|
|
552
|
+
result = UpdateStatus()
|
|
553
|
+
if len(col_schema) == 0:
|
|
554
|
+
return result
|
|
555
|
+
new_cols = self._create_columns(col_schema)
|
|
556
|
+
for new_col in new_cols:
|
|
557
|
+
self._verify_column(new_col)
|
|
558
|
+
assert self._tbl_version is not None
|
|
559
|
+
result += self._tbl_version.get().add_columns(new_cols, print_stats=False, on_error='abort')
|
|
560
|
+
FileCache.get().emit_eviction_warnings()
|
|
561
|
+
return result
|
|
562
|
+
|
|
473
563
|
def add_column(
|
|
474
564
|
self,
|
|
475
565
|
*,
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
479
|
-
**kwargs: Union[ts.ColumnType, builtins.type, _GenericAlias, exprs.Expr]
|
|
566
|
+
if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
|
|
567
|
+
**kwargs: ts.ColumnType | builtins.type | _GenericAlias | exprs.Expr,
|
|
480
568
|
) -> UpdateStatus:
|
|
481
569
|
"""
|
|
482
|
-
Adds
|
|
570
|
+
Adds an ordinary (non-computed) column to the table.
|
|
483
571
|
|
|
484
572
|
Args:
|
|
485
573
|
kwargs: Exactly one keyword argument of the form `col_name=col_type`.
|
|
486
|
-
|
|
487
|
-
print_stats: If `True`, print execution metrics during evaluation.
|
|
488
|
-
on_error: Determines the behavior if an error occurs while evaluating the column expression for at least one
|
|
489
|
-
row.
|
|
574
|
+
if_exists: Determines the behavior if the column already exists. Must be one of the following:
|
|
490
575
|
|
|
491
|
-
- `'
|
|
492
|
-
- `'ignore'`:
|
|
493
|
-
|
|
494
|
-
|
|
576
|
+
- `'error'`: an exception will be raised.
|
|
577
|
+
- `'ignore'`: do nothing and return.
|
|
578
|
+
- `'replace'` or `'replace_force'`: drop the existing column and add the new column, if it has
|
|
579
|
+
no dependents.
|
|
495
580
|
|
|
496
581
|
Returns:
|
|
497
582
|
Information about the execution status of the operation.
|
|
498
583
|
|
|
499
584
|
Raises:
|
|
500
|
-
Error: If the column name is invalid or already exists
|
|
585
|
+
Error: If the column name is invalid, or already exists and `if_exists='error'`,
|
|
586
|
+
or `if_exists='replace*'` but the column has dependents or is a base table column.
|
|
501
587
|
|
|
502
588
|
Examples:
|
|
503
589
|
Add an int column:
|
|
@@ -506,52 +592,59 @@ class Table(SchemaObject):
|
|
|
506
592
|
|
|
507
593
|
Alternatively, this can also be expressed as:
|
|
508
594
|
|
|
509
|
-
>>> tbl
|
|
595
|
+
>>> tbl.add_columns({'new_col': pxt.Int})
|
|
510
596
|
"""
|
|
511
|
-
self._check_is_dropped()
|
|
512
597
|
# verify kwargs and construct column schema dict
|
|
513
598
|
if len(kwargs) != 1:
|
|
514
599
|
raise excs.Error(
|
|
515
|
-
f'add_column() requires exactly one keyword argument of the form
|
|
516
|
-
f'got {len(kwargs)} instead ({", ".join(
|
|
600
|
+
f'add_column() requires exactly one keyword argument of the form `col_name=col_type`; '
|
|
601
|
+
f'got {len(kwargs)} arguments instead ({", ".join(kwargs.keys())})'
|
|
517
602
|
)
|
|
518
|
-
|
|
519
|
-
if not
|
|
520
|
-
raise excs.Error(
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
col_schema['type'] = ts.ColumnType.normalize_type(spec, nullable_default=True, allow_builtin_types=False)
|
|
525
|
-
else:
|
|
526
|
-
col_schema['value'] = spec
|
|
527
|
-
if stored is not None:
|
|
528
|
-
col_schema['stored'] = stored
|
|
529
|
-
|
|
530
|
-
new_col = self._create_columns({col_name: col_schema})[0]
|
|
531
|
-
self._verify_column(new_col, set(self._schema.keys()), set(self._query_names))
|
|
532
|
-
status = self._tbl_version.add_columns([new_col], print_stats=print_stats, on_error=on_error)
|
|
533
|
-
FileCache.get().emit_eviction_warnings()
|
|
534
|
-
return status
|
|
603
|
+
col_type = next(iter(kwargs.values()))
|
|
604
|
+
if not isinstance(col_type, (ts.ColumnType, type, _GenericAlias)):
|
|
605
|
+
raise excs.Error(
|
|
606
|
+
'The argument to add_column() must be a type; did you intend to use add_computed_column() instead?'
|
|
607
|
+
)
|
|
608
|
+
return self.add_columns(kwargs, if_exists=if_exists)
|
|
535
609
|
|
|
536
610
|
def add_computed_column(
|
|
537
611
|
self,
|
|
538
612
|
*,
|
|
539
|
-
stored:
|
|
613
|
+
stored: bool | None = None,
|
|
614
|
+
destination: str | Path | None = None,
|
|
540
615
|
print_stats: bool = False,
|
|
541
616
|
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
542
|
-
|
|
617
|
+
if_exists: Literal['error', 'ignore', 'replace'] = 'error',
|
|
618
|
+
**kwargs: exprs.Expr,
|
|
543
619
|
) -> UpdateStatus:
|
|
544
620
|
"""
|
|
545
621
|
Adds a computed column to the table.
|
|
546
622
|
|
|
547
623
|
Args:
|
|
548
624
|
kwargs: Exactly one keyword argument of the form `col_name=expression`.
|
|
625
|
+
stored: Whether the column is materialized and stored or computed on demand.
|
|
626
|
+
destination: An object store reference for persisting computed files.
|
|
627
|
+
print_stats: If `True`, print execution metrics during evaluation.
|
|
628
|
+
on_error: Determines the behavior if an error occurs while evaluating the column expression for at least one
|
|
629
|
+
row.
|
|
630
|
+
|
|
631
|
+
- `'abort'`: an exception will be raised and the column will not be added.
|
|
632
|
+
- `'ignore'`: execution will continue and the column will be added. Any rows
|
|
633
|
+
with errors will have a `None` value for the column, with information about the error stored in the
|
|
634
|
+
corresponding `tbl.col_name.errormsg` and `tbl.col_name.errortype` fields.
|
|
635
|
+
if_exists: Determines the behavior if the column already exists. Must be one of the following:
|
|
636
|
+
|
|
637
|
+
- `'error'`: an exception will be raised.
|
|
638
|
+
- `'ignore'`: do nothing and return.
|
|
639
|
+
- `'replace'` or `'replace_force'`: drop the existing column and add the new column, if it has
|
|
640
|
+
no dependents.
|
|
549
641
|
|
|
550
642
|
Returns:
|
|
551
643
|
Information about the execution status of the operation.
|
|
552
644
|
|
|
553
645
|
Raises:
|
|
554
|
-
Error: If the column name is invalid or already exists
|
|
646
|
+
Error: If the column name is invalid or already exists and `if_exists='error'`,
|
|
647
|
+
or `if_exists='replace*'` but the column has dependents or is a base table column.
|
|
555
648
|
|
|
556
649
|
Examples:
|
|
557
650
|
For a table with an image column `frame`, add an image column `rotated` that rotates the image by
|
|
@@ -563,25 +656,53 @@ class Table(SchemaObject):
|
|
|
563
656
|
|
|
564
657
|
>>> tbl.add_computed_column(rotated=tbl.frame.rotate(90), stored=False)
|
|
565
658
|
"""
|
|
566
|
-
|
|
567
|
-
if len(kwargs) != 1:
|
|
568
|
-
raise excs.Error(
|
|
569
|
-
f'add_computed_column() requires exactly one keyword argument of the form "column-name=type|value-expression"; '
|
|
570
|
-
f'got {len(kwargs)} arguments instead ({", ".join(list(kwargs.keys()))})'
|
|
571
|
-
)
|
|
572
|
-
col_name, spec = next(iter(kwargs.items()))
|
|
573
|
-
if not is_valid_identifier(col_name):
|
|
574
|
-
raise excs.Error(f'Invalid column name: {col_name!r}')
|
|
575
|
-
|
|
576
|
-
col_schema: dict[str, Any] = {'value': spec}
|
|
577
|
-
if stored is not None:
|
|
578
|
-
col_schema['stored'] = stored
|
|
659
|
+
from pixeltable.catalog import Catalog
|
|
579
660
|
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
661
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
662
|
+
self.__check_mutable('add columns to')
|
|
663
|
+
if len(kwargs) != 1:
|
|
664
|
+
raise excs.Error(
|
|
665
|
+
f'add_computed_column() requires exactly one keyword argument of the form '
|
|
666
|
+
'`col_name=col_type` or `col_name=expression`; '
|
|
667
|
+
f'got {len(kwargs)} arguments instead ({", ".join(kwargs.keys())})'
|
|
668
|
+
)
|
|
669
|
+
col_name, spec = next(iter(kwargs.items()))
|
|
670
|
+
if not is_valid_identifier(col_name):
|
|
671
|
+
raise excs.Error(f'Invalid column name: {col_name}')
|
|
672
|
+
|
|
673
|
+
col_schema: dict[str, Any] = {'value': spec}
|
|
674
|
+
if stored is not None:
|
|
675
|
+
col_schema['stored'] = stored
|
|
676
|
+
|
|
677
|
+
if destination is not None:
|
|
678
|
+
col_schema['destination'] = destination
|
|
679
|
+
|
|
680
|
+
# Raise an error if the column expression refers to a column error property
|
|
681
|
+
if isinstance(spec, exprs.Expr):
|
|
682
|
+
for e in spec.subexprs(expr_class=exprs.ColumnPropertyRef, traverse_matches=False):
|
|
683
|
+
if e.is_cellmd_prop():
|
|
684
|
+
raise excs.Error(
|
|
685
|
+
f'Use of a reference to the {e.prop.name.lower()!r} property of another column '
|
|
686
|
+
f'is not allowed in a computed column.'
|
|
687
|
+
)
|
|
688
|
+
|
|
689
|
+
# handle existing columns based on if_exists parameter
|
|
690
|
+
cols_to_ignore = self._ignore_or_drop_existing_columns(
|
|
691
|
+
[col_name], IfExistsParam.validated(if_exists, 'if_exists')
|
|
692
|
+
)
|
|
693
|
+
# if the column to add already exists and user asked to ignore
|
|
694
|
+
# existing column, there's nothing to do.
|
|
695
|
+
result = UpdateStatus()
|
|
696
|
+
if len(cols_to_ignore) != 0:
|
|
697
|
+
assert cols_to_ignore[0] == col_name
|
|
698
|
+
return result
|
|
699
|
+
|
|
700
|
+
new_col = self._create_columns({col_name: col_schema})[0]
|
|
701
|
+
self._verify_column(new_col)
|
|
702
|
+
assert self._tbl_version is not None
|
|
703
|
+
result += self._tbl_version.get().add_columns([new_col], print_stats=print_stats, on_error=on_error)
|
|
704
|
+
FileCache.get().emit_eviction_warnings()
|
|
705
|
+
return result
|
|
585
706
|
|
|
586
707
|
@classmethod
|
|
587
708
|
def _validate_column_spec(cls, name: str, spec: dict[str, Any]) -> None:
|
|
@@ -591,118 +712,132 @@ class Table(SchemaObject):
|
|
|
591
712
|
(on account of containing Python Callables or Exprs).
|
|
592
713
|
"""
|
|
593
714
|
assert isinstance(spec, dict)
|
|
594
|
-
valid_keys = {'type', 'value', 'stored', 'media_validation'}
|
|
595
|
-
for k in spec
|
|
715
|
+
valid_keys = {'type', 'value', 'stored', 'media_validation', 'destination'}
|
|
716
|
+
for k in spec:
|
|
596
717
|
if k not in valid_keys:
|
|
597
|
-
raise excs.Error(f'Column {name}: invalid key {k!r}')
|
|
718
|
+
raise excs.Error(f'Column {name!r}: invalid key {k!r}')
|
|
598
719
|
|
|
599
720
|
if 'type' not in spec and 'value' not in spec:
|
|
600
|
-
raise excs.Error(f"Column {name}: 'type' or 'value' must be specified")
|
|
721
|
+
raise excs.Error(f"Column {name!r}: 'type' or 'value' must be specified")
|
|
601
722
|
|
|
602
|
-
if 'type' in spec:
|
|
603
|
-
|
|
604
|
-
raise excs.Error(f'Column {name}: "type" must be a type or ColumnType, got {spec["type"]}')
|
|
723
|
+
if 'type' in spec and not isinstance(spec['type'], (ts.ColumnType, type, _GenericAlias)):
|
|
724
|
+
raise excs.Error(f"Column {name!r}: 'type' must be a type or ColumnType; got {spec['type']}")
|
|
605
725
|
|
|
606
726
|
if 'value' in spec:
|
|
607
727
|
value_expr = exprs.Expr.from_object(spec['value'])
|
|
608
728
|
if value_expr is None:
|
|
609
|
-
raise excs.Error(f
|
|
729
|
+
raise excs.Error(f"Column {name!r}: 'value' must be a Pixeltable expression.")
|
|
610
730
|
if 'type' in spec:
|
|
611
|
-
raise excs.Error(f"Column {name}: 'type' is redundant if 'value' is specified")
|
|
731
|
+
raise excs.Error(f"Column {name!r}: 'type' is redundant if 'value' is specified")
|
|
612
732
|
|
|
613
733
|
if 'media_validation' in spec:
|
|
614
|
-
_ = catalog.MediaValidation.validated(spec['media_validation'], f'Column {name}: media_validation')
|
|
734
|
+
_ = catalog.MediaValidation.validated(spec['media_validation'], f'Column {name!r}: media_validation')
|
|
615
735
|
|
|
616
736
|
if 'stored' in spec and not isinstance(spec['stored'], bool):
|
|
617
|
-
raise excs.Error(f
|
|
737
|
+
raise excs.Error(f"Column {name!r}: 'stored' must be a bool; got {spec['stored']}")
|
|
738
|
+
|
|
739
|
+
d = spec.get('destination')
|
|
740
|
+
if d is not None and not isinstance(d, (str, Path)):
|
|
741
|
+
raise excs.Error(f'Column {name!r}: `destination` must be a string or path; got {d}')
|
|
618
742
|
|
|
619
743
|
@classmethod
|
|
620
744
|
def _create_columns(cls, schema: dict[str, Any]) -> list[Column]:
|
|
621
745
|
"""Construct list of Columns, given schema"""
|
|
622
746
|
columns: list[Column] = []
|
|
623
747
|
for name, spec in schema.items():
|
|
624
|
-
col_type:
|
|
625
|
-
value_expr:
|
|
626
|
-
primary_key:
|
|
627
|
-
media_validation:
|
|
748
|
+
col_type: ts.ColumnType | None = None
|
|
749
|
+
value_expr: exprs.Expr | None = None
|
|
750
|
+
primary_key: bool = False
|
|
751
|
+
media_validation: catalog.MediaValidation | None = None
|
|
628
752
|
stored = True
|
|
753
|
+
destination: str | None = None
|
|
629
754
|
|
|
630
755
|
if isinstance(spec, (ts.ColumnType, type, _GenericAlias)):
|
|
631
756
|
col_type = ts.ColumnType.normalize_type(spec, nullable_default=True, allow_builtin_types=False)
|
|
632
757
|
elif isinstance(spec, exprs.Expr):
|
|
633
758
|
# create copy so we can modify it
|
|
634
759
|
value_expr = spec.copy()
|
|
760
|
+
value_expr.bind_rel_paths()
|
|
635
761
|
elif isinstance(spec, dict):
|
|
636
762
|
cls._validate_column_spec(name, spec)
|
|
637
763
|
if 'type' in spec:
|
|
638
764
|
col_type = ts.ColumnType.normalize_type(
|
|
639
|
-
spec['type'], nullable_default=True, allow_builtin_types=False
|
|
765
|
+
spec['type'], nullable_default=True, allow_builtin_types=False
|
|
766
|
+
)
|
|
640
767
|
value_expr = spec.get('value')
|
|
641
768
|
if value_expr is not None and isinstance(value_expr, exprs.Expr):
|
|
642
769
|
# create copy so we can modify it
|
|
643
770
|
value_expr = value_expr.copy()
|
|
771
|
+
value_expr.bind_rel_paths()
|
|
644
772
|
stored = spec.get('stored', True)
|
|
645
|
-
primary_key = spec.get('primary_key')
|
|
773
|
+
primary_key = spec.get('primary_key', False)
|
|
646
774
|
media_validation_str = spec.get('media_validation')
|
|
647
775
|
media_validation = (
|
|
648
|
-
catalog.MediaValidation[media_validation_str.upper()] if media_validation_str is not None
|
|
649
|
-
else None
|
|
776
|
+
catalog.MediaValidation[media_validation_str.upper()] if media_validation_str is not None else None
|
|
650
777
|
)
|
|
778
|
+
destination = spec.get('destination')
|
|
651
779
|
else:
|
|
652
780
|
raise excs.Error(f'Invalid value for column {name!r}')
|
|
653
781
|
|
|
654
782
|
column = Column(
|
|
655
|
-
name,
|
|
656
|
-
|
|
783
|
+
name,
|
|
784
|
+
col_type=col_type,
|
|
785
|
+
computed_with=value_expr,
|
|
786
|
+
stored=stored,
|
|
787
|
+
is_pk=primary_key,
|
|
788
|
+
media_validation=media_validation,
|
|
789
|
+
destination=destination,
|
|
790
|
+
)
|
|
791
|
+
# Validate the column's resolved_destination. This will ensure that if the column uses a default (global)
|
|
792
|
+
# media destination, it gets validated at this time.
|
|
793
|
+
ObjectOps.validate_destination(column.destination, column.name)
|
|
657
794
|
columns.append(column)
|
|
795
|
+
|
|
658
796
|
return columns
|
|
659
797
|
|
|
660
798
|
@classmethod
|
|
661
|
-
def
|
|
662
|
-
|
|
663
|
-
|
|
799
|
+
def validate_column_name(cls, name: str) -> None:
|
|
800
|
+
"""Check that a name is usable as a pixeltable column name"""
|
|
801
|
+
if is_system_column_name(name) or is_python_keyword(name):
|
|
802
|
+
raise excs.Error(f'{name!r} is a reserved name in Pixeltable; please choose a different column name.')
|
|
803
|
+
if not is_valid_identifier(name):
|
|
804
|
+
raise excs.Error(f'Invalid column name: {name}')
|
|
805
|
+
|
|
806
|
+
@classmethod
|
|
807
|
+
def _verify_column(cls, col: Column) -> None:
|
|
664
808
|
"""Check integrity of user-supplied Column and supply defaults"""
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
raise excs.Error(f"Invalid column name: {col.name!r}")
|
|
669
|
-
if col.name in existing_column_names:
|
|
670
|
-
raise excs.Error(f'Duplicate column name: {col.name!r}')
|
|
671
|
-
if existing_query_names is not None and col.name in existing_query_names:
|
|
672
|
-
raise excs.Error(f'Column name conflicts with a registered query: {col.name!r}')
|
|
673
|
-
if col.stored is False and not (col.is_computed and col.col_type.is_image_type()):
|
|
674
|
-
raise excs.Error(f'Column {col.name!r}: stored={col.stored} only applies to computed image columns')
|
|
809
|
+
cls.validate_column_name(col.name)
|
|
810
|
+
if col.stored is False and not col.is_computed:
|
|
811
|
+
raise excs.Error(f'Column {col.name!r}: `stored={col.stored}` only applies to computed columns')
|
|
675
812
|
if col.stored is False and col.has_window_fn_call():
|
|
676
|
-
raise excs.Error(
|
|
677
|
-
|
|
678
|
-
|
|
813
|
+
raise excs.Error(
|
|
814
|
+
(
|
|
815
|
+
f'Column {col.name!r}: `stored={col.stored}` is not valid for image columns computed with a '
|
|
816
|
+
f'streaming function'
|
|
817
|
+
)
|
|
818
|
+
)
|
|
819
|
+
if col._explicit_destination is not None and not (col.stored and col.is_computed):
|
|
820
|
+
raise excs.Error(f'Column {col.name!r}: `destination` property only applies to stored computed columns')
|
|
679
821
|
|
|
680
822
|
@classmethod
|
|
681
823
|
def _verify_schema(cls, schema: list[Column]) -> None:
|
|
682
824
|
"""Check integrity of user-supplied schema and set defaults"""
|
|
683
|
-
column_names: set[str] = set()
|
|
684
825
|
for col in schema:
|
|
685
|
-
cls._verify_column(col
|
|
686
|
-
column_names.add(col.name)
|
|
687
|
-
|
|
688
|
-
def __check_column_name_exists(self, column_name: str, include_bases: bool = False) -> None:
|
|
689
|
-
col = self._tbl_version_path.get_column(column_name, include_bases)
|
|
690
|
-
if col is None:
|
|
691
|
-
raise excs.Error(f'Column {column_name!r} unknown')
|
|
692
|
-
|
|
693
|
-
def __check_column_ref_exists(self, col_ref: ColumnRef, include_bases: bool = False) -> None:
|
|
694
|
-
exists = self._tbl_version_path.has_column(col_ref.col, include_bases)
|
|
695
|
-
if not exists:
|
|
696
|
-
raise excs.Error(f'Unknown column: {col_ref.col.qualified_name}')
|
|
826
|
+
cls._verify_column(col)
|
|
697
827
|
|
|
698
|
-
def drop_column(self, column:
|
|
828
|
+
def drop_column(self, column: str | ColumnRef, if_not_exists: Literal['error', 'ignore'] = 'error') -> None:
|
|
699
829
|
"""Drop a column from the table.
|
|
700
830
|
|
|
701
831
|
Args:
|
|
702
832
|
column: The name or reference of the column to drop.
|
|
833
|
+
if_not_exists: Directive for handling a non-existent column. Must be one of the following:
|
|
834
|
+
|
|
835
|
+
- `'error'`: raise an error if the column does not exist.
|
|
836
|
+
- `'ignore'`: do nothing if the column does not exist.
|
|
703
837
|
|
|
704
838
|
Raises:
|
|
705
|
-
Error: If the column does not exist
|
|
839
|
+
Error: If the column does not exist and `if_not_exists='error'`,
|
|
840
|
+
or if it is referenced by a dependent computed column.
|
|
706
841
|
|
|
707
842
|
Examples:
|
|
708
843
|
Drop the column `col` from the table `my_table` by column name:
|
|
@@ -714,42 +849,96 @@ class Table(SchemaObject):
|
|
|
714
849
|
|
|
715
850
|
>>> tbl = pxt.get_table('my_table')
|
|
716
851
|
... tbl.drop_column(tbl.col)
|
|
852
|
+
|
|
853
|
+
Drop the column `col` from the table `my_table` if it exists, otherwise do nothing:
|
|
854
|
+
|
|
855
|
+
>>> tbl = pxt.get_table('my_table')
|
|
856
|
+
... tbl.drop_column(tbl.col, if_not_exists='ignore')
|
|
717
857
|
"""
|
|
718
|
-
|
|
719
|
-
col: Column = None
|
|
720
|
-
if isinstance(column, str):
|
|
721
|
-
self.__check_column_name_exists(column)
|
|
722
|
-
col = self._tbl_version.cols_by_name[column]
|
|
723
|
-
else:
|
|
724
|
-
self.__check_column_ref_exists(column)
|
|
725
|
-
col = column.col
|
|
858
|
+
from pixeltable.catalog import Catalog
|
|
726
859
|
|
|
727
|
-
|
|
728
|
-
if len(dependent_user_cols) > 0:
|
|
729
|
-
raise excs.Error(
|
|
730
|
-
f'Cannot drop column `{col.name}` because the following columns depend on it:\n'
|
|
731
|
-
f'{", ".join(c.name for c in dependent_user_cols)}'
|
|
732
|
-
)
|
|
860
|
+
cat = Catalog.get()
|
|
733
861
|
|
|
734
|
-
#
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
if
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
862
|
+
# lock_mutable_tree=True: we need to be able to see whether any transitive view has column dependents
|
|
863
|
+
with cat.begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
864
|
+
self.__check_mutable('drop columns from')
|
|
865
|
+
col: Column = None
|
|
866
|
+
if_not_exists_ = IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
|
|
867
|
+
|
|
868
|
+
if isinstance(column, str):
|
|
869
|
+
col = self._tbl_version_path.get_column(column)
|
|
870
|
+
if col is None:
|
|
871
|
+
if if_not_exists_ == IfNotExistsParam.ERROR:
|
|
872
|
+
raise excs.Error(f'Unknown column: {column}')
|
|
873
|
+
assert if_not_exists_ == IfNotExistsParam.IGNORE
|
|
874
|
+
return
|
|
875
|
+
if col.get_tbl().id != self._tbl_version_path.tbl_id:
|
|
876
|
+
raise excs.Error(f'Cannot drop base table column {col.name!r}')
|
|
877
|
+
col = self._tbl_version.get().cols_by_name[column]
|
|
878
|
+
else:
|
|
879
|
+
exists = self._tbl_version_path.has_column(column.col)
|
|
880
|
+
if not exists:
|
|
881
|
+
if if_not_exists_ == IfNotExistsParam.ERROR:
|
|
882
|
+
raise excs.Error(f'Unknown column: {column.col.qualified_name}')
|
|
883
|
+
assert if_not_exists_ == IfNotExistsParam.IGNORE
|
|
884
|
+
return
|
|
885
|
+
col = column.col
|
|
886
|
+
if col.get_tbl().id != self._tbl_version_path.tbl_id:
|
|
887
|
+
raise excs.Error(f'Cannot drop base table column {col.name!r}')
|
|
888
|
+
|
|
889
|
+
dependent_user_cols = [c for c in cat.get_column_dependents(col.get_tbl().id, col.id) if c.name is not None]
|
|
890
|
+
if len(dependent_user_cols) > 0:
|
|
891
|
+
raise excs.Error(
|
|
892
|
+
f'Cannot drop column {col.name!r} because the following columns depend on it:\n'
|
|
893
|
+
f'{", ".join(c.name for c in dependent_user_cols)}'
|
|
894
|
+
)
|
|
895
|
+
|
|
896
|
+
views = self._get_views(recursive=True, mutable_only=True)
|
|
897
|
+
|
|
898
|
+
# See if any view predicates depend on this column
|
|
899
|
+
dependent_views: list[tuple[Table, exprs.Expr]] = []
|
|
900
|
+
for view in views:
|
|
901
|
+
if view._tbl_version is not None:
|
|
902
|
+
predicate = view._tbl_version.get().predicate
|
|
903
|
+
if predicate is not None:
|
|
904
|
+
for predicate_col in exprs.Expr.get_refd_column_ids(predicate.as_dict()):
|
|
905
|
+
if predicate_col.tbl_id == col.get_tbl().id and predicate_col.col_id == col.id:
|
|
906
|
+
dependent_views.append((view, predicate))
|
|
907
|
+
|
|
908
|
+
if len(dependent_views) > 0:
|
|
909
|
+
dependent_views_str = '\n'.join(
|
|
910
|
+
f'view: {view._path()}, predicate: {predicate}' for view, predicate in dependent_views
|
|
911
|
+
)
|
|
912
|
+
raise excs.Error(
|
|
913
|
+
f'Cannot drop column {col.name!r} because the following views depend on it:\n{dependent_views_str}'
|
|
914
|
+
)
|
|
915
|
+
|
|
916
|
+
# See if this column has a dependent store. We need to look through all stores in all
|
|
917
|
+
# (transitive) views of this table.
|
|
918
|
+
col_handle = col.handle
|
|
919
|
+
dependent_stores = [
|
|
920
|
+
(view, store)
|
|
921
|
+
for view in (self, *views)
|
|
922
|
+
for store in view._tbl_version.get().external_stores.values()
|
|
923
|
+
if col_handle in store.get_local_columns()
|
|
746
924
|
]
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
925
|
+
if len(dependent_stores) > 0:
|
|
926
|
+
dependent_store_names = [
|
|
927
|
+
store.name if view._id == self._id else f'{store.name} (in view {view._name!r})'
|
|
928
|
+
for view, store in dependent_stores
|
|
929
|
+
]
|
|
930
|
+
raise excs.Error(
|
|
931
|
+
f'Cannot drop column {col.name!r} because the following external stores depend on it:\n'
|
|
932
|
+
f'{", ".join(dependent_store_names)}'
|
|
933
|
+
)
|
|
934
|
+
all_columns = self.columns()
|
|
935
|
+
if len(all_columns) == 1 and col.name == all_columns[0]:
|
|
936
|
+
raise excs.Error(
|
|
937
|
+
f'Cannot drop column {col.name!r} because it is the last remaining column in this table.'
|
|
938
|
+
f' Tables must have at least one column.'
|
|
939
|
+
)
|
|
751
940
|
|
|
752
|
-
|
|
941
|
+
self._tbl_version.get().drop_column(col)
|
|
753
942
|
|
|
754
943
|
def rename_column(self, old_name: str, new_name: str) -> None:
|
|
755
944
|
"""Rename a column.
|
|
@@ -767,89 +956,164 @@ class Table(SchemaObject):
|
|
|
767
956
|
>>> tbl = pxt.get_table('my_table')
|
|
768
957
|
... tbl.rename_column('col1', 'col2')
|
|
769
958
|
"""
|
|
770
|
-
|
|
959
|
+
from pixeltable.catalog import Catalog
|
|
960
|
+
|
|
961
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=False):
|
|
962
|
+
self._tbl_version.get().rename_column(old_name, new_name)
|
|
963
|
+
|
|
964
|
+
def _list_index_info_for_test(self) -> list[dict[str, Any]]:
|
|
965
|
+
"""
|
|
966
|
+
Returns list of all the indexes on this table. Used for testing.
|
|
967
|
+
|
|
968
|
+
Returns:
|
|
969
|
+
A list of index information, each containing the index's
|
|
970
|
+
id, name, and the name of the column it indexes.
|
|
971
|
+
"""
|
|
972
|
+
index_info = []
|
|
973
|
+
for idx_name, idx in self._tbl_version.get().idxs_by_name.items():
|
|
974
|
+
index_info.append({'_id': idx.id, '_name': idx_name, '_column': idx.col.name})
|
|
975
|
+
return index_info
|
|
771
976
|
|
|
772
977
|
def add_embedding_index(
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
978
|
+
self,
|
|
979
|
+
column: str | ColumnRef,
|
|
980
|
+
*,
|
|
981
|
+
idx_name: str | None = None,
|
|
982
|
+
embedding: pxt.Function | None = None,
|
|
983
|
+
string_embed: pxt.Function | None = None,
|
|
984
|
+
image_embed: pxt.Function | None = None,
|
|
985
|
+
metric: Literal['cosine', 'ip', 'l2'] = 'cosine',
|
|
986
|
+
if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
|
|
776
987
|
) -> None:
|
|
777
988
|
"""
|
|
778
|
-
Add an embedding index to the table. Once the index is
|
|
989
|
+
Add an embedding index to the table. Once the index is created, it will be automatically kept up-to-date as new
|
|
779
990
|
rows are inserted into the table.
|
|
780
991
|
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
992
|
+
To add an embedding index, one must specify, at minimum, the column to be indexed and an embedding UDF.
|
|
993
|
+
Only `String` and `Image` columns are currently supported.
|
|
994
|
+
|
|
995
|
+
Examples:
|
|
996
|
+
Here's an example that uses a
|
|
997
|
+
[CLIP embedding][pixeltable.functions.huggingface.clip] to index an image column:
|
|
998
|
+
|
|
999
|
+
>>> from pixeltable.functions.huggingface import clip
|
|
1000
|
+
>>> embedding_fn = clip.using(model_id='openai/clip-vit-base-patch32')
|
|
1001
|
+
>>> tbl.add_embedding_index(tbl.img, embedding=embedding_fn)
|
|
1002
|
+
|
|
1003
|
+
Once the index is created, similarity lookups can be performed using the `similarity` pseudo-function:
|
|
1004
|
+
|
|
1005
|
+
>>> reference_img = PIL.Image.open('my_image.jpg')
|
|
1006
|
+
>>> sim = tbl.img.similarity(image=reference_img)
|
|
1007
|
+
>>> tbl.select(tbl.img, sim).order_by(sim, asc=False).limit(5)
|
|
1008
|
+
|
|
1009
|
+
If the embedding UDF is a multimodal embedding (supporting more than one data type), then lookups may be
|
|
1010
|
+
performed using any of its supported modalities. In our example, CLIP supports both text and images, so we
|
|
1011
|
+
can also search for images using a text description:
|
|
1012
|
+
|
|
1013
|
+
>>> sim = tbl.img.similarity(string='a picture of a train')
|
|
1014
|
+
>>> tbl.select(tbl.img, sim).order_by(sim, asc=False).limit(5)
|
|
1015
|
+
|
|
1016
|
+
Audio and video lookups would look like this:
|
|
1017
|
+
|
|
1018
|
+
>>> sim = tbl.img.similarity(audio='/path/to/audio.flac')
|
|
1019
|
+
>>> sim = tbl.img.similarity(video='/path/to/video.mp4')
|
|
785
1020
|
|
|
786
1021
|
Args:
|
|
787
|
-
column: The name of, or reference to, the column to
|
|
788
|
-
idx_name:
|
|
789
|
-
If specified, the name must be unique for this table.
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
1022
|
+
column: The name of, or reference to, the column to be indexed; must be a `String` or `Image` column.
|
|
1023
|
+
idx_name: An optional name for the index. If not specified, a name such as `'idx0'` will be generated
|
|
1024
|
+
automatically. If specified, the name must be unique for this table and a valid pixeltable column name.
|
|
1025
|
+
embedding: The UDF to use for the embedding. Must be a UDF that accepts a single argument of type `String`
|
|
1026
|
+
or `Image` (as appropriate for the column being indexed) and returns a fixed-size 1-dimensional
|
|
1027
|
+
array of floats.
|
|
1028
|
+
string_embed: An optional UDF to use for the string embedding component of this index.
|
|
1029
|
+
Can be used in conjunction with `image_embed` to construct multimodal embeddings manually, by
|
|
1030
|
+
specifying different embedding functions for different data types.
|
|
1031
|
+
image_embed: An optional UDF to use for the image embedding component of this index.
|
|
1032
|
+
Can be used in conjunction with `string_embed` to construct multimodal embeddings manually, by
|
|
1033
|
+
specifying different embedding functions for different data types.
|
|
1034
|
+
metric: Distance metric to use for the index; one of `'cosine'`, `'ip'`, or `'l2'`.
|
|
1035
|
+
The default is `'cosine'`.
|
|
1036
|
+
if_exists: Directive for handling an existing index with the same name. Must be one of the following:
|
|
1037
|
+
|
|
1038
|
+
- `'error'`: raise an error if an index with the same name already exists.
|
|
1039
|
+
- `'ignore'`: do nothing if an index with the same name already exists.
|
|
1040
|
+
- `'replace'` or `'replace_force'`: replace the existing index with the new one.
|
|
794
1041
|
|
|
795
1042
|
Raises:
|
|
796
|
-
Error: If an index with
|
|
1043
|
+
Error: If an index with the specified name already exists for the table and `if_exists='error'`, or if
|
|
1044
|
+
the specified column does not exist.
|
|
797
1045
|
|
|
798
1046
|
Examples:
|
|
799
|
-
Add an index to the `img` column of the table `my_table
|
|
1047
|
+
Add an index to the `img` column of the table `my_table`:
|
|
800
1048
|
|
|
1049
|
+
>>> from pixeltable.functions.huggingface import clip
|
|
801
1050
|
>>> tbl = pxt.get_table('my_table')
|
|
802
|
-
|
|
1051
|
+
>>> embedding_fn = clip.using(model_id='openai/clip-vit-base-patch32')
|
|
1052
|
+
>>> tbl.add_embedding_index(tbl.img, embedding=embedding_fn)
|
|
803
1053
|
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
1054
|
+
Alternatively, the `img` column may be specified by name:
|
|
1055
|
+
|
|
1056
|
+
>>> tbl.add_embedding_index('img', embedding=embedding_fn)
|
|
807
1057
|
|
|
808
|
-
Add
|
|
809
|
-
and with a specific name
|
|
1058
|
+
Add a second index to the `img` column, using the inner product as the distance metric,
|
|
1059
|
+
and with a specific name:
|
|
810
1060
|
|
|
811
1061
|
>>> tbl.add_embedding_index(
|
|
812
|
-
...
|
|
813
|
-
... idx_name='
|
|
814
|
-
...
|
|
815
|
-
... string_embed=my_string_func,
|
|
1062
|
+
... tbl.img,
|
|
1063
|
+
... idx_name='ip_idx',
|
|
1064
|
+
... embedding=embedding_fn,
|
|
816
1065
|
... metric='ip'
|
|
817
1066
|
... )
|
|
818
1067
|
|
|
819
|
-
|
|
1068
|
+
Add an index using separately specified string and image embeddings:
|
|
820
1069
|
|
|
821
1070
|
>>> tbl.add_embedding_index(
|
|
822
1071
|
... tbl.img,
|
|
823
|
-
...
|
|
824
|
-
... image_embed=
|
|
825
|
-
... string_embed=my_string_func,
|
|
826
|
-
... metric='ip'
|
|
1072
|
+
... string_embed=string_embedding_fn,
|
|
1073
|
+
... image_embed=image_embedding_fn
|
|
827
1074
|
... )
|
|
828
1075
|
"""
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
self.
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
1076
|
+
from pixeltable.catalog import Catalog
|
|
1077
|
+
|
|
1078
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
1079
|
+
self.__check_mutable('add an index to')
|
|
1080
|
+
col = self._resolve_column_parameter(column)
|
|
1081
|
+
|
|
1082
|
+
if idx_name is not None and idx_name in self._tbl_version.get().idxs_by_name:
|
|
1083
|
+
if_exists_ = IfExistsParam.validated(if_exists, 'if_exists')
|
|
1084
|
+
# An index with the same name already exists.
|
|
1085
|
+
# Handle it according to if_exists.
|
|
1086
|
+
if if_exists_ == IfExistsParam.ERROR:
|
|
1087
|
+
raise excs.Error(f'Duplicate index name: {idx_name}')
|
|
1088
|
+
if not isinstance(self._tbl_version.get().idxs_by_name[idx_name].idx, index.EmbeddingIndex):
|
|
1089
|
+
raise excs.Error(
|
|
1090
|
+
f'Index {idx_name!r} is not an embedding index. Cannot {if_exists_.name.lower()} it.'
|
|
1091
|
+
)
|
|
1092
|
+
if if_exists_ == IfExistsParam.IGNORE:
|
|
1093
|
+
return
|
|
1094
|
+
assert if_exists_ in (IfExistsParam.REPLACE, IfExistsParam.REPLACE_FORCE)
|
|
1095
|
+
self.drop_index(idx_name=idx_name)
|
|
1096
|
+
assert idx_name not in self._tbl_version.get().idxs_by_name
|
|
1097
|
+
from pixeltable.index import EmbeddingIndex
|
|
1098
|
+
|
|
1099
|
+
# idx_name must be a valid pixeltable column name
|
|
1100
|
+
if idx_name is not None:
|
|
1101
|
+
Table.validate_column_name(idx_name)
|
|
1102
|
+
|
|
1103
|
+
# validate EmbeddingIndex args
|
|
1104
|
+
idx = EmbeddingIndex(metric=metric, embed=embedding, string_embed=string_embed, image_embed=image_embed)
|
|
1105
|
+
_ = idx.create_value_expr(col)
|
|
1106
|
+
_ = self._tbl_version.get().add_index(col, idx_name=idx_name, idx=idx)
|
|
1107
|
+
# TODO: how to deal with exceptions here? drop the index and raise?
|
|
1108
|
+
FileCache.get().emit_eviction_warnings()
|
|
848
1109
|
|
|
849
1110
|
def drop_embedding_index(
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
1111
|
+
self,
|
|
1112
|
+
*,
|
|
1113
|
+
column: str | ColumnRef | None = None,
|
|
1114
|
+
idx_name: str | None = None,
|
|
1115
|
+
if_not_exists: Literal['error', 'ignore'] = 'error',
|
|
1116
|
+
) -> None:
|
|
853
1117
|
"""
|
|
854
1118
|
Drop an embedding index from the table. Either a column name or an index name (but not both) must be
|
|
855
1119
|
specified. If a column name or reference is specified, it must be a column containing exactly one
|
|
@@ -859,11 +1123,20 @@ class Table(SchemaObject):
|
|
|
859
1123
|
column: The name of, or reference to, the column from which to drop the index.
|
|
860
1124
|
The column must have only one embedding index.
|
|
861
1125
|
idx_name: The name of the index to drop.
|
|
1126
|
+
if_not_exists: Directive for handling a non-existent index. Must be one of the following:
|
|
1127
|
+
|
|
1128
|
+
- `'error'`: raise an error if the index does not exist.
|
|
1129
|
+
- `'ignore'`: do nothing if the index does not exist.
|
|
1130
|
+
|
|
1131
|
+
Note that the `if_not_exists` parameter is only applicable when an `idx_name` is specified
|
|
1132
|
+
and it does not exist, or when `column` is specified and it has no index.
|
|
1133
|
+
`if_not_exists` does not apply to a non-existing column.
|
|
862
1134
|
|
|
863
1135
|
Raises:
|
|
864
1136
|
Error: If `column` is specified, but the column does not exist, or it contains no embedding
|
|
865
|
-
indices or multiple embedding indices.
|
|
866
|
-
Error: If `idx_name` is specified, but the index
|
|
1137
|
+
indices and `if_not_exists='error'`, or the column has multiple embedding indices.
|
|
1138
|
+
Error: If `idx_name` is specified, but the index is not an embedding index, or
|
|
1139
|
+
the index does not exist and `if_not_exists='error'`.
|
|
867
1140
|
|
|
868
1141
|
Examples:
|
|
869
1142
|
Drop the embedding index on the `img` column of the table `my_table` by column name:
|
|
@@ -880,25 +1153,46 @@ class Table(SchemaObject):
|
|
|
880
1153
|
>>> tbl = pxt.get_table('my_table')
|
|
881
1154
|
... tbl.drop_embedding_index(idx_name='idx1')
|
|
882
1155
|
|
|
1156
|
+
Drop the embedding index `idx1` of the table `my_table` by index name, if it exists, otherwise do nothing:
|
|
1157
|
+
>>> tbl = pxt.get_table('my_table')
|
|
1158
|
+
... tbl.drop_embedding_index(idx_name='idx1', if_not_exists='ignore')
|
|
883
1159
|
"""
|
|
1160
|
+
from pixeltable.catalog import Catalog
|
|
1161
|
+
|
|
884
1162
|
if (column is None) == (idx_name is None):
|
|
885
1163
|
raise excs.Error("Exactly one of 'column' or 'idx_name' must be provided")
|
|
886
1164
|
|
|
1165
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
1166
|
+
col: Column = None
|
|
1167
|
+
if idx_name is None:
|
|
1168
|
+
col = self._resolve_column_parameter(column)
|
|
1169
|
+
assert col is not None
|
|
1170
|
+
|
|
1171
|
+
self._drop_index(col=col, idx_name=idx_name, _idx_class=index.EmbeddingIndex, if_not_exists=if_not_exists)
|
|
1172
|
+
|
|
1173
|
+
def _resolve_column_parameter(self, column: str | ColumnRef) -> Column:
|
|
1174
|
+
"""Resolve a column parameter to a Column object"""
|
|
887
1175
|
col: Column = None
|
|
888
|
-
if
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
1176
|
+
if isinstance(column, str):
|
|
1177
|
+
col = self._tbl_version_path.get_column(column)
|
|
1178
|
+
if col is None:
|
|
1179
|
+
raise excs.Error(f'Unknown column: {column}')
|
|
1180
|
+
elif isinstance(column, ColumnRef):
|
|
1181
|
+
exists = self._tbl_version_path.has_column(column.col)
|
|
1182
|
+
if not exists:
|
|
1183
|
+
raise excs.Error(f'Unknown column: {column.col.qualified_name}')
|
|
1184
|
+
col = column.col
|
|
1185
|
+
else:
|
|
1186
|
+
raise excs.Error(f'Invalid column parameter type: {type(column)}')
|
|
1187
|
+
return col
|
|
897
1188
|
|
|
898
1189
|
def drop_index(
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
1190
|
+
self,
|
|
1191
|
+
*,
|
|
1192
|
+
column: str | ColumnRef | None = None,
|
|
1193
|
+
idx_name: str | None = None,
|
|
1194
|
+
if_not_exists: Literal['error', 'ignore'] = 'error',
|
|
1195
|
+
) -> None:
|
|
902
1196
|
"""
|
|
903
1197
|
Drop an index from the table. Either a column name or an index name (but not both) must be
|
|
904
1198
|
specified. If a column name or reference is specified, it must be a column containing exactly one index;
|
|
@@ -908,6 +1202,14 @@ class Table(SchemaObject):
|
|
|
908
1202
|
column: The name of, or reference to, the column from which to drop the index.
|
|
909
1203
|
The column must have only one embedding index.
|
|
910
1204
|
idx_name: The name of the index to drop.
|
|
1205
|
+
if_not_exists: Directive for handling a non-existent index. Must be one of the following:
|
|
1206
|
+
|
|
1207
|
+
- `'error'`: raise an error if the index does not exist.
|
|
1208
|
+
- `'ignore'`: do nothing if the index does not exist.
|
|
1209
|
+
|
|
1210
|
+
Note that the `if_not_exists` parameter is only applicable when an `idx_name` is specified
|
|
1211
|
+
and it does not exist, or when `column` is specified and it has no index.
|
|
1212
|
+
`if_not_exists` does not apply to a non-existing column.
|
|
911
1213
|
|
|
912
1214
|
Raises:
|
|
913
1215
|
Error: If `column` is specified, but the column does not exist, or it contains no
|
|
@@ -929,76 +1231,105 @@ class Table(SchemaObject):
|
|
|
929
1231
|
>>> tbl = pxt.get_table('my_table')
|
|
930
1232
|
... tbl.drop_index(idx_name='idx1')
|
|
931
1233
|
|
|
1234
|
+
Drop the index `idx1` of the table `my_table` by index name, if it exists, otherwise do nothing:
|
|
1235
|
+
>>> tbl = pxt.get_table('my_table')
|
|
1236
|
+
... tbl.drop_index(idx_name='idx1', if_not_exists='ignore')
|
|
1237
|
+
|
|
932
1238
|
"""
|
|
1239
|
+
from pixeltable.catalog import Catalog
|
|
1240
|
+
|
|
933
1241
|
if (column is None) == (idx_name is None):
|
|
934
1242
|
raise excs.Error("Exactly one of 'column' or 'idx_name' must be provided")
|
|
935
1243
|
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
if
|
|
939
|
-
self.
|
|
940
|
-
col
|
|
941
|
-
|
|
942
|
-
|
|
943
|
-
col = column.col
|
|
944
|
-
assert col is not None
|
|
945
|
-
self._drop_index(col=col, idx_name=idx_name)
|
|
1244
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=False):
|
|
1245
|
+
col: Column = None
|
|
1246
|
+
if idx_name is None:
|
|
1247
|
+
col = self._resolve_column_parameter(column)
|
|
1248
|
+
assert col is not None
|
|
1249
|
+
|
|
1250
|
+
self._drop_index(col=col, idx_name=idx_name, if_not_exists=if_not_exists)
|
|
946
1251
|
|
|
947
1252
|
def _drop_index(
|
|
948
|
-
|
|
949
|
-
|
|
950
|
-
|
|
1253
|
+
self,
|
|
1254
|
+
*,
|
|
1255
|
+
col: Column | None = None,
|
|
1256
|
+
idx_name: str | None = None,
|
|
1257
|
+
_idx_class: type[index.IndexBase] | None = None,
|
|
1258
|
+
if_not_exists: Literal['error', 'ignore'] = 'error',
|
|
951
1259
|
) -> None:
|
|
952
|
-
|
|
953
|
-
|
|
1260
|
+
from pixeltable.catalog import Catalog
|
|
1261
|
+
|
|
1262
|
+
self.__check_mutable('drop an index from')
|
|
954
1263
|
assert (col is None) != (idx_name is None)
|
|
955
1264
|
|
|
956
1265
|
if idx_name is not None:
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
|
|
1266
|
+
if_not_exists_ = IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
|
|
1267
|
+
if idx_name not in self._tbl_version.get().idxs_by_name:
|
|
1268
|
+
if if_not_exists_ == IfNotExistsParam.ERROR:
|
|
1269
|
+
raise excs.Error(f'Index {idx_name!r} does not exist')
|
|
1270
|
+
assert if_not_exists_ == IfNotExistsParam.IGNORE
|
|
1271
|
+
return
|
|
1272
|
+
idx_info = self._tbl_version.get().idxs_by_name[idx_name]
|
|
960
1273
|
else:
|
|
961
|
-
if col.
|
|
1274
|
+
if col.get_tbl().id != self._tbl_version.id:
|
|
962
1275
|
raise excs.Error(
|
|
963
|
-
f'Column {col.name!r}:
|
|
964
|
-
|
|
1276
|
+
f'Column {col.name!r}: '
|
|
1277
|
+
f'cannot drop index from column that belongs to base table {col.get_tbl().name!r}'
|
|
1278
|
+
)
|
|
1279
|
+
idx_info_list = [info for info in self._tbl_version.get().idxs_by_name.values() if info.col.id == col.id]
|
|
965
1280
|
if _idx_class is not None:
|
|
966
|
-
|
|
967
|
-
if len(
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
|
|
1281
|
+
idx_info_list = [info for info in idx_info_list if isinstance(info.idx, _idx_class)]
|
|
1282
|
+
if len(idx_info_list) == 0:
|
|
1283
|
+
if_not_exists_ = IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
|
|
1284
|
+
if if_not_exists_ == IfNotExistsParam.ERROR:
|
|
1285
|
+
raise excs.Error(f'Column {col.name!r} does not have an index')
|
|
1286
|
+
assert if_not_exists_ == IfNotExistsParam.IGNORE
|
|
1287
|
+
return
|
|
1288
|
+
if len(idx_info_list) > 1:
|
|
1289
|
+
raise excs.Error(f'Column {col.name!r} has multiple indices; specify `idx_name` explicitly to drop one')
|
|
1290
|
+
idx_info = idx_info_list[0]
|
|
1291
|
+
|
|
1292
|
+
# Find out if anything depends on this index
|
|
1293
|
+
val_col = idx_info.val_col
|
|
1294
|
+
dependent_user_cols = [
|
|
1295
|
+
c for c in Catalog.get().get_column_dependents(val_col.get_tbl().id, val_col.id) if c.name is not None
|
|
1296
|
+
]
|
|
1297
|
+
if len(dependent_user_cols) > 0:
|
|
1298
|
+
raise excs.Error(
|
|
1299
|
+
f'Cannot drop index {idx_info.name!r} because the following columns depend on it:\n'
|
|
1300
|
+
f'{", ".join(c.name for c in dependent_user_cols)}'
|
|
1301
|
+
)
|
|
1302
|
+
self._tbl_version.get().drop_index(idx_info.id)
|
|
973
1303
|
|
|
974
1304
|
@overload
|
|
975
1305
|
def insert(
|
|
976
1306
|
self,
|
|
977
|
-
|
|
1307
|
+
source: TableDataSource,
|
|
978
1308
|
/,
|
|
979
1309
|
*,
|
|
1310
|
+
source_format: Literal['csv', 'excel', 'parquet', 'json'] | None = None,
|
|
1311
|
+
schema_overrides: dict[str, ts.ColumnType] | None = None,
|
|
1312
|
+
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
980
1313
|
print_stats: bool = False,
|
|
981
|
-
|
|
1314
|
+
**kwargs: Any,
|
|
982
1315
|
) -> UpdateStatus: ...
|
|
983
1316
|
|
|
984
1317
|
@overload
|
|
985
1318
|
def insert(
|
|
986
|
-
self,
|
|
987
|
-
*,
|
|
988
|
-
print_stats: bool = False,
|
|
989
|
-
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
990
|
-
**kwargs: Any
|
|
1319
|
+
self, /, *, on_error: Literal['abort', 'ignore'] = 'abort', print_stats: bool = False, **kwargs: Any
|
|
991
1320
|
) -> UpdateStatus: ...
|
|
992
1321
|
|
|
993
|
-
@abc.abstractmethod
|
|
1322
|
+
@abc.abstractmethod
|
|
994
1323
|
def insert(
|
|
995
1324
|
self,
|
|
996
|
-
|
|
1325
|
+
source: TableDataSource | None = None,
|
|
997
1326
|
/,
|
|
998
1327
|
*,
|
|
999
|
-
|
|
1328
|
+
source_format: Literal['csv', 'excel', 'parquet', 'json'] | None = None,
|
|
1329
|
+
schema_overrides: dict[str, ts.ColumnType] | None = None,
|
|
1000
1330
|
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
1001
|
-
|
|
1331
|
+
print_stats: bool = False,
|
|
1332
|
+
**kwargs: Any,
|
|
1002
1333
|
) -> UpdateStatus:
|
|
1003
1334
|
"""Inserts rows into this table. There are two mutually exclusive call patterns:
|
|
1004
1335
|
|
|
@@ -1006,35 +1337,40 @@ class Table(SchemaObject):
|
|
|
1006
1337
|
|
|
1007
1338
|
```python
|
|
1008
1339
|
insert(
|
|
1009
|
-
|
|
1340
|
+
source: TableDataSource,
|
|
1010
1341
|
/,
|
|
1011
1342
|
*,
|
|
1343
|
+
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
1012
1344
|
print_stats: bool = False,
|
|
1013
|
-
|
|
1014
|
-
)
|
|
1345
|
+
**kwargs: Any,
|
|
1346
|
+
)
|
|
1347
|
+
```
|
|
1015
1348
|
|
|
1016
1349
|
To insert just a single row, you can use the more concise syntax:
|
|
1017
1350
|
|
|
1018
1351
|
```python
|
|
1019
1352
|
insert(
|
|
1020
1353
|
*,
|
|
1021
|
-
print_stats: bool = False,
|
|
1022
1354
|
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
1355
|
+
print_stats: bool = False,
|
|
1023
1356
|
**kwargs: Any
|
|
1024
|
-
)
|
|
1357
|
+
)
|
|
1358
|
+
```
|
|
1025
1359
|
|
|
1026
1360
|
Args:
|
|
1027
|
-
|
|
1028
|
-
names to values.
|
|
1361
|
+
source: A data source from which data can be imported.
|
|
1029
1362
|
kwargs: (if inserting a single row) Keyword-argument pairs representing column names and values.
|
|
1030
|
-
|
|
1363
|
+
(if inserting multiple rows) Additional keyword arguments are passed to the data source.
|
|
1364
|
+
source_format: A hint about the format of the source data
|
|
1365
|
+
schema_overrides: If specified, then columns in `schema_overrides` will be given the specified types
|
|
1031
1366
|
on_error: Determines the behavior if an error occurs while evaluating a computed column or detecting an
|
|
1032
1367
|
invalid media file (such as a corrupt image) for one of the inserted rows.
|
|
1033
1368
|
|
|
1034
1369
|
- If `on_error='abort'`, then an exception will be raised and the rows will not be inserted.
|
|
1035
1370
|
- If `on_error='ignore'`, then execution will continue and the rows will be inserted. Any cells
|
|
1036
|
-
|
|
1037
|
-
|
|
1371
|
+
with errors will have a `None` value for that cell, with information about the error stored in the
|
|
1372
|
+
corresponding `tbl.col_name.errortype` and `tbl.col_name.errormsg` fields.
|
|
1373
|
+
print_stats: If `True`, print statistics about the cost of computed columns.
|
|
1038
1374
|
|
|
1039
1375
|
Returns:
|
|
1040
1376
|
An [`UpdateStatus`][pixeltable.UpdateStatus] object containing information about the update.
|
|
@@ -1046,6 +1382,7 @@ class Table(SchemaObject):
|
|
|
1046
1382
|
- The table has been dropped.
|
|
1047
1383
|
- One of the rows being inserted does not conform to the table schema.
|
|
1048
1384
|
- An error occurs during processing of computed columns, and `on_error='abort'`.
|
|
1385
|
+
- An error occurs while importing data from a source, and `on_error='abort'`.
|
|
1049
1386
|
|
|
1050
1387
|
Examples:
|
|
1051
1388
|
Insert two rows into the table `my_table` with three int columns ``a``, ``b``, and ``c``.
|
|
@@ -1057,11 +1394,24 @@ class Table(SchemaObject):
|
|
|
1057
1394
|
Insert a single row using the alternative syntax:
|
|
1058
1395
|
|
|
1059
1396
|
>>> tbl.insert(a=3, b=3, c=3)
|
|
1397
|
+
|
|
1398
|
+
Insert rows from a CSV file:
|
|
1399
|
+
|
|
1400
|
+
>>> tbl.insert('path/to/file.csv')
|
|
1401
|
+
|
|
1402
|
+
Insert Pydantic model instances into a table with two `pxt.Int` columns `a` and `b`:
|
|
1403
|
+
|
|
1404
|
+
>>> class MyModel(pydantic.BaseModel):
|
|
1405
|
+
... a: int
|
|
1406
|
+
... b: int
|
|
1407
|
+
...
|
|
1408
|
+
... models = [MyModel(a=1, b=2), MyModel(a=3, b=4)]
|
|
1409
|
+
... tbl.insert(models)
|
|
1060
1410
|
"""
|
|
1061
1411
|
raise NotImplementedError
|
|
1062
1412
|
|
|
1063
1413
|
def update(
|
|
1064
|
-
|
|
1414
|
+
self, value_spec: dict[str, Any], where: 'exprs.Expr' | None = None, cascade: bool = True
|
|
1065
1415
|
) -> UpdateStatus:
|
|
1066
1416
|
"""Update rows in this table.
|
|
1067
1417
|
|
|
@@ -1070,6 +1420,9 @@ class Table(SchemaObject):
|
|
|
1070
1420
|
where: a predicate to filter rows to update.
|
|
1071
1421
|
cascade: if True, also update all computed columns that transitively depend on the updated columns.
|
|
1072
1422
|
|
|
1423
|
+
Returns:
|
|
1424
|
+
An [`UpdateStatus`][pixeltable.UpdateStatus] object containing information about the update.
|
|
1425
|
+
|
|
1073
1426
|
Examples:
|
|
1074
1427
|
Set column `int_col` to 1 for all rows:
|
|
1075
1428
|
|
|
@@ -1087,13 +1440,19 @@ class Table(SchemaObject):
|
|
|
1087
1440
|
|
|
1088
1441
|
>>> tbl.update({'int_col': tbl.int_col + 1}, where=tbl.int_col == 0)
|
|
1089
1442
|
"""
|
|
1090
|
-
|
|
1091
|
-
|
|
1092
|
-
|
|
1443
|
+
from pixeltable.catalog import Catalog
|
|
1444
|
+
|
|
1445
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
1446
|
+
self.__check_mutable('update')
|
|
1447
|
+
result = self._tbl_version.get().update(value_spec, where, cascade)
|
|
1448
|
+
FileCache.get().emit_eviction_warnings()
|
|
1449
|
+
return result
|
|
1093
1450
|
|
|
1094
1451
|
def batch_update(
    self,
    rows: Iterable[dict[str, Any]],
    cascade: bool = True,
    if_not_exists: Literal['error', 'ignore', 'insert'] = 'error',
) -> UpdateStatus:
    """Update rows in this table.

    Every dictionary in `rows` describes one row: it identifies the row either through values for all primary
    key columns or through the `_rowid` pseudo-column, and carries the new values for the columns to update.
    Whether `_rowid` is used is decided from the first row; all other rows must then follow the same convention.

    Args:
        rows: an Iterable of dictionaries containing the values for the updated columns plus the values that
            identify the row (primary key columns or `_rowid`). An empty Iterable is a no-op.
        cascade: forwarded unchanged to the table version's `batch_update()`.
        if_not_exists: what to do when a row to update does not exist:

            - `'error'`: raise an error (`error_if_not_exists` is passed as True).
            - `'insert'`: insert the row (`insert_if_not_exists` is passed as True).
            - `'ignore'`: neither of the two flags is set.

    Returns:
        The `UpdateStatus` produced by the table version's `batch_update()`, or an empty `UpdateStatus` when
        `rows` is empty.

    Raises:
        Error: if the table has no primary key and the rows do not carry `_rowid`, if a row is missing `_rowid`
            although the first row supplied it, or if a row is missing one or more primary key columns.

    Examples:
        Update the `name` and `age` columns for the rows with ids 1 and 2 (assuming `id` is the primary key).
        If either row does not exist, this raises an error:

        >>> tbl.batch_update(
        ...     [{'id': 1, 'name': 'Alice', 'age': 30}, {'id': 2, 'name': 'Bob', 'age': 40}]
        ... )

        Update the `name` and `age` columns for the row with `id` 1 (assuming `id` is the primary key) and insert
        the row with new `id` 3 (assuming this key does not exist):

        >>> tbl.batch_update(
        ...     [{'id': 1, 'name': 'Alice', 'age': 30}, {'id': 3, 'name': 'Bob', 'age': 40}],
        ...     if_not_exists='insert'
        ... )
    """
    from pixeltable.catalog import Catalog

    with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
        self.__check_mutable('update')
        # materialize once: `rows` may be a one-shot iterator and is inspected before it is iterated
        rows = list(rows)
        if not rows:
            # nothing to do; without this guard the `rows[0]` lookup below raises IndexError
            return UpdateStatus()

        row_updates: list[dict[Column, exprs.Expr]] = []
        pk_col_names = {c.name for c in self._tbl_version.get().primary_key_columns()}

        # pseudo-column _rowid: contains the rowid of the row to update and can be used instead of the primary key
        has_rowid = _ROWID_COLUMN_NAME in rows[0]
        rowids: list[tuple[int, ...]] = []
        if len(pk_col_names) == 0 and not has_rowid:
            raise excs.Error('Table must have primary key for batch update')

        for row_spec in rows:
            col_vals = self._tbl_version.get()._validate_update_spec(
                row_spec, allow_pk=not has_rowid, allow_exprs=False, allow_media=False
            )
            if has_rowid:
                # the _rowid column must be present for each row; this is user input, so raise a regular
                # error instead of relying on an assert that disappears under `python -O`
                if _ROWID_COLUMN_NAME not in row_spec:
                    raise excs.Error(f'{_ROWID_COLUMN_NAME!r} missing in {row_spec}')
                rowids.append(row_spec[_ROWID_COLUMN_NAME])
            else:
                missing_cols = pk_col_names - {col.name for col in col_vals}
                if missing_cols:
                    # sorted: keep the error message deterministic
                    raise excs.Error(
                        f'Primary key column(s) {", ".join(repr(c) for c in sorted(missing_cols))} '
                        f'missing in {row_spec}'
                    )
            row_updates.append(col_vals)

        result = self._tbl_version.get().batch_update(
            row_updates,
            rowids,
            error_if_not_exists=if_not_exists == 'error',
            insert_if_not_exists=if_not_exists == 'insert',
            cascade=cascade,
        )
        FileCache.get().emit_eviction_warnings()
        return result
|
|
1526
|
+
|
|
1527
|
+
def recompute_columns(
    self,
    *columns: str | ColumnRef,
    where: 'exprs.Expr' | None = None,
    errors_only: bool = False,
    cascade: bool = True,
) -> UpdateStatus:
    """Recompute the values in one or more computed columns of this table.

    Args:
        columns: The names or references of the computed columns to recompute.
        where: A predicate to filter rows to recompute.
        errors_only: If True, only run the recomputation for rows that have errors in the column (ie, the column's
            `errortype` property indicates that an error occurred). Only allowed for recomputing a single column.
        cascade: if True, also update all computed columns that transitively depend on the recomputed columns.

    Returns:
        The `UpdateStatus` returned by the table version's `recompute_columns()`.

    Raises:
        Error: if no column is specified, if `errors_only=True` is combined with more than one column, if a
            column is unknown, is not a computed column, or belongs to a base of this table, or if `where` is
            not bound by this table.

    Examples:
        Recompute computed columns `c1` and `c2` for all rows in this table, and everything that transitively
        depends on them:

        >>> tbl.recompute_columns('c1', 'c2')

        Recompute computed columns `c1` and `c2` for all rows in this table, but don't recompute other columns
        that depend on them:

        >>> tbl.recompute_columns(tbl.c1, tbl.c2, cascade=False)

        Recompute column `c1` and its dependents, but only for rows with `c2` == 0:

        >>> tbl.recompute_columns('c1', where=tbl.c2 == 0)

        Recompute column `c1` and its dependents, but only for rows that have errors in it:

        >>> tbl.recompute_columns('c1', errors_only=True)
    """
    from pixeltable.catalog import Catalog

    cat = Catalog.get()
    # lock_mutable_tree=True: we need to be able to see whether any transitive view has column dependents
    with cat.begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
        self.__check_mutable('recompute columns of')
        if len(columns) == 0:
            raise excs.Error('At least one column must be specified to recompute')
        if errors_only and len(columns) > 1:
            raise excs.Error('Cannot use errors_only=True with multiple columns')

        col_names: list[str] = []
        for column in columns:
            col_name: str
            col: Column
            if isinstance(column, str):
                col = self._tbl_version_path.get_column(column)
                if col is None:
                    raise excs.Error(f'Unknown column: {column}')
                col_name = column
            elif isinstance(column, ColumnRef):
                col = column.col
                if not self._tbl_version_path.has_column(col):
                    raise excs.Error(f'Unknown column: {col.name}')
                col_name = col.name
            else:
                # user input: report it as a regular error rather than an assert (asserts vanish under -O)
                raise excs.Error(f'Invalid column (expected a column name or a column reference): {column!r}')
            if not col.is_computed:
                raise excs.Error(f'Column {col_name!r} is not a computed column')
            # only columns owned by this table can be recomputed here, not those inherited from a base
            if col.get_tbl().id != self._tbl_version_path.tbl_id:
                raise excs.Error(f'Cannot recompute column of a base: {col_name}')
            col_names.append(col_name)

        if where is not None and not where.is_bound_by([self._tbl_version_path]):
            raise excs.Error(f'`where` predicate ({where}) is not bound by {self._display_str()}')

        result = self._tbl_version.get().recompute_columns(
            col_names, where=where, errors_only=errors_only, cascade=cascade
        )
        FileCache.get().emit_eviction_warnings()
        return result
|
|
1602
|
+
|
|
1603
|
+
def delete(self, where: 'exprs.Expr' | None = None) -> UpdateStatus:
|
|
1155
1604
|
"""Delete rows in this table.
|
|
1156
1605
|
|
|
1157
1606
|
Args:
|
|
@@ -1174,69 +1623,96 @@ class Table(SchemaObject):
|
|
|
1174
1623
|
.. warning::
|
|
1175
1624
|
This operation is irreversible.
|
|
1176
1625
|
"""
|
|
1177
|
-
|
|
1178
|
-
|
|
1179
|
-
|
|
1626
|
+
with catalog.Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
1627
|
+
self.__check_mutable('revert')
|
|
1628
|
+
self._tbl_version.get().revert()
|
|
1629
|
+
# remove cached md in order to force a reload on the next operation
|
|
1630
|
+
self._tbl_version_path.clear_cached_md()
|
|
1180
1631
|
|
|
1181
|
-
|
|
1182
|
-
|
|
1632
|
+
def push(self) -> None:
    """Push this table to its published counterpart via `push_replica()`.

    The target is addressed by a URI built from the org and db of the table's pxt URI combined with this
    table's id. If `self._tbl_version` is None, only an "Everything up to date" message is logged and nothing
    is pushed.

    Raises:
        Error: if the table is a replica, if it has no pxt URI (ie, has not been published yet), or if this
            handle is a view that is an anonymous snapshot (a specific-version handle).
    """
    from pixeltable.share import push_replica
    from pixeltable.share.protocol import PxtUri

    published_uri = self._get_pxt_uri()
    current_version = self._tbl_version_path.tbl_version.get()

    if current_version.is_replica:
        msg = f'push(): Cannot push replica table {self._name!r}. (Did you mean `pull()`?)'
        raise excs.Error(msg)

    if published_uri is None:
        msg = (
            f'push(): Table {self._name!r} has not yet been published to Pixeltable Cloud. '
            'To publish it, use `pxt.publish()` instead.'
        )
        raise excs.Error(msg)

    is_version_handle = isinstance(self, catalog.View) and self._is_anonymous_snapshot()
    if is_version_handle:
        msg = (
            f'push(): Cannot push specific-version table handle {current_version.versioned_name!r}. '
            'To push the latest version instead:\n'
            f' t = pxt.get_table({self._name!r})\n'
            ' t.push()'
        )
        raise excs.Error(msg)

    if self._tbl_version is None:
        # Named snapshots never have new versions to push.
        env.Env.get().console_logger.info('push(): Everything up to date.')
        return

    # Re-address the table by its id: keep org/db of the published URI, swap in the UUID
    source_uri = PxtUri(uri=published_uri)
    target_uri = PxtUri.from_components(org=source_uri.org, id=self._id, db=source_uri.db)
    push_replica(str(target_uri), self)
|
|
1667
|
+
|
|
1668
|
+
def pull(self) -> None:
    """Pull the latest state of this replica table via `pull_replica()`.

    The source is addressed by a URI built from the org and db of the table's pxt URI combined with this
    table's id.

    Raises:
        Error: if the table is not a replica or has no pxt URI, or if this handle is a view that is an
            anonymous snapshot (a specific-version handle).
    """
    from pixeltable.share import pull_replica
    from pixeltable.share.protocol import PxtUri

    published_uri = self._get_pxt_uri()
    current_version = self._tbl_version_path.tbl_version.get()

    is_cloud_replica = current_version.is_replica and published_uri is not None
    if not is_cloud_replica:
        msg = f'pull(): Table {self._name!r} is not a replica of a Pixeltable Cloud table (nothing to `pull()`).'
        raise excs.Error(msg)

    is_version_handle = isinstance(self, catalog.View) and self._is_anonymous_snapshot()
    if is_version_handle:
        msg = (
            f'pull(): Cannot pull specific-version table handle {current_version.versioned_name!r}. '
            'To pull the latest version instead:\n'
            f' t = pxt.get_table({self._name!r})\n'
            ' t.pull()'
        )
        raise excs.Error(msg)

    # Re-address the table by its id: keep org/db of the published URI, swap in the UUID
    source_uri = PxtUri(uri=published_uri)
    target_uri = PxtUri.from_components(org=source_uri.org, id=self._id, db=source_uri.db)
    pull_replica(self._path(), str(target_uri))
|
|
1217
1694
|
|
|
1218
|
-
@property
|
|
1219
1695
|
def external_stores(self) -> list[str]:
    """Return the names (keys of the table version's `external_stores` mapping) of the linked external stores."""
    linked = self._tbl_version.get().external_stores
    return [*linked.keys()]
|
|
1221
1697
|
|
|
1222
1698
|
def _link_external_store(self, store: 'pxt.io.ExternalStore') -> None:
    """
    Links the specified `ExternalStore` to this table.

    Runs inside a write transaction on this table. Raises an `Error` if the table is not mutable or if a
    store with the same name is already linked.
    """
    from pixeltable.catalog import Catalog

    xact = Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=False)
    with xact:
        self.__check_mutable('link an external store to')

        name_taken = store.name in self.external_stores()
        if name_taken:
            raise excs.Error(f'Table {self._name!r} already has an external store with that name: {store.name}')

        _logger.info(f'Linking external store {store.name!r} to table {self._name!r}.')

        # the table version is fetched again after link(), which might call tbl_version.add_columns()
        store.link(self._tbl_version.get())
        self._tbl_version.get().link_external_store(store)

        console = env.Env.get().console_logger
        console.info(f'Linked external store {store.name!r} to table {self._name!r}.')
|
|
1233
1713
|
|
|
1234
1714
|
def unlink_external_stores(
    self, stores: str | list[str] | None = None, *, delete_external_data: bool = False, ignore_errors: bool = False
) -> None:
    """
    Unlinks this table's external stores.

    Does nothing if this table is not mutable.

    Args:
        stores: The name (or list of names) of the external stores to unlink. If `None`, all of this table's
            external stores are unlinked.
        ignore_errors (bool): If `True`, names in `stores` that are not linked to this table are skipped
            instead of raising an error.
        delete_external_data (bool): If `True`, then the external data store will also be deleted. WARNING: This
            is a destructive operation that will delete data outside Pixeltable, and cannot be undone.

    Raises:
        Error: if `ignore_errors` is `False` and a name in `stores` is not linked to this table. The check runs
            before any store is unlinked.
    """
    from pixeltable.catalog import Catalog

    if not self._tbl_version_path.is_mutable():
        return
    with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=False):
        all_stores = self.external_stores()

        if stores is None:
            stores = all_stores
        elif isinstance(stores, str):
            stores = [stores]

        # Validation
        if not ignore_errors:
            for store_name in stores:
                if store_name not in all_stores:
                    raise excs.Error(f'Table {self._name!r} has no external store with that name: {store_name}')

        for store_name in stores:
            if store_name not in all_stores:
                # only reachable with ignore_errors=True: skip the unknown name instead of failing with a
                # KeyError on the lookup below
                continue
            store = self._tbl_version.get().external_stores[store_name]
            # get hold of the store's debug string before deleting it
            store_str = str(store)
            store.unlink(self._tbl_version.get())  # might call tbl_version.drop_columns()
            self._tbl_version.get().unlink_external_store(store)
            if delete_external_data and isinstance(store, pxt.io.external_store.Project):
                store.delete()
            env.Env.get().console_logger.info(f'Unlinked external store from table {self._name!r}: {store_str}')
|
|
1269
1755
|
|
|
1270
1756
|
def sync(
    self, stores: str | list[str] | None = None, *, export_data: bool = True, import_data: bool = True
) -> UpdateStatus:
    """
    Synchronizes this table with its linked external stores.

    Returns an empty `UpdateStatus` without doing anything if this table is not mutable.

    Args:
        stores: The name (or list of names) of the external stores to synchronize. If `None`, all of this
            table's external stores are synchronized.
        export_data: If `True`, data from this table will be exported to the external stores during synchronization.
        import_data: If `True`, data from the external stores will be imported to this table during synchronization.

    Returns:
        The sum of the statuses returned by each store's `sync()`.
    """
    from pixeltable.catalog import Catalog

    if not self._tbl_version_path.is_mutable():
        return UpdateStatus()

    # we lock the entire tree starting at the root base table in order to ensure that all synced columns can
    # have their updates propagated down the tree
    root_tv = self._tbl_version_path.get_tbl_versions()[-1]
    xact = Catalog.get().begin_xact(tbl=TableVersionPath(root_tv), for_write=True, lock_mutable_tree=True)
    with xact:
        linked_names = self.external_stores()

        if stores is None:
            stores = linked_names
        elif isinstance(stores, str):
            stores = [stores]

        # reject unknown names up front, before any store is synchronized
        for name in stores:
            if name not in linked_names:
                raise excs.Error(f'Table {self._name!r} has no external store with that name: {name}')

        total = UpdateStatus()
        for name in stores:
            linked_store = self._tbl_version.get().external_stores[name]
            total += linked_store.sync(self, export_data=export_data, import_data=import_data)

    return total
|
|
1305
1794
|
|
|
1306
1795
|
def __dir__(self) -> list[str]:
    """Return the inherited attribute names followed by the keys of this table's schema."""
    names = list(super().__dir__())
    names.extend(self._get_schema().keys())
    return names
|
|
1308
1797
|
|
|
1309
1798
|
def _ipython_key_completions_(self) -> list[str]:
|
|
1310
|
-
return list(self.
|
|
1799
|
+
return list(self._get_schema().keys())
|
|
1800
|
+
|
|
1801
|
+
def get_versions(self, n: int | None = None) -> list[VersionMetadata]:
    """
    Returns information about versions of this table, most recent first.

    `get_versions()` is intended for programmatic access to version metadata; for human-readable
    output, use [`history()`][pixeltable.Table.history] instead.

    Args:
        n: if specified, will return at most `n` versions

    Returns:
        A list of [VersionMetadata][pixeltable.VersionMetadata] dictionaries, one per version retrieved, most
        recent first.

    Raises:
        Error: if `n` is not an integer >= 1.

    Examples:
        Retrieve metadata about all versions of the table `tbl`:

        >>> tbl.get_versions()

        Retrieve metadata about the most recent 5 versions of the table `tbl`:

        >>> tbl.get_versions(n=5)
    """
    from pixeltable.catalog import Catalog

    # no limit requested: use a bound that is effectively "all versions"
    limit = 1_000_000_000 if n is None else n
    if not isinstance(limit, int) or limit < 1:
        raise excs.Error(f'Invalid value for `n`: {limit}')

    # Fetch one version more than requested, if available, so that the schema change of the oldest
    # reported version can be computed against its predecessor
    history = Catalog.get().collect_tbl_history(self._id, limit + 1)

    # Map version -> schema change description
    schema_changes = MetadataUtils._create_md_change_dict(
        [(entry.version_md.version, entry.schema_version_md.columns) for entry in history]
    )

    # Drop the extra lookahead version again, if it was returned
    surplus = 0
    if len(history) > limit:
        assert len(history) == limit + 1
        surplus = 1

    report: list[VersionMetadata] = []
    for entry in history[: len(history) - surplus]:
        version_md = entry.version_md
        version = version_md.version
        schema_change = schema_changes.get(version, None)

        status = version_md.update_status
        if status is None:
            status = UpdateStatus()

        change_type: Literal['schema', 'data']
        if schema_change is not None:
            change_type = 'schema'
        else:
            change_type = 'data'

        # direct plus cascaded row counts
        counts = status.row_count_stats + status.cascade_row_count_stats
        created_at = datetime.datetime.fromtimestamp(version_md.created_at, tz=datetime.timezone.utc)
        report.append(
            VersionMetadata(
                version=version,
                created_at=created_at,
                user=version_md.user,
                change_type=change_type,
                inserts=counts.ins_rows,
                updates=counts.upd_rows,
                deletes=counts.del_rows,
                errors=counts.num_excs,
                computed=counts.computed_values,
                schema_change=schema_change,
            )
        )

    return report
|
|
1872
|
+
|
|
1873
|
+
def history(self, n: int | None = None) -> pd.DataFrame:
    """
    Returns a human-readable report about versions of this table.

    `history()` is intended for human-readable output of version metadata; for programmatic access,
    use [`get_versions()`][pixeltable.Table.get_versions] instead.

    Args:
        n: if specified, will return at most `n` versions

    Returns:
        A report with information about each version, one per row, most recent first. The column names are
        the keys of the first version's metadata dictionary.

    Examples:
        Report all versions of the table:

        >>> tbl.history()

        Report only the most recent 5 changes to the table:

        >>> tbl.history(n=5)
    """
    versions = self.get_versions(n)
    assert len(versions) > 0
    header = list(versions[0].keys())
    records = [list(entry.values()) for entry in versions]
    return pd.DataFrame(records, columns=header)
|
|
1898
|
+
|
|
1899
|
+
def __check_mutable(self, op_descr: str) -> None:
|
|
1900
|
+
if self._tbl_version_path.is_replica():
|
|
1901
|
+
raise excs.Error(f'{self._display_str()}: Cannot {op_descr} a replica.')
|
|
1902
|
+
if self._tbl_version_path.is_snapshot():
|
|
1903
|
+
raise excs.Error(f'{self._display_str()}: Cannot {op_descr} a snapshot.')
|