pixeltable 0.3.14__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +42 -8
- pixeltable/{dataframe.py → _query.py} +470 -206
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +5 -4
- pixeltable/catalog/catalog.py +1785 -432
- pixeltable/catalog/column.py +190 -113
- pixeltable/catalog/dir.py +2 -4
- pixeltable/catalog/globals.py +19 -46
- pixeltable/catalog/insertable_table.py +191 -98
- pixeltable/catalog/path.py +63 -23
- pixeltable/catalog/schema_object.py +11 -15
- pixeltable/catalog/table.py +843 -436
- pixeltable/catalog/table_metadata.py +103 -0
- pixeltable/catalog/table_version.py +978 -657
- pixeltable/catalog/table_version_handle.py +72 -16
- pixeltable/catalog/table_version_path.py +112 -43
- pixeltable/catalog/tbl_ops.py +53 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +134 -90
- pixeltable/config.py +134 -22
- pixeltable/env.py +471 -157
- pixeltable/exceptions.py +6 -0
- pixeltable/exec/__init__.py +4 -1
- pixeltable/exec/aggregation_node.py +7 -8
- pixeltable/exec/cache_prefetch_node.py +83 -110
- pixeltable/exec/cell_materialization_node.py +268 -0
- pixeltable/exec/cell_reconstruction_node.py +168 -0
- pixeltable/exec/component_iteration_node.py +4 -3
- pixeltable/exec/data_row_batch.py +8 -65
- pixeltable/exec/exec_context.py +16 -4
- pixeltable/exec/exec_node.py +13 -36
- pixeltable/exec/expr_eval/evaluators.py +11 -7
- pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
- pixeltable/exec/expr_eval/globals.py +8 -5
- pixeltable/exec/expr_eval/row_buffer.py +1 -2
- pixeltable/exec/expr_eval/schedulers.py +106 -56
- pixeltable/exec/globals.py +35 -0
- pixeltable/exec/in_memory_data_node.py +19 -19
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +16 -9
- pixeltable/exec/sql_node.py +351 -84
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/arithmetic_expr.py +27 -22
- pixeltable/exprs/array_slice.py +3 -3
- pixeltable/exprs/column_property_ref.py +36 -23
- pixeltable/exprs/column_ref.py +213 -89
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +5 -4
- pixeltable/exprs/data_row.py +164 -54
- pixeltable/exprs/expr.py +70 -44
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +17 -10
- pixeltable/exprs/function_call.py +100 -40
- pixeltable/exprs/globals.py +2 -2
- pixeltable/exprs/in_predicate.py +4 -4
- pixeltable/exprs/inline_expr.py +18 -32
- pixeltable/exprs/is_null.py +7 -3
- pixeltable/exprs/json_mapper.py +8 -8
- pixeltable/exprs/json_path.py +56 -22
- pixeltable/exprs/literal.py +27 -5
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +167 -67
- pixeltable/exprs/rowid_ref.py +25 -10
- pixeltable/exprs/similarity_expr.py +58 -40
- pixeltable/exprs/sql_element_cache.py +4 -4
- pixeltable/exprs/string_op.py +5 -5
- pixeltable/exprs/type_cast.py +3 -5
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/aggregate_function.py +8 -8
- pixeltable/func/callable_function.py +9 -9
- pixeltable/func/expr_template_function.py +17 -11
- pixeltable/func/function.py +18 -20
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/globals.py +2 -3
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +29 -27
- pixeltable/func/signature.py +46 -19
- pixeltable/func/tools.py +31 -13
- pixeltable/func/udf.py +18 -20
- pixeltable/functions/__init__.py +16 -0
- pixeltable/functions/anthropic.py +123 -77
- pixeltable/functions/audio.py +147 -10
- pixeltable/functions/bedrock.py +13 -6
- pixeltable/functions/date.py +7 -4
- pixeltable/functions/deepseek.py +35 -43
- pixeltable/functions/document.py +81 -0
- pixeltable/functions/fal.py +76 -0
- pixeltable/functions/fireworks.py +11 -20
- pixeltable/functions/gemini.py +195 -39
- pixeltable/functions/globals.py +142 -14
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1056 -24
- pixeltable/functions/image.py +115 -57
- pixeltable/functions/json.py +1 -1
- pixeltable/functions/llama_cpp.py +28 -13
- pixeltable/functions/math.py +67 -5
- pixeltable/functions/mistralai.py +18 -55
- pixeltable/functions/net.py +70 -0
- pixeltable/functions/ollama.py +20 -13
- pixeltable/functions/openai.py +240 -226
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +4 -4
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +239 -69
- pixeltable/functions/timestamp.py +16 -16
- pixeltable/functions/together.py +24 -84
- pixeltable/functions/twelvelabs.py +188 -0
- pixeltable/functions/util.py +6 -1
- pixeltable/functions/uuid.py +30 -0
- pixeltable/functions/video.py +1515 -107
- pixeltable/functions/vision.py +8 -8
- pixeltable/functions/voyageai.py +289 -0
- pixeltable/functions/whisper.py +16 -8
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/{ext/functions → functions}/yolox.py +2 -4
- pixeltable/globals.py +362 -115
- pixeltable/index/base.py +17 -21
- pixeltable/index/btree.py +28 -22
- pixeltable/index/embedding_index.py +100 -118
- pixeltable/io/__init__.py +4 -2
- pixeltable/io/datarows.py +8 -7
- pixeltable/io/external_store.py +56 -105
- pixeltable/io/fiftyone.py +13 -13
- pixeltable/io/globals.py +31 -30
- pixeltable/io/hf_datasets.py +61 -16
- pixeltable/io/label_studio.py +74 -70
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +21 -12
- pixeltable/io/parquet.py +25 -105
- pixeltable/io/table_data_conduit.py +250 -123
- pixeltable/io/utils.py +4 -4
- pixeltable/iterators/__init__.py +2 -1
- pixeltable/iterators/audio.py +26 -25
- pixeltable/iterators/base.py +9 -3
- pixeltable/iterators/document.py +112 -78
- pixeltable/iterators/image.py +12 -15
- pixeltable/iterators/string.py +11 -4
- pixeltable/iterators/video.py +523 -120
- pixeltable/metadata/__init__.py +14 -3
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_18.py +2 -2
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_20.py +2 -2
- pixeltable/metadata/converters/convert_21.py +2 -2
- pixeltable/metadata/converters/convert_22.py +2 -2
- pixeltable/metadata/converters/convert_24.py +2 -2
- pixeltable/metadata/converters/convert_25.py +2 -2
- pixeltable/metadata/converters/convert_26.py +2 -2
- pixeltable/metadata/converters/convert_29.py +4 -4
- pixeltable/metadata/converters/convert_30.py +34 -21
- pixeltable/metadata/converters/convert_34.py +2 -2
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/convert_41.py +12 -0
- pixeltable/metadata/converters/convert_42.py +9 -0
- pixeltable/metadata/converters/convert_43.py +44 -0
- pixeltable/metadata/converters/util.py +20 -31
- pixeltable/metadata/notes.py +9 -0
- pixeltable/metadata/schema.py +140 -53
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +382 -115
- pixeltable/share/__init__.py +1 -1
- pixeltable/share/packager.py +547 -83
- pixeltable/share/protocol/__init__.py +33 -0
- pixeltable/share/protocol/common.py +165 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +119 -0
- pixeltable/share/publish.py +257 -59
- pixeltable/store.py +311 -194
- pixeltable/type_system.py +373 -211
- pixeltable/utils/__init__.py +2 -3
- pixeltable/utils/arrow.py +131 -17
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +346 -0
- pixeltable/utils/coco.py +6 -6
- pixeltable/utils/code.py +3 -3
- pixeltable/utils/console_output.py +4 -1
- pixeltable/utils/coroutine.py +6 -23
- pixeltable/utils/dbms.py +32 -6
- pixeltable/utils/description_helper.py +4 -5
- pixeltable/utils/documents.py +7 -18
- pixeltable/utils/exception_handler.py +7 -30
- pixeltable/utils/filecache.py +6 -6
- pixeltable/utils/formatter.py +86 -48
- pixeltable/utils/gcs_store.py +295 -0
- pixeltable/utils/http.py +133 -0
- pixeltable/utils/http_server.py +2 -3
- pixeltable/utils/iceberg.py +1 -2
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +90 -0
- pixeltable/utils/local_store.py +322 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +573 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +5 -6
- pixeltable/utils/s3_store.py +527 -0
- pixeltable/utils/sql.py +26 -0
- pixeltable/utils/system.py +30 -0
- pixeltable-0.5.7.dist-info/METADATA +579 -0
- pixeltable-0.5.7.dist-info/RECORD +227 -0
- {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
- pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/catalog/named_function.py +0 -40
- pixeltable/ext/__init__.py +0 -17
- pixeltable/ext/functions/__init__.py +0 -11
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/utils/media_store.py +0 -77
- pixeltable/utils/s3.py +0 -17
- pixeltable-0.3.14.dist-info/METADATA +0 -434
- pixeltable-0.3.14.dist-info/RECORD +0 -186
- pixeltable-0.3.14.dist-info/entry_points.txt +0 -3
- {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
|
@@ -1,39 +1,105 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import copy
|
|
3
4
|
import dataclasses
|
|
4
5
|
import importlib
|
|
6
|
+
import itertools
|
|
5
7
|
import logging
|
|
6
8
|
import time
|
|
7
9
|
import uuid
|
|
8
|
-
from typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal
|
|
10
|
+
from typing import TYPE_CHECKING, Any, Callable, Iterable, Iterator, Literal
|
|
9
11
|
from uuid import UUID
|
|
10
12
|
|
|
11
13
|
import jsonschema.exceptions
|
|
12
14
|
import sqlalchemy as sql
|
|
15
|
+
from sqlalchemy import exc as sql_exc
|
|
13
16
|
|
|
14
|
-
import pixeltable as pxt
|
|
15
17
|
import pixeltable.exceptions as excs
|
|
18
|
+
import pixeltable.exprs as exprs
|
|
19
|
+
import pixeltable.index as index
|
|
16
20
|
import pixeltable.type_system as ts
|
|
17
|
-
from pixeltable import exprs, index
|
|
18
21
|
from pixeltable.env import Env
|
|
19
22
|
from pixeltable.iterators import ComponentIterator
|
|
20
23
|
from pixeltable.metadata import schema
|
|
21
|
-
from pixeltable.utils.exception_handler import run_cleanup_on_exception
|
|
22
24
|
from pixeltable.utils.filecache import FileCache
|
|
23
|
-
from pixeltable.utils.
|
|
25
|
+
from pixeltable.utils.object_stores import ObjectOps
|
|
24
26
|
|
|
25
27
|
from ..func.globals import resolve_symbol
|
|
26
28
|
from .column import Column
|
|
27
|
-
from .globals import _POS_COLUMN_NAME, _ROWID_COLUMN_NAME, MediaValidation,
|
|
29
|
+
from .globals import _POS_COLUMN_NAME, _ROWID_COLUMN_NAME, MediaValidation, QColumnId, is_valid_identifier
|
|
30
|
+
from .tbl_ops import DeleteTableMdOp, DeleteTableMediaFilesOp, DropStoreTableOp, TableOp
|
|
31
|
+
from .update_status import RowCountStats, UpdateStatus
|
|
28
32
|
|
|
29
33
|
if TYPE_CHECKING:
|
|
30
34
|
from pixeltable import exec, store
|
|
35
|
+
from pixeltable._query import Query
|
|
36
|
+
from pixeltable.catalog.table_version_handle import TableVersionHandle
|
|
37
|
+
from pixeltable.io import ExternalStore
|
|
38
|
+
from pixeltable.plan import SampleClause
|
|
31
39
|
|
|
32
|
-
from .
|
|
40
|
+
from .table_version_path import TableVersionPath
|
|
33
41
|
|
|
34
42
|
_logger = logging.getLogger('pixeltable')
|
|
35
43
|
|
|
36
44
|
|
|
45
|
+
@dataclasses.dataclass(frozen=True)
|
|
46
|
+
class TableVersionMd:
|
|
47
|
+
"""
|
|
48
|
+
Complete set of md records for a specific TableVersion instance.
|
|
49
|
+
"""
|
|
50
|
+
|
|
51
|
+
tbl_md: schema.TableMd
|
|
52
|
+
version_md: schema.VersionMd
|
|
53
|
+
schema_version_md: schema.SchemaVersionMd
|
|
54
|
+
|
|
55
|
+
@property
|
|
56
|
+
def is_pure_snapshot(self) -> bool:
|
|
57
|
+
return (
|
|
58
|
+
self.tbl_md.view_md is not None
|
|
59
|
+
and self.tbl_md.view_md.is_snapshot
|
|
60
|
+
and self.tbl_md.view_md.predicate is None
|
|
61
|
+
and self.tbl_md.view_md.sample_clause is None
|
|
62
|
+
and len(self.schema_version_md.columns) == 0
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
def as_dict(self) -> dict:
|
|
66
|
+
from .catalog import md_dict_factory
|
|
67
|
+
|
|
68
|
+
return dataclasses.asdict(self, dict_factory=md_dict_factory)
|
|
69
|
+
|
|
70
|
+
@classmethod
|
|
71
|
+
def from_dict(cls, data: dict[str, Any]) -> TableVersionMd:
|
|
72
|
+
return schema.md_from_dict(cls, data)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@dataclasses.dataclass(frozen=True, slots=True)
|
|
76
|
+
class TableVersionKey:
|
|
77
|
+
tbl_id: UUID
|
|
78
|
+
effective_version: int | None
|
|
79
|
+
anchor_tbl_id: UUID | None
|
|
80
|
+
|
|
81
|
+
def __post_init__(self) -> None:
|
|
82
|
+
assert self.effective_version is None or self.anchor_tbl_id is None
|
|
83
|
+
|
|
84
|
+
# Allow unpacking as a tuple
|
|
85
|
+
def __iter__(self) -> Iterator[Any]:
|
|
86
|
+
return iter((self.tbl_id, self.effective_version, self.anchor_tbl_id))
|
|
87
|
+
|
|
88
|
+
def as_dict(self) -> dict:
|
|
89
|
+
return {
|
|
90
|
+
'id': str(self.tbl_id),
|
|
91
|
+
'effective_version': self.effective_version,
|
|
92
|
+
'anchor_tbl_id': str(self.anchor_tbl_id) if self.anchor_tbl_id is not None else None,
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
@classmethod
|
|
96
|
+
def from_dict(cls, d: dict) -> TableVersionKey:
|
|
97
|
+
tbl_id = UUID(d['id'])
|
|
98
|
+
effective_version = d['effective_version']
|
|
99
|
+
anchor_tbl_id = d.get('anchor_tbl_id')
|
|
100
|
+
return cls(tbl_id, effective_version, UUID(anchor_tbl_id) if anchor_tbl_id is not None else None)
|
|
101
|
+
|
|
102
|
+
|
|
37
103
|
class TableVersion:
|
|
38
104
|
"""
|
|
39
105
|
TableVersion represents a particular version of a table/view along with its physical representation:
|
|
@@ -50,43 +116,68 @@ class TableVersion:
|
|
|
50
116
|
|
|
51
117
|
Instances of TableVersion should not be stored as member variables (ie, used across transaction boundaries).
|
|
52
118
|
Use a TableVersionHandle instead.
|
|
119
|
+
|
|
120
|
+
Only TableVersion and Catalog interact directly with stored metadata. Everything else needs to go through these
|
|
121
|
+
two classes.
|
|
122
|
+
|
|
123
|
+
TableVersions come in three "flavors" depending on the `effective_version` and `anchor_tbl_id` settings:
|
|
124
|
+
- if both are None, it's a live table that tracks `tbl_md.current_version`
|
|
125
|
+
- if `effective_version` is defined, it's a snapshot of the specific version given by `effective_version`
|
|
126
|
+
- if `anchor_tbl_id` is defined, it's a replica table that is "anchored" to the given table, in the following
|
|
127
|
+
sense: if n is the latest non-fragment version of `anchor_tbl_id`, then the tracked version is m, where m
|
|
128
|
+
is the latest version of `tbl_id` (possibly a fragment) with created_at(m) <= created_at(n).
|
|
129
|
+
In the typical case, `anchor_tbl_id` is a descendant of `tbl_id` and the anchored TableVersion instance
|
|
130
|
+
appears along the TableVersionPath for `anchor_tbl_id`.
|
|
131
|
+
In the TableVersionPath for a replica, all path elements will have the same anchor_tbl_id, the tbl_id
|
|
132
|
+
of the primary (leaf) table. (It is also possible for one or more path elements at the base to be snapshots.)
|
|
133
|
+
At most one of `effective_version` and `anchor_tbl_id` can be specified.
|
|
53
134
|
"""
|
|
54
135
|
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
next_idx_id: int
|
|
70
|
-
next_rowid: int
|
|
71
|
-
predicate: Optional[exprs.Expr]
|
|
72
|
-
mutable_views: list[TableVersionHandle] # target for data operation propagation (only set for live tables)
|
|
73
|
-
iterator_cls: Optional[type[ComponentIterator]]
|
|
74
|
-
iterator_args: Optional[exprs.InlineDict]
|
|
136
|
+
key: TableVersionKey
|
|
137
|
+
|
|
138
|
+
# record metadata stored in catalog
|
|
139
|
+
_tbl_md: schema.TableMd
|
|
140
|
+
_version_md: schema.VersionMd
|
|
141
|
+
_schema_version_md: schema.SchemaVersionMd
|
|
142
|
+
|
|
143
|
+
path: 'TableVersionPath' | None # only set for non-snapshots; needed to resolve computed cols
|
|
144
|
+
base: TableVersionHandle | None # only set for views
|
|
145
|
+
predicate: exprs.Expr | None
|
|
146
|
+
sample_clause: 'SampleClause' | None
|
|
147
|
+
|
|
148
|
+
iterator_cls: type[ComponentIterator] | None
|
|
149
|
+
iterator_args: exprs.InlineDict | None
|
|
75
150
|
num_iterator_cols: int
|
|
76
151
|
|
|
152
|
+
# target for data operation propagation (only set for non-snapshots, and only records non-snapshot views)
|
|
153
|
+
mutable_views: frozenset[TableVersionHandle]
|
|
154
|
+
|
|
77
155
|
# contains complete history of columns, incl dropped ones
|
|
78
156
|
cols: list[Column]
|
|
79
157
|
# contains only user-facing (named) columns visible in this version
|
|
80
158
|
cols_by_name: dict[str, Column]
|
|
81
159
|
# contains only columns visible in this version, both system and user
|
|
82
160
|
cols_by_id: dict[int, Column]
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
#
|
|
161
|
+
|
|
162
|
+
# True if this TableVersion instance can have indices:
|
|
163
|
+
# - live version of a mutable table
|
|
164
|
+
# - the most recent version of a replica
|
|
165
|
+
supports_idxs: bool
|
|
166
|
+
|
|
167
|
+
# only populated with indices visible in this TableVersion instance
|
|
168
|
+
idxs: dict[int, TableVersion.IndexInfo] # key: index id
|
|
86
169
|
idxs_by_name: dict[str, TableVersion.IndexInfo]
|
|
170
|
+
idxs_by_col: dict[QColumnId, list[TableVersion.IndexInfo]]
|
|
171
|
+
|
|
172
|
+
external_stores: dict[str, ExternalStore]
|
|
173
|
+
store_tbl: 'store.StoreBase' | None
|
|
174
|
+
|
|
175
|
+
is_initialized: bool # True if init() has been called
|
|
87
176
|
|
|
88
|
-
|
|
89
|
-
|
|
177
|
+
# used by Catalog to invalidate cached instances at the end of a transaction;
|
|
178
|
+
# True if this instance reflects the state of stored metadata in the context of this transaction and
|
|
179
|
+
# it is the instance cached in Catalog
|
|
180
|
+
is_validated: bool
|
|
90
181
|
|
|
91
182
|
@dataclasses.dataclass
|
|
92
183
|
class IndexInfo:
|
|
@@ -99,28 +190,25 @@ class TableVersion:
|
|
|
99
190
|
|
|
100
191
|
def __init__(
|
|
101
192
|
self,
|
|
102
|
-
|
|
193
|
+
key: TableVersionKey,
|
|
103
194
|
tbl_md: schema.TableMd,
|
|
104
|
-
|
|
105
|
-
schema_version_md: schema.
|
|
195
|
+
version_md: schema.VersionMd,
|
|
196
|
+
schema_version_md: schema.SchemaVersionMd,
|
|
106
197
|
mutable_views: list[TableVersionHandle],
|
|
107
|
-
base_path:
|
|
108
|
-
base:
|
|
109
|
-
# base_store_tbl: Optional['store.StoreBase'] = None,
|
|
198
|
+
base_path: 'TableVersionPath' | None = None,
|
|
199
|
+
base: TableVersionHandle | None = None,
|
|
110
200
|
):
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
self.
|
|
114
|
-
self.
|
|
115
|
-
self.
|
|
116
|
-
self.
|
|
117
|
-
self.
|
|
118
|
-
self.
|
|
119
|
-
self.schema_version = schema_version_md.schema_version
|
|
120
|
-
self.view_md = tbl_md.view_md # save this as-is, it's needed for _create_md()
|
|
121
|
-
self.media_validation = MediaValidation[schema_version_md.media_validation.upper()]
|
|
201
|
+
assert key.anchor_tbl_id is None or isinstance(key.anchor_tbl_id, UUID)
|
|
202
|
+
|
|
203
|
+
self.is_validated = True # a freshly constructed instance is always valid
|
|
204
|
+
self.is_initialized = False
|
|
205
|
+
self.key = key
|
|
206
|
+
self._tbl_md = copy.deepcopy(tbl_md)
|
|
207
|
+
self._version_md = copy.deepcopy(version_md)
|
|
208
|
+
self._schema_version_md = copy.deepcopy(schema_version_md)
|
|
122
209
|
assert not (self.is_view and base is None)
|
|
123
210
|
self.base = base
|
|
211
|
+
self.store_tbl = None
|
|
124
212
|
|
|
125
213
|
# mutable tables need their TableVersionPath for expr eval during updates
|
|
126
214
|
from .table_version_handle import TableVersionHandle
|
|
@@ -129,27 +217,19 @@ class TableVersion:
|
|
|
129
217
|
if self.is_snapshot:
|
|
130
218
|
self.path = None
|
|
131
219
|
else:
|
|
132
|
-
self_handle = TableVersionHandle(
|
|
220
|
+
self_handle = TableVersionHandle(key)
|
|
133
221
|
if self.is_view:
|
|
134
222
|
assert base_path is not None
|
|
135
223
|
self.path = TableVersionPath(self_handle, base=base_path)
|
|
136
224
|
|
|
137
|
-
if self.is_snapshot:
|
|
138
|
-
self.next_col_id = -1
|
|
139
|
-
self.next_idx_id = -1 # TODO: can snapshots have separate indices?
|
|
140
|
-
self.next_rowid = -1
|
|
141
|
-
else:
|
|
142
|
-
assert tbl_md.current_version == self.version
|
|
143
|
-
self.next_col_id = tbl_md.next_col_id
|
|
144
|
-
self.next_idx_id = tbl_md.next_idx_id
|
|
145
|
-
self.next_rowid = tbl_md.next_row_id
|
|
146
|
-
|
|
147
225
|
# view-specific initialization
|
|
148
226
|
from pixeltable import exprs
|
|
227
|
+
from pixeltable.plan import SampleClause
|
|
149
228
|
|
|
150
229
|
predicate_dict = None if self.view_md is None or self.view_md.predicate is None else self.view_md.predicate
|
|
151
230
|
self.predicate = exprs.Expr.from_dict(predicate_dict) if predicate_dict is not None else None
|
|
152
|
-
self.
|
|
231
|
+
sample_dict = None if self.view_md is None or self.view_md.sample_clause is None else self.view_md.sample_clause
|
|
232
|
+
self.sample_clause = SampleClause.from_dict(sample_dict) if sample_dict is not None else None
|
|
153
233
|
|
|
154
234
|
# component view-specific initialization
|
|
155
235
|
self.iterator_cls = None
|
|
@@ -164,44 +244,23 @@ class TableVersion:
|
|
|
164
244
|
self.num_iterator_cols = len(output_schema)
|
|
165
245
|
assert tbl_md.view_md.iterator_args is not None
|
|
166
246
|
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
cat.add_tbl_version(self)
|
|
247
|
+
self.mutable_views = frozenset(mutable_views)
|
|
248
|
+
assert self.is_mutable or len(self.mutable_views) == 0
|
|
170
249
|
|
|
171
|
-
# init schema after we determined whether we're a component view, and before we create the store table
|
|
172
250
|
self.cols = []
|
|
173
251
|
self.cols_by_name = {}
|
|
174
252
|
self.cols_by_id = {}
|
|
175
|
-
self.
|
|
253
|
+
self.idxs = {}
|
|
176
254
|
self.idxs_by_name = {}
|
|
255
|
+
self.idxs_by_col = {}
|
|
256
|
+
self.supports_idxs = self.effective_version is None or (
|
|
257
|
+
self.is_replica and self.effective_version == self.tbl_md.current_version
|
|
258
|
+
)
|
|
177
259
|
self.external_stores = {}
|
|
178
260
|
|
|
179
|
-
self._init_schema(tbl_md, schema_version_md)
|
|
180
|
-
|
|
181
|
-
# Init external stores (this needs to happen after the schema is created)
|
|
182
|
-
self._init_external_stores(tbl_md)
|
|
183
|
-
|
|
184
261
|
def __hash__(self) -> int:
|
|
185
262
|
return hash(self.id)
|
|
186
263
|
|
|
187
|
-
def create_snapshot_copy(self) -> TableVersion:
|
|
188
|
-
"""Create a snapshot copy of this TableVersion"""
|
|
189
|
-
assert not self.is_snapshot
|
|
190
|
-
base = self.path.base.tbl_version if self.is_view else None
|
|
191
|
-
return TableVersion(
|
|
192
|
-
self.id,
|
|
193
|
-
self._create_tbl_md(),
|
|
194
|
-
self.version,
|
|
195
|
-
self._create_schema_version_md(preceding_schema_version=0), # preceding_schema_version: dummy value
|
|
196
|
-
mutable_views=[],
|
|
197
|
-
base=base,
|
|
198
|
-
)
|
|
199
|
-
|
|
200
|
-
def create_handle(self) -> TableVersionHandle:
|
|
201
|
-
from .table_version_handle import TableVersionHandle
|
|
202
|
-
|
|
203
|
-
return TableVersionHandle(self.id, self.effective_version, tbl_version=self)
|
|
204
|
-
|
|
205
264
|
@property
|
|
206
265
|
def versioned_name(self) -> str:
|
|
207
266
|
if self.effective_version is None:
|
|
@@ -209,75 +268,117 @@ class TableVersion:
|
|
|
209
268
|
else:
|
|
210
269
|
return f'{self.name}:{self.effective_version}'
|
|
211
270
|
|
|
271
|
+
def __repr__(self) -> str:
|
|
272
|
+
return (
|
|
273
|
+
f'TableVersion(id={self.id!r}, name={self.name!r}, effective_version={self.effective_version}, '
|
|
274
|
+
f'anchor_tbl_id={self.anchor_tbl_id}; version={self.version})'
|
|
275
|
+
)
|
|
276
|
+
|
|
277
|
+
@property
|
|
278
|
+
def handle(self) -> 'TableVersionHandle':
|
|
279
|
+
from .table_version_handle import TableVersionHandle
|
|
280
|
+
|
|
281
|
+
return TableVersionHandle(self.key, tbl_version=self)
|
|
282
|
+
|
|
212
283
|
@classmethod
|
|
213
|
-
def
|
|
284
|
+
def create_initial_md(
|
|
214
285
|
cls,
|
|
215
|
-
dir_id: UUID,
|
|
216
286
|
name: str,
|
|
217
287
|
cols: list[Column],
|
|
218
288
|
num_retained_versions: int,
|
|
219
289
|
comment: str,
|
|
220
290
|
media_validation: MediaValidation,
|
|
221
|
-
|
|
222
|
-
view_md:
|
|
223
|
-
) ->
|
|
224
|
-
|
|
291
|
+
create_default_idxs: bool,
|
|
292
|
+
view_md: schema.ViewMd | None = None,
|
|
293
|
+
) -> TableVersionMd:
|
|
294
|
+
from .table_version_handle import TableVersionHandle
|
|
295
|
+
|
|
225
296
|
user = Env.get().user
|
|
297
|
+
timestamp = time.time()
|
|
298
|
+
|
|
299
|
+
tbl_id = uuid.uuid4()
|
|
300
|
+
tbl_id_str = str(tbl_id)
|
|
301
|
+
tbl_handle = TableVersionHandle(TableVersionKey(tbl_id, None, None))
|
|
302
|
+
column_ids = itertools.count()
|
|
303
|
+
index_ids = itertools.count()
|
|
226
304
|
|
|
227
|
-
# assign ids
|
|
228
|
-
|
|
305
|
+
# assign ids, create metadata
|
|
306
|
+
column_md: dict[int, schema.ColumnMd] = {}
|
|
307
|
+
schema_col_md: dict[int, schema.SchemaColumn] = {}
|
|
229
308
|
for pos, col in enumerate(cols):
|
|
230
|
-
col.
|
|
309
|
+
col.tbl_handle = tbl_handle
|
|
310
|
+
col.id = next(column_ids)
|
|
231
311
|
col.schema_version_add = 0
|
|
232
|
-
cols_by_name[col.name] = col
|
|
233
312
|
if col.is_computed:
|
|
234
313
|
col.check_value_expr()
|
|
314
|
+
col_md, sch_md = col.to_md(pos)
|
|
315
|
+
assert sch_md is not None
|
|
316
|
+
column_md[col.id] = col_md
|
|
317
|
+
schema_col_md[col.id] = sch_md
|
|
318
|
+
|
|
319
|
+
index_md: dict[int, schema.IndexMd] = {}
|
|
320
|
+
if create_default_idxs and (view_md is None or not view_md.is_snapshot):
|
|
321
|
+
index_cols: list[Column] = []
|
|
322
|
+
for col in (c for c in cols if cls._is_btree_indexable(c)):
|
|
323
|
+
idx = index.BtreeIndex()
|
|
324
|
+
val_col, undo_col = cls._create_index_columns(col, idx, 0, tbl_handle, id_cb=lambda: next(column_ids))
|
|
325
|
+
index_cols.extend([val_col, undo_col])
|
|
326
|
+
|
|
327
|
+
idx_id = next(index_ids)
|
|
328
|
+
idx_cls = type(idx)
|
|
329
|
+
md = schema.IndexMd(
|
|
330
|
+
id=idx_id,
|
|
331
|
+
name=f'idx{idx_id}',
|
|
332
|
+
indexed_col_id=col.id,
|
|
333
|
+
indexed_col_tbl_id=tbl_id_str,
|
|
334
|
+
index_val_col_id=val_col.id,
|
|
335
|
+
index_val_undo_col_id=undo_col.id,
|
|
336
|
+
schema_version_add=0,
|
|
337
|
+
schema_version_drop=None,
|
|
338
|
+
class_fqn=idx_cls.__module__ + '.' + idx_cls.__name__,
|
|
339
|
+
init_args=idx.as_dict(),
|
|
340
|
+
)
|
|
341
|
+
index_md[idx_id] = md
|
|
235
342
|
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
343
|
+
for col in index_cols:
|
|
344
|
+
col_md, _ = col.to_md()
|
|
345
|
+
column_md[col.id] = col_md
|
|
346
|
+
|
|
347
|
+
assert all(column_md[id].id == id for id in column_md)
|
|
348
|
+
assert all(index_md[id].id == id for id in index_md)
|
|
349
|
+
|
|
350
|
+
cols.extend(index_cols)
|
|
351
|
+
|
|
352
|
+
tbl_md = schema.TableMd(
|
|
353
|
+
tbl_id=tbl_id_str,
|
|
243
354
|
name=name,
|
|
244
355
|
user=user,
|
|
245
356
|
is_replica=False,
|
|
246
357
|
current_version=0,
|
|
247
358
|
current_schema_version=0,
|
|
248
|
-
next_col_id=
|
|
249
|
-
next_idx_id=
|
|
359
|
+
next_col_id=next(column_ids),
|
|
360
|
+
next_idx_id=next(index_ids),
|
|
250
361
|
next_row_id=0,
|
|
362
|
+
view_sn=0,
|
|
251
363
|
column_md=column_md,
|
|
252
|
-
index_md=
|
|
364
|
+
index_md=index_md,
|
|
253
365
|
external_stores=[],
|
|
254
366
|
view_md=view_md,
|
|
255
367
|
additional_md={},
|
|
256
368
|
)
|
|
257
|
-
# create a schema.Table here, we need it to call our c'tor;
|
|
258
|
-
# don't add it to the session yet, we might add index metadata
|
|
259
|
-
tbl_record = schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(table_md))
|
|
260
369
|
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
370
|
+
table_version_md = schema.VersionMd(
|
|
371
|
+
tbl_id=tbl_id_str,
|
|
372
|
+
created_at=timestamp,
|
|
373
|
+
version=0,
|
|
374
|
+
schema_version=0,
|
|
375
|
+
user=user,
|
|
376
|
+
update_status=None,
|
|
377
|
+
additional_md={},
|
|
267
378
|
)
|
|
268
379
|
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
for pos, col in enumerate(cols):
|
|
272
|
-
md = schema.SchemaColumn(
|
|
273
|
-
pos=pos,
|
|
274
|
-
name=col.name,
|
|
275
|
-
media_validation=col._media_validation.name.lower() if col._media_validation is not None else None,
|
|
276
|
-
)
|
|
277
|
-
schema_col_md[col.id] = md
|
|
278
|
-
|
|
279
|
-
schema_version_md = schema.TableSchemaVersionMd(
|
|
280
|
-
tbl_id=str(tbl_record.id),
|
|
380
|
+
schema_version_md = schema.SchemaVersionMd(
|
|
381
|
+
tbl_id=tbl_id_str,
|
|
281
382
|
schema_version=0,
|
|
282
383
|
preceding_schema_version=None,
|
|
283
384
|
columns=schema_col_md,
|
|
@@ -286,152 +387,227 @@ class TableVersion:
|
|
|
286
387
|
media_validation=media_validation.name.lower(),
|
|
287
388
|
additional_md={},
|
|
288
389
|
)
|
|
289
|
-
|
|
290
|
-
tbl_id=tbl_record.id, schema_version=0, md=dataclasses.asdict(schema_version_md)
|
|
291
|
-
)
|
|
390
|
+
return TableVersionMd(tbl_md, table_version_md, schema_version_md)
|
|
292
391
|
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
392
|
+
def exec_op(self, op: TableOp) -> None:
|
|
393
|
+
from pixeltable.store import StoreBase
|
|
394
|
+
|
|
395
|
+
assert op.delete_table_md_op is None # that needs to get handled by Catalog
|
|
396
|
+
|
|
397
|
+
if op.create_store_table_op is not None:
|
|
398
|
+
# this needs to be called outside of a transaction
|
|
399
|
+
self.store_tbl.create()
|
|
400
|
+
|
|
401
|
+
elif op.create_index_op is not None:
|
|
402
|
+
idx_info = self.idxs[op.create_index_op.idx_id]
|
|
403
|
+
with Env.get().begin_xact():
|
|
404
|
+
self.store_tbl.create_index(idx_info.id)
|
|
405
|
+
|
|
406
|
+
elif op.load_view_op is not None:
|
|
407
|
+
from pixeltable.catalog import Catalog
|
|
408
|
+
from pixeltable.plan import Planner
|
|
409
|
+
|
|
410
|
+
from .table_version_path import TableVersionPath
|
|
411
|
+
|
|
412
|
+
# clear out any remaining media files from an aborted previous attempt
|
|
413
|
+
self.delete_media()
|
|
414
|
+
view_path = TableVersionPath.from_dict(op.load_view_op.view_path)
|
|
415
|
+
plan, _ = Planner.create_view_load_plan(view_path)
|
|
416
|
+
_, row_counts = self.store_tbl.insert_rows(plan, v_min=self.version)
|
|
417
|
+
status = UpdateStatus(row_count_stats=row_counts)
|
|
418
|
+
Catalog.get().store_update_status(self.id, self.version, status)
|
|
419
|
+
_logger.debug(f'Loaded view {self.name} with {row_counts.num_rows} rows')
|
|
309
420
|
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
#
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
421
|
+
elif op.drop_store_table_op is not None:
|
|
422
|
+
# don't reference self.store_tbl here, it needs to reference the metadata for our base table, which at
|
|
423
|
+
# this point may not exist anymore
|
|
424
|
+
with Env.get().begin_xact() as conn:
|
|
425
|
+
drop_stmt = f'DROP TABLE IF EXISTS {StoreBase.storage_name(self.id, self.is_view)}'
|
|
426
|
+
conn.execute(sql.text(drop_stmt))
|
|
316
427
|
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
session.add(tbl_version_record)
|
|
321
|
-
session.add(schema_version_record)
|
|
322
|
-
return tbl_record.id, tbl_version
|
|
428
|
+
elif op.delete_table_media_files_op:
|
|
429
|
+
self.delete_media()
|
|
430
|
+
FileCache.get().clear(tbl_id=self.id)
|
|
323
431
|
|
|
324
432
|
@classmethod
|
|
325
|
-
def create_replica(cls, md:
|
|
433
|
+
def create_replica(cls, md: TableVersionMd, create_store_tbl: bool = True) -> TableVersion:
|
|
434
|
+
from .catalog import Catalog, TableVersionPath
|
|
435
|
+
|
|
436
|
+
assert Env.get().in_xact
|
|
437
|
+
assert md.tbl_md.is_replica
|
|
326
438
|
tbl_id = UUID(md.tbl_md.tbl_id)
|
|
439
|
+
_logger.info(f'Creating replica table version {tbl_id}:{md.version_md.version}.')
|
|
327
440
|
view_md = md.tbl_md.view_md
|
|
328
|
-
base_path =
|
|
441
|
+
base_path = TableVersionPath.from_md(view_md.base_versions) if view_md is not None else None
|
|
329
442
|
base = base_path.tbl_version if base_path is not None else None
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
)
|
|
333
|
-
|
|
443
|
+
key = TableVersionKey(tbl_id, md.version_md.version, None)
|
|
444
|
+
tbl_version = cls(key, md.tbl_md, md.version_md, md.schema_version_md, [], base_path=base_path, base=base)
|
|
445
|
+
cat = Catalog.get()
|
|
446
|
+
# We're creating a new TableVersion replica, so we should never have seen this particular
|
|
447
|
+
# TableVersion instance before.
|
|
448
|
+
# Actually this isn't true, because we might be re-creating a dropped replica.
|
|
449
|
+
# TODO: Understand why old TableVersions are kept around even for a dropped table.
|
|
450
|
+
# assert tbl_version.effective_version is not None
|
|
451
|
+
# assert (tbl_version.id, tbl_version.effective_version, None) not in cat._tbl_versions
|
|
452
|
+
cat._tbl_versions[key] = tbl_version
|
|
453
|
+
tbl_version.init()
|
|
454
|
+
if create_store_tbl:
|
|
455
|
+
tbl_version.store_tbl.create()
|
|
334
456
|
return tbl_version
|
|
335
457
|
|
|
336
|
-
def
|
|
458
|
+
def delete_media(self, tbl_version: int | None = None) -> None:
|
|
459
|
+
# Assemble a set of column destinations and delete objects from all of them
|
|
460
|
+
# None is a valid column destination which refers to the default object location
|
|
461
|
+
destinations = {col.destination for col in self.cols if col.is_stored}
|
|
462
|
+
for dest in destinations:
|
|
463
|
+
ObjectOps.delete(dest, self.id, tbl_version=tbl_version)
|
|
464
|
+
|
|
465
|
+
def drop(self) -> list[TableOp]:
|
|
466
|
+
id_str = str(self.id)
|
|
467
|
+
ops = [
|
|
468
|
+
TableOp(
|
|
469
|
+
tbl_id=id_str,
|
|
470
|
+
op_sn=0,
|
|
471
|
+
num_ops=3,
|
|
472
|
+
needs_xact=False,
|
|
473
|
+
delete_table_media_files_op=DeleteTableMediaFilesOp(),
|
|
474
|
+
),
|
|
475
|
+
TableOp(tbl_id=id_str, op_sn=1, num_ops=3, needs_xact=False, drop_store_table_op=DropStoreTableOp()),
|
|
476
|
+
TableOp(tbl_id=id_str, op_sn=2, num_ops=3, needs_xact=True, delete_table_md_op=DeleteTableMdOp()),
|
|
477
|
+
]
|
|
478
|
+
return ops
|
|
479
|
+
|
|
480
|
+
def init(self) -> None:
|
|
481
|
+
"""
|
|
482
|
+
Initialize schema-related in-memory metadata separately, now that this TableVersion instance is visible
|
|
483
|
+
in Catalog.
|
|
484
|
+
"""
|
|
337
485
|
from .catalog import Catalog
|
|
338
486
|
|
|
339
487
|
cat = Catalog.get()
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
488
|
+
assert self.key in cat._tbl_versions
|
|
489
|
+
self._init_schema()
|
|
490
|
+
if self.is_mutable:
|
|
491
|
+
cat.record_column_dependencies(self)
|
|
492
|
+
# init external stores; this needs to happen after the schema is created
|
|
493
|
+
self._init_external_stores()
|
|
494
|
+
|
|
495
|
+
self.is_initialized = True
|
|
496
|
+
|
|
497
|
+
def _init_schema(self) -> None:
|
|
498
|
+
from pixeltable.store import StoreComponentView, StoreTable, StoreView
|
|
499
|
+
|
|
500
|
+
from .catalog import Catalog
|
|
501
|
+
|
|
502
|
+
# initialize IndexBase instances and collect sa_col_types
|
|
503
|
+
idxs: dict[int, index.IndexBase] = {}
|
|
504
|
+
val_col_idxs: dict[int, index.IndexBase] = {} # key: id of value column
|
|
505
|
+
undo_col_idxs: dict[int, index.IndexBase] = {} # key: id of undo column
|
|
506
|
+
for md in self.tbl_md.index_md.values():
|
|
507
|
+
cls_name = md.class_fqn.rsplit('.', 1)[-1]
|
|
508
|
+
cls = getattr(index, cls_name)
|
|
509
|
+
idx = cls.from_dict(md.init_args)
|
|
510
|
+
idxs[md.id] = idx
|
|
511
|
+
val_col_idxs[md.index_val_col_id] = idx
|
|
512
|
+
undo_col_idxs[md.index_val_undo_col_id] = idx
|
|
513
|
+
|
|
514
|
+
# initialize Columns
|
|
358
515
|
self.cols = []
|
|
359
516
|
self.cols_by_name = {}
|
|
360
517
|
self.cols_by_id = {}
|
|
361
518
|
# Sort columns in column_md by the position specified in col_md.id to guarantee that all references
|
|
362
519
|
# point backward.
|
|
363
|
-
sorted_column_md = sorted(tbl_md.column_md.values(), key=lambda item: item.id)
|
|
520
|
+
sorted_column_md = sorted(self.tbl_md.column_md.values(), key=lambda item: item.id)
|
|
364
521
|
for col_md in sorted_column_md:
|
|
365
|
-
|
|
366
|
-
|
|
522
|
+
col_type = ts.ColumnType.from_dict(col_md.col_type)
|
|
523
|
+
schema_col_md = self.schema_version_md.columns.get(col_md.id)
|
|
367
524
|
media_val = (
|
|
368
525
|
MediaValidation[schema_col_md.media_validation.upper()]
|
|
369
526
|
if schema_col_md is not None and schema_col_md.media_validation is not None
|
|
370
527
|
else None
|
|
371
528
|
)
|
|
529
|
+
|
|
530
|
+
stores_cellmd: bool | None = None # None: determined by the column properties (in the Column c'tor)
|
|
531
|
+
sa_col_type: sql.types.TypeEngine | None = None
|
|
532
|
+
if col_md.id in val_col_idxs:
|
|
533
|
+
idx = val_col_idxs[col_md.id]
|
|
534
|
+
# for index value columns, the index gets to override the default
|
|
535
|
+
stores_cellmd = idx.records_value_errors()
|
|
536
|
+
sa_col_type = idx.get_index_sa_type(col_type)
|
|
537
|
+
elif col_md.id in undo_col_idxs:
|
|
538
|
+
idx = undo_col_idxs[col_md.id]
|
|
539
|
+
# for index undo columns, we never store cellmd
|
|
540
|
+
stores_cellmd = False
|
|
541
|
+
sa_col_type = idx.get_index_sa_type(col_type)
|
|
542
|
+
|
|
372
543
|
col = Column(
|
|
373
544
|
col_id=col_md.id,
|
|
374
|
-
name=
|
|
375
|
-
col_type=
|
|
545
|
+
name=schema_col_md.name if schema_col_md is not None else None,
|
|
546
|
+
col_type=col_type,
|
|
376
547
|
is_pk=col_md.is_pk,
|
|
548
|
+
is_iterator_col=self.is_component_view and col_md.id < self.num_iterator_cols + 1,
|
|
377
549
|
stored=col_md.stored,
|
|
378
550
|
media_validation=media_val,
|
|
551
|
+
sa_col_type=sa_col_type,
|
|
379
552
|
schema_version_add=col_md.schema_version_add,
|
|
380
553
|
schema_version_drop=col_md.schema_version_drop,
|
|
554
|
+
stores_cellmd=stores_cellmd,
|
|
381
555
|
value_expr_dict=col_md.value_expr,
|
|
556
|
+
tbl_handle=self.handle,
|
|
557
|
+
destination=col_md.destination,
|
|
382
558
|
)
|
|
383
|
-
col.tbl = self.create_handle()
|
|
384
|
-
self.cols.append(col)
|
|
385
559
|
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
if col_md.schema_version_drop is not None and col_md.schema_version_drop <= self.schema_version:
|
|
391
|
-
# column was dropped
|
|
392
|
-
continue
|
|
393
|
-
if col.name is not None:
|
|
394
|
-
self.cols_by_name[col.name] = col
|
|
395
|
-
self.cols_by_id[col.id] = col
|
|
396
|
-
|
|
397
|
-
# make sure to traverse columns ordered by position = order in which cols were created;
|
|
398
|
-
# this guarantees that references always point backwards
|
|
399
|
-
if not self.is_snapshot and col_md.value_expr is not None:
|
|
400
|
-
self._record_refd_columns(col)
|
|
401
|
-
|
|
402
|
-
def _init_idxs(self, tbl_md: schema.TableMd) -> None:
|
|
403
|
-
self.idx_md = tbl_md.index_md
|
|
404
|
-
self.idxs_by_name = {}
|
|
405
|
-
import pixeltable.index as index_module
|
|
406
|
-
|
|
407
|
-
for md in tbl_md.index_md.values():
|
|
408
|
-
if md.schema_version_add > self.schema_version or (
|
|
409
|
-
md.schema_version_drop is not None and md.schema_version_drop <= self.schema_version
|
|
560
|
+
self.cols.append(col)
|
|
561
|
+
# populate lookup structures before Expr.from_dict()
|
|
562
|
+
if col_md.schema_version_add <= self.schema_version and (
|
|
563
|
+
col_md.schema_version_drop is None or col_md.schema_version_drop > self.schema_version
|
|
410
564
|
):
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
565
|
+
if col.name is not None:
|
|
566
|
+
self.cols_by_name[col.name] = col
|
|
567
|
+
self.cols_by_id[col.id] = col
|
|
568
|
+
|
|
569
|
+
if self.supports_idxs:
|
|
570
|
+
# create IndexInfo for indices visible in current_version
|
|
571
|
+
visible_idxs = [
|
|
572
|
+
md
|
|
573
|
+
for md in self.tbl_md.index_md.values()
|
|
574
|
+
if md.schema_version_add <= self.schema_version
|
|
575
|
+
and (md.schema_version_drop is None or md.schema_version_drop > self.schema_version)
|
|
576
|
+
]
|
|
577
|
+
for md in visible_idxs:
|
|
578
|
+
idx = idxs[md.id]
|
|
579
|
+
indexed_col_id = QColumnId(UUID(md.indexed_col_tbl_id), md.indexed_col_id)
|
|
580
|
+
idx_col = self._lookup_column(indexed_col_id)
|
|
581
|
+
info = self.IndexInfo(
|
|
582
|
+
id=md.id,
|
|
583
|
+
name=md.name,
|
|
584
|
+
idx=idx,
|
|
585
|
+
col=idx_col,
|
|
586
|
+
val_col=self.cols_by_id[md.index_val_col_id],
|
|
587
|
+
undo_col=self.cols_by_id[md.index_val_undo_col_id],
|
|
588
|
+
)
|
|
589
|
+
self.idxs[md.id] = info
|
|
590
|
+
self.idxs_by_name[md.name] = info
|
|
591
|
+
self.idxs_by_col.setdefault(indexed_col_id, []).append(info)
|
|
592
|
+
|
|
593
|
+
# create value exprs, now that we have all lookup structures in place
|
|
594
|
+
tvp: TableVersionPath | None = None
|
|
595
|
+
if self.effective_version is not None:
|
|
596
|
+
# for snapshot TableVersion instances, we need to retarget the column value_exprs to the snapshot;
|
|
597
|
+
# otherwise they'll incorrectly refer to the live table. So, construct a full TableVersionPath to
|
|
598
|
+
# use for retargeting.
|
|
599
|
+
tvp = Catalog.get().construct_tvp(
|
|
600
|
+
self.id, self.effective_version, self.tbl_md.ancestors, self.version_md.created_at
|
|
601
|
+
)
|
|
602
|
+
elif self.anchor_tbl_id is not None:
|
|
603
|
+
# for replica TableVersion instances, we also need to retarget the value_exprs, this time to the
|
|
604
|
+
# "anchored" TableVersionPath.
|
|
605
|
+
assert self.path is not None
|
|
606
|
+
tvp = self.path
|
|
607
|
+
for col in self.cols_by_id.values():
|
|
608
|
+
col.init_value_expr(tvp)
|
|
434
609
|
|
|
610
|
+
# create the sqlalchemy schema, after instantiating all Columns
|
|
435
611
|
if self.is_component_view:
|
|
436
612
|
self.store_tbl = StoreComponentView(self)
|
|
437
613
|
elif self.is_view:
|
|
@@ -439,54 +615,50 @@ class TableVersion:
|
|
|
439
615
|
else:
|
|
440
616
|
self.store_tbl = StoreTable(self)
|
|
441
617
|
|
|
442
|
-
def
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
618
|
+
def _lookup_column(self, qid: QColumnId) -> Column | None:
|
|
619
|
+
"""
|
|
620
|
+
Look up the column with the given table id and column id, searching through the ancestors of this TableVersion
|
|
621
|
+
to find it. We avoid referencing TableVersionPath in order to work properly with snapshots as well.
|
|
446
622
|
|
|
447
|
-
|
|
448
|
-
timestamp: timestamp of the change
|
|
449
|
-
conn: database connection to use
|
|
450
|
-
update_tbl_version: if `True`, will also write `TableVersion` metadata
|
|
451
|
-
preceding_schema_version: if specified, will also write `TableSchemaVersion` metadata, recording the
|
|
452
|
-
specified preceding schema version
|
|
623
|
+
This will search through *all* known columns, including columns that are not visible in this TableVersion.
|
|
453
624
|
"""
|
|
454
|
-
|
|
625
|
+
if qid.tbl_id == self.id:
|
|
626
|
+
return next(col for col in self.cols if col.id == qid.col_id)
|
|
627
|
+
elif self.base is not None:
|
|
628
|
+
return self.base.get()._lookup_column(qid)
|
|
629
|
+
else:
|
|
630
|
+
return None
|
|
631
|
+
|
|
632
|
+
def _write_md(self, new_version: bool, new_schema_version: bool) -> None:
|
|
455
633
|
from pixeltable.catalog import Catalog
|
|
456
634
|
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
self.
|
|
635
|
+
Catalog.get().write_tbl_md(
|
|
636
|
+
self.id,
|
|
637
|
+
None,
|
|
638
|
+
self._tbl_md,
|
|
639
|
+
self._version_md if new_version else None,
|
|
640
|
+
self._schema_version_md if new_schema_version else None,
|
|
461
641
|
)
|
|
462
642
|
|
|
463
|
-
Catalog.get().store_tbl_md(self.id, tbl_md, version_md, schema_version_md)
|
|
464
|
-
|
|
465
|
-
def ensure_md_loaded(self) -> None:
|
|
466
|
-
"""Ensure that table metadata is loaded."""
|
|
467
|
-
for col in self.cols_by_id.values():
|
|
468
|
-
_ = col.value_expr
|
|
469
|
-
|
|
470
643
|
def _store_idx_name(self, idx_id: int) -> str:
|
|
471
644
|
"""Return name of index in the store, which needs to be globally unique"""
|
|
472
645
|
return f'idx_{self.id.hex}_{idx_id}'
|
|
473
646
|
|
|
474
|
-
def add_index(self, col: Column, idx_name:
|
|
647
|
+
def add_index(self, col: Column, idx_name: str | None, idx: index.IndexBase) -> UpdateStatus:
|
|
475
648
|
# we're creating a new schema version
|
|
476
|
-
self.
|
|
477
|
-
preceding_schema_version = self.schema_version
|
|
478
|
-
self.schema_version = self.version
|
|
649
|
+
self.bump_version(bump_schema_version=True)
|
|
479
650
|
status = self._add_index(col, idx_name, idx)
|
|
480
|
-
self.
|
|
651
|
+
self._write_md(new_version=True, new_schema_version=True)
|
|
481
652
|
_logger.info(f'Added index {idx_name} on column {col.name} to table {self.name}')
|
|
482
653
|
return status
|
|
483
654
|
|
|
484
|
-
|
|
655
|
+
@classmethod
|
|
656
|
+
def _is_btree_indexable(cls, col: Column) -> bool:
|
|
485
657
|
if not col.stored:
|
|
486
658
|
# if the column is intentionally not stored, we want to avoid the overhead of an index
|
|
487
659
|
return False
|
|
488
660
|
# Skip index for stored media columns produced by an iterator
|
|
489
|
-
if col.col_type.is_media_type() and
|
|
661
|
+
if col.col_type.is_media_type() and col.is_iterator_col:
|
|
490
662
|
return False
|
|
491
663
|
if not col.col_type.is_scalar_type() and not (col.col_type.is_media_type() and not col.is_computed):
|
|
492
664
|
# wrong type for a B-tree
|
|
@@ -496,53 +668,58 @@ class TableVersion:
|
|
|
496
668
|
return False
|
|
497
669
|
return True
|
|
498
670
|
|
|
499
|
-
def _add_default_index(self, col: Column) ->
|
|
671
|
+
def _add_default_index(self, col: Column) -> UpdateStatus | None:
|
|
500
672
|
"""Add a B-tree index on this column if it has a compatible type"""
|
|
501
673
|
if not self._is_btree_indexable(col):
|
|
502
674
|
return None
|
|
503
|
-
status = self._add_index(col, idx_name=None, idx=index.BtreeIndex(
|
|
675
|
+
status = self._add_index(col, idx_name=None, idx=index.BtreeIndex())
|
|
504
676
|
return status
|
|
505
677
|
|
|
506
|
-
|
|
678
|
+
@classmethod
|
|
679
|
+
def _create_index_columns(
|
|
680
|
+
cls,
|
|
681
|
+
col: Column,
|
|
682
|
+
idx: index.IndexBase,
|
|
683
|
+
schema_version: int,
|
|
684
|
+
tbl_handle: TableVersionHandle,
|
|
685
|
+
id_cb: Callable[[], int],
|
|
686
|
+
) -> tuple[Column, Column]:
|
|
507
687
|
"""Create value and undo columns for the given index.
|
|
508
688
|
Args:
|
|
509
689
|
idx: index for which columns will be created.
|
|
510
690
|
Returns:
|
|
511
|
-
A tuple containing the value column and the undo column.
|
|
691
|
+
A tuple containing the value column and the undo column, both of which are nullable.
|
|
512
692
|
"""
|
|
513
|
-
|
|
514
|
-
# add the index value and undo columns (which need to be nullable)
|
|
693
|
+
value_expr = idx.create_value_expr(col)
|
|
515
694
|
val_col = Column(
|
|
516
|
-
col_id=
|
|
695
|
+
col_id=id_cb(),
|
|
517
696
|
name=None,
|
|
518
|
-
computed_with=
|
|
519
|
-
sa_col_type=idx.
|
|
697
|
+
computed_with=value_expr,
|
|
698
|
+
sa_col_type=idx.get_index_sa_type(value_expr.col_type),
|
|
520
699
|
stored=True,
|
|
521
|
-
|
|
700
|
+
stores_cellmd=idx.records_value_errors(),
|
|
701
|
+
schema_version_add=schema_version,
|
|
522
702
|
schema_version_drop=None,
|
|
523
|
-
records_errors=idx.records_value_errors(),
|
|
524
703
|
)
|
|
525
|
-
val_col.tbl = self.create_handle()
|
|
526
704
|
val_col.col_type = val_col.col_type.copy(nullable=True)
|
|
527
|
-
|
|
705
|
+
val_col.tbl_handle = tbl_handle
|
|
528
706
|
|
|
529
707
|
undo_col = Column(
|
|
530
|
-
col_id=
|
|
708
|
+
col_id=id_cb(),
|
|
531
709
|
name=None,
|
|
532
710
|
col_type=val_col.col_type,
|
|
533
711
|
sa_col_type=val_col.sa_col_type,
|
|
534
712
|
stored=True,
|
|
535
|
-
|
|
713
|
+
stores_cellmd=False,
|
|
714
|
+
schema_version_add=schema_version,
|
|
536
715
|
schema_version_drop=None,
|
|
537
|
-
records_errors=False,
|
|
538
716
|
)
|
|
539
|
-
undo_col.tbl = self.create_handle()
|
|
540
717
|
undo_col.col_type = undo_col.col_type.copy(nullable=True)
|
|
541
|
-
|
|
718
|
+
undo_col.tbl_handle = tbl_handle
|
|
542
719
|
return val_col, undo_col
|
|
543
720
|
|
|
544
721
|
def _create_index(
|
|
545
|
-
self, col: Column, val_col: Column, undo_col: Column, idx_name:
|
|
722
|
+
self, col: Column, val_col: Column, undo_col: Column, idx_name: str | None, idx: index.IndexBase
|
|
546
723
|
) -> None:
|
|
547
724
|
"""Create the given index along with index md"""
|
|
548
725
|
idx_id = self.next_idx_id
|
|
@@ -551,14 +728,14 @@ class TableVersion:
|
|
|
551
728
|
idx_name = f'idx{idx_id}'
|
|
552
729
|
else:
|
|
553
730
|
assert is_valid_identifier(idx_name)
|
|
554
|
-
assert idx_name not in [i.name for i in self.
|
|
731
|
+
assert idx_name not in [i.name for i in self._tbl_md.index_md.values()]
|
|
555
732
|
# create and register the index metadata
|
|
556
733
|
idx_cls = type(idx)
|
|
557
734
|
idx_md = schema.IndexMd(
|
|
558
735
|
id=idx_id,
|
|
559
736
|
name=idx_name,
|
|
560
737
|
indexed_col_id=col.id,
|
|
561
|
-
indexed_col_tbl_id=str(col.
|
|
738
|
+
indexed_col_tbl_id=str(col.get_tbl().id),
|
|
562
739
|
index_val_col_id=val_col.id,
|
|
563
740
|
index_val_undo_col_id=undo_col.id,
|
|
564
741
|
schema_version_add=self.schema_version,
|
|
@@ -567,85 +744,80 @@ class TableVersion:
|
|
|
567
744
|
init_args=idx.as_dict(),
|
|
568
745
|
)
|
|
569
746
|
idx_info = self.IndexInfo(id=idx_id, name=idx_name, idx=idx, col=col, val_col=val_col, undo_col=undo_col)
|
|
570
|
-
self.
|
|
747
|
+
self._tbl_md.index_md[idx_id] = idx_md
|
|
748
|
+
self.idxs[idx_id] = idx_info
|
|
571
749
|
self.idxs_by_name[idx_name] = idx_info
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
finally:
|
|
575
|
-
|
|
576
|
-
def cleanup_index() -> None:
|
|
577
|
-
"""Delete the newly added in-memory index structure"""
|
|
578
|
-
del self.idxs_by_name[idx_name]
|
|
579
|
-
del self.idx_md[idx_id]
|
|
580
|
-
self.next_idx_id = idx_id
|
|
750
|
+
self.idxs_by_col.setdefault(col.qid, []).append(idx_info)
|
|
751
|
+
self.store_tbl.create_index(idx_id)
|
|
581
752
|
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
val_col, undo_vol = self._create_index_columns(idx)
|
|
753
|
+
def _add_index(self, col: Column, idx_name: str | None, idx: index.IndexBase) -> UpdateStatus:
|
|
754
|
+
val_col, undo_col = self._create_index_columns(
|
|
755
|
+
col, idx, self.schema_version, self.handle, id_cb=self.next_col_id
|
|
756
|
+
)
|
|
587
757
|
# add the columns and update the metadata
|
|
588
758
|
# TODO support on_error='abort' for indices; it's tricky because of the way metadata changes are entangled
|
|
589
759
|
# with the database operations
|
|
590
|
-
status = self._add_columns([val_col,
|
|
760
|
+
status = self._add_columns([val_col, undo_col], print_stats=False, on_error='ignore')
|
|
591
761
|
# now create the index structure
|
|
592
|
-
self._create_index(col, val_col,
|
|
762
|
+
self._create_index(col, val_col, undo_col, idx_name, idx)
|
|
593
763
|
return status
|
|
594
764
|
|
|
595
765
|
def drop_index(self, idx_id: int) -> None:
|
|
596
|
-
assert
|
|
597
|
-
assert idx_id in self.
|
|
766
|
+
assert self.is_mutable
|
|
767
|
+
assert idx_id in self._tbl_md.index_md
|
|
598
768
|
|
|
599
769
|
# we're creating a new schema version
|
|
600
|
-
self.
|
|
601
|
-
|
|
602
|
-
self.schema_version = self.version
|
|
603
|
-
idx_md = self.idx_md[idx_id]
|
|
770
|
+
self.bump_version(bump_schema_version=True)
|
|
771
|
+
idx_md = self._tbl_md.index_md[idx_id]
|
|
604
772
|
idx_md.schema_version_drop = self.schema_version
|
|
605
773
|
assert idx_md.name in self.idxs_by_name
|
|
606
774
|
idx_info = self.idxs_by_name[idx_md.name]
|
|
607
775
|
# remove this index entry from the active indexes (in memory)
|
|
608
776
|
# and the index metadata (in persistent table metadata)
|
|
777
|
+
# TODO: this is wrong, it breaks revert()
|
|
778
|
+
del self.idxs[idx_id]
|
|
609
779
|
del self.idxs_by_name[idx_md.name]
|
|
610
|
-
|
|
780
|
+
if idx_info.col.qid in self.idxs_by_col:
|
|
781
|
+
self.idxs_by_col[idx_info.col.qid].remove(idx_info)
|
|
782
|
+
del self._tbl_md.index_md[idx_id]
|
|
611
783
|
|
|
612
784
|
self._drop_columns([idx_info.val_col, idx_info.undo_col])
|
|
613
|
-
self.
|
|
785
|
+
self._write_md(new_version=True, new_schema_version=True)
|
|
614
786
|
_logger.info(f'Dropped index {idx_md.name} on table {self.name}')
|
|
615
787
|
|
|
616
788
|
def add_columns(
|
|
617
789
|
self, cols: Iterable[Column], print_stats: bool, on_error: Literal['abort', 'ignore']
|
|
618
790
|
) -> UpdateStatus:
|
|
619
|
-
"""Adds
|
|
620
|
-
assert
|
|
621
|
-
assert all(is_valid_identifier(col.name) for col in cols)
|
|
791
|
+
"""Adds columns to the table."""
|
|
792
|
+
assert self.is_mutable
|
|
793
|
+
assert all(is_valid_identifier(col.name) for col in cols if col.name is not None)
|
|
622
794
|
assert all(col.stored is not None for col in cols)
|
|
623
|
-
assert all(col.name not in self.cols_by_name for col in cols)
|
|
795
|
+
assert all(col.name not in self.cols_by_name for col in cols if col.name is not None)
|
|
624
796
|
for col in cols:
|
|
625
|
-
col.
|
|
626
|
-
col.id = self.next_col_id
|
|
627
|
-
self.next_col_id += 1
|
|
797
|
+
col.tbl_handle = self.handle
|
|
798
|
+
col.id = self.next_col_id()
|
|
628
799
|
|
|
629
800
|
# we're creating a new schema version
|
|
630
|
-
self.
|
|
631
|
-
preceding_schema_version = self.schema_version
|
|
632
|
-
self.schema_version = self.version
|
|
801
|
+
self.bump_version(bump_schema_version=True)
|
|
633
802
|
index_cols: dict[Column, tuple[index.BtreeIndex, Column, Column]] = {}
|
|
634
803
|
all_cols: list[Column] = []
|
|
635
804
|
for col in cols:
|
|
636
805
|
all_cols.append(col)
|
|
637
|
-
if self._is_btree_indexable(col):
|
|
638
|
-
idx = index.BtreeIndex(
|
|
639
|
-
val_col, undo_col = self._create_index_columns(
|
|
806
|
+
if col.name is not None and self._is_btree_indexable(col):
|
|
807
|
+
idx = index.BtreeIndex()
|
|
808
|
+
val_col, undo_col = self._create_index_columns(
|
|
809
|
+
col, idx, self.schema_version, self.handle, id_cb=self.next_col_id
|
|
810
|
+
)
|
|
640
811
|
index_cols[col] = (idx, val_col, undo_col)
|
|
641
812
|
all_cols.append(val_col)
|
|
642
813
|
all_cols.append(undo_col)
|
|
643
814
|
# Add all columns
|
|
644
815
|
status = self._add_columns(all_cols, print_stats=print_stats, on_error=on_error)
|
|
645
|
-
# Create indices and their
|
|
816
|
+
# Create indices and their md records
|
|
646
817
|
for col, (idx, val_col, undo_col) in index_cols.items():
|
|
647
818
|
self._create_index(col, val_col, undo_col, idx_name=None, idx=idx)
|
|
648
|
-
self.
|
|
819
|
+
self.update_status = status
|
|
820
|
+
self._write_md(new_version=True, new_schema_version=True)
|
|
649
821
|
_logger.info(f'Added columns {[col.name for col in cols]} to table {self.name}, new version: {self.version}')
|
|
650
822
|
|
|
651
823
|
msg = (
|
|
@@ -660,28 +832,39 @@ class TableVersion:
|
|
|
660
832
|
self, cols: Iterable[Column], print_stats: bool, on_error: Literal['abort', 'ignore']
|
|
661
833
|
) -> UpdateStatus:
|
|
662
834
|
"""Add and populate columns within the current transaction"""
|
|
835
|
+
from pixeltable.catalog import Catalog
|
|
836
|
+
from pixeltable.plan import Planner
|
|
837
|
+
|
|
663
838
|
cols_to_add = list(cols)
|
|
839
|
+
|
|
664
840
|
row_count = self.store_tbl.count()
|
|
665
841
|
for col in cols_to_add:
|
|
842
|
+
assert col.tbl_handle.id == self.id
|
|
666
843
|
if not col.col_type.nullable and not col.is_computed and row_count > 0:
|
|
667
844
|
raise excs.Error(
|
|
668
845
|
f'Cannot add non-nullable column {col.name!r} to table {self.name!r} with existing rows'
|
|
669
846
|
)
|
|
670
847
|
|
|
848
|
+
computed_values = 0
|
|
671
849
|
num_excs = 0
|
|
672
850
|
cols_with_excs: list[Column] = []
|
|
673
851
|
for col in cols_to_add:
|
|
852
|
+
assert col.id is not None
|
|
674
853
|
excs_per_col = 0
|
|
675
854
|
col.schema_version_add = self.schema_version
|
|
676
855
|
# add the column to the lookup structures now, rather than after the store changes executed successfully,
|
|
677
856
|
# because it might be referenced by the next column's value_expr
|
|
678
857
|
self.cols.append(col)
|
|
858
|
+
self.cols_by_id[col.id] = col
|
|
679
859
|
if col.name is not None:
|
|
680
860
|
self.cols_by_name[col.name] = col
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
col.
|
|
684
|
-
self.
|
|
861
|
+
col_md, sch_md = col.to_md(len(self.cols_by_name))
|
|
862
|
+
assert sch_md is not None, 'Schema column metadata must be created for user-facing columns'
|
|
863
|
+
self._tbl_md.column_md[col.id] = col_md
|
|
864
|
+
self._schema_version_md.columns[col.id] = sch_md
|
|
865
|
+
else:
|
|
866
|
+
col_md, _ = col.to_md()
|
|
867
|
+
self._tbl_md.column_md[col.id] = col_md
|
|
685
868
|
|
|
686
869
|
if col.is_stored:
|
|
687
870
|
self.store_tbl.add_column(col)
|
|
@@ -690,120 +873,121 @@ class TableVersion:
|
|
|
690
873
|
continue
|
|
691
874
|
|
|
692
875
|
# populate the column
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
plan, value_expr_slot_idx = Planner.create_add_column_plan(self.path, col)
|
|
876
|
+
plan = Planner.create_add_column_plan(self.path, col)
|
|
696
877
|
plan.ctx.num_rows = row_count
|
|
697
878
|
try:
|
|
698
879
|
plan.open()
|
|
699
880
|
try:
|
|
700
|
-
excs_per_col = self.store_tbl.load_column(col, plan,
|
|
701
|
-
except
|
|
702
|
-
|
|
703
|
-
|
|
881
|
+
excs_per_col = self.store_tbl.load_column(col, plan, on_error == 'abort')
|
|
882
|
+
except sql_exc.DBAPIError as exc:
|
|
883
|
+
Catalog.get().convert_sql_exc(exc, self.id, self.handle, convert_db_excs=True)
|
|
884
|
+
# If it wasn't converted, re-raise as a generic Pixeltable error
|
|
885
|
+
# (this means it's not a known concurrency error; it's something else)
|
|
886
|
+
raise excs.Error(
|
|
887
|
+
f'Unexpected SQL error during execution of computed column {col.name!r}:\n{exc}'
|
|
888
|
+
) from exc
|
|
704
889
|
if excs_per_col > 0:
|
|
705
890
|
cols_with_excs.append(col)
|
|
706
891
|
num_excs += excs_per_col
|
|
892
|
+
computed_values += plan.ctx.num_computed_exprs * row_count
|
|
707
893
|
finally:
|
|
708
|
-
# Ensure cleanup occurs if an exception or keyboard interruption happens during `load_column()`.
|
|
709
|
-
def cleanup_on_error() -> None:
|
|
710
|
-
"""Delete columns that are added as part of current add_columns operation and re-initialize
|
|
711
|
-
the sqlalchemy schema"""
|
|
712
|
-
self.cols = [col for col in self.cols if col not in cols_to_add]
|
|
713
|
-
for col in cols_to_add:
|
|
714
|
-
# remove columns that we already added
|
|
715
|
-
if col.id in self.cols_by_id:
|
|
716
|
-
del self.cols_by_id[col.id]
|
|
717
|
-
if col.name is not None and col.name in self.cols_by_name:
|
|
718
|
-
del self.cols_by_name[col.name]
|
|
719
|
-
self.store_tbl.create_sa_tbl()
|
|
720
|
-
|
|
721
|
-
# Run cleanup only if there has been an exception; otherwise, skip cleanup.
|
|
722
|
-
run_cleanup_on_exception(cleanup_on_error)
|
|
723
894
|
plan.close()
|
|
724
895
|
|
|
896
|
+
Catalog.get().record_column_dependencies(self)
|
|
897
|
+
|
|
725
898
|
if print_stats:
|
|
726
899
|
plan.ctx.profile.print(num_rows=row_count)
|
|
727
|
-
|
|
900
|
+
|
|
901
|
+
# TODO: what to do about system columns with exceptions?
|
|
902
|
+
row_counts = RowCountStats(
|
|
903
|
+
upd_rows=row_count, num_excs=num_excs, computed_values=computed_values
|
|
904
|
+
) # add_columns
|
|
728
905
|
return UpdateStatus(
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
num_excs=num_excs,
|
|
732
|
-
cols_with_excs=[f'{col.tbl.get().name}.{col.name}' for col in cols_with_excs if col.name is not None],
|
|
906
|
+
cols_with_excs=[f'{col.get_tbl().name}.{col.name}' for col in cols_with_excs if col.name is not None],
|
|
907
|
+
row_count_stats=row_counts,
|
|
733
908
|
)
|
|
734
909
|
|
|
735
910
|
def drop_column(self, col: Column) -> None:
|
|
736
911
|
"""Drop a column from the table."""
|
|
737
912
|
|
|
738
|
-
assert
|
|
913
|
+
assert self.is_mutable
|
|
739
914
|
|
|
740
915
|
# we're creating a new schema version
|
|
741
|
-
self.
|
|
742
|
-
preceding_schema_version = self.schema_version
|
|
743
|
-
self.schema_version = self.version
|
|
916
|
+
self.bump_version(bump_schema_version=True)
|
|
744
917
|
|
|
745
918
|
# drop this column and all dependent index columns and indices
|
|
746
919
|
dropped_cols = [col]
|
|
747
|
-
|
|
920
|
+
dropped_idx_info: list[TableVersion.IndexInfo] = []
|
|
748
921
|
for idx_info in self.idxs_by_name.values():
|
|
749
922
|
if idx_info.col != col:
|
|
750
923
|
continue
|
|
751
924
|
dropped_cols.extend([idx_info.val_col, idx_info.undo_col])
|
|
752
|
-
idx_md = self.
|
|
925
|
+
idx_md = self._tbl_md.index_md[idx_info.id]
|
|
753
926
|
idx_md.schema_version_drop = self.schema_version
|
|
754
927
|
assert idx_md.name in self.idxs_by_name
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
928
|
+
dropped_idx_info.append(idx_info)
|
|
929
|
+
|
|
930
|
+
# update index lookup structures
|
|
931
|
+
for info in dropped_idx_info:
|
|
932
|
+
del self.idxs[info.id]
|
|
933
|
+
del self.idxs_by_name[info.name]
|
|
934
|
+
if col.qid in self.idxs_by_col:
|
|
935
|
+
del self.idxs_by_col[col.qid]
|
|
936
|
+
|
|
759
937
|
self._drop_columns(dropped_cols)
|
|
760
|
-
self.
|
|
938
|
+
self._write_md(new_version=True, new_schema_version=True)
|
|
761
939
|
_logger.info(f'Dropped column {col.name} from table {self.name}, new version: {self.version}')
|
|
762
940
|
|
|
763
941
|
def _drop_columns(self, cols: Iterable[Column]) -> None:
|
|
764
942
|
"""Mark columns as dropped"""
|
|
765
|
-
|
|
943
|
+
from pixeltable.catalog import Catalog
|
|
766
944
|
|
|
767
|
-
|
|
768
|
-
if col.value_expr is not None:
|
|
769
|
-
# update Column.dependent_cols
|
|
770
|
-
for c in self.cols:
|
|
771
|
-
if c == col:
|
|
772
|
-
break
|
|
773
|
-
c.dependent_cols.discard(col)
|
|
945
|
+
assert self.is_mutable
|
|
774
946
|
|
|
947
|
+
for col in cols:
|
|
775
948
|
col.schema_version_drop = self.schema_version
|
|
776
949
|
if col.name is not None:
|
|
777
950
|
assert col.name in self.cols_by_name
|
|
778
951
|
del self.cols_by_name[col.name]
|
|
779
952
|
assert col.id in self.cols_by_id
|
|
780
953
|
del self.cols_by_id[col.id]
|
|
954
|
+
# update stored md
|
|
955
|
+
self._tbl_md.column_md[col.id].schema_version_drop = col.schema_version_drop
|
|
956
|
+
if col.name is not None:
|
|
957
|
+
del self._schema_version_md.columns[col.id]
|
|
958
|
+
|
|
959
|
+
# update positions
|
|
960
|
+
for pos, schema_col in enumerate(self._schema_version_md.columns.values()):
|
|
961
|
+
schema_col.pos = pos
|
|
781
962
|
|
|
782
963
|
self.store_tbl.create_sa_tbl()
|
|
964
|
+
Catalog.get().record_column_dependencies(self)
|
|
783
965
|
|
|
784
966
|
def rename_column(self, old_name: str, new_name: str) -> None:
|
|
785
967
|
"""Rename a column."""
|
|
786
|
-
|
|
787
|
-
|
|
968
|
+
if not self.is_mutable:
|
|
969
|
+
raise excs.Error(f'Cannot rename column for immutable table {self.name!r}')
|
|
970
|
+
col = self.path.get_column(old_name)
|
|
971
|
+
if col is None:
|
|
788
972
|
raise excs.Error(f'Unknown column: {old_name}')
|
|
973
|
+
if col.get_tbl().id != self.id:
|
|
974
|
+
raise excs.Error(f'Cannot rename base table column {col.name!r}')
|
|
789
975
|
if not is_valid_identifier(new_name):
|
|
790
|
-
raise excs.Error(f
|
|
976
|
+
raise excs.Error(f'Invalid column name: {new_name}')
|
|
791
977
|
if new_name in self.cols_by_name:
|
|
792
|
-
raise excs.Error(f'Column {new_name} already exists')
|
|
793
|
-
col = self.cols_by_name[old_name]
|
|
978
|
+
raise excs.Error(f'Column {new_name!r} already exists')
|
|
794
979
|
del self.cols_by_name[old_name]
|
|
795
980
|
col.name = new_name
|
|
796
981
|
self.cols_by_name[new_name] = col
|
|
982
|
+
self._schema_version_md.columns[col.id].name = new_name
|
|
797
983
|
|
|
798
984
|
# we're creating a new schema version
|
|
799
|
-
self.
|
|
800
|
-
preceding_schema_version = self.schema_version
|
|
801
|
-
self.schema_version = self.version
|
|
985
|
+
self.bump_version(bump_schema_version=True)
|
|
802
986
|
|
|
803
|
-
self.
|
|
987
|
+
self._write_md(new_version=True, new_schema_version=True)
|
|
804
988
|
_logger.info(f'Renamed column {old_name} to {new_name} in table {self.name}, new version: {self.version}')
|
|
805
989
|
|
|
806
|
-
def set_comment(self, new_comment:
|
|
990
|
+
def set_comment(self, new_comment: str | None) -> None:
|
|
807
991
|
_logger.info(f'[{self.name}] Updating comment: {new_comment}')
|
|
808
992
|
self.comment = new_comment
|
|
809
993
|
self._create_schema_version()
|
|
@@ -818,82 +1002,79 @@ class TableVersion:
|
|
|
818
1002
|
|
|
819
1003
|
def _create_schema_version(self) -> None:
|
|
820
1004
|
# we're creating a new schema version
|
|
821
|
-
self.
|
|
822
|
-
|
|
823
|
-
self.schema_version = self.version
|
|
824
|
-
self._update_md(time.time(), preceding_schema_version=preceding_schema_version)
|
|
1005
|
+
self.bump_version(bump_schema_version=True)
|
|
1006
|
+
self._write_md(new_version=True, new_schema_version=True)
|
|
825
1007
|
_logger.info(f'[{self.name}] Updating table schema to version: {self.version}')
|
|
826
1008
|
|
|
827
1009
|
def insert(
|
|
828
1010
|
self,
|
|
829
|
-
rows:
|
|
830
|
-
|
|
1011
|
+
rows: list[dict[str, Any]] | None,
|
|
1012
|
+
query: Query | None,
|
|
831
1013
|
print_stats: bool = False,
|
|
832
1014
|
fail_on_exception: bool = True,
|
|
833
1015
|
) -> UpdateStatus:
|
|
834
1016
|
"""
|
|
835
|
-
Insert rows into this table, either from an explicit list of dicts or from a `
|
|
1017
|
+
Insert rows into this table, either from an explicit list of dicts or from a `Query`.
|
|
836
1018
|
"""
|
|
837
1019
|
from pixeltable.plan import Planner
|
|
838
1020
|
|
|
839
|
-
assert self.is_insertable
|
|
840
|
-
assert (rows is None) != (
|
|
1021
|
+
assert self.is_insertable
|
|
1022
|
+
assert (rows is None) != (query is None) # Exactly one must be specified
|
|
841
1023
|
if rows is not None:
|
|
842
1024
|
plan = Planner.create_insert_plan(self, rows, ignore_errors=not fail_on_exception)
|
|
1025
|
+
|
|
843
1026
|
else:
|
|
844
|
-
plan = Planner.
|
|
1027
|
+
plan = Planner.create_query_insert_plan(self, query, ignore_errors=not fail_on_exception)
|
|
845
1028
|
|
|
846
1029
|
# this is a base table; we generate rowids during the insert
|
|
847
1030
|
def rowids() -> Iterator[int]:
|
|
848
1031
|
while True:
|
|
849
|
-
rowid = self.
|
|
850
|
-
self.
|
|
1032
|
+
rowid = self.next_row_id
|
|
1033
|
+
self.next_row_id += 1
|
|
851
1034
|
yield rowid
|
|
852
1035
|
|
|
853
|
-
|
|
1036
|
+
result = self._insert(
|
|
1037
|
+
plan, time.time(), print_stats=print_stats, rowids=rowids(), abort_on_exc=fail_on_exception
|
|
1038
|
+
)
|
|
1039
|
+
return result
|
|
854
1040
|
|
|
855
1041
|
def _insert(
|
|
856
1042
|
self,
|
|
857
1043
|
exec_plan: 'exec.ExecNode',
|
|
858
1044
|
timestamp: float,
|
|
859
1045
|
*,
|
|
860
|
-
rowids:
|
|
1046
|
+
rowids: Iterator[int] | None = None,
|
|
861
1047
|
print_stats: bool = False,
|
|
862
1048
|
abort_on_exc: bool = False,
|
|
863
1049
|
) -> UpdateStatus:
|
|
864
1050
|
"""Insert rows produced by exec_plan and propagate to views"""
|
|
865
1051
|
# we're creating a new version
|
|
866
|
-
self.
|
|
867
|
-
|
|
868
|
-
num_rows, num_excs, cols_with_excs = self.store_tbl.insert_rows(
|
|
1052
|
+
self.bump_version(timestamp, bump_schema_version=False)
|
|
1053
|
+
cols_with_excs, row_counts = self.store_tbl.insert_rows(
|
|
869
1054
|
exec_plan, v_min=self.version, rowids=rowids, abort_on_exc=abort_on_exc
|
|
870
1055
|
)
|
|
871
|
-
result
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
self._update_md(timestamp)
|
|
1056
|
+
result = UpdateStatus(
|
|
1057
|
+
cols_with_excs=[f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs],
|
|
1058
|
+
row_count_stats=row_counts,
|
|
1059
|
+
)
|
|
876
1060
|
|
|
877
1061
|
# update views
|
|
878
1062
|
for view in self.mutable_views:
|
|
879
1063
|
from pixeltable.plan import Planner
|
|
880
1064
|
|
|
881
|
-
|
|
882
|
-
status = view.get()._insert(
|
|
883
|
-
result
|
|
884
|
-
result.num_excs += status.num_excs
|
|
885
|
-
result.num_computed_values += status.num_computed_values
|
|
886
|
-
result.cols_with_excs += status.cols_with_excs
|
|
1065
|
+
plan2, _ = Planner.create_view_load_plan(view.get().path, propagates_insert=True)
|
|
1066
|
+
status = view.get()._insert(plan2, timestamp, print_stats=print_stats)
|
|
1067
|
+
result += status.to_cascade()
|
|
887
1068
|
|
|
888
|
-
|
|
1069
|
+
# Use the net status after all propagations
|
|
1070
|
+
self.update_status = result
|
|
1071
|
+
self._write_md(new_version=True, new_schema_version=False)
|
|
889
1072
|
if print_stats:
|
|
890
|
-
|
|
1073
|
+
exec_plan.ctx.profile.print(num_rows=result.num_rows)
|
|
891
1074
|
_logger.info(f'TableVersion {self.name}: new version {self.version}')
|
|
892
1075
|
return result
|
|
893
1076
|
|
|
894
|
-
def update(
|
|
895
|
-
self, value_spec: dict[str, Any], where: Optional[exprs.Expr] = None, cascade: bool = True
|
|
896
|
-
) -> UpdateStatus:
|
|
1077
|
+
def update(self, value_spec: dict[str, Any], where: exprs.Expr | None = None, cascade: bool = True) -> UpdateStatus:
|
|
897
1078
|
"""Update rows in this TableVersionPath.
|
|
898
1079
|
Args:
|
|
899
1080
|
value_spec: a list of (column, value) pairs specifying the columns to update and their new values.
|
|
@@ -901,22 +1082,21 @@ class TableVersion:
|
|
|
901
1082
|
cascade: if True, also update all computed columns that transitively depend on the updated columns,
|
|
902
1083
|
including within views.
|
|
903
1084
|
"""
|
|
904
|
-
|
|
905
|
-
raise excs.Error('Cannot update a snapshot')
|
|
906
|
-
|
|
1085
|
+
from pixeltable.exprs import SqlElementCache
|
|
907
1086
|
from pixeltable.plan import Planner
|
|
908
1087
|
|
|
1088
|
+
assert self.is_mutable
|
|
1089
|
+
|
|
909
1090
|
update_spec = self._validate_update_spec(value_spec, allow_pk=False, allow_exprs=True, allow_media=True)
|
|
910
1091
|
if where is not None:
|
|
911
1092
|
if not isinstance(where, exprs.Expr):
|
|
912
|
-
raise excs.Error(f
|
|
1093
|
+
raise excs.Error(f'`where` argument must be a valid Pixeltable expression; got `{type(where)}`')
|
|
913
1094
|
analysis_info = Planner.analyze(self.path, where)
|
|
914
1095
|
# for now we require that the updated rows can be identified via SQL, rather than via a Python filter
|
|
915
1096
|
if analysis_info.filter is not None:
|
|
916
|
-
raise excs.Error(f'Filter
|
|
1097
|
+
raise excs.Error(f'Filter not expressible in SQL: {analysis_info.filter}')
|
|
917
1098
|
|
|
918
1099
|
plan, updated_cols, recomputed_cols = Planner.create_update_plan(self.path, update_spec, [], where, cascade)
|
|
919
|
-
from pixeltable.exprs import SqlElementCache
|
|
920
1100
|
|
|
921
1101
|
result = self.propagate_update(
|
|
922
1102
|
plan,
|
|
@@ -927,7 +1107,7 @@ class TableVersion:
|
|
|
927
1107
|
cascade=cascade,
|
|
928
1108
|
show_progress=True,
|
|
929
1109
|
)
|
|
930
|
-
result
|
|
1110
|
+
result += UpdateStatus(updated_cols=updated_cols)
|
|
931
1111
|
return result
|
|
932
1112
|
|
|
933
1113
|
def batch_update(
|
|
@@ -943,18 +1123,18 @@ class TableVersion:
|
|
|
943
1123
|
batch: one dict per row, each mapping Columns to LiteralExprs representing the new values
|
|
944
1124
|
rowids: if not empty, one tuple per row, each containing the rowid values for the corresponding row in batch
|
|
945
1125
|
"""
|
|
1126
|
+
from pixeltable.plan import Planner
|
|
1127
|
+
|
|
946
1128
|
# if we do lookups of rowids, we must have one for each row in the batch
|
|
947
1129
|
assert len(rowids) == 0 or len(rowids) == len(batch)
|
|
948
1130
|
|
|
949
|
-
from pixeltable.plan import Planner
|
|
950
|
-
|
|
951
1131
|
plan, row_update_node, delete_where_clause, updated_cols, recomputed_cols = Planner.create_batch_update_plan(
|
|
952
1132
|
self.path, batch, rowids, cascade=cascade
|
|
953
1133
|
)
|
|
954
1134
|
result = self.propagate_update(
|
|
955
1135
|
plan, delete_where_clause, recomputed_cols, base_versions=[], timestamp=time.time(), cascade=cascade
|
|
956
1136
|
)
|
|
957
|
-
result
|
|
1137
|
+
result += UpdateStatus(updated_cols=[c.qualified_name for c in updated_cols])
|
|
958
1138
|
|
|
959
1139
|
unmatched_rows = row_update_node.unmatched_rows()
|
|
960
1140
|
if len(unmatched_rows) > 0:
|
|
@@ -962,7 +1142,7 @@ class TableVersion:
|
|
|
962
1142
|
raise excs.Error(f'batch_update(): {len(unmatched_rows)} row(s) not found')
|
|
963
1143
|
if insert_if_not_exists:
|
|
964
1144
|
insert_status = self.insert(unmatched_rows, None, print_stats=False, fail_on_exception=False)
|
|
965
|
-
result += insert_status
|
|
1145
|
+
result += insert_status.to_cascade()
|
|
966
1146
|
return result
|
|
967
1147
|
|
|
968
1148
|
def _validate_update_spec(
|
|
@@ -971,23 +1151,24 @@ class TableVersion:
|
|
|
971
1151
|
update_targets: dict[Column, exprs.Expr] = {}
|
|
972
1152
|
for col_name, val in value_spec.items():
|
|
973
1153
|
if not isinstance(col_name, str):
|
|
974
|
-
raise excs.Error(f'Update specification: dict key must be column name
|
|
1154
|
+
raise excs.Error(f'Update specification: dict key must be column name; got {col_name!r}')
|
|
975
1155
|
if col_name == _ROWID_COLUMN_NAME:
|
|
976
1156
|
# a valid rowid is a list of ints, one per rowid column
|
|
977
1157
|
assert len(val) == len(self.store_tbl.rowid_columns())
|
|
978
1158
|
for el in val:
|
|
979
1159
|
assert isinstance(el, int)
|
|
980
1160
|
continue
|
|
981
|
-
col = self.path.get_column(col_name
|
|
1161
|
+
col = self.path.get_column(col_name)
|
|
982
1162
|
if col is None:
|
|
983
|
-
|
|
984
|
-
|
|
1163
|
+
raise excs.Error(f'Unknown column: {col_name}')
|
|
1164
|
+
if col.get_tbl().id != self.id:
|
|
1165
|
+
raise excs.Error(f'Column {col.name!r} is a base table column and cannot be updated')
|
|
985
1166
|
if col.is_computed:
|
|
986
|
-
raise excs.Error(f'Column {col_name} is computed and cannot be updated')
|
|
1167
|
+
raise excs.Error(f'Column {col_name!r} is computed and cannot be updated')
|
|
987
1168
|
if col.is_pk and not allow_pk:
|
|
988
|
-
raise excs.Error(f'Column {col_name} is a primary key column and cannot be updated')
|
|
1169
|
+
raise excs.Error(f'Column {col_name!r} is a primary key column and cannot be updated')
|
|
989
1170
|
if col.col_type.is_media_type() and not allow_media:
|
|
990
|
-
raise excs.Error(f'Column {col_name} is a media column and cannot be updated')
|
|
1171
|
+
raise excs.Error(f'Column {col_name!r} is a media column and cannot be updated')
|
|
991
1172
|
|
|
992
1173
|
# make sure that the value is compatible with the column type
|
|
993
1174
|
value_expr: exprs.Expr
|
|
@@ -997,132 +1178,180 @@ class TableVersion:
|
|
|
997
1178
|
except (TypeError, jsonschema.exceptions.ValidationError) as exc:
|
|
998
1179
|
if not allow_exprs:
|
|
999
1180
|
raise excs.Error(
|
|
1000
|
-
f'Column {col_name}: value
|
|
1001
|
-
f'(expected {col.col_type})'
|
|
1181
|
+
f'Column {col_name!r}: value is not a valid literal for this column '
|
|
1182
|
+
f'(expected `{col.col_type}`): {val!r}'
|
|
1002
1183
|
) from exc
|
|
1003
1184
|
# it's not a literal, let's try to create an expr from it
|
|
1004
1185
|
value_expr = exprs.Expr.from_object(val)
|
|
1005
1186
|
if value_expr is None:
|
|
1006
1187
|
raise excs.Error(
|
|
1007
|
-
f'Column {col_name}: value
|
|
1188
|
+
f'Column {col_name!r}: value is not a recognized literal or expression: {val!r}'
|
|
1008
1189
|
) from exc
|
|
1009
1190
|
if not col.col_type.is_supertype_of(value_expr.col_type, ignore_nullable=True):
|
|
1010
1191
|
raise excs.Error(
|
|
1011
|
-
f'Type of value {val!r}
|
|
1012
|
-
f'{
|
|
1192
|
+
f'Type `{value_expr.col_type}` of value {val!r} is not compatible with the type '
|
|
1193
|
+
f'`{col.col_type}` of column {col_name!r}'
|
|
1013
1194
|
) from exc
|
|
1014
1195
|
update_targets[col] = value_expr
|
|
1015
1196
|
|
|
1016
1197
|
return update_targets
|
|
1017
1198
|
|
|
1199
|
+
def recompute_columns(
|
|
1200
|
+
self, col_names: list[str], where: exprs.Expr | None = None, errors_only: bool = False, cascade: bool = True
|
|
1201
|
+
) -> UpdateStatus:
|
|
1202
|
+
from pixeltable.exprs import CompoundPredicate, SqlElementCache
|
|
1203
|
+
from pixeltable.plan import Planner
|
|
1204
|
+
|
|
1205
|
+
assert self.is_mutable
|
|
1206
|
+
assert all(name in self.cols_by_name for name in col_names)
|
|
1207
|
+
assert len(col_names) > 0
|
|
1208
|
+
assert len(col_names) == 1 or not errors_only
|
|
1209
|
+
|
|
1210
|
+
target_columns = [self.cols_by_name[name] for name in col_names]
|
|
1211
|
+
where_clause: exprs.Expr | None = None
|
|
1212
|
+
if where is not None:
|
|
1213
|
+
self._validate_where_clause(where, error_prefix='`where` argument')
|
|
1214
|
+
where_clause = where
|
|
1215
|
+
if errors_only:
|
|
1216
|
+
errortype_pred = (
|
|
1217
|
+
exprs.ColumnPropertyRef(exprs.ColumnRef(target_columns[0]), exprs.ColumnPropertyRef.Property.ERRORTYPE)
|
|
1218
|
+
!= None
|
|
1219
|
+
)
|
|
1220
|
+
where_clause = CompoundPredicate.make_conjunction([where_clause, errortype_pred])
|
|
1221
|
+
plan, updated_cols, recomputed_cols = Planner.create_update_plan(
|
|
1222
|
+
self.path, update_targets={}, recompute_targets=target_columns, where_clause=where_clause, cascade=cascade
|
|
1223
|
+
)
|
|
1224
|
+
|
|
1225
|
+
result = self.propagate_update(
|
|
1226
|
+
plan,
|
|
1227
|
+
where_clause.sql_expr(SqlElementCache()) if where_clause is not None else None,
|
|
1228
|
+
recomputed_cols,
|
|
1229
|
+
base_versions=[],
|
|
1230
|
+
timestamp=time.time(),
|
|
1231
|
+
cascade=cascade,
|
|
1232
|
+
show_progress=True,
|
|
1233
|
+
)
|
|
1234
|
+
result += UpdateStatus(updated_cols=updated_cols)
|
|
1235
|
+
return result
|
|
1236
|
+
|
|
1018
1237
|
def propagate_update(
|
|
1019
1238
|
self,
|
|
1020
|
-
plan:
|
|
1021
|
-
where_clause:
|
|
1239
|
+
plan: exec.ExecNode | None,
|
|
1240
|
+
where_clause: sql.ColumnElement | None,
|
|
1022
1241
|
recomputed_view_cols: list[Column],
|
|
1023
|
-
base_versions: list[
|
|
1242
|
+
base_versions: list[int | None],
|
|
1024
1243
|
timestamp: float,
|
|
1025
1244
|
cascade: bool,
|
|
1026
1245
|
show_progress: bool = True,
|
|
1027
1246
|
) -> UpdateStatus:
|
|
1247
|
+
from pixeltable.catalog import Catalog
|
|
1248
|
+
from pixeltable.plan import Planner
|
|
1249
|
+
|
|
1250
|
+
Catalog.get().mark_modified_tvs(self.handle)
|
|
1028
1251
|
result = UpdateStatus()
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
self.
|
|
1032
|
-
|
|
1252
|
+
create_new_table_version = plan is not None
|
|
1253
|
+
if create_new_table_version:
|
|
1254
|
+
self.bump_version(timestamp, bump_schema_version=False)
|
|
1255
|
+
cols_with_excs, row_counts = self.store_tbl.insert_rows(
|
|
1033
1256
|
plan, v_min=self.version, show_progress=show_progress
|
|
1034
1257
|
)
|
|
1035
|
-
result
|
|
1258
|
+
result += UpdateStatus(
|
|
1259
|
+
row_count_stats=row_counts.insert_to_update(),
|
|
1260
|
+
cols_with_excs=[f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs],
|
|
1261
|
+
)
|
|
1036
1262
|
self.store_tbl.delete_rows(
|
|
1037
1263
|
self.version, base_versions=base_versions, match_on_vmin=True, where_clause=where_clause
|
|
1038
1264
|
)
|
|
1039
|
-
self._update_md(timestamp)
|
|
1040
1265
|
|
|
1041
1266
|
if cascade:
|
|
1042
1267
|
base_versions = [None if plan is None else self.version, *base_versions] # don't update in place
|
|
1043
1268
|
# propagate to views
|
|
1044
1269
|
for view in self.mutable_views:
|
|
1045
|
-
recomputed_cols = [col for col in recomputed_view_cols if col.
|
|
1270
|
+
recomputed_cols = [col for col in recomputed_view_cols if col.get_tbl().id == view.id]
|
|
1046
1271
|
plan = None
|
|
1047
1272
|
if len(recomputed_cols) > 0:
|
|
1048
|
-
from pixeltable.plan import Planner
|
|
1049
|
-
|
|
1050
1273
|
plan = Planner.create_view_update_plan(view.get().path, recompute_targets=recomputed_cols)
|
|
1051
1274
|
status = view.get().propagate_update(
|
|
1052
1275
|
plan, None, recomputed_view_cols, base_versions=base_versions, timestamp=timestamp, cascade=True
|
|
1053
1276
|
)
|
|
1054
|
-
result
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
result.cols_with_excs = list(dict.fromkeys(result.cols_with_excs).keys()) # remove duplicates
|
|
1277
|
+
result += status.to_cascade()
|
|
1278
|
+
if create_new_table_version:
|
|
1279
|
+
self.update_status = result
|
|
1280
|
+
self._write_md(new_version=True, new_schema_version=False)
|
|
1059
1281
|
return result
|
|
1060
1282
|
|
|
1061
|
-
def
|
|
1062
|
-
"""
|
|
1063
|
-
|
|
1064
|
-
where: a predicate to filter rows to delete.
|
|
1065
|
-
"""
|
|
1066
|
-
assert self.is_insertable()
|
|
1283
|
+
def _validate_where_clause(self, pred: exprs.Expr, error_prefix: str) -> None:
|
|
1284
|
+
"""Validates that pred can be expressed as a SQL Where clause"""
|
|
1285
|
+
assert self.is_insertable
|
|
1067
1286
|
from pixeltable.exprs import Expr
|
|
1068
1287
|
from pixeltable.plan import Planner
|
|
1069
1288
|
|
|
1070
|
-
|
|
1071
|
-
|
|
1072
|
-
|
|
1073
|
-
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
if analysis_info.filter is not None:
|
|
1077
|
-
raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
|
|
1078
|
-
sql_where_clause = analysis_info.sql_where_clause
|
|
1079
|
-
|
|
1080
|
-
num_rows = self.propagate_delete(sql_where_clause, base_versions=[], timestamp=time.time())
|
|
1289
|
+
if not isinstance(pred, Expr):
|
|
1290
|
+
raise excs.Error(f'{error_prefix} must be a valid Pixeltable expression; got `{type(pred)}`')
|
|
1291
|
+
analysis_info = Planner.analyze(self.path, pred)
|
|
1292
|
+
# for now we require that the updated rows can be identified via SQL, rather than via a Python filter
|
|
1293
|
+
if analysis_info.filter is not None:
|
|
1294
|
+
raise excs.Error(f'Filter not expressible in SQL: {analysis_info.filter}')
|
|
1081
1295
|
|
|
1082
|
-
|
|
1296
|
+
def delete(self, where: exprs.Expr | None = None) -> UpdateStatus:
|
|
1297
|
+
assert self.is_insertable
|
|
1298
|
+
if where is not None:
|
|
1299
|
+
self._validate_where_clause(where, error_prefix='`where` argument')
|
|
1300
|
+
status = self.propagate_delete(where, base_versions=[], timestamp=time.time())
|
|
1083
1301
|
return status
|
|
1084
1302
|
|
|
1085
1303
|
def propagate_delete(
|
|
1086
|
-
self, where:
|
|
1087
|
-
) ->
|
|
1088
|
-
"""Delete rows in this table and propagate to views
|
|
1089
|
-
|
|
1090
|
-
|
|
1091
|
-
|
|
1092
|
-
|
|
1093
|
-
|
|
1304
|
+
self, where: exprs.Expr | None, base_versions: list[int | None], timestamp: float
|
|
1305
|
+
) -> UpdateStatus:
|
|
1306
|
+
"""Delete rows in this table and propagate to views"""
|
|
1307
|
+
from pixeltable.catalog import Catalog
|
|
1308
|
+
|
|
1309
|
+
Catalog.get().mark_modified_tvs(self.handle)
|
|
1310
|
+
|
|
1311
|
+
# print(f'calling sql_expr()')
|
|
1094
1312
|
sql_where_clause = where.sql_expr(exprs.SqlElementCache()) if where is not None else None
|
|
1095
|
-
|
|
1313
|
+
# #print(f'sql_where_clause={str(sql_where_clause) if sql_where_clause is not None else None}')
|
|
1314
|
+
# sql_cols: list[sql.Column] = []
|
|
1315
|
+
# def collect_cols(col) -> None:
|
|
1316
|
+
# sql_cols.append(col)
|
|
1317
|
+
# sql.sql.visitors.traverse(sql_where_clause, {}, {'column': collect_cols})
|
|
1318
|
+
# x = [f'{str(c)}:{hash(c)}:{id(c.table)}' for c in sql_cols]
|
|
1319
|
+
# print(f'where_clause cols: {x}')
|
|
1320
|
+
del_rows = self.store_tbl.delete_rows(
|
|
1096
1321
|
self.version + 1, base_versions=base_versions, match_on_vmin=False, where_clause=sql_where_clause
|
|
1097
1322
|
)
|
|
1098
|
-
|
|
1323
|
+
row_counts = RowCountStats(del_rows=del_rows) # delete
|
|
1324
|
+
result = UpdateStatus(row_count_stats=row_counts)
|
|
1325
|
+
if del_rows > 0:
|
|
1099
1326
|
# we're creating a new version
|
|
1100
|
-
self.
|
|
1101
|
-
self._update_md(timestamp)
|
|
1327
|
+
self.bump_version(timestamp, bump_schema_version=False)
|
|
1102
1328
|
for view in self.mutable_views:
|
|
1103
|
-
|
|
1329
|
+
status = view.get().propagate_delete(
|
|
1104
1330
|
where=None, base_versions=[self.version, *base_versions], timestamp=timestamp
|
|
1105
1331
|
)
|
|
1106
|
-
|
|
1332
|
+
result += status.to_cascade()
|
|
1333
|
+
self.update_status = result
|
|
1334
|
+
|
|
1335
|
+
if del_rows > 0:
|
|
1336
|
+
self._write_md(new_version=True, new_schema_version=False)
|
|
1337
|
+
return result
|
|
1107
1338
|
|
|
1108
1339
|
def revert(self) -> None:
|
|
1109
1340
|
"""Reverts the table to the previous version."""
|
|
1110
|
-
assert
|
|
1341
|
+
assert self.is_mutable
|
|
1111
1342
|
if self.version == 0:
|
|
1112
1343
|
raise excs.Error('Cannot revert version 0')
|
|
1113
1344
|
self._revert()
|
|
1114
1345
|
|
|
1115
|
-
def _delete_column(self, col: Column) -> None:
|
|
1116
|
-
"""Physically remove the column from the schema and the store table"""
|
|
1117
|
-
if col.is_stored:
|
|
1118
|
-
self.store_tbl.drop_column(col)
|
|
1119
|
-
self.cols.remove(col)
|
|
1120
|
-
if col.name is not None:
|
|
1121
|
-
del self.cols_by_name[col.name]
|
|
1122
|
-
del self.cols_by_id[col.id]
|
|
1123
|
-
|
|
1124
1346
|
def _revert(self) -> None:
|
|
1125
|
-
"""
|
|
1347
|
+
"""
|
|
1348
|
+
Reverts the stored metadata for this table version and propagates to views.
|
|
1349
|
+
|
|
1350
|
+
Doesn't attempt to revert the in-memory metadata, but instead invalidates this TableVersion instance
|
|
1351
|
+
and relies on Catalog to reload it
|
|
1352
|
+
"""
|
|
1353
|
+
from pixeltable.catalog import Catalog
|
|
1354
|
+
|
|
1126
1355
|
conn = Env.get().conn
|
|
1127
1356
|
# make sure we don't have a snapshot referencing this version
|
|
1128
1357
|
# (unclear how to express this with sqlalchemy)
|
|
@@ -1137,127 +1366,270 @@ class TableVersion:
|
|
|
1137
1366
|
names = [row[1] for row in result]
|
|
1138
1367
|
raise excs.Error(
|
|
1139
1368
|
(
|
|
1140
|
-
f'Current version is needed for {len(result)} snapshot{"s" if len(result) > 1 else ""} '
|
|
1369
|
+
f'Current version is needed for {len(result)} snapshot{"s" if len(result) > 1 else ""}: '
|
|
1141
1370
|
f'({", ".join(names)})'
|
|
1142
1371
|
)
|
|
1143
1372
|
)
|
|
1144
1373
|
|
|
1145
|
-
# delete newly-added data
|
|
1146
|
-
MediaStore.delete(self.id, version=self.version)
|
|
1147
1374
|
conn.execute(sql.delete(self.store_tbl.sa_tbl).where(self.store_tbl.sa_tbl.c.v_min == self.version))
|
|
1148
1375
|
|
|
1149
1376
|
# revert new deletions
|
|
1150
1377
|
set_clause: dict[sql.Column, Any] = {self.store_tbl.sa_tbl.c.v_max: schema.Table.MAX_VERSION}
|
|
1151
|
-
for index_info in self.
|
|
1378
|
+
for index_info in self.idxs.values():
|
|
1152
1379
|
# copy the index value back from the undo column and reset the undo column to NULL
|
|
1153
1380
|
set_clause[index_info.val_col.sa_col] = index_info.undo_col.sa_col
|
|
1154
1381
|
set_clause[index_info.undo_col.sa_col] = None
|
|
1155
1382
|
stmt = sql.update(self.store_tbl.sa_tbl).values(set_clause).where(self.store_tbl.sa_tbl.c.v_max == self.version)
|
|
1156
1383
|
conn.execute(stmt)
|
|
1157
1384
|
|
|
1158
|
-
# revert schema changes
|
|
1385
|
+
# revert schema changes:
|
|
1386
|
+
# - undo changes to self._tbl_md and write that back
|
|
1387
|
+
# - delete newly-added TableVersion/TableSchemaVersion records
|
|
1388
|
+
Catalog.get().mark_modified_tvs(self.handle)
|
|
1389
|
+
old_version = self.version
|
|
1159
1390
|
if self.version == self.schema_version:
|
|
1160
|
-
# delete newly-added columns
|
|
1391
|
+
# physically delete newly-added columns and remove them from the stored md
|
|
1161
1392
|
added_cols = [col for col in self.cols if col.schema_version_add == self.schema_version]
|
|
1162
1393
|
if len(added_cols) > 0:
|
|
1163
|
-
next_col_id = min(col.id for col in added_cols)
|
|
1394
|
+
self._tbl_md.next_col_id = min(col.id for col in added_cols)
|
|
1164
1395
|
for col in added_cols:
|
|
1165
|
-
|
|
1166
|
-
|
|
1396
|
+
if col.is_stored:
|
|
1397
|
+
self.store_tbl.drop_column(col)
|
|
1398
|
+
del self._tbl_md.column_md[col.id]
|
|
1167
1399
|
|
|
1168
1400
|
# remove newly-added indices from the lookup structures
|
|
1169
1401
|
# (the value and undo columns got removed in the preceding step)
|
|
1170
|
-
added_idx_md = [md for md in self.
|
|
1402
|
+
added_idx_md = [md for md in self._tbl_md.index_md.values() if md.schema_version_add == self.schema_version]
|
|
1171
1403
|
if len(added_idx_md) > 0:
|
|
1172
|
-
next_idx_id = min(md.id for md in added_idx_md)
|
|
1404
|
+
self._tbl_md.next_idx_id = min(md.id for md in added_idx_md)
|
|
1173
1405
|
for md in added_idx_md:
|
|
1174
|
-
|
|
1175
|
-
del self.
|
|
1176
|
-
self.next_idx_id = next_idx_id
|
|
1406
|
+
# TODO: drop the index
|
|
1407
|
+
del self._tbl_md.index_md[md.id]
|
|
1177
1408
|
|
|
1178
1409
|
# make newly-dropped columns visible again
|
|
1179
|
-
|
|
1180
|
-
|
|
1181
|
-
|
|
1410
|
+
dropped_col_md = [
|
|
1411
|
+
md for md in self._tbl_md.column_md.values() if md.schema_version_drop == self.schema_version
|
|
1412
|
+
]
|
|
1413
|
+
for col_md in dropped_col_md:
|
|
1414
|
+
col_md.schema_version_drop = None
|
|
1182
1415
|
|
|
1183
1416
|
# make newly-dropped indices visible again
|
|
1184
|
-
dropped_idx_md = [
|
|
1185
|
-
|
|
1186
|
-
|
|
1187
|
-
|
|
1188
|
-
|
|
1189
|
-
# we need to determine the preceding schema version and reload the schema
|
|
1190
|
-
schema_version_md_dict = (
|
|
1191
|
-
session.query(schema.TableSchemaVersion.md)
|
|
1192
|
-
.where(schema.TableSchemaVersion.tbl_id == self.id)
|
|
1193
|
-
.where(schema.TableSchemaVersion.schema_version == self.schema_version)
|
|
1194
|
-
.scalar()
|
|
1195
|
-
)
|
|
1196
|
-
preceding_schema_version = schema_version_md_dict['preceding_schema_version']
|
|
1197
|
-
preceding_schema_version_md_dict = (
|
|
1198
|
-
session.query(schema.TableSchemaVersion.md)
|
|
1199
|
-
.where(schema.TableSchemaVersion.tbl_id == self.id)
|
|
1200
|
-
.where(schema.TableSchemaVersion.schema_version == preceding_schema_version)
|
|
1201
|
-
.scalar()
|
|
1202
|
-
)
|
|
1203
|
-
preceding_schema_version_md = schema.md_from_dict(
|
|
1204
|
-
schema.TableSchemaVersionMd, preceding_schema_version_md_dict
|
|
1205
|
-
)
|
|
1206
|
-
tbl_md = self._create_tbl_md()
|
|
1207
|
-
self._init_schema(tbl_md, preceding_schema_version_md)
|
|
1417
|
+
dropped_idx_md = [
|
|
1418
|
+
md for md in self._tbl_md.index_md.values() if md.schema_version_drop == self.schema_version
|
|
1419
|
+
]
|
|
1420
|
+
for idx_md in dropped_idx_md:
|
|
1421
|
+
idx_md.schema_version_drop = None
|
|
1208
1422
|
|
|
1209
1423
|
conn.execute(
|
|
1210
1424
|
sql.delete(schema.TableSchemaVersion.__table__)
|
|
1211
1425
|
.where(schema.TableSchemaVersion.tbl_id == self.id)
|
|
1212
1426
|
.where(schema.TableSchemaVersion.schema_version == self.schema_version)
|
|
1213
1427
|
)
|
|
1214
|
-
self.
|
|
1215
|
-
self.comment = preceding_schema_version_md.comment
|
|
1216
|
-
self.num_retained_versions = preceding_schema_version_md.num_retained_versions
|
|
1428
|
+
self._tbl_md.current_schema_version = self._schema_version_md.preceding_schema_version
|
|
1217
1429
|
|
|
1218
1430
|
conn.execute(
|
|
1219
1431
|
sql.delete(schema.TableVersion.__table__)
|
|
1220
1432
|
.where(schema.TableVersion.tbl_id == self.id)
|
|
1221
1433
|
.where(schema.TableVersion.version == self.version)
|
|
1222
1434
|
)
|
|
1223
|
-
|
|
1224
|
-
|
|
1225
|
-
|
|
1226
|
-
|
|
1227
|
-
.where(schema.Table.id == self.id)
|
|
1228
|
-
)
|
|
1435
|
+
|
|
1436
|
+
self._tbl_md.current_version = self._version_md.version = self.version - 1
|
|
1437
|
+
|
|
1438
|
+
self._write_md(new_version=False, new_schema_version=False)
|
|
1229
1439
|
|
|
1230
1440
|
# propagate to views
|
|
1231
1441
|
for view in self.mutable_views:
|
|
1232
1442
|
view.get()._revert()
|
|
1233
|
-
_logger.info(f'TableVersion {self.name}: reverted to version {self.version}')
|
|
1234
1443
|
|
|
1235
|
-
|
|
1236
|
-
|
|
1444
|
+
# force reload on next operation
|
|
1445
|
+
self.is_validated = False
|
|
1446
|
+
Catalog.get().remove_tbl_version(self.key)
|
|
1447
|
+
|
|
1448
|
+
# delete newly-added data
|
|
1449
|
+
# Do this at the end, after all DB operations have completed.
|
|
1450
|
+
# TODO: The transaction could still fail. Really this should be done via PendingTableOps.
|
|
1451
|
+
self.delete_media(tbl_version=old_version)
|
|
1452
|
+
_logger.info(f'TableVersion {self.name!r}: reverted to version {self.version}')
|
|
1453
|
+
|
|
1454
|
+
def _init_external_stores(self) -> None:
|
|
1455
|
+
from pixeltable.io.external_store import ExternalStore
|
|
1456
|
+
|
|
1457
|
+
for store_md in self.tbl_md.external_stores:
|
|
1237
1458
|
store_cls = resolve_symbol(store_md['class'])
|
|
1238
|
-
assert isinstance(store_cls, type) and issubclass(store_cls,
|
|
1459
|
+
assert isinstance(store_cls, type) and issubclass(store_cls, ExternalStore)
|
|
1239
1460
|
store = store_cls.from_dict(store_md['md'])
|
|
1240
1461
|
self.external_stores[store.name] = store
|
|
1241
1462
|
|
|
1242
|
-
def link_external_store(self, store:
|
|
1243
|
-
|
|
1463
|
+
def link_external_store(self, store: ExternalStore) -> None:
|
|
1464
|
+
self.bump_version(bump_schema_version=True)
|
|
1465
|
+
|
|
1244
1466
|
self.external_stores[store.name] = store
|
|
1245
|
-
self.
|
|
1467
|
+
self._tbl_md.external_stores.append(
|
|
1468
|
+
{'class': f'{type(store).__module__}.{type(store).__qualname__}', 'md': store.as_dict()}
|
|
1469
|
+
)
|
|
1470
|
+
self._write_md(new_version=True, new_schema_version=True)
|
|
1471
|
+
|
|
1472
|
+
def unlink_external_store(self, store: ExternalStore) -> None:
|
|
1473
|
+
del self.external_stores[store.name]
|
|
1474
|
+
self.bump_version(bump_schema_version=True)
|
|
1475
|
+
idx = next(i for i, store_md in enumerate(self._tbl_md.external_stores) if store_md['md']['name'] == store.name)
|
|
1476
|
+
self._tbl_md.external_stores.pop(idx)
|
|
1477
|
+
self._write_md(new_version=True, new_schema_version=True)
|
|
1478
|
+
|
|
1479
|
+
@property
|
|
1480
|
+
def id(self) -> UUID:
|
|
1481
|
+
return self.key.tbl_id
|
|
1246
1482
|
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
|
-
|
|
1250
|
-
store.unlink(self) # May result in additional metadata changes
|
|
1251
|
-
del self.external_stores[store_name]
|
|
1252
|
-
self._update_md(time.time(), update_tbl_version=False)
|
|
1483
|
+
@property
|
|
1484
|
+
def effective_version(self) -> int | None:
|
|
1485
|
+
return self.key.effective_version
|
|
1253
1486
|
|
|
1254
|
-
|
|
1255
|
-
|
|
1487
|
+
@property
|
|
1488
|
+
def anchor_tbl_id(self) -> UUID | None:
|
|
1489
|
+
return self.key.anchor_tbl_id
|
|
1490
|
+
|
|
1491
|
+
@property
|
|
1492
|
+
def tbl_md(self) -> schema.TableMd:
|
|
1493
|
+
return self._tbl_md
|
|
1494
|
+
|
|
1495
|
+
@property
|
|
1496
|
+
def version_md(self) -> schema.VersionMd:
|
|
1497
|
+
return self._version_md
|
|
1498
|
+
|
|
1499
|
+
@property
|
|
1500
|
+
def schema_version_md(self) -> schema.SchemaVersionMd:
|
|
1501
|
+
return self._schema_version_md
|
|
1502
|
+
|
|
1503
|
+
@property
|
|
1504
|
+
def view_md(self) -> schema.ViewMd | None:
|
|
1505
|
+
return self._tbl_md.view_md
|
|
1506
|
+
|
|
1507
|
+
@property
|
|
1508
|
+
def name(self) -> str:
|
|
1509
|
+
return self._tbl_md.name
|
|
1510
|
+
|
|
1511
|
+
@property
|
|
1512
|
+
def user(self) -> str | None:
|
|
1513
|
+
return self._tbl_md.user
|
|
1514
|
+
|
|
1515
|
+
@property
|
|
1516
|
+
def is_replica(self) -> bool:
|
|
1517
|
+
return self._tbl_md.is_replica
|
|
1518
|
+
|
|
1519
|
+
@property
|
|
1520
|
+
def comment(self) -> str:
|
|
1521
|
+
return self._schema_version_md.comment
|
|
1522
|
+
|
|
1523
|
+
@comment.setter
|
|
1524
|
+
def comment(self, c: str) -> None:
|
|
1525
|
+
assert self.effective_version is None
|
|
1526
|
+
self._schema_version_md.comment = c
|
|
1527
|
+
|
|
1528
|
+
@property
|
|
1529
|
+
def num_retained_versions(self) -> int:
|
|
1530
|
+
return self._schema_version_md.num_retained_versions
|
|
1531
|
+
|
|
1532
|
+
@num_retained_versions.setter
|
|
1533
|
+
def num_retained_versions(self, n: int) -> None:
|
|
1534
|
+
assert self.effective_version is None
|
|
1535
|
+
self._schema_version_md.num_retained_versions = n
|
|
1536
|
+
|
|
1537
|
+
@property
|
|
1538
|
+
def version(self) -> int:
|
|
1539
|
+
return self._version_md.version
|
|
1540
|
+
|
|
1541
|
+
@property
|
|
1542
|
+
def created_at(self) -> float:
|
|
1543
|
+
return self._version_md.created_at
|
|
1544
|
+
|
|
1545
|
+
@property
|
|
1546
|
+
def schema_version(self) -> int:
|
|
1547
|
+
return self._schema_version_md.schema_version
|
|
1548
|
+
|
|
1549
|
+
def bump_version(self, timestamp: float | None = None, *, bump_schema_version: bool) -> None:
|
|
1550
|
+
"""
|
|
1551
|
+
Increments the table version and adjusts all associated metadata. This will *not* trigger a database action;
|
|
1552
|
+
_write_md() must be called separately to persist the changes.
|
|
1553
|
+
|
|
1554
|
+
Args:
|
|
1555
|
+
timestamp: the creation time for the new version. Can be used to synchronize multiple metadata changes
|
|
1556
|
+
to the same timestamp. If `None`, then defaults to `time.time()`.
|
|
1557
|
+
bump_schema_version: if True, also adjusts the schema version (setting it equal to the new version)
|
|
1558
|
+
and associated metadata.
|
|
1559
|
+
"""
|
|
1560
|
+
from pixeltable.catalog import Catalog
|
|
1561
|
+
|
|
1562
|
+
assert self.effective_version is None
|
|
1563
|
+
|
|
1564
|
+
if timestamp is None:
|
|
1565
|
+
timestamp = time.time()
|
|
1566
|
+
|
|
1567
|
+
Catalog.get().mark_modified_tvs(self.handle)
|
|
1568
|
+
|
|
1569
|
+
old_version = self._tbl_md.current_version
|
|
1570
|
+
assert self._version_md.version == old_version
|
|
1571
|
+
new_version = old_version + 1
|
|
1572
|
+
self._tbl_md.current_version = new_version
|
|
1573
|
+
self._version_md.version = new_version
|
|
1574
|
+
self._version_md.created_at = timestamp
|
|
1575
|
+
|
|
1576
|
+
if bump_schema_version:
|
|
1577
|
+
old_schema_version = self._tbl_md.current_schema_version
|
|
1578
|
+
assert self._version_md.schema_version == old_schema_version
|
|
1579
|
+
assert self._schema_version_md.schema_version == old_schema_version
|
|
1580
|
+
self._tbl_md.current_schema_version = new_version
|
|
1581
|
+
self._version_md.schema_version = new_version
|
|
1582
|
+
self._schema_version_md.preceding_schema_version = old_schema_version
|
|
1583
|
+
self._schema_version_md.schema_version = new_version
|
|
1584
|
+
|
|
1585
|
+
@property
|
|
1586
|
+
def preceding_schema_version(self) -> int | None:
|
|
1587
|
+
return self._schema_version_md.preceding_schema_version
|
|
1588
|
+
|
|
1589
|
+
@property
|
|
1590
|
+
def update_status(self) -> UpdateStatus | None:
|
|
1591
|
+
return self._version_md.update_status
|
|
1592
|
+
|
|
1593
|
+
@update_status.setter
|
|
1594
|
+
def update_status(self, status: UpdateStatus) -> None:
|
|
1595
|
+
assert self.effective_version is None
|
|
1596
|
+
self._version_md.update_status = status
|
|
1597
|
+
|
|
1598
|
+
@property
|
|
1599
|
+
def media_validation(self) -> MediaValidation:
|
|
1600
|
+
return MediaValidation[self._schema_version_md.media_validation.upper()]
|
|
1601
|
+
|
|
1602
|
+
def next_col_id(self) -> int:
|
|
1603
|
+
val = self._tbl_md.next_col_id
|
|
1604
|
+
self._tbl_md.next_col_id += 1
|
|
1605
|
+
return val
|
|
1606
|
+
|
|
1607
|
+
@property
|
|
1608
|
+
def next_idx_id(self) -> int:
|
|
1609
|
+
return self._tbl_md.next_idx_id
|
|
1610
|
+
|
|
1611
|
+
@next_idx_id.setter
|
|
1612
|
+
def next_idx_id(self, id: int) -> None:
|
|
1613
|
+
assert self.effective_version is None
|
|
1614
|
+
self._tbl_md.next_idx_id = id
|
|
1615
|
+
|
|
1616
|
+
@property
|
|
1617
|
+
def next_row_id(self) -> int:
|
|
1618
|
+
return self._tbl_md.next_row_id
|
|
1619
|
+
|
|
1620
|
+
@next_row_id.setter
|
|
1621
|
+
def next_row_id(self, id: int) -> None:
|
|
1622
|
+
assert self.effective_version is None
|
|
1623
|
+
self._tbl_md.next_row_id = id
|
|
1256
1624
|
|
|
1257
1625
|
@property
|
|
1258
1626
|
def is_snapshot(self) -> bool:
|
|
1259
1627
|
return self.effective_version is not None
|
|
1260
1628
|
|
|
1629
|
+
@property
|
|
1630
|
+
def is_mutable(self) -> bool:
|
|
1631
|
+
return not self.is_snapshot and not self.is_replica
|
|
1632
|
+
|
|
1261
1633
|
@property
|
|
1262
1634
|
def is_view(self) -> bool:
|
|
1263
1635
|
return self.view_md is not None
|
|
@@ -1270,9 +1642,10 @@ class TableVersion:
|
|
|
1270
1642
|
def is_component_view(self) -> bool:
|
|
1271
1643
|
return self.iterator_cls is not None
|
|
1272
1644
|
|
|
1645
|
+
@property
|
|
1273
1646
|
def is_insertable(self) -> bool:
|
|
1274
1647
|
"""Returns True if this corresponds to an InsertableTable"""
|
|
1275
|
-
return
|
|
1648
|
+
return self.is_mutable and not self.is_view
|
|
1276
1649
|
|
|
1277
1650
|
def is_iterator_column(self, col: Column) -> bool:
|
|
1278
1651
|
"""Returns True if col is produced by an iterator"""
|
|
@@ -1283,6 +1656,10 @@ class TableVersion:
|
|
|
1283
1656
|
"""Return True if column was created by Pixeltable"""
|
|
1284
1657
|
return col.name == _POS_COLUMN_NAME and self.is_component_view
|
|
1285
1658
|
|
|
1659
|
+
def iterator_columns(self) -> list[Column]:
|
|
1660
|
+
"""Return all iterator-produced columns"""
|
|
1661
|
+
return self.cols[1 : self.num_iterator_cols + 1]
|
|
1662
|
+
|
|
1286
1663
|
def user_columns(self) -> list[Column]:
|
|
1287
1664
|
"""Return all non-system columns"""
|
|
1288
1665
|
return [c for c in self.cols if not self.is_system_column(c)]
|
|
@@ -1307,27 +1684,36 @@ class TableVersion:
|
|
|
1307
1684
|
names = [c.name for c in self.cols_by_name.values() if c.is_computed]
|
|
1308
1685
|
return names
|
|
1309
1686
|
|
|
1310
|
-
def _record_refd_columns(self, col: Column) -> None:
|
|
1311
|
-
"""Update Column.dependent_cols for all cols referenced in col.value_expr."""
|
|
1312
|
-
from pixeltable import exprs
|
|
1313
|
-
|
|
1314
|
-
if col.value_expr_dict is not None:
|
|
1315
|
-
# if we have a value_expr_dict, use that instead of instantiating the value_expr
|
|
1316
|
-
refd_cols = exprs.Expr.get_refd_columns(col.value_expr_dict)
|
|
1317
|
-
else:
|
|
1318
|
-
refd_cols = [e.col for e in col.value_expr.subexprs(expr_class=exprs.ColumnRef)]
|
|
1319
|
-
for refd_col in refd_cols:
|
|
1320
|
-
refd_col.dependent_cols.add(col)
|
|
1321
|
-
|
|
1322
1687
|
def get_idx_val_columns(self, cols: Iterable[Column]) -> set[Column]:
|
|
1323
|
-
|
|
1324
|
-
|
|
1688
|
+
# assumes that the indexed columns are all in this table
|
|
1689
|
+
assert all(col.get_tbl().id == self.id for col in cols)
|
|
1690
|
+
col_ids = {col.id for col in cols}
|
|
1691
|
+
return {info.val_col for info in self.idxs.values() if info.col.id in col_ids}
|
|
1692
|
+
|
|
1693
|
+
def get_idx(self, col: Column, idx_name: str | None, idx_cls: type[index.IndexBase]) -> TableVersion.IndexInfo:
|
|
1694
|
+
if not self.supports_idxs:
|
|
1695
|
+
raise excs.Error('Snapshot does not support indices')
|
|
1696
|
+
if col.qid not in self.idxs_by_col:
|
|
1697
|
+
raise excs.Error(f'Column {col.name!r} does not have a {idx_cls.display_name()} index')
|
|
1698
|
+
candidates = [info for info in self.idxs_by_col[col.qid] if isinstance(info.idx, idx_cls)]
|
|
1699
|
+
if len(candidates) == 0:
|
|
1700
|
+
raise excs.Error(f'No {idx_cls.display_name()} index found for column {col.name!r}')
|
|
1701
|
+
if len(candidates) > 1 and idx_name is None:
|
|
1702
|
+
raise excs.Error(
|
|
1703
|
+
f'Column {col.name!r} has multiple {idx_cls.display_name()} indices; specify `idx_name` instead'
|
|
1704
|
+
)
|
|
1705
|
+
if idx_name is not None and idx_name not in [info.name for info in candidates]:
|
|
1706
|
+
raise excs.Error(f'Index {idx_name!r} not found for column {col.name!r}')
|
|
1707
|
+
return candidates[0] if idx_name is None else next(info for info in candidates if info.name == idx_name)
|
|
1325
1708
|
|
|
1326
1709
|
def get_dependent_columns(self, cols: Iterable[Column]) -> set[Column]:
|
|
1327
1710
|
"""
|
|
1328
1711
|
Return the set of columns that transitively depend on any of the given ones.
|
|
1329
1712
|
"""
|
|
1330
|
-
|
|
1713
|
+
from pixeltable.catalog import Catalog
|
|
1714
|
+
|
|
1715
|
+
cat = Catalog.get()
|
|
1716
|
+
result = set().union(*[cat.get_column_dependents(col.get_tbl().id, col.id) for col in cols])
|
|
1331
1717
|
if len(result) > 0:
|
|
1332
1718
|
result.update(self.get_dependent_columns(result))
|
|
1333
1719
|
return result
|
|
@@ -1339,82 +1725,17 @@ class TableVersion:
|
|
|
1339
1725
|
return 1
|
|
1340
1726
|
|
|
1341
1727
|
@classmethod
|
|
1342
|
-
def
|
|
1343
|
-
column_md: dict[int, schema.ColumnMd] = {}
|
|
1344
|
-
for col in cols:
|
|
1345
|
-
value_expr_dict = col.value_expr.as_dict() if col.value_expr is not None else None
|
|
1346
|
-
assert col.is_pk is not None
|
|
1347
|
-
column_md[col.id] = schema.ColumnMd(
|
|
1348
|
-
id=col.id,
|
|
1349
|
-
col_type=col.col_type.as_dict(),
|
|
1350
|
-
is_pk=col.is_pk,
|
|
1351
|
-
schema_version_add=col.schema_version_add,
|
|
1352
|
-
schema_version_drop=col.schema_version_drop,
|
|
1353
|
-
value_expr=value_expr_dict,
|
|
1354
|
-
stored=col.stored,
|
|
1355
|
-
)
|
|
1356
|
-
return column_md
|
|
1357
|
-
|
|
1358
|
-
@classmethod
|
|
1359
|
-
def _create_stores_md(cls, stores: Iterable[pxt.io.ExternalStore]) -> list[dict[str, Any]]:
|
|
1728
|
+
def _create_stores_md(cls, stores: Iterable[ExternalStore]) -> list[dict[str, Any]]:
|
|
1360
1729
|
return [
|
|
1361
1730
|
{'class': f'{type(store).__module__}.{type(store).__qualname__}', 'md': store.as_dict()} for store in stores
|
|
1362
1731
|
]
|
|
1363
1732
|
|
|
1364
|
-
def _create_tbl_md(self) -> schema.TableMd:
|
|
1365
|
-
return schema.TableMd(
|
|
1366
|
-
tbl_id=str(self.id),
|
|
1367
|
-
name=self.name,
|
|
1368
|
-
user=self.user,
|
|
1369
|
-
is_replica=self.is_replica,
|
|
1370
|
-
current_version=self.version,
|
|
1371
|
-
current_schema_version=self.schema_version,
|
|
1372
|
-
next_col_id=self.next_col_id,
|
|
1373
|
-
next_idx_id=self.next_idx_id,
|
|
1374
|
-
next_row_id=self.next_rowid,
|
|
1375
|
-
column_md=self._create_column_md(self.cols),
|
|
1376
|
-
index_md=self.idx_md,
|
|
1377
|
-
external_stores=self._create_stores_md(self.external_stores.values()),
|
|
1378
|
-
view_md=self.view_md,
|
|
1379
|
-
additional_md={},
|
|
1380
|
-
)
|
|
1381
|
-
|
|
1382
|
-
def _create_version_md(self, timestamp: float) -> schema.TableVersionMd:
|
|
1383
|
-
return schema.TableVersionMd(
|
|
1384
|
-
tbl_id=str(self.id),
|
|
1385
|
-
created_at=timestamp,
|
|
1386
|
-
version=self.version,
|
|
1387
|
-
schema_version=self.schema_version,
|
|
1388
|
-
additional_md={},
|
|
1389
|
-
)
|
|
1390
|
-
|
|
1391
|
-
def _create_schema_version_md(self, preceding_schema_version: int) -> schema.TableSchemaVersionMd:
|
|
1392
|
-
column_md: dict[int, schema.SchemaColumn] = {}
|
|
1393
|
-
for pos, col in enumerate(self.cols_by_name.values()):
|
|
1394
|
-
column_md[col.id] = schema.SchemaColumn(
|
|
1395
|
-
pos=pos,
|
|
1396
|
-
name=col.name,
|
|
1397
|
-
media_validation=col._media_validation.name.lower() if col._media_validation is not None else None,
|
|
1398
|
-
)
|
|
1399
|
-
# preceding_schema_version to be set by the caller
|
|
1400
|
-
return schema.TableSchemaVersionMd(
|
|
1401
|
-
tbl_id=str(self.id),
|
|
1402
|
-
schema_version=self.schema_version,
|
|
1403
|
-
preceding_schema_version=preceding_schema_version,
|
|
1404
|
-
columns=column_md,
|
|
1405
|
-
num_retained_versions=self.num_retained_versions,
|
|
1406
|
-
comment=self.comment,
|
|
1407
|
-
media_validation=self.media_validation.name.lower(),
|
|
1408
|
-
additional_md={},
|
|
1409
|
-
)
|
|
1410
|
-
|
|
1411
1733
|
def as_dict(self) -> dict:
|
|
1412
|
-
return
|
|
1734
|
+
return self.key.as_dict()
|
|
1413
1735
|
|
|
1414
1736
|
@classmethod
|
|
1415
1737
|
def from_dict(cls, d: dict) -> TableVersion:
|
|
1416
|
-
from pixeltable import
|
|
1738
|
+
from pixeltable.catalog import Catalog
|
|
1417
1739
|
|
|
1418
|
-
|
|
1419
|
-
|
|
1420
|
-
return catalog.Catalog.get().get_tbl_version(id, effective_version)
|
|
1740
|
+
key = TableVersionKey.from_dict(d)
|
|
1741
|
+
return Catalog.get().get_tbl_version(key)
|