pixeltable 0.0.0__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +53 -0
- pixeltable/__version__.py +3 -0
- pixeltable/catalog/__init__.py +13 -0
- pixeltable/catalog/catalog.py +159 -0
- pixeltable/catalog/column.py +181 -0
- pixeltable/catalog/dir.py +32 -0
- pixeltable/catalog/globals.py +33 -0
- pixeltable/catalog/insertable_table.py +192 -0
- pixeltable/catalog/named_function.py +36 -0
- pixeltable/catalog/path.py +58 -0
- pixeltable/catalog/path_dict.py +139 -0
- pixeltable/catalog/schema_object.py +39 -0
- pixeltable/catalog/table.py +695 -0
- pixeltable/catalog/table_version.py +1026 -0
- pixeltable/catalog/table_version_path.py +133 -0
- pixeltable/catalog/view.py +203 -0
- pixeltable/dataframe.py +749 -0
- pixeltable/env.py +466 -0
- pixeltable/exceptions.py +17 -0
- pixeltable/exec/__init__.py +10 -0
- pixeltable/exec/aggregation_node.py +78 -0
- pixeltable/exec/cache_prefetch_node.py +116 -0
- pixeltable/exec/component_iteration_node.py +79 -0
- pixeltable/exec/data_row_batch.py +94 -0
- pixeltable/exec/exec_context.py +22 -0
- pixeltable/exec/exec_node.py +61 -0
- pixeltable/exec/expr_eval_node.py +217 -0
- pixeltable/exec/in_memory_data_node.py +73 -0
- pixeltable/exec/media_validation_node.py +43 -0
- pixeltable/exec/sql_scan_node.py +226 -0
- pixeltable/exprs/__init__.py +25 -0
- pixeltable/exprs/arithmetic_expr.py +102 -0
- pixeltable/exprs/array_slice.py +71 -0
- pixeltable/exprs/column_property_ref.py +77 -0
- pixeltable/exprs/column_ref.py +114 -0
- pixeltable/exprs/comparison.py +77 -0
- pixeltable/exprs/compound_predicate.py +98 -0
- pixeltable/exprs/data_row.py +199 -0
- pixeltable/exprs/expr.py +594 -0
- pixeltable/exprs/expr_set.py +39 -0
- pixeltable/exprs/function_call.py +382 -0
- pixeltable/exprs/globals.py +69 -0
- pixeltable/exprs/image_member_access.py +96 -0
- pixeltable/exprs/in_predicate.py +96 -0
- pixeltable/exprs/inline_array.py +109 -0
- pixeltable/exprs/inline_dict.py +103 -0
- pixeltable/exprs/is_null.py +38 -0
- pixeltable/exprs/json_mapper.py +121 -0
- pixeltable/exprs/json_path.py +159 -0
- pixeltable/exprs/literal.py +66 -0
- pixeltable/exprs/object_ref.py +41 -0
- pixeltable/exprs/predicate.py +44 -0
- pixeltable/exprs/row_builder.py +329 -0
- pixeltable/exprs/rowid_ref.py +94 -0
- pixeltable/exprs/similarity_expr.py +65 -0
- pixeltable/exprs/type_cast.py +53 -0
- pixeltable/exprs/variable.py +45 -0
- pixeltable/ext/__init__.py +5 -0
- pixeltable/ext/functions/yolox.py +92 -0
- pixeltable/func/__init__.py +7 -0
- pixeltable/func/aggregate_function.py +197 -0
- pixeltable/func/callable_function.py +113 -0
- pixeltable/func/expr_template_function.py +99 -0
- pixeltable/func/function.py +141 -0
- pixeltable/func/function_registry.py +227 -0
- pixeltable/func/globals.py +46 -0
- pixeltable/func/nos_function.py +202 -0
- pixeltable/func/signature.py +162 -0
- pixeltable/func/udf.py +164 -0
- pixeltable/functions/__init__.py +95 -0
- pixeltable/functions/eval.py +215 -0
- pixeltable/functions/fireworks.py +34 -0
- pixeltable/functions/huggingface.py +167 -0
- pixeltable/functions/image.py +16 -0
- pixeltable/functions/openai.py +289 -0
- pixeltable/functions/pil/image.py +147 -0
- pixeltable/functions/string.py +13 -0
- pixeltable/functions/together.py +143 -0
- pixeltable/functions/util.py +52 -0
- pixeltable/functions/video.py +62 -0
- pixeltable/globals.py +425 -0
- pixeltable/index/__init__.py +2 -0
- pixeltable/index/base.py +51 -0
- pixeltable/index/embedding_index.py +168 -0
- pixeltable/io/__init__.py +3 -0
- pixeltable/io/hf_datasets.py +188 -0
- pixeltable/io/pandas.py +148 -0
- pixeltable/io/parquet.py +192 -0
- pixeltable/iterators/__init__.py +3 -0
- pixeltable/iterators/base.py +52 -0
- pixeltable/iterators/document.py +432 -0
- pixeltable/iterators/video.py +88 -0
- pixeltable/metadata/__init__.py +58 -0
- pixeltable/metadata/converters/convert_10.py +18 -0
- pixeltable/metadata/converters/convert_12.py +3 -0
- pixeltable/metadata/converters/convert_13.py +41 -0
- pixeltable/metadata/schema.py +234 -0
- pixeltable/plan.py +620 -0
- pixeltable/store.py +424 -0
- pixeltable/tool/create_test_db_dump.py +184 -0
- pixeltable/tool/create_test_video.py +81 -0
- pixeltable/type_system.py +846 -0
- pixeltable/utils/__init__.py +17 -0
- pixeltable/utils/arrow.py +98 -0
- pixeltable/utils/clip.py +18 -0
- pixeltable/utils/coco.py +136 -0
- pixeltable/utils/documents.py +69 -0
- pixeltable/utils/filecache.py +195 -0
- pixeltable/utils/help.py +11 -0
- pixeltable/utils/http_server.py +70 -0
- pixeltable/utils/media_store.py +76 -0
- pixeltable/utils/pytorch.py +91 -0
- pixeltable/utils/s3.py +13 -0
- pixeltable/utils/sql.py +17 -0
- pixeltable/utils/transactional_directory.py +35 -0
- pixeltable-0.0.0.dist-info/LICENSE +18 -0
- pixeltable-0.0.0.dist-info/METADATA +131 -0
- pixeltable-0.0.0.dist-info/RECORD +119 -0
- pixeltable-0.0.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,1026 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import dataclasses
|
|
4
|
+
import importlib
|
|
5
|
+
import inspect
|
|
6
|
+
import logging
|
|
7
|
+
import time
|
|
8
|
+
from typing import Optional, List, Dict, Any, Tuple, Type, Set
|
|
9
|
+
from uuid import UUID
|
|
10
|
+
|
|
11
|
+
import sqlalchemy as sql
|
|
12
|
+
import sqlalchemy.orm as orm
|
|
13
|
+
|
|
14
|
+
import pixeltable
|
|
15
|
+
import pixeltable.func as func
|
|
16
|
+
import pixeltable.type_system as ts
|
|
17
|
+
import pixeltable.exceptions as excs
|
|
18
|
+
import pixeltable.index as index
|
|
19
|
+
from pixeltable.env import Env
|
|
20
|
+
from pixeltable.iterators import ComponentIterator
|
|
21
|
+
from pixeltable.metadata import schema
|
|
22
|
+
from pixeltable.utils.filecache import FileCache
|
|
23
|
+
from pixeltable.utils.media_store import MediaStore
|
|
24
|
+
from .column import Column
|
|
25
|
+
from .globals import UpdateStatus, POS_COLUMN_NAME, is_valid_identifier
|
|
26
|
+
|
|
27
|
+
_logger = logging.getLogger('pixeltable')
|
|
28
|
+
|
|
29
|
+
class TableVersion:
|
|
30
|
+
"""
|
|
31
|
+
TableVersion represents a particular version of a table/view along with its physical representation:
|
|
32
|
+
- the physical representation is a store table with indices
|
|
33
|
+
- the version can be mutable or a snapshot
|
|
34
|
+
- tables and their recursive views form a tree, and a mutable TableVersion also records its own
|
|
35
|
+
mutable views in order to propagate updates
|
|
36
|
+
- each view TableVersion records its base:
|
|
37
|
+
* the base is correct only for mutable views (snapshot versions form a DAG, not a tree)
|
|
38
|
+
* the base is useful for getting access to the StoreTable and the base id
|
|
39
|
+
* TODO: create a separate hierarchy of objects that records the version-independent tree of tables/views, and
|
|
40
|
+
have TableVersions reference those
|
|
41
|
+
- mutable TableVersions record their TableVersionPath, which is needed for expr evaluation in updates
|
|
42
|
+
"""
|
|
43
|
+
@dataclasses.dataclass
class IndexInfo:
    """Runtime record of an actively maintained index on a column.

    Bundles the index metadata id/name with the index implementation object and
    the columns involved in maintaining it.
    """
    id: int  # index id; unique within this table
    name: str  # index name; unique within this table
    idx: index.IndexBase  # index implementation object
    col: Column  # the indexed column
    val_col: Column  # stored column holding the computed index values
    undo_col: Column  # stored column holding prior index values; presumably used to undo updates — TODO confirm
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def __init__(
        self, id: UUID, tbl_md: schema.TableMd, version: int, schema_version_md: schema.TableSchemaVersionMd,
        base: Optional[TableVersion] = None, base_path: Optional['pixeltable.catalog.TableVersionPath'] = None,
        is_snapshot: Optional[bool] = None
):
    """Construct a TableVersion from stored metadata and register it in the catalog.

    Args:
        id: table id
        tbl_md: table metadata (name, column/index md, view md, id counters)
        version: the table version this object represents
        schema_version_md: metadata of the schema version in effect at ``version``
        base: base TableVersion (used for snapshot views)
        base_path: base TableVersionPath (used for mutable views)
        is_snapshot: force snapshot semantics even when tbl_md.view_md doesn't say so
    """
    # only one of base and base_path can be non-None
    assert base is None or base_path is None
    self.id = id
    self.name = tbl_md.name
    self.version = version
    self.comment = schema_version_md.comment
    self.num_retained_versions = schema_version_md.num_retained_versions
    self.schema_version = schema_version_md.schema_version
    self.view_md = tbl_md.view_md  # save this as-is, it's needed for _create_md()
    is_view = tbl_md.view_md is not None
    self.is_snapshot = (is_view and tbl_md.view_md.is_snapshot) or bool(is_snapshot)
    # a mutable TableVersion doesn't have a static version
    self.effective_version = self.version if self.is_snapshot else None

    # mutable tables need their TableVersionPath for expr eval during updates
    from .table_version_path import TableVersionPath
    if self.is_snapshot:
        self.path = None
    else:
        self.path = TableVersionPath(self, base=base_path) if base_path is not None else TableVersionPath(self)

    self.base = base_path.tbl_version if base_path is not None else base
    if self.is_snapshot:
        # snapshots are immutable, so the id counters are never consumed; -1 marks them unusable
        self.next_col_id = -1
        self.next_idx_id = -1  # TODO: can snapshots have separate indices?
        self.next_rowid = -1
    else:
        assert tbl_md.current_version == self.version
        self.next_col_id = tbl_md.next_col_id
        self.next_idx_id = tbl_md.next_idx_id
        self.next_rowid = tbl_md.next_row_id

    # view-specific initialization
    from pixeltable import exprs
    predicate_dict = None if not is_view or tbl_md.view_md.predicate is None else tbl_md.view_md.predicate
    self.predicate = exprs.Expr.from_dict(predicate_dict) if predicate_dict is not None else None
    self.mutable_views: List[TableVersion] = []  # targets for update propagation
    # only mutable views of a mutable base participate in update propagation
    if self.base is not None and not self.base.is_snapshot and not self.is_snapshot:
        self.base.mutable_views.append(self)

    # component view-specific initialization
    self.iterator_cls: Optional[Type[ComponentIterator]] = None
    self.iterator_args: Optional[exprs.InlineDict] = None
    self.num_iterator_cols = 0
    if is_view and tbl_md.view_md.iterator_class_fqn is not None:
        # re-instantiate the iterator class from its fully-qualified name
        module_name, class_name = tbl_md.view_md.iterator_class_fqn.rsplit('.', 1)
        module = importlib.import_module(module_name)
        self.iterator_cls = getattr(module, class_name)
        self.iterator_args = exprs.Expr.from_dict(tbl_md.view_md.iterator_args)
        assert isinstance(self.iterator_args, exprs.InlineDict)
        output_schema, _ = self.iterator_cls.output_schema(**self.iterator_args.to_dict())
        self.num_iterator_cols = len(output_schema)
        assert tbl_md.view_md.iterator_args is not None

    # register this table version now so that it's available when we're re-creating value exprs
    import pixeltable.catalog as catalog
    cat = catalog.Catalog.get()
    cat.tbl_versions[(self.id, self.effective_version)] = self

    # init schema after we determined whether we're a component view, and before we create the store table
    self.cols: List[Column] = []  # contains complete history of columns, incl dropped ones
    self.cols_by_name: dict[str, Column] = {}  # contains only user-facing (named) columns visible in this version
    self.cols_by_id: dict[int, Column] = {}  # contains only columns visible in this version
    self.idx_md = tbl_md.index_md  # needed for _create_tbl_md()
    self.idxs_by_name: dict[str, TableVersion.IndexInfo] = {}  # contains only actively maintained indices
    self._init_schema(tbl_md, schema_version_md)
|
|
124
|
+
|
|
125
|
+
def __hash__(self) -> int:
|
|
126
|
+
return hash(self.id)
|
|
127
|
+
|
|
128
|
+
def create_snapshot_copy(self) -> TableVersion:
    """Create a snapshot copy of this TableVersion.

    The constructor registers the copy in the catalog as a side effect.
    """
    assert not self.is_snapshot
    return TableVersion(
        self.id, self._create_tbl_md(), self.version,
        self._create_schema_version_md(preceding_schema_version=0),  # preceding_schema_version: dummy value
        is_snapshot=True, base=self.base)
|
|
135
|
+
|
|
136
|
+
@classmethod
def create(
        cls, session: orm.Session, dir_id: UUID, name: str, cols: List[Column], num_retained_versions: int,
        comment: str, base_path: Optional['pixeltable.catalog.TableVersionPath'] = None,
        view_md: Optional[schema.ViewMd] = None
) -> Tuple[UUID, Optional[TableVersion]]:
    """Create metadata records for a new table/view and, unless it is a pure snapshot, its store table.

    Args:
        session: ORM session of the enclosing transaction
        dir_id: id of the containing directory
        name: table name
        cols: user columns, in schema order; ids are assigned here
        num_retained_versions: retention setting recorded in the schema version md
        comment: table comment recorded in the schema version md
        base_path: base TableVersionPath (views only)
        view_md: view metadata (views only); must be supplied iff base_path is

    Returns:
        (table id, TableVersion); the TableVersion is None for a pure snapshot
        (no extra columns, no predicate), which simply reuses the base's physical table.
    """
    # assign ids
    cols_by_name: Dict[str, Column] = {}
    for pos, col in enumerate(cols):
        col.id = pos
        col.schema_version_add = 0
        cols_by_name[col.name] = col
        if col.value_expr is None and col.compute_func is not None:
            cls._create_value_expr(col, base_path)
        if col.is_computed:
            col.check_value_expr()

    ts = time.time()
    # create schema.Table
    # Column.dependent_cols for existing cols is wrong at this point, but init() will set it correctly
    column_md = cls._create_column_md(cols)
    table_md = schema.TableMd(
        name=name, current_version=0, current_schema_version=0,
        next_col_id=len(cols), next_idx_id=0, next_row_id=0, column_md=column_md, index_md={}, view_md=view_md)
    tbl_record = schema.Table(dir_id=dir_id, md=dataclasses.asdict(table_md))
    session.add(tbl_record)
    session.flush()  # sets tbl_record.id
    assert tbl_record.id is not None

    # create schema.TableVersion
    table_version_md = schema.TableVersionMd(created_at=ts, version=0, schema_version=0)
    tbl_version_record = schema.TableVersion(
        tbl_id=tbl_record.id, version=0, md=dataclasses.asdict(table_version_md))
    session.add(tbl_version_record)

    # create schema.TableSchemaVersion
    schema_col_md = {col.id: schema.SchemaColumn(pos=pos, name=col.name) for pos, col in enumerate(cols)}

    schema_version_md = schema.TableSchemaVersionMd(
        schema_version=0, preceding_schema_version=None, columns=schema_col_md,
        num_retained_versions=num_retained_versions, comment=comment)
    schema_version_record = schema.TableSchemaVersion(
        tbl_id=tbl_record.id, schema_version=0, md=dataclasses.asdict(schema_version_md))
    session.add(schema_version_record)

    # if this is purely a snapshot (it doesn't require any additional storage for columns and it doesn't have a
    # predicate to apply at runtime), we don't create a physical table and simply use the base's table version path
    if view_md is not None and view_md.is_snapshot and view_md.predicate is None and len(cols) == 0:
        return tbl_record.id, None

    assert (base_path is not None) == (view_md is not None)
    # snapshot views keep a direct base reference; mutable views keep the full base path
    base = base_path.tbl_version if base_path is not None and view_md.is_snapshot else None
    base_path = base_path if base_path is not None and not view_md.is_snapshot else None
    tbl_version = cls(tbl_record.id, table_md, 0, schema_version_md, base=base, base_path=base_path)
    tbl_version.store_tbl.create(session.connection())
    # TODO: create pgvector indices
    return tbl_record.id, tbl_version
|
|
193
|
+
|
|
194
|
+
@classmethod
def delete_md(cls, tbl_id: UUID, conn: sql.Connection) -> None:
    """Delete all metadata records of the given table.

    Deletes child records (schema versions, then versions) before the Table
    record itself.
    """
    delete_stmts = (
        sql.delete(schema.TableSchemaVersion.__table__).where(schema.TableSchemaVersion.tbl_id == tbl_id),
        sql.delete(schema.TableVersion.__table__).where(schema.TableVersion.tbl_id == tbl_id),
        sql.delete(schema.Table.__table__).where(schema.Table.id == tbl_id),
    )
    for stmt in delete_stmts:
        conn.execute(stmt)
|
|
201
|
+
|
|
202
|
+
def drop(self) -> None:
    """Drop this table: delete its media files, cached files, metadata, and store table."""
    with Env.get().engine.begin() as conn:
        # delete this table and all associated data
        MediaStore.delete(self.id)
        FileCache.get().clear(tbl_id=self.id)
        self.delete_md(self.id, conn)
        self.store_tbl.drop(conn)

    # de-register table version from catalog
    from .catalog import Catalog
    cat = Catalog.get()
    del cat.tbl_versions[(self.id, self.effective_version)]
    # TODO: remove from tbl_dependents
|
|
215
|
+
|
|
216
|
+
def _init_schema(self, tbl_md: schema.TableMd, schema_version_md: schema.TableSchemaVersionMd) -> None:
    """Initialize columns, indices, and the sqlalchemy schema; the order is load-bearing."""
    # create columns first, so the indices can reference them
    self._init_cols(tbl_md, schema_version_md)
    self._init_idxs(tbl_md)
    # create the sa schema only after creating the columns and indices
    self._init_sa_schema()
|
|
222
|
+
|
|
223
|
+
def _init_cols(self, tbl_md: schema.TableMd, schema_version_md: schema.TableSchemaVersionMd) -> None:
    """Initialize self.cols with the columns visible in our effective version"""
    import pixeltable.exprs as exprs
    self.cols = []
    self.cols_by_name = {}
    self.cols_by_id = {}
    for col_md in tbl_md.column_md.values():
        # the name comes from the schema version md; absent means the column has no name in this version
        col_name = schema_version_md.columns[col_md.id].name if col_md.id in schema_version_md.columns else None
        col = Column(
            col_id=col_md.id, name=col_name, col_type=ts.ColumnType.from_dict(col_md.col_type),
            is_pk=col_md.is_pk, stored=col_md.stored,
            schema_version_add=col_md.schema_version_add, schema_version_drop=col_md.schema_version_drop)
        col.tbl = self
        self.cols.append(col)  # self.cols keeps the full history, even columns not visible in this version

        # populate the lookup structures before Expr.from_dict()
        if col_md.schema_version_add > self.schema_version:
            # column was added after this version
            continue
        if col_md.schema_version_drop is not None and col_md.schema_version_drop <= self.schema_version:
            # column was dropped
            continue
        if col.name is not None:
            self.cols_by_name[col.name] = col
        self.cols_by_id[col.id] = col

        # make sure to traverse columns ordered by position = order in which cols were created;
        # this guarantees that references always point backwards
        if col_md.value_expr is not None:
            col.value_expr = exprs.Expr.from_dict(col_md.value_expr)
            self._record_value_expr(col)
|
|
254
|
+
|
|
255
|
+
def _init_idxs(self, tbl_md: schema.TableMd) -> None:
    """Initialize self.idxs_by_name with the indices visible in our effective version."""
    self.idx_md = tbl_md.index_md
    self.idxs_by_name = {}
    import pixeltable.index as index_module
    for md in tbl_md.index_md.values():
        if md.schema_version_add > self.schema_version \
                or md.schema_version_drop is not None and md.schema_version_drop <= self.schema_version:
            # index not visible in this schema version
            continue

        # instantiate the index object from the class name recorded in its fqn
        cls_name = md.class_fqn.rsplit('.', 1)[-1]
        cls = getattr(index_module, cls_name)
        idx_col = self.cols_by_id[md.indexed_col_id]
        idx = cls.from_dict(idx_col, md.init_args)

        # fix up the sa column type of the index value and undo columns
        val_col = self.cols_by_id[md.index_val_col_id]
        val_col.sa_col_type = idx.index_sa_type()
        undo_col = self.cols_by_id[md.index_val_undo_col_id]
        undo_col.sa_col_type = idx.index_sa_type()
        idx_info = self.IndexInfo(id=md.id, name=md.name, idx=idx, col=idx_col, val_col=val_col, undo_col=undo_col)
        self.idxs_by_name[md.name] = idx_info
|
|
278
|
+
|
|
279
|
+
def _init_sa_schema(self) -> None:
    """Instantiate the store-table wrapper for this table version.

    Must run after columns and indices are instantiated, because the store
    schema depends on them (e.g. whether columns need to record errors).
    """
    from pixeltable.store import StoreBase, StoreTable, StoreView, StoreComponentView
    if self.is_component_view():
        store_cls = StoreComponentView
    elif self.is_view():
        store_cls = StoreView
    else:
        store_cls = StoreTable
    self.store_tbl: StoreBase = store_cls(self)
|
|
289
|
+
|
|
290
|
+
def _update_md(self, ts: float, preceding_schema_version: Optional[int], conn: sql.engine.Connection) -> None:
    """Update all recorded metadata in response to a data or schema change.

    Args:
        ts: timestamp of the change
        preceding_schema_version: last schema version if schema change, else None
        conn: connection of the enclosing transaction
    """
    # overwrite the Table record's md with the current state
    conn.execute(
        sql.update(schema.Table.__table__)
        .values({schema.Table.md: dataclasses.asdict(self._create_tbl_md())})
        .where(schema.Table.id == self.id))

    # record the new table version
    version_md = self._create_version_md(ts)
    conn.execute(
        sql.insert(schema.TableVersion.__table__)
        .values(tbl_id=self.id, version=self.version, md=dataclasses.asdict(version_md)))
    # a schema change additionally gets a new TableSchemaVersion record
    if preceding_schema_version is not None:
        schema_version_md = self._create_schema_version_md(preceding_schema_version)
        conn.execute(
            sql.insert(schema.TableSchemaVersion.__table__)
            .values(
                tbl_id=self.id, schema_version=self.schema_version,
                md=dataclasses.asdict(schema_version_md)))
|
|
312
|
+
|
|
313
|
+
def _store_idx_name(self, idx_id: int) -> str:
|
|
314
|
+
"""Return name of index in the store, which needs to be globally unique"""
|
|
315
|
+
return f'idx_{self.id.hex}_{idx_id}'
|
|
316
|
+
|
|
317
|
+
def add_index(self, col: Column, idx_name: Optional[str], idx: index.IndexBase) -> UpdateStatus:
    """Add an index on `col`, creating its value/undo columns and a new schema version.

    Args:
        col: the column to index
        idx_name: optional user-supplied name; autogenerated as 'idx<N>' if None
        idx: the index implementation object
    Returns:
        UpdateStatus from populating the index value/undo columns
    """
    assert not self.is_snapshot
    idx_id = self.next_idx_id
    self.next_idx_id += 1
    if idx_name is None:
        idx_name = f'idx{idx_id}'
    else:
        assert is_valid_identifier(idx_name)
        assert idx_name not in [i.name for i in self.idx_md.values()]

    # we're creating a new schema version
    self.version += 1
    preceding_schema_version = self.schema_version
    self.schema_version = self.version
    with Env.get().engine.begin() as conn:
        # add the index value and undo columns (which need to be nullable);
        # NOTE(review): this comment used to claim no new schema version is created, but the code above
        # does bump schema_version — confirm which is intended
        val_col = Column(
            col_id=self.next_col_id, name=None, computed_with=idx.index_value_expr(),
            sa_col_type=idx.index_sa_type(), stored=True,
            schema_version_add=self.schema_version, schema_version_drop=None)
        val_col.tbl = self
        val_col.col_type.nullable = True
        self.next_col_id += 1

        undo_col = Column(
            col_id=self.next_col_id, name=None, col_type=val_col.col_type,
            sa_col_type=val_col.sa_col_type, stored=True,
            schema_version_add=self.schema_version, schema_version_drop=None)
        undo_col.tbl = self
        undo_col.col_type.nullable = True
        self.next_col_id += 1

        # create and register the index metadata
        idx_cls = type(idx)
        idx_md = schema.IndexMd(
            id=idx_id, name=idx_name,
            indexed_col_id=col.id, index_val_col_id=val_col.id, index_val_undo_col_id=undo_col.id,
            schema_version_add=self.schema_version, schema_version_drop=None,
            class_fqn=idx_cls.__module__ + '.' + idx_cls.__name__, init_args=idx.as_dict())
        idx_info = self.IndexInfo(id=idx_id, name=idx_name, idx=idx, col=col, val_col=val_col, undo_col=undo_col)
        self.idx_md[idx_id] = idx_md
        self.idxs_by_name[idx_name] = idx_info

        # add the columns and update the metadata
        status = self._add_columns([val_col, undo_col], conn, preceding_schema_version=preceding_schema_version)
        # now create the index structure
        idx.create_index(self._store_idx_name(idx_id), val_col, conn)

    _logger.info(f'Added index {idx_name} on column {col.name} to table {self.name}')
    return status
|
|
368
|
+
|
|
369
|
+
def drop_index(self, idx_id: int) -> None:
    """Drop the index with the given id and create a new schema version.

    Marks the index md as dropped and drops its value/undo columns.
    NOTE(review): the physical store index structure does not appear to be
    dropped here — confirm whether that happens elsewhere.
    """
    assert not self.is_snapshot
    assert idx_id in self.idx_md

    # we're creating a new schema version
    self.version += 1
    preceding_schema_version = self.schema_version
    self.schema_version = self.version
    idx_md = self.idx_md[idx_id]
    idx_md.schema_version_drop = self.schema_version
    assert idx_md.name in self.idxs_by_name
    idx_info = self.idxs_by_name[idx_md.name]
    del self.idxs_by_name[idx_md.name]

    with Env.get().engine.begin() as conn:
        self._drop_columns([idx_info.val_col, idx_info.undo_col], conn, preceding_schema_version)
    _logger.info(f'Dropped index {idx_md.name} on table {self.name}')
|
|
386
|
+
|
|
387
|
+
def add_column(self, col: Column, print_stats: bool = False) -> UpdateStatus:
    """Adds a column to the table.

    Args:
        col: fully-specified column (valid unused name; stored flag set;
            optional compute_func/value_expr)
        print_stats: if True, print the execution profile of the population plan
    Returns:
        UpdateStatus with counts of computed values and errors
    """
    assert not self.is_snapshot
    assert is_valid_identifier(col.name)
    assert col.stored is not None
    assert col.name not in self.cols_by_name
    col.tbl = self
    col.id = self.next_col_id
    self.next_col_id += 1

    if col.compute_func is not None:
        # create value_expr from compute_func
        self._create_value_expr(col, self.path)
    if col.value_expr is not None:
        col.check_value_expr()
        self._record_value_expr(col)

    # we're creating a new schema version
    self.version += 1
    preceding_schema_version = self.schema_version
    self.schema_version = self.version
    with Env.get().engine.begin() as conn:
        status = self._add_columns([col], conn, preceding_schema_version, print_stats=print_stats)
    _logger.info(f'Added column {col.name} to table {self.name}, new version: {self.version}')

    # user-facing summary of the population result
    msg = (
        f'Added {status.num_rows} column value{"" if status.num_rows == 1 else "s"} '
        f'with {status.num_excs} error{"" if status.num_excs == 1 else "s"}.'
    )
    print(msg)
    _logger.info(f'Column {col.name}: {msg}')
    return status
|
|
420
|
+
|
|
421
|
+
def _add_columns(
        self, cols: List[Column], conn: sql.engine.Connection, preceding_schema_version: Optional[int] = None,
        print_stats: bool = False
) -> UpdateStatus:
    """Add and populate columns within the current transaction.

    Args:
        cols: columns to add (ids already assigned)
        conn: connection of the enclosing transaction
        preceding_schema_version: last schema version if this is a schema change, else None
        print_stats: if True, print the execution profile of the population plan
    Returns:
        UpdateStatus with row/exception counts across all added columns
    Raises:
        Error: if a non-nullable non-computed column is added to a non-empty table,
            or on SQL errors during population
    """
    ts = time.time()

    row_count = self.store_tbl.count(conn=conn)
    for col in cols:
        if not col.col_type.nullable and not col.is_computed:
            # existing rows would have no value for a non-computed, non-nullable column
            if row_count > 0:
                raise excs.Error(
                    f'Cannot add non-nullable column "{col.name}" to table {self.name} with existing rows')

    num_excs = 0
    cols_with_excs: List[Column] = []
    plan = None  # most recent population plan; may stay None if no column needed populating
    num_added = 0  # how many of cols have been appended to self.cols so far
    for col in cols:
        col.schema_version_add = self.schema_version
        # add the column to the lookup structures now, rather than after the store changes executed successfully,
        # because it might be referenced by the next column's value_expr
        self.cols.append(col)
        num_added += 1
        if col.name is not None:
            self.cols_by_name[col.name] = col
        self.cols_by_id[col.id] = col

        if col.is_stored:
            self.store_tbl.add_column(col, conn)

        if not col.is_computed or not col.is_stored or row_count == 0:
            continue

        # populate the column
        from pixeltable.plan import Planner
        plan, value_expr_slot_idx = Planner.create_add_column_plan(self.path, col)
        plan.ctx.num_rows = row_count

        try:
            plan.ctx.conn = conn
            plan.open()
            # bug fix: accumulate exceptions across all columns instead of keeping only the last count
            col_excs = self.store_tbl.load_column(col, plan, value_expr_slot_idx, conn)
            num_excs += col_excs
            if col_excs > 0:
                cols_with_excs.append(col)
        except sql.exc.DBAPIError as e:
            # bug fix: remove *all* columns appended so far from self.cols, not just the last one
            del self.cols[len(self.cols) - num_added:]
            # remove columns that we already added from the lookup structures
            # (distinct loop variable: the old code shadowed the outer `col`)
            for added_col in cols:
                if added_col.id not in self.cols_by_id:
                    continue
                if added_col.name is not None:
                    del self.cols_by_name[added_col.name]
                del self.cols_by_id[added_col.id]
            # we need to re-initialize the sqlalchemy schema
            self.store_tbl.create_sa_tbl()
            raise excs.Error(f'Error during SQL execution:\n{e}')
        finally:
            plan.close()

    self._update_md(ts, preceding_schema_version, conn)
    # bug fix: guard against `plan` being unbound when no column required population
    if print_stats and plan is not None:
        plan.ctx.profile.print(num_rows=row_count)
    # TODO(mkornacker): what to do about system columns with exceptions?
    return UpdateStatus(
        num_rows=row_count, num_computed_values=row_count, num_excs=num_excs,
        cols_with_excs=[f'{col.tbl.name}.{col.name}' for col in cols_with_excs if col.name is not None])
|
|
485
|
+
|
|
486
|
+
def drop_column(self, name: str) -> None:
    """Drop a user column, along with any indices on it, and create a new schema version.

    Args:
        name: name of the column to drop
    Raises:
        Error: if the column is unknown or other user-visible columns depend on it
    """
    assert not self.is_snapshot
    if name not in self.cols_by_name:
        raise excs.Error(f'Unknown column: {name}')
    col = self.cols_by_name[name]
    dependent_user_cols = [c for c in col.dependent_cols if c.name is not None]
    if len(dependent_user_cols) > 0:
        # bug fix: the trailing comma here used to pass the dependent-column list as a second
        # positional argument to Error, dropping it from the message; concatenate into one string
        raise excs.Error(
            f'Cannot drop column {name} because the following columns depend on it:\n'
            f'{", ".join([c.name for c in dependent_user_cols])}')

    # we're creating a new schema version
    self.version += 1
    preceding_schema_version = self.schema_version
    self.schema_version = self.version

    with Env.get().engine.begin() as conn:
        # drop this column and all dependent index columns and indices
        dropped_cols = [col]
        dropped_idx_names: List[str] = []
        for idx_info in self.idxs_by_name.values():
            if idx_info.col != col:
                continue
            dropped_cols.extend([idx_info.val_col, idx_info.undo_col])
            idx_md = self.idx_md[idx_info.id]
            idx_md.schema_version_drop = self.schema_version
            assert idx_md.name in self.idxs_by_name
            dropped_idx_names.append(idx_md.name)
        # update idxs_by_name after the loop (can't mutate the dict while iterating it)
        for idx_name in dropped_idx_names:
            del self.idxs_by_name[idx_name]
        self._drop_columns(dropped_cols, conn, preceding_schema_version)
    _logger.info(f'Dropped column {name} from table {self.name}, new version: {self.version}')
|
|
521
|
+
|
|
522
|
+
def _drop_columns(self, cols: list[Column], conn: sql.engine.Connection, preceding_schema_version: int) -> None:
    """Mark columns as dropped.

    Stamps each column with the current schema version as its drop version and removes it
    from the by-name/by-id lookup structures (the Column objects stay in self.cols, so older
    versions remain reconstructible). Persists the metadata and rebuilds the sqlalchemy
    representation of the store table.

    Args:
        cols: columns to mark as dropped (user columns plus any dependent index columns)
        conn: connection of the already-open transaction
        preceding_schema_version: schema version that self.schema_version replaces
    """
    assert not self.is_snapshot

    ts = time.time()
    for col in cols:
        if col.value_expr is not None:
            # update Column.dependent_cols
            # (the scan stops at col itself — assumes a computed column can only depend on
            # columns that precede it in self.cols; TODO confirm that invariant)
            for c in self.cols:
                if c == col:
                    break
                c.dependent_cols.discard(col)

        col.schema_version_drop = self.schema_version
        if col.name is not None:
            assert col.name in self.cols_by_name
            del self.cols_by_name[col.name]
        assert col.id in self.cols_by_id
        del self.cols_by_id[col.id]

    self._update_md(ts, preceding_schema_version, conn)
    # the set of visible stored columns changed; recreate the sqlalchemy Table object
    self.store_tbl.create_sa_tbl()
def rename_column(self, old_name: str, new_name: str) -> None:
    """Rename a column.

    Args:
        old_name: current name of the column
        new_name: new name; must be a valid identifier and not collide with an existing column
    Raises:
        excs.Error: if old_name is unknown, new_name is invalid, or new_name already exists
    """
    assert not self.is_snapshot
    if old_name not in self.cols_by_name:
        raise excs.Error(f'Unknown column: {old_name}')
    if not is_valid_identifier(new_name):
        raise excs.Error(f"Invalid column name: '{new_name}'")
    if new_name in self.cols_by_name:
        raise excs.Error(f'Column {new_name} already exists')
    # re-key the column in cols_by_name and update the Column object itself
    col = self.cols_by_name[old_name]
    del self.cols_by_name[old_name]
    col.name = new_name
    self.cols_by_name[new_name] = col

    # we're creating a new schema version
    ts = time.time()
    self.version += 1
    preceding_schema_version = self.schema_version
    self.schema_version = self.version

    with Env.get().engine.begin() as conn:
        self._update_md(ts, preceding_schema_version, conn)
    _logger.info(f'Renamed column {old_name} to {new_name} in table {self.name}, new version: {self.version}')
def set_comment(self, new_comment: Optional[str]) -> None:
    """Set (or clear, with None) the table comment and record a new schema version."""
    _logger.info(f'[{self.name}] Updating comment: {new_comment}')
    self.comment = new_comment
    # comment is part of the schema-version metadata, so bump the schema version
    self._create_schema_version()
def set_num_retained_versions(self, new_num_retained_versions: int) -> None:
    """Set the number of retained versions and record a new schema version."""
    _logger.info(f'[{self.name}] Updating num_retained_versions: {new_num_retained_versions} (was {self.num_retained_versions})')
    self.num_retained_versions = new_num_retained_versions
    # num_retained_versions is part of the schema-version metadata, so bump the schema version
    self._create_schema_version()
def _create_schema_version(self) -> None:
    """Bump the table version and schema version in lockstep and persist the metadata."""
    # we're creating a new schema version
    ts = time.time()
    self.version += 1
    preceding_schema_version = self.schema_version
    self.schema_version = self.version
    with Env.get().engine.begin() as conn:
        self._update_md(ts, preceding_schema_version, conn)
    _logger.info(f'[{self.name}] Updating table schema to version: {self.version}')
def insert(
        self, rows: List[Dict[str, Any]], print_stats: bool = False, fail_on_exception : bool = True
) -> UpdateStatus:
    """Insert rows into this table.

    Args:
        rows: one dict per row, mapping column names to values
        print_stats: if True, print execution-profile statistics after the insert
        fail_on_exception: if True, raise on expression-evaluation errors instead of
            recording them in the affected cells
    Returns:
        UpdateStatus aggregated over this table and all affected views
    """
    assert self.is_insertable()
    from pixeltable.plan import Planner
    plan = Planner.create_insert_plan(self, rows, ignore_errors=not fail_on_exception)
    ts = time.time()
    # single transaction for this table's insert plus propagation to views
    with Env.get().engine.begin() as conn:
        return self._insert(plan, conn, ts, print_stats)
def _insert(
        self, exec_plan: exec.ExecNode, conn: sql.engine.Connection, ts: float, print_stats: bool = False,
) -> UpdateStatus:
    """Insert rows produced by exec_plan and propagate to views.

    Args:
        exec_plan: execution plan producing the rows to insert
        conn: connection of the already-open transaction
        ts: timestamp recorded in the version metadata
        print_stats: if True, print exec_plan's execution profile
    Returns:
        UpdateStatus aggregated over this table and all mutable views
    """
    # we're creating a new version
    self.version += 1
    result = UpdateStatus()
    num_rows, num_excs, cols_with_excs = self.store_tbl.insert_rows(exec_plan, conn, v_min=self.version)
    # NOTE(review): next_rowid is assigned, not incremented — presumably insert_rows
    # returns the new rowid high-water mark; verify against StoreTable.insert_rows
    self.next_rowid = num_rows
    result.num_rows = num_rows
    result.num_excs = num_excs
    result.num_computed_values += exec_plan.ctx.num_computed_exprs * num_rows
    result.cols_with_excs = [f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs]
    self._update_md(ts, None, conn)

    # update views
    from pixeltable.plan import Planner  # hoisted out of the loop; it's loop-invariant
    for view in self.mutable_views:
        plan, _ = Planner.create_view_load_plan(view.path, propagates_insert=True)
        status = view._insert(plan, conn, ts, print_stats)
        result.num_rows += status.num_rows
        result.num_excs += status.num_excs
        result.num_computed_values += status.num_computed_values
        result.cols_with_excs += status.cols_with_excs

    result.cols_with_excs = list(dict.fromkeys(result.cols_with_excs).keys())  # remove duplicates
    if print_stats:
        # bug fix: previously printed `plan.ctx.profile`, but `plan` is only bound inside
        # the view loop and is unbound (NameError) when there are no mutable views;
        # the profile of interest is exec_plan's anyway
        exec_plan.ctx.profile.print(num_rows=num_rows)
    _logger.info(f'TableVersion {self.name}: new version {self.version}')
    return result
def update(
        self, update_targets: dict[Column, 'pixeltable.exprs.Expr'],
        where_clause: Optional['pixeltable.exprs.Predicate'] = None, cascade: bool = True
) -> UpdateStatus:
    """Update rows in this table within a fresh transaction; see _update() for semantics."""
    with Env.get().engine.begin() as conn:
        return self._update(conn, update_targets, where_clause, cascade)
def batch_update(
        self, batch: list[dict[Column, 'pixeltable.exprs.Expr']], rowids: list[Tuple[int, ...]],
        cascade: bool = True
) -> UpdateStatus:
    """Update rows in batch.
    Args:
        batch: one dict per row, each mapping Columns to LiteralExprs representing the new values
        rowids: if not empty, one tuple per row, each containing the rowid values for the corresponding row in batch
        cascade: if True, also update computed columns that transitively depend on the updated columns
    Returns:
        UpdateStatus aggregated over all rows of the batch

    NOTE(review): each row goes through _update() individually, and _update() creates a
    new table version per call — a batch of n rows appears to create n versions within
    one transaction; confirm that this is intended.
    """
    # if we do lookups of rowids, we must have one for each row in the batch
    assert len(rowids) == 0 or len(rowids) == len(batch)
    import pixeltable.exprs as exprs
    result_status = UpdateStatus()
    cols_with_excs: set[str] = set()
    updated_cols: set[str] = set()
    pk_cols = self.primary_key_columns()
    use_rowids = len(rowids) > 0

    with Env.get().engine.begin() as conn:
        for i, row in enumerate(batch):
            where_clause: Optional[exprs.Expr] = None
            if use_rowids:
                # construct Where clause to match rowid
                num_rowid_cols = len(self.store_tbl.rowid_columns())
                for col_idx in range(num_rowid_cols):
                    assert len(rowids[i]) == num_rowid_cols
                    clause = exprs.RowidRef(self, col_idx) == rowids[i][col_idx]
                    # AND the per-column equality clauses together
                    if where_clause is None:
                        where_clause = clause
                    else:
                        where_clause = where_clause & clause
            else:
                # construct Where clause for primary key columns
                for col in pk_cols:
                    assert col in row
                    clause = exprs.ColumnRef(col) == row[col]
                    if where_clause is None:
                        where_clause = clause
                    else:
                        where_clause = where_clause & clause

            # primary key columns identify the row; everything else is an update target
            update_targets = {col: row[col] for col in row if col not in pk_cols}
            status = self._update(conn, update_targets, where_clause, cascade)
            result_status.num_rows += status.num_rows
            result_status.num_excs += status.num_excs
            result_status.num_computed_values += status.num_computed_values
            cols_with_excs.update(status.cols_with_excs)
            updated_cols.update(status.updated_cols)

    result_status.cols_with_excs = list(cols_with_excs)
    result_status.updated_cols = list(updated_cols)
    return result_status
def _update(
        self, conn: sql.engine.Connection, update_targets: dict[Column, 'pixeltable.exprs.Expr'],
        where_clause: Optional['pixeltable.exprs.Predicate'] = None, cascade: bool = True
) -> UpdateStatus:
    """Update rows in this table.
    Args:
        conn: connection of the already-open transaction
        update_targets: a list of (column, value) pairs specifying the columns to update and their new values.
        where_clause: a Predicate to filter rows to update.
        cascade: if True, also update all computed columns that transitively depend on the updated columns,
            including within views.
    Returns:
        UpdateStatus with updated_cols filled in from the plan
    """
    assert not self.is_snapshot
    from pixeltable.plan import Planner
    plan, updated_cols, recomputed_cols = \
        Planner.create_update_plan(self.path, update_targets, [], where_clause, cascade)
    ts = time.time()
    # _propagate_update does the actual store-table work and recurses into views
    result = self._propagate_update(
        plan, where_clause.sql_expr() if where_clause is not None else None, recomputed_cols,
        base_versions=[], conn=conn, ts=ts, cascade=cascade)
    result.updated_cols = updated_cols
    return result
def _propagate_update(
        self, plan: Optional[exec.ExecNode], where_clause: Optional[sql.ClauseElement],
        recomputed_view_cols: List[Column], base_versions: List[Optional[int]], conn: sql.engine.Connection,
        ts: float, cascade: bool
) -> UpdateStatus:
    """Apply an update plan to this table version and recurse into mutable views.

    Args:
        plan: plan producing the updated rows; None if this particular table has nothing to do
        where_clause: SQL filter selecting the rows superseded by the update
        recomputed_view_cols: all view columns (across views) that need recomputation
        base_versions: versions of the ancestor tables affected so far (None = unchanged)
        conn: connection of the already-open transaction
        ts: timestamp recorded in the version metadata
        cascade: if True, propagate into mutable views
    Returns:
        UpdateStatus aggregated over this table and its views
    """
    result = UpdateStatus()
    if plan is not None:
        # we're creating a new version
        self.version += 1
        # insert the updated rows with the new version, ...
        result.num_rows, result.num_excs, cols_with_excs = \
            self.store_tbl.insert_rows(plan, conn, v_min=self.version)
        result.cols_with_excs = [f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs]
        # ... then close out the superseded rows at this version
        self.store_tbl.delete_rows(
            self.version, base_versions=base_versions, match_on_vmin=True, where_clause=where_clause, conn=conn)
        self._update_md(ts, None, conn)

    if cascade:
        base_versions = [None if plan is None else self.version] + base_versions  # don't update in place
        # propagate to views
        for view in self.mutable_views:
            recomputed_cols = [col for col in recomputed_view_cols if col.tbl is view]
            # note: deliberately rebinds the parameter 'plan' with the per-view plan
            plan: Optional[exec.ExecNode] = None
            if len(recomputed_cols) > 0:
                from pixeltable.plan import Planner
                plan = Planner.create_view_update_plan(view.path, recompute_targets=recomputed_cols)
            status = view._propagate_update(
                plan, None, recomputed_view_cols, base_versions=base_versions, conn=conn, ts=ts, cascade=True)
            result.num_rows += status.num_rows
            result.num_excs += status.num_excs
            result.cols_with_excs += status.cols_with_excs

    result.cols_with_excs = list(dict.fromkeys(result.cols_with_excs).keys())  # remove duplicates
    return result
def delete(self, where: Optional['pixeltable.exprs.Predicate'] = None) -> UpdateStatus:
    """Delete rows in this table.
    Args:
        where: a Predicate to filter rows to delete.
    Returns:
        UpdateStatus with num_rows set to the number of rows deleted (incl. view rows)
    """
    assert self.is_insertable()
    from pixeltable.plan import Planner
    # analyze the predicate before opening the transaction
    analysis_info = Planner.analyze(self, where)
    ts = time.time()
    with Env.get().engine.begin() as conn:
        # NOTE(review): _delete() calls .sql_expr() on its 'where' arg, so
        # analysis_info.sql_where_clause is presumably still a Predicate here — confirm
        num_rows = self._delete(analysis_info.sql_where_clause, base_versions=[], conn=conn, ts=ts)

    status = UpdateStatus(num_rows=num_rows)
    return status
def _delete(
        self, where: Optional['pixeltable.exprs.Predicate'], base_versions: List[Optional[int]],
        conn: sql.engine.Connection, ts: float) -> int:
    """Delete rows in this table and propagate to views.
    Args:
        where: a Predicate to filter rows to delete.
        base_versions: versions of the ancestor tables affected so far
        conn: connection of the already-open transaction
        ts: timestamp recorded in the version metadata
    Returns:
        number of deleted rows
    """
    sql_where_clause = where.sql_expr() if where is not None else None
    # delete_rows() is told the prospective new version; only bump if something was deleted
    num_rows = self.store_tbl.delete_rows(
        self.version + 1, base_versions=base_versions, match_on_vmin=False, where_clause=sql_where_clause,
        conn=conn)
    if num_rows > 0:
        # we're creating a new version
        self.version += 1
        self._update_md(ts, None, conn)
    # cleanup: removed a dead `else: pass` branch here
    for view in self.mutable_views:
        num_rows += view._delete(where=None, base_versions=[self.version] + base_versions, conn=conn, ts=ts)
    return num_rows
def revert(self) -> None:
    """Reverts the table to the previous version.

    Raises:
        excs.Error: if the table is at version 0 (nothing to revert to)
    """
    assert not self.is_snapshot
    if self.version == 0:
        raise excs.Error('Cannot revert version 0')
    # _revert() needs an orm.Session (not just a Connection) for its metadata queries
    with orm.Session(Env.get().engine, future=True) as session:
        self._revert(session)
        session.commit()
def _delete_column(self, col: Column, conn: sql.engine.Connection) -> None:
|
|
798
|
+
"""Physically remove the column from the schema and the store table"""
|
|
799
|
+
if col.is_stored:
|
|
800
|
+
self.store_tbl.drop_column(col, conn)
|
|
801
|
+
self.cols.remove(col)
|
|
802
|
+
if col.name is not None:
|
|
803
|
+
del self.cols_by_name[col.name]
|
|
804
|
+
del self.cols_by_id[col.id]
|
|
805
|
+
|
|
806
|
+
def _revert(self, session: orm.Session) -> None:
    """Reverts this table version and propagates to views.

    Undoes the data changes of the current version (deletes newly-added rows, reopens
    newly-deleted ones), rolls back schema changes if this version is also a schema
    version, deletes the corresponding metadata records, and recurses into mutable views.

    Raises:
        excs.Error: if a snapshot still references the current version
    """
    conn = session.connection()
    # make sure we don't have a snapshot referencing this version
    # (unclear how to express this with sqlalchemy)
    # note: self.id.hex and self.version are internally generated (UUID hex / int),
    # so the f-string interpolation below shouldn't be an injection vector
    query = (
        f"select ts.dir_id, ts.md->'name' "
        f"from {schema.Table.__tablename__} ts "
        f"cross join lateral jsonb_path_query(md, '$.view_md.base_versions[*]') as tbl_version "
        f"where tbl_version->>0 = '{self.id.hex}' and (tbl_version->>1)::int = {self.version}"
    )
    result = list(conn.execute(sql.text(query)))
    if len(result) > 0:
        names = [row[1] for row in result]
        raise excs.Error((
            f'Current version is needed for {len(result)} snapshot{"s" if len(result) > 1 else ""} '
            f'({", ".join(names)})'
        ))

    conn = session.connection()
    # delete newly-added data
    MediaStore.delete(self.id, version=self.version)
    conn.execute(sql.delete(self.store_tbl.sa_tbl).where(self.store_tbl.sa_tbl.c.v_min == self.version))

    # revert new deletions: rows closed out at this version become current again
    set_clause = {self.store_tbl.sa_tbl.c.v_max: schema.Table.MAX_VERSION}
    for index_info in self.idxs_by_name.values():
        # copy the index value back from the undo column and reset the undo column to NULL
        set_clause[index_info.val_col.sa_col] = index_info.undo_col.sa_col
        set_clause[index_info.undo_col.sa_col] = None
    stmt = sql.update(self.store_tbl.sa_tbl) \
        .values(set_clause) \
        .where(self.store_tbl.sa_tbl.c.v_max == self.version)
    conn.execute(stmt)

    # revert schema changes (only if this version introduced a new schema version)
    if self.version == self.schema_version:
        # delete newly-added columns
        added_cols = [col for col in self.cols if col.schema_version_add == self.schema_version]
        if len(added_cols) > 0:
            # roll the column-id counter back to the lowest id handed out in this version
            next_col_id = min(col.id for col in added_cols)
            for col in added_cols:
                self._delete_column(col, conn)
            self.next_col_id = next_col_id

        # remove newly-added indices from the lookup structures
        # (the value and undo columns got removed in the preceding step)
        added_idx_md = [md for md in self.idx_md.values() if md.schema_version_add == self.schema_version]
        if len(added_idx_md) > 0:
            next_idx_id = min(md.id for md in added_idx_md)
            for md in added_idx_md:
                del self.idx_md[md.id]
                del self.idxs_by_name[md.name]
            self.next_idx_id = next_idx_id

        # make newly-dropped columns visible again
        dropped_cols = [col for col in self.cols if col.schema_version_drop == self.schema_version]
        for col in dropped_cols:
            col.schema_version_drop = None

        # make newly-dropped indices visible again
        dropped_idx_md = [md for md in self.idx_md.values() if md.schema_version_drop == self.schema_version]
        for md in dropped_idx_md:
            md.schema_version_drop = None

        # we need to determine the preceding schema version and reload the schema
        schema_version_md_dict = session.query(schema.TableSchemaVersion.md) \
            .where(schema.TableSchemaVersion.tbl_id == self.id) \
            .where(schema.TableSchemaVersion.schema_version == self.schema_version) \
            .scalar()
        preceding_schema_version = schema_version_md_dict['preceding_schema_version']
        preceding_schema_version_md_dict = session.query(schema.TableSchemaVersion.md) \
            .where(schema.TableSchemaVersion.tbl_id == self.id) \
            .where(schema.TableSchemaVersion.schema_version == preceding_schema_version) \
            .scalar()
        preceding_schema_version_md = schema.md_from_dict(
            schema.TableSchemaVersionMd, preceding_schema_version_md_dict)
        tbl_md = self._create_tbl_md()
        self._init_schema(tbl_md, preceding_schema_version_md)

        # delete the now-obsolete schema-version record and restore prior settings
        conn.execute(
            sql.delete(schema.TableSchemaVersion.__table__)
            .where(schema.TableSchemaVersion.tbl_id == self.id)
            .where(schema.TableSchemaVersion.schema_version == self.schema_version))
        self.schema_version = preceding_schema_version
        self.comment = preceding_schema_version_md.comment
        self.num_retained_versions = preceding_schema_version_md.num_retained_versions

    # delete the version record and persist the rolled-back table metadata
    conn.execute(
        sql.delete(schema.TableVersion.__table__)
        .where(schema.TableVersion.tbl_id == self.id)
        .where(schema.TableVersion.version == self.version)
    )
    self.version -= 1
    conn.execute(
        sql.update(schema.Table.__table__)
        .values({schema.Table.md: dataclasses.asdict(self._create_tbl_md())})
        .where(schema.Table.id == self.id))

    # propagate to views
    for view in self.mutable_views:
        view._revert(session)
    _logger.info(f'TableVersion {self.name}: reverted to version {self.version}')
def is_view(self) -> bool:
    """Whether this table version is a view, i.e., sits on top of a base table."""
    # a view is precisely a TableVersion that carries a base
    has_base = self.base is not None
    return has_base
def is_component_view(self) -> bool:
    """Whether this is a component view (its rows are produced by an iterator)."""
    itr_cls = self.iterator_cls
    return itr_cls is not None
def is_insertable(self) -> bool:
    """Returns True if this corresponds to an InsertableTable (neither snapshot nor view)."""
    # De Morgan form of: not snapshot and not view
    return not (self.is_snapshot or self.is_view())
def is_iterator_column(self, col: Column) -> bool:
    """Returns True if col is produced by an iterator."""
    # the iterator columns directly follow the pos column (id 0), so their ids lie in
    # the half-open range (0, num_iterator_cols]
    return self.is_component_view() and 0 < col.id <= self.num_iterator_cols
def is_system_column(self, col: Column) -> bool:
    """Return True if column was created by Pixeltable rather than the user."""
    # currently the only system column is the pos column of a component view
    return col.name == POS_COLUMN_NAME and self.is_component_view()
def user_columns(self) -> List[Column]:
    """Return all non-system columns."""
    result = []
    for candidate in self.cols:
        if self.is_system_column(candidate):
            continue
        result.append(candidate)
    return result
def primary_key_columns(self) -> List[Column]:
    """Return all primary key columns."""
    pk_cols = []
    for col in self.cols:
        if col.is_pk:
            pk_cols.append(col)
    return pk_cols
def get_required_col_names(self) -> List[str]:
    """Return the names of all columns for which values must be specified in insert()."""
    assert not self.is_view()
    # required = neither computed nor nullable
    return [
        col.name for col in self.cols
        if not (col.is_computed or col.col_type.nullable)
    ]
def get_computed_col_names(self) -> List[str]:
    """Return the names of all computed columns."""
    computed = (col for col in self.cols if col.is_computed)
    return [col.name for col in computed]
@classmethod
def _create_value_expr(cls, col: Column, path: 'TableVersionPath') -> None:
    """
    Create col.value_expr, given col.compute_func.
    Interprets compute_func's parameters to be references to columns and construct ColumnRefs as args.
    Does not update Column.dependent_cols.

    Raises:
        excs.Error: if a parameter name of compute_func doesn't match a column in path
    """
    assert col.value_expr is None
    assert col.compute_func is not None
    from pixeltable import exprs
    # each parameter name of compute_func must name a column reachable via path
    params = inspect.signature(col.compute_func).parameters
    args: List[exprs.ColumnRef] = []
    for param_name in params:
        param = path.get_column(param_name)
        if param is None:
            raise excs.Error(
                f'Column {col.name}: Callable parameter refers to an unknown column: {param_name}')
        args.append(exprs.ColumnRef(param))
    # wrap compute_func in a Function typed with the referenced columns' types
    fn = func.make_function(
        col.compute_func, return_type=col.col_type, param_types=[arg.col_type for arg in args])
    col.value_expr = fn(*args)
def _record_value_expr(self, col: Column) -> None:
    """Update Column.dependent_cols for all cols referenced in col.value_expr.
    """
    assert col.value_expr is not None
    from pixeltable.exprs import ColumnRef
    # register col as a dependent of every column its value_expr references
    refd_cols = [e.col for e in col.value_expr.subexprs(expr_class=ColumnRef)]
    for refd_col in refd_cols:
        refd_col.dependent_cols.add(col)
def get_dependent_columns(self, cols: List[Column]) -> Set[Column]:
|
|
982
|
+
"""
|
|
983
|
+
Return the set of columns that transitively depend on any of the given ones.
|
|
984
|
+
"""
|
|
985
|
+
if len(cols) == 0:
|
|
986
|
+
return []
|
|
987
|
+
result: Set[Column] = set()
|
|
988
|
+
for col in cols:
|
|
989
|
+
result.update(col.dependent_cols)
|
|
990
|
+
result.update(self.get_dependent_columns(result))
|
|
991
|
+
return result
|
|
992
|
+
|
|
993
|
+
def num_rowid_columns(self) -> int:
    """Return the number of columns of the rowids, without accessing store_tbl.

    A component view adds one pos column on top of its base's rowid columns.
    """
    extra = self.base.num_rowid_columns() if self.is_component_view() else 0
    return 1 + extra
@classmethod
def _create_column_md(cls, cols: List[Column]) -> dict[int, schema.ColumnMd]:
    """Build the persistable per-column metadata records, keyed by column id."""
    column_md: Dict[int, schema.ColumnMd] = {}
    for col in cols:
        # computed columns carry their value expression in serialized form
        value_expr_dict = col.value_expr.as_dict() if col.value_expr is not None else None
        column_md[col.id] = schema.ColumnMd(
            id=col.id, col_type=col.col_type.as_dict(), is_pk=col.is_pk,
            schema_version_add=col.schema_version_add, schema_version_drop=col.schema_version_drop,
            value_expr=value_expr_dict, stored=col.stored)
    return column_md
def _create_tbl_md(self) -> schema.TableMd:
    """Snapshot the current in-memory table state as a persistable TableMd record."""
    return schema.TableMd(
        name=self.name, current_version=self.version, current_schema_version=self.schema_version,
        next_col_id=self.next_col_id, next_idx_id=self.next_idx_id, next_row_id=self.next_rowid,
        column_md=self._create_column_md(self.cols), index_md=self.idx_md, view_md=self.view_md)
def _create_version_md(self, ts: float) -> schema.TableVersionMd:
    """Build the metadata record for the current version, created at timestamp ts."""
    return schema.TableVersionMd(created_at=ts, version=self.version, schema_version=self.schema_version)
def _create_schema_version_md(self, preceding_schema_version: int) -> schema.TableSchemaVersionMd:
    """Build the metadata record for the current schema version.

    Args:
        preceding_schema_version: the schema version this one replaces
    """
    column_md: Dict[int, schema.SchemaColumn] = {}
    # record display position and name for every currently-visible (named) column
    for pos, col in enumerate(self.cols_by_name.values()):
        column_md[col.id] = schema.SchemaColumn(pos=pos, name=col.name)
    return schema.TableSchemaVersionMd(
        schema_version=self.schema_version, preceding_schema_version=preceding_schema_version,
        columns=column_md, num_retained_versions=self.num_retained_versions, comment=self.comment)