pixeltable 0.3.14__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
- pixeltable/__init__.py +42 -8
- pixeltable/{dataframe.py → _query.py} +470 -206
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +5 -4
- pixeltable/catalog/catalog.py +1785 -432
- pixeltable/catalog/column.py +190 -113
- pixeltable/catalog/dir.py +2 -4
- pixeltable/catalog/globals.py +19 -46
- pixeltable/catalog/insertable_table.py +191 -98
- pixeltable/catalog/path.py +63 -23
- pixeltable/catalog/schema_object.py +11 -15
- pixeltable/catalog/table.py +843 -436
- pixeltable/catalog/table_metadata.py +103 -0
- pixeltable/catalog/table_version.py +978 -657
- pixeltable/catalog/table_version_handle.py +72 -16
- pixeltable/catalog/table_version_path.py +112 -43
- pixeltable/catalog/tbl_ops.py +53 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +134 -90
- pixeltable/config.py +134 -22
- pixeltable/env.py +471 -157
- pixeltable/exceptions.py +6 -0
- pixeltable/exec/__init__.py +4 -1
- pixeltable/exec/aggregation_node.py +7 -8
- pixeltable/exec/cache_prefetch_node.py +83 -110
- pixeltable/exec/cell_materialization_node.py +268 -0
- pixeltable/exec/cell_reconstruction_node.py +168 -0
- pixeltable/exec/component_iteration_node.py +4 -3
- pixeltable/exec/data_row_batch.py +8 -65
- pixeltable/exec/exec_context.py +16 -4
- pixeltable/exec/exec_node.py +13 -36
- pixeltable/exec/expr_eval/evaluators.py +11 -7
- pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
- pixeltable/exec/expr_eval/globals.py +8 -5
- pixeltable/exec/expr_eval/row_buffer.py +1 -2
- pixeltable/exec/expr_eval/schedulers.py +106 -56
- pixeltable/exec/globals.py +35 -0
- pixeltable/exec/in_memory_data_node.py +19 -19
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +16 -9
- pixeltable/exec/sql_node.py +351 -84
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/arithmetic_expr.py +27 -22
- pixeltable/exprs/array_slice.py +3 -3
- pixeltable/exprs/column_property_ref.py +36 -23
- pixeltable/exprs/column_ref.py +213 -89
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +5 -4
- pixeltable/exprs/data_row.py +164 -54
- pixeltable/exprs/expr.py +70 -44
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +17 -10
- pixeltable/exprs/function_call.py +100 -40
- pixeltable/exprs/globals.py +2 -2
- pixeltable/exprs/in_predicate.py +4 -4
- pixeltable/exprs/inline_expr.py +18 -32
- pixeltable/exprs/is_null.py +7 -3
- pixeltable/exprs/json_mapper.py +8 -8
- pixeltable/exprs/json_path.py +56 -22
- pixeltable/exprs/literal.py +27 -5
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +167 -67
- pixeltable/exprs/rowid_ref.py +25 -10
- pixeltable/exprs/similarity_expr.py +58 -40
- pixeltable/exprs/sql_element_cache.py +4 -4
- pixeltable/exprs/string_op.py +5 -5
- pixeltable/exprs/type_cast.py +3 -5
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/aggregate_function.py +8 -8
- pixeltable/func/callable_function.py +9 -9
- pixeltable/func/expr_template_function.py +17 -11
- pixeltable/func/function.py +18 -20
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/globals.py +2 -3
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +29 -27
- pixeltable/func/signature.py +46 -19
- pixeltable/func/tools.py +31 -13
- pixeltable/func/udf.py +18 -20
- pixeltable/functions/__init__.py +16 -0
- pixeltable/functions/anthropic.py +123 -77
- pixeltable/functions/audio.py +147 -10
- pixeltable/functions/bedrock.py +13 -6
- pixeltable/functions/date.py +7 -4
- pixeltable/functions/deepseek.py +35 -43
- pixeltable/functions/document.py +81 -0
- pixeltable/functions/fal.py +76 -0
- pixeltable/functions/fireworks.py +11 -20
- pixeltable/functions/gemini.py +195 -39
- pixeltable/functions/globals.py +142 -14
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1056 -24
- pixeltable/functions/image.py +115 -57
- pixeltable/functions/json.py +1 -1
- pixeltable/functions/llama_cpp.py +28 -13
- pixeltable/functions/math.py +67 -5
- pixeltable/functions/mistralai.py +18 -55
- pixeltable/functions/net.py +70 -0
- pixeltable/functions/ollama.py +20 -13
- pixeltable/functions/openai.py +240 -226
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +4 -4
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +239 -69
- pixeltable/functions/timestamp.py +16 -16
- pixeltable/functions/together.py +24 -84
- pixeltable/functions/twelvelabs.py +188 -0
- pixeltable/functions/util.py +6 -1
- pixeltable/functions/uuid.py +30 -0
- pixeltable/functions/video.py +1515 -107
- pixeltable/functions/vision.py +8 -8
- pixeltable/functions/voyageai.py +289 -0
- pixeltable/functions/whisper.py +16 -8
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/{ext/functions → functions}/yolox.py +2 -4
- pixeltable/globals.py +362 -115
- pixeltable/index/base.py +17 -21
- pixeltable/index/btree.py +28 -22
- pixeltable/index/embedding_index.py +100 -118
- pixeltable/io/__init__.py +4 -2
- pixeltable/io/datarows.py +8 -7
- pixeltable/io/external_store.py +56 -105
- pixeltable/io/fiftyone.py +13 -13
- pixeltable/io/globals.py +31 -30
- pixeltable/io/hf_datasets.py +61 -16
- pixeltable/io/label_studio.py +74 -70
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +21 -12
- pixeltable/io/parquet.py +25 -105
- pixeltable/io/table_data_conduit.py +250 -123
- pixeltable/io/utils.py +4 -4
- pixeltable/iterators/__init__.py +2 -1
- pixeltable/iterators/audio.py +26 -25
- pixeltable/iterators/base.py +9 -3
- pixeltable/iterators/document.py +112 -78
- pixeltable/iterators/image.py +12 -15
- pixeltable/iterators/string.py +11 -4
- pixeltable/iterators/video.py +523 -120
- pixeltable/metadata/__init__.py +14 -3
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_18.py +2 -2
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_20.py +2 -2
- pixeltable/metadata/converters/convert_21.py +2 -2
- pixeltable/metadata/converters/convert_22.py +2 -2
- pixeltable/metadata/converters/convert_24.py +2 -2
- pixeltable/metadata/converters/convert_25.py +2 -2
- pixeltable/metadata/converters/convert_26.py +2 -2
- pixeltable/metadata/converters/convert_29.py +4 -4
- pixeltable/metadata/converters/convert_30.py +34 -21
- pixeltable/metadata/converters/convert_34.py +2 -2
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/convert_41.py +12 -0
- pixeltable/metadata/converters/convert_42.py +9 -0
- pixeltable/metadata/converters/convert_43.py +44 -0
- pixeltable/metadata/converters/util.py +20 -31
- pixeltable/metadata/notes.py +9 -0
- pixeltable/metadata/schema.py +140 -53
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +382 -115
- pixeltable/share/__init__.py +1 -1
- pixeltable/share/packager.py +547 -83
- pixeltable/share/protocol/__init__.py +33 -0
- pixeltable/share/protocol/common.py +165 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +119 -0
- pixeltable/share/publish.py +257 -59
- pixeltable/store.py +311 -194
- pixeltable/type_system.py +373 -211
- pixeltable/utils/__init__.py +2 -3
- pixeltable/utils/arrow.py +131 -17
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +346 -0
- pixeltable/utils/coco.py +6 -6
- pixeltable/utils/code.py +3 -3
- pixeltable/utils/console_output.py +4 -1
- pixeltable/utils/coroutine.py +6 -23
- pixeltable/utils/dbms.py +32 -6
- pixeltable/utils/description_helper.py +4 -5
- pixeltable/utils/documents.py +7 -18
- pixeltable/utils/exception_handler.py +7 -30
- pixeltable/utils/filecache.py +6 -6
- pixeltable/utils/formatter.py +86 -48
- pixeltable/utils/gcs_store.py +295 -0
- pixeltable/utils/http.py +133 -0
- pixeltable/utils/http_server.py +2 -3
- pixeltable/utils/iceberg.py +1 -2
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +90 -0
- pixeltable/utils/local_store.py +322 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +573 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +5 -6
- pixeltable/utils/s3_store.py +527 -0
- pixeltable/utils/sql.py +26 -0
- pixeltable/utils/system.py +30 -0
- pixeltable-0.5.7.dist-info/METADATA +579 -0
- pixeltable-0.5.7.dist-info/RECORD +227 -0
- {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
- pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/catalog/named_function.py +0 -40
- pixeltable/ext/__init__.py +0 -17
- pixeltable/ext/functions/__init__.py +0 -11
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/utils/media_store.py +0 -77
- pixeltable/utils/s3.py +0 -17
- pixeltable-0.3.14.dist-info/METADATA +0 -434
- pixeltable-0.3.14.dist-info/RECORD +0 -186
- pixeltable-0.3.14.dist-info/entry_points.txt +0 -3
- {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
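Two of the moves above change public import paths: yolox leaves the deleted pixeltable/ext package for pixeltable/functions, and whisperx is reimplemented under pixeltable/functions/whisperx.py. A minimal sketch of the resulting import change for user code; only the module paths are taken from the file list above, nothing else about these modules' APIs should be inferred from it:

    # pixeltable 0.3.14: integrations lived in the (now deleted) ext package
    # from pixeltable.ext.functions import whisperx, yolox

    # pixeltable 0.5.7: same modules under pixeltable.functions, per the file moves above
    from pixeltable.functions import whisperx, yolox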
pixeltable/store.py
CHANGED
@@ -2,23 +2,23 @@ from __future__ import annotations
 
 import abc
 import logging
-import os
 import sys
-import urllib.parse
-import urllib.request
+import time
 import warnings
-from typing import Any, Iterable, Iterator, Literal, Optional
+from typing import Any, Iterable, Iterator
+from uuid import UUID
 
 import more_itertools
+import psycopg
 import sqlalchemy as sql
 from tqdm import TqdmWarning, tqdm
 
-from pixeltable import catalog, exceptions as excs, exprs
+from pixeltable import catalog, exceptions as excs
+from pixeltable.catalog.update_status import RowCountStats
 from pixeltable.env import Env
 from pixeltable.exec import ExecNode
 from pixeltable.metadata import schema
 from pixeltable.utils.exception_handler import run_cleanup
-from pixeltable.utils.media_store import MediaStore
 from pixeltable.utils.sql import log_explain, log_stmt
 
 _logger = logging.getLogger('pixeltable')
@@ -35,24 +35,46 @@ class StoreBase:
 
     tbl_version: catalog.TableVersionHandle
     sa_md: sql.MetaData
-    sa_tbl: Optional[sql.Table]
+    sa_tbl: sql.Table | None
     _pk_cols: list[sql.Column]
     v_min_col: sql.Column
    v_max_col: sql.Column
-    base: Optional[StoreBase]
 
-
+    # We need to declare a `base` variable here, even though it's only defined for instances of `StoreView`,
+    # since it's referenced by various methods of `StoreBase`
+    _base: StoreBase | None
+
+    # In my cursory experiments this was the optimal batch size: it was an improvement over 5_000 and there was no real
+    # benefit to going higher.
+    # TODO: Perform more rigorous experiments with different table structures and OS environments to refine this.
+    __INSERT_BATCH_SIZE = 10_000
 
     def __init__(self, tbl_version: catalog.TableVersion):
-        self.tbl_version = catalog.TableVersionHandle(
-            tbl_version.id, tbl_version.effective_version, tbl_version=tbl_version
-        )
+        self.tbl_version = tbl_version.handle
         self.sa_md = sql.MetaData()
         self.sa_tbl = None
-
-
-
-
+        self._pk_cols = []
+
+        # we initialize _base lazily, because the base may not exist anymore at this point
+        # (but we might still need sa_table to access our store table); do this before create_sa_tbl()
+        self._base = None
+
+        # we're passing in tbl_version to avoid a circular call to TableVersionHandle.get()
+        self.create_sa_tbl(tbl_version)
+
+    @property
+    def base(self) -> StoreBase | None:
+        if self._base is None:
+            tv = self.tbl_version.get()
+            self._base = tv.base.get().store_tbl if tv.base is not None else None
+        return self._base
+
+    @classmethod
+    def storage_name(cls, tbl_id: UUID, is_view: bool) -> str:
+        return f'{"view" if is_view else "tbl"}_{tbl_id.hex}'
+
+    def system_columns(self) -> list[sql.Column]:
+        return [*self._pk_cols, self.v_max_col]
 
     def pk_columns(self) -> list[sql.Column]:
         return self._pk_cols
@@ -66,7 +88,23 @@ class StoreBase:
 
     def _create_system_columns(self) -> list[sql.Column]:
         """Create and return system columns"""
-        rowid_cols = self._create_rowid_columns()
+        rowid_cols: list[sql.Column]
+        if self._store_tbl_exists():
+            # derive our rowid Columns from the existing table, without having to access self.base.store_tbl:
+            # self.base may not exist anymore (both this table and our base got dropped in the same transaction, and
+            # the base was finalized before this table)
+            with Env.get().begin_xact(for_write=False) as conn:
+                q = (
+                    f'SELECT column_name FROM information_schema.columns WHERE table_name = {self._storage_name()!r} '
+                    'ORDER BY ordinal_position'
+                )
+                col_names = [row[0] for row in conn.execute(sql.text(q)).fetchall()]
+                num_rowid_cols = col_names.index('v_min')
+                rowid_cols = [
+                    sql.Column(col_name, sql.BigInteger, nullable=False) for col_name in col_names[:num_rowid_cols]
+                ]
+        else:
+            rowid_cols = self._create_rowid_columns()
         self.v_min_col = sql.Column('v_min', sql.BigInteger, nullable=False)
         self.v_max_col = sql.Column(
             'v_max', sql.BigInteger, nullable=False, server_default=str(schema.Table.MAX_VERSION)
@@ -74,18 +112,20 @@
         self._pk_cols = [*rowid_cols, self.v_min_col]
         return [*rowid_cols, self.v_min_col, self.v_max_col]
 
-    def create_sa_tbl(self) -> None:
+    def create_sa_tbl(self, tbl_version: catalog.TableVersion | None = None) -> None:
         """Create self.sa_tbl from self.tbl_version."""
+        if tbl_version is None:
+            tbl_version = self.tbl_version.get()
         system_cols = self._create_system_columns()
         all_cols = system_cols.copy()
-
+        # we captured all columns, including dropped ones: they're still part of the physical table
+        for col in [c for c in tbl_version.cols if c.is_stored]:
             # re-create sql.Column for each column, regardless of whether it already has sa_col set: it was bound
             # to the last sql.Table version we created and cannot be reused
             col.create_sa_cols()
             all_cols.append(col.sa_col)
-            if col.records_errors:
-                all_cols.append(col.sa_errormsg_col)
-                all_cols.append(col.sa_errortype_col)
+            if col.stores_cellmd:
+                all_cols.append(col.sa_cellmd_col)
 
         if self.sa_tbl is not None:
             # if we're called in response to a schema change, we need to remove the old table first
@@ -96,16 +136,17 @@
         # - base x view joins can be executed as merge joins
         # - speeds up ORDER BY rowid DESC
         # - allows filtering for a particular table version in index scan
-        idx_name = f'sys_cols_idx_{self.tbl_version.id.hex}'
+        idx_name = f'sys_cols_idx_{tbl_version.id.hex}'
         idxs.append(sql.Index(idx_name, *system_cols))
 
         # v_min/v_max indices: speeds up base table scans needed to propagate a base table insert or delete
-        idx_name = f'vmin_idx_{self.tbl_version.id.hex}'
+        idx_name = f'vmin_idx_{tbl_version.id.hex}'
         idxs.append(sql.Index(idx_name, self.v_min_col, postgresql_using=Env.get().dbms.version_index_type))
-        idx_name = f'vmax_idx_{self.tbl_version.id.hex}'
+        idx_name = f'vmax_idx_{tbl_version.id.hex}'
         idxs.append(sql.Index(idx_name, self.v_max_col, postgresql_using=Env.get().dbms.version_index_type))
 
         self.sa_tbl = sql.Table(self._storage_name(), self.sa_md, *all_cols, *idxs)
+        # _logger.debug(f'created sa tbl for {tbl_version.id!s} (sa_tbl={id(self.sa_tbl):x}, tv={id(tbl_version):x})')
 
     @abc.abstractmethod
     def _rowid_join_predicate(self) -> sql.ColumnElement[bool]:
@@ -115,51 +156,6 @@
     def _storage_name(self) -> str:
         """Return the name of the data store table"""
 
-    def _move_tmp_media_file(self, file_url: Optional[str], col: catalog.Column, v_min: int) -> str:
-        """Move tmp media file with given url to Env.media_dir and return new url, or given url if not a tmp_dir file"""
-        pxt_tmp_dir = str(Env.get().tmp_dir)
-        if file_url is None:
-            return None
-        parsed = urllib.parse.urlparse(file_url)
-        # We should never be passed a local file path here. The "len > 1" ensures that Windows
-        # file paths aren't mistaken for URLs with a single-character scheme.
-        assert len(parsed.scheme) > 1
-        if parsed.scheme != 'file':
-            # remote url
-            return file_url
-        file_path = urllib.parse.unquote(urllib.request.url2pathname(parsed.path))
-        if not file_path.startswith(pxt_tmp_dir):
-            # not a tmp file
-            return file_url
-        _, ext = os.path.splitext(file_path)
-        new_path = str(MediaStore.prepare_media_path(self.tbl_version.id, col.id, v_min, ext=ext))
-        os.rename(file_path, new_path)
-        new_file_url = urllib.parse.urljoin('file:', urllib.request.pathname2url(new_path))
-        return new_file_url
-
-    def _move_tmp_media_files(
-        self, table_rows: list[dict[str, Any]], media_cols: list[catalog.Column], v_min: int
-    ) -> None:
-        """Move tmp media files that we generated to a permanent location"""
-        for c in media_cols:
-            for table_row in table_rows:
-                file_url = table_row[c.store_name()]
-                table_row[c.store_name()] = self._move_tmp_media_file(file_url, c, v_min)
-
-    def _create_table_row(
-        self, input_row: exprs.DataRow, row_builder: exprs.RowBuilder, exc_col_ids: set[int], pk: tuple[int, ...]
-    ) -> tuple[dict[str, Any], int]:
-        """Return Tuple[complete table row, # of exceptions] for insert()
-        Creates a row that includes the PK columns, with the values from input_row.pk.
-        Returns:
-            Tuple[complete table row, # of exceptions]
-        """
-        table_row, num_excs = row_builder.create_table_row(input_row, exc_col_ids)
-        assert len(pk) == len(self._pk_cols)
-        for pk_col, pk_val in zip(self._pk_cols, pk):
-            table_row[pk_col.name] = pk_val
-        return table_row, num_excs
-
     def count(self) -> int:
         """Return the number of rows visible in self.tbl_version"""
         stmt = (
@@ -173,14 +169,123 @@
         assert isinstance(result, int)
         return result
 
+    def _exec_if_not_exists(self, stmt: str, wait_for_table: bool) -> None:
+        """
+        Execute a statement containing 'IF NOT EXISTS' and ignore any duplicate object-related errors.
+
+        The statement needs to run in a separate transaction, because the expected error conditions will abort the
+        enclosing transaction (and the ability to run additional statements in that same transaction).
+        """
+        while True:
+            with Env.get().begin_xact(for_write=True) as conn:
+                try:
+                    if wait_for_table and not Env.get().is_using_cockroachdb:
+                        # Try to lock the table to make sure that it exists. This needs to run in the same transaction
+                        # as 'stmt' to avoid a race condition.
+                        # TODO: adapt this for CockroachDB
+                        lock_stmt = f'LOCK TABLE {self._storage_name()} IN ACCESS EXCLUSIVE MODE'
+                        conn.execute(sql.text(lock_stmt))
+                    conn.execute(sql.text(stmt))
+                    return
+                except (sql.exc.IntegrityError, sql.exc.ProgrammingError) as e:
+                    Env.get().console_logger.info(f'{stmt} failed with: {e}')
+                    if (
+                        isinstance(e.orig, psycopg.errors.UniqueViolation)
+                        and 'duplicate key value violates unique constraint' in str(e.orig)
+                    ) or (
+                        isinstance(e.orig, (psycopg.errors.DuplicateObject, psycopg.errors.DuplicateTable))
+                        and 'already exists' in str(e.orig)
+                    ):
+                        # table already exists
+                        return
+                    elif isinstance(e.orig, psycopg.errors.UndefinedTable):
+                        # the Lock Table failed because the table doesn't exist yet; try again
+                        time.sleep(1)
+                        continue
+                    else:
+                        raise
+
+    def _store_tbl_exists(self) -> bool:
+        """Returns True if the store table exists, False otherwise."""
+        with Env.get().begin_xact(for_write=False) as conn:
+            q = (
+                'SELECT COUNT(*) FROM pg_catalog.pg_tables '
+                f"WHERE schemaname = 'public' AND tablename = {self._storage_name()!r}"
+            )
+            res = conn.execute(sql.text(q)).scalar_one()
+            return res == 1
+
     def create(self) -> None:
-
-        self.
+        """
+        Create or update store table to bring it in sync with self.sa_tbl. Idempotent.
+
+        This runs a sequence of DDL statements (Create Table, Alter Table Add Column, Create Index), each of which
+        is run in its own transaction.
+
+        The exception to that are local replicas, for which TableRestorer creates an enclosing transaction. In theory,
+        this should avoid the potential for race conditions that motivate the error handling present in
+        _exec_if_not_exists() (meaning: we shouldn't see those errors when creating local replicas).
+        TODO: remove the special case for local replicas in order to make the logic easier to reason about.
+        """
+        postgres_dialect = sql.dialects.postgresql.dialect()
+
+        if not self._store_tbl_exists():
+            # run Create Table If Not Exists; we always need If Not Exists to avoid race conditions between concurrent
+            # Pixeltable processes
+            create_stmt = sql.schema.CreateTable(self.sa_tbl, if_not_exists=True).compile(dialect=postgres_dialect)
+            self._exec_if_not_exists(str(create_stmt), wait_for_table=False)
+        else:
+            # ensure that all columns exist by running Alter Table Add Column If Not Exists for all columns
+            for col in self.sa_tbl.columns:
+                stmt = self._add_column_stmt(col)
+                self._exec_if_not_exists(stmt, wait_for_table=True)
+            # TODO: do we also need to ensure that these columns are now visible (ie, is there another potential race
+            # condition here?)
+
+        # ensure that all system indices exist by running Create Index If Not Exists
+        for idx in self.sa_tbl.indexes:
+            create_idx_stmt = sql.schema.CreateIndex(idx, if_not_exists=True).compile(dialect=postgres_dialect)
+            self._exec_if_not_exists(str(create_idx_stmt), wait_for_table=True)
+
+        # ensure that all visible non-system indices exist by running appropriate create statements
+        for id in self.tbl_version.get().idxs:
+            self.create_index(id)
+
+    def create_index(self, idx_id: int) -> None:
+        """Create If Not Exists for this index"""
+        idx_info = self.tbl_version.get().idxs[idx_id]
+        stmt = idx_info.idx.sa_create_stmt(self.tbl_version.get()._store_idx_name(idx_id), idx_info.val_col.sa_col)
+        self._exec_if_not_exists(str(stmt), wait_for_table=True)
+
+    def validate(self) -> None:
+        """Validate store table against self.table_version"""
+        with Env.get().begin_xact() as conn:
+            # check that all columns are present
+            q = f'SELECT column_name FROM information_schema.columns WHERE table_name = {self._storage_name()!r}'
+            store_col_info = {row[0] for row in conn.execute(sql.text(q)).fetchall()}
+            tbl_col_info = {col.store_name() for col in self.tbl_version.get().cols if col.is_stored}
+            assert tbl_col_info.issubset(store_col_info)
+
+            # check that all visible indices are present
+            q = f'SELECT indexname FROM pg_indexes WHERE tablename = {self._storage_name()!r}'
+            store_idx_names = {row[0] for row in conn.execute(sql.text(q)).fetchall()}
+            tbl_index_names = {
+                self.tbl_version.get()._store_idx_name(info.id) for info in self.tbl_version.get().idxs.values()
+            }
+            assert tbl_index_names.issubset(store_idx_names)
 
     def drop(self) -> None:
         """Drop store table"""
         conn = Env.get().conn
-        self.
+        drop_stmt = f'DROP TABLE IF EXISTS {self._storage_name()}'
+        conn.execute(sql.text(drop_stmt))
+
+    def _add_column_stmt(self, sa_col: sql.Column) -> str:
+        col_type_str = sa_col.type.compile(dialect=sql.dialects.postgresql.dialect())
+        return (
+            f'ALTER TABLE {self._storage_name()} ADD COLUMN IF NOT EXISTS '
+            f'{sa_col.name} {col_type_str} {"NOT " if not sa_col.nullable else ""} NULL'
        )
 
     def add_column(self, col: catalog.Column) -> None:
         """Add column(s) to the store-resident table based on a catalog column
@@ -190,14 +295,13 @@
         """
         assert col.is_stored
         conn = Env.get().conn
-        col_type_str = col.
+        col_type_str = col.sa_col_type.compile(dialect=conn.dialect)
         s_txt = f'ALTER TABLE {self._storage_name()} ADD COLUMN {col.store_name()} {col_type_str} NULL'
         added_storage_cols = [col.store_name()]
-        if col.records_errors:
-
-            s_txt += f' , ADD COLUMN {col.
-
-            added_storage_cols.extend([col.errormsg_store_name(), col.errortype_store_name()])
+        if col.stores_cellmd:
+            cellmd_type_str = col.sa_cellmd_type().compile(dialect=conn.dialect)
+            s_txt += f' , ADD COLUMN {col.cellmd_store_name()} {cellmd_type_str} DEFAULT NULL'
+            added_storage_cols.append(col.cellmd_store_name())
 
         stmt = sql.text(s_txt)
         log_stmt(_logger, stmt)
@@ -208,16 +312,13 @@
     def drop_column(self, col: catalog.Column) -> None:
         """Execute Alter Table Drop Column statement"""
         s_txt = f'ALTER TABLE {self._storage_name()} DROP COLUMN {col.store_name()}'
-        if col.records_errors:
-            s_txt += f' , DROP COLUMN {col.errormsg_store_name()}'
-            s_txt += f' , DROP COLUMN {col.errortype_store_name()}'
+        if col.stores_cellmd:
+            s_txt += f' , DROP COLUMN {col.cellmd_store_name()}'
         stmt = sql.text(s_txt)
         log_stmt(_logger, stmt)
         Env.get().conn.execute(stmt)
 
-    def load_column(
-        self, col: catalog.Column, exec_plan: ExecNode, value_expr_slot_idx: int, on_error: Literal['abort', 'ignore']
-    ) -> int:
+    def load_column(self, col: catalog.Column, exec_plan: ExecNode, abort_on_exc: bool) -> int:
         """Update store column of a computed column with values produced by an execution plan
 
         Returns:
@@ -226,84 +327,69 @@
             sql.exc.DBAPIError if there was a SQL error during execution
             excs.Error if on_error='abort' and there was an exception during row evaluation
         """
-        assert col.tbl.id == self.tbl_version.id
+        assert col.get_tbl().id == self.tbl_version.id
         num_excs = 0
         num_rows = 0
         # create temp table to store output of exec_plan, with the same primary key as the store table
         tmp_name = f'temp_{self._storage_name()}'
-        tmp_pk_cols = [sql.Column(col.name, col.type, primary_key=True) for col in self.pk_columns()]
-        tmp_cols = tmp_pk_cols.copy()
+        tmp_pk_cols = tuple(sql.Column(col.name, col.type, primary_key=True) for col in self.pk_columns())
         tmp_val_col = sql.Column(col.sa_col.name, col.sa_col.type)
-        tmp_cols.append(tmp_val_col)
+        tmp_cols = [*tmp_pk_cols, tmp_val_col]
         # add error columns if the store column records errors
-        if col.records_errors:
-
-            tmp_cols.append(
-
-
+        if col.stores_cellmd:
+            tmp_cellmd_col = sql.Column(col.sa_cellmd_col.name, col.sa_cellmd_col.type)
+            tmp_cols.append(tmp_cellmd_col)
+        tmp_col_names = [col.name for col in tmp_cols]
+
         tmp_tbl = sql.Table(tmp_name, self.sa_md, *tmp_cols, prefixes=['TEMPORARY'])
         conn = Env.get().conn
         tmp_tbl.create(bind=conn)
+
+        row_builder = exec_plan.row_builder
+
         try:
+            table_rows: list[tuple[Any]] = []
+
             # insert rows from exec_plan into temp table
-            # TODO: unify the table row construction logic with RowBuilder.create_table_row()
             for row_batch in exec_plan:
                 num_rows += len(row_batch)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                        tbl_row[col.sa_errortype_col.name] = error_type
-                        tbl_row[col.sa_errormsg_col.name] = error_msg
-                    else:
-                        if col.col_type.is_image_type() and result_row.file_urls[value_expr_slot_idx] is None:
-                            # we have yet to store this image
-                            filepath = str(MediaStore.prepare_media_path(col.tbl.id, col.id, col.tbl.get().version))
-                            result_row.flush_img(value_expr_slot_idx, filepath)
-                        val = result_row.get_stored_val(value_expr_slot_idx, col.sa_col.type)
-                        if col.col_type.is_media_type():
-                            val = self._move_tmp_media_file(val, col, result_row.pk[-1])
-                        tbl_row[col.sa_col.name] = val
-                    if col.records_errors:
-                        tbl_row[col.sa_errortype_col.name] = None
-                        tbl_row[col.sa_errormsg_col.name] = None
-
-                    tbl_rows.append(tbl_row)
-                conn.execute(sql.insert(tmp_tbl), tbl_rows)
+                batch_table_rows: list[tuple[Any]] = []
+
+                for row in row_batch:
+                    if abort_on_exc and row.has_exc():
+                        exc = row.get_first_exc()
+                        raise excs.Error(f'Error while evaluating computed column {col.name!r}:\n{exc}') from exc
+                    table_row, num_row_exc = row_builder.create_store_table_row(row, None, row.pk)
+                    num_excs += num_row_exc
+                    batch_table_rows.append(tuple(table_row))
+
+                table_rows.extend(batch_table_rows)
+
+                if len(table_rows) >= self.__INSERT_BATCH_SIZE:
+                    self.sql_insert(tmp_tbl, tmp_col_names, table_rows)
+                    table_rows.clear()
+
+            if len(table_rows) > 0:
+                self.sql_insert(tmp_tbl, tmp_col_names, table_rows)
 
             # update store table with values from temp table
             update_stmt = sql.update(self.sa_tbl)
             for pk_col, tmp_pk_col in zip(self.pk_columns(), tmp_pk_cols):
                 update_stmt = update_stmt.where(pk_col == tmp_pk_col)
             update_stmt = update_stmt.values({col.sa_col: tmp_val_col})
-            if col.records_errors:
-                update_stmt = update_stmt.values(
-                    {col.sa_errortype_col: tmp_errortype_col, col.sa_errormsg_col: tmp_errormsg_col}
-                )
+            if col.stores_cellmd:
+                update_stmt = update_stmt.values({col.sa_cellmd_col: tmp_cellmd_col})
             log_explain(_logger, update_stmt, conn)
             conn.execute(update_stmt)
+
         finally:
 
            def remove_tmp_tbl() -> None:
                 self.sa_md.remove(tmp_tbl)
                 tmp_tbl.drop(bind=conn)
 
-            run_cleanup(remove_tmp_tbl, raise_error=True)
+            run_cleanup(remove_tmp_tbl, raise_error=False)
+
         return num_excs
 
     def insert_rows(
@@ -311,9 +397,9 @@
         exec_plan: ExecNode,
         v_min: int,
         show_progress: bool = True,
-        rowids: Optional[Iterator[int]] = None,
+        rowids: Iterator[int] | None = None,
         abort_on_exc: bool = False,
-    ) -> tuple[
+    ) -> tuple[set[int], RowCountStats]:
         """Insert rows into the store table and update the catalog table's md
         Returns:
             number of inserted rows, number of exceptions, set of column ids that have exceptions
@@ -323,53 +409,80 @@
         num_excs = 0
         num_rows = 0
         cols_with_excs: set[int] = set()
-        progress_bar: Optional[tqdm] = None  # create this only after we started executing
+        progress_bar: tqdm | None = None  # create this only after we started executing
        row_builder = exec_plan.row_builder
-
-
+
+        store_col_names = row_builder.store_column_names()
 
        try:
+            table_rows: list[tuple[Any]] = []
             exec_plan.open()
+
             for row_batch in exec_plan:
                 num_rows += len(row_batch)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                batch_table_rows: list[tuple[Any]] = []
+
+                # compute batch of rows and convert them into table rows
+                for row in row_batch:
+                    # if abort_on_exc == True, we need to check for media validation exceptions
+                    if abort_on_exc and row.has_exc():
+                        exc = row.get_first_exc()
+                        raise exc
+
+                    rowid = (next(rowids),) if rowids is not None else row.pk[:-1]
+                    pk = (*rowid, v_min)
+                    assert len(pk) == len(self._pk_cols)
+                    table_row, num_row_exc = row_builder.create_store_table_row(row, cols_with_excs, pk)
+                    num_excs += num_row_exc
+
+                    if show_progress and Env.get().verbosity >= 1:
+                        if progress_bar is None:
+                            warnings.simplefilter('ignore', category=TqdmWarning)
+                            progress_bar = tqdm(
+                                desc=f'Inserting rows into `{self.tbl_version.get().name}`',
+                                unit=' rows',
+                                ncols=100,
+                                file=sys.stdout,
+                            )
+                        progress_bar.update(1)
+
+                    batch_table_rows.append(tuple(table_row))
+
+                table_rows.extend(batch_table_rows)
+
+                # if a batch is ready for insertion into the database, insert it
+                if len(table_rows) >= self.__INSERT_BATCH_SIZE:
+                    self.sql_insert(self.sa_tbl, store_col_names, table_rows)
+                    table_rows.clear()
+
+            # insert any remaining rows
+            if len(table_rows) > 0:
+                self.sql_insert(self.sa_tbl, store_col_names, table_rows)
+
             if progress_bar is not None:
                 progress_bar.close()
-
+            computed_values = exec_plan.ctx.num_computed_exprs * num_rows
+            row_counts = RowCountStats(ins_rows=num_rows, num_excs=num_excs, computed_values=computed_values)
+
+            return cols_with_excs, row_counts
         finally:
             exec_plan.close()
 
-    def _versions_clause(self, versions: list[Optional[int]], match_on_vmin: bool) -> sql.ColumnElement[bool]:
+    @classmethod
+    def sql_insert(cls, sa_tbl: sql.Table, store_col_names: list[str], table_rows: list[tuple[Any]]) -> None:
+        assert len(table_rows) > 0
+        conn = Env.get().conn
+        conn.execute(sql.insert(sa_tbl), [dict(zip(store_col_names, table_row)) for table_row in table_rows])
+
+        # TODO: Inserting directly via psycopg delivers a small performance benefit, but is somewhat fraught due to
+        # differences in the data representation that SQLAlchemy/psycopg expect. The below code will do the
+        # insertion in psycopg and can be used if/when we decide to pursue that optimization.
+        # col_names_str = ", ".join(store_col_names)
+        # placeholders_str = ", ".join('%s' for _ in store_col_names)
+        # stmt_text = f'INSERT INTO {self.sa_tbl.name} ({col_names_str}) VALUES ({placeholders_str})'
+        # conn.exec_driver_sql(stmt_text, table_rows)
+
+    def _versions_clause(self, versions: list[int | None], match_on_vmin: bool) -> sql.ColumnElement[bool]:
         """Return filter for base versions"""
         v = versions[0]
         if v is None:
@@ -387,9 +500,9 @@
     def delete_rows(
         self,
         current_version: int,
-        base_versions: list[Optional[int]],
+        base_versions: list[int | None],
         match_on_vmin: bool,
-        where_clause: Optional[sql.ColumnElement[bool]],
+        where_clause: sql.ColumnElement[bool] | None,
     ) -> int:
         """Mark rows as deleted that are live and were created prior to current_version.
         Also: populate the undo columns
@@ -403,23 +516,23 @@
             number of deleted rows
         """
         where_clause = sql.true() if where_clause is None else where_clause
-        where_clause = sql.and_(
-            self.v_min_col < current_version, self.v_max_col == schema.Table.MAX_VERSION, where_clause
-        )
+        version_clause = sql.and_(self.v_min_col < current_version, self.v_max_col == schema.Table.MAX_VERSION)
         rowid_join_clause = self._rowid_join_predicate()
         base_versions_clause = (
             sql.true() if len(base_versions) == 0 else self.base._versions_clause(base_versions, match_on_vmin)
         )
-        set_clause: dict[sql.Column,
+        set_clause: dict[sql.Column, int | sql.Column] = {self.v_max_col: current_version}
         for index_info in self.tbl_version.get().idxs_by_name.values():
             # copy value column to undo column
             set_clause[index_info.undo_col.sa_col] = index_info.val_col.sa_col
             # set value column to NULL
             set_clause[index_info.val_col.sa_col] = None
+
         stmt = (
             sql.update(self.sa_tbl)
             .values(set_clause)
             .where(where_clause)
+            .where(version_clause)
             .where(rowid_join_clause)
             .where(base_versions_clause)
         )
@@ -435,8 +548,7 @@
             *[c1 == c2 for c1, c2 in zip(self.rowid_columns(), filter_view.rowid_columns())],
         )
         stmt = (
-            sql.select(
-            .select_from(self.sa_tbl)
+            sql.select(self.sa_tbl)
             .where(self.v_min_col <= version)
             .where(self.v_max_col > version)
             .where(sql.exists().where(filter_predicate))
@@ -500,26 +612,31 @@ class StoreComponentView(StoreView):
     PK: now also includes pos, the position returned by the ComponentIterator for the base row identified by base_rowid
     """
 
-    rowid_cols: list[sql.Column]
-    pos_col: sql.Column
-    pos_col_idx: int
-
     def __init__(self, catalog_view: catalog.TableVersion):
         super().__init__(catalog_view)
 
     def _create_rowid_columns(self) -> list[sql.Column]:
         # each base row is expanded into n view rows
-
+        rowid_cols = [sql.Column(c.name, c.type) for c in self.base.rowid_columns()]
         # name of pos column: avoid collisions with bases' pos columns
-
-
-
-
-
-    def
-
+        pos_col = sql.Column(f'pos_{len(rowid_cols) - 1}', sql.BigInteger, nullable=False)
+        rowid_cols.append(pos_col)
+        return rowid_cols
+
+    @property
+    def pos_col(self) -> sql.Column:
+        return self.rowid_columns()[-1]
+
+    @property
+    def pos_col_idx(self) -> int:
+        return len(self.rowid_columns()) - 1
+
+    def create_sa_tbl(self, tbl_version: catalog.TableVersion | None = None) -> None:
+        if tbl_version is None:
+            tbl_version = self.tbl_version.get()
+        super().create_sa_tbl(tbl_version)
         # we need to fix up the 'pos' column in TableVersion
-
+        tbl_version.cols_by_name['pos'].sa_col = self.pos_col
 
     def _rowid_join_predicate(self) -> sql.ColumnElement[bool]:
         return sql.and_(