pixeltable 0.4.12__py3-none-any.whl → 0.4.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +11 -1
- pixeltable/catalog/__init__.py +2 -1
- pixeltable/catalog/catalog.py +179 -63
- pixeltable/catalog/column.py +24 -20
- pixeltable/catalog/table.py +96 -124
- pixeltable/catalog/table_metadata.py +96 -0
- pixeltable/catalog/table_version.py +15 -6
- pixeltable/catalog/view.py +22 -22
- pixeltable/config.py +2 -0
- pixeltable/dataframe.py +3 -2
- pixeltable/env.py +43 -21
- pixeltable/exec/__init__.py +1 -0
- pixeltable/exec/aggregation_node.py +0 -1
- pixeltable/exec/cache_prefetch_node.py +74 -98
- pixeltable/exec/data_row_batch.py +2 -18
- pixeltable/exec/in_memory_data_node.py +1 -1
- pixeltable/exec/object_store_save_node.py +299 -0
- pixeltable/exec/sql_node.py +28 -33
- pixeltable/exprs/data_row.py +31 -25
- pixeltable/exprs/json_path.py +6 -5
- pixeltable/exprs/row_builder.py +6 -12
- pixeltable/functions/gemini.py +1 -1
- pixeltable/functions/openai.py +1 -1
- pixeltable/functions/video.py +5 -6
- pixeltable/globals.py +6 -7
- pixeltable/index/embedding_index.py +5 -8
- pixeltable/io/__init__.py +2 -1
- pixeltable/io/fiftyone.py +1 -1
- pixeltable/io/label_studio.py +4 -5
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/parquet.py +9 -89
- pixeltable/io/table_data_conduit.py +2 -2
- pixeltable/iterators/audio.py +1 -1
- pixeltable/iterators/document.py +10 -12
- pixeltable/iterators/video.py +1 -1
- pixeltable/metadata/schema.py +7 -0
- pixeltable/plan.py +26 -1
- pixeltable/share/packager.py +8 -2
- pixeltable/share/publish.py +3 -9
- pixeltable/type_system.py +1 -3
- pixeltable/utils/arrow.py +97 -2
- pixeltable/utils/dbms.py +31 -5
- pixeltable/utils/gcs_store.py +283 -0
- pixeltable/utils/lancedb.py +88 -0
- pixeltable/utils/local_store.py +316 -0
- pixeltable/utils/object_stores.py +497 -0
- pixeltable/utils/pytorch.py +5 -6
- pixeltable/utils/s3_store.py +354 -0
- {pixeltable-0.4.12.dist-info → pixeltable-0.4.14.dist-info}/METADATA +162 -127
- {pixeltable-0.4.12.dist-info → pixeltable-0.4.14.dist-info}/RECORD +53 -47
- pixeltable/utils/media_store.py +0 -248
- pixeltable/utils/s3.py +0 -17
- {pixeltable-0.4.12.dist-info → pixeltable-0.4.14.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.12.dist-info → pixeltable-0.4.14.dist-info}/entry_points.txt +0 -0
- {pixeltable-0.4.12.dist-info → pixeltable-0.4.14.dist-info}/licenses/LICENSE +0 -0
pixeltable/__init__.py
CHANGED
|
@@ -1,7 +1,17 @@
|
|
|
1
1
|
# ruff: noqa: F401
|
|
2
2
|
|
|
3
3
|
from .__version__ import __version__, __version_tuple__
|
|
4
|
-
from .catalog import
|
|
4
|
+
from .catalog import (
|
|
5
|
+
Column,
|
|
6
|
+
ColumnMetadata,
|
|
7
|
+
IndexMetadata,
|
|
8
|
+
InsertableTable,
|
|
9
|
+
Table,
|
|
10
|
+
TableMetadata,
|
|
11
|
+
UpdateStatus,
|
|
12
|
+
VersionMetadata,
|
|
13
|
+
View,
|
|
14
|
+
)
|
|
5
15
|
from .dataframe import DataFrame
|
|
6
16
|
from .exceptions import Error, ExprEvalError, PixeltableWarning
|
|
7
17
|
from .func import Aggregator, Function, Tool, ToolChoice, Tools, expr_udf, mcp_udfs, query, retrieval_udf, uda, udf
|
pixeltable/catalog/__init__.py
CHANGED
|
@@ -8,7 +8,8 @@ from .insertable_table import InsertableTable
|
|
|
8
8
|
from .named_function import NamedFunction
|
|
9
9
|
from .path import Path
|
|
10
10
|
from .schema_object import SchemaObject
|
|
11
|
-
from .table import
|
|
11
|
+
from .table import Table
|
|
12
|
+
from .table_metadata import ColumnMetadata, IndexMetadata, TableMetadata, VersionMetadata
|
|
12
13
|
from .table_version import TableVersion
|
|
13
14
|
from .table_version_handle import ColumnHandle, TableVersionHandle
|
|
14
15
|
from .table_version_path import TableVersionPath
|
pixeltable/catalog/catalog.py
CHANGED
|
@@ -14,8 +14,6 @@ import psycopg
|
|
|
14
14
|
import sqlalchemy as sql
|
|
15
15
|
|
|
16
16
|
from pixeltable import exceptions as excs
|
|
17
|
-
|
|
18
|
-
# from pixeltable import exceptions as excs, UpdateStatus
|
|
19
17
|
from pixeltable.env import Env
|
|
20
18
|
from pixeltable.iterators import ComponentIterator
|
|
21
19
|
from pixeltable.metadata import schema
|
|
@@ -906,9 +904,9 @@ class Catalog:
|
|
|
906
904
|
"""Must be executed inside a transaction. Might raise PendingTableOpsError."""
|
|
907
905
|
if (tbl_id, version) not in self._tbls:
|
|
908
906
|
if version is None:
|
|
909
|
-
self._load_tbl(tbl_id)
|
|
907
|
+
return self._load_tbl(tbl_id)
|
|
910
908
|
else:
|
|
911
|
-
self._load_tbl_at_version(tbl_id, version)
|
|
909
|
+
return self._load_tbl_at_version(tbl_id, version)
|
|
912
910
|
return self._tbls.get((tbl_id, version))
|
|
913
911
|
|
|
914
912
|
@retry_loop(for_write=True)
|
|
@@ -1040,23 +1038,18 @@ class Catalog:
|
|
|
1040
1038
|
)
|
|
1041
1039
|
|
|
1042
1040
|
# Ensure that the system directory exists.
|
|
1043
|
-
self.
|
|
1041
|
+
self.__ensure_system_dir_exists()
|
|
1044
1042
|
|
|
1045
1043
|
# Now check to see if this table already exists in the catalog.
|
|
1046
1044
|
existing = self.get_table_by_id(tbl_id)
|
|
1047
1045
|
if existing is not None:
|
|
1048
1046
|
existing_path = Path.parse(existing._path(), allow_system_path=True)
|
|
1049
|
-
if existing_path != path:
|
|
1047
|
+
if existing_path != path and not existing_path.is_system_path:
|
|
1050
1048
|
# It does exist, under a different path from the specified one.
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
)
|
|
1056
|
-
# If it's a system table, then this means it was created at some point as the ancestor of some other
|
|
1057
|
-
# table (a snapshot-over-snapshot scenario). In that case, we simply move it to the new (named)
|
|
1058
|
-
# location.
|
|
1059
|
-
self._move(existing_path, path)
|
|
1049
|
+
raise excs.Error(
|
|
1050
|
+
f'That table has already been replicated as {existing_path!r}.\n'
|
|
1051
|
+
f'Drop the existing replica if you wish to re-create it.'
|
|
1052
|
+
)
|
|
1060
1053
|
|
|
1061
1054
|
# Now store the metadata for this replica's proper ancestors. If one or more proper ancestors
|
|
1062
1055
|
# do not yet exist in the store, they will be created as anonymous system tables.
|
|
@@ -1084,14 +1077,31 @@ class Catalog:
|
|
|
1084
1077
|
# the new TableVersion instance. This is necessary because computed columns of descendant tables might
|
|
1085
1078
|
# reference columns of the ancestor table that only exist in the new version.
|
|
1086
1079
|
replica = Catalog.get().get_table_by_id(ancestor_id)
|
|
1087
|
-
assert replica is not None # If it didn't exist before, it must have been created by now.
|
|
1088
|
-
replica
|
|
1080
|
+
# assert replica is not None # If it didn't exist before, it must have been created by now.
|
|
1081
|
+
if replica is not None:
|
|
1082
|
+
replica._tbl_version_path.clear_cached_md()
|
|
1089
1083
|
|
|
1090
|
-
#
|
|
1091
|
-
#
|
|
1084
|
+
# Store the metadata for the table being replicated; as before, it could be a new version or a known version.
|
|
1085
|
+
# If it's a new version, then a TableVersion record will be created, unless the table being replicated
|
|
1092
1086
|
# is a pure snapshot.
|
|
1093
1087
|
self.__store_replica_md(path, md[0])
|
|
1094
1088
|
|
|
1089
|
+
# Finally, it's possible that the table already exists in the catalog, but as an anonymous system table that
|
|
1090
|
+
# was hidden the last time we checked (and that just became visible when the replica was imported). In this
|
|
1091
|
+
# case, we need to make the existing table visible by moving it to the specified path.
|
|
1092
|
+
# We need to do this at the end, since `existing_path` needs to first have a non-fragment table version in
|
|
1093
|
+
# order to be instantiated as a schema object.
|
|
1094
|
+
existing = self.get_table_by_id(tbl_id)
|
|
1095
|
+
if existing is not None:
|
|
1096
|
+
existing_path = Path.parse(existing._path(), allow_system_path=True)
|
|
1097
|
+
if existing_path != path:
|
|
1098
|
+
assert existing_path.is_system_path
|
|
1099
|
+
self._move(existing_path, path)
|
|
1100
|
+
|
|
1101
|
+
def __ensure_system_dir_exists(self) -> Dir:
|
|
1102
|
+
system_path = Path.parse('_system', allow_system_path=True)
|
|
1103
|
+
return self._create_dir(system_path, if_exists=IfExistsParam.IGNORE, parents=False)
|
|
1104
|
+
|
|
1095
1105
|
def __store_replica_md(self, path: Path, md: schema.FullTableMd) -> None:
|
|
1096
1106
|
_logger.info(f'Creating replica table at {path!r} with ID: {md.tbl_md.tbl_id}')
|
|
1097
1107
|
dir = self._get_schema_object(path.parent, expected=Dir, raise_if_not_exists=True)
|
|
@@ -1104,6 +1114,7 @@ class Catalog:
|
|
|
1104
1114
|
new_tbl_md: Optional[schema.TableMd] = None
|
|
1105
1115
|
new_version_md: Optional[schema.TableVersionMd] = None
|
|
1106
1116
|
new_schema_version_md: Optional[schema.TableSchemaVersionMd] = None
|
|
1117
|
+
is_new_tbl_version: bool = False
|
|
1107
1118
|
|
|
1108
1119
|
# We need to ensure that the table metadata in the catalog always reflects the latest observed version of
|
|
1109
1120
|
# this table. (In particular, if this is a base table, then its table metadata need to be consistent
|
|
@@ -1138,14 +1149,21 @@ class Catalog:
|
|
|
1138
1149
|
existing_version_md_row = conn.execute(q).one_or_none()
|
|
1139
1150
|
if existing_version_md_row is None:
|
|
1140
1151
|
new_version_md = md.version_md
|
|
1152
|
+
is_new_tbl_version = True
|
|
1141
1153
|
else:
|
|
1142
1154
|
existing_version_md = schema.md_from_dict(schema.TableVersionMd, existing_version_md_row.md)
|
|
1143
|
-
|
|
1155
|
+
# Validate that the existing metadata are identical to the new metadata, except that their is_fragment
|
|
1156
|
+
# flags may differ.
|
|
1157
|
+
if dataclasses.replace(existing_version_md, is_fragment=md.version_md.is_fragment) != md.version_md:
|
|
1144
1158
|
raise excs.Error(
|
|
1145
1159
|
f'The version metadata for the replica {path!r}:{md.version_md.version} is inconsistent with '
|
|
1146
1160
|
'the metadata recorded from a prior replica.\n'
|
|
1147
1161
|
'This is likely due to data corruption in the replicated table.'
|
|
1148
1162
|
)
|
|
1163
|
+
if existing_version_md.is_fragment and not md.version_md.is_fragment:
|
|
1164
|
+
# This version exists in the DB as a fragment, but we're importing a complete copy of the same version;
|
|
1165
|
+
# set the is_fragment flag to False in the DB.
|
|
1166
|
+
new_version_md = md.version_md
|
|
1149
1167
|
|
|
1150
1168
|
# Do the same thing for TableSchemaVersion.
|
|
1151
1169
|
q = (
|
|
@@ -1162,6 +1180,7 @@ class Catalog:
|
|
|
1162
1180
|
existing_schema_version_md = schema.md_from_dict(
|
|
1163
1181
|
schema.TableSchemaVersionMd, existing_schema_version_md_row.md
|
|
1164
1182
|
)
|
|
1183
|
+
# Validate that the existing metadata are identical to the new metadata.
|
|
1165
1184
|
if existing_schema_version_md != md.schema_version_md:
|
|
1166
1185
|
raise excs.Error(
|
|
1167
1186
|
f'The schema version metadata for the replica {path!r}:{md.schema_version_md.schema_version} '
|
|
@@ -1171,7 +1190,7 @@ class Catalog:
|
|
|
1171
1190
|
|
|
1172
1191
|
self.store_tbl_md(UUID(tbl_id), None, new_tbl_md, new_version_md, new_schema_version_md)
|
|
1173
1192
|
|
|
1174
|
-
if
|
|
1193
|
+
if is_new_tbl_version and not md.is_pure_snapshot:
|
|
1175
1194
|
# It's a new version of a table that has a physical store, so we need to create a TableVersion instance.
|
|
1176
1195
|
TableVersion.create_replica(md)
|
|
1177
1196
|
|
|
@@ -1206,41 +1225,72 @@ class Catalog:
|
|
|
1206
1225
|
|
|
1207
1226
|
self._drop_tbl(tbl, force=force, is_replace=False)
|
|
1208
1227
|
|
|
1209
|
-
def _drop_tbl(self, tbl: Table, force: bool, is_replace: bool) -> None:
|
|
1228
|
+
def _drop_tbl(self, tbl: Table | TableVersionPath, force: bool, is_replace: bool) -> None:
|
|
1210
1229
|
"""
|
|
1211
1230
|
Drop the table (and recursively its views, if force == True).
|
|
1212
1231
|
|
|
1232
|
+
`tbl` can be an instance of `Table` for a user table, or `TableVersionPath` for a hidden (system) table.
|
|
1233
|
+
|
|
1213
1234
|
Locking protocol:
|
|
1214
1235
|
- X-lock base before X-locking any view
|
|
1215
1236
|
- deadlock-free wrt TableVersion.insert() (insert propagation also proceeds top-down)
|
|
1216
1237
|
- X-locks parent dir prior to calling TableVersion.drop(): prevent concurrent creation of another SchemaObject
|
|
1217
1238
|
in the same directory with the same name (which could lead to duplicate names if we get aborted)
|
|
1218
1239
|
"""
|
|
1219
|
-
|
|
1220
|
-
|
|
1240
|
+
is_pure_snapshot: bool
|
|
1241
|
+
if isinstance(tbl, TableVersionPath):
|
|
1242
|
+
tvp = tbl
|
|
1243
|
+
tbl_id = tvp.tbl_id
|
|
1244
|
+
tbl = None
|
|
1245
|
+
is_pure_snapshot = False
|
|
1246
|
+
else:
|
|
1247
|
+
tvp = tbl._tbl_version_path
|
|
1248
|
+
tbl_id = tbl._id
|
|
1249
|
+
is_pure_snapshot = tbl._tbl_version is None
|
|
1250
|
+
|
|
1251
|
+
if tbl is not None:
|
|
1252
|
+
self._acquire_dir_xlock(dir_id=tbl._dir_id)
|
|
1253
|
+
self._acquire_tbl_lock(tbl_id=tbl_id, for_write=True, lock_mutable_tree=False)
|
|
1254
|
+
|
|
1255
|
+
view_ids = self.get_view_ids(tbl_id, for_update=True)
|
|
1256
|
+
is_replica = tvp.is_replica()
|
|
1257
|
+
do_drop = True
|
|
1258
|
+
|
|
1259
|
+
_logger.debug(f'Preparing to drop table {tbl_id} (force={force!r}, is_replica={is_replica}).')
|
|
1221
1260
|
|
|
1222
|
-
view_ids = self.get_view_ids(tbl._id, for_update=True)
|
|
1223
1261
|
if len(view_ids) > 0:
|
|
1224
|
-
if
|
|
1225
|
-
|
|
1226
|
-
|
|
1262
|
+
if force:
|
|
1263
|
+
# recursively drop views first
|
|
1264
|
+
for view_id in view_ids:
|
|
1265
|
+
view = self.get_table_by_id(view_id)
|
|
1266
|
+
self._drop_tbl(view, force=force, is_replace=is_replace)
|
|
1267
|
+
|
|
1268
|
+
elif is_replica:
|
|
1269
|
+
# Dropping a replica with dependents and no 'force': just rename it to be a hidden table;
|
|
1270
|
+
# the actual table will not be dropped.
|
|
1271
|
+
assert tbl is not None # can only occur for a user table
|
|
1272
|
+
system_dir = self.__ensure_system_dir_exists()
|
|
1273
|
+
new_name = f'replica_{tbl_id.hex}'
|
|
1274
|
+
_logger.debug(f'{tbl._path()!r} is a replica with dependents; renaming to {new_name!r}.')
|
|
1275
|
+
tbl._move(new_name, system_dir._id)
|
|
1276
|
+
do_drop = False # don't actually clear the catalog for this table
|
|
1277
|
+
|
|
1278
|
+
else:
|
|
1279
|
+
# It has dependents but is not a replica and no 'force', so it's an error to drop it.
|
|
1280
|
+
assert tbl is not None # can only occur for a user table
|
|
1227
1281
|
msg: str
|
|
1228
1282
|
if is_replace:
|
|
1229
1283
|
msg = (
|
|
1230
|
-
f'{
|
|
1284
|
+
f'{tbl._display_name()} {tbl._path()!r} already exists and has dependents. '
|
|
1231
1285
|
"Use `if_exists='replace_force'` to replace it."
|
|
1232
1286
|
)
|
|
1233
1287
|
else:
|
|
1234
|
-
msg = f'{
|
|
1288
|
+
msg = f'{tbl._display_name()} {tbl._path()!r} has dependents.'
|
|
1235
1289
|
raise excs.Error(msg)
|
|
1236
1290
|
|
|
1237
|
-
for view_id in view_ids:
|
|
1238
|
-
view = self.get_table_by_id(view_id)
|
|
1239
|
-
self._drop_tbl(view, force=force, is_replace=is_replace)
|
|
1240
|
-
|
|
1241
1291
|
# if this is a mutable view of a mutable base, advance the base's view_sn
|
|
1242
|
-
if isinstance(tbl, View) and
|
|
1243
|
-
base_id =
|
|
1292
|
+
if isinstance(tbl, View) and tvp.is_mutable() and tvp.base.is_mutable():
|
|
1293
|
+
base_id = tvp.base.tbl_id
|
|
1244
1294
|
base_tv = self.get_tbl_version(base_id, None, validate_initialized=True)
|
|
1245
1295
|
base_tv.tbl_md.view_sn += 1
|
|
1246
1296
|
self._modified_tvs.add(base_tv.handle)
|
|
@@ -1251,26 +1301,46 @@ class Catalog:
|
|
|
1251
1301
|
)
|
|
1252
1302
|
assert result.rowcount == 1, result.rowcount
|
|
1253
1303
|
|
|
1254
|
-
if
|
|
1255
|
-
|
|
1256
|
-
|
|
1257
|
-
|
|
1258
|
-
|
|
1259
|
-
|
|
1260
|
-
|
|
1261
|
-
|
|
1262
|
-
|
|
1263
|
-
|
|
1264
|
-
|
|
1265
|
-
|
|
1266
|
-
|
|
1267
|
-
|
|
1268
|
-
|
|
1269
|
-
|
|
1270
|
-
|
|
1304
|
+
if do_drop:
|
|
1305
|
+
if not is_pure_snapshot:
|
|
1306
|
+
# invalidate the TableVersion instance when we're done so that existing references to it can find out it
|
|
1307
|
+
# has been dropped
|
|
1308
|
+
self._modified_tvs.add(tvp.tbl_version)
|
|
1309
|
+
tv = tvp.tbl_version.get() if tvp.tbl_version is not None else None
|
|
1310
|
+
if not is_pure_snapshot:
|
|
1311
|
+
# drop the store table before deleting the Table record
|
|
1312
|
+
tv = tvp.tbl_version.get()
|
|
1313
|
+
tv.drop()
|
|
1314
|
+
|
|
1315
|
+
self.delete_tbl_md(tbl_id)
|
|
1316
|
+
tvp.clear_cached_md()
|
|
1317
|
+
|
|
1318
|
+
assert (
|
|
1319
|
+
is_replica
|
|
1320
|
+
or (tbl_id, None) in self._tbls # non-replica tables must have an entry with effective_version=None
|
|
1321
|
+
)
|
|
1322
|
+
|
|
1323
|
+
# Remove visible Table references (we do this even for a replica that was just renamed).
|
|
1324
|
+
versions = [version for id, version in self._tbls if id == tbl_id]
|
|
1271
1325
|
for version in versions:
|
|
1272
|
-
del self._tbls[
|
|
1273
|
-
|
|
1326
|
+
del self._tbls[tbl_id, version]
|
|
1327
|
+
|
|
1328
|
+
_logger.info(f'Dropped table {tbl_id if tbl is None else repr(tbl._path())}.')
|
|
1329
|
+
|
|
1330
|
+
if (
|
|
1331
|
+
is_replica # if this is a replica,
|
|
1332
|
+
and do_drop # and it was actually dropped (not just renamed),
|
|
1333
|
+
and tvp.base is not None # and it has a base table,
|
|
1334
|
+
):
|
|
1335
|
+
base_tbl = self.get_table_by_id(tvp.base.tbl_id)
|
|
1336
|
+
base_tbl_path = None if base_tbl is None else Path.parse(base_tbl._path(), allow_system_path=True)
|
|
1337
|
+
if (
|
|
1338
|
+
(base_tbl_path is None or base_tbl_path.is_system_path) # and the base table is hidden,
|
|
1339
|
+
and len(self.get_view_ids(tvp.base.tbl_id, for_update=True)) == 0 # and has no other dependents,
|
|
1340
|
+
):
|
|
1341
|
+
# then drop the base table as well (possibly recursively).
|
|
1342
|
+
_logger.debug(f'Dropping hidden base table {tvp.base.tbl_id} of dropped replica {tbl_id}.')
|
|
1343
|
+
self._drop_tbl(tvp.base, force=False, is_replace=False)
|
|
1274
1344
|
|
|
1275
1345
|
@retry_loop(for_write=True)
|
|
1276
1346
|
def create_dir(self, path: Path, if_exists: IfExistsParam, parents: bool) -> Dir:
|
|
@@ -1456,7 +1526,7 @@ class Catalog:
|
|
|
1456
1526
|
row = conn.execute(q).one_or_none()
|
|
1457
1527
|
return schema.Dir(**row._mapping) if row is not None else None
|
|
1458
1528
|
|
|
1459
|
-
def _load_tbl(self, tbl_id: UUID) ->
|
|
1529
|
+
def _load_tbl(self, tbl_id: UUID) -> Optional[Table]:
|
|
1460
1530
|
"""Loads metadata for the table with the given id and caches it."""
|
|
1461
1531
|
_logger.info(f'Loading table {tbl_id}')
|
|
1462
1532
|
from .insertable_table import InsertableTable
|
|
@@ -1470,7 +1540,7 @@ class Catalog:
|
|
|
1470
1540
|
if has_pending_ops:
|
|
1471
1541
|
raise PendingTableOpsError(tbl_id)
|
|
1472
1542
|
|
|
1473
|
-
q = (
|
|
1543
|
+
q: sql.Executable = (
|
|
1474
1544
|
sql.select(schema.Table, schema.TableSchemaVersion)
|
|
1475
1545
|
.join(schema.TableSchemaVersion)
|
|
1476
1546
|
.where(schema.Table.id == schema.TableSchemaVersion.tbl_id)
|
|
@@ -1486,13 +1556,34 @@ class Catalog:
|
|
|
1486
1556
|
|
|
1487
1557
|
tbl_md = schema.md_from_dict(schema.TableMd, tbl_record.md)
|
|
1488
1558
|
view_md = tbl_md.view_md
|
|
1559
|
+
|
|
1560
|
+
if tbl_md.is_replica and not tbl_md.is_snapshot:
|
|
1561
|
+
# If this is a non-snapshot replica, we have to load it as a specific version handle. This is because:
|
|
1562
|
+
# (1) the head version might be a version fragment that isn't user-accessible, and
|
|
1563
|
+
# (2) the cached data in view_md.base_versions is not reliable, since the replicated version does not
|
|
1564
|
+
# necessarily track the head version of the originally shared table.
|
|
1565
|
+
|
|
1566
|
+
# Query for the latest non-fragment table version.
|
|
1567
|
+
q = (
|
|
1568
|
+
sql.select(schema.TableVersion.version)
|
|
1569
|
+
.where(schema.TableVersion.tbl_id == tbl_id)
|
|
1570
|
+
.where(schema.TableVersion.md['is_fragment'].astext == 'false')
|
|
1571
|
+
.order_by(schema.TableVersion.md['version'].cast(sql.Integer).desc())
|
|
1572
|
+
.limit(1)
|
|
1573
|
+
)
|
|
1574
|
+
row = conn.execute(q).one_or_none()
|
|
1575
|
+
if row is not None:
|
|
1576
|
+
version = row[0]
|
|
1577
|
+
return self._load_tbl_at_version(tbl_id, version)
|
|
1578
|
+
return None
|
|
1579
|
+
|
|
1489
1580
|
if view_md is None and not tbl_md.is_replica:
|
|
1490
|
-
# this is a base table
|
|
1581
|
+
# this is a base, non-replica table
|
|
1491
1582
|
if (tbl_id, None) not in self._tbl_versions:
|
|
1492
1583
|
_ = self._load_tbl_version(tbl_id, None)
|
|
1493
1584
|
tbl = InsertableTable(tbl_record.dir_id, TableVersionHandle(tbl_id, None))
|
|
1494
1585
|
self._tbls[tbl_id, None] = tbl
|
|
1495
|
-
return
|
|
1586
|
+
return tbl
|
|
1496
1587
|
|
|
1497
1588
|
# this is a view; determine the sequence of TableVersions to load
|
|
1498
1589
|
tbl_version_path: list[tuple[UUID, Optional[int]]] = []
|
|
@@ -1517,8 +1608,9 @@ class Catalog:
|
|
|
1517
1608
|
base_path = view_path
|
|
1518
1609
|
view = View(tbl_id, tbl_record.dir_id, tbl_md.name, view_path, snapshot_only=tbl_md.is_pure_snapshot)
|
|
1519
1610
|
self._tbls[tbl_id, None] = view
|
|
1611
|
+
return view
|
|
1520
1612
|
|
|
1521
|
-
def _load_tbl_at_version(self, tbl_id: UUID, version: int) ->
|
|
1613
|
+
def _load_tbl_at_version(self, tbl_id: UUID, version: int) -> Optional[Table]:
|
|
1522
1614
|
from .view import View
|
|
1523
1615
|
|
|
1524
1616
|
# Load the specified TableMd and TableVersionMd records from the db.
|
|
@@ -1578,6 +1670,7 @@ class Catalog:
|
|
|
1578
1670
|
|
|
1579
1671
|
view = View(tbl_id, tbl_record.dir_id, tbl_md.name, tvp, snapshot_only=True)
|
|
1580
1672
|
self._tbls[tbl_id, version] = view
|
|
1673
|
+
return view
|
|
1581
1674
|
|
|
1582
1675
|
@retry_loop(for_write=False)
|
|
1583
1676
|
def collect_tbl_history(self, tbl_id: UUID, n: Optional[int]) -> list[schema.FullTableMd]:
|
|
@@ -1724,10 +1817,29 @@ class Catalog:
|
|
|
1724
1817
|
assert version_md.tbl_id == str(tbl_id)
|
|
1725
1818
|
if schema_version_md is not None:
|
|
1726
1819
|
assert version_md.schema_version == schema_version_md.schema_version
|
|
1727
|
-
|
|
1728
|
-
|
|
1820
|
+
tv_rows = (
|
|
1821
|
+
session.query(schema.TableVersion)
|
|
1822
|
+
.filter(schema.TableVersion.tbl_id == tbl_id, schema.TableVersion.version == version_md.version)
|
|
1823
|
+
.all()
|
|
1729
1824
|
)
|
|
1730
|
-
|
|
1825
|
+
if len(tv_rows) == 0:
|
|
1826
|
+
# It's a new table version; insert a new record in the DB for it.
|
|
1827
|
+
tbl_version_record = schema.TableVersion(
|
|
1828
|
+
tbl_id=tbl_id, version=version_md.version, md=dataclasses.asdict(version_md)
|
|
1829
|
+
)
|
|
1830
|
+
session.add(tbl_version_record)
|
|
1831
|
+
else:
|
|
1832
|
+
# This table version already exists; update it.
|
|
1833
|
+
assert len(tv_rows) == 1 # must be unique
|
|
1834
|
+
tv = tv_rows[0]
|
|
1835
|
+
# Validate that the only field that can change is 'is_fragment'.
|
|
1836
|
+
assert tv.md == dataclasses.asdict(dataclasses.replace(version_md, is_fragment=tv.md['is_fragment']))
|
|
1837
|
+
result = session.execute(
|
|
1838
|
+
sql.update(schema.TableVersion.__table__)
|
|
1839
|
+
.values({schema.TableVersion.md: dataclasses.asdict(version_md)})
|
|
1840
|
+
.where(schema.TableVersion.tbl_id == tbl_id, schema.TableVersion.version == version_md.version)
|
|
1841
|
+
)
|
|
1842
|
+
assert result.rowcount == 1, result.rowcount
|
|
1731
1843
|
|
|
1732
1844
|
# Construct and insert a new schema version record if requested.
|
|
1733
1845
|
if schema_version_md is not None:
|
|
@@ -1796,6 +1908,10 @@ class Catalog:
|
|
|
1796
1908
|
# destination catalog.
|
|
1797
1909
|
ancestor_md.tbl_md.current_version = ancestor_md.version_md.version
|
|
1798
1910
|
ancestor_md.tbl_md.current_schema_version = ancestor_md.schema_version_md.schema_version
|
|
1911
|
+
# Also, the table version of every proper ancestor is ephemeral; it does not represent a queryable
|
|
1912
|
+
# table version (the data might be incomplete, since we have only retrieved one of its views, not
|
|
1913
|
+
# the table itself).
|
|
1914
|
+
ancestor_md.version_md.is_fragment = True
|
|
1799
1915
|
|
|
1800
1916
|
return md
|
|
1801
1917
|
|
pixeltable/catalog/column.py
CHANGED
|
@@ -27,6 +27,25 @@ class Column:
|
|
|
27
27
|
|
|
28
28
|
A Column contains all the metadata necessary for executing queries and updates against a particular version of a
|
|
29
29
|
table/view.
|
|
30
|
+
|
|
31
|
+
Args:
|
|
32
|
+
name: column name; None for system columns (eg, index columns)
|
|
33
|
+
col_type: column type; can be None if the type can be derived from ``computed_with``
|
|
34
|
+
computed_with: an Expr that computes the column value
|
|
35
|
+
is_pk: if True, this column is part of the primary key
|
|
36
|
+
stored: determines whether a computed column is present in the stored table or recomputed on demand
|
|
37
|
+
destination: An object store reference for persisting computed files
|
|
38
|
+
col_id: column ID (only used internally)
|
|
39
|
+
|
|
40
|
+
Computed columns: those have a non-None ``computed_with`` argument
|
|
41
|
+
- when constructed by the user: ``computed_with`` was constructed explicitly and is passed in;
|
|
42
|
+
col_type is None
|
|
43
|
+
- when loaded from md store: ``computed_with`` is set and col_type is set
|
|
44
|
+
|
|
45
|
+
``stored`` (only valid for computed columns):
|
|
46
|
+
- if True: the column is present in the stored table
|
|
47
|
+
- if False: the column is not present in the stored table and recomputed during a query
|
|
48
|
+
- if None: the system chooses for you (at present, this is always False, but this may change in the future)
|
|
30
49
|
"""
|
|
31
50
|
|
|
32
51
|
name: str
|
|
@@ -34,6 +53,7 @@ class Column:
|
|
|
34
53
|
col_type: ts.ColumnType
|
|
35
54
|
stored: bool
|
|
36
55
|
is_pk: bool
|
|
56
|
+
destination: Optional[str] # An object store reference for computed files
|
|
37
57
|
_media_validation: Optional[MediaValidation] # if not set, TableVersion.media_validation applies
|
|
38
58
|
schema_version_add: Optional[int]
|
|
39
59
|
schema_version_drop: Optional[int]
|
|
@@ -62,27 +82,8 @@ class Column:
|
|
|
62
82
|
stores_cellmd: Optional[bool] = None,
|
|
63
83
|
value_expr_dict: Optional[dict[str, Any]] = None,
|
|
64
84
|
tbl: Optional[TableVersion] = None,
|
|
85
|
+
destination: Optional[str] = None,
|
|
65
86
|
):
|
|
66
|
-
"""Column constructor.
|
|
67
|
-
|
|
68
|
-
Args:
|
|
69
|
-
name: column name; None for system columns (eg, index columns)
|
|
70
|
-
col_type: column type; can be None if the type can be derived from ``computed_with``
|
|
71
|
-
computed_with: an Expr that computes the column value
|
|
72
|
-
is_pk: if True, this column is part of the primary key
|
|
73
|
-
stored: determines whether a computed column is present in the stored table or recomputed on demand
|
|
74
|
-
col_id: column ID (only used internally)
|
|
75
|
-
|
|
76
|
-
Computed columns: those have a non-None ``computed_with`` argument
|
|
77
|
-
- when constructed by the user: ``computed_with`` was constructed explicitly and is passed in;
|
|
78
|
-
col_type is None
|
|
79
|
-
- when loaded from md store: ``computed_with`` is set and col_type is set
|
|
80
|
-
|
|
81
|
-
``stored`` (only valid for computed columns):
|
|
82
|
-
- if True: the column is present in the stored table
|
|
83
|
-
- if False: the column is not present in the stored table and recomputed during a query
|
|
84
|
-
- if None: the system chooses for you (at present, this is always False, but this may change in the future)
|
|
85
|
-
"""
|
|
86
87
|
if name is not None and not is_valid_identifier(name):
|
|
87
88
|
raise excs.Error(f"Invalid column name: '{name}'")
|
|
88
89
|
self.name = name
|
|
@@ -126,6 +127,7 @@ class Column:
|
|
|
126
127
|
|
|
127
128
|
# computed cols also have storage columns for the exception string and type
|
|
128
129
|
self.sa_cellmd_col = None
|
|
130
|
+
self.destination = destination
|
|
129
131
|
|
|
130
132
|
def to_md(self, pos: Optional[int] = None) -> tuple[schema.ColumnMd, Optional[schema.SchemaColumn]]:
|
|
131
133
|
"""Returns the Column and optional SchemaColumn metadata for this Column."""
|
|
@@ -138,6 +140,7 @@ class Column:
|
|
|
138
140
|
schema_version_drop=self.schema_version_drop,
|
|
139
141
|
value_expr=self.value_expr.as_dict() if self.value_expr is not None else None,
|
|
140
142
|
stored=self.stored,
|
|
143
|
+
destination=self.destination,
|
|
141
144
|
)
|
|
142
145
|
if pos is None:
|
|
143
146
|
return col_md, None
|
|
@@ -172,6 +175,7 @@ class Column:
|
|
|
172
175
|
schema_version_drop=col_md.schema_version_drop,
|
|
173
176
|
value_expr_dict=col_md.value_expr,
|
|
174
177
|
tbl=tbl,
|
|
178
|
+
destination=col_md.destination,
|
|
175
179
|
)
|
|
176
180
|
return col
|
|
177
181
|
|