pixeltable 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +1 -0
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +3 -10
- pixeltable/catalog/catalog.py +139 -59
- pixeltable/catalog/column.py +32 -23
- pixeltable/catalog/globals.py +2 -45
- pixeltable/catalog/insertable_table.py +5 -2
- pixeltable/catalog/path.py +6 -0
- pixeltable/catalog/table.py +173 -23
- pixeltable/catalog/table_version.py +156 -92
- pixeltable/catalog/table_version_handle.py +26 -1
- pixeltable/catalog/update_status.py +179 -0
- pixeltable/catalog/view.py +12 -3
- pixeltable/config.py +76 -12
- pixeltable/dataframe.py +1 -1
- pixeltable/env.py +29 -0
- pixeltable/exec/exec_node.py +7 -24
- pixeltable/exec/expr_eval/schedulers.py +134 -7
- pixeltable/exprs/column_property_ref.py +23 -20
- pixeltable/exprs/column_ref.py +24 -18
- pixeltable/exprs/data_row.py +9 -0
- pixeltable/exprs/function_call.py +2 -2
- pixeltable/exprs/row_builder.py +46 -14
- pixeltable/exprs/rowid_ref.py +0 -4
- pixeltable/func/function.py +3 -3
- pixeltable/functions/audio.py +36 -9
- pixeltable/functions/video.py +57 -10
- pixeltable/globals.py +61 -1
- pixeltable/io/__init__.py +1 -1
- pixeltable/io/external_store.py +39 -64
- pixeltable/io/globals.py +4 -4
- pixeltable/io/hf_datasets.py +10 -2
- pixeltable/io/label_studio.py +52 -48
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +125 -0
- pixeltable/metadata/converters/util.py +3 -0
- pixeltable/metadata/notes.py +2 -0
- pixeltable/metadata/schema.py +14 -2
- pixeltable/metadata/utils.py +78 -0
- pixeltable/plan.py +26 -18
- pixeltable/share/packager.py +20 -38
- pixeltable/store.py +121 -142
- pixeltable/type_system.py +2 -2
- pixeltable/utils/coroutine.py +6 -23
- pixeltable/utils/media_store.py +39 -0
- {pixeltable-0.4.1.dist-info → pixeltable-0.4.3.dist-info}/METADATA +1 -1
- {pixeltable-0.4.1.dist-info → pixeltable-0.4.3.dist-info}/RECORD +51 -47
- {pixeltable-0.4.1.dist-info → pixeltable-0.4.3.dist-info}/LICENSE +0 -0
- {pixeltable-0.4.1.dist-info → pixeltable-0.4.3.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.1.dist-info → pixeltable-0.4.3.dist-info}/entry_points.txt +0 -0
pixeltable/__init__.py
CHANGED
pixeltable/__version__.py
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
# These version placeholders will be replaced during build.
|
|
2
|
-
__version__ = '0.4.1'
|
|
3
|
-
__version_tuple__ = (0, 4, 1)
|
|
2
|
+
__version__ = '0.4.3'
|
|
3
|
+
__version_tuple__ = (0, 4, 3)
|
pixeltable/catalog/__init__.py
CHANGED
|
@@ -3,21 +3,14 @@
|
|
|
3
3
|
from .catalog import Catalog
|
|
4
4
|
from .column import Column
|
|
5
5
|
from .dir import Dir
|
|
6
|
-
from .globals import (
|
|
7
|
-
IfExistsParam,
|
|
8
|
-
IfNotExistsParam,
|
|
9
|
-
MediaValidation,
|
|
10
|
-
QColumnId,
|
|
11
|
-
UpdateStatus,
|
|
12
|
-
is_valid_identifier,
|
|
13
|
-
is_valid_path,
|
|
14
|
-
)
|
|
6
|
+
from .globals import IfExistsParam, IfNotExistsParam, MediaValidation, QColumnId, is_valid_identifier, is_valid_path
|
|
15
7
|
from .insertable_table import InsertableTable
|
|
16
8
|
from .named_function import NamedFunction
|
|
17
9
|
from .path import Path
|
|
18
10
|
from .schema_object import SchemaObject
|
|
19
11
|
from .table import Table
|
|
20
12
|
from .table_version import TableVersion
|
|
21
|
-
from .table_version_handle import TableVersionHandle
|
|
13
|
+
from .table_version_handle import ColumnHandle, TableVersionHandle
|
|
22
14
|
from .table_version_path import TableVersionPath
|
|
15
|
+
from .update_status import RowCountStats, UpdateStatus
|
|
23
16
|
from .view import View
|
pixeltable/catalog/catalog.py
CHANGED
|
@@ -308,7 +308,11 @@ class Catalog:
|
|
|
308
308
|
# we still got a serialization error, despite getting x-locks at the beginning
|
|
309
309
|
msg = f'{tbl.tbl_name()} ({tbl.tbl_id})' if tbl is not None else ''
|
|
310
310
|
_logger.debug(f'Exception: serialization failure: {msg} ({e})')
|
|
311
|
-
raise excs.Error(
|
|
311
|
+
raise excs.Error(
|
|
312
|
+
'That Pixeltable operation could not be completed because it conflicted with another '
|
|
313
|
+
'operation that was run on a different process.\n'
|
|
314
|
+
'Please re-run the operation.'
|
|
315
|
+
) from None
|
|
312
316
|
else:
|
|
313
317
|
raise
|
|
314
318
|
|
|
@@ -762,56 +766,47 @@ class Catalog:
|
|
|
762
766
|
self._tbls[view._id] = view
|
|
763
767
|
return view
|
|
764
768
|
|
|
765
|
-
|
|
766
|
-
def create_replica(
|
|
767
|
-
self, path: Path, md: list[schema.FullTableMd], if_exists: IfExistsParam = IfExistsParam.ERROR
|
|
768
|
-
) -> None:
|
|
769
|
+
def create_replica(self, path: Path, md: list[schema.FullTableMd]) -> None:
|
|
769
770
|
"""
|
|
770
771
|
Creates table, table_version, and table_schema_version records for a replica with the given metadata.
|
|
771
772
|
The metadata should be presented in standard "ancestor order", with the table being replicated at
|
|
772
773
|
list position 0 and the (root) base table at list position -1.
|
|
773
|
-
|
|
774
|
-
TODO: create_replica() also needs to create the store tables and populate them in order to make
|
|
775
|
-
replica creation atomic.
|
|
776
774
|
"""
|
|
775
|
+
assert Env.get().in_xact
|
|
776
|
+
|
|
777
777
|
tbl_id = UUID(md[0].tbl_md.tbl_id)
|
|
778
778
|
|
|
779
|
-
|
|
780
|
-
existing
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
'but a different table already exists at that location.'
|
|
786
|
-
)
|
|
787
|
-
assert isinstance(existing, View)
|
|
788
|
-
return
|
|
779
|
+
existing = self._handle_path_collision(path, Table, False, if_exists=IfExistsParam.IGNORE) # type: ignore[type-abstract]
|
|
780
|
+
if existing is not None and existing._id != tbl_id:
|
|
781
|
+
raise excs.Error(
|
|
782
|
+
f'An attempt was made to create a replica table at {path!r}, '
|
|
783
|
+
'but a different table already exists at that location.'
|
|
784
|
+
)
|
|
789
785
|
|
|
790
786
|
# Ensure that the system directory exists.
|
|
791
787
|
self._create_dir(Path('_system', allow_system_paths=True), if_exists=IfExistsParam.IGNORE, parents=False)
|
|
792
788
|
|
|
793
|
-
# Now check to see if this table already exists in the catalog.
|
|
789
|
+
# Now check to see if this table UUID already exists in the catalog.
|
|
794
790
|
existing = Catalog.get().get_table_by_id(tbl_id)
|
|
795
791
|
if existing is not None:
|
|
796
792
|
existing_path = Path(existing._path(), allow_system_paths=True)
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
# into a named location), this will be a no-op, but it still serves to validate that the newly received
|
|
809
|
-
# metadata is identical to what's in the catalog.
|
|
810
|
-
self.__store_replica_md(path, md[0])
|
|
793
|
+
if existing_path != path:
|
|
794
|
+
# It does exist, under a different path from the specified one.
|
|
795
|
+
if not existing_path.is_system_path:
|
|
796
|
+
raise excs.Error(
|
|
797
|
+
f'That table has already been replicated as {existing_path!r}.\n'
|
|
798
|
+
f'Drop the existing replica if you wish to re-create it.'
|
|
799
|
+
)
|
|
800
|
+
# If it's a system table, then this means it was created at some point as the ancestor of some other
|
|
801
|
+
# table (a snapshot-over-snapshot scenario). In that case, we simply move it to the new (named)
|
|
802
|
+
# location.
|
|
803
|
+
self._move(existing_path, path)
|
|
811
804
|
|
|
812
|
-
# Now store the metadata for
|
|
805
|
+
# Now store the metadata for this replica's proper ancestors. If one or more proper ancestors
|
|
813
806
|
# do not yet exist in the store, they will be created as anonymous system tables.
|
|
814
|
-
|
|
807
|
+
# We instantiate the ancestors starting with the base table and ending with the immediate parent of the
|
|
808
|
+
# table being replicated.
|
|
809
|
+
for ancestor_md in md[:0:-1]:
|
|
815
810
|
ancestor_id = UUID(ancestor_md.tbl_md.tbl_id)
|
|
816
811
|
replica = Catalog.get().get_table_by_id(ancestor_id)
|
|
817
812
|
replica_path: Path
|
|
@@ -824,12 +819,22 @@ class Catalog:
|
|
|
824
819
|
# that was directly replicated by the user at some point). In either case, use the existing path.
|
|
825
820
|
replica_path = Path(replica._path(), allow_system_paths=True)
|
|
826
821
|
|
|
827
|
-
# Store the metadata; it could be a new version (in which case a new record will be created) or a
|
|
828
|
-
#
|
|
822
|
+
# Store the metadata; it could be a new version (in which case a new record will be created), or a known
|
|
823
|
+
# version (in which case the newly received metadata will be validated as identical).
|
|
824
|
+
# If it's a new version, this will result in a new TableVersion record being created.
|
|
829
825
|
self.__store_replica_md(replica_path, ancestor_md)
|
|
830
826
|
|
|
831
|
-
|
|
832
|
-
|
|
827
|
+
# Now we must clear cached metadata for the ancestor table, to force the next table operation to pick up
|
|
828
|
+
# the new TableVersion instance. This is necessary because computed columns of descendant tables might
|
|
829
|
+
# reference columns of the ancestor table that only exist in the new version.
|
|
830
|
+
replica = Catalog.get().get_table_by_id(ancestor_id)
|
|
831
|
+
assert replica is not None # If it didn't exist before, it must have been created by now.
|
|
832
|
+
replica._tbl_version_path.clear_cached_md()
|
|
833
|
+
|
|
834
|
+
# Finally, store the metadata for the table being replicated; as before, it could be a new version or a known
|
|
835
|
+
# version. If it's a new version, then a TableVersion record will be created, unless the table being replicated
|
|
836
|
+
# is a pure snapshot.
|
|
837
|
+
self.__store_replica_md(path, md[0])
|
|
833
838
|
|
|
834
839
|
def __store_replica_md(self, path: Path, md: schema.FullTableMd) -> None:
|
|
835
840
|
_logger.info(f'Creating replica table at {path!r} with ID: {md.tbl_md.tbl_id}')
|
|
@@ -911,12 +916,19 @@ class Catalog:
|
|
|
911
916
|
'This is likely due to data corruption in the replicated table.'
|
|
912
917
|
)
|
|
913
918
|
|
|
914
|
-
self.store_tbl_md(UUID(tbl_id), new_tbl_md, new_version_md, new_schema_version_md)
|
|
919
|
+
self.store_tbl_md(UUID(tbl_id), None, new_tbl_md, new_version_md, new_schema_version_md)
|
|
920
|
+
|
|
921
|
+
if new_version_md is not None and not md.is_pure_snapshot:
|
|
922
|
+
# It's a new version of a table that has a physical store, so we need to create a TableVersion instance.
|
|
923
|
+
TableVersion.create_replica(md)
|
|
915
924
|
|
|
916
925
|
@_retry_loop(for_write=False)
|
|
917
926
|
def get_table(self, path: Path) -> Table:
|
|
918
927
|
obj = Catalog.get()._get_schema_object(path, expected=Table, raise_if_not_exists=True)
|
|
919
928
|
assert isinstance(obj, Table)
|
|
929
|
+
# We need to clear cached metadata from tbl_version_path, in case the schema has been changed
|
|
930
|
+
# by another process.
|
|
931
|
+
obj._tbl_version_path.clear_cached_md()
|
|
920
932
|
return obj
|
|
921
933
|
|
|
922
934
|
@_retry_loop(for_write=True)
|
|
@@ -1228,6 +1240,43 @@ class Catalog:
|
|
|
1228
1240
|
self._tbls[tbl_id] = view
|
|
1229
1241
|
return view
|
|
1230
1242
|
|
|
1243
|
+
@_retry_loop(for_write=False)
|
|
1244
|
+
def collect_tbl_history(self, tbl_id: UUID, n: Optional[int]) -> list[schema.FullTableMd]:
|
|
1245
|
+
"""
|
|
1246
|
+
Returns the history of up to n versions of the table with the given UUID.
|
|
1247
|
+
|
|
1248
|
+
Args:
|
|
1249
|
+
tbl_id: the UUID of the table to collect history for.
|
|
1250
|
+
n: Optional limit on the maximum number of versions returned.
|
|
1251
|
+
|
|
1252
|
+
Returns:
|
|
1253
|
+
A sequence of rows, ordered by version number
|
|
1254
|
+
Each row contains a TableVersion and a TableSchemaVersion object.
|
|
1255
|
+
"""
|
|
1256
|
+
q = (
|
|
1257
|
+
sql.select(schema.TableVersion, schema.TableSchemaVersion)
|
|
1258
|
+
.select_from(schema.TableVersion)
|
|
1259
|
+
.join(
|
|
1260
|
+
schema.TableSchemaVersion,
|
|
1261
|
+
sql.cast(schema.TableVersion.md['schema_version'], sql.Integer)
|
|
1262
|
+
== schema.TableSchemaVersion.schema_version,
|
|
1263
|
+
)
|
|
1264
|
+
.where(schema.TableVersion.tbl_id == tbl_id)
|
|
1265
|
+
.where(schema.TableSchemaVersion.tbl_id == tbl_id)
|
|
1266
|
+
.order_by(schema.TableVersion.version.desc())
|
|
1267
|
+
)
|
|
1268
|
+
if n is not None:
|
|
1269
|
+
q = q.limit(n)
|
|
1270
|
+
src_rows = Env.get().session.execute(q).fetchall()
|
|
1271
|
+
return [
|
|
1272
|
+
schema.FullTableMd(
|
|
1273
|
+
None,
|
|
1274
|
+
schema.md_from_dict(schema.TableVersionMd, row.TableVersion.md),
|
|
1275
|
+
schema.md_from_dict(schema.TableSchemaVersionMd, row.TableSchemaVersion.md),
|
|
1276
|
+
)
|
|
1277
|
+
for row in src_rows
|
|
1278
|
+
]
|
|
1279
|
+
|
|
1231
1280
|
def load_tbl_md(self, tbl_id: UUID, effective_version: Optional[int]) -> schema.FullTableMd:
|
|
1232
1281
|
"""
|
|
1233
1282
|
Loads metadata from the store for a given table UUID and version.
|
|
@@ -1297,19 +1346,27 @@ class Catalog:
|
|
|
1297
1346
|
def store_tbl_md(
|
|
1298
1347
|
self,
|
|
1299
1348
|
tbl_id: UUID,
|
|
1349
|
+
dir_id: Optional[UUID],
|
|
1300
1350
|
tbl_md: Optional[schema.TableMd],
|
|
1301
1351
|
version_md: Optional[schema.TableVersionMd],
|
|
1302
1352
|
schema_version_md: Optional[schema.TableSchemaVersionMd],
|
|
1303
1353
|
) -> None:
|
|
1304
1354
|
"""
|
|
1305
|
-
Stores metadata to the DB.
|
|
1306
|
-
|
|
1355
|
+
Stores metadata to the DB.
|
|
1356
|
+
|
|
1357
|
+
Args:
|
|
1358
|
+
tbl_id: UUID of the table to store metadata for.
|
|
1359
|
+
dir_id: If specified, the tbl_md will be added to the given directory; if None, the table must already exist
|
|
1360
|
+
tbl_md: If specified, `tbl_md` will be inserted, or updated (only one such record can exist per UUID)
|
|
1361
|
+
version_md: inserted as a new record if present
|
|
1362
|
+
schema_version_md: will be inserted as a new record if present
|
|
1307
1363
|
|
|
1308
1364
|
If inserting `version_md` or `schema_version_md` would be a primary key violation, an exception will be raised.
|
|
1309
1365
|
"""
|
|
1310
|
-
conn = Env.get().conn
|
|
1311
1366
|
assert self._in_write_xact
|
|
1367
|
+
session = Env.get().session
|
|
1312
1368
|
|
|
1369
|
+
# Construct and insert or update table record if requested.
|
|
1313
1370
|
if tbl_md is not None:
|
|
1314
1371
|
assert tbl_md.tbl_id == str(tbl_id)
|
|
1315
1372
|
if version_md is not None:
|
|
@@ -1317,32 +1374,55 @@ class Catalog:
|
|
|
1317
1374
|
assert tbl_md.current_schema_version == version_md.schema_version
|
|
1318
1375
|
if schema_version_md is not None:
|
|
1319
1376
|
assert tbl_md.current_schema_version == schema_version_md.schema_version
|
|
1320
|
-
|
|
1321
|
-
|
|
1322
|
-
|
|
1323
|
-
.
|
|
1324
|
-
|
|
1325
|
-
|
|
1377
|
+
if dir_id is not None:
|
|
1378
|
+
# We are inserting a record while creating a new table.
|
|
1379
|
+
tbl_record = schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(tbl_md))
|
|
1380
|
+
session.add(tbl_record)
|
|
1381
|
+
else:
|
|
1382
|
+
# Update the existing table record.
|
|
1383
|
+
result = session.execute(
|
|
1384
|
+
sql.update(schema.Table.__table__)
|
|
1385
|
+
.values({schema.Table.md: dataclasses.asdict(tbl_md)})
|
|
1386
|
+
.where(schema.Table.id == tbl_id)
|
|
1387
|
+
)
|
|
1388
|
+
assert result.rowcount == 1, result.rowcount
|
|
1326
1389
|
|
|
1390
|
+
# Construct and insert new table version record if requested.
|
|
1327
1391
|
if version_md is not None:
|
|
1328
1392
|
assert version_md.tbl_id == str(tbl_id)
|
|
1329
1393
|
if schema_version_md is not None:
|
|
1330
1394
|
assert version_md.schema_version == schema_version_md.schema_version
|
|
1331
|
-
|
|
1332
|
-
|
|
1333
|
-
tbl_id=tbl_id, version=version_md.version, md=dataclasses.asdict(version_md)
|
|
1334
|
-
)
|
|
1395
|
+
tbl_version_record = schema.TableVersion(
|
|
1396
|
+
tbl_id=tbl_id, version=version_md.version, md=dataclasses.asdict(version_md)
|
|
1335
1397
|
)
|
|
1398
|
+
session.add(tbl_version_record)
|
|
1336
1399
|
|
|
1400
|
+
# Construct and insert a new schema version record if requested.
|
|
1337
1401
|
if schema_version_md is not None:
|
|
1338
1402
|
assert schema_version_md.tbl_id == str(tbl_id)
|
|
1339
|
-
|
|
1340
|
-
|
|
1341
|
-
tbl_id=tbl_id,
|
|
1342
|
-
schema_version=schema_version_md.schema_version,
|
|
1343
|
-
md=dataclasses.asdict(schema_version_md),
|
|
1344
|
-
)
|
|
1403
|
+
schema_version_record = schema.TableSchemaVersion(
|
|
1404
|
+
tbl_id=tbl_id, schema_version=schema_version_md.schema_version, md=dataclasses.asdict(schema_version_md)
|
|
1345
1405
|
)
|
|
1406
|
+
session.add(schema_version_record)
|
|
1407
|
+
session.flush() # Inform SQLAlchemy that we want to write these changes to the DB.
|
|
1408
|
+
|
|
1409
|
+
def update_tbl_version_md(self, version_md: Optional[schema.TableVersionMd]) -> None:
|
|
1410
|
+
"""
|
|
1411
|
+
Update the TableVersion.md field in the DB. Typically used to update the cascade row count status.
|
|
1412
|
+
|
|
1413
|
+
Args:
|
|
1414
|
+
version_md: TableVersionMd
|
|
1415
|
+
"""
|
|
1416
|
+
assert self._in_write_xact
|
|
1417
|
+
session = Env.get().session
|
|
1418
|
+
|
|
1419
|
+
session.execute(
|
|
1420
|
+
sql.update(schema.TableVersion.__table__)
|
|
1421
|
+
.values({schema.TableVersion.md: dataclasses.asdict(version_md)})
|
|
1422
|
+
.where(schema.TableVersion.tbl_id == version_md.tbl_id, schema.TableVersion.version == version_md.version)
|
|
1423
|
+
)
|
|
1424
|
+
|
|
1425
|
+
session.flush() # Inform SQLAlchemy that we want to write these changes to the DB.
|
|
1346
1426
|
|
|
1347
1427
|
def delete_tbl_md(self, tbl_id: UUID) -> None:
|
|
1348
1428
|
"""
|
pixeltable/catalog/column.py
CHANGED
|
@@ -15,6 +15,7 @@ from .globals import MediaValidation, is_valid_identifier
|
|
|
15
15
|
|
|
16
16
|
if TYPE_CHECKING:
|
|
17
17
|
from .table_version import TableVersion
|
|
18
|
+
from .table_version_handle import ColumnHandle
|
|
18
19
|
from .table_version_path import TableVersionPath
|
|
19
20
|
|
|
20
21
|
_logger = logging.getLogger('pixeltable')
|
|
@@ -35,11 +36,10 @@ class Column:
|
|
|
35
36
|
_media_validation: Optional[MediaValidation] # if not set, TableVersion.media_validation applies
|
|
36
37
|
schema_version_add: Optional[int]
|
|
37
38
|
schema_version_drop: Optional[int]
|
|
38
|
-
|
|
39
|
+
_stores_cellmd: Optional[bool]
|
|
39
40
|
sa_col: Optional[sql.schema.Column]
|
|
40
41
|
sa_col_type: Optional[sql.sqltypes.TypeEngine]
|
|
41
|
-
|
|
42
|
-
sa_errortype_col: Optional[sql.schema.Column]
|
|
42
|
+
sa_cellmd_col: Optional[sql.schema.Column] # JSON metadata for the cell, e.g. errortype, errormsg for media columns
|
|
43
43
|
_value_expr: Optional[exprs.Expr]
|
|
44
44
|
value_expr_dict: Optional[dict[str, Any]]
|
|
45
45
|
# we store a TableVersion here, not a TableVersionHandle, because this column is owned by that TableVersion instance
|
|
@@ -58,7 +58,7 @@ class Column:
|
|
|
58
58
|
schema_version_add: Optional[int] = None,
|
|
59
59
|
schema_version_drop: Optional[int] = None,
|
|
60
60
|
sa_col_type: Optional[sql.sqltypes.TypeEngine] = None,
|
|
61
|
-
|
|
61
|
+
stores_cellmd: Optional[bool] = None,
|
|
62
62
|
value_expr_dict: Optional[dict[str, Any]] = None,
|
|
63
63
|
tbl: Optional[TableVersion] = None,
|
|
64
64
|
):
|
|
@@ -117,15 +117,14 @@ class Column:
|
|
|
117
117
|
self.schema_version_add = schema_version_add
|
|
118
118
|
self.schema_version_drop = schema_version_drop
|
|
119
119
|
|
|
120
|
-
self.
|
|
120
|
+
self._stores_cellmd = stores_cellmd
|
|
121
121
|
|
|
122
122
|
# column in the stored table for the values of this Column
|
|
123
123
|
self.sa_col = None
|
|
124
124
|
self.sa_col_type = sa_col_type
|
|
125
125
|
|
|
126
126
|
# computed cols also have storage columns for the exception string and type
|
|
127
|
-
self.sa_errormsg_col = None
|
|
128
|
-
self.sa_errortype_col = None
|
|
127
|
+
self.sa_cellmd_col = None
|
|
129
128
|
|
|
130
129
|
def init_value_expr(self) -> None:
|
|
131
130
|
from pixeltable import exprs
|
|
@@ -148,6 +147,15 @@ class Column:
|
|
|
148
147
|
)
|
|
149
148
|
warnings.warn(message, category=excs.PixeltableWarning, stacklevel=2)
|
|
150
149
|
|
|
150
|
+
@property
|
|
151
|
+
def handle(self) -> 'ColumnHandle':
|
|
152
|
+
"""Returns a ColumnHandle for this Column."""
|
|
153
|
+
from .table_version_handle import ColumnHandle
|
|
154
|
+
|
|
155
|
+
assert self.tbl is not None
|
|
156
|
+
assert self.id is not None
|
|
157
|
+
return ColumnHandle(self.tbl.handle, self.id)
|
|
158
|
+
|
|
151
159
|
@property
|
|
152
160
|
def value_expr(self) -> Optional[exprs.Expr]:
|
|
153
161
|
assert self.value_expr_dict is None or self._value_expr is not None
|
|
@@ -193,11 +201,11 @@ class Column:
|
|
|
193
201
|
return self.stored
|
|
194
202
|
|
|
195
203
|
@property
|
|
196
|
-
def
|
|
204
|
+
def stores_cellmd(self) -> bool:
|
|
197
205
|
"""True if this column also stores error information."""
|
|
198
206
|
# default: record errors for computed and media columns
|
|
199
|
-
if self.
|
|
200
|
-
return self.
|
|
207
|
+
if self._stores_cellmd is not None:
|
|
208
|
+
return self._stores_cellmd
|
|
201
209
|
return self.is_stored and (self.is_computed or self.col_type.is_media_type())
|
|
202
210
|
|
|
203
211
|
@property
|
|
@@ -233,28 +241,29 @@ class Column:
|
|
|
233
241
|
"""
|
|
234
242
|
assert self.is_stored
|
|
235
243
|
# all storage columns are nullable (we deal with null errors in Pixeltable directly)
|
|
236
|
-
self.sa_col = sql.Column(
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
nullable=True
|
|
240
|
-
)
|
|
241
|
-
if self.is_computed or self.col_type.is_media_type():
|
|
242
|
-
self.sa_errormsg_col = sql.Column(self.errormsg_store_name(), ts.StringType().to_sa_type(), nullable=True)
|
|
243
|
-
self.sa_errortype_col = sql.Column(self.errortype_store_name(), ts.StringType().to_sa_type(), nullable=True)
|
|
244
|
+
self.sa_col = sql.Column(self.store_name(), self.get_sa_col_type(), nullable=True)
|
|
245
|
+
if self.stores_cellmd:
|
|
246
|
+
# JSON metadata for the cell, e.g. errortype, errormsg for media columns
|
|
247
|
+
self.sa_cellmd_col = sql.Column(self.cellmd_store_name(), self.sa_cellmd_type(), nullable=True)
|
|
244
248
|
|
|
245
249
|
def get_sa_col_type(self) -> sql.sqltypes.TypeEngine:
|
|
246
250
|
return self.col_type.to_sa_type() if self.sa_col_type is None else self.sa_col_type
|
|
247
251
|
|
|
252
|
+
@classmethod
|
|
253
|
+
def cellmd_type(cls) -> ts.ColumnType:
|
|
254
|
+
return ts.JsonType(nullable=True)
|
|
255
|
+
|
|
256
|
+
@classmethod
|
|
257
|
+
def sa_cellmd_type(cls) -> sql.sqltypes.TypeEngine:
|
|
258
|
+
return cls.cellmd_type().to_sa_type()
|
|
259
|
+
|
|
248
260
|
def store_name(self) -> str:
|
|
249
261
|
assert self.id is not None
|
|
250
262
|
assert self.is_stored
|
|
251
263
|
return f'col_{self.id}'
|
|
252
264
|
|
|
253
|
-
def errormsg_store_name(self) -> str:
|
|
254
|
-
return f'{self.store_name()}_errormsg'
|
|
255
|
-
|
|
256
|
-
def errortype_store_name(self) -> str:
|
|
257
|
-
return f'{self.store_name()}_errortype'
|
|
265
|
+
def cellmd_store_name(self) -> str:
|
|
266
|
+
return f'{self.store_name()}_cellmd'
|
|
258
267
|
|
|
259
268
|
def __str__(self) -> str:
|
|
260
269
|
return f'{self.name}: {self.col_type}'
|
pixeltable/catalog/globals.py
CHANGED
|
@@ -1,14 +1,12 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
import dataclasses
|
|
4
3
|
import enum
|
|
5
4
|
import itertools
|
|
6
5
|
import logging
|
|
6
|
+
from dataclasses import dataclass
|
|
7
7
|
from typing import Optional
|
|
8
8
|
from uuid import UUID
|
|
9
9
|
|
|
10
|
-
from typing_extensions import Self
|
|
11
|
-
|
|
12
10
|
import pixeltable.exceptions as excs
|
|
13
11
|
|
|
14
12
|
_logger = logging.getLogger('pixeltable')
|
|
@@ -22,54 +20,13 @@ _ROWID_COLUMN_NAME = '_rowid'
|
|
|
22
20
|
_PREDEF_SYMBOLS: Optional[set[str]] = None
|
|
23
21
|
|
|
24
22
|
|
|
25
|
-
@
|
|
23
|
+
@dataclass(frozen=True)
|
|
26
24
|
class QColumnId:
|
|
27
25
|
"""Qualified column id"""
|
|
28
26
|
|
|
29
27
|
tbl_id: UUID
|
|
30
28
|
col_id: int
|
|
31
29
|
|
|
32
|
-
# def __hash__(self) -> int:
|
|
33
|
-
# return hash((self.tbl_id, self.col_id))
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
@dataclasses.dataclass
|
|
37
|
-
class UpdateStatus:
|
|
38
|
-
"""
|
|
39
|
-
Information about updates that resulted from a table operation.
|
|
40
|
-
"""
|
|
41
|
-
|
|
42
|
-
num_rows: int = 0
|
|
43
|
-
# TODO: disambiguate what this means: # of slots computed or # of columns computed?
|
|
44
|
-
num_computed_values: int = 0
|
|
45
|
-
num_excs: int = 0
|
|
46
|
-
updated_cols: list[str] = dataclasses.field(default_factory=list)
|
|
47
|
-
cols_with_excs: list[str] = dataclasses.field(default_factory=list)
|
|
48
|
-
|
|
49
|
-
def __iadd__(self, other: 'UpdateStatus') -> Self:
|
|
50
|
-
self.num_rows += other.num_rows
|
|
51
|
-
self.num_computed_values += other.num_computed_values
|
|
52
|
-
self.num_excs += other.num_excs
|
|
53
|
-
self.updated_cols = list(dict.fromkeys(self.updated_cols + other.updated_cols))
|
|
54
|
-
self.cols_with_excs = list(dict.fromkeys(self.cols_with_excs + other.cols_with_excs))
|
|
55
|
-
return self
|
|
56
|
-
|
|
57
|
-
@property
|
|
58
|
-
def insert_msg(self) -> str:
|
|
59
|
-
"""Return a message describing the results of an insert operation."""
|
|
60
|
-
if self.num_excs == 0:
|
|
61
|
-
cols_with_excs_str = ''
|
|
62
|
-
else:
|
|
63
|
-
cols_with_excs_str = (
|
|
64
|
-
f' across {len(self.cols_with_excs)} column{"" if len(self.cols_with_excs) == 1 else "s"}'
|
|
65
|
-
)
|
|
66
|
-
cols_with_excs_str += f' ({", ".join(self.cols_with_excs)})'
|
|
67
|
-
msg = (
|
|
68
|
-
f'Inserted {self.num_rows} row{"" if self.num_rows == 1 else "s"} '
|
|
69
|
-
f'with {self.num_excs} error{"" if self.num_excs == 1 else "s"}{cols_with_excs_str}.'
|
|
70
|
-
)
|
|
71
|
-
return msg
|
|
72
|
-
|
|
73
30
|
|
|
74
31
|
class MediaValidation(enum.Enum):
|
|
75
32
|
ON_READ = 0
|
|
pixeltable/catalog/insertable_table.py
CHANGED
|
@@ -10,11 +10,12 @@ from pixeltable import exceptions as excs, type_system as ts
|
|
|
10
10
|
from pixeltable.env import Env
|
|
11
11
|
from pixeltable.utils.filecache import FileCache
|
|
12
12
|
|
|
13
|
-
from .globals import MediaValidation, UpdateStatus
|
|
13
|
+
from .globals import MediaValidation
|
|
14
14
|
from .table import Table
|
|
15
15
|
from .table_version import TableVersion
|
|
16
16
|
from .table_version_handle import TableVersionHandle
|
|
17
17
|
from .table_version_path import TableVersionPath
|
|
18
|
+
from .update_status import UpdateStatus
|
|
18
19
|
|
|
19
20
|
if TYPE_CHECKING:
|
|
20
21
|
from pixeltable import exprs
|
|
@@ -106,6 +107,7 @@ class InsertableTable(Table):
|
|
|
106
107
|
|
|
107
108
|
def _get_metadata(self) -> dict[str, Any]:
|
|
108
109
|
md = super()._get_metadata()
|
|
110
|
+
md['base'] = None
|
|
109
111
|
md['is_view'] = False
|
|
110
112
|
md['is_snapshot'] = False
|
|
111
113
|
return md
|
|
@@ -171,13 +173,14 @@ class InsertableTable(Table):
|
|
|
171
173
|
from pixeltable.catalog import Catalog
|
|
172
174
|
from pixeltable.io.table_data_conduit import DFTableDataConduit
|
|
173
175
|
|
|
174
|
-
status = pxt.UpdateStatus()
|
|
175
176
|
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
176
177
|
if isinstance(data_source, DFTableDataConduit):
|
|
178
|
+
status = pxt.UpdateStatus()
|
|
177
179
|
status += self._tbl_version.get().insert(
|
|
178
180
|
rows=None, df=data_source.pxt_df, print_stats=print_stats, fail_on_exception=fail_on_exception
|
|
179
181
|
)
|
|
180
182
|
else:
|
|
183
|
+
status = pxt.UpdateStatus()
|
|
181
184
|
for row_batch in data_source.valid_row_batch():
|
|
182
185
|
status += self._tbl_version.get().insert(
|
|
183
186
|
rows=row_batch, df=None, print_stats=print_stats, fail_on_exception=fail_on_exception
|
pixeltable/catalog/path.py
CHANGED
|
@@ -77,5 +77,11 @@ class Path:
|
|
|
77
77
|
def __str__(self) -> str:
|
|
78
78
|
return '.'.join(self.components)
|
|
79
79
|
|
|
80
|
+
def __eq__(self, other: object) -> bool:
|
|
81
|
+
return isinstance(other, Path) and str(self) == str(other)
|
|
82
|
+
|
|
83
|
+
def __hash__(self) -> int:
|
|
84
|
+
return hash(str(self))
|
|
85
|
+
|
|
80
86
|
def __lt__(self, other: Path) -> bool:
|
|
81
87
|
return str(self) < str(other)
|