pixeltable 0.4.2__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +1 -0
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +2 -10
- pixeltable/catalog/catalog.py +64 -38
- pixeltable/catalog/column.py +22 -23
- pixeltable/catalog/globals.py +2 -148
- pixeltable/catalog/insertable_table.py +6 -4
- pixeltable/catalog/path.py +6 -0
- pixeltable/catalog/table.py +51 -32
- pixeltable/catalog/table_version.py +69 -45
- pixeltable/catalog/update_status.py +179 -0
- pixeltable/catalog/view.py +9 -2
- pixeltable/config.py +76 -12
- pixeltable/dataframe.py +1 -1
- pixeltable/env.py +29 -0
- pixeltable/exec/exec_node.py +7 -24
- pixeltable/exec/expr_eval/schedulers.py +134 -7
- pixeltable/exprs/column_property_ref.py +21 -9
- pixeltable/exprs/column_ref.py +5 -1
- pixeltable/exprs/function_call.py +2 -2
- pixeltable/exprs/row_builder.py +10 -9
- pixeltable/exprs/rowid_ref.py +0 -4
- pixeltable/func/function.py +3 -3
- pixeltable/functions/audio.py +36 -9
- pixeltable/functions/video.py +57 -10
- pixeltable/globals.py +61 -1
- pixeltable/io/__init__.py +1 -1
- pixeltable/io/external_store.py +3 -55
- pixeltable/io/globals.py +4 -4
- pixeltable/io/hf_datasets.py +10 -2
- pixeltable/io/label_studio.py +16 -16
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_39.py +125 -0
- pixeltable/metadata/converters/util.py +3 -0
- pixeltable/metadata/notes.py +1 -0
- pixeltable/metadata/schema.py +14 -2
- pixeltable/plan.py +4 -0
- pixeltable/share/packager.py +20 -38
- pixeltable/store.py +18 -50
- pixeltable/type_system.py +2 -2
- pixeltable/utils/coroutine.py +6 -23
- pixeltable/utils/media_store.py +39 -0
- {pixeltable-0.4.2.dist-info → pixeltable-0.4.3.dist-info}/METADATA +1 -1
- {pixeltable-0.4.2.dist-info → pixeltable-0.4.3.dist-info}/RECORD +47 -45
- {pixeltable-0.4.2.dist-info → pixeltable-0.4.3.dist-info}/LICENSE +0 -0
- {pixeltable-0.4.2.dist-info → pixeltable-0.4.3.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.2.dist-info → pixeltable-0.4.3.dist-info}/entry_points.txt +0 -0
pixeltable/catalog/table.py
CHANGED
|
@@ -6,7 +6,7 @@ import json
|
|
|
6
6
|
import logging
|
|
7
7
|
from keyword import iskeyword as is_python_keyword
|
|
8
8
|
from pathlib import Path
|
|
9
|
-
from typing import TYPE_CHECKING, Any, Iterable, Literal, Optional, Union, overload
|
|
9
|
+
from typing import TYPE_CHECKING, Any, ClassVar, Iterable, Literal, Optional, Union, overload
|
|
10
10
|
|
|
11
11
|
from typing import _GenericAlias # type: ignore[attr-defined] # isort: skip
|
|
12
12
|
import datetime
|
|
@@ -29,13 +29,13 @@ from .globals import (
|
|
|
29
29
|
IfExistsParam,
|
|
30
30
|
IfNotExistsParam,
|
|
31
31
|
MediaValidation,
|
|
32
|
-
UpdateStatus,
|
|
33
32
|
is_system_column_name,
|
|
34
33
|
is_valid_identifier,
|
|
35
34
|
)
|
|
36
35
|
from .schema_object import SchemaObject
|
|
37
36
|
from .table_version_handle import TableVersionHandle
|
|
38
37
|
from .table_version_path import TableVersionPath
|
|
38
|
+
from .update_status import UpdateStatus
|
|
39
39
|
|
|
40
40
|
if TYPE_CHECKING:
|
|
41
41
|
import torch.utils.data
|
|
@@ -109,8 +109,6 @@ class Table(SchemaObject):
|
|
|
109
109
|
|
|
110
110
|
def _get_metadata(self) -> dict[str, Any]:
|
|
111
111
|
md = super()._get_metadata()
|
|
112
|
-
base = self._get_base_table()
|
|
113
|
-
md['base'] = base._path() if base is not None else None
|
|
114
112
|
md['schema'] = self._get_schema()
|
|
115
113
|
md['is_replica'] = self._tbl_version_path.is_replica()
|
|
116
114
|
md['version'] = self._get_version()
|
|
@@ -510,15 +508,16 @@ class Table(SchemaObject):
|
|
|
510
508
|
for cname in cols_to_ignore:
|
|
511
509
|
assert cname in col_schema
|
|
512
510
|
del col_schema[cname]
|
|
511
|
+
result = UpdateStatus()
|
|
513
512
|
if len(col_schema) == 0:
|
|
514
|
-
return
|
|
513
|
+
return result
|
|
515
514
|
new_cols = self._create_columns(col_schema)
|
|
516
515
|
for new_col in new_cols:
|
|
517
516
|
self._verify_column(new_col)
|
|
518
517
|
assert self._tbl_version is not None
|
|
519
|
-
|
|
518
|
+
result += self._tbl_version.get().add_columns(new_cols, print_stats=False, on_error='abort')
|
|
520
519
|
FileCache.get().emit_eviction_warnings()
|
|
521
|
-
return
|
|
520
|
+
return result
|
|
522
521
|
|
|
523
522
|
def add_column(
|
|
524
523
|
self,
|
|
@@ -595,7 +594,7 @@ class Table(SchemaObject):
|
|
|
595
594
|
- `'abort'`: an exception will be raised and the column will not be added.
|
|
596
595
|
- `'ignore'`: execution will continue and the column will be added. Any rows
|
|
597
596
|
with errors will have a `None` value for the column, with information about the error stored in the
|
|
598
|
-
corresponding `tbl.col_name.
|
|
597
|
+
corresponding `tbl.col_name.errormsg` and `tbl.col_name.errortype` fields.
|
|
599
598
|
if_exists: Determines the behavior if the column already exists. Must be one of the following:
|
|
600
599
|
|
|
601
600
|
- `'error'`: an exception will be raised.
|
|
@@ -642,10 +641,10 @@ class Table(SchemaObject):
|
|
|
642
641
|
# Raise an error if the column expression refers to a column error property
|
|
643
642
|
if isinstance(spec, exprs.Expr):
|
|
644
643
|
for e in spec.subexprs(expr_class=exprs.ColumnPropertyRef, traverse_matches=False):
|
|
645
|
-
if e.
|
|
644
|
+
if e.is_cellmd_prop():
|
|
646
645
|
raise excs.Error(
|
|
647
|
-
'Use of a reference to
|
|
648
|
-
f'
|
|
646
|
+
f'Use of a reference to the {e.prop.name.lower()!r} property of another column '
|
|
647
|
+
f'is not allowed in a computed column.'
|
|
649
648
|
)
|
|
650
649
|
|
|
651
650
|
# handle existing columns based on if_exists parameter
|
|
@@ -654,16 +653,17 @@ class Table(SchemaObject):
|
|
|
654
653
|
)
|
|
655
654
|
# if the column to add already exists and user asked to ignore
|
|
656
655
|
# existing column, there's nothing to do.
|
|
656
|
+
result = UpdateStatus()
|
|
657
657
|
if len(cols_to_ignore) != 0:
|
|
658
658
|
assert cols_to_ignore[0] == col_name
|
|
659
|
-
return
|
|
659
|
+
return result
|
|
660
660
|
|
|
661
661
|
new_col = self._create_columns({col_name: col_schema})[0]
|
|
662
662
|
self._verify_column(new_col)
|
|
663
663
|
assert self._tbl_version is not None
|
|
664
|
-
|
|
664
|
+
result += self._tbl_version.get().add_columns([new_col], print_stats=print_stats, on_error=on_error)
|
|
665
665
|
FileCache.get().emit_eviction_warnings()
|
|
666
|
-
return
|
|
666
|
+
return result
|
|
667
667
|
|
|
668
668
|
@classmethod
|
|
669
669
|
def _validate_column_spec(cls, name: str, spec: dict[str, Any]) -> None:
|
|
@@ -1349,9 +1349,9 @@ class Table(SchemaObject):
|
|
|
1349
1349
|
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
1350
1350
|
if self._tbl_version_path.is_snapshot():
|
|
1351
1351
|
raise excs.Error('Cannot update a snapshot')
|
|
1352
|
-
|
|
1352
|
+
result = self._tbl_version.get().update(value_spec, where, cascade)
|
|
1353
1353
|
FileCache.get().emit_eviction_warnings()
|
|
1354
|
-
return
|
|
1354
|
+
return result
|
|
1355
1355
|
|
|
1356
1356
|
def batch_update(
|
|
1357
1357
|
self,
|
|
@@ -1415,7 +1415,7 @@ class Table(SchemaObject):
|
|
|
1415
1415
|
raise excs.Error(f'Primary key columns ({", ".join(missing_cols)}) missing in {row_spec}')
|
|
1416
1416
|
row_updates.append(col_vals)
|
|
1417
1417
|
|
|
1418
|
-
|
|
1418
|
+
result = self._tbl_version.get().batch_update(
|
|
1419
1419
|
row_updates,
|
|
1420
1420
|
rowids,
|
|
1421
1421
|
error_if_not_exists=if_not_exists == 'error',
|
|
@@ -1423,7 +1423,7 @@ class Table(SchemaObject):
|
|
|
1423
1423
|
cascade=cascade,
|
|
1424
1424
|
)
|
|
1425
1425
|
FileCache.get().emit_eviction_warnings()
|
|
1426
|
-
return
|
|
1426
|
+
return result
|
|
1427
1427
|
|
|
1428
1428
|
def recompute_columns(
|
|
1429
1429
|
self, *columns: Union[str, ColumnRef], errors_only: bool = False, cascade: bool = True
|
|
@@ -1433,7 +1433,7 @@ class Table(SchemaObject):
|
|
|
1433
1433
|
Args:
|
|
1434
1434
|
columns: The names or references of the computed columns to recompute.
|
|
1435
1435
|
errors_only: If True, only run the recomputation for rows that have errors in the column (i.e., the column's
|
|
1436
|
-
`errortype` property
|
|
1436
|
+
`errortype` property indicates that an error occurred). Only allowed for recomputing a single column.
|
|
1437
1437
|
cascade: if True, also update all computed columns that transitively depend on the recomputed columns.
|
|
1438
1438
|
|
|
1439
1439
|
Examples:
|
|
@@ -1484,9 +1484,9 @@ class Table(SchemaObject):
|
|
|
1484
1484
|
raise excs.Error(f'Cannot recompute column of a base: {col_name!r}')
|
|
1485
1485
|
col_names.append(col_name)
|
|
1486
1486
|
|
|
1487
|
-
|
|
1487
|
+
result = self._tbl_version.get().recompute_columns(col_names, errors_only=errors_only, cascade=cascade)
|
|
1488
1488
|
FileCache.get().emit_eviction_warnings()
|
|
1489
|
-
return
|
|
1489
|
+
return result
|
|
1490
1490
|
|
|
1491
1491
|
def delete(self, where: Optional['exprs.Expr'] = None) -> UpdateStatus:
|
|
1492
1492
|
"""Delete rows in this table.
|
|
@@ -1588,7 +1588,7 @@ class Table(SchemaObject):
|
|
|
1588
1588
|
|
|
1589
1589
|
def sync(
|
|
1590
1590
|
self, stores: Optional[str | list[str]] = None, *, export_data: bool = True, import_data: bool = True
|
|
1591
|
-
) ->
|
|
1591
|
+
) -> UpdateStatus:
|
|
1592
1592
|
"""
|
|
1593
1593
|
Synchronizes this table with its linked external stores.
|
|
1594
1594
|
|
|
@@ -1601,7 +1601,7 @@ class Table(SchemaObject):
|
|
|
1601
1601
|
from pixeltable.catalog import Catalog
|
|
1602
1602
|
|
|
1603
1603
|
if self._tbl_version_path.is_snapshot():
|
|
1604
|
-
return
|
|
1604
|
+
return UpdateStatus()
|
|
1605
1605
|
# we lock the entire tree starting at the root base table in order to ensure that all synced columns can
|
|
1606
1606
|
# have their updates propagated down the tree
|
|
1607
1607
|
base_tv = self._tbl_version_path.get_tbl_versions()[-1]
|
|
@@ -1617,7 +1617,7 @@ class Table(SchemaObject):
|
|
|
1617
1617
|
if store not in all_stores:
|
|
1618
1618
|
raise excs.Error(f'Table `{self._name}` has no external store with that name: {store}')
|
|
1619
1619
|
|
|
1620
|
-
sync_status =
|
|
1620
|
+
sync_status = UpdateStatus()
|
|
1621
1621
|
for store in stores:
|
|
1622
1622
|
store_obj = self._tbl_version.get().external_stores[store]
|
|
1623
1623
|
store_sync_status = store_obj.sync(self, export_data=export_data, import_data=import_data)
|
|
@@ -1631,6 +1631,19 @@ class Table(SchemaObject):
|
|
|
1631
1631
|
def _ipython_key_completions_(self) -> list[str]:
|
|
1632
1632
|
return list(self._get_schema().keys())
|
|
1633
1633
|
|
|
1634
|
+
_REPORT_SCHEMA: ClassVar[dict[str, ts.ColumnType]] = {
|
|
1635
|
+
'version': ts.IntType(),
|
|
1636
|
+
'created_at': ts.TimestampType(),
|
|
1637
|
+
'user': ts.StringType(nullable=True),
|
|
1638
|
+
'note': ts.StringType(),
|
|
1639
|
+
'inserts': ts.IntType(nullable=True),
|
|
1640
|
+
'updates': ts.IntType(nullable=True),
|
|
1641
|
+
'deletes': ts.IntType(nullable=True),
|
|
1642
|
+
'errors': ts.IntType(nullable=True),
|
|
1643
|
+
'computed': ts.IntType(),
|
|
1644
|
+
'schema_change': ts.StringType(),
|
|
1645
|
+
}
|
|
1646
|
+
|
|
1634
1647
|
def history(self, n: Optional[int] = None) -> pixeltable.dataframe.DataFrameResultSet:
|
|
1635
1648
|
"""Returns rows of information about the versions of this table, most recent first.
|
|
1636
1649
|
|
|
@@ -1676,19 +1689,25 @@ class Table(SchemaObject):
|
|
|
1676
1689
|
for vers_md in vers_list[0 : len(vers_list) - over_count]:
|
|
1677
1690
|
version = vers_md.version_md.version
|
|
1678
1691
|
schema_change = md_dict.get(version, '')
|
|
1679
|
-
|
|
1692
|
+
update_status = vers_md.version_md.update_status
|
|
1693
|
+
if update_status is None:
|
|
1694
|
+
update_status = UpdateStatus()
|
|
1695
|
+
change_type = 'schema' if schema_change != '' else ''
|
|
1696
|
+
if change_type == '':
|
|
1697
|
+
change_type = 'data'
|
|
1698
|
+
rcs = update_status.row_count_stats + update_status.cascade_row_count_stats
|
|
1680
1699
|
report_line = [
|
|
1681
1700
|
version,
|
|
1682
1701
|
datetime.datetime.fromtimestamp(vers_md.version_md.created_at),
|
|
1702
|
+
vers_md.version_md.user,
|
|
1683
1703
|
change_type,
|
|
1704
|
+
rcs.ins_rows,
|
|
1705
|
+
rcs.upd_rows,
|
|
1706
|
+
rcs.del_rows,
|
|
1707
|
+
rcs.num_excs,
|
|
1708
|
+
rcs.computed_values,
|
|
1684
1709
|
schema_change,
|
|
1685
1710
|
]
|
|
1686
1711
|
report_lines.append(report_line)
|
|
1687
1712
|
|
|
1688
|
-
|
|
1689
|
-
'version': ts.IntType(),
|
|
1690
|
-
'created_at': ts.TimestampType(),
|
|
1691
|
-
'change': ts.StringType(),
|
|
1692
|
-
'schema_change': ts.StringType(),
|
|
1693
|
-
}
|
|
1694
|
-
return pxt.dataframe.DataFrameResultSet(report_lines, report_schema)
|
|
1713
|
+
return pxt.dataframe.DataFrameResultSet(report_lines, self._REPORT_SCHEMA)
|
|
@@ -29,14 +29,8 @@ if TYPE_CHECKING:
|
|
|
29
29
|
|
|
30
30
|
from ..func.globals import resolve_symbol
|
|
31
31
|
from .column import Column
|
|
32
|
-
from .globals import
|
|
33
|
-
|
|
34
|
-
_ROWID_COLUMN_NAME,
|
|
35
|
-
MediaValidation,
|
|
36
|
-
RowCountStats,
|
|
37
|
-
UpdateStatus,
|
|
38
|
-
is_valid_identifier,
|
|
39
|
-
)
|
|
32
|
+
from .globals import _POS_COLUMN_NAME, _ROWID_COLUMN_NAME, MediaValidation, is_valid_identifier
|
|
33
|
+
from .update_status import RowCountStats, UpdateStatus
|
|
40
34
|
|
|
41
35
|
if TYPE_CHECKING:
|
|
42
36
|
from pixeltable import exec, store
|
|
@@ -243,9 +237,15 @@ class TableVersion:
|
|
|
243
237
|
additional_md={},
|
|
244
238
|
)
|
|
245
239
|
|
|
246
|
-
# create schema.TableVersion
|
|
240
|
+
# create schema.TableVersion of the initial version
|
|
247
241
|
table_version_md = schema.TableVersionMd(
|
|
248
|
-
tbl_id=tbl_id_str,
|
|
242
|
+
tbl_id=tbl_id_str,
|
|
243
|
+
created_at=timestamp,
|
|
244
|
+
version=0,
|
|
245
|
+
schema_version=0,
|
|
246
|
+
user=user,
|
|
247
|
+
update_status=None,
|
|
248
|
+
additional_md={},
|
|
249
249
|
)
|
|
250
250
|
|
|
251
251
|
# create schema.TableSchemaVersion
|
|
@@ -324,6 +324,7 @@ class TableVersion:
|
|
|
324
324
|
|
|
325
325
|
@classmethod
|
|
326
326
|
def create_replica(cls, md: schema.FullTableMd) -> TableVersion:
|
|
327
|
+
assert Env.get().in_xact
|
|
327
328
|
tbl_id = UUID(md.tbl_md.tbl_id)
|
|
328
329
|
_logger.info(f'Creating replica table version {tbl_id}:{md.version_md.version}.')
|
|
329
330
|
view_md = md.tbl_md.view_md
|
|
@@ -333,6 +334,10 @@ class TableVersion:
|
|
|
333
334
|
tbl_id, md.tbl_md, md.version_md.version, md.schema_version_md, [], base_path=base_path, base=base
|
|
334
335
|
)
|
|
335
336
|
cat = pxt.catalog.Catalog.get()
|
|
337
|
+
# We're creating a new TableVersion replica, so we should never have seen this particular
|
|
338
|
+
# TableVersion instance before.
|
|
339
|
+
assert tbl_version.effective_version is not None
|
|
340
|
+
assert (tbl_version.id, tbl_version.effective_version) not in cat._tbl_versions
|
|
336
341
|
cat._tbl_versions[tbl_version.id, tbl_version.effective_version] = tbl_version
|
|
337
342
|
tbl_version.init()
|
|
338
343
|
tbl_version.store_tbl.create()
|
|
@@ -454,10 +459,10 @@ class TableVersion:
|
|
|
454
459
|
# fix up the sa column type of the index value and undo columns
|
|
455
460
|
val_col = self.cols_by_id[md.index_val_col_id]
|
|
456
461
|
val_col.sa_col_type = idx.index_sa_type()
|
|
457
|
-
val_col.
|
|
462
|
+
val_col._stores_cellmd = False
|
|
458
463
|
undo_col = self.cols_by_id[md.index_val_undo_col_id]
|
|
459
464
|
undo_col.sa_col_type = idx.index_sa_type()
|
|
460
|
-
undo_col.
|
|
465
|
+
undo_col._stores_cellmd = False
|
|
461
466
|
idx_info = self.IndexInfo(id=md.id, name=md.name, idx=idx, col=idx_col, val_col=val_col, undo_col=undo_col)
|
|
462
467
|
self.idxs_by_name[md.name] = idx_info
|
|
463
468
|
|
|
@@ -473,7 +478,13 @@ class TableVersion:
|
|
|
473
478
|
else:
|
|
474
479
|
self.store_tbl = StoreTable(self)
|
|
475
480
|
|
|
476
|
-
def _write_md(
|
|
481
|
+
def _write_md(
|
|
482
|
+
self,
|
|
483
|
+
new_version: bool,
|
|
484
|
+
new_version_ts: float,
|
|
485
|
+
new_schema_version: bool,
|
|
486
|
+
update_status: Optional[UpdateStatus] = None,
|
|
487
|
+
) -> None:
|
|
477
488
|
"""Writes table metadata to the database.
|
|
478
489
|
|
|
479
490
|
Args:
|
|
@@ -484,22 +495,23 @@ class TableVersion:
|
|
|
484
495
|
"""
|
|
485
496
|
from pixeltable.catalog import Catalog
|
|
486
497
|
|
|
487
|
-
version_md
|
|
488
|
-
schema.TableVersionMd(
|
|
489
|
-
tbl_id=str(self.id),
|
|
490
|
-
created_at=new_version_ts,
|
|
491
|
-
version=self.version,
|
|
492
|
-
schema_version=self.schema_version,
|
|
493
|
-
additional_md={},
|
|
494
|
-
)
|
|
495
|
-
if new_version
|
|
496
|
-
else None
|
|
497
|
-
)
|
|
498
|
+
version_md = self._create_version_md(new_version_ts, update_status=update_status) if new_version else None
|
|
498
499
|
|
|
499
500
|
Catalog.get().store_tbl_md(
|
|
500
501
|
self.id, None, self._tbl_md, version_md, self._schema_version_md if new_schema_version else None
|
|
501
502
|
)
|
|
502
503
|
|
|
504
|
+
def _write_md_update_status(self, new_version_ts: float, update_status: UpdateStatus) -> None:
|
|
505
|
+
"""Writes a new update_status in the table version metadata in the database.
|
|
506
|
+
|
|
507
|
+
Args:
|
|
508
|
+
new_version_ts: timestamp of the change
|
|
509
|
+
update_status: UpdateStatus to be updated in the database
|
|
510
|
+
"""
|
|
511
|
+
from pixeltable.catalog import Catalog
|
|
512
|
+
|
|
513
|
+
Catalog.get().update_tbl_version_md(self._create_version_md(new_version_ts, update_status))
|
|
514
|
+
|
|
503
515
|
def _store_idx_name(self, idx_id: int) -> str:
|
|
504
516
|
"""Return name of index in the store, which needs to be globally unique"""
|
|
505
517
|
return f'idx_{self.id.hex}_{idx_id}'
|
|
@@ -553,7 +565,7 @@ class TableVersion:
|
|
|
553
565
|
stored=True,
|
|
554
566
|
schema_version_add=self.schema_version,
|
|
555
567
|
schema_version_drop=None,
|
|
556
|
-
|
|
568
|
+
stores_cellmd=idx.records_value_errors(),
|
|
557
569
|
)
|
|
558
570
|
val_col.tbl = self
|
|
559
571
|
val_col.col_type = val_col.col_type.copy(nullable=True)
|
|
@@ -567,7 +579,7 @@ class TableVersion:
|
|
|
567
579
|
stored=True,
|
|
568
580
|
schema_version_add=self.schema_version,
|
|
569
581
|
schema_version_drop=None,
|
|
570
|
-
|
|
582
|
+
stores_cellmd=False,
|
|
571
583
|
)
|
|
572
584
|
undo_col.tbl = self
|
|
573
585
|
undo_col.col_type = undo_col.col_type.copy(nullable=True)
|
|
@@ -679,7 +691,7 @@ class TableVersion:
|
|
|
679
691
|
# Create indices and their md records
|
|
680
692
|
for col, (idx, val_col, undo_col) in index_cols.items():
|
|
681
693
|
self._create_index(col, val_col, undo_col, idx_name=None, idx=idx)
|
|
682
|
-
self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True)
|
|
694
|
+
self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True, update_status=status)
|
|
683
695
|
_logger.info(f'Added columns {[col.name for col in cols]} to table {self.name}, new version: {self.version}')
|
|
684
696
|
|
|
685
697
|
msg = (
|
|
@@ -899,6 +911,7 @@ class TableVersion:
|
|
|
899
911
|
assert (rows is None) != (df is None) # Exactly one must be specified
|
|
900
912
|
if rows is not None:
|
|
901
913
|
plan = Planner.create_insert_plan(self, rows, ignore_errors=not fail_on_exception)
|
|
914
|
+
|
|
902
915
|
else:
|
|
903
916
|
plan = Planner.create_df_insert_plan(self, df, ignore_errors=not fail_on_exception)
|
|
904
917
|
|
|
@@ -909,7 +922,10 @@ class TableVersion:
|
|
|
909
922
|
self.next_row_id += 1
|
|
910
923
|
yield rowid
|
|
911
924
|
|
|
912
|
-
|
|
925
|
+
result = self._insert(
|
|
926
|
+
plan, time.time(), print_stats=print_stats, rowids=rowids(), abort_on_exc=fail_on_exception
|
|
927
|
+
)
|
|
928
|
+
return result
|
|
913
929
|
|
|
914
930
|
def _insert(
|
|
915
931
|
self,
|
|
@@ -923,22 +939,26 @@ class TableVersion:
|
|
|
923
939
|
"""Insert rows produced by exec_plan and propagate to views"""
|
|
924
940
|
# we're creating a new version
|
|
925
941
|
self.version += 1
|
|
926
|
-
cols_with_excs,
|
|
942
|
+
cols_with_excs, row_counts = self.store_tbl.insert_rows(
|
|
927
943
|
exec_plan, v_min=self.version, rowids=rowids, abort_on_exc=abort_on_exc
|
|
928
944
|
)
|
|
929
|
-
result
|
|
930
|
-
|
|
945
|
+
result = UpdateStatus(
|
|
946
|
+
cols_with_excs=[f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs],
|
|
947
|
+
row_count_stats=row_counts,
|
|
948
|
+
)
|
|
931
949
|
|
|
932
950
|
# update views
|
|
933
951
|
for view in self.mutable_views:
|
|
934
952
|
from pixeltable.plan import Planner
|
|
935
953
|
|
|
936
|
-
|
|
937
|
-
status = view.get()._insert(
|
|
954
|
+
plan2, _ = Planner.create_view_load_plan(view.get().path, propagates_insert=True)
|
|
955
|
+
status = view.get()._insert(plan2, timestamp, print_stats=print_stats)
|
|
938
956
|
result += status.to_cascade()
|
|
939
957
|
|
|
958
|
+
# Use the net status after all propagations
|
|
959
|
+
self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False, update_status=result)
|
|
940
960
|
if print_stats:
|
|
941
|
-
|
|
961
|
+
exec_plan.ctx.profile.print(num_rows=result.num_rows)
|
|
942
962
|
_logger.info(f'TableVersion {self.name}: new version {self.version}')
|
|
943
963
|
return result
|
|
944
964
|
|
|
@@ -1108,20 +1128,20 @@ class TableVersion:
|
|
|
1108
1128
|
cascade: bool,
|
|
1109
1129
|
show_progress: bool = True,
|
|
1110
1130
|
) -> UpdateStatus:
|
|
1111
|
-
|
|
1112
|
-
|
|
1131
|
+
result = UpdateStatus()
|
|
1132
|
+
create_new_table_version = plan is not None
|
|
1133
|
+
if create_new_table_version:
|
|
1113
1134
|
self.version += 1
|
|
1114
|
-
cols_with_excs,
|
|
1115
|
-
|
|
1135
|
+
cols_with_excs, row_counts = self.store_tbl.insert_rows(
|
|
1136
|
+
plan, v_min=self.version, show_progress=show_progress
|
|
1137
|
+
)
|
|
1116
1138
|
result += UpdateStatus(
|
|
1117
|
-
|
|
1139
|
+
row_count_stats=row_counts.insert_to_update(),
|
|
1140
|
+
cols_with_excs=[f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs],
|
|
1118
1141
|
)
|
|
1119
1142
|
self.store_tbl.delete_rows(
|
|
1120
1143
|
self.version, base_versions=base_versions, match_on_vmin=True, where_clause=where_clause
|
|
1121
1144
|
)
|
|
1122
|
-
self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False)
|
|
1123
|
-
else:
|
|
1124
|
-
result = UpdateStatus()
|
|
1125
1145
|
|
|
1126
1146
|
if cascade:
|
|
1127
1147
|
base_versions = [None if plan is None else self.version, *base_versions] # don't update in place
|
|
@@ -1137,7 +1157,8 @@ class TableVersion:
|
|
|
1137
1157
|
plan, None, recomputed_view_cols, base_versions=base_versions, timestamp=timestamp, cascade=True
|
|
1138
1158
|
)
|
|
1139
1159
|
result += status.to_cascade()
|
|
1140
|
-
|
|
1160
|
+
if create_new_table_version:
|
|
1161
|
+
self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False, update_status=result)
|
|
1141
1162
|
return result
|
|
1142
1163
|
|
|
1143
1164
|
def delete(self, where: Optional[exprs.Expr] = None) -> UpdateStatus:
|
|
@@ -1191,12 +1212,13 @@ class TableVersion:
|
|
|
1191
1212
|
if del_rows > 0:
|
|
1192
1213
|
# we're creating a new version
|
|
1193
1214
|
self.version += 1
|
|
1194
|
-
self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False)
|
|
1195
1215
|
for view in self.mutable_views:
|
|
1196
1216
|
status = view.get().propagate_delete(
|
|
1197
1217
|
where=None, base_versions=[self.version, *base_versions], timestamp=timestamp
|
|
1198
1218
|
)
|
|
1199
1219
|
result += status.to_cascade()
|
|
1220
|
+
if del_rows > 0:
|
|
1221
|
+
self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False, update_status=result)
|
|
1200
1222
|
return result
|
|
1201
1223
|
|
|
1202
1224
|
def revert(self) -> None:
|
|
@@ -1538,12 +1560,14 @@ class TableVersion:
|
|
|
1538
1560
|
{'class': f'{type(store).__module__}.{type(store).__qualname__}', 'md': store.as_dict()} for store in stores
|
|
1539
1561
|
]
|
|
1540
1562
|
|
|
1541
|
-
def _create_version_md(self, timestamp: float) -> schema.TableVersionMd:
|
|
1563
|
+
def _create_version_md(self, timestamp: float, update_status: Optional[UpdateStatus]) -> schema.TableVersionMd:
|
|
1542
1564
|
return schema.TableVersionMd(
|
|
1543
1565
|
tbl_id=str(self.id),
|
|
1544
1566
|
created_at=timestamp,
|
|
1545
1567
|
version=self.version,
|
|
1546
1568
|
schema_version=self.schema_version,
|
|
1569
|
+
user=Env.get().user,
|
|
1570
|
+
update_status=update_status,
|
|
1547
1571
|
additional_md={},
|
|
1548
1572
|
)
|
|
1549
1573
|
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
5
|
+
|
|
6
|
+
if TYPE_CHECKING:
|
|
7
|
+
from IPython.lib.pretty import RepresentationPrinter
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass(frozen=True)
|
|
11
|
+
class RowCountStats:
|
|
12
|
+
"""
|
|
13
|
+
Statistics about the counts of rows affected by a table operation.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
ins_rows: int = 0 # rows inserted
|
|
17
|
+
del_rows: int = 0 # rows deleted
|
|
18
|
+
upd_rows: int = 0 # rows updated
|
|
19
|
+
num_excs: int = 0 # total number of exceptions
|
|
20
|
+
# TODO: disambiguate what this means: # of slots computed or # of columns computed?
|
|
21
|
+
computed_values: int = 0 # number of computed values (e.g., computed columns) affected by the operation
|
|
22
|
+
|
|
23
|
+
@property
|
|
24
|
+
def num_rows(self) -> int:
|
|
25
|
+
return self.ins_rows + self.del_rows + self.upd_rows
|
|
26
|
+
|
|
27
|
+
def insert_to_update(self) -> 'RowCountStats':
|
|
28
|
+
"""
|
|
29
|
+
Convert insert row count stats to update row count stats.
|
|
30
|
+
This is used when an insert operation is treated as an update.
|
|
31
|
+
"""
|
|
32
|
+
return RowCountStats(
|
|
33
|
+
ins_rows=0,
|
|
34
|
+
del_rows=self.del_rows,
|
|
35
|
+
upd_rows=self.upd_rows + self.ins_rows,
|
|
36
|
+
num_excs=self.num_excs,
|
|
37
|
+
computed_values=self.computed_values,
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
def __add__(self, other: 'RowCountStats') -> 'RowCountStats':
|
|
41
|
+
"""
|
|
42
|
+
Add the stats from two RowCountStats objects together.
|
|
43
|
+
"""
|
|
44
|
+
return RowCountStats(
|
|
45
|
+
ins_rows=self.ins_rows + other.ins_rows,
|
|
46
|
+
del_rows=self.del_rows + other.del_rows,
|
|
47
|
+
upd_rows=self.upd_rows + other.upd_rows,
|
|
48
|
+
num_excs=self.num_excs + other.num_excs,
|
|
49
|
+
computed_values=self.computed_values + other.computed_values,
|
|
50
|
+
)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@dataclass(frozen=True)
|
|
54
|
+
class UpdateStatus:
|
|
55
|
+
"""
|
|
56
|
+
Information about changes to table data or table schema
|
|
57
|
+
"""
|
|
58
|
+
|
|
59
|
+
updated_cols: list[str] = field(default_factory=list)
|
|
60
|
+
cols_with_excs: list[str] = field(default_factory=list)
|
|
61
|
+
|
|
62
|
+
# stats for the rows affected by the operation
|
|
63
|
+
row_count_stats: RowCountStats = field(default_factory=RowCountStats)
|
|
64
|
+
|
|
65
|
+
# stats for changes cascaded to other tables
|
|
66
|
+
cascade_row_count_stats: RowCountStats = field(default_factory=RowCountStats)
|
|
67
|
+
|
|
68
|
+
# stats for the rows affected by the operation in an external store
|
|
69
|
+
ext_row_count_stats: RowCountStats = field(default_factory=RowCountStats)
|
|
70
|
+
|
|
71
|
+
@property
|
|
72
|
+
def num_rows(self) -> int:
|
|
73
|
+
return self.row_count_stats.num_rows + self.cascade_row_count_stats.num_rows
|
|
74
|
+
|
|
75
|
+
@property
|
|
76
|
+
def num_excs(self) -> int:
|
|
77
|
+
return self.row_count_stats.num_excs + self.cascade_row_count_stats.num_excs
|
|
78
|
+
|
|
79
|
+
@property
|
|
80
|
+
def num_computed_values(self) -> int:
|
|
81
|
+
return self.row_count_stats.computed_values + self.cascade_row_count_stats.computed_values
|
|
82
|
+
|
|
83
|
+
def insert_to_update(self) -> 'UpdateStatus':
|
|
84
|
+
"""
|
|
85
|
+
Convert the update status from an insert operation to an update operation.
|
|
86
|
+
This is used when an insert operation is treated as an update.
|
|
87
|
+
"""
|
|
88
|
+
return UpdateStatus(
|
|
89
|
+
updated_cols=self.updated_cols,
|
|
90
|
+
cols_with_excs=self.cols_with_excs,
|
|
91
|
+
row_count_stats=self.row_count_stats.insert_to_update(),
|
|
92
|
+
cascade_row_count_stats=self.cascade_row_count_stats.insert_to_update(),
|
|
93
|
+
ext_row_count_stats=self.ext_row_count_stats,
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
def to_cascade(self) -> 'UpdateStatus':
|
|
97
|
+
"""
|
|
98
|
+
Convert the update status to a cascade update status.
|
|
99
|
+
This is used when an operation cascades changes to other tables.
|
|
100
|
+
"""
|
|
101
|
+
return UpdateStatus(
|
|
102
|
+
updated_cols=self.updated_cols,
|
|
103
|
+
cols_with_excs=self.cols_with_excs,
|
|
104
|
+
row_count_stats=RowCountStats(),
|
|
105
|
+
cascade_row_count_stats=self.cascade_row_count_stats + self.row_count_stats,
|
|
106
|
+
ext_row_count_stats=self.ext_row_count_stats,
|
|
107
|
+
)
|
|
108
|
+
|
|
109
|
+
def __add__(self, other: 'UpdateStatus') -> UpdateStatus:
|
|
110
|
+
"""
|
|
111
|
+
Add the update status from two UpdateStatus objects together.
|
|
112
|
+
"""
|
|
113
|
+
return UpdateStatus(
|
|
114
|
+
updated_cols=list(dict.fromkeys(self.updated_cols + other.updated_cols)),
|
|
115
|
+
cols_with_excs=list(dict.fromkeys(self.cols_with_excs + other.cols_with_excs)),
|
|
116
|
+
row_count_stats=self.row_count_stats + other.row_count_stats,
|
|
117
|
+
cascade_row_count_stats=self.cascade_row_count_stats + other.cascade_row_count_stats,
|
|
118
|
+
ext_row_count_stats=self.ext_row_count_stats + other.ext_row_count_stats,
|
|
119
|
+
)
|
|
120
|
+
|
|
121
|
+
@property
|
|
122
|
+
def insert_msg(self) -> str:
|
|
123
|
+
"""Return a message describing the results of an insert operation."""
|
|
124
|
+
if self.num_excs == 0:
|
|
125
|
+
cols_with_excs_str = ''
|
|
126
|
+
else:
|
|
127
|
+
cols_with_excs_str = (
|
|
128
|
+
f' across {len(self.cols_with_excs)} column{"" if len(self.cols_with_excs) == 1 else "s"}'
|
|
129
|
+
)
|
|
130
|
+
cols_with_excs_str += f' ({", ".join(self.cols_with_excs)})'
|
|
131
|
+
msg = (
|
|
132
|
+
f'Inserted {self.num_rows} row{"" if self.num_rows == 1 else "s"} '
|
|
133
|
+
f'with {self.num_excs} error{"" if self.num_excs == 1 else "s"}{cols_with_excs_str}.'
|
|
134
|
+
)
|
|
135
|
+
return msg
|
|
136
|
+
|
|
137
|
+
@classmethod
|
|
138
|
+
def __cnt_str(cls, cnt: int, item: str) -> str:
|
|
139
|
+
assert cnt > 0
|
|
140
|
+
return f'{cnt} {item}{"" if cnt == 1 else "s"}'
|
|
141
|
+
|
|
142
|
+
def _repr_pretty_(self, p: 'RepresentationPrinter', cycle: bool) -> None:
|
|
143
|
+
messages = []
|
|
144
|
+
# Combine row count stats and cascade row count stats
|
|
145
|
+
stats = self.row_count_stats + self.cascade_row_count_stats
|
|
146
|
+
if stats.ins_rows > 0:
|
|
147
|
+
messages.append(f'{self.__cnt_str(stats.ins_rows, "row")} inserted')
|
|
148
|
+
if stats.del_rows > 0:
|
|
149
|
+
messages.append(f'{self.__cnt_str(stats.del_rows, "row")} deleted')
|
|
150
|
+
if stats.upd_rows > 0:
|
|
151
|
+
messages.append(f'{self.__cnt_str(stats.upd_rows, "row")} updated')
|
|
152
|
+
if stats.computed_values > 0:
|
|
153
|
+
messages.append(f'{self.__cnt_str(stats.computed_values, "value")} computed')
|
|
154
|
+
if stats.num_excs > 0:
|
|
155
|
+
messages.append(self.__cnt_str(stats.num_excs, 'exception'))
|
|
156
|
+
p.text(', '.join(messages) + '.' if len(messages) > 0 else 'No rows affected.')
|
|
157
|
+
|
|
158
|
+
@property
|
|
159
|
+
def pxt_rows_updated(self) -> int:
|
|
160
|
+
"""
|
|
161
|
+
Returns the number of Pixeltable rows that were updated as a result of the operation.
|
|
162
|
+
"""
|
|
163
|
+
return (self.row_count_stats + self.cascade_row_count_stats).upd_rows
|
|
164
|
+
|
|
165
|
+
@property
|
|
166
|
+
def external_rows_updated(self) -> int:
|
|
167
|
+
return self.ext_row_count_stats.upd_rows
|
|
168
|
+
|
|
169
|
+
@property
|
|
170
|
+
def external_rows_created(self) -> int:
|
|
171
|
+
return self.ext_row_count_stats.ins_rows
|
|
172
|
+
|
|
173
|
+
@property
|
|
174
|
+
def external_rows_deleted(self) -> int:
|
|
175
|
+
return self.ext_row_count_stats.del_rows
|
|
176
|
+
|
|
177
|
+
@property
|
|
178
|
+
def ext_num_rows(self) -> int:
|
|
179
|
+
return self.ext_row_count_stats.num_rows
|