pixeltable 0.2.10__py3-none-any.whl → 0.2.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +2 -2
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/catalog.py +3 -3
- pixeltable/catalog/globals.py +2 -0
- pixeltable/catalog/insertable_table.py +1 -11
- pixeltable/catalog/schema_object.py +28 -2
- pixeltable/catalog/table.py +76 -97
- pixeltable/catalog/table_version.py +96 -58
- pixeltable/catalog/table_version_path.py +1 -1
- pixeltable/catalog/view.py +31 -27
- pixeltable/dataframe.py +32 -115
- pixeltable/exprs/column_ref.py +2 -7
- pixeltable/exprs/similarity_expr.py +27 -16
- pixeltable/functions/openai.py +1 -1
- pixeltable/globals.py +70 -53
- pixeltable/index/embedding_index.py +28 -27
- pixeltable/io/external_store.py +2 -2
- pixeltable/io/globals.py +1 -1
- pixeltable/io/label_studio.py +3 -3
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_17.py +26 -0
- pixeltable/tool/create_test_db_dump.py +1 -1
- pixeltable/utils/formatter.py +234 -0
- {pixeltable-0.2.10.dist-info → pixeltable-0.2.12.dist-info}/METADATA +4 -4
- {pixeltable-0.2.10.dist-info → pixeltable-0.2.12.dist-info}/RECORD +27 -25
- {pixeltable-0.2.10.dist-info → pixeltable-0.2.12.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.10.dist-info → pixeltable-0.2.12.dist-info}/WHEEL +0 -0
|
@@ -5,29 +5,31 @@ import importlib
|
|
|
5
5
|
import inspect
|
|
6
6
|
import logging
|
|
7
7
|
import time
|
|
8
|
-
from typing import Optional, List, Dict, Any, Tuple, Type, Set, Iterable
|
|
9
8
|
import uuid
|
|
9
|
+
from typing import Optional, List, Dict, Any, Tuple, Type, Iterable
|
|
10
10
|
from uuid import UUID
|
|
11
11
|
|
|
12
12
|
import sqlalchemy as sql
|
|
13
13
|
import sqlalchemy.orm as orm
|
|
14
14
|
|
|
15
15
|
import pixeltable
|
|
16
|
-
import pixeltable.func as func
|
|
17
|
-
import pixeltable.type_system as ts
|
|
18
16
|
import pixeltable.exceptions as excs
|
|
17
|
+
import pixeltable.exprs as exprs
|
|
18
|
+
import pixeltable.func as func
|
|
19
19
|
import pixeltable.index as index
|
|
20
|
+
import pixeltable.type_system as ts
|
|
20
21
|
from pixeltable.env import Env
|
|
21
22
|
from pixeltable.iterators import ComponentIterator
|
|
22
23
|
from pixeltable.metadata import schema
|
|
23
24
|
from pixeltable.utils.filecache import FileCache
|
|
24
25
|
from pixeltable.utils.media_store import MediaStore
|
|
25
26
|
from .column import Column
|
|
26
|
-
from .globals import UpdateStatus, POS_COLUMN_NAME, is_valid_identifier
|
|
27
|
+
from .globals import UpdateStatus, POS_COLUMN_NAME, is_valid_identifier, _ROWID_COLUMN_NAME
|
|
27
28
|
from ..func.globals import resolve_symbol
|
|
28
29
|
|
|
29
30
|
_logger = logging.getLogger('pixeltable')
|
|
30
31
|
|
|
32
|
+
|
|
31
33
|
class TableVersion:
|
|
32
34
|
"""
|
|
33
35
|
TableVersion represents a particular version of a table/view along with its physical representation:
|
|
@@ -243,7 +245,6 @@ class TableVersion:
|
|
|
243
245
|
def _init_cols(self, tbl_md: schema.TableMd, schema_version_md: schema.TableSchemaVersionMd) -> None:
|
|
244
246
|
"""Initialize self.cols with the columns visible in our effective version"""
|
|
245
247
|
import pixeltable.exprs as exprs
|
|
246
|
-
from pixeltable.catalog import Catalog
|
|
247
248
|
|
|
248
249
|
self.cols = []
|
|
249
250
|
self.cols_by_name = {}
|
|
@@ -539,39 +540,12 @@ class TableVersion:
|
|
|
539
540
|
num_rows=row_count, num_computed_values=row_count, num_excs=num_excs,
|
|
540
541
|
cols_with_excs=[f'{col.tbl.name}.{col.name}'for col in cols_with_excs if col.name is not None])
|
|
541
542
|
|
|
542
|
-
def drop_column(self,
|
|
543
|
+
def drop_column(self, col: Column) -> None:
|
|
543
544
|
"""Drop a column from the table.
|
|
544
545
|
"""
|
|
545
546
|
from pixeltable.catalog import Catalog
|
|
546
547
|
|
|
547
548
|
assert not self.is_snapshot
|
|
548
|
-
if name not in self.cols_by_name:
|
|
549
|
-
raise excs.Error(f'Unknown column: {name}')
|
|
550
|
-
col = self.cols_by_name[name]
|
|
551
|
-
dependent_user_cols = [c for c in col.dependent_cols if c.name is not None]
|
|
552
|
-
if len(dependent_user_cols) > 0:
|
|
553
|
-
raise excs.Error(
|
|
554
|
-
f'Cannot drop column `{name}` because the following columns depend on it:\n'
|
|
555
|
-
f'{", ".join(c.name for c in dependent_user_cols)}'
|
|
556
|
-
)
|
|
557
|
-
# See if this column has a dependent store. We need to look through all stores in all
|
|
558
|
-
# (transitive) views of this table.
|
|
559
|
-
transitive_views = Catalog.get().tbls[self.id].get_views(recursive=True)
|
|
560
|
-
dependent_stores = [
|
|
561
|
-
(view, store)
|
|
562
|
-
for view in transitive_views
|
|
563
|
-
for store in view._tbl_version.external_stores.values()
|
|
564
|
-
if col in store.get_local_columns()
|
|
565
|
-
]
|
|
566
|
-
if len(dependent_stores) > 0:
|
|
567
|
-
dependent_store_names = [
|
|
568
|
-
store.name if view._get_id() == self.id else f'{store.name} (in view `{view.get_name()}`)'
|
|
569
|
-
for view, store in dependent_stores
|
|
570
|
-
]
|
|
571
|
-
raise excs.Error(
|
|
572
|
-
f'Cannot drop column `{name}` because the following external stores depend on it:\n'
|
|
573
|
-
f'{", ".join(dependent_store_names)}'
|
|
574
|
-
)
|
|
575
549
|
|
|
576
550
|
# we're creating a new schema version
|
|
577
551
|
self.version += 1
|
|
@@ -595,7 +569,7 @@ class TableVersion:
|
|
|
595
569
|
del self.idxs_by_name[idx_name]
|
|
596
570
|
self._drop_columns(dropped_cols)
|
|
597
571
|
self._update_md(time.time(), conn, preceding_schema_version=preceding_schema_version)
|
|
598
|
-
_logger.info(f'Dropped column {name} from table {self.name}, new version: {self.version}')
|
|
572
|
+
_logger.info(f'Dropped column {col.name} from table {self.name}, new version: {self.version}')
|
|
599
573
|
|
|
600
574
|
def _drop_columns(self, cols: Iterable[Column]) -> None:
|
|
601
575
|
"""Mark columns as dropped"""
|
|
@@ -704,15 +678,34 @@ class TableVersion:
|
|
|
704
678
|
return result
|
|
705
679
|
|
|
706
680
|
def update(
|
|
707
|
-
|
|
708
|
-
where_clause: Optional['pixeltable.exprs.Predicate'] = None, cascade: bool = True
|
|
681
|
+
self, value_spec: dict[str, Any], where: Optional['exprs.Predicate'] = None, cascade: bool = True
|
|
709
682
|
) -> UpdateStatus:
|
|
683
|
+
"""Update rows in this TableVersionPath.
|
|
684
|
+
Args:
|
|
685
|
+
value_spec: a list of (column, value) pairs specifying the columns to update and their new values.
|
|
686
|
+
where: a Predicate to filter rows to update.
|
|
687
|
+
cascade: if True, also update all computed columns that transitively depend on the updated columns,
|
|
688
|
+
including within views.
|
|
689
|
+
"""
|
|
690
|
+
if self.is_snapshot:
|
|
691
|
+
raise excs.Error('Cannot update a snapshot')
|
|
692
|
+
|
|
693
|
+
from pixeltable.plan import Planner
|
|
694
|
+
|
|
695
|
+
update_spec = self._validate_update_spec(value_spec, allow_pk=False, allow_exprs=True)
|
|
696
|
+
if where is not None:
|
|
697
|
+
if not isinstance(where, exprs.Predicate):
|
|
698
|
+
raise excs.Error(f"'where' argument must be a Predicate, got {type(where)}")
|
|
699
|
+
analysis_info = Planner.analyze(self.path, where)
|
|
700
|
+
# for now we require that the updated rows can be identified via SQL, rather than via a Python filter
|
|
701
|
+
if analysis_info.filter is not None:
|
|
702
|
+
raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
|
|
703
|
+
|
|
710
704
|
with Env.get().engine.begin() as conn:
|
|
711
|
-
return self._update(conn,
|
|
705
|
+
return self._update(conn, update_spec, where, cascade)
|
|
712
706
|
|
|
713
707
|
def batch_update(
|
|
714
|
-
self, batch: list[dict[Column, '
|
|
715
|
-
cascade: bool = True
|
|
708
|
+
self, batch: list[dict[Column, 'exprs.Expr']], rowids: list[tuple[int, ...]], cascade: bool = True
|
|
716
709
|
) -> UpdateStatus:
|
|
717
710
|
"""Update rows in batch.
|
|
718
711
|
Args:
|
|
@@ -721,7 +714,6 @@ class TableVersion:
|
|
|
721
714
|
"""
|
|
722
715
|
# if we do lookups of rowids, we must have one for each row in the batch
|
|
723
716
|
assert len(rowids) == 0 or len(rowids) == len(batch)
|
|
724
|
-
import pixeltable.exprs as exprs
|
|
725
717
|
result_status = UpdateStatus()
|
|
726
718
|
cols_with_excs: set[str] = set()
|
|
727
719
|
updated_cols: set[str] = set()
|
|
@@ -768,24 +760,61 @@ class TableVersion:
|
|
|
768
760
|
where_clause: Optional['pixeltable.exprs.Predicate'] = None, cascade: bool = True,
|
|
769
761
|
show_progress: bool = True
|
|
770
762
|
) -> UpdateStatus:
|
|
771
|
-
"""Update rows in this table.
|
|
772
|
-
Args:
|
|
773
|
-
update_targets: a list of (column, value) pairs specifying the columns to update and their new values.
|
|
774
|
-
where_clause: a Predicate to filter rows to update.
|
|
775
|
-
cascade: if True, also update all computed columns that transitively depend on the updated columns,
|
|
776
|
-
including within views.
|
|
777
|
-
"""
|
|
778
|
-
assert not self.is_snapshot
|
|
779
763
|
from pixeltable.plan import Planner
|
|
780
|
-
|
|
764
|
+
|
|
765
|
+
plan, updated_cols, recomputed_cols = (
|
|
781
766
|
Planner.create_update_plan(self.path, update_targets, [], where_clause, cascade)
|
|
782
|
-
|
|
767
|
+
)
|
|
768
|
+
result = self.propagate_update(
|
|
783
769
|
plan, where_clause.sql_expr() if where_clause is not None else None, recomputed_cols,
|
|
784
770
|
base_versions=[], conn=conn, timestamp=time.time(), cascade=cascade, show_progress=show_progress)
|
|
785
771
|
result.updated_cols = updated_cols
|
|
786
772
|
return result
|
|
787
773
|
|
|
788
|
-
def
|
|
774
|
+
def _validate_update_spec(
|
|
775
|
+
self, value_spec: dict[str, Any], allow_pk: bool, allow_exprs: bool
|
|
776
|
+
) -> dict[Column, 'exprs.Expr']:
|
|
777
|
+
update_targets: dict[Column, exprs.Expr] = {}
|
|
778
|
+
for col_name, val in value_spec.items():
|
|
779
|
+
if not isinstance(col_name, str):
|
|
780
|
+
raise excs.Error(f'Update specification: dict key must be column name, got {col_name!r}')
|
|
781
|
+
if col_name == _ROWID_COLUMN_NAME:
|
|
782
|
+
# ignore pseudo-column _rowid
|
|
783
|
+
continue
|
|
784
|
+
col = self.path.get_column(col_name, include_bases=False)
|
|
785
|
+
if col is None:
|
|
786
|
+
# TODO: return more informative error if this is trying to update a base column
|
|
787
|
+
raise excs.Error(f'Column {col_name} unknown')
|
|
788
|
+
if col.is_computed:
|
|
789
|
+
raise excs.Error(f'Column {col_name} is computed and cannot be updated')
|
|
790
|
+
if col.is_pk and not allow_pk:
|
|
791
|
+
raise excs.Error(f'Column {col_name} is a primary key column and cannot be updated')
|
|
792
|
+
if col.col_type.is_media_type():
|
|
793
|
+
raise excs.Error(f'Column {col_name} has type image/video/audio/document and cannot be updated')
|
|
794
|
+
|
|
795
|
+
# make sure that the value is compatible with the column type
|
|
796
|
+
try:
|
|
797
|
+
# check if this is a literal
|
|
798
|
+
value_expr = exprs.Literal(val, col_type=col.col_type)
|
|
799
|
+
except TypeError:
|
|
800
|
+
if not allow_exprs:
|
|
801
|
+
raise excs.Error(
|
|
802
|
+
f'Column {col_name}: value {val!r} is not a valid literal for this column '
|
|
803
|
+
f'(expected {col.col_type})')
|
|
804
|
+
# it's not a literal, let's try to create an expr from it
|
|
805
|
+
value_expr = exprs.Expr.from_object(val)
|
|
806
|
+
if value_expr is None:
|
|
807
|
+
raise excs.Error(f'Column {col_name}: value {val!r} is not a recognized literal or expression')
|
|
808
|
+
if not col.col_type.matches(value_expr.col_type):
|
|
809
|
+
raise excs.Error((
|
|
810
|
+
f'Type of value {val!r} ({value_expr.col_type}) is not compatible with the type of column '
|
|
811
|
+
f'{col_name} ({col.col_type})'
|
|
812
|
+
))
|
|
813
|
+
update_targets[col] = value_expr
|
|
814
|
+
|
|
815
|
+
return update_targets
|
|
816
|
+
|
|
817
|
+
def propagate_update(
|
|
789
818
|
self, plan: Optional[exec.ExecNode], where_clause: Optional[sql.ClauseElement],
|
|
790
819
|
recomputed_view_cols: List[Column], base_versions: List[Optional[int]], conn: sql.engine.Connection,
|
|
791
820
|
timestamp: float, cascade: bool, show_progress: bool = True
|
|
@@ -810,7 +839,7 @@ class TableVersion:
|
|
|
810
839
|
if len(recomputed_cols) > 0:
|
|
811
840
|
from pixeltable.plan import Planner
|
|
812
841
|
plan = Planner.create_view_update_plan(view.path, recompute_targets=recomputed_cols)
|
|
813
|
-
status = view.
|
|
842
|
+
status = view.propagate_update(
|
|
814
843
|
plan, None, recomputed_view_cols, base_versions=base_versions, conn=conn, timestamp=timestamp, cascade=True)
|
|
815
844
|
result.num_rows += status.num_rows
|
|
816
845
|
result.num_excs += status.num_excs
|
|
@@ -819,22 +848,31 @@ class TableVersion:
|
|
|
819
848
|
result.cols_with_excs = list(dict.fromkeys(result.cols_with_excs).keys()) # remove duplicates
|
|
820
849
|
return result
|
|
821
850
|
|
|
822
|
-
def delete(self, where: Optional['
|
|
851
|
+
def delete(self, where: Optional['exprs.Predicate'] = None) -> UpdateStatus:
|
|
823
852
|
"""Delete rows in this table.
|
|
824
853
|
Args:
|
|
825
854
|
where: a Predicate to filter rows to delete.
|
|
826
855
|
"""
|
|
827
856
|
assert self.is_insertable()
|
|
857
|
+
from pixeltable.exprs import Predicate
|
|
828
858
|
from pixeltable.plan import Planner
|
|
829
|
-
|
|
859
|
+
if where is not None:
|
|
860
|
+
if not isinstance(where, Predicate):
|
|
861
|
+
raise excs.Error(f"'where' argument must be a Predicate, got {type(where)}")
|
|
862
|
+
analysis_info = Planner.analyze(self.path, where)
|
|
863
|
+
# for now we require that the updated rows can be identified via SQL, rather than via a Python filter
|
|
864
|
+
if analysis_info.filter is not None:
|
|
865
|
+
raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
|
|
866
|
+
|
|
867
|
+
analysis_info = Planner.analyze(self.path, where)
|
|
830
868
|
with Env.get().engine.begin() as conn:
|
|
831
|
-
num_rows = self.
|
|
869
|
+
num_rows = self.propagate_delete(analysis_info.sql_where_clause, base_versions=[], conn=conn, timestamp=time.time())
|
|
832
870
|
|
|
833
871
|
status = UpdateStatus(num_rows=num_rows)
|
|
834
872
|
return status
|
|
835
873
|
|
|
836
|
-
def
|
|
837
|
-
self, where: Optional['
|
|
874
|
+
def propagate_delete(
|
|
875
|
+
self, where: Optional['exprs.Predicate'], base_versions: List[Optional[int]],
|
|
838
876
|
conn: sql.engine.Connection, timestamp: float) -> int:
|
|
839
877
|
"""Delete rows in this table and propagate to views.
|
|
840
878
|
Args:
|
|
@@ -853,7 +891,7 @@ class TableVersion:
|
|
|
853
891
|
else:
|
|
854
892
|
pass
|
|
855
893
|
for view in self.mutable_views:
|
|
856
|
-
num_rows += view.
|
|
894
|
+
num_rows += view.propagate_delete(
|
|
857
895
|
where=None, base_versions=[self.version] + base_versions, conn=conn, timestamp=timestamp)
|
|
858
896
|
return num_rows
|
|
859
897
|
|
|
@@ -5,13 +5,13 @@ from typing import Optional, Union
|
|
|
5
5
|
from uuid import UUID
|
|
6
6
|
|
|
7
7
|
import pixeltable
|
|
8
|
-
import pixeltable.catalog as catalog
|
|
9
8
|
from .column import Column
|
|
10
9
|
from .globals import POS_COLUMN_NAME
|
|
11
10
|
from .table_version import TableVersion
|
|
12
11
|
|
|
13
12
|
_logger = logging.getLogger('pixeltable')
|
|
14
13
|
|
|
14
|
+
|
|
15
15
|
class TableVersionPath:
|
|
16
16
|
"""
|
|
17
17
|
A TableVersionPath represents the sequence of TableVersions from a base table to a particular view:
|
pixeltable/catalog/view.py
CHANGED
|
@@ -1,30 +1,33 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import inspect
|
|
2
4
|
import logging
|
|
3
|
-
from typing import
|
|
5
|
+
from typing import Optional, Type, Dict, Set, Any, Iterable, TYPE_CHECKING
|
|
4
6
|
from uuid import UUID
|
|
5
|
-
import inspect
|
|
6
7
|
|
|
7
8
|
import sqlalchemy.orm as orm
|
|
8
9
|
|
|
9
|
-
from .table import Table
|
|
10
|
-
from .table_version import TableVersion
|
|
11
|
-
from .table_version_path import TableVersionPath
|
|
12
|
-
from .column import Column
|
|
13
|
-
from .catalog import Catalog
|
|
14
|
-
from .globals import POS_COLUMN_NAME, UpdateStatus
|
|
15
|
-
from pixeltable.env import Env
|
|
16
|
-
from pixeltable.iterators import ComponentIterator
|
|
17
|
-
from pixeltable.exceptions import Error
|
|
18
|
-
import pixeltable.func as func
|
|
19
|
-
import pixeltable.type_system as ts
|
|
20
10
|
import pixeltable.catalog as catalog
|
|
11
|
+
import pixeltable.exceptions as excs
|
|
12
|
+
import pixeltable.func as func
|
|
21
13
|
import pixeltable.metadata.schema as md_schema
|
|
14
|
+
from pixeltable.env import Env
|
|
15
|
+
from pixeltable.exceptions import Error
|
|
16
|
+
from pixeltable.iterators import ComponentIterator
|
|
22
17
|
from pixeltable.type_system import InvalidType, IntType
|
|
23
|
-
|
|
18
|
+
from .catalog import Catalog
|
|
19
|
+
from .column import Column
|
|
20
|
+
from .globals import POS_COLUMN_NAME, UpdateStatus
|
|
21
|
+
from .table import Table
|
|
22
|
+
from .table_version import TableVersion
|
|
23
|
+
from .table_version_path import TableVersionPath
|
|
24
24
|
|
|
25
|
+
if TYPE_CHECKING:
|
|
26
|
+
import pixeltable as pxt
|
|
25
27
|
|
|
26
28
|
_logger = logging.getLogger('pixeltable')
|
|
27
29
|
|
|
30
|
+
|
|
28
31
|
class View(Table):
|
|
29
32
|
"""A `Table` that presents a virtual view of another table (or view).
|
|
30
33
|
|
|
@@ -34,10 +37,11 @@ class View(Table):
|
|
|
34
37
|
is simply a reference to a specific set of base versions.
|
|
35
38
|
"""
|
|
36
39
|
def __init__(
|
|
37
|
-
self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath,
|
|
40
|
+
self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath, base_id: UUID,
|
|
38
41
|
snapshot_only: bool):
|
|
39
42
|
super().__init__(id, dir_id, name, tbl_version_path)
|
|
40
|
-
|
|
43
|
+
assert base_id in catalog.Catalog.get().tbl_dependents
|
|
44
|
+
self._base_id = base_id # keep a reference to the base Table ID, so that we can keep track of its dependents
|
|
41
45
|
self._snapshot_only = snapshot_only
|
|
42
46
|
|
|
43
47
|
@classmethod
|
|
@@ -46,8 +50,8 @@ class View(Table):
|
|
|
46
50
|
|
|
47
51
|
@classmethod
|
|
48
52
|
def create(
|
|
49
|
-
cls, dir_id: UUID, name: str, base:
|
|
50
|
-
predicate: 'exprs.Predicate', is_snapshot: bool, num_retained_versions: int, comment: str,
|
|
53
|
+
cls, dir_id: UUID, name: str, base: TableVersionPath, schema: Dict[str, Any],
|
|
54
|
+
predicate: 'pxt.exprs.Predicate', is_snapshot: bool, num_retained_versions: int, comment: str,
|
|
51
55
|
iterator_cls: Optional[Type[ComponentIterator]], iterator_args: Optional[Dict]
|
|
52
56
|
) -> View:
|
|
53
57
|
columns = cls._create_columns(schema)
|
|
@@ -55,8 +59,8 @@ class View(Table):
|
|
|
55
59
|
|
|
56
60
|
# verify that filter can be evaluated in the context of the base
|
|
57
61
|
if predicate is not None:
|
|
58
|
-
if not predicate.is_bound_by(base
|
|
59
|
-
raise excs.Error(f'Filter cannot be computed in the context of the base {base.
|
|
62
|
+
if not predicate.is_bound_by(base):
|
|
63
|
+
raise excs.Error(f'Filter cannot be computed in the context of the base {base.tbl_name()}')
|
|
60
64
|
# create a copy that we can modify and store
|
|
61
65
|
predicate = predicate.copy()
|
|
62
66
|
|
|
@@ -65,9 +69,9 @@ class View(Table):
|
|
|
65
69
|
if not col.is_computed:
|
|
66
70
|
continue
|
|
67
71
|
# make sure that the value can be computed in the context of the base
|
|
68
|
-
if col.value_expr is not None and not col.value_expr.is_bound_by(base
|
|
72
|
+
if col.value_expr is not None and not col.value_expr.is_bound_by(base):
|
|
69
73
|
raise excs.Error(
|
|
70
|
-
f'Column {col.name}: value expression cannot be computed in the context of the base {base.
|
|
74
|
+
f'Column {col.name}: value expression cannot be computed in the context of the base {base.tbl_name()}')
|
|
71
75
|
|
|
72
76
|
if iterator_cls is not None:
|
|
73
77
|
assert iterator_args is not None
|
|
@@ -114,7 +118,7 @@ class View(Table):
|
|
|
114
118
|
iterator_args_expr = InlineDict(iterator_args) if iterator_args is not None else None
|
|
115
119
|
iterator_class_fqn = f'{iterator_cls.__module__}.{iterator_cls.__name__}' if iterator_cls is not None \
|
|
116
120
|
else None
|
|
117
|
-
base_version_path = cls._get_snapshot_path(base
|
|
121
|
+
base_version_path = cls._get_snapshot_path(base) if is_snapshot else base
|
|
118
122
|
base_versions = [
|
|
119
123
|
(tbl_version.id.hex, tbl_version.version if is_snapshot or tbl_version.is_snapshot else None)
|
|
120
124
|
for tbl_version in base_version_path.get_tbl_versions()
|
|
@@ -139,11 +143,11 @@ class View(Table):
|
|
|
139
143
|
session, dir_id, name, columns, num_retained_versions, comment, base_path=base_version_path, view_md=view_md)
|
|
140
144
|
if tbl_version is None:
|
|
141
145
|
# this is purely a snapshot: we use the base's tbl version path
|
|
142
|
-
view = cls(id, dir_id, name, base_version_path, base, snapshot_only=True)
|
|
146
|
+
view = cls(id, dir_id, name, base_version_path, base.tbl_id(), snapshot_only=True)
|
|
143
147
|
_logger.info(f'created snapshot {name}')
|
|
144
148
|
else:
|
|
145
149
|
view = cls(
|
|
146
|
-
id, dir_id, name, TableVersionPath(tbl_version, base=base_version_path), base,
|
|
150
|
+
id, dir_id, name, TableVersionPath(tbl_version, base=base_version_path), base.tbl_id(),
|
|
147
151
|
snapshot_only=False)
|
|
148
152
|
_logger.info(f'Created view `{name}`, id={tbl_version.id}')
|
|
149
153
|
|
|
@@ -156,7 +160,7 @@ class View(Table):
|
|
|
156
160
|
session.commit()
|
|
157
161
|
cat = Catalog.get()
|
|
158
162
|
cat.tbl_dependents[view._id] = []
|
|
159
|
-
cat.tbl_dependents[base.
|
|
163
|
+
cat.tbl_dependents[base.tbl_id()].append(view)
|
|
160
164
|
cat.tbls[view._id] = view
|
|
161
165
|
return view
|
|
162
166
|
|
|
@@ -200,7 +204,7 @@ class View(Table):
|
|
|
200
204
|
del cat.tbls[self._id]
|
|
201
205
|
else:
|
|
202
206
|
super()._drop()
|
|
203
|
-
cat.tbl_dependents[self.
|
|
207
|
+
cat.tbl_dependents[self._base_id].remove(self)
|
|
204
208
|
del cat.tbl_dependents[self._id]
|
|
205
209
|
|
|
206
210
|
def insert(
|
pixeltable/dataframe.py
CHANGED
|
@@ -1,32 +1,27 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
import base64
|
|
4
3
|
import copy
|
|
5
4
|
import hashlib
|
|
6
|
-
import io
|
|
7
5
|
import json
|
|
8
6
|
import logging
|
|
9
7
|
import mimetypes
|
|
10
8
|
import traceback
|
|
11
9
|
from pathlib import Path
|
|
12
|
-
from typing import List, Optional, Any, Dict, Iterator, Tuple, Set
|
|
10
|
+
from typing import List, Optional, Any, Dict, Iterator, Tuple, Set, Callable
|
|
13
11
|
|
|
14
|
-
import PIL.Image
|
|
15
|
-
import cv2
|
|
16
12
|
import pandas as pd
|
|
17
13
|
import pandas.io.formats.style
|
|
18
14
|
import sqlalchemy as sql
|
|
19
|
-
from PIL import Image
|
|
20
15
|
|
|
21
16
|
import pixeltable.catalog as catalog
|
|
22
17
|
import pixeltable.exceptions as excs
|
|
23
18
|
import pixeltable.exprs as exprs
|
|
24
|
-
import pixeltable.type_system as ts
|
|
25
|
-
import pixeltable.func as func
|
|
26
19
|
from pixeltable.catalog import is_valid_identifier
|
|
20
|
+
from pixeltable.catalog.globals import UpdateStatus
|
|
27
21
|
from pixeltable.env import Env
|
|
28
22
|
from pixeltable.plan import Planner
|
|
29
23
|
from pixeltable.type_system import ColumnType
|
|
24
|
+
from pixeltable.utils.formatter import Formatter
|
|
30
25
|
from pixeltable.utils.http_server import get_file_uri
|
|
31
26
|
|
|
32
27
|
__all__ = ['DataFrame']
|
|
@@ -47,12 +42,7 @@ class DataFrameResultSet:
|
|
|
47
42
|
self._rows = rows
|
|
48
43
|
self._col_names = col_names
|
|
49
44
|
self._col_types = col_types
|
|
50
|
-
self.
|
|
51
|
-
ts.ImageType: self._format_img,
|
|
52
|
-
ts.VideoType: self._format_video,
|
|
53
|
-
ts.AudioType: self._format_audio,
|
|
54
|
-
ts.DocumentType: self._format_document,
|
|
55
|
-
}
|
|
45
|
+
self.__formatter = Formatter(len(self._rows), len(self._col_names), Env.get().http_address)
|
|
56
46
|
|
|
57
47
|
def __len__(self) -> int:
|
|
58
48
|
return len(self._rows)
|
|
@@ -67,11 +57,11 @@ class DataFrameResultSet:
|
|
|
67
57
|
return self.to_pandas().__repr__()
|
|
68
58
|
|
|
69
59
|
def _repr_html_(self) -> str:
|
|
70
|
-
formatters = {
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
if
|
|
74
|
-
|
|
60
|
+
formatters: dict[str, Callable] = {}
|
|
61
|
+
for col_name, col_type in zip(self._col_names, self._col_types):
|
|
62
|
+
formatter = self.__formatter.get_pandas_formatter(col_type)
|
|
63
|
+
if formatter is not None:
|
|
64
|
+
formatters[col_name] = formatter
|
|
75
65
|
return self.to_pandas().to_html(formatters=formatters, escape=False, index=False)
|
|
76
66
|
|
|
77
67
|
def __str__(self) -> str:
|
|
@@ -87,100 +77,6 @@ class DataFrameResultSet:
|
|
|
87
77
|
def _row_to_dict(self, row_idx: int) -> Dict[str, Any]:
|
|
88
78
|
return {self._col_names[i]: self._rows[row_idx][i] for i in range(len(self._col_names))}
|
|
89
79
|
|
|
90
|
-
# Formatters
|
|
91
|
-
def _format_img(self, img: Image.Image) -> str:
|
|
92
|
-
"""
|
|
93
|
-
Create <img> tag for Image object.
|
|
94
|
-
"""
|
|
95
|
-
assert isinstance(img, Image.Image), f'Wrong type: {type(img)}'
|
|
96
|
-
# Try to make it look decent in a variety of display scenarios
|
|
97
|
-
if len(self._rows) > 1:
|
|
98
|
-
width = 240 # Multiple rows: display small images
|
|
99
|
-
elif len(self._col_names) > 1:
|
|
100
|
-
width = 480 # Multiple columns: display medium images
|
|
101
|
-
else:
|
|
102
|
-
width = 640 # A single image: larger display
|
|
103
|
-
with io.BytesIO() as buffer:
|
|
104
|
-
img.save(buffer, 'jpeg')
|
|
105
|
-
img_base64 = base64.b64encode(buffer.getvalue()).decode()
|
|
106
|
-
return f"""
|
|
107
|
-
<div class="pxt_image" style="width:{width}px;">
|
|
108
|
-
<img src="data:image/jpeg;base64,{img_base64}" width="{width}" />
|
|
109
|
-
</div>
|
|
110
|
-
"""
|
|
111
|
-
|
|
112
|
-
def _format_video(self, file_path: str) -> str:
|
|
113
|
-
thumb_tag = ''
|
|
114
|
-
# Attempt to extract the first frame of the video to use as a thumbnail,
|
|
115
|
-
# so that the notebook can be exported as HTML and viewed in contexts where
|
|
116
|
-
# the video itself is not accessible.
|
|
117
|
-
# TODO(aaron-siegel): If the video is backed by a concrete external URL,
|
|
118
|
-
# should we link to that instead?
|
|
119
|
-
video_reader = cv2.VideoCapture(str(file_path))
|
|
120
|
-
if video_reader.isOpened():
|
|
121
|
-
status, img_array = video_reader.read()
|
|
122
|
-
if status:
|
|
123
|
-
img_array = cv2.cvtColor(img_array, cv2.COLOR_BGR2RGB)
|
|
124
|
-
thumb = PIL.Image.fromarray(img_array)
|
|
125
|
-
with io.BytesIO() as buffer:
|
|
126
|
-
thumb.save(buffer, 'jpeg')
|
|
127
|
-
thumb_base64 = base64.b64encode(buffer.getvalue()).decode()
|
|
128
|
-
thumb_tag = f'poster="data:image/jpeg;base64,{thumb_base64}"'
|
|
129
|
-
video_reader.release()
|
|
130
|
-
if len(self._rows) > 1:
|
|
131
|
-
width = 320
|
|
132
|
-
elif len(self._col_names) > 1:
|
|
133
|
-
width = 480
|
|
134
|
-
else:
|
|
135
|
-
width = 800
|
|
136
|
-
return f"""
|
|
137
|
-
<div class="pxt_video" style="width:{width}px;">
|
|
138
|
-
<video controls width="{width}" {thumb_tag}>
|
|
139
|
-
{_create_source_tag(file_path)}
|
|
140
|
-
</video>
|
|
141
|
-
</div>
|
|
142
|
-
"""
|
|
143
|
-
|
|
144
|
-
def _format_document(self, file_path: str) -> str:
|
|
145
|
-
max_width = max_height = 320
|
|
146
|
-
# by default, file path will be shown as a link
|
|
147
|
-
inner_element = file_path
|
|
148
|
-
# try generating a thumbnail for different types and use that if successful
|
|
149
|
-
if file_path.lower().endswith('.pdf'):
|
|
150
|
-
try:
|
|
151
|
-
import fitz
|
|
152
|
-
|
|
153
|
-
doc = fitz.open(file_path)
|
|
154
|
-
p = doc.get_page_pixmap(0)
|
|
155
|
-
while p.width > max_width or p.height > max_height:
|
|
156
|
-
# shrink(1) will halve each dimension
|
|
157
|
-
p.shrink(1)
|
|
158
|
-
data = p.tobytes(output='jpeg')
|
|
159
|
-
thumb_base64 = base64.b64encode(data).decode()
|
|
160
|
-
img_src = f'data:image/jpeg;base64,{thumb_base64}'
|
|
161
|
-
inner_element = f"""
|
|
162
|
-
<img style="object-fit: contain; border: 1px solid black;" src="{img_src}" />
|
|
163
|
-
"""
|
|
164
|
-
except:
|
|
165
|
-
logging.warning(f'Failed to produce PDF thumbnail {file_path}. Make sure you have PyMuPDF installed.')
|
|
166
|
-
|
|
167
|
-
return f"""
|
|
168
|
-
<div class="pxt_document" style="width:{max_width}px;">
|
|
169
|
-
<a href="{get_file_uri(Env.get().http_address, file_path)}">
|
|
170
|
-
{inner_element}
|
|
171
|
-
</a>
|
|
172
|
-
</div>
|
|
173
|
-
"""
|
|
174
|
-
|
|
175
|
-
def _format_audio(self, file_path: str) -> str:
|
|
176
|
-
return f"""
|
|
177
|
-
<div class="pxt_audio">
|
|
178
|
-
<audio controls>
|
|
179
|
-
{_create_source_tag(file_path)}
|
|
180
|
-
</audio>
|
|
181
|
-
</div>
|
|
182
|
-
"""
|
|
183
|
-
|
|
184
80
|
def __getitem__(self, index: Any) -> Any:
|
|
185
81
|
if isinstance(index, str):
|
|
186
82
|
if index not in self._col_names:
|
|
@@ -595,7 +491,7 @@ class DataFrame:
|
|
|
595
491
|
raise excs.Error(f'Invalid name: {name}')
|
|
596
492
|
base_list = [(expr, None) for expr in items] + [(expr, k) for (k, expr) in named_items.items()]
|
|
597
493
|
if len(base_list) == 0:
|
|
598
|
-
|
|
494
|
+
return self
|
|
599
495
|
|
|
600
496
|
# analyze select list; wrap literals with the corresponding expressions
|
|
601
497
|
select_list = []
|
|
@@ -662,7 +558,7 @@ class DataFrame:
|
|
|
662
558
|
# we need to make sure that the grouping table is a base of self.tbl
|
|
663
559
|
base = self.tbl.find_tbl_version(item._tbl_version_path.tbl_id())
|
|
664
560
|
if base is None or base.id == self.tbl.tbl_id():
|
|
665
|
-
raise excs.Error(f'group_by(): {item.
|
|
561
|
+
raise excs.Error(f'group_by(): {item.name} is not a base table of {self.tbl.tbl_name()}')
|
|
666
562
|
grouping_tbl = item._tbl_version_path.tbl_version
|
|
667
563
|
break
|
|
668
564
|
if not isinstance(item, exprs.Expr):
|
|
@@ -708,6 +604,27 @@ class DataFrame:
|
|
|
708
604
|
limit=n,
|
|
709
605
|
)
|
|
710
606
|
|
|
607
|
+
def update(self, value_spec: dict[str, Any], cascade: bool = True) -> UpdateStatus:
|
|
608
|
+
self._validate_mutable('update')
|
|
609
|
+
return self.tbl.tbl_version.update(value_spec, where=self.where_clause, cascade=cascade)
|
|
610
|
+
|
|
611
|
+
def delete(self) -> UpdateStatus:
|
|
612
|
+
self._validate_mutable('delete')
|
|
613
|
+
if not self.tbl.is_insertable():
|
|
614
|
+
raise excs.Error(f'Cannot delete from view')
|
|
615
|
+
return self.tbl.tbl_version.delete(where=self.where_clause)
|
|
616
|
+
|
|
617
|
+
def _validate_mutable(self, op_name: str) -> None:
|
|
618
|
+
"""Tests whether this `DataFrame` can be mutated (such as by an update operation)."""
|
|
619
|
+
if self.group_by_clause is not None or self.grouping_tbl is not None:
|
|
620
|
+
raise excs.Error(f'Cannot use `{op_name}` after `group_by`')
|
|
621
|
+
if self.order_by_clause is not None:
|
|
622
|
+
raise excs.Error(f'Cannot use `{op_name}` after `order_by`')
|
|
623
|
+
if self.select_list is not None:
|
|
624
|
+
raise excs.Error(f'Cannot use `{op_name}` after `select`')
|
|
625
|
+
if self.limit_val is not None:
|
|
626
|
+
raise excs.Error(f'Cannot use `{op_name}` after `limit`')
|
|
627
|
+
|
|
711
628
|
def __getitem__(self, index: object) -> DataFrame:
|
|
712
629
|
"""
|
|
713
630
|
Allowed:
|
pixeltable/exprs/column_ref.py
CHANGED
|
@@ -63,14 +63,9 @@ class ColumnRef(Expr):
|
|
|
63
63
|
|
|
64
64
|
return super().__getattr__(name)
|
|
65
65
|
|
|
66
|
-
def similarity(self,
|
|
67
|
-
# if isinstance(other, Expr):
|
|
68
|
-
# raise excs.Error(f'similarity(): requires a string or a PIL.Image.Image object, not an expression')
|
|
69
|
-
item = Expr.from_object(other)
|
|
70
|
-
if item is None or not(item.col_type.is_string_type() or item.col_type.is_image_type()):
|
|
71
|
-
raise excs.Error(f'similarity(): requires a string or a PIL.Image.Image object, not a {type(other)}')
|
|
66
|
+
def similarity(self, item: Any, *, idx: Optional[str] = None) -> Expr:
|
|
72
67
|
from .similarity_expr import SimilarityExpr
|
|
73
|
-
return SimilarityExpr(self, item)
|
|
68
|
+
return SimilarityExpr(self, item, idx_name=idx)
|
|
74
69
|
|
|
75
70
|
def default_column_name(self) -> Optional[str]:
|
|
76
71
|
return str(self)
|