PyPI - pixeltable - Versions diffs - 0.2.10__py3-none-any.whl → 0.2.12__py3-none-any.whl - Mend

pixeltable 0.2.10__py3-none-any.whl → 0.2.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pixeltable might be problematic. Click here for more details.

Files changed (27)

pixeltable/__init__.py +2 -2
pixeltable/__version__.py +2 -2
pixeltable/catalog/catalog.py +3 -3
pixeltable/catalog/globals.py +2 -0
pixeltable/catalog/insertable_table.py +1 -11
pixeltable/catalog/schema_object.py +28 -2
pixeltable/catalog/table.py +76 -97
pixeltable/catalog/table_version.py +96 -58
pixeltable/catalog/table_version_path.py +1 -1
pixeltable/catalog/view.py +31 -27
pixeltable/dataframe.py +32 -115
pixeltable/exprs/column_ref.py +2 -7
pixeltable/exprs/similarity_expr.py +27 -16
pixeltable/functions/openai.py +1 -1
pixeltable/globals.py +70 -53
pixeltable/index/embedding_index.py +28 -27
pixeltable/io/external_store.py +2 -2
pixeltable/io/globals.py +1 -1
pixeltable/io/label_studio.py +3 -3
pixeltable/metadata/__init__.py +1 -1
pixeltable/metadata/converters/convert_17.py +26 -0
pixeltable/tool/create_test_db_dump.py +1 -1
pixeltable/utils/formatter.py +234 -0
{pixeltable-0.2.10.dist-info → pixeltable-0.2.12.dist-info}/METADATA +4 -4
{pixeltable-0.2.10.dist-info → pixeltable-0.2.12.dist-info}/RECORD +27 -25
{pixeltable-0.2.10.dist-info → pixeltable-0.2.12.dist-info}/LICENSE +0 -0
{pixeltable-0.2.10.dist-info → pixeltable-0.2.12.dist-info}/WHEEL +0 -0

pixeltable/catalog/table_version.py CHANGED Viewed

@@ -5,29 +5,31 @@ import importlib
 import inspect
 import logging
 import time
-from typing import Optional, List, Dict, Any, Tuple, Type, Set, Iterable
 import uuid
+from typing import Optional, List, Dict, Any, Tuple, Type, Iterable
 from uuid import UUID
 import sqlalchemy as sql
 import sqlalchemy.orm as orm
 import pixeltable
-import pixeltable.func as func
-import pixeltable.type_system as ts
 import pixeltable.exceptions as excs
+import pixeltable.exprs as exprs
+import pixeltable.func as func
 import pixeltable.index as index
+import pixeltable.type_system as ts
 from pixeltable.env import Env
 from pixeltable.iterators import ComponentIterator
 from pixeltable.metadata import schema
 from pixeltable.utils.filecache import FileCache
 from pixeltable.utils.media_store import MediaStore
 from .column import Column
-from .globals import UpdateStatus, POS_COLUMN_NAME, is_valid_identifier
+from .globals import UpdateStatus, POS_COLUMN_NAME, is_valid_identifier, _ROWID_COLUMN_NAME
 from ..func.globals import resolve_symbol
 _logger = logging.getLogger('pixeltable')
 class TableVersion:
     """
     TableVersion represents a particular version of a table/view along with its physical representation:
@@ -243,7 +245,6 @@ class TableVersion:
     def _init_cols(self, tbl_md: schema.TableMd, schema_version_md: schema.TableSchemaVersionMd) -> None:
         """Initialize self.cols with the columns visible in our effective version"""
         import pixeltable.exprs as exprs
-        from pixeltable.catalog import Catalog
         self.cols = []
         self.cols_by_name = {}
@@ -539,39 +540,12 @@ class TableVersion:
             num_rows=row_count, num_computed_values=row_count, num_excs=num_excs,
             cols_with_excs=[f'{col.tbl.name}.{col.name}'for col in cols_with_excs if col.name is not None])
-    def drop_column(self, name: str) -> None:
+    def drop_column(self, col: Column) -> None:
         """Drop a column from the table.
         """
         from pixeltable.catalog import Catalog
         assert not self.is_snapshot
-        if name not in self.cols_by_name:
-            raise excs.Error(f'Unknown column: {name}')
-        col = self.cols_by_name[name]
-        dependent_user_cols = [c for c in col.dependent_cols if c.name is not None]
-        if len(dependent_user_cols) > 0:
-            raise excs.Error(
-                f'Cannot drop column `{name}` because the following columns depend on it:\n'
-                f'{", ".join(c.name for c in dependent_user_cols)}'
-            )
-        # See if this column has a dependent store. We need to look through all stores in all
-        # (transitive) views of this table.
-        transitive_views = Catalog.get().tbls[self.id].get_views(recursive=True)
-        dependent_stores = [
-            (view, store)
-            for view in transitive_views
-            for store in view._tbl_version.external_stores.values()
-            if col in store.get_local_columns()
-        ]
-        if len(dependent_stores) > 0:
-            dependent_store_names = [
-                store.name if view._get_id() == self.id else f'{store.name} (in view `{view.get_name()}`)'
-                for view, store in dependent_stores
-            ]
-            raise excs.Error(
-                f'Cannot drop column `{name}` because the following external stores depend on it:\n'
-                f'{", ".join(dependent_store_names)}'
-            )
         # we're creating a new schema version
         self.version += 1
@@ -595,7 +569,7 @@ class TableVersion:
                 del self.idxs_by_name[idx_name]
             self._drop_columns(dropped_cols)
             self._update_md(time.time(), conn, preceding_schema_version=preceding_schema_version)
-        _logger.info(f'Dropped column {name} from table {self.name}, new version: {self.version}')
+        _logger.info(f'Dropped column {col.name} from table {self.name}, new version: {self.version}')
     def _drop_columns(self, cols: Iterable[Column]) -> None:
         """Mark columns as dropped"""
@@ -704,15 +678,34 @@ class TableVersion:
         return result
     def update(
-            self, update_targets: dict[Column, 'pixeltable.exprs.Expr'],
-            where_clause: Optional['pixeltable.exprs.Predicate'] = None, cascade: bool = True
+        self, value_spec: dict[str, Any], where: Optional['exprs.Predicate'] = None, cascade: bool = True
     ) -> UpdateStatus:
+        """Update rows in this TableVersionPath.
+        Args:
+            value_spec: a list of (column, value) pairs specifying the columns to update and their new values.
+            where: a Predicate to filter rows to update.
+            cascade: if True, also update all computed columns that transitively depend on the updated columns,
+                including within views.
+        """
+        if self.is_snapshot:
+            raise excs.Error('Cannot update a snapshot')
+        from pixeltable.plan import Planner
+        update_spec = self._validate_update_spec(value_spec, allow_pk=False, allow_exprs=True)
+        if where is not None:
+            if not isinstance(where, exprs.Predicate):
+                raise excs.Error(f"'where' argument must be a Predicate, got {type(where)}")
+            analysis_info = Planner.analyze(self.path, where)
+            # for now we require that the updated rows can be identified via SQL, rather than via a Python filter
+            if analysis_info.filter is not None:
+                raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
         with Env.get().engine.begin() as conn:
-            return self._update(conn, update_targets, where_clause, cascade)
+            return self._update(conn, update_spec, where, cascade)
     def batch_update(
-            self, batch: list[dict[Column, 'pixeltable.exprs.Expr']], rowids: list[Tuple[int, ...]],
-            cascade: bool = True
+            self, batch: list[dict[Column, 'exprs.Expr']], rowids: list[tuple[int, ...]], cascade: bool = True
     ) -> UpdateStatus:
         """Update rows in batch.
         Args:
@@ -721,7 +714,6 @@ class TableVersion:
         """
         # if we do lookups of rowids, we must have one for each row in the batch
         assert len(rowids) == 0 or len(rowids) == len(batch)
-        import pixeltable.exprs as exprs
         result_status = UpdateStatus()
         cols_with_excs: set[str] = set()
         updated_cols: set[str] = set()
@@ -768,24 +760,61 @@ class TableVersion:
             where_clause: Optional['pixeltable.exprs.Predicate'] = None, cascade: bool = True,
             show_progress: bool = True
     ) -> UpdateStatus:
-        """Update rows in this table.
-        Args:
-            update_targets: a list of (column, value) pairs specifying the columns to update and their new values.
-            where_clause: a Predicate to filter rows to update.
-            cascade: if True, also update all computed columns that transitively depend on the updated columns,
-                including within views.
-        """
-        assert not self.is_snapshot
         from pixeltable.plan import Planner
-        plan, updated_cols, recomputed_cols = \
+        plan, updated_cols, recomputed_cols = (
             Planner.create_update_plan(self.path, update_targets, [], where_clause, cascade)
-        result = self._propagate_update(
+        )
+        result = self.propagate_update(
             plan, where_clause.sql_expr() if where_clause is not None else None, recomputed_cols,
             base_versions=[], conn=conn, timestamp=time.time(), cascade=cascade, show_progress=show_progress)
         result.updated_cols = updated_cols
         return result
-    def _propagate_update(
+    def _validate_update_spec(
+            self, value_spec: dict[str, Any], allow_pk: bool, allow_exprs: bool
+    ) -> dict[Column, 'exprs.Expr']:
+        update_targets: dict[Column, exprs.Expr] = {}
+        for col_name, val in value_spec.items():
+            if not isinstance(col_name, str):
+                raise excs.Error(f'Update specification: dict key must be column name, got {col_name!r}')
+            if col_name == _ROWID_COLUMN_NAME:
+                # ignore pseudo-column _rowid
+                continue
+            col = self.path.get_column(col_name, include_bases=False)
+            if col is None:
+                # TODO: return more informative error if this is trying to update a base column
+                raise excs.Error(f'Column {col_name} unknown')
+            if col.is_computed:
+                raise excs.Error(f'Column {col_name} is computed and cannot be updated')
+            if col.is_pk and not allow_pk:
+                raise excs.Error(f'Column {col_name} is a primary key column and cannot be updated')
+            if col.col_type.is_media_type():
+                raise excs.Error(f'Column {col_name} has type image/video/audio/document and cannot be updated')
+            # make sure that the value is compatible with the column type
+            try:
+                # check if this is a literal
+                value_expr = exprs.Literal(val, col_type=col.col_type)
+            except TypeError:
+                if not allow_exprs:
+                    raise excs.Error(
+                        f'Column {col_name}: value {val!r} is not a valid literal for this column '
+                        f'(expected {col.col_type})')
+                # it's not a literal, let's try to create an expr from it
+                value_expr = exprs.Expr.from_object(val)
+                if value_expr is None:
+                    raise excs.Error(f'Column {col_name}: value {val!r} is not a recognized literal or expression')
+                if not col.col_type.matches(value_expr.col_type):
+                    raise excs.Error((
+                        f'Type of value {val!r} ({value_expr.col_type}) is not compatible with the type of column '
+                        f'{col_name} ({col.col_type})'
+                    ))
+            update_targets[col] = value_expr
+        return update_targets
+    def propagate_update(
             self, plan: Optional[exec.ExecNode], where_clause: Optional[sql.ClauseElement],
             recomputed_view_cols: List[Column], base_versions: List[Optional[int]], conn: sql.engine.Connection,
             timestamp: float, cascade: bool, show_progress: bool = True
@@ -810,7 +839,7 @@ class TableVersion:
                 if len(recomputed_cols) > 0:
                     from pixeltable.plan import Planner
                     plan = Planner.create_view_update_plan(view.path, recompute_targets=recomputed_cols)
-                status = view._propagate_update(
+                status = view.propagate_update(
                     plan, None, recomputed_view_cols, base_versions=base_versions, conn=conn, timestamp=timestamp, cascade=True)
                 result.num_rows += status.num_rows
                 result.num_excs += status.num_excs
@@ -819,22 +848,31 @@ class TableVersion:
         result.cols_with_excs = list(dict.fromkeys(result.cols_with_excs).keys())  # remove duplicates
         return result
-    def delete(self, where: Optional['pixeltable.exprs.Predicate'] = None) -> UpdateStatus:
+    def delete(self, where: Optional['exprs.Predicate'] = None) -> UpdateStatus:
         """Delete rows in this table.
         Args:
             where: a Predicate to filter rows to delete.
         """
         assert self.is_insertable()
+        from pixeltable.exprs import Predicate
         from pixeltable.plan import Planner
-        analysis_info = Planner.analyze(self, where)
+        if where is not None:
+            if not isinstance(where, Predicate):
+                raise excs.Error(f"'where' argument must be a Predicate, got {type(where)}")
+            analysis_info = Planner.analyze(self.path, where)
+            # for now we require that the updated rows can be identified via SQL, rather than via a Python filter
+            if analysis_info.filter is not None:
+                raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
+        analysis_info = Planner.analyze(self.path, where)
         with Env.get().engine.begin() as conn:
-            num_rows = self._delete(analysis_info.sql_where_clause, base_versions=[], conn=conn, timestamp=time.time())
+            num_rows = self.propagate_delete(analysis_info.sql_where_clause, base_versions=[], conn=conn, timestamp=time.time())
         status = UpdateStatus(num_rows=num_rows)
         return status
-    def _delete(
-            self, where: Optional['pixeltable.exprs.Predicate'], base_versions: List[Optional[int]],
+    def propagate_delete(
+            self, where: Optional['exprs.Predicate'], base_versions: List[Optional[int]],
             conn: sql.engine.Connection, timestamp: float) -> int:
         """Delete rows in this table and propagate to views.
         Args:
@@ -853,7 +891,7 @@ class TableVersion:
         else:
             pass
         for view in self.mutable_views:
-            num_rows += view._delete(
+            num_rows += view.propagate_delete(
                 where=None, base_versions=[self.version] + base_versions, conn=conn, timestamp=timestamp)
         return num_rows

pixeltable/catalog/table_version_path.py CHANGED Viewed

@@ -5,13 +5,13 @@ from typing import Optional, Union
 from uuid import UUID
 import pixeltable
-import pixeltable.catalog as catalog
 from .column import Column
 from .globals import POS_COLUMN_NAME
 from .table_version import TableVersion
 _logger = logging.getLogger('pixeltable')
 class TableVersionPath:
     """
     A TableVersionPath represents the sequence of TableVersions from a base table to a particular view:

pixeltable/catalog/view.py CHANGED Viewed

@@ -1,30 +1,33 @@
 from __future__ import annotations
+import inspect
 import logging
-from typing import List, Optional, Type, Dict, Set, Any, Iterable
+from typing import Optional, Type, Dict, Set, Any, Iterable, TYPE_CHECKING
 from uuid import UUID
-import inspect
 import sqlalchemy.orm as orm
-from .table import Table
-from .table_version import TableVersion
-from .table_version_path import TableVersionPath
-from .column import Column
-from .catalog import Catalog
-from .globals import POS_COLUMN_NAME, UpdateStatus
-from pixeltable.env import Env
-from pixeltable.iterators import ComponentIterator
-from pixeltable.exceptions import Error
-import pixeltable.func as func
-import pixeltable.type_system as ts
 import pixeltable.catalog as catalog
+import pixeltable.exceptions as excs
+import pixeltable.func as func
 import pixeltable.metadata.schema as md_schema
+from pixeltable.env import Env
+from pixeltable.exceptions import Error
+from pixeltable.iterators import ComponentIterator
 from pixeltable.type_system import InvalidType, IntType
-import pixeltable.exceptions as excs
+from .catalog import Catalog
+from .column import Column
+from .globals import POS_COLUMN_NAME, UpdateStatus
+from .table import Table
+from .table_version import TableVersion
+from .table_version_path import TableVersionPath
+if TYPE_CHECKING:
+    import pixeltable as pxt
 _logger = logging.getLogger('pixeltable')
 class View(Table):
     """A `Table` that presents a virtual view of another table (or view).
@@ -34,10 +37,11 @@ class View(Table):
     is simply a reference to a specific set of base versions.
     """
     def __init__(
-            self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath, base: Table,
+            self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath, base_id: UUID,
             snapshot_only: bool):
         super().__init__(id, dir_id, name, tbl_version_path)
-        self._base = base  # keep a reference to the base Table, so that we can keep track of its dependents
+        assert base_id in catalog.Catalog.get().tbl_dependents
+        self._base_id = base_id  # keep a reference to the base Table ID, so that we can keep track of its dependents
         self._snapshot_only = snapshot_only
     @classmethod
@@ -46,8 +50,8 @@ class View(Table):
     @classmethod
     def create(
-            cls, dir_id: UUID, name: str, base: Table, schema: Dict[str, Any],
-            predicate: 'exprs.Predicate', is_snapshot: bool, num_retained_versions: int, comment: str,
+            cls, dir_id: UUID, name: str, base: TableVersionPath, schema: Dict[str, Any],
+            predicate: 'pxt.exprs.Predicate', is_snapshot: bool, num_retained_versions: int, comment: str,
             iterator_cls: Optional[Type[ComponentIterator]], iterator_args: Optional[Dict]
     ) -> View:
         columns = cls._create_columns(schema)
@@ -55,8 +59,8 @@ class View(Table):
         # verify that filter can be evaluated in the context of the base
         if predicate is not None:
-            if not predicate.is_bound_by(base._tbl_version_path):
-                raise excs.Error(f'Filter cannot be computed in the context of the base {base._name}')
+            if not predicate.is_bound_by(base):
+                raise excs.Error(f'Filter cannot be computed in the context of the base {base.tbl_name()}')
             # create a copy that we can modify and store
             predicate = predicate.copy()
@@ -65,9 +69,9 @@ class View(Table):
             if not col.is_computed:
                 continue
             # make sure that the value can be computed in the context of the base
-            if col.value_expr is not None and not col.value_expr.is_bound_by(base._tbl_version_path):
+            if col.value_expr is not None and not col.value_expr.is_bound_by(base):
                 raise excs.Error(
-                    f'Column {col.name}: value expression cannot be computed in the context of the base {base._name}')
+                    f'Column {col.name}: value expression cannot be computed in the context of the base {base.tbl_name()}')
         if iterator_cls is not None:
             assert iterator_args is not None
@@ -114,7 +118,7 @@ class View(Table):
             iterator_args_expr = InlineDict(iterator_args) if iterator_args is not None else None
             iterator_class_fqn = f'{iterator_cls.__module__}.{iterator_cls.__name__}' if iterator_cls is not None \
                 else None
-            base_version_path = cls._get_snapshot_path(base._tbl_version_path) if is_snapshot else base._tbl_version_path
+            base_version_path = cls._get_snapshot_path(base) if is_snapshot else base
             base_versions = [
                 (tbl_version.id.hex, tbl_version.version if is_snapshot or tbl_version.is_snapshot else None)
                 for tbl_version in base_version_path.get_tbl_versions()
@@ -139,11 +143,11 @@ class View(Table):
                 session, dir_id, name, columns, num_retained_versions, comment, base_path=base_version_path, view_md=view_md)
             if tbl_version is None:
                 # this is purely a snapshot: we use the base's tbl version path
-                view = cls(id, dir_id, name, base_version_path, base, snapshot_only=True)
+                view = cls(id, dir_id, name, base_version_path, base.tbl_id(), snapshot_only=True)
                 _logger.info(f'created snapshot {name}')
             else:
                 view = cls(
-                    id, dir_id, name, TableVersionPath(tbl_version, base=base_version_path), base,
+                    id, dir_id, name, TableVersionPath(tbl_version, base=base_version_path), base.tbl_id(),
                     snapshot_only=False)
                 _logger.info(f'Created view `{name}`, id={tbl_version.id}')
@@ -156,7 +160,7 @@ class View(Table):
             session.commit()
             cat = Catalog.get()
             cat.tbl_dependents[view._id] = []
-            cat.tbl_dependents[base._id].append(view)
+            cat.tbl_dependents[base.tbl_id()].append(view)
             cat.tbls[view._id] = view
             return view
@@ -200,7 +204,7 @@ class View(Table):
             del cat.tbls[self._id]
         else:
             super()._drop()
-        cat.tbl_dependents[self._base._id].remove(self)
+        cat.tbl_dependents[self._base_id].remove(self)
         del cat.tbl_dependents[self._id]
     def insert(

pixeltable/dataframe.py CHANGED Viewed

@@ -1,32 +1,27 @@
 from __future__ import annotations
-import base64
 import copy
 import hashlib
-import io
 import json
 import logging
 import mimetypes
 import traceback
 from pathlib import Path
-from typing import List, Optional, Any, Dict, Iterator, Tuple, Set
+from typing import List, Optional, Any, Dict, Iterator, Tuple, Set, Callable
-import PIL.Image
-import cv2
 import pandas as pd
 import pandas.io.formats.style
 import sqlalchemy as sql
-from PIL import Image
 import pixeltable.catalog as catalog
 import pixeltable.exceptions as excs
 import pixeltable.exprs as exprs
-import pixeltable.type_system as ts
-import pixeltable.func as func
 from pixeltable.catalog import is_valid_identifier
+from pixeltable.catalog.globals import UpdateStatus
 from pixeltable.env import Env
 from pixeltable.plan import Planner
 from pixeltable.type_system import ColumnType
+from pixeltable.utils.formatter import Formatter
 from pixeltable.utils.http_server import get_file_uri
 __all__ = ['DataFrame']
@@ -47,12 +42,7 @@ class DataFrameResultSet:
         self._rows = rows
         self._col_names = col_names
         self._col_types = col_types
-        self._formatters = {
-            ts.ImageType: self._format_img,
-            ts.VideoType: self._format_video,
-            ts.AudioType: self._format_audio,
-            ts.DocumentType: self._format_document,
-        }
+        self.__formatter = Formatter(len(self._rows), len(self._col_names), Env.get().http_address)
     def __len__(self) -> int:
         return len(self._rows)
@@ -67,11 +57,11 @@ class DataFrameResultSet:
         return self.to_pandas().__repr__()
     def _repr_html_(self) -> str:
-        formatters = {
-            col_name: self._formatters[col_type.__class__]
-            for col_name, col_type in zip(self._col_names, self._col_types)
-            if col_type.__class__ in self._formatters
-        }
+        formatters: dict[str, Callable] = {}
+        for col_name, col_type in zip(self._col_names, self._col_types):
+            formatter = self.__formatter.get_pandas_formatter(col_type)
+            if formatter is not None:
+                formatters[col_name] = formatter
         return self.to_pandas().to_html(formatters=formatters, escape=False, index=False)
     def __str__(self) -> str:
@@ -87,100 +77,6 @@ class DataFrameResultSet:
     def _row_to_dict(self, row_idx: int) -> Dict[str, Any]:
         return {self._col_names[i]: self._rows[row_idx][i] for i in range(len(self._col_names))}
-    # Formatters
-    def _format_img(self, img: Image.Image) -> str:
-        """
-        Create <img> tag for Image object.
-        """
-        assert isinstance(img, Image.Image), f'Wrong type: {type(img)}'
-        # Try to make it look decent in a variety of display scenarios
-        if len(self._rows) > 1:
-            width = 240  # Multiple rows: display small images
-        elif len(self._col_names) > 1:
-            width = 480  # Multiple columns: display medium images
-        else:
-            width = 640  # A single image: larger display
-        with io.BytesIO() as buffer:
-            img.save(buffer, 'jpeg')
-            img_base64 = base64.b64encode(buffer.getvalue()).decode()
-            return f"""
-            <div class="pxt_image" style="width:{width}px;">
-                <img src="data:image/jpeg;base64,{img_base64}" width="{width}" />
-            </div>
-            """
-    def _format_video(self, file_path: str) -> str:
-        thumb_tag = ''
-        # Attempt to extract the first frame of the video to use as a thumbnail,
-        # so that the notebook can be exported as HTML and viewed in contexts where
-        # the video itself is not accessible.
-        # TODO(aaron-siegel): If the video is backed by a concrete external URL,
-        # should we link to that instead?
-        video_reader = cv2.VideoCapture(str(file_path))
-        if video_reader.isOpened():
-            status, img_array = video_reader.read()
-            if status:
-                img_array = cv2.cvtColor(img_array, cv2.COLOR_BGR2RGB)
-                thumb = PIL.Image.fromarray(img_array)
-                with io.BytesIO() as buffer:
-                    thumb.save(buffer, 'jpeg')
-                    thumb_base64 = base64.b64encode(buffer.getvalue()).decode()
-                    thumb_tag = f'poster="data:image/jpeg;base64,{thumb_base64}"'
-            video_reader.release()
-        if len(self._rows) > 1:
-            width = 320
-        elif len(self._col_names) > 1:
-            width = 480
-        else:
-            width = 800
-        return f"""
-        <div class="pxt_video" style="width:{width}px;">
-            <video controls width="{width}" {thumb_tag}>
-                {_create_source_tag(file_path)}
-            </video>
-        </div>
-        """
-    def _format_document(self, file_path: str) -> str:
-        max_width = max_height = 320
-        # by default, file path will be shown as a link
-        inner_element = file_path
-        # try generating a thumbnail for different types and use that if successful
-        if file_path.lower().endswith('.pdf'):
-            try:
-                import fitz
-                doc = fitz.open(file_path)
-                p = doc.get_page_pixmap(0)
-                while p.width > max_width or p.height > max_height:
-                    # shrink(1) will halve each dimension
-                    p.shrink(1)
-                data = p.tobytes(output='jpeg')
-                thumb_base64 = base64.b64encode(data).decode()
-                img_src = f'data:image/jpeg;base64,{thumb_base64}'
-                inner_element = f"""
-                    <img style="object-fit: contain; border: 1px solid black;" src="{img_src}" />
-                """
-            except:
-                logging.warning(f'Failed to produce PDF thumbnail {file_path}. Make sure you have PyMuPDF installed.')
-        return f"""
-        <div class="pxt_document" style="width:{max_width}px;">
-            <a href="{get_file_uri(Env.get().http_address, file_path)}">
-                {inner_element}
-            </a>
-        </div>
-        """
-    def _format_audio(self, file_path: str) -> str:
-        return f"""
-        <div class="pxt_audio">
-            <audio controls>
-                {_create_source_tag(file_path)}
-            </audio>
-        </div>
-        """
     def __getitem__(self, index: Any) -> Any:
         if isinstance(index, str):
             if index not in self._col_names:
@@ -595,7 +491,7 @@ class DataFrame:
                 raise excs.Error(f'Invalid name: {name}')
         base_list = [(expr, None) for expr in items] + [(expr, k) for (k, expr) in named_items.items()]
         if len(base_list) == 0:
-            raise excs.Error(f'Empty select list')
+            return self
         # analyze select list; wrap literals with the corresponding expressions
         select_list = []
@@ -662,7 +558,7 @@ class DataFrame:
                 # we need to make sure that the grouping table is a base of self.tbl
                 base = self.tbl.find_tbl_version(item._tbl_version_path.tbl_id())
                 if base is None or base.id == self.tbl.tbl_id():
-                    raise excs.Error(f'group_by(): {item.get_name()} is not a base table of {self.tbl.tbl_name()}')
+                    raise excs.Error(f'group_by(): {item.name} is not a base table of {self.tbl.tbl_name()}')
                 grouping_tbl = item._tbl_version_path.tbl_version
                 break
             if not isinstance(item, exprs.Expr):
@@ -708,6 +604,27 @@ class DataFrame:
             limit=n,
         )
+    def update(self, value_spec: dict[str, Any], cascade: bool = True) -> UpdateStatus:
+        self._validate_mutable('update')
+        return self.tbl.tbl_version.update(value_spec, where=self.where_clause, cascade=cascade)
+    def delete(self) -> UpdateStatus:
+        self._validate_mutable('delete')
+        if not self.tbl.is_insertable():
+            raise excs.Error(f'Cannot delete from view')
+        return self.tbl.tbl_version.delete(where=self.where_clause)
+    def _validate_mutable(self, op_name: str) -> None:
+        """Tests whether this `DataFrame` can be mutated (such as by an update operation)."""
+        if self.group_by_clause is not None or self.grouping_tbl is not None:
+            raise excs.Error(f'Cannot use `{op_name}` after `group_by`')
+        if self.order_by_clause is not None:
+            raise excs.Error(f'Cannot use `{op_name}` after `order_by`')
+        if self.select_list is not None:
+            raise excs.Error(f'Cannot use `{op_name}` after `select`')
+        if self.limit_val is not None:
+            raise excs.Error(f'Cannot use `{op_name}` after `limit`')
     def __getitem__(self, index: object) -> DataFrame:
         """
         Allowed:

pixeltable/exprs/column_ref.py CHANGED Viewed

@@ -63,14 +63,9 @@ class ColumnRef(Expr):
         return super().__getattr__(name)
-    def similarity(self, other: Any) -> Expr:
-        # if isinstance(other, Expr):
-        #     raise excs.Error(f'similarity(): requires a string or a PIL.Image.Image object, not an expression')
-        item = Expr.from_object(other)
-        if item is None or not(item.col_type.is_string_type() or item.col_type.is_image_type()):
-            raise excs.Error(f'similarity(): requires a string or a PIL.Image.Image object, not a {type(other)}')
+    def similarity(self, item: Any, *, idx: Optional[str] = None) -> Expr:
         from .similarity_expr import SimilarityExpr
-        return SimilarityExpr(self, item)
+        return SimilarityExpr(self, item, idx_name=idx)
     def default_column_name(self) -> Optional[str]:
         return str(self)

pixeltable 0.2.10__py3-none-any.whl → 0.2.12__py3-none-any.whl

Potentially problematic release.

pixeltable 0.2.10__py3-none-any.whl → 0.2.12__py3-none-any.whl