pixeltable 0.2.10__py3-none-any.whl → 0.2.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

@@ -5,29 +5,31 @@ import importlib
5
5
  import inspect
6
6
  import logging
7
7
  import time
8
- from typing import Optional, List, Dict, Any, Tuple, Type, Set, Iterable
9
8
  import uuid
9
+ from typing import Optional, List, Dict, Any, Tuple, Type, Iterable
10
10
  from uuid import UUID
11
11
 
12
12
  import sqlalchemy as sql
13
13
  import sqlalchemy.orm as orm
14
14
 
15
15
  import pixeltable
16
- import pixeltable.func as func
17
- import pixeltable.type_system as ts
18
16
  import pixeltable.exceptions as excs
17
+ import pixeltable.exprs as exprs
18
+ import pixeltable.func as func
19
19
  import pixeltable.index as index
20
+ import pixeltable.type_system as ts
20
21
  from pixeltable.env import Env
21
22
  from pixeltable.iterators import ComponentIterator
22
23
  from pixeltable.metadata import schema
23
24
  from pixeltable.utils.filecache import FileCache
24
25
  from pixeltable.utils.media_store import MediaStore
25
26
  from .column import Column
26
- from .globals import UpdateStatus, POS_COLUMN_NAME, is_valid_identifier
27
+ from .globals import UpdateStatus, POS_COLUMN_NAME, is_valid_identifier, _ROWID_COLUMN_NAME
27
28
  from ..func.globals import resolve_symbol
28
29
 
29
30
  _logger = logging.getLogger('pixeltable')
30
31
 
32
+
31
33
  class TableVersion:
32
34
  """
33
35
  TableVersion represents a particular version of a table/view along with its physical representation:
@@ -243,7 +245,6 @@ class TableVersion:
243
245
  def _init_cols(self, tbl_md: schema.TableMd, schema_version_md: schema.TableSchemaVersionMd) -> None:
244
246
  """Initialize self.cols with the columns visible in our effective version"""
245
247
  import pixeltable.exprs as exprs
246
- from pixeltable.catalog import Catalog
247
248
 
248
249
  self.cols = []
249
250
  self.cols_by_name = {}
@@ -539,39 +540,12 @@ class TableVersion:
539
540
  num_rows=row_count, num_computed_values=row_count, num_excs=num_excs,
540
541
  cols_with_excs=[f'{col.tbl.name}.{col.name}'for col in cols_with_excs if col.name is not None])
541
542
 
542
- def drop_column(self, name: str) -> None:
543
+ def drop_column(self, col: Column) -> None:
543
544
  """Drop a column from the table.
544
545
  """
545
546
  from pixeltable.catalog import Catalog
546
547
 
547
548
  assert not self.is_snapshot
548
- if name not in self.cols_by_name:
549
- raise excs.Error(f'Unknown column: {name}')
550
- col = self.cols_by_name[name]
551
- dependent_user_cols = [c for c in col.dependent_cols if c.name is not None]
552
- if len(dependent_user_cols) > 0:
553
- raise excs.Error(
554
- f'Cannot drop column `{name}` because the following columns depend on it:\n'
555
- f'{", ".join(c.name for c in dependent_user_cols)}'
556
- )
557
- # See if this column has a dependent store. We need to look through all stores in all
558
- # (transitive) views of this table.
559
- transitive_views = Catalog.get().tbls[self.id].get_views(recursive=True)
560
- dependent_stores = [
561
- (view, store)
562
- for view in transitive_views
563
- for store in view._tbl_version.external_stores.values()
564
- if col in store.get_local_columns()
565
- ]
566
- if len(dependent_stores) > 0:
567
- dependent_store_names = [
568
- store.name if view._get_id() == self.id else f'{store.name} (in view `{view.get_name()}`)'
569
- for view, store in dependent_stores
570
- ]
571
- raise excs.Error(
572
- f'Cannot drop column `{name}` because the following external stores depend on it:\n'
573
- f'{", ".join(dependent_store_names)}'
574
- )
575
549
 
576
550
  # we're creating a new schema version
577
551
  self.version += 1
@@ -595,7 +569,7 @@ class TableVersion:
595
569
  del self.idxs_by_name[idx_name]
596
570
  self._drop_columns(dropped_cols)
597
571
  self._update_md(time.time(), conn, preceding_schema_version=preceding_schema_version)
598
- _logger.info(f'Dropped column {name} from table {self.name}, new version: {self.version}')
572
+ _logger.info(f'Dropped column {col.name} from table {self.name}, new version: {self.version}')
599
573
 
600
574
  def _drop_columns(self, cols: Iterable[Column]) -> None:
601
575
  """Mark columns as dropped"""
@@ -704,15 +678,34 @@ class TableVersion:
704
678
  return result
705
679
 
706
680
  def update(
707
- self, update_targets: dict[Column, 'pixeltable.exprs.Expr'],
708
- where_clause: Optional['pixeltable.exprs.Predicate'] = None, cascade: bool = True
681
+ self, value_spec: dict[str, Any], where: Optional['exprs.Predicate'] = None, cascade: bool = True
709
682
  ) -> UpdateStatus:
683
+ """Update rows in this table version.
684
+ Args:
685
+ value_spec: a dict mapping column names to their new values (literals or expressions).
686
+ where: a Predicate to filter rows to update.
687
+ cascade: if True, also update all computed columns that transitively depend on the updated columns,
688
+ including within views.
689
+ """
690
+ if self.is_snapshot:
691
+ raise excs.Error('Cannot update a snapshot')
692
+
693
+ from pixeltable.plan import Planner
694
+
695
+ update_spec = self._validate_update_spec(value_spec, allow_pk=False, allow_exprs=True)
696
+ if where is not None:
697
+ if not isinstance(where, exprs.Predicate):
698
+ raise excs.Error(f"'where' argument must be a Predicate, got {type(where)}")
699
+ analysis_info = Planner.analyze(self.path, where)
700
+ # for now we require that the updated rows can be identified via SQL, rather than via a Python filter
701
+ if analysis_info.filter is not None:
702
+ raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
703
+
710
704
  with Env.get().engine.begin() as conn:
711
- return self._update(conn, update_targets, where_clause, cascade)
705
+ return self._update(conn, update_spec, where, cascade)
712
706
 
713
707
  def batch_update(
714
- self, batch: list[dict[Column, 'pixeltable.exprs.Expr']], rowids: list[Tuple[int, ...]],
715
- cascade: bool = True
708
+ self, batch: list[dict[Column, 'exprs.Expr']], rowids: list[tuple[int, ...]], cascade: bool = True
716
709
  ) -> UpdateStatus:
717
710
  """Update rows in batch.
718
711
  Args:
@@ -721,7 +714,6 @@ class TableVersion:
721
714
  """
722
715
  # if we do lookups of rowids, we must have one for each row in the batch
723
716
  assert len(rowids) == 0 or len(rowids) == len(batch)
724
- import pixeltable.exprs as exprs
725
717
  result_status = UpdateStatus()
726
718
  cols_with_excs: set[str] = set()
727
719
  updated_cols: set[str] = set()
@@ -768,24 +760,61 @@ class TableVersion:
768
760
  where_clause: Optional['pixeltable.exprs.Predicate'] = None, cascade: bool = True,
769
761
  show_progress: bool = True
770
762
  ) -> UpdateStatus:
771
- """Update rows in this table.
772
- Args:
773
- update_targets: a list of (column, value) pairs specifying the columns to update and their new values.
774
- where_clause: a Predicate to filter rows to update.
775
- cascade: if True, also update all computed columns that transitively depend on the updated columns,
776
- including within views.
777
- """
778
- assert not self.is_snapshot
779
763
  from pixeltable.plan import Planner
780
- plan, updated_cols, recomputed_cols = \
764
+
765
+ plan, updated_cols, recomputed_cols = (
781
766
  Planner.create_update_plan(self.path, update_targets, [], where_clause, cascade)
782
- result = self._propagate_update(
767
+ )
768
+ result = self.propagate_update(
783
769
  plan, where_clause.sql_expr() if where_clause is not None else None, recomputed_cols,
784
770
  base_versions=[], conn=conn, timestamp=time.time(), cascade=cascade, show_progress=show_progress)
785
771
  result.updated_cols = updated_cols
786
772
  return result
787
773
 
788
- def _propagate_update(
774
+ def _validate_update_spec(
775
+ self, value_spec: dict[str, Any], allow_pk: bool, allow_exprs: bool
776
+ ) -> dict[Column, 'exprs.Expr']:
777
+ update_targets: dict[Column, exprs.Expr] = {}
778
+ for col_name, val in value_spec.items():
779
+ if not isinstance(col_name, str):
780
+ raise excs.Error(f'Update specification: dict key must be column name, got {col_name!r}')
781
+ if col_name == _ROWID_COLUMN_NAME:
782
+ # ignore pseudo-column _rowid
783
+ continue
784
+ col = self.path.get_column(col_name, include_bases=False)
785
+ if col is None:
786
+ # TODO: return more informative error if this is trying to update a base column
787
+ raise excs.Error(f'Column {col_name} unknown')
788
+ if col.is_computed:
789
+ raise excs.Error(f'Column {col_name} is computed and cannot be updated')
790
+ if col.is_pk and not allow_pk:
791
+ raise excs.Error(f'Column {col_name} is a primary key column and cannot be updated')
792
+ if col.col_type.is_media_type():
793
+ raise excs.Error(f'Column {col_name} has type image/video/audio/document and cannot be updated')
794
+
795
+ # make sure that the value is compatible with the column type
796
+ try:
797
+ # check if this is a literal
798
+ value_expr = exprs.Literal(val, col_type=col.col_type)
799
+ except TypeError:
800
+ if not allow_exprs:
801
+ raise excs.Error(
802
+ f'Column {col_name}: value {val!r} is not a valid literal for this column '
803
+ f'(expected {col.col_type})')
804
+ # it's not a literal, let's try to create an expr from it
805
+ value_expr = exprs.Expr.from_object(val)
806
+ if value_expr is None:
807
+ raise excs.Error(f'Column {col_name}: value {val!r} is not a recognized literal or expression')
808
+ if not col.col_type.matches(value_expr.col_type):
809
+ raise excs.Error((
810
+ f'Type of value {val!r} ({value_expr.col_type}) is not compatible with the type of column '
811
+ f'{col_name} ({col.col_type})'
812
+ ))
813
+ update_targets[col] = value_expr
814
+
815
+ return update_targets
816
+
817
+ def propagate_update(
789
818
  self, plan: Optional[exec.ExecNode], where_clause: Optional[sql.ClauseElement],
790
819
  recomputed_view_cols: List[Column], base_versions: List[Optional[int]], conn: sql.engine.Connection,
791
820
  timestamp: float, cascade: bool, show_progress: bool = True
@@ -810,7 +839,7 @@ class TableVersion:
810
839
  if len(recomputed_cols) > 0:
811
840
  from pixeltable.plan import Planner
812
841
  plan = Planner.create_view_update_plan(view.path, recompute_targets=recomputed_cols)
813
- status = view._propagate_update(
842
+ status = view.propagate_update(
814
843
  plan, None, recomputed_view_cols, base_versions=base_versions, conn=conn, timestamp=timestamp, cascade=True)
815
844
  result.num_rows += status.num_rows
816
845
  result.num_excs += status.num_excs
@@ -819,22 +848,31 @@ class TableVersion:
819
848
  result.cols_with_excs = list(dict.fromkeys(result.cols_with_excs).keys()) # remove duplicates
820
849
  return result
821
850
 
822
- def delete(self, where: Optional['pixeltable.exprs.Predicate'] = None) -> UpdateStatus:
851
+ def delete(self, where: Optional['exprs.Predicate'] = None) -> UpdateStatus:
823
852
  """Delete rows in this table.
824
853
  Args:
825
854
  where: a Predicate to filter rows to delete.
826
855
  """
827
856
  assert self.is_insertable()
857
+ from pixeltable.exprs import Predicate
828
858
  from pixeltable.plan import Planner
829
- analysis_info = Planner.analyze(self, where)
859
+ if where is not None:
860
+ if not isinstance(where, Predicate):
861
+ raise excs.Error(f"'where' argument must be a Predicate, got {type(where)}")
862
+ analysis_info = Planner.analyze(self.path, where)
863
+ # for now we require that the deleted rows can be identified via SQL, rather than via a Python filter
864
+ if analysis_info.filter is not None:
865
+ raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
866
+
867
+ analysis_info = Planner.analyze(self.path, where)
830
868
  with Env.get().engine.begin() as conn:
831
- num_rows = self._delete(analysis_info.sql_where_clause, base_versions=[], conn=conn, timestamp=time.time())
869
+ num_rows = self.propagate_delete(analysis_info.sql_where_clause, base_versions=[], conn=conn, timestamp=time.time())
832
870
 
833
871
  status = UpdateStatus(num_rows=num_rows)
834
872
  return status
835
873
 
836
- def _delete(
837
- self, where: Optional['pixeltable.exprs.Predicate'], base_versions: List[Optional[int]],
874
+ def propagate_delete(
875
+ self, where: Optional['exprs.Predicate'], base_versions: List[Optional[int]],
838
876
  conn: sql.engine.Connection, timestamp: float) -> int:
839
877
  """Delete rows in this table and propagate to views.
840
878
  Args:
@@ -853,7 +891,7 @@ class TableVersion:
853
891
  else:
854
892
  pass
855
893
  for view in self.mutable_views:
856
- num_rows += view._delete(
894
+ num_rows += view.propagate_delete(
857
895
  where=None, base_versions=[self.version] + base_versions, conn=conn, timestamp=timestamp)
858
896
  return num_rows
859
897
 
@@ -5,13 +5,13 @@ from typing import Optional, Union
5
5
  from uuid import UUID
6
6
 
7
7
  import pixeltable
8
- import pixeltable.catalog as catalog
9
8
  from .column import Column
10
9
  from .globals import POS_COLUMN_NAME
11
10
  from .table_version import TableVersion
12
11
 
13
12
  _logger = logging.getLogger('pixeltable')
14
13
 
14
+
15
15
  class TableVersionPath:
16
16
  """
17
17
  A TableVersionPath represents the sequence of TableVersions from a base table to a particular view:
@@ -1,30 +1,33 @@
1
1
  from __future__ import annotations
2
+
3
+ import inspect
2
4
  import logging
3
- from typing import List, Optional, Type, Dict, Set, Any, Iterable
5
+ from typing import Optional, Type, Dict, Set, Any, Iterable, TYPE_CHECKING
4
6
  from uuid import UUID
5
- import inspect
6
7
 
7
8
  import sqlalchemy.orm as orm
8
9
 
9
- from .table import Table
10
- from .table_version import TableVersion
11
- from .table_version_path import TableVersionPath
12
- from .column import Column
13
- from .catalog import Catalog
14
- from .globals import POS_COLUMN_NAME, UpdateStatus
15
- from pixeltable.env import Env
16
- from pixeltable.iterators import ComponentIterator
17
- from pixeltable.exceptions import Error
18
- import pixeltable.func as func
19
- import pixeltable.type_system as ts
20
10
  import pixeltable.catalog as catalog
11
+ import pixeltable.exceptions as excs
12
+ import pixeltable.func as func
21
13
  import pixeltable.metadata.schema as md_schema
14
+ from pixeltable.env import Env
15
+ from pixeltable.exceptions import Error
16
+ from pixeltable.iterators import ComponentIterator
22
17
  from pixeltable.type_system import InvalidType, IntType
23
- import pixeltable.exceptions as excs
18
+ from .catalog import Catalog
19
+ from .column import Column
20
+ from .globals import POS_COLUMN_NAME, UpdateStatus
21
+ from .table import Table
22
+ from .table_version import TableVersion
23
+ from .table_version_path import TableVersionPath
24
24
 
25
+ if TYPE_CHECKING:
26
+ import pixeltable as pxt
25
27
 
26
28
  _logger = logging.getLogger('pixeltable')
27
29
 
30
+
28
31
  class View(Table):
29
32
  """A `Table` that presents a virtual view of another table (or view).
30
33
 
@@ -34,10 +37,11 @@ class View(Table):
34
37
  is simply a reference to a specific set of base versions.
35
38
  """
36
39
  def __init__(
37
- self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath, base: Table,
40
+ self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath, base_id: UUID,
38
41
  snapshot_only: bool):
39
42
  super().__init__(id, dir_id, name, tbl_version_path)
40
- self._base = base # keep a reference to the base Table, so that we can keep track of its dependents
43
+ assert base_id in catalog.Catalog.get().tbl_dependents
44
+ self._base_id = base_id # keep a reference to the base Table ID, so that we can keep track of its dependents
41
45
  self._snapshot_only = snapshot_only
42
46
 
43
47
  @classmethod
@@ -46,8 +50,8 @@ class View(Table):
46
50
 
47
51
  @classmethod
48
52
  def create(
49
- cls, dir_id: UUID, name: str, base: Table, schema: Dict[str, Any],
50
- predicate: 'exprs.Predicate', is_snapshot: bool, num_retained_versions: int, comment: str,
53
+ cls, dir_id: UUID, name: str, base: TableVersionPath, schema: Dict[str, Any],
54
+ predicate: 'pxt.exprs.Predicate', is_snapshot: bool, num_retained_versions: int, comment: str,
51
55
  iterator_cls: Optional[Type[ComponentIterator]], iterator_args: Optional[Dict]
52
56
  ) -> View:
53
57
  columns = cls._create_columns(schema)
@@ -55,8 +59,8 @@ class View(Table):
55
59
 
56
60
  # verify that filter can be evaluated in the context of the base
57
61
  if predicate is not None:
58
- if not predicate.is_bound_by(base._tbl_version_path):
59
- raise excs.Error(f'Filter cannot be computed in the context of the base {base._name}')
62
+ if not predicate.is_bound_by(base):
63
+ raise excs.Error(f'Filter cannot be computed in the context of the base {base.tbl_name()}')
60
64
  # create a copy that we can modify and store
61
65
  predicate = predicate.copy()
62
66
 
@@ -65,9 +69,9 @@ class View(Table):
65
69
  if not col.is_computed:
66
70
  continue
67
71
  # make sure that the value can be computed in the context of the base
68
- if col.value_expr is not None and not col.value_expr.is_bound_by(base._tbl_version_path):
72
+ if col.value_expr is not None and not col.value_expr.is_bound_by(base):
69
73
  raise excs.Error(
70
- f'Column {col.name}: value expression cannot be computed in the context of the base {base._name}')
74
+ f'Column {col.name}: value expression cannot be computed in the context of the base {base.tbl_name()}')
71
75
 
72
76
  if iterator_cls is not None:
73
77
  assert iterator_args is not None
@@ -114,7 +118,7 @@ class View(Table):
114
118
  iterator_args_expr = InlineDict(iterator_args) if iterator_args is not None else None
115
119
  iterator_class_fqn = f'{iterator_cls.__module__}.{iterator_cls.__name__}' if iterator_cls is not None \
116
120
  else None
117
- base_version_path = cls._get_snapshot_path(base._tbl_version_path) if is_snapshot else base._tbl_version_path
121
+ base_version_path = cls._get_snapshot_path(base) if is_snapshot else base
118
122
  base_versions = [
119
123
  (tbl_version.id.hex, tbl_version.version if is_snapshot or tbl_version.is_snapshot else None)
120
124
  for tbl_version in base_version_path.get_tbl_versions()
@@ -139,11 +143,11 @@ class View(Table):
139
143
  session, dir_id, name, columns, num_retained_versions, comment, base_path=base_version_path, view_md=view_md)
140
144
  if tbl_version is None:
141
145
  # this is purely a snapshot: we use the base's tbl version path
142
- view = cls(id, dir_id, name, base_version_path, base, snapshot_only=True)
146
+ view = cls(id, dir_id, name, base_version_path, base.tbl_id(), snapshot_only=True)
143
147
  _logger.info(f'created snapshot {name}')
144
148
  else:
145
149
  view = cls(
146
- id, dir_id, name, TableVersionPath(tbl_version, base=base_version_path), base,
150
+ id, dir_id, name, TableVersionPath(tbl_version, base=base_version_path), base.tbl_id(),
147
151
  snapshot_only=False)
148
152
  _logger.info(f'Created view `{name}`, id={tbl_version.id}')
149
153
 
@@ -156,7 +160,7 @@ class View(Table):
156
160
  session.commit()
157
161
  cat = Catalog.get()
158
162
  cat.tbl_dependents[view._id] = []
159
- cat.tbl_dependents[base._id].append(view)
163
+ cat.tbl_dependents[base.tbl_id()].append(view)
160
164
  cat.tbls[view._id] = view
161
165
  return view
162
166
 
@@ -200,7 +204,7 @@ class View(Table):
200
204
  del cat.tbls[self._id]
201
205
  else:
202
206
  super()._drop()
203
- cat.tbl_dependents[self._base._id].remove(self)
207
+ cat.tbl_dependents[self._base_id].remove(self)
204
208
  del cat.tbl_dependents[self._id]
205
209
 
206
210
  def insert(
pixeltable/dataframe.py CHANGED
@@ -1,32 +1,27 @@
1
1
  from __future__ import annotations
2
2
 
3
- import base64
4
3
  import copy
5
4
  import hashlib
6
- import io
7
5
  import json
8
6
  import logging
9
7
  import mimetypes
10
8
  import traceback
11
9
  from pathlib import Path
12
- from typing import List, Optional, Any, Dict, Iterator, Tuple, Set
10
+ from typing import List, Optional, Any, Dict, Iterator, Tuple, Set, Callable
13
11
 
14
- import PIL.Image
15
- import cv2
16
12
  import pandas as pd
17
13
  import pandas.io.formats.style
18
14
  import sqlalchemy as sql
19
- from PIL import Image
20
15
 
21
16
  import pixeltable.catalog as catalog
22
17
  import pixeltable.exceptions as excs
23
18
  import pixeltable.exprs as exprs
24
- import pixeltable.type_system as ts
25
- import pixeltable.func as func
26
19
  from pixeltable.catalog import is_valid_identifier
20
+ from pixeltable.catalog.globals import UpdateStatus
27
21
  from pixeltable.env import Env
28
22
  from pixeltable.plan import Planner
29
23
  from pixeltable.type_system import ColumnType
24
+ from pixeltable.utils.formatter import Formatter
30
25
  from pixeltable.utils.http_server import get_file_uri
31
26
 
32
27
  __all__ = ['DataFrame']
@@ -47,12 +42,7 @@ class DataFrameResultSet:
47
42
  self._rows = rows
48
43
  self._col_names = col_names
49
44
  self._col_types = col_types
50
- self._formatters = {
51
- ts.ImageType: self._format_img,
52
- ts.VideoType: self._format_video,
53
- ts.AudioType: self._format_audio,
54
- ts.DocumentType: self._format_document,
55
- }
45
+ self.__formatter = Formatter(len(self._rows), len(self._col_names), Env.get().http_address)
56
46
 
57
47
  def __len__(self) -> int:
58
48
  return len(self._rows)
@@ -67,11 +57,11 @@ class DataFrameResultSet:
67
57
  return self.to_pandas().__repr__()
68
58
 
69
59
  def _repr_html_(self) -> str:
70
- formatters = {
71
- col_name: self._formatters[col_type.__class__]
72
- for col_name, col_type in zip(self._col_names, self._col_types)
73
- if col_type.__class__ in self._formatters
74
- }
60
+ formatters: dict[str, Callable] = {}
61
+ for col_name, col_type in zip(self._col_names, self._col_types):
62
+ formatter = self.__formatter.get_pandas_formatter(col_type)
63
+ if formatter is not None:
64
+ formatters[col_name] = formatter
75
65
  return self.to_pandas().to_html(formatters=formatters, escape=False, index=False)
76
66
 
77
67
  def __str__(self) -> str:
@@ -87,100 +77,6 @@ class DataFrameResultSet:
87
77
  def _row_to_dict(self, row_idx: int) -> Dict[str, Any]:
88
78
  return {self._col_names[i]: self._rows[row_idx][i] for i in range(len(self._col_names))}
89
79
 
90
- # Formatters
91
- def _format_img(self, img: Image.Image) -> str:
92
- """
93
- Create <img> tag for Image object.
94
- """
95
- assert isinstance(img, Image.Image), f'Wrong type: {type(img)}'
96
- # Try to make it look decent in a variety of display scenarios
97
- if len(self._rows) > 1:
98
- width = 240 # Multiple rows: display small images
99
- elif len(self._col_names) > 1:
100
- width = 480 # Multiple columns: display medium images
101
- else:
102
- width = 640 # A single image: larger display
103
- with io.BytesIO() as buffer:
104
- img.save(buffer, 'jpeg')
105
- img_base64 = base64.b64encode(buffer.getvalue()).decode()
106
- return f"""
107
- <div class="pxt_image" style="width:{width}px;">
108
- <img src="data:image/jpeg;base64,{img_base64}" width="{width}" />
109
- </div>
110
- """
111
-
112
- def _format_video(self, file_path: str) -> str:
113
- thumb_tag = ''
114
- # Attempt to extract the first frame of the video to use as a thumbnail,
115
- # so that the notebook can be exported as HTML and viewed in contexts where
116
- # the video itself is not accessible.
117
- # TODO(aaron-siegel): If the video is backed by a concrete external URL,
118
- # should we link to that instead?
119
- video_reader = cv2.VideoCapture(str(file_path))
120
- if video_reader.isOpened():
121
- status, img_array = video_reader.read()
122
- if status:
123
- img_array = cv2.cvtColor(img_array, cv2.COLOR_BGR2RGB)
124
- thumb = PIL.Image.fromarray(img_array)
125
- with io.BytesIO() as buffer:
126
- thumb.save(buffer, 'jpeg')
127
- thumb_base64 = base64.b64encode(buffer.getvalue()).decode()
128
- thumb_tag = f'poster="data:image/jpeg;base64,{thumb_base64}"'
129
- video_reader.release()
130
- if len(self._rows) > 1:
131
- width = 320
132
- elif len(self._col_names) > 1:
133
- width = 480
134
- else:
135
- width = 800
136
- return f"""
137
- <div class="pxt_video" style="width:{width}px;">
138
- <video controls width="{width}" {thumb_tag}>
139
- {_create_source_tag(file_path)}
140
- </video>
141
- </div>
142
- """
143
-
144
- def _format_document(self, file_path: str) -> str:
145
- max_width = max_height = 320
146
- # by default, file path will be shown as a link
147
- inner_element = file_path
148
- # try generating a thumbnail for different types and use that if successful
149
- if file_path.lower().endswith('.pdf'):
150
- try:
151
- import fitz
152
-
153
- doc = fitz.open(file_path)
154
- p = doc.get_page_pixmap(0)
155
- while p.width > max_width or p.height > max_height:
156
- # shrink(1) will halve each dimension
157
- p.shrink(1)
158
- data = p.tobytes(output='jpeg')
159
- thumb_base64 = base64.b64encode(data).decode()
160
- img_src = f'data:image/jpeg;base64,{thumb_base64}'
161
- inner_element = f"""
162
- <img style="object-fit: contain; border: 1px solid black;" src="{img_src}" />
163
- """
164
- except:
165
- logging.warning(f'Failed to produce PDF thumbnail {file_path}. Make sure you have PyMuPDF installed.')
166
-
167
- return f"""
168
- <div class="pxt_document" style="width:{max_width}px;">
169
- <a href="{get_file_uri(Env.get().http_address, file_path)}">
170
- {inner_element}
171
- </a>
172
- </div>
173
- """
174
-
175
- def _format_audio(self, file_path: str) -> str:
176
- return f"""
177
- <div class="pxt_audio">
178
- <audio controls>
179
- {_create_source_tag(file_path)}
180
- </audio>
181
- </div>
182
- """
183
-
184
80
  def __getitem__(self, index: Any) -> Any:
185
81
  if isinstance(index, str):
186
82
  if index not in self._col_names:
@@ -595,7 +491,7 @@ class DataFrame:
595
491
  raise excs.Error(f'Invalid name: {name}')
596
492
  base_list = [(expr, None) for expr in items] + [(expr, k) for (k, expr) in named_items.items()]
597
493
  if len(base_list) == 0:
598
- raise excs.Error(f'Empty select list')
494
+ return self
599
495
 
600
496
  # analyze select list; wrap literals with the corresponding expressions
601
497
  select_list = []
@@ -662,7 +558,7 @@ class DataFrame:
662
558
  # we need to make sure that the grouping table is a base of self.tbl
663
559
  base = self.tbl.find_tbl_version(item._tbl_version_path.tbl_id())
664
560
  if base is None or base.id == self.tbl.tbl_id():
665
- raise excs.Error(f'group_by(): {item.get_name()} is not a base table of {self.tbl.tbl_name()}')
561
+ raise excs.Error(f'group_by(): {item.name} is not a base table of {self.tbl.tbl_name()}')
666
562
  grouping_tbl = item._tbl_version_path.tbl_version
667
563
  break
668
564
  if not isinstance(item, exprs.Expr):
@@ -708,6 +604,27 @@ class DataFrame:
708
604
  limit=n,
709
605
  )
710
606
 
607
+ def update(self, value_spec: dict[str, Any], cascade: bool = True) -> UpdateStatus:
608
+ self._validate_mutable('update')
609
+ return self.tbl.tbl_version.update(value_spec, where=self.where_clause, cascade=cascade)
610
+
611
+ def delete(self) -> UpdateStatus:
612
+ self._validate_mutable('delete')
613
+ if not self.tbl.is_insertable():
614
+ raise excs.Error(f'Cannot delete from view')
615
+ return self.tbl.tbl_version.delete(where=self.where_clause)
616
+
617
+ def _validate_mutable(self, op_name: str) -> None:
618
+ """Tests whether this `DataFrame` can be mutated (such as by an update operation)."""
619
+ if self.group_by_clause is not None or self.grouping_tbl is not None:
620
+ raise excs.Error(f'Cannot use `{op_name}` after `group_by`')
621
+ if self.order_by_clause is not None:
622
+ raise excs.Error(f'Cannot use `{op_name}` after `order_by`')
623
+ if self.select_list is not None:
624
+ raise excs.Error(f'Cannot use `{op_name}` after `select`')
625
+ if self.limit_val is not None:
626
+ raise excs.Error(f'Cannot use `{op_name}` after `limit`')
627
+
711
628
  def __getitem__(self, index: object) -> DataFrame:
712
629
  """
713
630
  Allowed:
@@ -63,14 +63,9 @@ class ColumnRef(Expr):
63
63
 
64
64
  return super().__getattr__(name)
65
65
 
66
- def similarity(self, other: Any) -> Expr:
67
- # if isinstance(other, Expr):
68
- # raise excs.Error(f'similarity(): requires a string or a PIL.Image.Image object, not an expression')
69
- item = Expr.from_object(other)
70
- if item is None or not(item.col_type.is_string_type() or item.col_type.is_image_type()):
71
- raise excs.Error(f'similarity(): requires a string or a PIL.Image.Image object, not a {type(other)}')
66
+ def similarity(self, item: Any, *, idx: Optional[str] = None) -> Expr:
72
67
  from .similarity_expr import SimilarityExpr
73
- return SimilarityExpr(self, item)
68
+ return SimilarityExpr(self, item, idx_name=idx)
74
69
 
75
70
  def default_column_name(self) -> Optional[str]:
76
71
  return str(self)