pixeltable-0.2.9-py3-none-any.whl → pixeltable-0.2.11-py3-none-any.whl

This diff shows the differences in content between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions exactly as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

pixeltable/__version__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  # These version placeholders will be replaced during build.
2
- __version__ = "0.2.9"
3
- __version_tuple__ = (0, 2, 9)
2
+ __version__ = "0.2.11"
3
+ __version_tuple__ = (0, 2, 11)
@@ -120,7 +120,7 @@ class Catalog:
120
120
  base = base_version
121
121
  assert base_path is not None
122
122
 
123
- base_tbl = self.tbls[base_path.tbl_version.id]
123
+ base_tbl_id = base_path.tbl_id()
124
124
  is_snapshot = view_md is not None and view_md.is_snapshot
125
125
  snapshot_only = is_snapshot and view_md.predicate is None and len(schema_version_md.columns) == 0
126
126
  if snapshot_only:
@@ -134,9 +134,9 @@ class Catalog:
134
134
  view_path = TableVersionPath(tbl_version, base=base_path)
135
135
 
136
136
  tbl = View(
137
- tbl_record.id, tbl_record.dir_id, tbl_md.name, view_path, base_tbl,
137
+ tbl_record.id, tbl_record.dir_id, tbl_md.name, view_path, base_tbl_id,
138
138
  snapshot_only=snapshot_only)
139
- self.tbl_dependents[base_tbl._id].append(tbl)
139
+ self.tbl_dependents[base_tbl_id].append(tbl)
140
140
 
141
141
  else:
142
142
  tbl_version = TableVersion(tbl_record.id, tbl_md, tbl_md.current_version, schema_version_md)
@@ -7,6 +7,8 @@ _logger = logging.getLogger('pixeltable')
7
7
 
8
8
  # name of the position column in a component view
9
9
  POS_COLUMN_NAME = 'pos'
10
+ _ROWID_COLUMN_NAME = '_rowid'
11
+
10
12
 
11
13
  @dataclasses.dataclass
12
14
  class UpdateStatus:
@@ -144,14 +144,4 @@ class InsertableTable(Table):
144
144
 
145
145
  >>> tbl.delete(tbl.a > 5)
146
146
  """
147
- from pixeltable.exprs import Predicate
148
- from pixeltable.plan import Planner
149
- if where is not None:
150
- if not isinstance(where, Predicate):
151
- raise excs.Error(f"'where' argument must be a Predicate, got {type(where)}")
152
- analysis_info = Planner.analyze(self._tbl_version_path, where)
153
- # for now we require that the updated rows can be identified via SQL, rather than via a Python filter
154
- if analysis_info.filter is not None:
155
- raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
156
-
157
- return self.tbl_version.delete(where)
147
+ return self._tbl_version.delete(where=where)
@@ -19,7 +19,7 @@ import pixeltable.index as index
19
19
  import pixeltable.metadata.schema as schema
20
20
  import pixeltable.type_system as ts
21
21
  from .column import Column
22
- from .globals import is_valid_identifier, is_system_column_name, UpdateStatus
22
+ from .globals import _ROWID_COLUMN_NAME, is_valid_identifier, is_system_column_name, UpdateStatus
23
23
  from .schema_object import SchemaObject
24
24
  from .table_version import TableVersion
25
25
  from .table_version_path import TableVersionPath
@@ -29,8 +29,6 @@ _logger = logging.getLogger('pixeltable')
29
29
  class Table(SchemaObject):
30
30
  """Base class for all tabular SchemaObjects."""
31
31
 
32
- __ROWID_COLUMN_NAME = '_rowid'
33
-
34
32
  def __init__(self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath):
35
33
  super().__init__(id, name, dir_id)
36
34
  self._is_dropped = False
@@ -93,7 +91,7 @@ class Table(SchemaObject):
93
91
  else:
94
92
  return catalog.Catalog.get().tbl_dependents[self._get_id()]
95
93
 
96
- def df(self) -> 'pixeltable.dataframe.DataFrame':
94
+ def _df(self) -> 'pixeltable.dataframe.DataFrame':
97
95
  """Return a DataFrame for this table.
98
96
  """
99
97
  # local import: avoid circular imports
@@ -132,30 +130,30 @@ class Table(SchemaObject):
132
130
 
133
131
  def collect(self) -> 'pixeltable.dataframe.DataFrameResultSet':
134
132
  """Return rows from this table."""
135
- return self.df().collect()
133
+ return self._df().collect()
136
134
 
137
135
  def show(
138
136
  self, *args, **kwargs
139
137
  ) -> 'pixeltable.dataframe.DataFrameResultSet':
140
138
  """Return rows from this table.
141
139
  """
142
- return self.df().show(*args, **kwargs)
140
+ return self._df().show(*args, **kwargs)
143
141
 
144
142
  def head(
145
143
  self, *args, **kwargs
146
144
  ) -> 'pixeltable.dataframe.DataFrameResultSet':
147
145
  """Return the first n rows inserted into this table."""
148
- return self.df().head(*args, **kwargs)
146
+ return self._df().head(*args, **kwargs)
149
147
 
150
148
  def tail(
151
149
  self, *args, **kwargs
152
150
  ) -> 'pixeltable.dataframe.DataFrameResultSet':
153
151
  """Return the last n rows inserted into this table."""
154
- return self.df().tail(*args, **kwargs)
152
+ return self._df().tail(*args, **kwargs)
155
153
 
156
154
  def count(self) -> int:
157
155
  """Return the number of rows in this table."""
158
- return self.df().count()
156
+ return self._df().count()
159
157
 
160
158
  def column_names(self) -> list[str]:
161
159
  """Return the names of the columns in this table."""
@@ -706,21 +704,8 @@ class Table(SchemaObject):
706
704
 
707
705
  >>> tbl.update({'int_col': tbl.int_col + 1}, where=tbl.int_col == 0)
708
706
  """
709
- if self._tbl_version_path.is_snapshot():
710
- raise excs.Error('Cannot update a snapshot')
711
707
  self._check_is_dropped()
712
-
713
- update_spec = self._validate_update_spec(value_spec, allow_pk=False, allow_exprs=True)
714
- from pixeltable.plan import Planner
715
- if where is not None:
716
- if not isinstance(where, exprs.Predicate):
717
- raise excs.Error(f"'where' argument must be a Predicate, got {type(where)}")
718
- analysis_info = Planner.analyze(self._tbl_version_path, where)
719
- # for now we require that the updated rows can be identified via SQL, rather than via a Python filter
720
- if analysis_info.filter is not None:
721
- raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
722
-
723
- return self._tbl_version.update(update_spec, where, cascade)
708
+ return self._tbl_version.update(value_spec, where, cascade)
724
709
 
725
710
  def batch_update(self, rows: Iterable[dict[str, Any]], cascade: bool = True) -> UpdateStatus:
726
711
  """Update rows in this table.
@@ -738,22 +723,23 @@ class Table(SchemaObject):
738
723
  if self._tbl_version_path.is_snapshot():
739
724
  raise excs.Error('Cannot update a snapshot')
740
725
  self._check_is_dropped()
726
+ rows = list(rows)
741
727
 
742
728
  row_updates: list[dict[Column, exprs.Expr]] = []
743
729
  pk_col_names = set(c.name for c in self._tbl_version.primary_key_columns())
744
730
 
745
731
  # pseudo-column _rowid: contains the rowid of the row to update and can be used instead of the primary key
746
- has_rowid = self.__ROWID_COLUMN_NAME in rows[0]
732
+ has_rowid = _ROWID_COLUMN_NAME in rows[0]
747
733
  rowids: list[Tuple[int, ...]] = []
748
734
  if len(pk_col_names) == 0 and not has_rowid:
749
735
  raise excs.Error('Table must have primary key for batch update')
750
736
 
751
737
  for row_spec in rows:
752
- col_vals = self._validate_update_spec(row_spec, allow_pk=not has_rowid, allow_exprs=False)
738
+ col_vals = self._tbl_version._validate_update_spec(row_spec, allow_pk=not has_rowid, allow_exprs=False)
753
739
  if has_rowid:
754
740
  # we expect the _rowid column to be present for each row
755
- assert self.__ROWID_COLUMN_NAME in row_spec
756
- rowids.append(row_spec[self.__ROWID_COLUMN_NAME])
741
+ assert _ROWID_COLUMN_NAME in row_spec
742
+ rowids.append(row_spec[_ROWID_COLUMN_NAME])
757
743
  else:
758
744
  col_names = set(col.name for col in col_vals.keys())
759
745
  if any(pk_col_name not in col_names for pk_col_name in pk_col_names):
@@ -762,51 +748,6 @@ class Table(SchemaObject):
762
748
  row_updates.append(col_vals)
763
749
  return self._tbl_version.batch_update(row_updates, rowids, cascade)
764
750
 
765
- def _validate_update_spec(
766
- self, value_spec: dict[str, Any], allow_pk: bool, allow_exprs: bool
767
- ) -> dict[Column, 'pixeltable.exprs.Expr']:
768
- from pixeltable import exprs
769
- update_targets: dict[Column, exprs.Expr] = {}
770
- for col_name, val in value_spec.items():
771
- if not isinstance(col_name, str):
772
- raise excs.Error(f'Update specification: dict key must be column name, got {col_name!r}')
773
- if col_name == self.__ROWID_COLUMN_NAME:
774
- # ignore pseudo-column _rowid
775
- continue
776
- col = self._tbl_version_path.get_column(col_name, include_bases=False)
777
- if col is None:
778
- # TODO: return more informative error if this is trying to update a base column
779
- raise excs.Error(f'Column {col_name} unknown')
780
- if col.is_computed:
781
- raise excs.Error(f'Column {col_name} is computed and cannot be updated')
782
- if col.is_pk and not allow_pk:
783
- raise excs.Error(f'Column {col_name} is a primary key column and cannot be updated')
784
- if col.col_type.is_media_type():
785
- raise excs.Error(f'Column {col_name} has type image/video/audio/document and cannot be updated')
786
-
787
- # make sure that the value is compatible with the column type
788
- try:
789
- # check if this is a literal
790
- value_expr = exprs.Literal(val, col_type=col.col_type)
791
- except TypeError:
792
- if not allow_exprs:
793
- raise excs.Error(
794
- f'Column {col_name}: value {val!r} is not a valid literal for this column '
795
- f'(expected {col.col_type})')
796
- # it's not a literal, let's try to create an expr from it
797
- value_expr = exprs.Expr.from_object(val)
798
- if value_expr is None:
799
- raise excs.Error(f'Column {col_name}: value {val!r} is not a recognized literal or expression')
800
- if not col.col_type.matches(value_expr.col_type):
801
- raise excs.Error((
802
- f'Type of value {val!r} ({value_expr.col_type}) is not compatible with the type of column '
803
- f'{col_name} ({col.col_type})'
804
- ))
805
- update_targets[col] = value_expr
806
-
807
- return update_targets
808
-
809
- @abc.abstractmethod
810
751
  def delete(self, where: Optional['pixeltable.exprs.Predicate'] = None) -> UpdateStatus:
811
752
  """Delete rows in this table.
812
753
 
@@ -5,29 +5,31 @@ import importlib
5
5
  import inspect
6
6
  import logging
7
7
  import time
8
- from typing import Optional, List, Dict, Any, Tuple, Type, Set, Iterable
9
8
  import uuid
9
+ from typing import Optional, List, Dict, Any, Tuple, Type, Iterable
10
10
  from uuid import UUID
11
11
 
12
12
  import sqlalchemy as sql
13
13
  import sqlalchemy.orm as orm
14
14
 
15
15
  import pixeltable
16
- import pixeltable.func as func
17
- import pixeltable.type_system as ts
18
16
  import pixeltable.exceptions as excs
17
+ import pixeltable.exprs as exprs
18
+ import pixeltable.func as func
19
19
  import pixeltable.index as index
20
+ import pixeltable.type_system as ts
20
21
  from pixeltable.env import Env
21
22
  from pixeltable.iterators import ComponentIterator
22
23
  from pixeltable.metadata import schema
23
24
  from pixeltable.utils.filecache import FileCache
24
25
  from pixeltable.utils.media_store import MediaStore
25
26
  from .column import Column
26
- from .globals import UpdateStatus, POS_COLUMN_NAME, is_valid_identifier
27
+ from .globals import UpdateStatus, POS_COLUMN_NAME, is_valid_identifier, _ROWID_COLUMN_NAME
27
28
  from ..func.globals import resolve_symbol
28
29
 
29
30
  _logger = logging.getLogger('pixeltable')
30
31
 
32
+
31
33
  class TableVersion:
32
34
  """
33
35
  TableVersion represents a particular version of a table/view along with its physical representation:
@@ -243,7 +245,6 @@ class TableVersion:
243
245
  def _init_cols(self, tbl_md: schema.TableMd, schema_version_md: schema.TableSchemaVersionMd) -> None:
244
246
  """Initialize self.cols with the columns visible in our effective version"""
245
247
  import pixeltable.exprs as exprs
246
- from pixeltable.catalog import Catalog
247
248
 
248
249
  self.cols = []
249
250
  self.cols_by_name = {}
@@ -704,15 +705,34 @@ class TableVersion:
704
705
  return result
705
706
 
706
707
  def update(
707
- self, update_targets: dict[Column, 'pixeltable.exprs.Expr'],
708
- where_clause: Optional['pixeltable.exprs.Predicate'] = None, cascade: bool = True
708
+ self, value_spec: dict[str, Any], where: Optional['exprs.Predicate'] = None, cascade: bool = True
709
709
  ) -> UpdateStatus:
710
+ """Update rows in this TableVersionPath.
711
+ Args:
712
+ value_spec: a list of (column, value) pairs specifying the columns to update and their new values.
713
+ where: a Predicate to filter rows to update.
714
+ cascade: if True, also update all computed columns that transitively depend on the updated columns,
715
+ including within views.
716
+ """
717
+ if self.is_snapshot:
718
+ raise excs.Error('Cannot update a snapshot')
719
+
720
+ from pixeltable.plan import Planner
721
+
722
+ update_spec = self._validate_update_spec(value_spec, allow_pk=False, allow_exprs=True)
723
+ if where is not None:
724
+ if not isinstance(where, exprs.Predicate):
725
+ raise excs.Error(f"'where' argument must be a Predicate, got {type(where)}")
726
+ analysis_info = Planner.analyze(self.path, where)
727
+ # for now we require that the updated rows can be identified via SQL, rather than via a Python filter
728
+ if analysis_info.filter is not None:
729
+ raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
730
+
710
731
  with Env.get().engine.begin() as conn:
711
- return self._update(conn, update_targets, where_clause, cascade)
732
+ return self._update(conn, update_spec, where, cascade)
712
733
 
713
734
  def batch_update(
714
- self, batch: list[dict[Column, 'pixeltable.exprs.Expr']], rowids: list[Tuple[int, ...]],
715
- cascade: bool = True
735
+ self, batch: list[dict[Column, 'exprs.Expr']], rowids: list[tuple[int, ...]], cascade: bool = True
716
736
  ) -> UpdateStatus:
717
737
  """Update rows in batch.
718
738
  Args:
@@ -721,7 +741,6 @@ class TableVersion:
721
741
  """
722
742
  # if we do lookups of rowids, we must have one for each row in the batch
723
743
  assert len(rowids) == 0 or len(rowids) == len(batch)
724
- import pixeltable.exprs as exprs
725
744
  result_status = UpdateStatus()
726
745
  cols_with_excs: set[str] = set()
727
746
  updated_cols: set[str] = set()
@@ -768,24 +787,61 @@ class TableVersion:
768
787
  where_clause: Optional['pixeltable.exprs.Predicate'] = None, cascade: bool = True,
769
788
  show_progress: bool = True
770
789
  ) -> UpdateStatus:
771
- """Update rows in this table.
772
- Args:
773
- update_targets: a list of (column, value) pairs specifying the columns to update and their new values.
774
- where_clause: a Predicate to filter rows to update.
775
- cascade: if True, also update all computed columns that transitively depend on the updated columns,
776
- including within views.
777
- """
778
- assert not self.is_snapshot
779
790
  from pixeltable.plan import Planner
780
- plan, updated_cols, recomputed_cols = \
791
+
792
+ plan, updated_cols, recomputed_cols = (
781
793
  Planner.create_update_plan(self.path, update_targets, [], where_clause, cascade)
782
- result = self._propagate_update(
794
+ )
795
+ result = self.propagate_update(
783
796
  plan, where_clause.sql_expr() if where_clause is not None else None, recomputed_cols,
784
797
  base_versions=[], conn=conn, timestamp=time.time(), cascade=cascade, show_progress=show_progress)
785
798
  result.updated_cols = updated_cols
786
799
  return result
787
800
 
788
- def _propagate_update(
801
+ def _validate_update_spec(
802
+ self, value_spec: dict[str, Any], allow_pk: bool, allow_exprs: bool
803
+ ) -> dict[Column, 'exprs.Expr']:
804
+ update_targets: dict[Column, exprs.Expr] = {}
805
+ for col_name, val in value_spec.items():
806
+ if not isinstance(col_name, str):
807
+ raise excs.Error(f'Update specification: dict key must be column name, got {col_name!r}')
808
+ if col_name == _ROWID_COLUMN_NAME:
809
+ # ignore pseudo-column _rowid
810
+ continue
811
+ col = self.path.get_column(col_name, include_bases=False)
812
+ if col is None:
813
+ # TODO: return more informative error if this is trying to update a base column
814
+ raise excs.Error(f'Column {col_name} unknown')
815
+ if col.is_computed:
816
+ raise excs.Error(f'Column {col_name} is computed and cannot be updated')
817
+ if col.is_pk and not allow_pk:
818
+ raise excs.Error(f'Column {col_name} is a primary key column and cannot be updated')
819
+ if col.col_type.is_media_type():
820
+ raise excs.Error(f'Column {col_name} has type image/video/audio/document and cannot be updated')
821
+
822
+ # make sure that the value is compatible with the column type
823
+ try:
824
+ # check if this is a literal
825
+ value_expr = exprs.Literal(val, col_type=col.col_type)
826
+ except TypeError:
827
+ if not allow_exprs:
828
+ raise excs.Error(
829
+ f'Column {col_name}: value {val!r} is not a valid literal for this column '
830
+ f'(expected {col.col_type})')
831
+ # it's not a literal, let's try to create an expr from it
832
+ value_expr = exprs.Expr.from_object(val)
833
+ if value_expr is None:
834
+ raise excs.Error(f'Column {col_name}: value {val!r} is not a recognized literal or expression')
835
+ if not col.col_type.matches(value_expr.col_type):
836
+ raise excs.Error((
837
+ f'Type of value {val!r} ({value_expr.col_type}) is not compatible with the type of column '
838
+ f'{col_name} ({col.col_type})'
839
+ ))
840
+ update_targets[col] = value_expr
841
+
842
+ return update_targets
843
+
844
+ def propagate_update(
789
845
  self, plan: Optional[exec.ExecNode], where_clause: Optional[sql.ClauseElement],
790
846
  recomputed_view_cols: List[Column], base_versions: List[Optional[int]], conn: sql.engine.Connection,
791
847
  timestamp: float, cascade: bool, show_progress: bool = True
@@ -810,7 +866,7 @@ class TableVersion:
810
866
  if len(recomputed_cols) > 0:
811
867
  from pixeltable.plan import Planner
812
868
  plan = Planner.create_view_update_plan(view.path, recompute_targets=recomputed_cols)
813
- status = view._propagate_update(
869
+ status = view.propagate_update(
814
870
  plan, None, recomputed_view_cols, base_versions=base_versions, conn=conn, timestamp=timestamp, cascade=True)
815
871
  result.num_rows += status.num_rows
816
872
  result.num_excs += status.num_excs
@@ -819,22 +875,31 @@ class TableVersion:
819
875
  result.cols_with_excs = list(dict.fromkeys(result.cols_with_excs).keys()) # remove duplicates
820
876
  return result
821
877
 
822
- def delete(self, where: Optional['pixeltable.exprs.Predicate'] = None) -> UpdateStatus:
878
+ def delete(self, where: Optional['exprs.Predicate'] = None) -> UpdateStatus:
823
879
  """Delete rows in this table.
824
880
  Args:
825
881
  where: a Predicate to filter rows to delete.
826
882
  """
827
883
  assert self.is_insertable()
884
+ from pixeltable.exprs import Predicate
828
885
  from pixeltable.plan import Planner
829
- analysis_info = Planner.analyze(self, where)
886
+ if where is not None:
887
+ if not isinstance(where, Predicate):
888
+ raise excs.Error(f"'where' argument must be a Predicate, got {type(where)}")
889
+ analysis_info = Planner.analyze(self.path, where)
890
+ # for now we require that the updated rows can be identified via SQL, rather than via a Python filter
891
+ if analysis_info.filter is not None:
892
+ raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
893
+
894
+ analysis_info = Planner.analyze(self.path, where)
830
895
  with Env.get().engine.begin() as conn:
831
- num_rows = self._delete(analysis_info.sql_where_clause, base_versions=[], conn=conn, timestamp=time.time())
896
+ num_rows = self.propagate_delete(analysis_info.sql_where_clause, base_versions=[], conn=conn, timestamp=time.time())
832
897
 
833
898
  status = UpdateStatus(num_rows=num_rows)
834
899
  return status
835
900
 
836
- def _delete(
837
- self, where: Optional['pixeltable.exprs.Predicate'], base_versions: List[Optional[int]],
901
+ def propagate_delete(
902
+ self, where: Optional['exprs.Predicate'], base_versions: List[Optional[int]],
838
903
  conn: sql.engine.Connection, timestamp: float) -> int:
839
904
  """Delete rows in this table and propagate to views.
840
905
  Args:
@@ -853,7 +918,7 @@ class TableVersion:
853
918
  else:
854
919
  pass
855
920
  for view in self.mutable_views:
856
- num_rows += view._delete(
921
+ num_rows += view.propagate_delete(
857
922
  where=None, base_versions=[self.version] + base_versions, conn=conn, timestamp=timestamp)
858
923
  return num_rows
859
924
 
@@ -5,13 +5,13 @@ from typing import Optional, Union
5
5
  from uuid import UUID
6
6
 
7
7
  import pixeltable
8
- import pixeltable.catalog as catalog
9
8
  from .column import Column
10
9
  from .globals import POS_COLUMN_NAME
11
10
  from .table_version import TableVersion
12
11
 
13
12
  _logger = logging.getLogger('pixeltable')
14
13
 
14
+
15
15
  class TableVersionPath:
16
16
  """
17
17
  A TableVersionPath represents the sequence of TableVersions from a base table to a particular view:
@@ -1,30 +1,33 @@
1
1
  from __future__ import annotations
2
+
3
+ import inspect
2
4
  import logging
3
- from typing import List, Optional, Type, Dict, Set, Any, Iterable
5
+ from typing import Optional, Type, Dict, Set, Any, Iterable, TYPE_CHECKING
4
6
  from uuid import UUID
5
- import inspect
6
7
 
7
8
  import sqlalchemy.orm as orm
8
9
 
9
- from .table import Table
10
- from .table_version import TableVersion
11
- from .table_version_path import TableVersionPath
12
- from .column import Column
13
- from .catalog import Catalog
14
- from .globals import POS_COLUMN_NAME, UpdateStatus
15
- from pixeltable.env import Env
16
- from pixeltable.iterators import ComponentIterator
17
- from pixeltable.exceptions import Error
18
- import pixeltable.func as func
19
- import pixeltable.type_system as ts
20
10
  import pixeltable.catalog as catalog
11
+ import pixeltable.exceptions as excs
12
+ import pixeltable.func as func
21
13
  import pixeltable.metadata.schema as md_schema
14
+ from pixeltable.env import Env
15
+ from pixeltable.exceptions import Error
16
+ from pixeltable.iterators import ComponentIterator
22
17
  from pixeltable.type_system import InvalidType, IntType
23
- import pixeltable.exceptions as excs
18
+ from .catalog import Catalog
19
+ from .column import Column
20
+ from .globals import POS_COLUMN_NAME, UpdateStatus
21
+ from .table import Table
22
+ from .table_version import TableVersion
23
+ from .table_version_path import TableVersionPath
24
24
 
25
+ if TYPE_CHECKING:
26
+ import pixeltable as pxt
25
27
 
26
28
  _logger = logging.getLogger('pixeltable')
27
29
 
30
+
28
31
  class View(Table):
29
32
  """A `Table` that presents a virtual view of another table (or view).
30
33
 
@@ -34,10 +37,11 @@ class View(Table):
34
37
  is simply a reference to a specific set of base versions.
35
38
  """
36
39
  def __init__(
37
- self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath, base: Table,
40
+ self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath, base_id: UUID,
38
41
  snapshot_only: bool):
39
42
  super().__init__(id, dir_id, name, tbl_version_path)
40
- self._base = base # keep a reference to the base Table, so that we can keep track of its dependents
43
+ assert base_id in catalog.Catalog.get().tbl_dependents
44
+ self._base_id = base_id # keep a reference to the base Table ID, so that we can keep track of its dependents
41
45
  self._snapshot_only = snapshot_only
42
46
 
43
47
  @classmethod
@@ -46,8 +50,8 @@ class View(Table):
46
50
 
47
51
  @classmethod
48
52
  def create(
49
- cls, dir_id: UUID, name: str, base: Table, schema: Dict[str, Any],
50
- predicate: 'exprs.Predicate', is_snapshot: bool, num_retained_versions: int, comment: str,
53
+ cls, dir_id: UUID, name: str, base: TableVersionPath, schema: Dict[str, Any],
54
+ predicate: 'pxt.exprs.Predicate', is_snapshot: bool, num_retained_versions: int, comment: str,
51
55
  iterator_cls: Optional[Type[ComponentIterator]], iterator_args: Optional[Dict]
52
56
  ) -> View:
53
57
  columns = cls._create_columns(schema)
@@ -55,8 +59,8 @@ class View(Table):
55
59
 
56
60
  # verify that filter can be evaluated in the context of the base
57
61
  if predicate is not None:
58
- if not predicate.is_bound_by(base._tbl_version_path):
59
- raise excs.Error(f'Filter cannot be computed in the context of the base {base._name}')
62
+ if not predicate.is_bound_by(base):
63
+ raise excs.Error(f'Filter cannot be computed in the context of the base {base.tbl_name()}')
60
64
  # create a copy that we can modify and store
61
65
  predicate = predicate.copy()
62
66
 
@@ -65,9 +69,9 @@ class View(Table):
65
69
  if not col.is_computed:
66
70
  continue
67
71
  # make sure that the value can be computed in the context of the base
68
- if col.value_expr is not None and not col.value_expr.is_bound_by(base._tbl_version_path):
72
+ if col.value_expr is not None and not col.value_expr.is_bound_by(base):
69
73
  raise excs.Error(
70
- f'Column {col.name}: value expression cannot be computed in the context of the base {base._name}')
74
+ f'Column {col.name}: value expression cannot be computed in the context of the base {base.tbl_name()}')
71
75
 
72
76
  if iterator_cls is not None:
73
77
  assert iterator_args is not None
@@ -114,7 +118,7 @@ class View(Table):
114
118
  iterator_args_expr = InlineDict(iterator_args) if iterator_args is not None else None
115
119
  iterator_class_fqn = f'{iterator_cls.__module__}.{iterator_cls.__name__}' if iterator_cls is not None \
116
120
  else None
117
- base_version_path = cls._get_snapshot_path(base._tbl_version_path) if is_snapshot else base._tbl_version_path
121
+ base_version_path = cls._get_snapshot_path(base) if is_snapshot else base
118
122
  base_versions = [
119
123
  (tbl_version.id.hex, tbl_version.version if is_snapshot or tbl_version.is_snapshot else None)
120
124
  for tbl_version in base_version_path.get_tbl_versions()
@@ -139,11 +143,11 @@ class View(Table):
139
143
  session, dir_id, name, columns, num_retained_versions, comment, base_path=base_version_path, view_md=view_md)
140
144
  if tbl_version is None:
141
145
  # this is purely a snapshot: we use the base's tbl version path
142
- view = cls(id, dir_id, name, base_version_path, base, snapshot_only=True)
146
+ view = cls(id, dir_id, name, base_version_path, base.tbl_id(), snapshot_only=True)
143
147
  _logger.info(f'created snapshot {name}')
144
148
  else:
145
149
  view = cls(
146
- id, dir_id, name, TableVersionPath(tbl_version, base=base_version_path), base,
150
+ id, dir_id, name, TableVersionPath(tbl_version, base=base_version_path), base.tbl_id(),
147
151
  snapshot_only=False)
148
152
  _logger.info(f'Created view `{name}`, id={tbl_version.id}')
149
153
 
@@ -156,7 +160,7 @@ class View(Table):
156
160
  session.commit()
157
161
  cat = Catalog.get()
158
162
  cat.tbl_dependents[view._id] = []
159
- cat.tbl_dependents[base._id].append(view)
163
+ cat.tbl_dependents[base.tbl_id()].append(view)
160
164
  cat.tbls[view._id] = view
161
165
  return view
162
166
 
@@ -200,7 +204,7 @@ class View(Table):
200
204
  del cat.tbls[self._id]
201
205
  else:
202
206
  super()._drop()
203
- cat.tbl_dependents[self._base._id].remove(self)
207
+ cat.tbl_dependents[self._base_id].remove(self)
204
208
  del cat.tbl_dependents[self._id]
205
209
 
206
210
  def insert(
pixeltable/dataframe.py CHANGED
@@ -1,32 +1,27 @@
1
1
  from __future__ import annotations
2
2
 
3
- import base64
4
3
  import copy
5
4
  import hashlib
6
- import io
7
5
  import json
8
6
  import logging
9
7
  import mimetypes
10
8
  import traceback
11
9
  from pathlib import Path
12
- from typing import List, Optional, Any, Dict, Iterator, Tuple, Set
10
+ from typing import List, Optional, Any, Dict, Iterator, Tuple, Set, Callable
13
11
 
14
- import PIL.Image
15
- import cv2
16
12
  import pandas as pd
17
13
  import pandas.io.formats.style
18
14
  import sqlalchemy as sql
19
- from PIL import Image
20
15
 
21
16
  import pixeltable.catalog as catalog
22
17
  import pixeltable.exceptions as excs
23
18
  import pixeltable.exprs as exprs
24
- import pixeltable.type_system as ts
25
- import pixeltable.func as func
26
19
  from pixeltable.catalog import is_valid_identifier
20
+ from pixeltable.catalog.globals import UpdateStatus
27
21
  from pixeltable.env import Env
28
22
  from pixeltable.plan import Planner
29
23
  from pixeltable.type_system import ColumnType
24
+ from pixeltable.utils.formatter import Formatter
30
25
  from pixeltable.utils.http_server import get_file_uri
31
26
 
32
27
  __all__ = ['DataFrame']
@@ -47,12 +42,7 @@ class DataFrameResultSet:
47
42
  self._rows = rows
48
43
  self._col_names = col_names
49
44
  self._col_types = col_types
50
- self._formatters = {
51
- ts.ImageType: self._format_img,
52
- ts.VideoType: self._format_video,
53
- ts.AudioType: self._format_audio,
54
- ts.DocumentType: self._format_document,
55
- }
45
+ self.__formatter = Formatter(len(self._rows), len(self._col_names), Env.get().http_address)
56
46
 
57
47
  def __len__(self) -> int:
58
48
  return len(self._rows)
@@ -67,11 +57,11 @@ class DataFrameResultSet:
67
57
  return self.to_pandas().__repr__()
68
58
 
69
59
  def _repr_html_(self) -> str:
70
- formatters = {
71
- col_name: self._formatters[col_type.__class__]
72
- for col_name, col_type in zip(self._col_names, self._col_types)
73
- if col_type.__class__ in self._formatters
74
- }
60
+ formatters: dict[str, Callable] = {}
61
+ for col_name, col_type in zip(self._col_names, self._col_types):
62
+ formatter = self.__formatter.get_pandas_formatter(col_type)
63
+ if formatter is not None:
64
+ formatters[col_name] = formatter
75
65
  return self.to_pandas().to_html(formatters=formatters, escape=False, index=False)
76
66
 
77
67
  def __str__(self) -> str:
@@ -87,100 +77,6 @@ class DataFrameResultSet:
87
77
  def _row_to_dict(self, row_idx: int) -> Dict[str, Any]:
88
78
  return {self._col_names[i]: self._rows[row_idx][i] for i in range(len(self._col_names))}
89
79
 
90
- # Formatters
91
- def _format_img(self, img: Image.Image) -> str:
92
- """
93
- Create <img> tag for Image object.
94
- """
95
- assert isinstance(img, Image.Image), f'Wrong type: {type(img)}'
96
- # Try to make it look decent in a variety of display scenarios
97
- if len(self._rows) > 1:
98
- width = 240 # Multiple rows: display small images
99
- elif len(self._col_names) > 1:
100
- width = 480 # Multiple columns: display medium images
101
- else:
102
- width = 640 # A single image: larger display
103
- with io.BytesIO() as buffer:
104
- img.save(buffer, 'jpeg')
105
- img_base64 = base64.b64encode(buffer.getvalue()).decode()
106
- return f"""
107
- <div class="pxt_image" style="width:{width}px;">
108
- <img src="data:image/jpeg;base64,{img_base64}" width="{width}" />
109
- </div>
110
- """
111
-
112
- def _format_video(self, file_path: str) -> str:
113
- thumb_tag = ''
114
- # Attempt to extract the first frame of the video to use as a thumbnail,
115
- # so that the notebook can be exported as HTML and viewed in contexts where
116
- # the video itself is not accessible.
117
- # TODO(aaron-siegel): If the video is backed by a concrete external URL,
118
- # should we link to that instead?
119
- video_reader = cv2.VideoCapture(str(file_path))
120
- if video_reader.isOpened():
121
- status, img_array = video_reader.read()
122
- if status:
123
- img_array = cv2.cvtColor(img_array, cv2.COLOR_BGR2RGB)
124
- thumb = PIL.Image.fromarray(img_array)
125
- with io.BytesIO() as buffer:
126
- thumb.save(buffer, 'jpeg')
127
- thumb_base64 = base64.b64encode(buffer.getvalue()).decode()
128
- thumb_tag = f'poster="data:image/jpeg;base64,{thumb_base64}"'
129
- video_reader.release()
130
- if len(self._rows) > 1:
131
- width = 320
132
- elif len(self._col_names) > 1:
133
- width = 480
134
- else:
135
- width = 800
136
- return f"""
137
- <div class="pxt_video" style="width:{width}px;">
138
- <video controls width="{width}" {thumb_tag}>
139
- {_create_source_tag(file_path)}
140
- </video>
141
- </div>
142
- """
143
-
144
- def _format_document(self, file_path: str) -> str:
145
- max_width = max_height = 320
146
- # by default, file path will be shown as a link
147
- inner_element = file_path
148
- # try generating a thumbnail for different types and use that if successful
149
- if file_path.lower().endswith('.pdf'):
150
- try:
151
- import fitz
152
-
153
- doc = fitz.open(file_path)
154
- p = doc.get_page_pixmap(0)
155
- while p.width > max_width or p.height > max_height:
156
- # shrink(1) will halve each dimension
157
- p.shrink(1)
158
- data = p.tobytes(output='jpeg')
159
- thumb_base64 = base64.b64encode(data).decode()
160
- img_src = f'data:image/jpeg;base64,{thumb_base64}'
161
- inner_element = f"""
162
- <img style="object-fit: contain; border: 1px solid black;" src="{img_src}" />
163
- """
164
- except:
165
- logging.warning(f'Failed to produce PDF thumbnail {file_path}. Make sure you have PyMuPDF installed.')
166
-
167
- return f"""
168
- <div class="pxt_document" style="width:{max_width}px;">
169
- <a href="{get_file_uri(Env.get().http_address, file_path)}">
170
- {inner_element}
171
- </a>
172
- </div>
173
- """
174
-
175
- def _format_audio(self, file_path: str) -> str:
176
- return f"""
177
- <div class="pxt_audio">
178
- <audio controls>
179
- {_create_source_tag(file_path)}
180
- </audio>
181
- </div>
182
- """
183
-
184
80
  def __getitem__(self, index: Any) -> Any:
185
81
  if isinstance(index, str):
186
82
  if index not in self._col_names:
@@ -595,7 +491,7 @@ class DataFrame:
595
491
  raise excs.Error(f'Invalid name: {name}')
596
492
  base_list = [(expr, None) for expr in items] + [(expr, k) for (k, expr) in named_items.items()]
597
493
  if len(base_list) == 0:
598
- raise excs.Error(f'Empty select list')
494
+ return self
599
495
 
600
496
  # analyze select list; wrap literals with the corresponding expressions
601
497
  select_list = []
@@ -662,7 +558,7 @@ class DataFrame:
662
558
  # we need to make sure that the grouping table is a base of self.tbl
663
559
  base = self.tbl.find_tbl_version(item._tbl_version_path.tbl_id())
664
560
  if base is None or base.id == self.tbl.tbl_id():
665
- raise excs.Error(f'group_by(): {item.name} is not a base table of {self.tbl.tbl_name()}')
561
+ raise excs.Error(f'group_by(): {item.get_name()} is not a base table of {self.tbl.tbl_name()}')
666
562
  grouping_tbl = item._tbl_version_path.tbl_version
667
563
  break
668
564
  if not isinstance(item, exprs.Expr):
@@ -708,6 +604,27 @@ class DataFrame:
708
604
  limit=n,
709
605
  )
710
606
 
607
def update(self, value_spec: dict[str, Any], cascade: bool = True) -> UpdateStatus:
    """Update the rows selected by this `DataFrame`'s `where` clause.

    Args:
        value_spec: Forwarded unchanged to the underlying table version's `update()`.
        cascade: Forwarded unchanged to the underlying table version's `update()`.

    Returns:
        The `UpdateStatus` reported by the table version.

    Raises:
        excs.Error: If this `DataFrame` carries a group_by, order_by, select or limit clause.
    """
    self._validate_mutable('update')
    target = self.tbl.tbl_version
    return target.update(value_spec, where=self.where_clause, cascade=cascade)
610
+
611
def delete(self) -> UpdateStatus:
    """Delete the rows selected by this `DataFrame`'s `where` clause.

    Returns:
        The `UpdateStatus` reported by the underlying table version.

    Raises:
        excs.Error: If this `DataFrame` carries a group_by, order_by, select or limit clause,
            or if the underlying table is not insertable.
    """
    self._validate_mutable('delete')
    if self.tbl.is_insertable():
        return self.tbl.tbl_version.delete(where=self.where_clause)
    raise excs.Error('Cannot delete from view')
616
+
617
+ def _validate_mutable(self, op_name: str) -> None:
618
+ """Tests whether this `DataFrame` can be mutated (such as by an update operation)."""
619
+ if self.group_by_clause is not None or self.grouping_tbl is not None:
620
+ raise excs.Error(f'Cannot use `{op_name}` after `group_by`')
621
+ if self.order_by_clause is not None:
622
+ raise excs.Error(f'Cannot use `{op_name}` after `order_by`')
623
+ if self.select_list is not None:
624
+ raise excs.Error(f'Cannot use `{op_name}` after `select`')
625
+ if self.limit_val is not None:
626
+ raise excs.Error(f'Cannot use `{op_name}` after `limit`')
627
+
711
628
  def __getitem__(self, index: object) -> DataFrame:
712
629
  """
713
630
  Allowed:
@@ -37,8 +37,6 @@ def transcribe(
37
37
  word_timestamps: bool = False,
38
38
  prepend_punctuations: str = '"\'“¿([{-',
39
39
  append_punctuations: str = '"\'.。,,!!??::”)]}、',
40
- clip_timestamps: str = '0',
41
- hallucination_silence_threshold: Optional[float] = None,
42
40
  decode_options: Optional[dict] = None,
43
41
  ) -> dict:
44
42
  import torch
@@ -58,8 +56,6 @@ def transcribe(
58
56
  word_timestamps=word_timestamps,
59
57
  prepend_punctuations=prepend_punctuations,
60
58
  append_punctuations=append_punctuations,
61
- clip_timestamps=clip_timestamps,
62
- hallucination_silence_threshold=hallucination_silence_threshold,
63
59
  **decode_options,
64
60
  )
65
61
  return result
pixeltable/globals.py CHANGED
@@ -1,13 +1,13 @@
1
1
  import dataclasses
2
2
  import logging
3
- from typing import Any, Optional, Union, Type
3
+ from typing import Any, Optional, Union
4
4
 
5
5
  import pandas as pd
6
6
  import sqlalchemy as sql
7
7
  from sqlalchemy.util.preloaded import orm
8
8
 
9
9
  import pixeltable.exceptions as excs
10
- from pixeltable import catalog, func
10
+ from pixeltable import catalog, func, DataFrame
11
11
  from pixeltable.catalog import Catalog
12
12
  from pixeltable.env import Env
13
13
  from pixeltable.exprs import Predicate
@@ -78,7 +78,7 @@ def create_table(
78
78
 
79
79
  def create_view(
80
80
  path_str: str,
81
- base: catalog.Table,
81
+ base: Union[catalog.Table, DataFrame],
82
82
  *,
83
83
  schema: Optional[dict[str, Any]] = None,
84
84
  filter: Optional[Predicate] = None,
@@ -92,7 +92,7 @@ def create_view(
92
92
 
93
93
  Args:
94
94
  path_str: Path to the view.
95
- base: Table (ie, table or view or snapshot) to base the view on.
95
+ base: Table (i.e., table or view or snapshot) or DataFrame to base the view on.
96
96
  schema: dictionary mapping column names to column types, value expressions, or to column specifications.
97
97
  filter: Predicate to filter rows of the base table.
98
98
  is_snapshot: Whether the view is a snapshot.
@@ -122,7 +122,19 @@ def create_view(
122
122
  >>> snapshot_view = cl.create_view(
123
123
  'my_snapshot', base, schema={'col3': base.col2 + 1}, filter=base.col1 > 10, is_snapshot=True)
124
124
  """
125
- assert isinstance(base, catalog.Table)
125
+ if isinstance(base, catalog.Table):
126
+ tbl_version_path = base._tbl_version_path
127
+ elif isinstance(base, DataFrame):
128
+ base._validate_mutable('create_view')
129
+ tbl_version_path = base.tbl
130
+ if base.where_clause is not None and filter is not None:
131
+ raise excs.Error(
132
+ 'Cannot specify a `filter` directly if one is already declared in a `DataFrame.where` clause'
133
+ )
134
+ filter = base.where_clause
135
+ else:
136
+ raise excs.Error('`base` must be an instance of `Table` or `DataFrame`')
137
+ assert isinstance(base, catalog.Table) or isinstance(base, DataFrame)
126
138
  path = catalog.Path(path_str)
127
139
  try:
128
140
  Catalog.get().paths.check_is_valid(path, expected=None)
@@ -139,10 +151,11 @@ def create_view(
139
151
  iterator_class, iterator_args = None, None
140
152
  else:
141
153
  iterator_class, iterator_args = iterator
154
+
142
155
  view = catalog.View.create(
143
156
  dir._id,
144
157
  path.name,
145
- base=base,
158
+ base=tbl_version_path,
146
159
  schema=schema,
147
160
  predicate=filter,
148
161
  is_snapshot=is_snapshot,
@@ -0,0 +1,234 @@
1
+ import base64
2
+ import html
3
+ import json
4
+ import logging
5
+ import mimetypes
6
+ from typing import Any, Callable, Optional
7
+
8
+ import PIL
9
+ import PIL.Image as Image
10
+ import cv2
11
+ import numpy as np
12
+
13
+ import io
14
+ import pixeltable.type_system as ts
15
+ from pixeltable.utils.http_server import get_file_uri
16
+
17
+ _logger = logging.getLogger('pixeltable')
18
+
19
+
20
class Formatter:
    """
    A factory for constructing HTML formatters for Pixeltable data. The formatters are used to customize
    the rendering of `DataFrameResultSet`s in notebooks.

    The strings returned by the formatters are HTML fragments; text taken from the data is
    HTML-escaped before it is embedded.

    Args:
        num_rows: Number of rows in the DataFrame being rendered.
        num_cols: Number of columns in the DataFrame being rendered.
        http_address: Root address of the Pixeltable HTTP server (used to construct URLs for media references).
    """

    __FLOAT_PRECISION = 3  # `precision` passed to numpy when printing floats and arrays
    __LIST_THRESHOLD = 16  # lists of at least this length are summarized; also numpy's `threshold`
    __LIST_EDGEITEMS = 6  # leading/trailing items kept when a list or array is summarized
    __STRING_SEP = ' ...... '  # marks the removed middle part of an abbreviated string
    __STRING_MAX_LEN = 1000  # maximum rendered length of a top-level string
    __NESTED_STRING_MAX_LEN = 300  # maximum rendered length of a string nested inside JSON

    def __init__(self, num_rows: int, num_cols: int, http_address: str):
        self.__num_rows = num_rows
        self.__num_cols = num_cols
        self.__http_address = http_address

    def get_pandas_formatter(self, col_type: 'ts.ColumnType') -> Optional[Callable]:
        """Return the formatter for values of `col_type`, or None if the type has no custom formatter."""
        if col_type.is_string_type():
            return self.format_string
        if col_type.is_float_type():
            return self.format_float
        if col_type.is_json_type():
            return self.format_json
        if col_type.is_array_type():
            return self.format_array
        if col_type.is_image_type():
            return self.format_img
        if col_type.is_video_type():
            return self.format_video
        if col_type.is_audio_type():
            return self.format_audio
        if col_type.is_document_type():
            return self.format_document
        return None

    @classmethod
    def format_string(cls, val: str) -> str:
        """
        Escapes special characters in `val`, and abbreviates `val` if its length exceeds `__STRING_MAX_LEN`.
        """
        return cls.__escape(cls.__abbreviate(val, cls.__STRING_MAX_LEN))

    @classmethod
    def __abbreviate(cls, val: str, max_len: int) -> str:
        """Shorten `val` to at most `max_len` characters by replacing its middle with `__STRING_SEP`."""
        if len(val) <= max_len:
            return val
        edgeitems = max((max_len - len(cls.__STRING_SEP)) // 2, 0)
        # slice the tail from an explicit start index: `val[-0:]` would be the whole string
        return f'{val[:edgeitems]}{cls.__STRING_SEP}{val[len(val) - edgeitems:]}'

    @classmethod
    def __escape(cls, val: str) -> str:
        # HTML-escape the specified string, then escape $ signs to suppress MathJax formatting
        # TODO(aaron-siegel): The '$' escaping isn't perfect; it will fail on '$' that are already escaped
        return html.escape(val).replace('$', r'\$')

    @classmethod
    def format_float(cls, val: float) -> str:
        """Format a single float with `__FLOAT_PRECISION` digits of precision."""
        # stay consistent with numpy formatting (0-D array has no brackets)
        return np.array2string(np.array(val), precision=cls.__FLOAT_PRECISION)

    @classmethod
    def format_array(cls, arr: np.ndarray) -> str:
        """Format an array with numpy, summarizing large arrays."""
        return np.array2string(
            arr,
            precision=cls.__FLOAT_PRECISION,
            threshold=cls.__LIST_THRESHOLD,
            edgeitems=cls.__LIST_EDGEITEMS,
            max_line_width=1000000,  # very wide, so numpy does not wrap the output
        )

    @classmethod
    def format_json(cls, val: Any) -> str:
        """Format a JSON value (string, number, list, dict, ...) for display."""
        if isinstance(val, str):
            # JSON-like formatting will be applied to strings that appear nested within a list or dict
            # (quote the string; escape any quotes inside the string; shorter abbreviations).
            # However, if the string appears in top-level position (i.e., the entire JSON value is a
            # string), then we format it like an ordinary string.
            return cls.format_string(val)
        # In all other cases, dump the JSON struct recursively.
        return cls.__format_json_rec(val)

    @classmethod
    def __format_json_rec(cls, val: Any) -> str:
        """Recursively format a (possibly nested) JSON value; long strings and lists are abbreviated."""
        if isinstance(val, str):
            return cls.__escape(json.dumps(cls.__abbreviate(val, cls.__NESTED_STRING_MAX_LEN)))
        if isinstance(val, float):
            return cls.format_float(val)
        if isinstance(val, np.ndarray):
            return cls.format_array(val)
        if isinstance(val, list):
            if len(val) < cls.__LIST_THRESHOLD:
                components = [cls.__format_json_rec(x) for x in val]
            else:
                components = [cls.__format_json_rec(x) for x in val[: cls.__LIST_EDGEITEMS]]
                components.append('...')
                components.extend(cls.__format_json_rec(x) for x in val[-cls.__LIST_EDGEITEMS :])
            return '[' + ', '.join(components) + ']'
        if isinstance(val, dict):
            kv_pairs = (f'{cls.__format_json_rec(k)}: {cls.__format_json_rec(v)}' for k, v in val.items())
            return '{' + ', '.join(kv_pairs) + '}'

        # Everything else
        try:
            return json.dumps(val)
        except TypeError:  # Not JSON serializable
            # str() of an arbitrary object may contain markup characters (e.g. '<Foo object at ...>')
            return cls.__escape(str(val))

    def __pick_width(self, multi_row: int, multi_col: int, single: int) -> int:
        """Choose a display width (in pixels) based on the shape of the table being rendered."""
        if self.__num_rows > 1:
            return multi_row
        if self.__num_cols > 1:
            return multi_col
        return single

    def format_img(self, img: 'Image.Image') -> str:
        """
        Create <img> tag for Image object.
        """
        assert isinstance(img, Image.Image), f'Wrong type: {type(img)}'
        # Try to make it look decent in a variety of display scenarios:
        # small images for multiple rows, medium for multiple columns, larger for a single image
        width = self.__pick_width(240, 480, 640)
        if img.mode not in ('L', 'RGB'):
            # JPEG cannot encode e.g. RGBA or palette images; saving them directly raises an error
            img = img.convert('RGB')
        with io.BytesIO() as buffer:
            img.save(buffer, 'jpeg')
            img_base64 = base64.b64encode(buffer.getvalue()).decode()
        return f"""
        <div class="pxt_image" style="width:{width}px;">
            <img src="data:image/jpeg;base64,{img_base64}" width="{width}" />
        </div>
        """

    def format_video(self, file_path: str) -> str:
        """Create a <video> tag for the file at `file_path`, with its first frame as poster if readable."""
        thumb_tag = ''
        # Attempt to extract the first frame of the video to use as a thumbnail,
        # so that the notebook can be exported as HTML and viewed in contexts where
        # the video itself is not accessible.
        # TODO(aaron-siegel): If the video is backed by a concrete external URL,
        #   should we link to that instead?
        video_reader = cv2.VideoCapture(str(file_path))
        try:
            if video_reader.isOpened():
                status, img_array = video_reader.read()
                if status:
                    img_array = cv2.cvtColor(img_array, cv2.COLOR_BGR2RGB)
                    thumb = Image.fromarray(img_array)
                    with io.BytesIO() as buffer:
                        thumb.save(buffer, 'jpeg')
                        thumb_base64 = base64.b64encode(buffer.getvalue()).decode()
                        thumb_tag = f'poster="data:image/jpeg;base64,{thumb_base64}"'
        finally:
            # release the capture even if decoding the first frame fails
            video_reader.release()
        width = self.__pick_width(320, 480, 800)
        return f"""
        <div class="pxt_video" style="width:{width}px;">
            <video controls width="{width}" {thumb_tag}>
                {self.__create_source_tag(self.__http_address, file_path)}
            </video>
        </div>
        """

    def format_audio(self, file_path: str) -> str:
        """Create an <audio> tag for the file at `file_path`."""
        return f"""
        <div class="pxt_audio">
            <audio controls>
                {self.__create_source_tag(self.__http_address, file_path)}
            </audio>
        </div>
        """

    def format_document(self, file_path: str) -> str:
        """Create a link to the document at `file_path`; PDFs are shown as a first-page thumbnail if possible."""
        max_width = max_height = 320
        # by default, file path will be shown as a link
        inner_element = html.escape(file_path)
        # try generating a thumbnail for different types and use that if successful
        if file_path.lower().endswith('.pdf'):
            try:
                import fitz

                doc = fitz.open(file_path)
                try:
                    p = doc.get_page_pixmap(0)
                    while p.width > max_width or p.height > max_height:
                        # shrink(1) will halve each dimension
                        p.shrink(1)
                    data = p.tobytes(output='jpeg')
                finally:
                    doc.close()
                thumb_base64 = base64.b64encode(data).decode()
                img_src = f'data:image/jpeg;base64,{thumb_base64}'
                inner_element = f"""
                    <img style="object-fit: contain; border: 1px solid black;" src="{img_src}" />
                """
            except Exception:
                # best effort: fall back to the plain link if the thumbnail cannot be produced
                _logger.warning(
                    'Failed to produce PDF thumbnail %s. Make sure you have PyMuPDF installed.', file_path
                )

        href = html.escape(get_file_uri(self.__http_address, file_path))
        return f"""
        <div class="pxt_document" style="width:{max_width}px;">
            <a href="{href}">
                {inner_element}
            </a>
        </div>
        """

    @classmethod
    def __create_source_tag(cls, http_address: str, file_path: str) -> str:
        """Create a <source> tag pointing at the HTTP server URL for `file_path`."""
        src_url = get_file_uri(http_address, file_path)
        mime = mimetypes.guess_type(src_url)[0]
        # if mime is None, the attribute string would not be valid html.
        mime_attr = f'type="{mime}"' if mime is not None else ''
        return f'<source src="{html.escape(src_url)}" {mime_attr} />'
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pixeltable
3
- Version: 0.2.9
3
+ Version: 0.2.11
4
4
  Summary: Pixeltable: The Multimodal AI Data Plane
5
5
  Author: Marcel Kornacker
6
6
  Author-email: marcelk@gmail.com
@@ -75,7 +75,7 @@ Learn how to create tables, populate them with data, and enhance them with built
75
75
 
76
76
  | Topic | Notebook | API |
77
77
  |:--------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------:|
78
- | Get Started | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/tutorials/pixeltable-basics.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | [![API](https://img.shields.io/badge/api-reference-blue.svg)](https://pixeltable.github.io/pixeltable/api/pixeltable/) |
78
+ | Get Started | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/tutorials/pixeltable-basics.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | [![API](https://img.shields.io/badge/api-reference-blue.svg)](https://pixeltable.github.io/pixeltable/api/pixeltable/) |
79
79
  | User-Defined Functions (UDFs) | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/howto/udfs-in-pixeltable.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | [![API](https://img.shields.io/badge/api-reference-blue.svg)](https://pixeltable.github.io/pixeltable/api/iterators/document-splitter/) |
80
80
  | Comparing Object Detection Models | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/tutorials/object-detection-in-videos.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | [![API](https://img.shields.io/badge/api-reference-blue.svg)](https://pixeltable.github.io/pixeltable/api-cheat-sheet/#frame-extraction-for-video-data) |
81
81
  | Experimenting with Chunking (RAG) | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/tutorials/rag-operations.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> | [![API](https://img.shields.io/badge/api-reference-blue.svg)](https://pixeltable.github.io/pixeltable/api/iterators/document-splitter/) |
@@ -1,20 +1,20 @@
1
1
  pixeltable/__init__.py,sha256=GNlV3jQKDQKTrOq8BVvPsnd9W9ROneP9-1C-zgNSWsg,1275
2
- pixeltable/__version__.py,sha256=ZH1jMRnmzFfm1T-NAz5ZJ08lYojBz6jzzdMQmFBGDzw,112
2
+ pixeltable/__version__.py,sha256=EOLZYNHwHWPLWPMk2nm8H1fHHCn1KsQtw2ovnB_eAnY,114
3
3
  pixeltable/catalog/__init__.py,sha256=E41bxaPeQIcgRYzTWc2vkDOboQhRymrJf4IcHQO7o_8,453
4
- pixeltable/catalog/catalog.py,sha256=gzEuse0f_rj7oKRWMyorhUpOLhYbWSQ8CFLOuFLbXak,7918
4
+ pixeltable/catalog/catalog.py,sha256=8gsFWm6o9Qg4_BEO6oejdpmP4MAOlhmuKRaJP0o2UPU,7906
5
5
  pixeltable/catalog/column.py,sha256=Dmc6CgFLExJy3tdvuX0Emjc8SgqZvmCbAHozibO1-G0,9417
6
6
  pixeltable/catalog/dir.py,sha256=DWl9nnCOoiYLKWp31MNMvLmryXeQiQZu5YJcd4tpy38,921
7
- pixeltable/catalog/globals.py,sha256=yLEGNbsSnLzjWNHVJacfjA9hbw13Q6QXLOSCRmdTlq0,943
8
- pixeltable/catalog/insertable_table.py,sha256=jP6EFMqkGNAbqRAj6YpCaZzWdFwu13t_dQste_f0clk,6468
7
+ pixeltable/catalog/globals.py,sha256=tNb_6jzAKJWD4MH1Y9g2UhVQkiomYfbHQZ2GIcg7vUI,974
8
+ pixeltable/catalog/insertable_table.py,sha256=u2yMCw1lWSIeRAknIKQmSMWS_3oLTn1kCR1Hk79OvrI,5883
9
9
  pixeltable/catalog/named_function.py,sha256=UhHaimM_uJHS-0RQcqGOgvWeZtMfKsIgSeKSRwT2moU,1149
10
10
  pixeltable/catalog/path.py,sha256=QgccEi_QOfaKt8YsR2zLtd_z7z7QQkU_1kprJFi2SPQ,1677
11
11
  pixeltable/catalog/path_dict.py,sha256=xfvxg1Ze5jZCARUGASF2DRbQPh7pRVTYhuJ_u82gYUo,5941
12
12
  pixeltable/catalog/schema_object.py,sha256=Sf8aGARpsPeRUz1NjsZ97oSBMyElsctP39uU9x9Ic80,964
13
- pixeltable/catalog/table.py,sha256=qzxm3SN0hHObZlJycEsmzOd3VLsqSnrZ9fk1M3F5YGM,42636
14
- pixeltable/catalog/table_version.py,sha256=-46Z-aSKdjXpbmWlwZpGWZMw6VOKJZczNvx05Txe_0I,55148
15
- pixeltable/catalog/table_version_path.py,sha256=lE2EFn0kylXhy-oBXTyXbJox6rl_6klfOmDjK-2Uq_Q,6432
16
- pixeltable/catalog/view.py,sha256=dphf9ZFi_ReQkokcN_Auh6ZQFmGtnOcVDvz6fcJfJzs,10332
17
- pixeltable/dataframe.py,sha256=cQb70E0WVWsTQ_rA6CNx0dfCkGQoqgdoERa6HgpdPDY,37167
13
+ pixeltable/catalog/table.py,sha256=5CSEASIJqWJ0l1mDERYMqj_5rD--y4F8yhqq3Uu0lAI,39473
14
+ pixeltable/catalog/table_version.py,sha256=JrwwvG1vizCDJ-11GgTwnCg2XSOqyrJ6w2wbqmWYR60,58582
15
+ pixeltable/catalog/table_version_path.py,sha256=6JZlgKMYa3Xf8p_2Z-iDIFIcfuYRyjbpc3_CC9l1HME,6396
16
+ pixeltable/catalog/view.py,sha256=3OAk-NBPlSagMCsdVtcx35jHD80SIYCuOy5dc1kM7Fs,10403
17
+ pixeltable/dataframe.py,sha256=O8tAgOqFa_twjak-hDIBoXB5MZx-ZyzYOCeQr8jCO-0,34339
18
18
  pixeltable/env.py,sha256=WO_WLfRj9Fft6QyW89S9cw47RTg1ALviStu9pNygJEQ,21635
19
19
  pixeltable/exceptions.py,sha256=MSP9zeL0AmXT93XqjdvgGN4rzno1_KRrGriq6hpemnw,376
20
20
  pixeltable/exec/__init__.py,sha256=RK7SKvrQ7Ky3G_LXDP4Bf7lHmMM_uYZl8dJaZYs0FjY,454
@@ -79,8 +79,8 @@ pixeltable/functions/string.py,sha256=Ae_weygd9Aj98buLC4tPLRYGg3LGSJEpXaqr93TF4n
79
79
  pixeltable/functions/together.py,sha256=2vHOoXMUIpeYwTYGTr3hDHePzy8zepvoeyORgV_9n34,4412
80
80
  pixeltable/functions/util.py,sha256=F2iiIL7UfhYdCVzdCa3efYqWbaeLKFrbycKnuPkG57M,650
81
81
  pixeltable/functions/video.py,sha256=yn52MimAVrSzUyAUtyxbd1RWveX_TyjwsomBuhK9V60,6516
82
- pixeltable/functions/whisper.py,sha256=0lsfaGPGmJCFh2Jh-RtzwLhjo9BA83X3l7ot15fNEiA,2406
83
- pixeltable/globals.py,sha256=WeIQbN6peyJL3VbIBxtL21p7XKREpDmtZ_MuDEUmOu4,14158
82
+ pixeltable/functions/whisper.py,sha256=s7C4eV5tCJed-4Joob5LojGFEHPgapmT8awFPVxBKgQ,2199
83
+ pixeltable/globals.py,sha256=xSj0pmgmRavmD_-TOFhNXd8slskOC2R95el9L7pyzxM,14795
84
84
  pixeltable/index/__init__.py,sha256=XBwetNQQwnz0fiKwonOKhyy_U32l_cjt77kNvEIdjWs,102
85
85
  pixeltable/index/base.py,sha256=YAQ5Dz1mfI0dfu9rxWHWroE8TjB90yKfPtXAzoADq38,1568
86
86
  pixeltable/index/btree.py,sha256=NE4GYhcJWYJhdKyeHI0sQBlFvUaIgGOF9KLyCZOfFjE,1822
@@ -118,6 +118,7 @@ pixeltable/utils/coco.py,sha256=ISpFBhR4eO1jOcg_SPb0thVI4KdS6H0RyNQauZIA5A4,7287
118
118
  pixeltable/utils/code.py,sha256=AOw1u2r8_DQXpX-lxJhyHWARGrCRDXOJHFVgKOi54Uc,1231
119
119
  pixeltable/utils/documents.py,sha256=Q7e5U2Hk0go83MdKzD_MIiMscwbcFsLMgRw2IU_vQF4,2213
120
120
  pixeltable/utils/filecache.py,sha256=UoNONG2VaAc2IBB0e3sQdsvyOPOes2XSDc5_CsA4qek,7839
121
+ pixeltable/utils/formatter.py,sha256=2nAEXehP4FZQq2qzQcEJy97seRYubTuLBSrL_unRFLo,9239
121
122
  pixeltable/utils/help.py,sha256=cCnxJ4VP9MJ57iDqExmnDcM-JG3a1lw_q7g-D7bpSVI,252
122
123
  pixeltable/utils/http_server.py,sha256=WQ5ILMzlz4TlwI9j5YqAPgEZyhrN1GytMNDbLD9occk,2422
123
124
  pixeltable/utils/media_store.py,sha256=x71wnJDZDHcdd13VCfL4AkHQ6IJB41gNA-zBvXJwFos,3116
@@ -125,7 +126,7 @@ pixeltable/utils/pytorch.py,sha256=BR4tgfUWw-2rwWTOgzXj5qdMBpe1Arpp5SK4ax6jjpk,3
125
126
  pixeltable/utils/s3.py,sha256=rkanuhk9DWvSfmbOLQW1j1Iov4sl2KhxGGKN-AJ8LSE,432
126
127
  pixeltable/utils/sql.py,sha256=5n5_OmXAGtqFdL6z5XvgnU-vlx6Ba6f1WJrO1ZwUle8,765
127
128
  pixeltable/utils/transactional_directory.py,sha256=UGzCrGtLR3hEEf8sYGuWBzLVFAEQml3vdIavigWeTBM,1349
128
- pixeltable-0.2.9.dist-info/LICENSE,sha256=0UNMmwuqWPC0xDY1NWMm4uNJ2_MyA1pnTNRgQTvuBiQ,746
129
- pixeltable-0.2.9.dist-info/METADATA,sha256=YInl_cYNpCosk7ENDDVZ9GGfFKCuKB3ZyWsGiezTrRI,9806
130
- pixeltable-0.2.9.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
131
- pixeltable-0.2.9.dist-info/RECORD,,
129
+ pixeltable-0.2.11.dist-info/LICENSE,sha256=0UNMmwuqWPC0xDY1NWMm4uNJ2_MyA1pnTNRgQTvuBiQ,746
130
+ pixeltable-0.2.11.dist-info/METADATA,sha256=c4rIZLuAKseDSgl84ndgoMupaq9XUThLxwFmaBeyjtA,9815
131
+ pixeltable-0.2.11.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
132
+ pixeltable-0.2.11.dist-info/RECORD,,