pixeltable 0.4.11__py3-none-any.whl → 0.4.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

pixeltable/__init__.py CHANGED
@@ -1,7 +1,17 @@
1
1
  # ruff: noqa: F401
2
2
 
3
3
  from .__version__ import __version__, __version_tuple__
4
- from .catalog import Column, ColumnMetadata, IndexMetadata, InsertableTable, Table, TableMetadata, UpdateStatus, View
4
+ from .catalog import (
5
+ Column,
6
+ ColumnMetadata,
7
+ IndexMetadata,
8
+ InsertableTable,
9
+ Table,
10
+ TableMetadata,
11
+ UpdateStatus,
12
+ VersionMetadata,
13
+ View,
14
+ )
5
15
  from .dataframe import DataFrame
6
16
  from .exceptions import Error, ExprEvalError, PixeltableWarning
7
17
  from .func import Aggregator, Function, Tool, ToolChoice, Tools, expr_udf, mcp_udfs, query, retrieval_udf, uda, udf
@@ -8,7 +8,8 @@ from .insertable_table import InsertableTable
8
8
  from .named_function import NamedFunction
9
9
  from .path import Path
10
10
  from .schema_object import SchemaObject
11
- from .table import ColumnMetadata, IndexMetadata, Table, TableMetadata
11
+ from .table import Table
12
+ from .table_metadata import ColumnMetadata, IndexMetadata, TableMetadata, VersionMetadata
12
13
  from .table_version import TableVersion
13
14
  from .table_version_handle import ColumnHandle, TableVersionHandle
14
15
  from .table_version_path import TableVersionPath
@@ -7,9 +7,7 @@ import json
7
7
  import logging
8
8
  from keyword import iskeyword as is_python_keyword
9
9
  from pathlib import Path
10
- from typing import TYPE_CHECKING, Any, ClassVar, Iterable, Literal, Optional, TypedDict, overload
11
-
12
- from typing import _GenericAlias # type: ignore[attr-defined] # isort: skip
10
+ from typing import TYPE_CHECKING, Any, Iterable, Literal, Optional, overload
13
11
  from uuid import UUID
14
12
 
15
13
  import pandas as pd
@@ -17,6 +15,13 @@ import sqlalchemy as sql
17
15
 
18
16
  import pixeltable as pxt
19
17
  from pixeltable import catalog, env, exceptions as excs, exprs, index, type_system as ts
18
+ from pixeltable.catalog.table_metadata import (
19
+ ColumnMetadata,
20
+ EmbeddingIndexParams,
21
+ IndexMetadata,
22
+ TableMetadata,
23
+ VersionMetadata,
24
+ )
20
25
  from pixeltable.metadata import schema
21
26
  from pixeltable.metadata.utils import MetadataUtils
22
27
 
@@ -37,6 +42,9 @@ from .table_version_handle import TableVersionHandle
37
42
  from .table_version_path import TableVersionPath
38
43
  from .update_status import UpdateStatus
39
44
 
45
+ from typing import _GenericAlias # type: ignore[attr-defined] # isort: skip
46
+
47
+
40
48
  if TYPE_CHECKING:
41
49
  import torch.utils.data
42
50
 
@@ -95,7 +103,7 @@ class Table(SchemaObject):
95
103
 
96
104
  return op()
97
105
 
98
- def _get_metadata(self) -> 'TableMetadata':
106
+ def _get_metadata(self) -> TableMetadata:
99
107
  columns = self._tbl_version_path.columns()
100
108
  column_info: dict[str, ColumnMetadata] = {}
101
109
  for col in columns:
@@ -1478,12 +1486,17 @@ class Table(SchemaObject):
1478
1486
  return result
1479
1487
 
1480
1488
  def recompute_columns(
1481
- self, *columns: str | ColumnRef, errors_only: bool = False, cascade: bool = True
1489
+ self,
1490
+ *columns: str | ColumnRef,
1491
+ where: 'exprs.Expr' | None = None,
1492
+ errors_only: bool = False,
1493
+ cascade: bool = True,
1482
1494
  ) -> UpdateStatus:
1483
1495
  """Recompute the values in one or more computed columns of this table.
1484
1496
 
1485
1497
  Args:
1486
1498
  columns: The names or references of the computed columns to recompute.
1499
+ where: A predicate to filter rows to recompute.
1487
1500
  errors_only: If True, only run the recomputation for rows that have errors in the column (ie, the column's
1488
1501
  `errortype` property indicates that an error occurred). Only allowed for recomputing a single column.
1489
1502
  cascade: if True, also update all computed columns that transitively depend on the recomputed columns.
@@ -1499,6 +1512,10 @@ class Table(SchemaObject):
1499
1512
 
1500
1513
  >>> tbl.recompute_columns(tbl.c1, tbl.c2, cascade=False)
1501
1514
 
1515
+ Recompute column `c1` and its dependents, but only for rows with `c2` == 0:
1516
+
1517
+ >>> tbl.recompute_columns('c1', where=tbl.c2 == 0)
1518
+
1502
1519
  Recompute column `c1` and its dependents, but only for rows that have errors in it:
1503
1520
 
1504
1521
  >>> tbl.recompute_columns('c1', errors_only=True)
@@ -1535,7 +1552,12 @@ class Table(SchemaObject):
1535
1552
  raise excs.Error(f'Cannot recompute column of a base: {col_name!r}')
1536
1553
  col_names.append(col_name)
1537
1554
 
1538
- result = self._tbl_version.get().recompute_columns(col_names, errors_only=errors_only, cascade=cascade)
1555
+ if where is not None and not where.is_bound_by([self._tbl_version_path]):
1556
+ raise excs.Error(f"'where' ({where}) not bound by {self._display_str()}")
1557
+
1558
+ result = self._tbl_version.get().recompute_columns(
1559
+ col_names, where=where, errors_only=errors_only, cascade=cascade
1560
+ )
1539
1561
  FileCache.get().emit_eviction_warnings()
1540
1562
  return result
1541
1563
 
@@ -1676,43 +1698,35 @@ class Table(SchemaObject):
1676
1698
  def _ipython_key_completions_(self) -> list[str]:
1677
1699
  return list(self._get_schema().keys())
1678
1700
 
1679
- _REPORT_SCHEMA: ClassVar[dict[str, ts.ColumnType]] = {
1680
- 'version': ts.IntType(),
1681
- 'created_at': ts.TimestampType(),
1682
- 'user': ts.StringType(nullable=True),
1683
- 'note': ts.StringType(),
1684
- 'inserts': ts.IntType(nullable=True),
1685
- 'updates': ts.IntType(nullable=True),
1686
- 'deletes': ts.IntType(nullable=True),
1687
- 'errors': ts.IntType(nullable=True),
1688
- 'computed': ts.IntType(),
1689
- 'schema_change': ts.StringType(),
1690
- }
1691
-
1692
- def history(self, n: Optional[int] = None) -> pixeltable.dataframe.DataFrameResultSet:
1693
- """Returns rows of information about the versions of this table, most recent first.
1701
+ def get_versions(self, n: Optional[int] = None) -> list[VersionMetadata]:
1702
+ """
1703
+ Returns information about versions of this table, most recent first.
1704
+
1705
+ `get_versions()` is intended for programmatic access to version metadata; for human-readable
1706
+ output, use [`history()`][pixeltable.Table.history] instead.
1694
1707
 
1695
1708
  Args:
1696
- n: a limit to the number of versions listed
1709
+ n: if specified, will return at most `n` versions
1697
1710
 
1698
- Examples:
1699
- Report history:
1711
+ Returns:
1712
+ A list of [VersionMetadata][pixeltable.VersionMetadata] dictionaries, one per version retrieved, most
1713
+ recent first.
1700
1714
 
1701
- >>> tbl.history()
1715
+ Examples:
1716
+ Retrieve metadata about all versions of the table `tbl`:
1702
1717
 
1703
- Report only the most recent 5 changes to the table:
1718
+ >>> tbl.get_versions()
1704
1719
 
1705
- >>> tbl.history(n=5)
1720
+ Retrieve metadata about the most recent 5 versions of the table `tbl`:
1706
1721
 
1707
- Returns:
1708
- A list of information about each version, ordered from most recent to oldest version.
1722
+ >>> tbl.get_versions(n=5)
1709
1723
  """
1710
1724
  from pixeltable.catalog import Catalog
1711
1725
 
1712
1726
  if n is None:
1713
1727
  n = 1_000_000_000
1714
1728
  if not isinstance(n, int) or n < 1:
1715
- raise excs.Error(f'Invalid value for n: {n}')
1729
+ raise excs.Error(f'Invalid value for `n`: {n}')
1716
1730
 
1717
1731
  # Retrieve the table history components from the catalog
1718
1732
  tbl_id = self._id
@@ -1730,104 +1744,60 @@ class Table(SchemaObject):
1730
1744
  else:
1731
1745
  over_count = 0
1732
1746
 
1733
- report_lines: list[list[Any]] = []
1747
+ metadata_dicts: list[VersionMetadata] = []
1734
1748
  for vers_md in vers_list[0 : len(vers_list) - over_count]:
1735
1749
  version = vers_md.version_md.version
1736
- schema_change = md_dict.get(version, '')
1750
+ schema_change = md_dict.get(version, None)
1737
1751
  update_status = vers_md.version_md.update_status
1738
1752
  if update_status is None:
1739
1753
  update_status = UpdateStatus()
1740
- change_type = 'schema' if schema_change != '' else ''
1741
- if change_type == '':
1742
- change_type = 'data'
1754
+ change_type: Literal['schema', 'data'] = 'schema' if schema_change is not None else 'data'
1743
1755
  rcs = update_status.row_count_stats + update_status.cascade_row_count_stats
1744
- report_line = [
1745
- version,
1746
- datetime.datetime.fromtimestamp(vers_md.version_md.created_at),
1747
- vers_md.version_md.user,
1748
- change_type,
1749
- rcs.ins_rows,
1750
- rcs.upd_rows,
1751
- rcs.del_rows,
1752
- rcs.num_excs,
1753
- rcs.computed_values,
1754
- schema_change,
1755
- ]
1756
- report_lines.append(report_line)
1756
+ metadata_dicts.append(
1757
+ VersionMetadata(
1758
+ version=version,
1759
+ created_at=datetime.datetime.fromtimestamp(vers_md.version_md.created_at, tz=datetime.timezone.utc),
1760
+ user=vers_md.version_md.user,
1761
+ change_type=change_type,
1762
+ inserts=rcs.ins_rows,
1763
+ updates=rcs.upd_rows,
1764
+ deletes=rcs.del_rows,
1765
+ errors=rcs.num_excs,
1766
+ computed=rcs.computed_values,
1767
+ schema_change=schema_change,
1768
+ )
1769
+ )
1770
+
1771
+ return metadata_dicts
1772
+
1773
+ def history(self, n: Optional[int] = None) -> pd.DataFrame:
1774
+ """
1775
+ Returns a human-readable report about versions of this table.
1757
1776
 
1758
- return pxt.dataframe.DataFrameResultSet(report_lines, self._REPORT_SCHEMA)
1777
+ `history()` is intended for human-readable output of version metadata; for programmatic access,
1778
+ use [`get_versions()`][pixeltable.Table.get_versions] instead.
1779
+
1780
+ Args:
1781
+ n: if specified, will return at most `n` versions
1782
+
1783
+ Returns:
1784
+ A report with information about each version, one per row, most recent first.
1785
+
1786
+ Examples:
1787
+ Report all versions of the table:
1788
+
1789
+ >>> tbl.history()
1790
+
1791
+ Report only the most recent 5 changes to the table:
1792
+
1793
+ >>> tbl.history(n=5)
1794
+ """
1795
+ versions = self.get_versions(n)
1796
+ assert len(versions) > 0
1797
+ return pd.DataFrame([list(v.values()) for v in versions], columns=list(versions[0].keys()))
1759
1798
 
1760
1799
  def __check_mutable(self, op_descr: str) -> None:
1761
1800
  if self._tbl_version_path.is_snapshot():
1762
1801
  raise excs.Error(f'{self._display_str()}: Cannot {op_descr} a snapshot.')
1763
1802
  if self._tbl_version_path.is_replica():
1764
1803
  raise excs.Error(f'{self._display_str()}: Cannot {op_descr} a {self._display_name()}.')
1765
-
1766
-
1767
- class ColumnMetadata(TypedDict):
1768
- """Metadata for a column of a Pixeltable table."""
1769
-
1770
- name: str
1771
- """The name of the column."""
1772
- type_: str
1773
- """The type specifier of the column."""
1774
- version_added: int
1775
- """The table version when this column was added."""
1776
- is_stored: bool
1777
- """`True` if this is a stored column; `False` if it is dynamically computed."""
1778
- is_primary_key: bool
1779
- """`True` if this column is part of the table's primary key."""
1780
- media_validation: Optional[Literal['on_read', 'on_write']]
1781
- """The media validation policy for this column."""
1782
- computed_with: Optional[str]
1783
- """Expression used to compute this column; `None` if this is not a computed column."""
1784
-
1785
-
1786
- class IndexMetadata(TypedDict):
1787
- """Metadata for a column of a Pixeltable table."""
1788
-
1789
- name: str
1790
- """The name of the index."""
1791
- columns: list[str]
1792
- """The table columns that are indexed."""
1793
- index_type: Literal['embedding']
1794
- """The type of index (currently only `'embedding'` is supported, but others will be added in the future)."""
1795
- parameters: EmbeddingIndexParams
1796
-
1797
-
1798
- class EmbeddingIndexParams(TypedDict):
1799
- metric: Literal['cosine', 'ip', 'l2']
1800
- """Index metric."""
1801
- embeddings: list[str]
1802
- """List of embeddings defined for this index."""
1803
-
1804
-
1805
- class TableMetadata(TypedDict):
1806
- """Metadata for a Pixeltable table."""
1807
-
1808
- name: str
1809
- """The name of the table (ex: `'my_table'`)."""
1810
- path: str
1811
- """The full path of the table (ex: `'my_dir.my_subdir.my_table'`)."""
1812
- columns: dict[str, ColumnMetadata]
1813
- """Column metadata for all of the visible columns of the table."""
1814
- indices: dict[str, IndexMetadata]
1815
- """Index metadata for all of the indices of the table."""
1816
- is_replica: bool
1817
- """`True` if this table is a replica of another (shared) table."""
1818
- is_view: bool
1819
- """`True` if this table is a view."""
1820
- is_snapshot: bool
1821
- """`True` if this table is a snapshot."""
1822
- version: int
1823
- """The current version of the table."""
1824
- version_created: datetime.datetime
1825
- """The timestamp when this table version was created."""
1826
- schema_version: int
1827
- """The current schema version of the table."""
1828
- comment: Optional[str]
1829
- """User-provided table comment, if one exists."""
1830
- media_validation: Literal['on_read', 'on_write']
1831
- """The media validation policy for this table."""
1832
- base: Optional[str]
1833
- """If this table is a view or snapshot, the full path of its base table; otherwise `None`."""
@@ -0,0 +1,96 @@
1
+ import datetime
2
+ from typing import Literal, Optional, TypedDict
3
+
4
+
5
+ class ColumnMetadata(TypedDict):
6
+ """Metadata for a column of a Pixeltable table."""
7
+
8
+ name: str
9
+ """The name of the column."""
10
+ type_: str
11
+ """The type specifier of the column."""
12
+ version_added: int
13
+ """The table version when this column was added."""
14
+ is_stored: bool
15
+ """`True` if this is a stored column; `False` if it is dynamically computed."""
16
+ is_primary_key: bool
17
+ """`True` if this column is part of the table's primary key."""
18
+ media_validation: Optional[Literal['on_read', 'on_write']]
19
+ """The media validation policy for this column."""
20
+ computed_with: Optional[str]
21
+ """Expression used to compute this column; `None` if this is not a computed column."""
22
+
23
+
24
+ class EmbeddingIndexParams(TypedDict):
25
+ metric: Literal['cosine', 'ip', 'l2']
26
+ """Index metric."""
27
+ embeddings: list[str]
28
+ """List of embeddings defined for this index."""
29
+
30
+
31
+ class IndexMetadata(TypedDict):
32
+ """Metadata for an index of a Pixeltable table."""
33
+
34
+ name: str
35
+ """The name of the index."""
36
+ columns: list[str]
37
+ """The table columns that are indexed."""
38
+ index_type: Literal['embedding']
39
+ """The type of index (currently only `'embedding'` is supported, but others will be added in the future)."""
40
+ parameters: EmbeddingIndexParams
41
+
42
+
43
+ class TableMetadata(TypedDict):
44
+ """Metadata for a Pixeltable table."""
45
+
46
+ name: str
47
+ """The name of the table (ex: `'my_table'`)."""
48
+ path: str
49
+ """The full path of the table (ex: `'my_dir.my_subdir.my_table'`)."""
50
+ columns: dict[str, ColumnMetadata]
51
+ """Column metadata for all of the visible columns of the table."""
52
+ indices: dict[str, IndexMetadata]
53
+ """Index metadata for all of the indices of the table."""
54
+ is_replica: bool
55
+ """`True` if this table is a replica of another (shared) table."""
56
+ is_view: bool
57
+ """`True` if this table is a view."""
58
+ is_snapshot: bool
59
+ """`True` if this table is a snapshot."""
60
+ version: int
61
+ """The current version of the table."""
62
+ version_created: datetime.datetime
63
+ """The timestamp when this table version was created."""
64
+ schema_version: int
65
+ """The current schema version of the table."""
66
+ comment: Optional[str]
67
+ """User-provided table comment, if one exists."""
68
+ media_validation: Literal['on_read', 'on_write']
69
+ """The media validation policy for this table."""
70
+ base: Optional[str]
71
+ """If this table is a view or snapshot, the full path of its base table; otherwise `None`."""
72
+
73
+
74
+ class VersionMetadata(TypedDict):
75
+ """Metadata for a specific version of a Pixeltable table."""
76
+
77
+ """The version number."""
78
+ version: int
79
+ """The timestamp when this version was created."""
80
+ created_at: datetime.datetime
81
+ """The user who created this version, if defined."""
82
+ user: str | None
83
+ """The type of table transformation that this version represents (`'data'` or `'schema'`)."""
84
+ change_type: Literal['data', 'schema']
85
+ """The number of rows inserted in this version."""
86
+ inserts: int
87
+ """The number of rows updated in this version."""
88
+ updates: int
89
+ """The number of rows deleted in this version."""
90
+ deletes: int
91
+ """The number of errors encountered during this version."""
92
+ errors: int
93
+ """The number of computed values calculated in this version."""
94
+ computed: int
95
+ """A description of the schema change that occurred in this version, if any."""
96
+ schema_change: str | None
@@ -1065,21 +1065,28 @@ class TableVersion:
1065
1065
 
1066
1066
  return update_targets
1067
1067
 
1068
- def recompute_columns(self, col_names: list[str], errors_only: bool = False, cascade: bool = True) -> UpdateStatus:
1068
+ def recompute_columns(
1069
+ self, col_names: list[str], where: exprs.Expr | None = None, errors_only: bool = False, cascade: bool = True
1070
+ ) -> UpdateStatus:
1069
1071
  assert self.is_mutable
1070
1072
  assert all(name in self.cols_by_name for name in col_names)
1071
1073
  assert len(col_names) > 0
1072
1074
  assert len(col_names) == 1 or not errors_only
1073
1075
 
1076
+ from pixeltable.exprs import CompoundPredicate
1074
1077
  from pixeltable.plan import Planner
1075
1078
 
1076
1079
  target_columns = [self.cols_by_name[name] for name in col_names]
1077
1080
  where_clause: Optional[exprs.Expr] = None
1081
+ if where is not None:
1082
+ self._validate_where_clause(where, error_prefix="'where' argument")
1083
+ where_clause = where
1078
1084
  if errors_only:
1079
- where_clause = (
1085
+ errortype_pred = (
1080
1086
  exprs.ColumnPropertyRef(exprs.ColumnRef(target_columns[0]), exprs.ColumnPropertyRef.Property.ERRORTYPE)
1081
1087
  != None
1082
1088
  )
1089
+ where_clause = CompoundPredicate.make_conjunction([where_clause, errortype_pred])
1083
1090
  plan, updated_cols, recomputed_cols = Planner.create_update_plan(
1084
1091
  self.path, update_targets={}, recompute_targets=target_columns, where_clause=where_clause, cascade=cascade
1085
1092
  )
@@ -1142,40 +1149,30 @@ class TableVersion:
1142
1149
  self._write_md(new_version=True, new_schema_version=False)
1143
1150
  return result
1144
1151
 
1145
- def delete(self, where: Optional[exprs.Expr] = None) -> UpdateStatus:
1146
- """Delete rows in this table.
1147
- Args:
1148
- where: a predicate to filter rows to delete.
1149
-
1150
- Returns:
1151
- UpdateStatus: an object containing the number of deleted rows and other statistics.
1152
- """
1152
+ def _validate_where_clause(self, pred: exprs.Expr, error_prefix: str) -> None:
1153
+ """Validates that pred can be expressed as a SQL Where clause"""
1153
1154
  assert self.is_insertable
1154
1155
  from pixeltable.exprs import Expr
1155
1156
  from pixeltable.plan import Planner
1156
1157
 
1157
- sql_where_clause: Optional[Expr] = None
1158
- if where is not None:
1159
- if not isinstance(where, Expr):
1160
- raise excs.Error(f"'where' argument must be a predicate, got {type(where)}")
1161
- analysis_info = Planner.analyze(self.path, where)
1162
- # for now we require that the updated rows can be identified via SQL, rather than via a Python filter
1163
- if analysis_info.filter is not None:
1164
- raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
1165
- sql_where_clause = analysis_info.sql_where_clause
1158
+ if not isinstance(pred, Expr):
1159
+ raise excs.Error(f'{error_prefix} must be a predicate, got {type(pred)}')
1160
+ analysis_info = Planner.analyze(self.path, pred)
1161
+ # for now we require that the updated rows can be identified via SQL, rather than via a Python filter
1162
+ if analysis_info.filter is not None:
1163
+ raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
1166
1164
 
1167
- status = self.propagate_delete(sql_where_clause, base_versions=[], timestamp=time.time())
1165
+ def delete(self, where: exprs.Expr | None = None) -> UpdateStatus:
1166
+ assert self.is_insertable
1167
+ if where is not None:
1168
+ self._validate_where_clause(where, error_prefix="'where' argument")
1169
+ status = self.propagate_delete(where, base_versions=[], timestamp=time.time())
1168
1170
  return status
1169
1171
 
1170
1172
  def propagate_delete(
1171
1173
  self, where: Optional[exprs.Expr], base_versions: list[Optional[int]], timestamp: float
1172
1174
  ) -> UpdateStatus:
1173
- """Delete rows in this table and propagate to views.
1174
- Args:
1175
- where: a predicate to filter rows to delete.
1176
- Returns:
1177
- number of deleted rows
1178
- """
1175
+ """Delete rows in this table and propagate to views"""
1179
1176
  # print(f'calling sql_expr()')
1180
1177
  sql_where_clause = where.sql_expr(exprs.SqlElementCache()) if where is not None else None
1181
1178
  # #print(f'sql_where_clause={str(sql_where_clause) if sql_where_clause is not None else None}')
@@ -323,7 +323,9 @@ class View(Table):
323
323
  else:
324
324
  base_descr = f'{base._path()}:{effective_version}'
325
325
  bases_descrs.append(f'{base_descr!r}')
326
- result.append(f' (of {", ".join(bases_descrs)})')
326
+ if len(bases_descrs) > 0:
327
+ # bases_descrs can be empty in the case of a table-replica
328
+ result.append(f' (of {", ".join(bases_descrs)})')
327
329
 
328
330
  if self._tbl_version_path.tbl_version.get().predicate is not None:
329
331
  result.append(f'\nWhere: {self._tbl_version_path.tbl_version.get().predicate!s}')
pixeltable/dataframe.py CHANGED
@@ -1210,17 +1210,42 @@ class DataFrame:
1210
1210
  Via the above DataFrame person, update the column 'city' to 'Oakland'
1211
1211
  and 'state' to 'CA' in the table t:
1212
1212
 
1213
- >>> df = person.update({'city': 'Oakland', 'state': 'CA'})
1213
+ >>> person.update({'city': 'Oakland', 'state': 'CA'})
1214
1214
 
1215
1215
  Via the above DataFrame person, update the column 'age' to 30 for any
1216
1216
  rows where 'year' is 2014 in the table t:
1217
1217
 
1218
- >>> df = person.where(t.year == 2014).update({'age': 30})
1218
+ >>> person.where(t.year == 2014).update({'age': 30})
1219
1219
  """
1220
1220
  self._validate_mutable('update', False)
1221
1221
  with Catalog.get().begin_xact(tbl=self._first_tbl, for_write=True, lock_mutable_tree=True):
1222
1222
  return self._first_tbl.tbl_version.get().update(value_spec, where=self.where_clause, cascade=cascade)
1223
1223
 
1224
+ def recompute_columns(
1225
+ self, *columns: str | exprs.ColumnRef, errors_only: bool = False, cascade: bool = True
1226
+ ) -> UpdateStatus:
1227
+ """Recompute one or more computed columns of the underlying table of the DataFrame.
1228
+
1229
+ Args:
1230
+ columns: The names or references of the computed columns to recompute.
1231
+ errors_only: If True, only run the recomputation for rows that have errors in the column (ie, the column's
1232
+ `errortype` property indicates that an error occurred). Only allowed for recomputing a single column.
1233
+ cascade: if True, also update all computed columns that transitively depend on the recomputed columns.
1234
+
1235
+ Returns:
1236
+ UpdateStatus: the status of the operation.
1237
+
1238
+ Example:
1239
+ For table `person` with column `age` and computed column `height`, recompute the value of `height` for all
1240
+ rows where `age` is less than 18:
1241
+
1242
+ >>> df = person.where(t.age < 18).recompute_columns(person.height)
1243
+ """
1244
+ self._validate_mutable('recompute_columns', False)
1245
+ with Catalog.get().begin_xact(tbl=self._first_tbl, for_write=True, lock_mutable_tree=True):
1246
+ tbl = Catalog.get().get_table_by_id(self._first_tbl.tbl_id)
1247
+ return tbl.recompute_columns(*columns, where=self.where_clause, errors_only=errors_only, cascade=cascade)
1248
+
1224
1249
  def delete(self) -> UpdateStatus:
1225
1250
  """Delete rows form the underlying table of the DataFrame.
1226
1251
 
@@ -1230,13 +1255,9 @@ class DataFrame:
1230
1255
  UpdateStatus: the status of the delete operation.
1231
1256
 
1232
1257
  Example:
1233
- Given the DataFrame person from a table t with all its columns and rows:
1234
-
1235
- >>> person = t.select()
1236
-
1237
- Via the above DataFrame person, delete all rows from the table t where the column 'age' is less than 18:
1258
+ For a table `person` with column `age`, delete all rows where 'age' is less than 18:
1238
1259
 
1239
- >>> df = person.where(t.age < 18).delete()
1260
+ >>> person.where(t.age < 18).delete()
1240
1261
  """
1241
1262
  self._validate_mutable('delete', False)
1242
1263
  if not self._first_tbl.is_insertable():
pixeltable/env.py CHANGED
@@ -605,12 +605,7 @@ class Env:
605
605
  metadata.upgrade_md(self._sa_engine)
606
606
 
607
607
  @property
608
- def pxt_api_key(self) -> str:
609
- if self._pxt_api_key is None:
610
- raise excs.Error(
611
- 'No API key is configured. Set the PIXELTABLE_API_KEY environment variable, or add an entry to '
612
- 'config.toml as described here:\nhttps://pixeltable.github.io/pixeltable/config/'
613
- )
608
+ def pxt_api_key(self) -> Optional[str]:
614
609
  return self._pxt_api_key
615
610
 
616
611
  def get_client(self, name: str) -> Any:
@@ -748,6 +743,7 @@ class Env:
748
743
  self.__register_package('whisper', library_name='openai-whisper')
749
744
  self.__register_package('whisperx')
750
745
  self.__register_package('yolox', library_name='pixeltable-yolox')
746
+ self.__register_package('lancedb')
751
747
 
752
748
  def __register_package(self, package_name: str, library_name: Optional[str] = None) -> None:
753
749
  is_installed: bool
@@ -36,7 +36,8 @@ class CompoundPredicate(Expr):
36
36
  return f' {self.operator} '.join([f'({e})' for e in self.components])
37
37
 
38
38
  @classmethod
39
- def make_conjunction(cls, operands: list[Expr]) -> Optional[Expr]:
39
+ def make_conjunction(cls, operands: list[Expr | None]) -> Expr | None:
40
+ operands = [e for e in operands if e is not None]
40
41
  if len(operands) == 0:
41
42
  return None
42
43
  if len(operands) == 1: