pixeltable 0.4.11__py3-none-any.whl → 0.4.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +11 -1
- pixeltable/catalog/__init__.py +2 -1
- pixeltable/catalog/table.py +88 -118
- pixeltable/catalog/table_metadata.py +96 -0
- pixeltable/catalog/table_version.py +23 -26
- pixeltable/catalog/view.py +3 -1
- pixeltable/dataframe.py +29 -8
- pixeltable/env.py +2 -6
- pixeltable/exprs/compound_predicate.py +2 -1
- pixeltable/functions/anthropic.py +17 -6
- pixeltable/functions/groq.py +2 -2
- pixeltable/functions/openai.py +6 -3
- pixeltable/globals.py +11 -7
- pixeltable/io/__init__.py +2 -1
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/parquet.py +9 -89
- pixeltable/io/table_data_conduit.py +2 -2
- pixeltable/share/__init__.py +1 -1
- pixeltable/share/publish.py +12 -10
- pixeltable/utils/arrow.py +97 -2
- pixeltable/utils/lancedb.py +88 -0
- pixeltable/utils/media_store.py +11 -0
- {pixeltable-0.4.11.dist-info → pixeltable-0.4.13.dist-info}/METADATA +162 -127
- {pixeltable-0.4.11.dist-info → pixeltable-0.4.13.dist-info}/RECORD +27 -24
- {pixeltable-0.4.11.dist-info → pixeltable-0.4.13.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.11.dist-info → pixeltable-0.4.13.dist-info}/entry_points.txt +0 -0
- {pixeltable-0.4.11.dist-info → pixeltable-0.4.13.dist-info}/licenses/LICENSE +0 -0
pixeltable/__init__.py
CHANGED
|
@@ -1,7 +1,17 @@
|
|
|
1
1
|
# ruff: noqa: F401
|
|
2
2
|
|
|
3
3
|
from .__version__ import __version__, __version_tuple__
|
|
4
|
-
from .catalog import
|
|
4
|
+
from .catalog import (
|
|
5
|
+
Column,
|
|
6
|
+
ColumnMetadata,
|
|
7
|
+
IndexMetadata,
|
|
8
|
+
InsertableTable,
|
|
9
|
+
Table,
|
|
10
|
+
TableMetadata,
|
|
11
|
+
UpdateStatus,
|
|
12
|
+
VersionMetadata,
|
|
13
|
+
View,
|
|
14
|
+
)
|
|
5
15
|
from .dataframe import DataFrame
|
|
6
16
|
from .exceptions import Error, ExprEvalError, PixeltableWarning
|
|
7
17
|
from .func import Aggregator, Function, Tool, ToolChoice, Tools, expr_udf, mcp_udfs, query, retrieval_udf, uda, udf
|
pixeltable/catalog/__init__.py
CHANGED
|
@@ -8,7 +8,8 @@ from .insertable_table import InsertableTable
|
|
|
8
8
|
from .named_function import NamedFunction
|
|
9
9
|
from .path import Path
|
|
10
10
|
from .schema_object import SchemaObject
|
|
11
|
-
from .table import
|
|
11
|
+
from .table import Table
|
|
12
|
+
from .table_metadata import ColumnMetadata, IndexMetadata, TableMetadata, VersionMetadata
|
|
12
13
|
from .table_version import TableVersion
|
|
13
14
|
from .table_version_handle import ColumnHandle, TableVersionHandle
|
|
14
15
|
from .table_version_path import TableVersionPath
|
pixeltable/catalog/table.py
CHANGED
|
@@ -7,9 +7,7 @@ import json
|
|
|
7
7
|
import logging
|
|
8
8
|
from keyword import iskeyword as is_python_keyword
|
|
9
9
|
from pathlib import Path
|
|
10
|
-
from typing import TYPE_CHECKING, Any,
|
|
11
|
-
|
|
12
|
-
from typing import _GenericAlias # type: ignore[attr-defined] # isort: skip
|
|
10
|
+
from typing import TYPE_CHECKING, Any, Iterable, Literal, Optional, overload
|
|
13
11
|
from uuid import UUID
|
|
14
12
|
|
|
15
13
|
import pandas as pd
|
|
@@ -17,6 +15,13 @@ import sqlalchemy as sql
|
|
|
17
15
|
|
|
18
16
|
import pixeltable as pxt
|
|
19
17
|
from pixeltable import catalog, env, exceptions as excs, exprs, index, type_system as ts
|
|
18
|
+
from pixeltable.catalog.table_metadata import (
|
|
19
|
+
ColumnMetadata,
|
|
20
|
+
EmbeddingIndexParams,
|
|
21
|
+
IndexMetadata,
|
|
22
|
+
TableMetadata,
|
|
23
|
+
VersionMetadata,
|
|
24
|
+
)
|
|
20
25
|
from pixeltable.metadata import schema
|
|
21
26
|
from pixeltable.metadata.utils import MetadataUtils
|
|
22
27
|
|
|
@@ -37,6 +42,9 @@ from .table_version_handle import TableVersionHandle
|
|
|
37
42
|
from .table_version_path import TableVersionPath
|
|
38
43
|
from .update_status import UpdateStatus
|
|
39
44
|
|
|
45
|
+
from typing import _GenericAlias # type: ignore[attr-defined] # isort: skip
|
|
46
|
+
|
|
47
|
+
|
|
40
48
|
if TYPE_CHECKING:
|
|
41
49
|
import torch.utils.data
|
|
42
50
|
|
|
@@ -95,7 +103,7 @@ class Table(SchemaObject):
|
|
|
95
103
|
|
|
96
104
|
return op()
|
|
97
105
|
|
|
98
|
-
def _get_metadata(self) ->
|
|
106
|
+
def _get_metadata(self) -> TableMetadata:
|
|
99
107
|
columns = self._tbl_version_path.columns()
|
|
100
108
|
column_info: dict[str, ColumnMetadata] = {}
|
|
101
109
|
for col in columns:
|
|
@@ -1478,12 +1486,17 @@ class Table(SchemaObject):
|
|
|
1478
1486
|
return result
|
|
1479
1487
|
|
|
1480
1488
|
def recompute_columns(
|
|
1481
|
-
self,
|
|
1489
|
+
self,
|
|
1490
|
+
*columns: str | ColumnRef,
|
|
1491
|
+
where: 'exprs.Expr' | None = None,
|
|
1492
|
+
errors_only: bool = False,
|
|
1493
|
+
cascade: bool = True,
|
|
1482
1494
|
) -> UpdateStatus:
|
|
1483
1495
|
"""Recompute the values in one or more computed columns of this table.
|
|
1484
1496
|
|
|
1485
1497
|
Args:
|
|
1486
1498
|
columns: The names or references of the computed columns to recompute.
|
|
1499
|
+
where: A predicate to filter rows to recompute.
|
|
1487
1500
|
errors_only: If True, only run the recomputation for rows that have errors in the column (ie, the column's
|
|
1488
1501
|
`errortype` property indicates that an error occurred). Only allowed for recomputing a single column.
|
|
1489
1502
|
cascade: if True, also update all computed columns that transitively depend on the recomputed columns.
|
|
@@ -1499,6 +1512,10 @@ class Table(SchemaObject):
|
|
|
1499
1512
|
|
|
1500
1513
|
>>> tbl.recompute_columns(tbl.c1, tbl.c2, cascade=False)
|
|
1501
1514
|
|
|
1515
|
+
Recompute column `c1` and its dependents, but only for rows with `c2` == 0:
|
|
1516
|
+
|
|
1517
|
+
>>> tbl.recompute_columns('c1', where=tbl.c2 == 0)
|
|
1518
|
+
|
|
1502
1519
|
Recompute column `c1` and its dependents, but only for rows that have errors in it:
|
|
1503
1520
|
|
|
1504
1521
|
>>> tbl.recompute_columns('c1', errors_only=True)
|
|
@@ -1535,7 +1552,12 @@ class Table(SchemaObject):
|
|
|
1535
1552
|
raise excs.Error(f'Cannot recompute column of a base: {col_name!r}')
|
|
1536
1553
|
col_names.append(col_name)
|
|
1537
1554
|
|
|
1538
|
-
|
|
1555
|
+
if where is not None and not where.is_bound_by([self._tbl_version_path]):
|
|
1556
|
+
raise excs.Error(f"'where' ({where}) not bound by {self._display_str()}")
|
|
1557
|
+
|
|
1558
|
+
result = self._tbl_version.get().recompute_columns(
|
|
1559
|
+
col_names, where=where, errors_only=errors_only, cascade=cascade
|
|
1560
|
+
)
|
|
1539
1561
|
FileCache.get().emit_eviction_warnings()
|
|
1540
1562
|
return result
|
|
1541
1563
|
|
|
@@ -1676,43 +1698,35 @@ class Table(SchemaObject):
|
|
|
1676
1698
|
def _ipython_key_completions_(self) -> list[str]:
|
|
1677
1699
|
return list(self._get_schema().keys())
|
|
1678
1700
|
|
|
1679
|
-
|
|
1680
|
-
|
|
1681
|
-
|
|
1682
|
-
|
|
1683
|
-
|
|
1684
|
-
|
|
1685
|
-
'updates': ts.IntType(nullable=True),
|
|
1686
|
-
'deletes': ts.IntType(nullable=True),
|
|
1687
|
-
'errors': ts.IntType(nullable=True),
|
|
1688
|
-
'computed': ts.IntType(),
|
|
1689
|
-
'schema_change': ts.StringType(),
|
|
1690
|
-
}
|
|
1691
|
-
|
|
1692
|
-
def history(self, n: Optional[int] = None) -> pixeltable.dataframe.DataFrameResultSet:
|
|
1693
|
-
"""Returns rows of information about the versions of this table, most recent first.
|
|
1701
|
+
def get_versions(self, n: Optional[int] = None) -> list[VersionMetadata]:
|
|
1702
|
+
"""
|
|
1703
|
+
Returns information about versions of this table, most recent first.
|
|
1704
|
+
|
|
1705
|
+
`get_versions()` is intended for programmatic access to version metadata; for human-readable
|
|
1706
|
+
output, use [`history()`][pixeltable.Table.history] instead.
|
|
1694
1707
|
|
|
1695
1708
|
Args:
|
|
1696
|
-
n:
|
|
1709
|
+
n: if specified, will return at most `n` versions
|
|
1697
1710
|
|
|
1698
|
-
|
|
1699
|
-
|
|
1711
|
+
Returns:
|
|
1712
|
+
A list of [VersionMetadata][pixeltable.VersionMetadata] dictionaries, one per version retrieved, most
|
|
1713
|
+
recent first.
|
|
1700
1714
|
|
|
1701
|
-
|
|
1715
|
+
Examples:
|
|
1716
|
+
Retrieve metadata about all versions of the table `tbl`:
|
|
1702
1717
|
|
|
1703
|
-
|
|
1718
|
+
>>> tbl.get_versions()
|
|
1704
1719
|
|
|
1705
|
-
|
|
1720
|
+
Retrieve metadata about the most recent 5 versions of the table `tbl`:
|
|
1706
1721
|
|
|
1707
|
-
|
|
1708
|
-
A list of information about each version, ordered from most recent to oldest version.
|
|
1722
|
+
>>> tbl.get_versions(n=5)
|
|
1709
1723
|
"""
|
|
1710
1724
|
from pixeltable.catalog import Catalog
|
|
1711
1725
|
|
|
1712
1726
|
if n is None:
|
|
1713
1727
|
n = 1_000_000_000
|
|
1714
1728
|
if not isinstance(n, int) or n < 1:
|
|
1715
|
-
raise excs.Error(f'Invalid value for n
|
|
1729
|
+
raise excs.Error(f'Invalid value for `n`: {n}')
|
|
1716
1730
|
|
|
1717
1731
|
# Retrieve the table history components from the catalog
|
|
1718
1732
|
tbl_id = self._id
|
|
@@ -1730,104 +1744,60 @@ class Table(SchemaObject):
|
|
|
1730
1744
|
else:
|
|
1731
1745
|
over_count = 0
|
|
1732
1746
|
|
|
1733
|
-
|
|
1747
|
+
metadata_dicts: list[VersionMetadata] = []
|
|
1734
1748
|
for vers_md in vers_list[0 : len(vers_list) - over_count]:
|
|
1735
1749
|
version = vers_md.version_md.version
|
|
1736
|
-
schema_change = md_dict.get(version,
|
|
1750
|
+
schema_change = md_dict.get(version, None)
|
|
1737
1751
|
update_status = vers_md.version_md.update_status
|
|
1738
1752
|
if update_status is None:
|
|
1739
1753
|
update_status = UpdateStatus()
|
|
1740
|
-
change_type = 'schema' if schema_change
|
|
1741
|
-
if change_type == '':
|
|
1742
|
-
change_type = 'data'
|
|
1754
|
+
change_type: Literal['schema', 'data'] = 'schema' if schema_change is not None else 'data'
|
|
1743
1755
|
rcs = update_status.row_count_stats + update_status.cascade_row_count_stats
|
|
1744
|
-
|
|
1745
|
-
|
|
1746
|
-
|
|
1747
|
-
|
|
1748
|
-
|
|
1749
|
-
|
|
1750
|
-
|
|
1751
|
-
|
|
1752
|
-
|
|
1753
|
-
|
|
1754
|
-
|
|
1755
|
-
|
|
1756
|
-
|
|
1756
|
+
metadata_dicts.append(
|
|
1757
|
+
VersionMetadata(
|
|
1758
|
+
version=version,
|
|
1759
|
+
created_at=datetime.datetime.fromtimestamp(vers_md.version_md.created_at, tz=datetime.timezone.utc),
|
|
1760
|
+
user=vers_md.version_md.user,
|
|
1761
|
+
change_type=change_type,
|
|
1762
|
+
inserts=rcs.ins_rows,
|
|
1763
|
+
updates=rcs.upd_rows,
|
|
1764
|
+
deletes=rcs.del_rows,
|
|
1765
|
+
errors=rcs.num_excs,
|
|
1766
|
+
computed=rcs.computed_values,
|
|
1767
|
+
schema_change=schema_change,
|
|
1768
|
+
)
|
|
1769
|
+
)
|
|
1770
|
+
|
|
1771
|
+
return metadata_dicts
|
|
1772
|
+
|
|
1773
|
+
def history(self, n: Optional[int] = None) -> pd.DataFrame:
|
|
1774
|
+
"""
|
|
1775
|
+
Returns a human-readable report about versions of this table.
|
|
1757
1776
|
|
|
1758
|
-
|
|
1777
|
+
`history()` is intended for human-readable output of version metadata; for programmatic access,
|
|
1778
|
+
use [`get_versions()`][pixeltable.Table.get_versions] instead.
|
|
1779
|
+
|
|
1780
|
+
Args:
|
|
1781
|
+
n: if specified, will return at most `n` versions
|
|
1782
|
+
|
|
1783
|
+
Returns:
|
|
1784
|
+
A report with information about each version, one per row, most recent first.
|
|
1785
|
+
|
|
1786
|
+
Examples:
|
|
1787
|
+
Report all versions of the table:
|
|
1788
|
+
|
|
1789
|
+
>>> tbl.history()
|
|
1790
|
+
|
|
1791
|
+
Report only the most recent 5 changes to the table:
|
|
1792
|
+
|
|
1793
|
+
>>> tbl.history(n=5)
|
|
1794
|
+
"""
|
|
1795
|
+
versions = self.get_versions(n)
|
|
1796
|
+
assert len(versions) > 0
|
|
1797
|
+
return pd.DataFrame([list(v.values()) for v in versions], columns=list(versions[0].keys()))
|
|
1759
1798
|
|
|
1760
1799
|
def __check_mutable(self, op_descr: str) -> None:
|
|
1761
1800
|
if self._tbl_version_path.is_snapshot():
|
|
1762
1801
|
raise excs.Error(f'{self._display_str()}: Cannot {op_descr} a snapshot.')
|
|
1763
1802
|
if self._tbl_version_path.is_replica():
|
|
1764
1803
|
raise excs.Error(f'{self._display_str()}: Cannot {op_descr} a {self._display_name()}.')
|
|
1765
|
-
|
|
1766
|
-
|
|
1767
|
-
class ColumnMetadata(TypedDict):
|
|
1768
|
-
"""Metadata for a column of a Pixeltable table."""
|
|
1769
|
-
|
|
1770
|
-
name: str
|
|
1771
|
-
"""The name of the column."""
|
|
1772
|
-
type_: str
|
|
1773
|
-
"""The type specifier of the column."""
|
|
1774
|
-
version_added: int
|
|
1775
|
-
"""The table version when this column was added."""
|
|
1776
|
-
is_stored: bool
|
|
1777
|
-
"""`True` if this is a stored column; `False` if it is dynamically computed."""
|
|
1778
|
-
is_primary_key: bool
|
|
1779
|
-
"""`True` if this column is part of the table's primary key."""
|
|
1780
|
-
media_validation: Optional[Literal['on_read', 'on_write']]
|
|
1781
|
-
"""The media validation policy for this column."""
|
|
1782
|
-
computed_with: Optional[str]
|
|
1783
|
-
"""Expression used to compute this column; `None` if this is not a computed column."""
|
|
1784
|
-
|
|
1785
|
-
|
|
1786
|
-
class IndexMetadata(TypedDict):
|
|
1787
|
-
"""Metadata for a column of a Pixeltable table."""
|
|
1788
|
-
|
|
1789
|
-
name: str
|
|
1790
|
-
"""The name of the index."""
|
|
1791
|
-
columns: list[str]
|
|
1792
|
-
"""The table columns that are indexed."""
|
|
1793
|
-
index_type: Literal['embedding']
|
|
1794
|
-
"""The type of index (currently only `'embedding'` is supported, but others will be added in the future)."""
|
|
1795
|
-
parameters: EmbeddingIndexParams
|
|
1796
|
-
|
|
1797
|
-
|
|
1798
|
-
class EmbeddingIndexParams(TypedDict):
|
|
1799
|
-
metric: Literal['cosine', 'ip', 'l2']
|
|
1800
|
-
"""Index metric."""
|
|
1801
|
-
embeddings: list[str]
|
|
1802
|
-
"""List of embeddings defined for this index."""
|
|
1803
|
-
|
|
1804
|
-
|
|
1805
|
-
class TableMetadata(TypedDict):
|
|
1806
|
-
"""Metadata for a Pixeltable table."""
|
|
1807
|
-
|
|
1808
|
-
name: str
|
|
1809
|
-
"""The name of the table (ex: `'my_table'`)."""
|
|
1810
|
-
path: str
|
|
1811
|
-
"""The full path of the table (ex: `'my_dir.my_subdir.my_table'`)."""
|
|
1812
|
-
columns: dict[str, ColumnMetadata]
|
|
1813
|
-
"""Column metadata for all of the visible columns of the table."""
|
|
1814
|
-
indices: dict[str, IndexMetadata]
|
|
1815
|
-
"""Index metadata for all of the indices of the table."""
|
|
1816
|
-
is_replica: bool
|
|
1817
|
-
"""`True` if this table is a replica of another (shared) table."""
|
|
1818
|
-
is_view: bool
|
|
1819
|
-
"""`True` if this table is a view."""
|
|
1820
|
-
is_snapshot: bool
|
|
1821
|
-
"""`True` if this table is a snapshot."""
|
|
1822
|
-
version: int
|
|
1823
|
-
"""The current version of the table."""
|
|
1824
|
-
version_created: datetime.datetime
|
|
1825
|
-
"""The timestamp when this table version was created."""
|
|
1826
|
-
schema_version: int
|
|
1827
|
-
"""The current schema version of the table."""
|
|
1828
|
-
comment: Optional[str]
|
|
1829
|
-
"""User-provided table comment, if one exists."""
|
|
1830
|
-
media_validation: Literal['on_read', 'on_write']
|
|
1831
|
-
"""The media validation policy for this table."""
|
|
1832
|
-
base: Optional[str]
|
|
1833
|
-
"""If this table is a view or snapshot, the full path of its base table; otherwise `None`."""
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
from typing import Literal, Optional, TypedDict
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class ColumnMetadata(TypedDict):
|
|
6
|
+
"""Metadata for a column of a Pixeltable table."""
|
|
7
|
+
|
|
8
|
+
name: str
|
|
9
|
+
"""The name of the column."""
|
|
10
|
+
type_: str
|
|
11
|
+
"""The type specifier of the column."""
|
|
12
|
+
version_added: int
|
|
13
|
+
"""The table version when this column was added."""
|
|
14
|
+
is_stored: bool
|
|
15
|
+
"""`True` if this is a stored column; `False` if it is dynamically computed."""
|
|
16
|
+
is_primary_key: bool
|
|
17
|
+
"""`True` if this column is part of the table's primary key."""
|
|
18
|
+
media_validation: Optional[Literal['on_read', 'on_write']]
|
|
19
|
+
"""The media validation policy for this column."""
|
|
20
|
+
computed_with: Optional[str]
|
|
21
|
+
"""Expression used to compute this column; `None` if this is not a computed column."""
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class EmbeddingIndexParams(TypedDict):
|
|
25
|
+
metric: Literal['cosine', 'ip', 'l2']
|
|
26
|
+
"""Index metric."""
|
|
27
|
+
embeddings: list[str]
|
|
28
|
+
"""List of embeddings defined for this index."""
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class IndexMetadata(TypedDict):
|
|
32
|
+
"""Metadata for an index of a Pixeltable table."""
|
|
33
|
+
|
|
34
|
+
name: str
|
|
35
|
+
"""The name of the index."""
|
|
36
|
+
columns: list[str]
|
|
37
|
+
"""The table columns that are indexed."""
|
|
38
|
+
index_type: Literal['embedding']
|
|
39
|
+
"""The type of index (currently only `'embedding'` is supported, but others will be added in the future)."""
|
|
40
|
+
parameters: EmbeddingIndexParams
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class TableMetadata(TypedDict):
|
|
44
|
+
"""Metadata for a Pixeltable table."""
|
|
45
|
+
|
|
46
|
+
name: str
|
|
47
|
+
"""The name of the table (ex: `'my_table'`)."""
|
|
48
|
+
path: str
|
|
49
|
+
"""The full path of the table (ex: `'my_dir.my_subdir.my_table'`)."""
|
|
50
|
+
columns: dict[str, ColumnMetadata]
|
|
51
|
+
"""Column metadata for all of the visible columns of the table."""
|
|
52
|
+
indices: dict[str, IndexMetadata]
|
|
53
|
+
"""Index metadata for all of the indices of the table."""
|
|
54
|
+
is_replica: bool
|
|
55
|
+
"""`True` if this table is a replica of another (shared) table."""
|
|
56
|
+
is_view: bool
|
|
57
|
+
"""`True` if this table is a view."""
|
|
58
|
+
is_snapshot: bool
|
|
59
|
+
"""`True` if this table is a snapshot."""
|
|
60
|
+
version: int
|
|
61
|
+
"""The current version of the table."""
|
|
62
|
+
version_created: datetime.datetime
|
|
63
|
+
"""The timestamp when this table version was created."""
|
|
64
|
+
schema_version: int
|
|
65
|
+
"""The current schema version of the table."""
|
|
66
|
+
comment: Optional[str]
|
|
67
|
+
"""User-provided table comment, if one exists."""
|
|
68
|
+
media_validation: Literal['on_read', 'on_write']
|
|
69
|
+
"""The media validation policy for this table."""
|
|
70
|
+
base: Optional[str]
|
|
71
|
+
"""If this table is a view or snapshot, the full path of its base table; otherwise `None`."""
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class VersionMetadata(TypedDict):
|
|
75
|
+
"""Metadata for a specific version of a Pixeltable table."""
|
|
76
|
+
|
|
77
|
+
"""The version number."""
|
|
78
|
+
version: int
|
|
79
|
+
"""The timestamp when this version was created."""
|
|
80
|
+
created_at: datetime.datetime
|
|
81
|
+
"""The user who created this version, if defined."""
|
|
82
|
+
user: str | None
|
|
83
|
+
"""The type of table transformation that this version represents (`'data'` or `'schema'`)."""
|
|
84
|
+
change_type: Literal['data', 'schema']
|
|
85
|
+
"""The number of rows inserted in this version."""
|
|
86
|
+
inserts: int
|
|
87
|
+
"""The number of rows updated in this version."""
|
|
88
|
+
updates: int
|
|
89
|
+
"""The number of rows deleted in this version."""
|
|
90
|
+
deletes: int
|
|
91
|
+
"""The number of errors encountered during this version."""
|
|
92
|
+
errors: int
|
|
93
|
+
"""The number of computed values calculated in this version."""
|
|
94
|
+
computed: int
|
|
95
|
+
"""A description of the schema change that occurred in this version, if any."""
|
|
96
|
+
schema_change: str | None
|
|
@@ -1065,21 +1065,28 @@ class TableVersion:
|
|
|
1065
1065
|
|
|
1066
1066
|
return update_targets
|
|
1067
1067
|
|
|
1068
|
-
def recompute_columns(
|
|
1068
|
+
def recompute_columns(
|
|
1069
|
+
self, col_names: list[str], where: exprs.Expr | None = None, errors_only: bool = False, cascade: bool = True
|
|
1070
|
+
) -> UpdateStatus:
|
|
1069
1071
|
assert self.is_mutable
|
|
1070
1072
|
assert all(name in self.cols_by_name for name in col_names)
|
|
1071
1073
|
assert len(col_names) > 0
|
|
1072
1074
|
assert len(col_names) == 1 or not errors_only
|
|
1073
1075
|
|
|
1076
|
+
from pixeltable.exprs import CompoundPredicate
|
|
1074
1077
|
from pixeltable.plan import Planner
|
|
1075
1078
|
|
|
1076
1079
|
target_columns = [self.cols_by_name[name] for name in col_names]
|
|
1077
1080
|
where_clause: Optional[exprs.Expr] = None
|
|
1081
|
+
if where is not None:
|
|
1082
|
+
self._validate_where_clause(where, error_prefix="'where' argument")
|
|
1083
|
+
where_clause = where
|
|
1078
1084
|
if errors_only:
|
|
1079
|
-
|
|
1085
|
+
errortype_pred = (
|
|
1080
1086
|
exprs.ColumnPropertyRef(exprs.ColumnRef(target_columns[0]), exprs.ColumnPropertyRef.Property.ERRORTYPE)
|
|
1081
1087
|
!= None
|
|
1082
1088
|
)
|
|
1089
|
+
where_clause = CompoundPredicate.make_conjunction([where_clause, errortype_pred])
|
|
1083
1090
|
plan, updated_cols, recomputed_cols = Planner.create_update_plan(
|
|
1084
1091
|
self.path, update_targets={}, recompute_targets=target_columns, where_clause=where_clause, cascade=cascade
|
|
1085
1092
|
)
|
|
@@ -1142,40 +1149,30 @@ class TableVersion:
|
|
|
1142
1149
|
self._write_md(new_version=True, new_schema_version=False)
|
|
1143
1150
|
return result
|
|
1144
1151
|
|
|
1145
|
-
def
|
|
1146
|
-
"""
|
|
1147
|
-
Args:
|
|
1148
|
-
where: a predicate to filter rows to delete.
|
|
1149
|
-
|
|
1150
|
-
Returns:
|
|
1151
|
-
UpdateStatus: an object containing the number of deleted rows and other statistics.
|
|
1152
|
-
"""
|
|
1152
|
+
def _validate_where_clause(self, pred: exprs.Expr, error_prefix: str) -> None:
|
|
1153
|
+
"""Validates that pred can be expressed as a SQL Where clause"""
|
|
1153
1154
|
assert self.is_insertable
|
|
1154
1155
|
from pixeltable.exprs import Expr
|
|
1155
1156
|
from pixeltable.plan import Planner
|
|
1156
1157
|
|
|
1157
|
-
|
|
1158
|
-
|
|
1159
|
-
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
|
|
1163
|
-
if analysis_info.filter is not None:
|
|
1164
|
-
raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
|
|
1165
|
-
sql_where_clause = analysis_info.sql_where_clause
|
|
1158
|
+
if not isinstance(pred, Expr):
|
|
1159
|
+
raise excs.Error(f'{error_prefix} must be a predicate, got {type(pred)}')
|
|
1160
|
+
analysis_info = Planner.analyze(self.path, pred)
|
|
1161
|
+
# for now we require that the updated rows can be identified via SQL, rather than via a Python filter
|
|
1162
|
+
if analysis_info.filter is not None:
|
|
1163
|
+
raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
|
|
1166
1164
|
|
|
1167
|
-
|
|
1165
|
+
def delete(self, where: exprs.Expr | None = None) -> UpdateStatus:
|
|
1166
|
+
assert self.is_insertable
|
|
1167
|
+
if where is not None:
|
|
1168
|
+
self._validate_where_clause(where, error_prefix="'where' argument")
|
|
1169
|
+
status = self.propagate_delete(where, base_versions=[], timestamp=time.time())
|
|
1168
1170
|
return status
|
|
1169
1171
|
|
|
1170
1172
|
def propagate_delete(
|
|
1171
1173
|
self, where: Optional[exprs.Expr], base_versions: list[Optional[int]], timestamp: float
|
|
1172
1174
|
) -> UpdateStatus:
|
|
1173
|
-
"""Delete rows in this table and propagate to views
|
|
1174
|
-
Args:
|
|
1175
|
-
where: a predicate to filter rows to delete.
|
|
1176
|
-
Returns:
|
|
1177
|
-
number of deleted rows
|
|
1178
|
-
"""
|
|
1175
|
+
"""Delete rows in this table and propagate to views"""
|
|
1179
1176
|
# print(f'calling sql_expr()')
|
|
1180
1177
|
sql_where_clause = where.sql_expr(exprs.SqlElementCache()) if where is not None else None
|
|
1181
1178
|
# #print(f'sql_where_clause={str(sql_where_clause) if sql_where_clause is not None else None}')
|
pixeltable/catalog/view.py
CHANGED
|
@@ -323,7 +323,9 @@ class View(Table):
|
|
|
323
323
|
else:
|
|
324
324
|
base_descr = f'{base._path()}:{effective_version}'
|
|
325
325
|
bases_descrs.append(f'{base_descr!r}')
|
|
326
|
-
|
|
326
|
+
if len(bases_descrs) > 0:
|
|
327
|
+
# bases_descrs can be empty in the case of a table-replica
|
|
328
|
+
result.append(f' (of {", ".join(bases_descrs)})')
|
|
327
329
|
|
|
328
330
|
if self._tbl_version_path.tbl_version.get().predicate is not None:
|
|
329
331
|
result.append(f'\nWhere: {self._tbl_version_path.tbl_version.get().predicate!s}')
|
pixeltable/dataframe.py
CHANGED
|
@@ -1210,17 +1210,42 @@ class DataFrame:
|
|
|
1210
1210
|
Via the above DataFrame person, update the column 'city' to 'Oakland'
|
|
1211
1211
|
and 'state' to 'CA' in the table t:
|
|
1212
1212
|
|
|
1213
|
-
>>>
|
|
1213
|
+
>>> person.update({'city': 'Oakland', 'state': 'CA'})
|
|
1214
1214
|
|
|
1215
1215
|
Via the above DataFrame person, update the column 'age' to 30 for any
|
|
1216
1216
|
rows where 'year' is 2014 in the table t:
|
|
1217
1217
|
|
|
1218
|
-
>>>
|
|
1218
|
+
>>> person.where(t.year == 2014).update({'age': 30})
|
|
1219
1219
|
"""
|
|
1220
1220
|
self._validate_mutable('update', False)
|
|
1221
1221
|
with Catalog.get().begin_xact(tbl=self._first_tbl, for_write=True, lock_mutable_tree=True):
|
|
1222
1222
|
return self._first_tbl.tbl_version.get().update(value_spec, where=self.where_clause, cascade=cascade)
|
|
1223
1223
|
|
|
1224
|
+
def recompute_columns(
|
|
1225
|
+
self, *columns: str | exprs.ColumnRef, errors_only: bool = False, cascade: bool = True
|
|
1226
|
+
) -> UpdateStatus:
|
|
1227
|
+
"""Recompute one or more computed columns of the underlying table of the DataFrame.
|
|
1228
|
+
|
|
1229
|
+
Args:
|
|
1230
|
+
columns: The names or references of the computed columns to recompute.
|
|
1231
|
+
errors_only: If True, only run the recomputation for rows that have errors in the column (ie, the column's
|
|
1232
|
+
`errortype` property indicates that an error occurred). Only allowed for recomputing a single column.
|
|
1233
|
+
cascade: if True, also update all computed columns that transitively depend on the recomputed columns.
|
|
1234
|
+
|
|
1235
|
+
Returns:
|
|
1236
|
+
UpdateStatus: the status of the operation.
|
|
1237
|
+
|
|
1238
|
+
Example:
|
|
1239
|
+
For table `person` with column `age` and computed column `height`, recompute the value of `height` for all
|
|
1240
|
+
rows where `age` is less than 18:
|
|
1241
|
+
|
|
1242
|
+
>>> person.where(person.age < 18).recompute_columns(person.height)
|
|
1243
|
+
"""
|
|
1244
|
+
self._validate_mutable('recompute_columns', False)
|
|
1245
|
+
with Catalog.get().begin_xact(tbl=self._first_tbl, for_write=True, lock_mutable_tree=True):
|
|
1246
|
+
tbl = Catalog.get().get_table_by_id(self._first_tbl.tbl_id)
|
|
1247
|
+
return tbl.recompute_columns(*columns, where=self.where_clause, errors_only=errors_only, cascade=cascade)
|
|
1248
|
+
|
|
1224
1249
|
def delete(self) -> UpdateStatus:
|
|
1225
1250
|
"""Delete rows from the underlying table of the DataFrame.
|
|
1226
1251
|
|
|
@@ -1230,13 +1255,9 @@ class DataFrame:
|
|
|
1230
1255
|
UpdateStatus: the status of the delete operation.
|
|
1231
1256
|
|
|
1232
1257
|
Example:
|
|
1233
|
-
|
|
1234
|
-
|
|
1235
|
-
>>> person = t.select()
|
|
1236
|
-
|
|
1237
|
-
Via the above DataFrame person, delete all rows from the table t where the column 'age' is less than 18:
|
|
1258
|
+
For a table `person` with column `age`, delete all rows where 'age' is less than 18:
|
|
1238
1259
|
|
|
1239
|
-
>>>
|
|
1260
|
+
>>> person.where(person.age < 18).delete()
|
|
1240
1261
|
"""
|
|
1241
1262
|
self._validate_mutable('delete', False)
|
|
1242
1263
|
if not self._first_tbl.is_insertable():
|
pixeltable/env.py
CHANGED
|
@@ -605,12 +605,7 @@ class Env:
|
|
|
605
605
|
metadata.upgrade_md(self._sa_engine)
|
|
606
606
|
|
|
607
607
|
@property
|
|
608
|
-
def pxt_api_key(self) -> str:
|
|
609
|
-
if self._pxt_api_key is None:
|
|
610
|
-
raise excs.Error(
|
|
611
|
-
'No API key is configured. Set the PIXELTABLE_API_KEY environment variable, or add an entry to '
|
|
612
|
-
'config.toml as described here:\nhttps://pixeltable.github.io/pixeltable/config/'
|
|
613
|
-
)
|
|
608
|
+
def pxt_api_key(self) -> Optional[str]:
|
|
614
609
|
return self._pxt_api_key
|
|
615
610
|
|
|
616
611
|
def get_client(self, name: str) -> Any:
|
|
@@ -748,6 +743,7 @@ class Env:
|
|
|
748
743
|
self.__register_package('whisper', library_name='openai-whisper')
|
|
749
744
|
self.__register_package('whisperx')
|
|
750
745
|
self.__register_package('yolox', library_name='pixeltable-yolox')
|
|
746
|
+
self.__register_package('lancedb')
|
|
751
747
|
|
|
752
748
|
def __register_package(self, package_name: str, library_name: Optional[str] = None) -> None:
|
|
753
749
|
is_installed: bool
|
|
@@ -36,7 +36,8 @@ class CompoundPredicate(Expr):
|
|
|
36
36
|
return f' {self.operator} '.join([f'({e})' for e in self.components])
|
|
37
37
|
|
|
38
38
|
@classmethod
|
|
39
|
-
def make_conjunction(cls, operands: list[Expr]) ->
|
|
39
|
+
def make_conjunction(cls, operands: list[Expr | None]) -> Expr | None:
|
|
40
|
+
operands = [e for e in operands if e is not None]
|
|
40
41
|
if len(operands) == 0:
|
|
41
42
|
return None
|
|
42
43
|
if len(operands) == 1:
|