pixeltable 0.4.19__py3-none-any.whl → 0.4.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/_version.py +1 -1
- pixeltable/catalog/catalog.py +76 -50
- pixeltable/catalog/column.py +29 -16
- pixeltable/catalog/insertable_table.py +2 -2
- pixeltable/catalog/path.py +4 -10
- pixeltable/catalog/table.py +51 -0
- pixeltable/catalog/table_version.py +40 -7
- pixeltable/catalog/view.py +2 -2
- pixeltable/config.py +1 -0
- pixeltable/env.py +2 -0
- pixeltable/exprs/column_ref.py +2 -1
- pixeltable/functions/__init__.py +1 -0
- pixeltable/functions/image.py +2 -8
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/video.py +534 -1
- pixeltable/globals.py +2 -1
- pixeltable/index/base.py +5 -18
- pixeltable/index/btree.py +6 -2
- pixeltable/index/embedding_index.py +4 -4
- pixeltable/metadata/schema.py +7 -32
- pixeltable/share/__init__.py +1 -1
- pixeltable/share/packager.py +22 -18
- pixeltable/share/protocol/__init__.py +34 -0
- pixeltable/share/protocol/common.py +170 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +109 -0
- pixeltable/share/publish.py +90 -56
- pixeltable/store.py +11 -15
- pixeltable/utils/av.py +87 -1
- pixeltable/utils/dbms.py +15 -11
- pixeltable/utils/image.py +10 -0
- {pixeltable-0.4.19.dist-info → pixeltable-0.4.20.dist-info}/METADATA +2 -1
- {pixeltable-0.4.19.dist-info → pixeltable-0.4.20.dist-info}/RECORD +36 -31
- {pixeltable-0.4.19.dist-info → pixeltable-0.4.20.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.19.dist-info → pixeltable-0.4.20.dist-info}/entry_points.txt +0 -0
- {pixeltable-0.4.19.dist-info → pixeltable-0.4.20.dist-info}/licenses/LICENSE +0 -0
pixeltable/_version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__: str = '0.4.19'
|
|
1
|
+
__version__: str = '0.4.20'
|
pixeltable/catalog/catalog.py
CHANGED
|
@@ -27,7 +27,7 @@ from .insertable_table import InsertableTable
|
|
|
27
27
|
from .path import Path
|
|
28
28
|
from .schema_object import SchemaObject
|
|
29
29
|
from .table import Table
|
|
30
|
-
from .table_version import TableVersion
|
|
30
|
+
from .table_version import TableVersion, TableVersionCompleteMd
|
|
31
31
|
from .table_version_handle import TableVersionHandle
|
|
32
32
|
from .table_version_path import TableVersionPath
|
|
33
33
|
from .tbl_ops import TableOp
|
|
@@ -103,7 +103,16 @@ def retry_loop(
|
|
|
103
103
|
Catalog.get()._finalize_pending_ops(e.tbl_id)
|
|
104
104
|
except (sql_exc.DBAPIError, sql_exc.OperationalError) as e:
|
|
105
105
|
# TODO: what other exceptions should we be looking for?
|
|
106
|
-
if isinstance(
|
|
106
|
+
if isinstance(
|
|
107
|
+
# TODO: Investigate whether DeadlockDetected points to a bug in our locking protocol,
|
|
108
|
+
# which is supposed to be deadlock-free.
|
|
109
|
+
e.orig,
|
|
110
|
+
(
|
|
111
|
+
psycopg.errors.SerializationFailure,
|
|
112
|
+
psycopg.errors.LockNotAvailable,
|
|
113
|
+
psycopg.errors.DeadlockDetected,
|
|
114
|
+
),
|
|
115
|
+
):
|
|
107
116
|
if num_retries < _MAX_RETRIES or _MAX_RETRIES == -1:
|
|
108
117
|
num_retries += 1
|
|
109
118
|
_logger.debug(f'Retrying ({num_retries}) after {type(e.orig)}')
|
|
@@ -452,12 +461,15 @@ class Catalog:
|
|
|
452
461
|
_logger.debug(f'Exception: undefined table {tbl_name!r}: Caught {type(e.orig)}: {e!r}')
|
|
453
462
|
raise excs.Error(f'Table was dropped: {tbl_name}') from None
|
|
454
463
|
elif (
|
|
464
|
+
# TODO: Investigate whether DeadlockDetected points to a bug in our locking protocol,
|
|
465
|
+
# which is supposed to be deadlock-free.
|
|
455
466
|
isinstance(
|
|
456
467
|
e.orig,
|
|
457
468
|
(
|
|
458
469
|
psycopg.errors.SerializationFailure, # serialization error despite getting x-locks
|
|
459
470
|
psycopg.errors.InFailedSqlTransaction, # can happen after tx fails for another reason
|
|
460
471
|
psycopg.errors.DuplicateColumn, # if a different process added a column concurrently
|
|
472
|
+
psycopg.errors.DeadlockDetected, # locking protocol contention
|
|
461
473
|
),
|
|
462
474
|
)
|
|
463
475
|
and convert_db_excs
|
|
@@ -1102,7 +1114,7 @@ class Catalog:
|
|
|
1102
1114
|
tv.is_validated = False
|
|
1103
1115
|
del self._tbl_versions[tbl_id, effective_version]
|
|
1104
1116
|
|
|
1105
|
-
def create_replica(self, path: Path, md: list[
|
|
1117
|
+
def create_replica(self, path: Path, md: list[TableVersionCompleteMd]) -> None:
|
|
1106
1118
|
"""
|
|
1107
1119
|
Creates table, table_version, and table_schema_version records for a replica with the given metadata.
|
|
1108
1120
|
The metadata should be presented in standard "ancestor order", with the table being replicated at
|
|
@@ -1184,7 +1196,7 @@ class Catalog:
|
|
|
1184
1196
|
system_path = Path.parse('_system', allow_system_path=True)
|
|
1185
1197
|
return self._create_dir(system_path, if_exists=IfExistsParam.IGNORE, parents=False)
|
|
1186
1198
|
|
|
1187
|
-
def __store_replica_md(self, path: Path, md:
|
|
1199
|
+
def __store_replica_md(self, path: Path, md: TableVersionCompleteMd) -> None:
|
|
1188
1200
|
_logger.info(f'Creating replica table at {path!r} with ID: {md.tbl_md.tbl_id}')
|
|
1189
1201
|
dir = self._get_schema_object(path.parent, expected=Dir, raise_if_not_exists=True)
|
|
1190
1202
|
assert dir is not None
|
|
@@ -1220,7 +1232,6 @@ class Catalog:
|
|
|
1220
1232
|
# New metadata is more recent than the metadata currently stored in the DB; we'll update the record
|
|
1221
1233
|
# in place in the DB.
|
|
1222
1234
|
new_tbl_md = dataclasses.replace(md.tbl_md, name=path.name, user=Env.get().user, is_replica=True)
|
|
1223
|
-
|
|
1224
1235
|
# Now see if a TableVersion record already exists in the DB for this table version. If not, insert it. If
|
|
1225
1236
|
# it already exists, check that the existing record is identical to the new one.
|
|
1226
1237
|
q = (
|
|
@@ -1234,9 +1245,16 @@ class Catalog:
|
|
|
1234
1245
|
is_new_tbl_version = True
|
|
1235
1246
|
else:
|
|
1236
1247
|
existing_version_md = schema.md_from_dict(schema.TableVersionMd, existing_version_md_row.md)
|
|
1237
|
-
# Validate that the existing metadata are identical to the new metadata, except
|
|
1238
|
-
#
|
|
1239
|
-
if
|
|
1248
|
+
# Validate that the existing metadata are identical to the new metadata, except is_fragment
|
|
1249
|
+
# and additional_md which may differ.
|
|
1250
|
+
if (
|
|
1251
|
+
dataclasses.replace(
|
|
1252
|
+
existing_version_md,
|
|
1253
|
+
is_fragment=md.version_md.is_fragment,
|
|
1254
|
+
additional_md=md.version_md.additional_md,
|
|
1255
|
+
)
|
|
1256
|
+
!= md.version_md
|
|
1257
|
+
):
|
|
1240
1258
|
raise excs.Error(
|
|
1241
1259
|
f'The version metadata for the replica {path!r}:{md.version_md.version} is inconsistent with '
|
|
1242
1260
|
'the metadata recorded from a prior replica.\n'
|
|
@@ -1716,35 +1734,42 @@ class Catalog:
|
|
|
1716
1734
|
tbl_record, version_record = _unpack_row(row, [schema.Table, schema.TableVersion])
|
|
1717
1735
|
tbl_md = schema.md_from_dict(schema.TableMd, tbl_record.md)
|
|
1718
1736
|
version_md = schema.md_from_dict(schema.TableVersionMd, version_record.md)
|
|
1737
|
+
tvp = self.construct_tvp(tbl_id, version, tbl_md.ancestor_ids, version_md.created_at)
|
|
1719
1738
|
|
|
1720
|
-
|
|
1739
|
+
view = View(tbl_id, tbl_record.dir_id, tbl_md.name, tvp, snapshot_only=True)
|
|
1740
|
+
self._tbls[tbl_id, version] = view
|
|
1741
|
+
return view
|
|
1742
|
+
|
|
1743
|
+
def construct_tvp(self, tbl_id: UUID, version: int, ancestor_ids: list[str], created_at: float) -> TableVersionPath:
|
|
1744
|
+
# Construct the TableVersionPath for the specified TableVersion. We do this by examining the created_at
|
|
1721
1745
|
# timestamps of this table and all its ancestors.
|
|
1722
1746
|
# TODO: Store the relevant TableVersionPaths in the database, so that we don't need to rely on timestamps
|
|
1723
1747
|
# (which might be nondeterministic in the future).
|
|
1724
1748
|
|
|
1749
|
+
assert Env.get().conn is not None
|
|
1750
|
+
|
|
1725
1751
|
# Build the list of ancestor versions, starting with the given table and traversing back to the base table.
|
|
1726
1752
|
# For each proper ancestor, we use the version whose created_at timestamp equals or most nearly precedes the
|
|
1727
1753
|
# given TableVersion's created_at timestamp.
|
|
1728
|
-
ancestors: list[tuple[UUID, int
|
|
1729
|
-
|
|
1730
|
-
|
|
1731
|
-
|
|
1732
|
-
|
|
1733
|
-
|
|
1734
|
-
|
|
1735
|
-
|
|
1736
|
-
|
|
1737
|
-
|
|
1738
|
-
|
|
1739
|
-
if
|
|
1740
|
-
|
|
1741
|
-
|
|
1742
|
-
|
|
1743
|
-
|
|
1744
|
-
|
|
1745
|
-
|
|
1746
|
-
|
|
1747
|
-
ancestors.append((UUID(ancestor_id), ancestor_version_md.version))
|
|
1754
|
+
ancestors: list[tuple[UUID, int]] = [(tbl_id, version)]
|
|
1755
|
+
for ancestor_id in ancestor_ids:
|
|
1756
|
+
q = (
|
|
1757
|
+
sql.select(schema.TableVersion)
|
|
1758
|
+
.where(schema.TableVersion.tbl_id == ancestor_id)
|
|
1759
|
+
.where(schema.TableVersion.md['created_at'].cast(sql.Float) <= created_at)
|
|
1760
|
+
.order_by(schema.TableVersion.md['created_at'].cast(sql.Float).desc())
|
|
1761
|
+
.limit(1)
|
|
1762
|
+
)
|
|
1763
|
+
row = Env.get().conn.execute(q).one_or_none()
|
|
1764
|
+
if row is None:
|
|
1765
|
+
# This can happen if an ancestor version is garbage collected; it can also happen in
|
|
1766
|
+
# rare circumstances involving table versions created specifically with Pixeltable 0.4.3.
|
|
1767
|
+
_logger.info(f'Ancestor {ancestor_id} not found for table {tbl_id}:{version}')
|
|
1768
|
+
raise excs.Error('The specified table version is no longer valid and cannot be retrieved.')
|
|
1769
|
+
ancestor_version_record = _unpack_row(row, [schema.TableVersion])[0]
|
|
1770
|
+
ancestor_version_md = schema.md_from_dict(schema.TableVersionMd, ancestor_version_record.md)
|
|
1771
|
+
assert ancestor_version_md.created_at <= created_at
|
|
1772
|
+
ancestors.append((UUID(ancestor_id), ancestor_version_md.version))
|
|
1748
1773
|
|
|
1749
1774
|
# Force any ancestors to be loaded (base table first).
|
|
1750
1775
|
for anc_id, anc_version in ancestors[::-1]:
|
|
@@ -1756,15 +1781,13 @@ class Catalog:
|
|
|
1756
1781
|
for anc_id, anc_version in ancestors[::-1]:
|
|
1757
1782
|
tvp = TableVersionPath(TableVersionHandle(anc_id, anc_version), base=tvp)
|
|
1758
1783
|
|
|
1759
|
-
|
|
1760
|
-
self._tbls[tbl_id, version] = view
|
|
1761
|
-
return view
|
|
1784
|
+
return tvp
|
|
1762
1785
|
|
|
1763
1786
|
@retry_loop(for_write=False)
|
|
1764
|
-
def collect_tbl_history(self, tbl_id: UUID, n: int | None) -> list[
|
|
1787
|
+
def collect_tbl_history(self, tbl_id: UUID, n: int | None) -> list[TableVersionCompleteMd]:
|
|
1765
1788
|
return self._collect_tbl_history(tbl_id, n)
|
|
1766
1789
|
|
|
1767
|
-
def _collect_tbl_history(self, tbl_id: UUID, n: int | None) -> list[
|
|
1790
|
+
def _collect_tbl_history(self, tbl_id: UUID, n: int | None) -> list[TableVersionCompleteMd]:
|
|
1768
1791
|
"""
|
|
1769
1792
|
Returns the history of up to n versions of the table with the given UUID.
|
|
1770
1793
|
|
|
@@ -1792,15 +1815,15 @@ class Catalog:
|
|
|
1792
1815
|
q = q.limit(n)
|
|
1793
1816
|
src_rows = Env.get().session.execute(q).fetchall()
|
|
1794
1817
|
return [
|
|
1795
|
-
|
|
1796
|
-
schema.md_from_dict(schema.TableMd, row.Table.md),
|
|
1797
|
-
schema.md_from_dict(schema.TableVersionMd, row.TableVersion.md),
|
|
1798
|
-
schema.md_from_dict(schema.TableSchemaVersionMd, row.TableSchemaVersion.md),
|
|
1818
|
+
TableVersionCompleteMd(
|
|
1819
|
+
tbl_md=schema.md_from_dict(schema.TableMd, row.Table.md),
|
|
1820
|
+
version_md=schema.md_from_dict(schema.TableVersionMd, row.TableVersion.md),
|
|
1821
|
+
schema_version_md=schema.md_from_dict(schema.TableSchemaVersionMd, row.TableSchemaVersion.md),
|
|
1799
1822
|
)
|
|
1800
1823
|
for row in src_rows
|
|
1801
1824
|
]
|
|
1802
1825
|
|
|
1803
|
-
def load_tbl_md(self, tbl_id: UUID, effective_version: int | None) ->
|
|
1826
|
+
def load_tbl_md(self, tbl_id: UUID, effective_version: int | None) -> TableVersionCompleteMd:
|
|
1804
1827
|
"""
|
|
1805
1828
|
Loads metadata from the store for a given table UUID and version.
|
|
1806
1829
|
"""
|
|
@@ -1851,7 +1874,7 @@ class Catalog:
|
|
|
1851
1874
|
version_md = schema.md_from_dict(schema.TableVersionMd, version_record.md)
|
|
1852
1875
|
schema_version_md = schema.md_from_dict(schema.TableSchemaVersionMd, schema_version_record.md)
|
|
1853
1876
|
|
|
1854
|
-
return
|
|
1877
|
+
return TableVersionCompleteMd(tbl_md, version_md, schema_version_md)
|
|
1855
1878
|
|
|
1856
1879
|
def store_tbl_md(
|
|
1857
1880
|
self,
|
|
@@ -1925,8 +1948,12 @@ class Catalog:
|
|
|
1925
1948
|
# This table version already exists; update it.
|
|
1926
1949
|
assert len(tv_rows) == 1 # must be unique
|
|
1927
1950
|
tv = tv_rows[0]
|
|
1928
|
-
# Validate that the only
|
|
1929
|
-
assert tv.md == dataclasses.asdict(
|
|
1951
|
+
# Validate that the only fields that can change are 'is_fragment' and 'additional_md'.
|
|
1952
|
+
assert tv.md == dataclasses.asdict(
|
|
1953
|
+
dataclasses.replace(
|
|
1954
|
+
version_md, is_fragment=tv.md['is_fragment'], additional_md=tv.md['additional_md']
|
|
1955
|
+
)
|
|
1956
|
+
)
|
|
1930
1957
|
result = session.execute(
|
|
1931
1958
|
sql.update(schema.TableVersion.__table__)
|
|
1932
1959
|
.values({schema.TableVersion.md: dataclasses.asdict(version_md)})
|
|
@@ -1977,7 +2004,7 @@ class Catalog:
|
|
|
1977
2004
|
conn.execute(sql.delete(schema.PendingTableOp.__table__).where(schema.PendingTableOp.tbl_id == tbl_id))
|
|
1978
2005
|
conn.execute(sql.delete(schema.Table.__table__).where(schema.Table.id == tbl_id))
|
|
1979
2006
|
|
|
1980
|
-
def load_replica_md(self, tbl: Table) -> list[
|
|
2007
|
+
def load_replica_md(self, tbl: Table) -> list[TableVersionCompleteMd]:
|
|
1981
2008
|
"""
|
|
1982
2009
|
Load metadata for the given table along with all its ancestors. The values of TableMd.current_version and
|
|
1983
2010
|
TableMd.current_schema_version will be adjusted to ensure that the metadata represent a valid (internally
|
|
@@ -1998,7 +2025,7 @@ class Catalog:
|
|
|
1998
2025
|
# Set the `is_replica` flag on every ancestor's TableMd.
|
|
1999
2026
|
ancestor_md.tbl_md.is_replica = True
|
|
2000
2027
|
# For replica metadata, we guarantee that the current_version and current_schema_version of TableMd
|
|
2001
|
-
# match the corresponding values in
|
|
2028
|
+
# match the corresponding values in TableVersionCompleteMd and TableSchemaVersionMd. This is to ensure that,
|
|
2002
2029
|
# when the metadata is later stored in the catalog of a different Pixeltable instance, the values of
|
|
2003
2030
|
# current_version and current_schema_version will always point to versions that are known to the
|
|
2004
2031
|
# destination catalog.
|
|
@@ -2017,7 +2044,10 @@ class Catalog:
|
|
|
2017
2044
|
self, tbl_id: UUID, effective_version: int | None, check_pending_ops: bool = True
|
|
2018
2045
|
) -> TableVersion | None:
|
|
2019
2046
|
"""Creates TableVersion instance from stored metadata and registers it in _tbl_versions."""
|
|
2020
|
-
|
|
2047
|
+
table_version_md = self.load_tbl_md(tbl_id, effective_version)
|
|
2048
|
+
tbl_md = table_version_md.tbl_md
|
|
2049
|
+
version_md = table_version_md.version_md
|
|
2050
|
+
schema_version_md = table_version_md.schema_version_md
|
|
2021
2051
|
view_md = tbl_md.view_md
|
|
2022
2052
|
|
|
2023
2053
|
conn = Env.get().conn
|
|
@@ -2041,10 +2071,6 @@ class Catalog:
|
|
|
2041
2071
|
|
|
2042
2072
|
# load mutable view ids for mutable TableVersions
|
|
2043
2073
|
mutable_view_ids: list[UUID] = []
|
|
2044
|
-
# If this is a replica, effective_version should not be None. We see this today, because
|
|
2045
|
-
# the replica's TV instance's Column instances contain value_expr_dicts that reference the live version.
|
|
2046
|
-
# This is presumably a source of bugs, because it ignores schema version changes (eg, column renames).
|
|
2047
|
-
# TODO: retarget the value_expr_dict when instantiating Columns for a particular TV instance.
|
|
2048
2074
|
if effective_version is None and not tbl_md.is_replica:
|
|
2049
2075
|
q = (
|
|
2050
2076
|
sql.select(schema.Table.id)
|
|
@@ -2061,7 +2087,7 @@ class Catalog:
|
|
|
2061
2087
|
tbl_version = TableVersion(tbl_id, tbl_md, version_md, effective_version, schema_version_md, mutable_views)
|
|
2062
2088
|
else:
|
|
2063
2089
|
assert len(view_md.base_versions) > 0 # a view needs to have a base
|
|
2064
|
-
# TODO: add
|
|
2090
|
+
# TODO: add TableVersionCompleteMd.is_pure_snapshot() and use that
|
|
2065
2091
|
pure_snapshot = (
|
|
2066
2092
|
view_md.is_snapshot
|
|
2067
2093
|
and view_md.predicate is None
|
pixeltable/catalog/column.py
CHANGED
|
@@ -18,6 +18,7 @@ from .globals import MediaValidation, QColumnId, is_valid_identifier
|
|
|
18
18
|
if TYPE_CHECKING:
|
|
19
19
|
from .table_version import TableVersion
|
|
20
20
|
from .table_version_handle import ColumnHandle, TableVersionHandle
|
|
21
|
+
from .table_version_path import TableVersionPath
|
|
21
22
|
|
|
22
23
|
_logger = logging.getLogger('pixeltable')
|
|
23
24
|
|
|
@@ -162,26 +163,38 @@ class Column:
|
|
|
162
163
|
)
|
|
163
164
|
return col_md, sch_md
|
|
164
165
|
|
|
165
|
-
def init_value_expr(self) -> None:
|
|
166
|
+
def init_value_expr(self, tvp: 'TableVersionPath' | None) -> None:
|
|
167
|
+
"""
|
|
168
|
+
Initialize the value_expr from its dict representation, if necessary.
|
|
169
|
+
|
|
170
|
+
If `tvp` is not None, retarget the value_expr to the given TableVersionPath.
|
|
171
|
+
"""
|
|
166
172
|
from pixeltable import exprs
|
|
167
173
|
|
|
168
|
-
if self._value_expr is
|
|
174
|
+
if self._value_expr is None and self.value_expr_dict is None:
|
|
169
175
|
return
|
|
170
|
-
|
|
171
|
-
self._value_expr
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
176
|
+
|
|
177
|
+
if self._value_expr is None:
|
|
178
|
+
# Instantiate the Expr from its dict
|
|
179
|
+
self._value_expr = exprs.Expr.from_dict(self.value_expr_dict)
|
|
180
|
+
self._value_expr.bind_rel_paths()
|
|
181
|
+
if not self._value_expr.is_valid:
|
|
182
|
+
message = (
|
|
183
|
+
dedent(
|
|
184
|
+
f"""
|
|
185
|
+
The computed column {self.name!r} in table {self.get_tbl().name!r} is no longer valid.
|
|
186
|
+
{{validation_error}}
|
|
187
|
+
You can continue to query existing data from this column, but evaluating it on new data will raise an error.
|
|
188
|
+
""" # noqa: E501
|
|
189
|
+
)
|
|
190
|
+
.strip()
|
|
191
|
+
.format(validation_error=self._value_expr.validation_error)
|
|
180
192
|
)
|
|
181
|
-
.
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
193
|
+
warnings.warn(message, category=excs.PixeltableWarning, stacklevel=2)
|
|
194
|
+
|
|
195
|
+
if tvp is not None:
|
|
196
|
+
# Retarget the Expr
|
|
197
|
+
self._value_expr = self._value_expr.retarget(tvp)
|
|
185
198
|
|
|
186
199
|
def get_tbl(self) -> TableVersion:
|
|
187
200
|
tv = self.tbl_handle.get()
|
pixeltable/catalog/insertable_table.py
CHANGED
|
@@ -16,7 +16,7 @@ from pixeltable.utils.pydantic import is_json_convertible
|
|
|
16
16
|
|
|
17
17
|
from .globals import MediaValidation
|
|
18
18
|
from .table import Table
|
|
19
|
-
from .table_version import TableVersion,
|
|
19
|
+
from .table_version import TableVersion, TableVersionCompleteMd
|
|
20
20
|
from .table_version_handle import TableVersionHandle
|
|
21
21
|
from .table_version_path import TableVersionPath
|
|
22
22
|
from .tbl_ops import CreateStoreTableOp, TableOp
|
|
@@ -73,7 +73,7 @@ class InsertableTable(Table):
|
|
|
73
73
|
comment: str,
|
|
74
74
|
media_validation: MediaValidation,
|
|
75
75
|
create_default_idxs: bool,
|
|
76
|
-
) -> tuple[
|
|
76
|
+
) -> tuple[TableVersionCompleteMd, list[TableOp]]:
|
|
77
77
|
columns = cls._create_columns(schema)
|
|
78
78
|
cls._verify_schema(columns)
|
|
79
79
|
column_names = [col.name for col in columns]
|
pixeltable/catalog/path.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
|
+
from typing import NamedTuple
|
|
4
5
|
|
|
5
6
|
from pixeltable import exceptions as excs
|
|
6
7
|
|
|
@@ -9,14 +10,9 @@ from .globals import is_valid_identifier
|
|
|
9
10
|
_logger = logging.getLogger('pixeltable')
|
|
10
11
|
|
|
11
12
|
|
|
12
|
-
class Path:
|
|
13
|
+
class Path(NamedTuple):
|
|
13
14
|
components: list[str]
|
|
14
|
-
version: int | None
|
|
15
|
-
|
|
16
|
-
def __init__(self, components: list[str], version: int | None = None) -> None:
|
|
17
|
-
assert len(components) > 0
|
|
18
|
-
self.components = components
|
|
19
|
-
self.version = version
|
|
15
|
+
version: int | None = None
|
|
20
16
|
|
|
21
17
|
@classmethod
|
|
22
18
|
def parse(
|
|
@@ -50,6 +46,7 @@ class Path:
|
|
|
50
46
|
if version is not None and not allow_versioned_path:
|
|
51
47
|
raise excs.Error(f'Versioned path not allowed here: {path}')
|
|
52
48
|
|
|
49
|
+
assert len(components) > 0
|
|
53
50
|
return Path(components, version)
|
|
54
51
|
|
|
55
52
|
@property
|
|
@@ -118,8 +115,5 @@ class Path:
|
|
|
118
115
|
def __hash__(self) -> int:
|
|
119
116
|
return hash(str(self))
|
|
120
117
|
|
|
121
|
-
def __lt__(self, other: Path) -> bool:
|
|
122
|
-
return str(self) < str(other)
|
|
123
|
-
|
|
124
118
|
|
|
125
119
|
ROOT_PATH = Path([''])
|
pixeltable/catalog/table.py
CHANGED
|
@@ -1624,6 +1624,57 @@ class Table(SchemaObject):
|
|
|
1624
1624
|
# remove cached md in order to force a reload on the next operation
|
|
1625
1625
|
self._tbl_version_path.clear_cached_md()
|
|
1626
1626
|
|
|
1627
|
+
def push(self, *, version: int | None = None) -> None:
|
|
1628
|
+
from pixeltable.share import push_replica
|
|
1629
|
+
from pixeltable.share.protocol import PxtUri
|
|
1630
|
+
|
|
1631
|
+
tbl_version = self._tbl_version.get()
|
|
1632
|
+
pxt_uri = tbl_version.pxt_uri
|
|
1633
|
+
|
|
1634
|
+
if tbl_version.is_replica:
|
|
1635
|
+
raise excs.Error(f'push(): Cannot push replica table {self._name!r}. (Did you mean `pull()`?)')
|
|
1636
|
+
if pxt_uri is None:
|
|
1637
|
+
raise excs.Error(
|
|
1638
|
+
f'push(): Table {self._name!r} has not yet been published to Pixeltable Cloud. '
|
|
1639
|
+
'To publish it, use `pxt.publish()` instead.'
|
|
1640
|
+
)
|
|
1641
|
+
|
|
1642
|
+
# Parse the pxt URI to extract org/db and create a UUID-based URI for pushing
|
|
1643
|
+
parsed_uri = PxtUri(uri=pxt_uri)
|
|
1644
|
+
uuid_uri_obj = PxtUri.from_components(org=parsed_uri.org, id=self._id, db=parsed_uri.db)
|
|
1645
|
+
uuid_uri = str(uuid_uri_obj)
|
|
1646
|
+
|
|
1647
|
+
if version is None:
|
|
1648
|
+
# Push this version
|
|
1649
|
+
push_replica(uuid_uri, self)
|
|
1650
|
+
else:
|
|
1651
|
+
versioned_path = catalog.Path.parse(self._path())._replace(version=version)
|
|
1652
|
+
versioned_tbl = catalog.Catalog.get().get_table(versioned_path, IfNotExistsParam.IGNORE)
|
|
1653
|
+
if versioned_tbl is None:
|
|
1654
|
+
raise excs.Error(f'Table {self._name!r} has no known version {version}')
|
|
1655
|
+
assert versioned_tbl._id == self._id
|
|
1656
|
+
push_replica(uuid_uri, versioned_tbl)
|
|
1657
|
+
|
|
1658
|
+
def pull(self, *, version: int | None = None) -> None:
|
|
1659
|
+
from pixeltable.share import pull_replica
|
|
1660
|
+
from pixeltable.share.protocol import PxtUri
|
|
1661
|
+
|
|
1662
|
+
tbl_version = self._tbl_version_path.tbl_version.get()
|
|
1663
|
+
pxt_uri = tbl_version.pxt_uri
|
|
1664
|
+
|
|
1665
|
+
if not tbl_version.is_replica:
|
|
1666
|
+
raise excs.Error(
|
|
1667
|
+
f'pull(): Table {self._name!r} is not a replica of a Pixeltable Cloud table (nothing to `pull()`).'
|
|
1668
|
+
)
|
|
1669
|
+
assert pxt_uri is not None
|
|
1670
|
+
|
|
1671
|
+
# Parse the pxt URI to extract org/db and create a UUID-based URI for pulling
|
|
1672
|
+
parsed_uri = PxtUri(uri=pxt_uri)
|
|
1673
|
+
uuid_uri_obj = PxtUri.from_components(org=parsed_uri.org, id=self._id, db=parsed_uri.db, version=version)
|
|
1674
|
+
uuid_uri = str(uuid_uri_obj)
|
|
1675
|
+
|
|
1676
|
+
pull_replica(self._path(), uuid_uri)
|
|
1677
|
+
|
|
1627
1678
|
def external_stores(self) -> list[str]:
|
|
1628
1679
|
return list(self._tbl_version.get().external_stores.keys())
|
|
1629
1680
|
|
pixeltable/catalog/table_version.py
CHANGED
|
@@ -43,17 +43,25 @@ _logger = logging.getLogger('pixeltable')
|
|
|
43
43
|
|
|
44
44
|
|
|
45
45
|
@dataclasses.dataclass(frozen=True)
|
|
46
|
-
class
|
|
46
|
+
class TableVersionCompleteMd:
|
|
47
47
|
"""
|
|
48
48
|
Complete set of md records for a specific TableVersion instance.
|
|
49
|
-
|
|
50
|
-
TODO: subsume schema.FullTableMd
|
|
51
49
|
"""
|
|
52
50
|
|
|
53
51
|
tbl_md: schema.TableMd
|
|
54
52
|
version_md: schema.TableVersionMd
|
|
55
53
|
schema_version_md: schema.TableSchemaVersionMd
|
|
56
54
|
|
|
55
|
+
@property
|
|
56
|
+
def is_pure_snapshot(self) -> bool:
|
|
57
|
+
return (
|
|
58
|
+
self.tbl_md is not None
|
|
59
|
+
and self.tbl_md.view_md is not None
|
|
60
|
+
and self.tbl_md.view_md.is_snapshot
|
|
61
|
+
and self.tbl_md.view_md.predicate is None
|
|
62
|
+
and len(self.schema_version_md.columns) == 0
|
|
63
|
+
)
|
|
64
|
+
|
|
57
65
|
|
|
58
66
|
class TableVersion:
|
|
59
67
|
"""
|
|
@@ -240,7 +248,7 @@ class TableVersion:
|
|
|
240
248
|
media_validation: MediaValidation,
|
|
241
249
|
create_default_idxs: bool,
|
|
242
250
|
view_md: schema.ViewMd | None = None,
|
|
243
|
-
) ->
|
|
251
|
+
) -> TableVersionCompleteMd:
|
|
244
252
|
from .table_version_handle import TableVersionHandle
|
|
245
253
|
|
|
246
254
|
user = Env.get().user
|
|
@@ -337,7 +345,7 @@ class TableVersion:
|
|
|
337
345
|
media_validation=media_validation.name.lower(),
|
|
338
346
|
additional_md={},
|
|
339
347
|
)
|
|
340
|
-
return
|
|
348
|
+
return TableVersionCompleteMd(tbl_md, table_version_md, schema_version_md)
|
|
341
349
|
|
|
342
350
|
def exec_op(self, op: TableOp) -> None:
|
|
343
351
|
if op.create_store_table_op is not None:
|
|
@@ -365,7 +373,7 @@ class TableVersion:
|
|
|
365
373
|
_logger.debug(f'Loaded view {self.name} with {row_counts.num_rows} rows')
|
|
366
374
|
|
|
367
375
|
@classmethod
|
|
368
|
-
def create_replica(cls, md:
|
|
376
|
+
def create_replica(cls, md: TableVersionCompleteMd) -> TableVersion:
|
|
369
377
|
from .catalog import Catalog, TableVersionPath
|
|
370
378
|
|
|
371
379
|
assert Env.get().in_xact
|
|
@@ -437,6 +445,8 @@ class TableVersion:
|
|
|
437
445
|
def _init_schema(self) -> None:
|
|
438
446
|
from pixeltable.store import StoreComponentView, StoreTable, StoreView
|
|
439
447
|
|
|
448
|
+
from .catalog import Catalog
|
|
449
|
+
|
|
440
450
|
# initialize IndexBase instances and collect sa_col_types
|
|
441
451
|
idxs: dict[int, index.IndexBase] = {}
|
|
442
452
|
val_col_idxs: dict[int, index.IndexBase] = {} # key: id of value column
|
|
@@ -529,8 +539,16 @@ class TableVersion:
|
|
|
529
539
|
self.idxs_by_col.setdefault(indexed_col_id, []).append(info)
|
|
530
540
|
|
|
531
541
|
# create value exprs, now that we have all lookup structures in place
|
|
542
|
+
tvp: TableVersionPath | None = None
|
|
543
|
+
if self.effective_version is not None:
|
|
544
|
+
# for snapshot TableVersion instances, we need to retarget the column value_exprs to the snapshot;
|
|
545
|
+
# otherwise they'll incorrectly refer to the live table. So, construct a full TableVersionPath to
|
|
546
|
+
# use for retargeting.
|
|
547
|
+
tvp = Catalog.get().construct_tvp(
|
|
548
|
+
self.id, self.effective_version, self.tbl_md.ancestor_ids, self.version_md.created_at
|
|
549
|
+
)
|
|
532
550
|
for col in self.cols_by_id.values():
|
|
533
|
-
col.init_value_expr()
|
|
551
|
+
col.init_value_expr(tvp)
|
|
534
552
|
|
|
535
553
|
# create the sqlalchemy schema, after instantiating all Columns
|
|
536
554
|
if self.is_component_view:
|
|
@@ -1413,6 +1431,21 @@ class TableVersion:
|
|
|
1413
1431
|
self._tbl_md.external_stores.pop(idx)
|
|
1414
1432
|
self._write_md(new_version=True, new_schema_version=True)
|
|
1415
1433
|
|
|
1434
|
+
@property
|
|
1435
|
+
def pxt_uri(self) -> str | None:
|
|
1436
|
+
return self._tbl_md.additional_md.get('pxt_uri')
|
|
1437
|
+
|
|
1438
|
+
def update_pxt_uri(self, pxt_uri: str | None) -> None:
|
|
1439
|
+
if self._tbl_md.additional_md.get('pxt_uri') == pxt_uri:
|
|
1440
|
+
return # Nothing to do
|
|
1441
|
+
|
|
1442
|
+
if pxt_uri is None:
|
|
1443
|
+
del self._tbl_md.additional_md['pxt_uri'] # must be present due to preceding check
|
|
1444
|
+
else:
|
|
1445
|
+
self._tbl_md.additional_md['pxt_uri'] = pxt_uri
|
|
1446
|
+
|
|
1447
|
+
self._write_md(new_version=False, new_schema_version=False)
|
|
1448
|
+
|
|
1416
1449
|
@property
|
|
1417
1450
|
def tbl_md(self) -> schema.TableMd:
|
|
1418
1451
|
return self._tbl_md
|
pixeltable/catalog/view.py
CHANGED
|
@@ -14,7 +14,7 @@ from pixeltable.iterators import ComponentIterator
|
|
|
14
14
|
from .column import Column
|
|
15
15
|
from .globals import _POS_COLUMN_NAME, MediaValidation
|
|
16
16
|
from .table import Table
|
|
17
|
-
from .table_version import TableVersion,
|
|
17
|
+
from .table_version import TableVersion, TableVersionCompleteMd
|
|
18
18
|
from .table_version_handle import TableVersionHandle
|
|
19
19
|
from .table_version_path import TableVersionPath
|
|
20
20
|
from .tbl_ops import CreateStoreTableOp, LoadViewOp, TableOp
|
|
@@ -84,7 +84,7 @@ class View(Table):
|
|
|
84
84
|
media_validation: MediaValidation,
|
|
85
85
|
iterator_cls: type[ComponentIterator] | None,
|
|
86
86
|
iterator_args: dict | None,
|
|
87
|
-
) -> tuple[
|
|
87
|
+
) -> tuple[TableVersionCompleteMd, list[TableOp] | None]:
|
|
88
88
|
from pixeltable.plan import SampleClause
|
|
89
89
|
|
|
90
90
|
# Convert select_list to more additional_columns if present
|
pixeltable/config.py
CHANGED
|
@@ -176,6 +176,7 @@ KNOWN_CONFIG_OPTIONS = {
|
|
|
176
176
|
'gemini': {'api_key': 'Gemini API key', 'rate_limits': 'Per-model rate limits for Gemini API requests'},
|
|
177
177
|
'hf': {'auth_token': 'Hugging Face access token'},
|
|
178
178
|
'imagen': {'rate_limits': 'Per-model rate limits for Imagen API requests'},
|
|
179
|
+
'reve': {'api_key': 'Reve API key', 'rate_limit': 'Rate limit for Reve API requests (requests per minute)'},
|
|
179
180
|
'groq': {'api_key': 'Groq API key', 'rate_limit': 'Rate limit for Groq API requests'},
|
|
180
181
|
'label_studio': {'api_key': 'Label Studio API key', 'url': 'Label Studio server URL'},
|
|
181
182
|
'mistral': {'api_key': 'Mistral API key', 'rate_limit': 'Rate limit for Mistral API requests'},
|
pixeltable/env.py
CHANGED
|
@@ -792,6 +792,7 @@ class Env:
|
|
|
792
792
|
self.__register_package('pyarrow')
|
|
793
793
|
self.__register_package('pydantic')
|
|
794
794
|
self.__register_package('replicate')
|
|
795
|
+
self.__register_package('reve')
|
|
795
796
|
self.__register_package('sentencepiece')
|
|
796
797
|
self.__register_package('sentence_transformers', library_name='sentence-transformers')
|
|
797
798
|
self.__register_package('soundfile')
|
|
@@ -806,6 +807,7 @@ class Env:
|
|
|
806
807
|
self.__register_package('whisperx')
|
|
807
808
|
self.__register_package('yolox', library_name='pixeltable-yolox')
|
|
808
809
|
self.__register_package('lancedb')
|
|
810
|
+
self.__register_package('scenedetect')
|
|
809
811
|
|
|
810
812
|
def __register_package(self, package_name: str, library_name: str | None = None) -> None:
|
|
811
813
|
is_installed: bool
|
pixeltable/exprs/column_ref.py
CHANGED
|
@@ -221,7 +221,8 @@ class ColumnRef(Expr):
|
|
|
221
221
|
return self._descriptors().to_html()
|
|
222
222
|
|
|
223
223
|
def _descriptors(self) -> DescriptionHelper:
|
|
224
|
-
|
|
224
|
+
with catalog.Catalog.get().begin_xact():
|
|
225
|
+
tbl = catalog.Catalog.get().get_table_by_id(self.col.tbl_handle.id)
|
|
225
226
|
helper = DescriptionHelper()
|
|
226
227
|
helper.append(f'Column\n{self.col.name!r}\n(of table {tbl._path()!r})')
|
|
227
228
|
helper.append(tbl._col_descriptor([self.col.name]))
|
pixeltable/functions/__init__.py
CHANGED
pixeltable/functions/image.py
CHANGED
|
@@ -10,14 +10,13 @@ t.select(t.img_col.convert('L')).collect()
|
|
|
10
10
|
```
|
|
11
11
|
"""
|
|
12
12
|
|
|
13
|
-
import base64
|
|
14
|
-
|
|
15
13
|
import PIL.Image
|
|
16
14
|
|
|
17
15
|
import pixeltable as pxt
|
|
18
16
|
import pixeltable.type_system as ts
|
|
19
17
|
from pixeltable.exprs import Expr
|
|
20
18
|
from pixeltable.utils.code import local_public_names
|
|
19
|
+
from pixeltable.utils.image import to_base64
|
|
21
20
|
|
|
22
21
|
|
|
23
22
|
@pxt.udf(is_method=True)
|
|
@@ -29,12 +28,7 @@ def b64_encode(img: PIL.Image.Image, image_format: str = 'png') -> str:
|
|
|
29
28
|
img: image
|
|
30
29
|
image_format: image format [supported by PIL](https://pillow.readthedocs.io/en/stable/handbook/image-file-formats.html#fully-supported-formats)
|
|
31
30
|
"""
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
bytes_arr = io.BytesIO()
|
|
35
|
-
img.save(bytes_arr, format=image_format)
|
|
36
|
-
b64_bytes = base64.b64encode(bytes_arr.getvalue())
|
|
37
|
-
return b64_bytes.decode('utf-8')
|
|
31
|
+
return to_base64(img, format=image_format)
|
|
38
32
|
|
|
39
33
|
|
|
40
34
|
@pxt.udf(substitute_fn=PIL.Image.alpha_composite, is_method=True)
|