pixeltable 0.4.19__py3-none-any.whl → 0.4.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (36)
  1. pixeltable/_version.py +1 -1
  2. pixeltable/catalog/catalog.py +76 -50
  3. pixeltable/catalog/column.py +29 -16
  4. pixeltable/catalog/insertable_table.py +2 -2
  5. pixeltable/catalog/path.py +4 -10
  6. pixeltable/catalog/table.py +51 -0
  7. pixeltable/catalog/table_version.py +40 -7
  8. pixeltable/catalog/view.py +2 -2
  9. pixeltable/config.py +1 -0
  10. pixeltable/env.py +2 -0
  11. pixeltable/exprs/column_ref.py +2 -1
  12. pixeltable/functions/__init__.py +1 -0
  13. pixeltable/functions/image.py +2 -8
  14. pixeltable/functions/reve.py +250 -0
  15. pixeltable/functions/video.py +534 -1
  16. pixeltable/globals.py +2 -1
  17. pixeltable/index/base.py +5 -18
  18. pixeltable/index/btree.py +6 -2
  19. pixeltable/index/embedding_index.py +4 -4
  20. pixeltable/metadata/schema.py +7 -32
  21. pixeltable/share/__init__.py +1 -1
  22. pixeltable/share/packager.py +22 -18
  23. pixeltable/share/protocol/__init__.py +34 -0
  24. pixeltable/share/protocol/common.py +170 -0
  25. pixeltable/share/protocol/operation_types.py +33 -0
  26. pixeltable/share/protocol/replica.py +109 -0
  27. pixeltable/share/publish.py +91 -56
  28. pixeltable/store.py +11 -15
  29. pixeltable/utils/av.py +87 -1
  30. pixeltable/utils/dbms.py +15 -11
  31. pixeltable/utils/image.py +10 -0
  32. {pixeltable-0.4.19.dist-info → pixeltable-0.4.21.dist-info}/METADATA +2 -1
  33. {pixeltable-0.4.19.dist-info → pixeltable-0.4.21.dist-info}/RECORD +36 -31
  34. {pixeltable-0.4.19.dist-info → pixeltable-0.4.21.dist-info}/WHEEL +0 -0
  35. {pixeltable-0.4.19.dist-info → pixeltable-0.4.21.dist-info}/entry_points.txt +0 -0
  36. {pixeltable-0.4.19.dist-info → pixeltable-0.4.21.dist-info}/licenses/LICENSE +0 -0
pixeltable/_version.py CHANGED
@@ -1 +1 @@
1
- __version__: str = '0.4.19'
1
+ __version__: str = '0.4.21'
pixeltable/catalog/catalog.py CHANGED
@@ -27,7 +27,7 @@ from .insertable_table import InsertableTable
27
27
  from .path import Path
28
28
  from .schema_object import SchemaObject
29
29
  from .table import Table
30
- from .table_version import TableVersion
30
+ from .table_version import TableVersion, TableVersionCompleteMd
31
31
  from .table_version_handle import TableVersionHandle
32
32
  from .table_version_path import TableVersionPath
33
33
  from .tbl_ops import TableOp
@@ -103,7 +103,16 @@ def retry_loop(
103
103
  Catalog.get()._finalize_pending_ops(e.tbl_id)
104
104
  except (sql_exc.DBAPIError, sql_exc.OperationalError) as e:
105
105
  # TODO: what other exceptions should we be looking for?
106
- if isinstance(e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)):
106
+ if isinstance(
107
+ # TODO: Investigate whether DeadlockDetected points to a bug in our locking protocol,
108
+ # which is supposed to be deadlock-free.
109
+ e.orig,
110
+ (
111
+ psycopg.errors.SerializationFailure,
112
+ psycopg.errors.LockNotAvailable,
113
+ psycopg.errors.DeadlockDetected,
114
+ ),
115
+ ):
107
116
  if num_retries < _MAX_RETRIES or _MAX_RETRIES == -1:
108
117
  num_retries += 1
109
118
  _logger.debug(f'Retrying ({num_retries}) after {type(e.orig)}')
@@ -452,12 +461,15 @@ class Catalog:
452
461
  _logger.debug(f'Exception: undefined table {tbl_name!r}: Caught {type(e.orig)}: {e!r}')
453
462
  raise excs.Error(f'Table was dropped: {tbl_name}') from None
454
463
  elif (
464
+ # TODO: Investigate whether DeadlockDetected points to a bug in our locking protocol,
465
+ # which is supposed to be deadlock-free.
455
466
  isinstance(
456
467
  e.orig,
457
468
  (
458
469
  psycopg.errors.SerializationFailure, # serialization error despite getting x-locks
459
470
  psycopg.errors.InFailedSqlTransaction, # can happen after tx fails for another reason
460
471
  psycopg.errors.DuplicateColumn, # if a different process added a column concurrently
472
+ psycopg.errors.DeadlockDetected, # locking protocol contention
461
473
  ),
462
474
  )
463
475
  and convert_db_excs
@@ -1102,7 +1114,7 @@ class Catalog:
1102
1114
  tv.is_validated = False
1103
1115
  del self._tbl_versions[tbl_id, effective_version]
1104
1116
 
1105
- def create_replica(self, path: Path, md: list[schema.FullTableMd]) -> None:
1117
+ def create_replica(self, path: Path, md: list[TableVersionCompleteMd]) -> None:
1106
1118
  """
1107
1119
  Creates table, table_version, and table_schema_version records for a replica with the given metadata.
1108
1120
  The metadata should be presented in standard "ancestor order", with the table being replicated at
@@ -1184,7 +1196,7 @@ class Catalog:
1184
1196
  system_path = Path.parse('_system', allow_system_path=True)
1185
1197
  return self._create_dir(system_path, if_exists=IfExistsParam.IGNORE, parents=False)
1186
1198
 
1187
- def __store_replica_md(self, path: Path, md: schema.FullTableMd) -> None:
1199
+ def __store_replica_md(self, path: Path, md: TableVersionCompleteMd) -> None:
1188
1200
  _logger.info(f'Creating replica table at {path!r} with ID: {md.tbl_md.tbl_id}')
1189
1201
  dir = self._get_schema_object(path.parent, expected=Dir, raise_if_not_exists=True)
1190
1202
  assert dir is not None
@@ -1220,7 +1232,6 @@ class Catalog:
1220
1232
  # New metadata is more recent than the metadata currently stored in the DB; we'll update the record
1221
1233
  # in place in the DB.
1222
1234
  new_tbl_md = dataclasses.replace(md.tbl_md, name=path.name, user=Env.get().user, is_replica=True)
1223
-
1224
1235
  # Now see if a TableVersion record already exists in the DB for this table version. If not, insert it. If
1225
1236
  # it already exists, check that the existing record is identical to the new one.
1226
1237
  q = (
@@ -1234,9 +1245,16 @@ class Catalog:
1234
1245
  is_new_tbl_version = True
1235
1246
  else:
1236
1247
  existing_version_md = schema.md_from_dict(schema.TableVersionMd, existing_version_md_row.md)
1237
- # Validate that the existing metadata are identical to the new metadata, except that their is_fragment
1238
- # flags may differ.
1239
- if dataclasses.replace(existing_version_md, is_fragment=md.version_md.is_fragment) != md.version_md:
1248
+ # Validate that the existing metadata are identical to the new metadata, except is_fragment
1249
+ # and additional_md which may differ.
1250
+ if (
1251
+ dataclasses.replace(
1252
+ existing_version_md,
1253
+ is_fragment=md.version_md.is_fragment,
1254
+ additional_md=md.version_md.additional_md,
1255
+ )
1256
+ != md.version_md
1257
+ ):
1240
1258
  raise excs.Error(
1241
1259
  f'The version metadata for the replica {path!r}:{md.version_md.version} is inconsistent with '
1242
1260
  'the metadata recorded from a prior replica.\n'
@@ -1716,35 +1734,42 @@ class Catalog:
1716
1734
  tbl_record, version_record = _unpack_row(row, [schema.Table, schema.TableVersion])
1717
1735
  tbl_md = schema.md_from_dict(schema.TableMd, tbl_record.md)
1718
1736
  version_md = schema.md_from_dict(schema.TableVersionMd, version_record.md)
1737
+ tvp = self.construct_tvp(tbl_id, version, tbl_md.ancestor_ids, version_md.created_at)
1719
1738
 
1720
- # Reconstruct the TableVersionPath for the specified TableVersion. We do this by examining the created_at
1739
+ view = View(tbl_id, tbl_record.dir_id, tbl_md.name, tvp, snapshot_only=True)
1740
+ self._tbls[tbl_id, version] = view
1741
+ return view
1742
+
1743
+ def construct_tvp(self, tbl_id: UUID, version: int, ancestor_ids: list[str], created_at: float) -> TableVersionPath:
1744
+ # Construct the TableVersionPath for the specified TableVersion. We do this by examining the created_at
1721
1745
  # timestamps of this table and all its ancestors.
1722
1746
  # TODO: Store the relevant TableVersionPaths in the database, so that we don't need to rely on timestamps
1723
1747
  # (which might be nondeterministic in the future).
1724
1748
 
1749
+ assert Env.get().conn is not None
1750
+
1725
1751
  # Build the list of ancestor versions, starting with the given table and traversing back to the base table.
1726
1752
  # For each proper ancestor, we use the version whose created_at timestamp equals or most nearly precedes the
1727
1753
  # given TableVersion's created_at timestamp.
1728
- ancestors: list[tuple[UUID, int | None]] = [(tbl_id, version)]
1729
- if tbl_md.view_md is not None:
1730
- for ancestor_id, _ in tbl_md.view_md.base_versions:
1731
- q = (
1732
- sql.select(schema.TableVersion)
1733
- .where(schema.TableVersion.tbl_id == ancestor_id)
1734
- .where(schema.TableVersion.md['created_at'].cast(sql.Float) <= version_md.created_at)
1735
- .order_by(schema.TableVersion.md['created_at'].cast(sql.Float).desc())
1736
- .limit(1)
1737
- )
1738
- row = conn.execute(q).one_or_none()
1739
- if row is None:
1740
- # This can happen if an ancestor version is garbage collected; it can also happen in
1741
- # rare circumstances involving table versions created specifically with Pixeltable 0.4.3.
1742
- _logger.info(f'Ancestor {ancestor_id} not found for table {tbl_id}:{version}')
1743
- raise excs.Error('The specified table version is no longer valid and cannot be retrieved.')
1744
- ancestor_version_record = _unpack_row(row, [schema.TableVersion])[0]
1745
- ancestor_version_md = schema.md_from_dict(schema.TableVersionMd, ancestor_version_record.md)
1746
- assert ancestor_version_md.created_at <= version_md.created_at
1747
- ancestors.append((UUID(ancestor_id), ancestor_version_md.version))
1754
+ ancestors: list[tuple[UUID, int]] = [(tbl_id, version)]
1755
+ for ancestor_id in ancestor_ids:
1756
+ q = (
1757
+ sql.select(schema.TableVersion)
1758
+ .where(schema.TableVersion.tbl_id == ancestor_id)
1759
+ .where(schema.TableVersion.md['created_at'].cast(sql.Float) <= created_at)
1760
+ .order_by(schema.TableVersion.md['created_at'].cast(sql.Float).desc())
1761
+ .limit(1)
1762
+ )
1763
+ row = Env.get().conn.execute(q).one_or_none()
1764
+ if row is None:
1765
+ # This can happen if an ancestor version is garbage collected; it can also happen in
1766
+ # rare circumstances involving table versions created specifically with Pixeltable 0.4.3.
1767
+ _logger.info(f'Ancestor {ancestor_id} not found for table {tbl_id}:{version}')
1768
+ raise excs.Error('The specified table version is no longer valid and cannot be retrieved.')
1769
+ ancestor_version_record = _unpack_row(row, [schema.TableVersion])[0]
1770
+ ancestor_version_md = schema.md_from_dict(schema.TableVersionMd, ancestor_version_record.md)
1771
+ assert ancestor_version_md.created_at <= created_at
1772
+ ancestors.append((UUID(ancestor_id), ancestor_version_md.version))
1748
1773
 
1749
1774
  # Force any ancestors to be loaded (base table first).
1750
1775
  for anc_id, anc_version in ancestors[::-1]:
@@ -1756,15 +1781,13 @@ class Catalog:
1756
1781
  for anc_id, anc_version in ancestors[::-1]:
1757
1782
  tvp = TableVersionPath(TableVersionHandle(anc_id, anc_version), base=tvp)
1758
1783
 
1759
- view = View(tbl_id, tbl_record.dir_id, tbl_md.name, tvp, snapshot_only=True)
1760
- self._tbls[tbl_id, version] = view
1761
- return view
1784
+ return tvp
1762
1785
 
1763
1786
  @retry_loop(for_write=False)
1764
- def collect_tbl_history(self, tbl_id: UUID, n: int | None) -> list[schema.FullTableMd]:
1787
+ def collect_tbl_history(self, tbl_id: UUID, n: int | None) -> list[TableVersionCompleteMd]:
1765
1788
  return self._collect_tbl_history(tbl_id, n)
1766
1789
 
1767
- def _collect_tbl_history(self, tbl_id: UUID, n: int | None) -> list[schema.FullTableMd]:
1790
+ def _collect_tbl_history(self, tbl_id: UUID, n: int | None) -> list[TableVersionCompleteMd]:
1768
1791
  """
1769
1792
  Returns the history of up to n versions of the table with the given UUID.
1770
1793
 
@@ -1792,15 +1815,15 @@ class Catalog:
1792
1815
  q = q.limit(n)
1793
1816
  src_rows = Env.get().session.execute(q).fetchall()
1794
1817
  return [
1795
- schema.FullTableMd(
1796
- schema.md_from_dict(schema.TableMd, row.Table.md),
1797
- schema.md_from_dict(schema.TableVersionMd, row.TableVersion.md),
1798
- schema.md_from_dict(schema.TableSchemaVersionMd, row.TableSchemaVersion.md),
1818
+ TableVersionCompleteMd(
1819
+ tbl_md=schema.md_from_dict(schema.TableMd, row.Table.md),
1820
+ version_md=schema.md_from_dict(schema.TableVersionMd, row.TableVersion.md),
1821
+ schema_version_md=schema.md_from_dict(schema.TableSchemaVersionMd, row.TableSchemaVersion.md),
1799
1822
  )
1800
1823
  for row in src_rows
1801
1824
  ]
1802
1825
 
1803
- def load_tbl_md(self, tbl_id: UUID, effective_version: int | None) -> schema.FullTableMd:
1826
+ def load_tbl_md(self, tbl_id: UUID, effective_version: int | None) -> TableVersionCompleteMd:
1804
1827
  """
1805
1828
  Loads metadata from the store for a given table UUID and version.
1806
1829
  """
@@ -1851,7 +1874,7 @@ class Catalog:
1851
1874
  version_md = schema.md_from_dict(schema.TableVersionMd, version_record.md)
1852
1875
  schema_version_md = schema.md_from_dict(schema.TableSchemaVersionMd, schema_version_record.md)
1853
1876
 
1854
- return schema.FullTableMd(tbl_md, version_md, schema_version_md)
1877
+ return TableVersionCompleteMd(tbl_md, version_md, schema_version_md)
1855
1878
 
1856
1879
  def store_tbl_md(
1857
1880
  self,
@@ -1925,8 +1948,12 @@ class Catalog:
1925
1948
  # This table version already exists; update it.
1926
1949
  assert len(tv_rows) == 1 # must be unique
1927
1950
  tv = tv_rows[0]
1928
- # Validate that the only field that can change is 'is_fragment'.
1929
- assert tv.md == dataclasses.asdict(dataclasses.replace(version_md, is_fragment=tv.md['is_fragment']))
1951
+ # Validate that the only fields that can change are 'is_fragment' and 'additional_md'.
1952
+ assert tv.md == dataclasses.asdict(
1953
+ dataclasses.replace(
1954
+ version_md, is_fragment=tv.md['is_fragment'], additional_md=tv.md['additional_md']
1955
+ )
1956
+ )
1930
1957
  result = session.execute(
1931
1958
  sql.update(schema.TableVersion.__table__)
1932
1959
  .values({schema.TableVersion.md: dataclasses.asdict(version_md)})
@@ -1977,7 +2004,7 @@ class Catalog:
1977
2004
  conn.execute(sql.delete(schema.PendingTableOp.__table__).where(schema.PendingTableOp.tbl_id == tbl_id))
1978
2005
  conn.execute(sql.delete(schema.Table.__table__).where(schema.Table.id == tbl_id))
1979
2006
 
1980
- def load_replica_md(self, tbl: Table) -> list[schema.FullTableMd]:
2007
+ def load_replica_md(self, tbl: Table) -> list[TableVersionCompleteMd]:
1981
2008
  """
1982
2009
  Load metadata for the given table along with all its ancestors. The values of TableMd.current_version and
1983
2010
  TableMd.current_schema_version will be adjusted to ensure that the metadata represent a valid (internally
@@ -1998,7 +2025,7 @@ class Catalog:
1998
2025
  # Set the `is_replica` flag on every ancestor's TableMd.
1999
2026
  ancestor_md.tbl_md.is_replica = True
2000
2027
  # For replica metadata, we guarantee that the current_version and current_schema_version of TableMd
2001
- # match the corresponding values in TableVersionMd and TableSchemaVersionMd. This is to ensure that,
2028
+ # match the corresponding values in TableVersionCompleteMd and TableSchemaVersionMd. This is to ensure that,
2002
2029
  # when the metadata is later stored in the catalog of a different Pixeltable instance, the values of
2003
2030
  # current_version and current_schema_version will always point to versions that are known to the
2004
2031
  # destination catalog.
@@ -2017,7 +2044,10 @@ class Catalog:
2017
2044
  self, tbl_id: UUID, effective_version: int | None, check_pending_ops: bool = True
2018
2045
  ) -> TableVersion | None:
2019
2046
  """Creates TableVersion instance from stored metadata and registers it in _tbl_versions."""
2020
- tbl_md, version_md, schema_version_md = self.load_tbl_md(tbl_id, effective_version)
2047
+ table_version_md = self.load_tbl_md(tbl_id, effective_version)
2048
+ tbl_md = table_version_md.tbl_md
2049
+ version_md = table_version_md.version_md
2050
+ schema_version_md = table_version_md.schema_version_md
2021
2051
  view_md = tbl_md.view_md
2022
2052
 
2023
2053
  conn = Env.get().conn
@@ -2041,10 +2071,6 @@ class Catalog:
2041
2071
 
2042
2072
  # load mutable view ids for mutable TableVersions
2043
2073
  mutable_view_ids: list[UUID] = []
2044
- # If this is a replica, effective_version should not be None. We see this today, because
2045
- # the replica's TV instance's Column instances contain value_expr_dicts that reference the live version.
2046
- # This is presumably a source of bugs, because it ignores schema version changes (eg, column renames).
2047
- # TODO: retarget the value_expr_dict when instantiating Columns for a particular TV instance.
2048
2074
  if effective_version is None and not tbl_md.is_replica:
2049
2075
  q = (
2050
2076
  sql.select(schema.Table.id)
@@ -2061,7 +2087,7 @@ class Catalog:
2061
2087
  tbl_version = TableVersion(tbl_id, tbl_md, version_md, effective_version, schema_version_md, mutable_views)
2062
2088
  else:
2063
2089
  assert len(view_md.base_versions) > 0 # a view needs to have a base
2064
- # TODO: add TableVersionMd.is_pure_snapshot() and use that
2090
+ # TODO: add TableVersionCompleteMd.is_pure_snapshot() and use that
2065
2091
  pure_snapshot = (
2066
2092
  view_md.is_snapshot
2067
2093
  and view_md.predicate is None
pixeltable/catalog/column.py CHANGED
@@ -18,6 +18,7 @@ from .globals import MediaValidation, QColumnId, is_valid_identifier
18
18
  if TYPE_CHECKING:
19
19
  from .table_version import TableVersion
20
20
  from .table_version_handle import ColumnHandle, TableVersionHandle
21
+ from .table_version_path import TableVersionPath
21
22
 
22
23
  _logger = logging.getLogger('pixeltable')
23
24
 
@@ -162,26 +163,38 @@ class Column:
162
163
  )
163
164
  return col_md, sch_md
164
165
 
165
- def init_value_expr(self) -> None:
166
+ def init_value_expr(self, tvp: 'TableVersionPath' | None) -> None:
167
+ """
168
+ Initialize the value_expr from its dict representation, if necessary.
169
+
170
+ If `tvp` is not None, retarget the value_expr to the given TableVersionPath.
171
+ """
166
172
  from pixeltable import exprs
167
173
 
168
- if self._value_expr is not None or self.value_expr_dict is None:
174
+ if self._value_expr is None and self.value_expr_dict is None:
169
175
  return
170
- self._value_expr = exprs.Expr.from_dict(self.value_expr_dict)
171
- self._value_expr.bind_rel_paths()
172
- if not self._value_expr.is_valid:
173
- message = (
174
- dedent(
175
- f"""
176
- The computed column {self.name!r} in table {self.get_tbl().name!r} is no longer valid.
177
- {{validation_error}}
178
- You can continue to query existing data from this column, but evaluating it on new data will raise an error.
179
- """ # noqa: E501
176
+
177
+ if self._value_expr is None:
178
+ # Instantiate the Expr from its dict
179
+ self._value_expr = exprs.Expr.from_dict(self.value_expr_dict)
180
+ self._value_expr.bind_rel_paths()
181
+ if not self._value_expr.is_valid:
182
+ message = (
183
+ dedent(
184
+ f"""
185
+ The computed column {self.name!r} in table {self.get_tbl().name!r} is no longer valid.
186
+ {{validation_error}}
187
+ You can continue to query existing data from this column, but evaluating it on new data will raise an error.
188
+ """ # noqa: E501
189
+ )
190
+ .strip()
191
+ .format(validation_error=self._value_expr.validation_error)
180
192
  )
181
- .strip()
182
- .format(validation_error=self._value_expr.validation_error)
183
- )
184
- warnings.warn(message, category=excs.PixeltableWarning, stacklevel=2)
193
+ warnings.warn(message, category=excs.PixeltableWarning, stacklevel=2)
194
+
195
+ if tvp is not None:
196
+ # Retarget the Expr
197
+ self._value_expr = self._value_expr.retarget(tvp)
185
198
 
186
199
  def get_tbl(self) -> TableVersion:
187
200
  tv = self.tbl_handle.get()
pixeltable/catalog/insertable_table.py CHANGED
@@ -16,7 +16,7 @@ from pixeltable.utils.pydantic import is_json_convertible
16
16
 
17
17
  from .globals import MediaValidation
18
18
  from .table import Table
19
- from .table_version import TableVersion, TableVersionMd
19
+ from .table_version import TableVersion, TableVersionCompleteMd
20
20
  from .table_version_handle import TableVersionHandle
21
21
  from .table_version_path import TableVersionPath
22
22
  from .tbl_ops import CreateStoreTableOp, TableOp
@@ -73,7 +73,7 @@ class InsertableTable(Table):
73
73
  comment: str,
74
74
  media_validation: MediaValidation,
75
75
  create_default_idxs: bool,
76
- ) -> tuple[TableVersionMd, list[TableOp]]:
76
+ ) -> tuple[TableVersionCompleteMd, list[TableOp]]:
77
77
  columns = cls._create_columns(schema)
78
78
  cls._verify_schema(columns)
79
79
  column_names = [col.name for col in columns]
pixeltable/catalog/path.py CHANGED
@@ -1,6 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import logging
4
+ from typing import NamedTuple
4
5
 
5
6
  from pixeltable import exceptions as excs
6
7
 
@@ -9,14 +10,9 @@ from .globals import is_valid_identifier
9
10
  _logger = logging.getLogger('pixeltable')
10
11
 
11
12
 
12
- class Path:
13
+ class Path(NamedTuple):
13
14
  components: list[str]
14
- version: int | None
15
-
16
- def __init__(self, components: list[str], version: int | None = None) -> None:
17
- assert len(components) > 0
18
- self.components = components
19
- self.version = version
15
+ version: int | None = None
20
16
 
21
17
  @classmethod
22
18
  def parse(
@@ -50,6 +46,7 @@ class Path:
50
46
  if version is not None and not allow_versioned_path:
51
47
  raise excs.Error(f'Versioned path not allowed here: {path}')
52
48
 
49
+ assert len(components) > 0
53
50
  return Path(components, version)
54
51
 
55
52
  @property
@@ -118,8 +115,5 @@ class Path:
118
115
  def __hash__(self) -> int:
119
116
  return hash(str(self))
120
117
 
121
- def __lt__(self, other: Path) -> bool:
122
- return str(self) < str(other)
123
-
124
118
 
125
119
  ROOT_PATH = Path([''])
pixeltable/catalog/table.py CHANGED
@@ -1624,6 +1624,57 @@ class Table(SchemaObject):
1624
1624
  # remove cached md in order to force a reload on the next operation
1625
1625
  self._tbl_version_path.clear_cached_md()
1626
1626
 
1627
+ def push(self, *, version: int | None = None) -> None:
1628
+ from pixeltable.share import push_replica
1629
+ from pixeltable.share.protocol import PxtUri
1630
+
1631
+ tbl_version = self._tbl_version.get()
1632
+ pxt_uri = tbl_version.pxt_uri
1633
+
1634
+ if tbl_version.is_replica:
1635
+ raise excs.Error(f'push(): Cannot push replica table {self._name!r}. (Did you mean `pull()`?)')
1636
+ if pxt_uri is None:
1637
+ raise excs.Error(
1638
+ f'push(): Table {self._name!r} has not yet been published to Pixeltable Cloud. '
1639
+ 'To publish it, use `pxt.publish()` instead.'
1640
+ )
1641
+
1642
+ # Parse the pxt URI to extract org/db and create a UUID-based URI for pushing
1643
+ parsed_uri = PxtUri(uri=pxt_uri)
1644
+ uuid_uri_obj = PxtUri.from_components(org=parsed_uri.org, id=self._id, db=parsed_uri.db)
1645
+ uuid_uri = str(uuid_uri_obj)
1646
+
1647
+ if version is None:
1648
+ # Push this version
1649
+ push_replica(uuid_uri, self)
1650
+ else:
1651
+ versioned_path = catalog.Path.parse(self._path())._replace(version=version)
1652
+ versioned_tbl = catalog.Catalog.get().get_table(versioned_path, IfNotExistsParam.IGNORE)
1653
+ if versioned_tbl is None:
1654
+ raise excs.Error(f'Table {self._name!r} has no known version {version}')
1655
+ assert versioned_tbl._id == self._id
1656
+ push_replica(uuid_uri, versioned_tbl)
1657
+
1658
+ def pull(self, *, version: int | None = None) -> None:
1659
+ from pixeltable.share import pull_replica
1660
+ from pixeltable.share.protocol import PxtUri
1661
+
1662
+ tbl_version = self._tbl_version_path.tbl_version.get()
1663
+ pxt_uri = tbl_version.pxt_uri
1664
+
1665
+ if not tbl_version.is_replica:
1666
+ raise excs.Error(
1667
+ f'pull(): Table {self._name!r} is not a replica of a Pixeltable Cloud table (nothing to `pull()`).'
1668
+ )
1669
+ assert pxt_uri is not None
1670
+
1671
+ # Parse the pxt URI to extract org/db and create a UUID-based URI for pulling
1672
+ parsed_uri = PxtUri(uri=pxt_uri)
1673
+ uuid_uri_obj = PxtUri.from_components(org=parsed_uri.org, id=self._id, db=parsed_uri.db, version=version)
1674
+ uuid_uri = str(uuid_uri_obj)
1675
+
1676
+ pull_replica(self._path(), uuid_uri)
1677
+
1627
1678
  def external_stores(self) -> list[str]:
1628
1679
  return list(self._tbl_version.get().external_stores.keys())
1629
1680
 
pixeltable/catalog/table_version.py CHANGED
@@ -43,17 +43,25 @@ _logger = logging.getLogger('pixeltable')
43
43
 
44
44
 
45
45
  @dataclasses.dataclass(frozen=True)
46
- class TableVersionMd:
46
+ class TableVersionCompleteMd:
47
47
  """
48
48
  Complete set of md records for a specific TableVersion instance.
49
-
50
- TODO: subsume schema.FullTableMd
51
49
  """
52
50
 
53
51
  tbl_md: schema.TableMd
54
52
  version_md: schema.TableVersionMd
55
53
  schema_version_md: schema.TableSchemaVersionMd
56
54
 
55
+ @property
56
+ def is_pure_snapshot(self) -> bool:
57
+ return (
58
+ self.tbl_md is not None
59
+ and self.tbl_md.view_md is not None
60
+ and self.tbl_md.view_md.is_snapshot
61
+ and self.tbl_md.view_md.predicate is None
62
+ and len(self.schema_version_md.columns) == 0
63
+ )
64
+
57
65
 
58
66
  class TableVersion:
59
67
  """
@@ -240,7 +248,7 @@ class TableVersion:
240
248
  media_validation: MediaValidation,
241
249
  create_default_idxs: bool,
242
250
  view_md: schema.ViewMd | None = None,
243
- ) -> TableVersionMd:
251
+ ) -> TableVersionCompleteMd:
244
252
  from .table_version_handle import TableVersionHandle
245
253
 
246
254
  user = Env.get().user
@@ -337,7 +345,7 @@ class TableVersion:
337
345
  media_validation=media_validation.name.lower(),
338
346
  additional_md={},
339
347
  )
340
- return TableVersionMd(tbl_md, table_version_md, schema_version_md)
348
+ return TableVersionCompleteMd(tbl_md, table_version_md, schema_version_md)
341
349
 
342
350
  def exec_op(self, op: TableOp) -> None:
343
351
  if op.create_store_table_op is not None:
@@ -365,7 +373,7 @@ class TableVersion:
365
373
  _logger.debug(f'Loaded view {self.name} with {row_counts.num_rows} rows')
366
374
 
367
375
  @classmethod
368
- def create_replica(cls, md: schema.FullTableMd) -> TableVersion:
376
+ def create_replica(cls, md: TableVersionCompleteMd) -> TableVersion:
369
377
  from .catalog import Catalog, TableVersionPath
370
378
 
371
379
  assert Env.get().in_xact
@@ -437,6 +445,8 @@ class TableVersion:
437
445
  def _init_schema(self) -> None:
438
446
  from pixeltable.store import StoreComponentView, StoreTable, StoreView
439
447
 
448
+ from .catalog import Catalog
449
+
440
450
  # initialize IndexBase instances and collect sa_col_types
441
451
  idxs: dict[int, index.IndexBase] = {}
442
452
  val_col_idxs: dict[int, index.IndexBase] = {} # key: id of value column
@@ -529,8 +539,16 @@ class TableVersion:
529
539
  self.idxs_by_col.setdefault(indexed_col_id, []).append(info)
530
540
 
531
541
  # create value exprs, now that we have all lookup structures in place
542
+ tvp: TableVersionPath | None = None
543
+ if self.effective_version is not None:
544
+ # for snapshot TableVersion instances, we need to retarget the column value_exprs to the snapshot;
545
+ # otherwise they'll incorrectly refer to the live table. So, construct a full TableVersionPath to
546
+ # use for retargeting.
547
+ tvp = Catalog.get().construct_tvp(
548
+ self.id, self.effective_version, self.tbl_md.ancestor_ids, self.version_md.created_at
549
+ )
532
550
  for col in self.cols_by_id.values():
533
- col.init_value_expr()
551
+ col.init_value_expr(tvp)
534
552
 
535
553
  # create the sqlalchemy schema, after instantiating all Columns
536
554
  if self.is_component_view:
@@ -1413,6 +1431,21 @@ class TableVersion:
1413
1431
  self._tbl_md.external_stores.pop(idx)
1414
1432
  self._write_md(new_version=True, new_schema_version=True)
1415
1433
 
1434
+ @property
1435
+ def pxt_uri(self) -> str | None:
1436
+ return self._tbl_md.additional_md.get('pxt_uri')
1437
+
1438
+ def update_pxt_uri(self, pxt_uri: str | None) -> None:
1439
+ if self._tbl_md.additional_md.get('pxt_uri') == pxt_uri:
1440
+ return # Nothing to do
1441
+
1442
+ if pxt_uri is None:
1443
+ del self._tbl_md.additional_md['pxt_uri'] # must be present due to preceding check
1444
+ else:
1445
+ self._tbl_md.additional_md['pxt_uri'] = pxt_uri
1446
+
1447
+ self._write_md(new_version=False, new_schema_version=False)
1448
+
1416
1449
  @property
1417
1450
  def tbl_md(self) -> schema.TableMd:
1418
1451
  return self._tbl_md
pixeltable/catalog/view.py CHANGED
@@ -14,7 +14,7 @@ from pixeltable.iterators import ComponentIterator
14
14
  from .column import Column
15
15
  from .globals import _POS_COLUMN_NAME, MediaValidation
16
16
  from .table import Table
17
- from .table_version import TableVersion, TableVersionMd
17
+ from .table_version import TableVersion, TableVersionCompleteMd
18
18
  from .table_version_handle import TableVersionHandle
19
19
  from .table_version_path import TableVersionPath
20
20
  from .tbl_ops import CreateStoreTableOp, LoadViewOp, TableOp
@@ -84,7 +84,7 @@ class View(Table):
84
84
  media_validation: MediaValidation,
85
85
  iterator_cls: type[ComponentIterator] | None,
86
86
  iterator_args: dict | None,
87
- ) -> tuple[TableVersionMd, list[TableOp] | None]:
87
+ ) -> tuple[TableVersionCompleteMd, list[TableOp] | None]:
88
88
  from pixeltable.plan import SampleClause
89
89
 
90
90
  # Convert select_list to more additional_columns if present
pixeltable/config.py CHANGED
@@ -176,6 +176,7 @@ KNOWN_CONFIG_OPTIONS = {
176
176
  'gemini': {'api_key': 'Gemini API key', 'rate_limits': 'Per-model rate limits for Gemini API requests'},
177
177
  'hf': {'auth_token': 'Hugging Face access token'},
178
178
  'imagen': {'rate_limits': 'Per-model rate limits for Imagen API requests'},
179
+ 'reve': {'api_key': 'Reve API key', 'rate_limit': 'Rate limit for Reve API requests (requests per minute)'},
179
180
  'groq': {'api_key': 'Groq API key', 'rate_limit': 'Rate limit for Groq API requests'},
180
181
  'label_studio': {'api_key': 'Label Studio API key', 'url': 'Label Studio server URL'},
181
182
  'mistral': {'api_key': 'Mistral API key', 'rate_limit': 'Rate limit for Mistral API requests'},
pixeltable/env.py CHANGED
@@ -792,6 +792,7 @@ class Env:
792
792
  self.__register_package('pyarrow')
793
793
  self.__register_package('pydantic')
794
794
  self.__register_package('replicate')
795
+ self.__register_package('reve')
795
796
  self.__register_package('sentencepiece')
796
797
  self.__register_package('sentence_transformers', library_name='sentence-transformers')
797
798
  self.__register_package('soundfile')
@@ -806,6 +807,7 @@ class Env:
806
807
  self.__register_package('whisperx')
807
808
  self.__register_package('yolox', library_name='pixeltable-yolox')
808
809
  self.__register_package('lancedb')
810
+ self.__register_package('scenedetect')
809
811
 
810
812
  def __register_package(self, package_name: str, library_name: str | None = None) -> None:
811
813
  is_installed: bool
pixeltable/exprs/column_ref.py CHANGED
@@ -221,7 +221,8 @@ class ColumnRef(Expr):
221
221
  return self._descriptors().to_html()
222
222
 
223
223
  def _descriptors(self) -> DescriptionHelper:
224
- tbl = catalog.Catalog.get().get_table_by_id(self.col.tbl_handle.id)
224
+ with catalog.Catalog.get().begin_xact():
225
+ tbl = catalog.Catalog.get().get_table_by_id(self.col.tbl_handle.id)
225
226
  helper = DescriptionHelper()
226
227
  helper.append(f'Column\n{self.col.name!r}\n(of table {tbl._path()!r})')
227
228
  helper.append(tbl._col_descriptor([self.col.name]))
pixeltable/functions/__init__.py CHANGED
@@ -21,6 +21,7 @@ from . import (
21
21
  openai,
22
22
  openrouter,
23
23
  replicate,
24
+ reve,
24
25
  string,
25
26
  timestamp,
26
27
  together,
pixeltable/functions/image.py CHANGED
@@ -10,14 +10,13 @@ t.select(t.img_col.convert('L')).collect()
10
10
  ```
11
11
  """
12
12
 
13
- import base64
14
-
15
13
  import PIL.Image
16
14
 
17
15
  import pixeltable as pxt
18
16
  import pixeltable.type_system as ts
19
17
  from pixeltable.exprs import Expr
20
18
  from pixeltable.utils.code import local_public_names
19
+ from pixeltable.utils.image import to_base64
21
20
 
22
21
 
23
22
  @pxt.udf(is_method=True)
@@ -29,12 +28,7 @@ def b64_encode(img: PIL.Image.Image, image_format: str = 'png') -> str:
29
28
  img: image
30
29
  image_format: image format [supported by PIL](https://pillow.readthedocs.io/en/stable/handbook/image-file-formats.html#fully-supported-formats)
31
30
  """
32
- import io
33
-
34
- bytes_arr = io.BytesIO()
35
- img.save(bytes_arr, format=image_format)
36
- b64_bytes = base64.b64encode(bytes_arr.getvalue())
37
- return b64_bytes.decode('utf-8')
31
+ return to_base64(img, format=image_format)
38
32
 
39
33
 
40
34
  @pxt.udf(substitute_fn=PIL.Image.alpha_composite, is_method=True)