pixeltable 0.4.4__py3-none-any.whl → 0.4.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (40) hide show
  1. pixeltable/__version__.py +2 -2
  2. pixeltable/catalog/catalog.py +106 -71
  3. pixeltable/catalog/path.py +59 -20
  4. pixeltable/catalog/schema_object.py +1 -0
  5. pixeltable/catalog/table.py +6 -0
  6. pixeltable/catalog/table_version.py +2 -1
  7. pixeltable/catalog/view.py +21 -10
  8. pixeltable/config.py +12 -4
  9. pixeltable/dataframe.py +57 -1
  10. pixeltable/env.py +25 -13
  11. pixeltable/exec/aggregation_node.py +1 -1
  12. pixeltable/exec/cache_prefetch_node.py +2 -6
  13. pixeltable/exec/component_iteration_node.py +4 -3
  14. pixeltable/exec/data_row_batch.py +10 -53
  15. pixeltable/exec/expr_eval/expr_eval_node.py +2 -2
  16. pixeltable/exec/in_memory_data_node.py +13 -11
  17. pixeltable/exec/sql_node.py +6 -7
  18. pixeltable/exprs/data_row.py +13 -13
  19. pixeltable/exprs/row_builder.py +16 -4
  20. pixeltable/exprs/string_op.py +1 -1
  21. pixeltable/func/expr_template_function.py +1 -4
  22. pixeltable/functions/date.py +1 -1
  23. pixeltable/functions/math.py +1 -1
  24. pixeltable/functions/openai.py +8 -4
  25. pixeltable/functions/timestamp.py +6 -6
  26. pixeltable/globals.py +14 -10
  27. pixeltable/metadata/schema.py +1 -1
  28. pixeltable/plan.py +5 -14
  29. pixeltable/share/packager.py +13 -13
  30. pixeltable/store.py +9 -6
  31. pixeltable/type_system.py +2 -1
  32. pixeltable/utils/filecache.py +1 -1
  33. pixeltable/utils/http_server.py +2 -3
  34. pixeltable/utils/media_store.py +84 -39
  35. {pixeltable-0.4.4.dist-info → pixeltable-0.4.6.dist-info}/METADATA +40 -41
  36. {pixeltable-0.4.4.dist-info → pixeltable-0.4.6.dist-info}/RECORD +44 -44
  37. {pixeltable-0.4.4.dist-info → pixeltable-0.4.6.dist-info}/WHEEL +1 -1
  38. pixeltable-0.4.6.dist-info/entry_points.txt +2 -0
  39. pixeltable-0.4.4.dist-info/entry_points.txt +0 -3
  40. {pixeltable-0.4.4.dist-info → pixeltable-0.4.6.dist-info/licenses}/LICENSE +0 -0
pixeltable/__version__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  # These version placeholders will be replaced during build.
2
- __version__ = '0.4.4'
3
- __version_tuple__ = (0, 4, 4)
2
+ __version__ = '0.0.0'
3
+ __version_tuple__ = (0, 0, 0)
@@ -165,7 +165,7 @@ class Catalog:
165
165
  # - mutable version of a table: version == None (even though TableVersion.version is set correctly)
166
166
  # - snapshot versions: records the version of the snapshot
167
167
  _tbl_versions: dict[tuple[UUID, Optional[int]], TableVersion]
168
- _tbls: dict[UUID, Table]
168
+ _tbls: dict[tuple[UUID, Optional[int]], Table]
169
169
  _in_write_xact: bool # True if we're in a write transaction
170
170
  _x_locked_tbl_ids: set[UUID] # non-empty for write transactions
171
171
  _modified_tvs: set[TableVersionHandle] # TableVersion instances modified in the current transaction
@@ -698,6 +698,7 @@ class Catalog:
698
698
 
699
699
  def get_dir_path(self, dir_id: UUID) -> Path:
700
700
  """Return path for directory with given id"""
701
+ assert isinstance(dir_id, UUID)
701
702
  conn = Env.get().conn
702
703
  names: list[str] = []
703
704
  while True:
@@ -708,7 +709,7 @@ class Catalog:
708
709
  break
709
710
  names.insert(0, dir.md['name'])
710
711
  dir_id = dir.parent_id
711
- return Path('.'.join(names), empty_is_valid=True, allow_system_paths=True)
712
+ return Path.parse('.'.join(names), allow_empty_path=True, allow_system_path=True)
712
713
 
713
714
  @dataclasses.dataclass
714
715
  class DirEntry:
@@ -825,7 +826,9 @@ class Catalog:
825
826
  add_dir_obj = Dir(add_dir.id, add_dir.parent_id, add_dir.md['name']) if add_dir is not None else None
826
827
  return add_obj, add_dir_obj, drop_obj
827
828
 
828
- def _get_dir_entry(self, dir_id: UUID, name: str, lock_entry: bool = False) -> Optional[SchemaObject]:
829
+ def _get_dir_entry(
830
+ self, dir_id: UUID, name: str, version: Optional[int] = None, lock_entry: bool = False
831
+ ) -> Optional[SchemaObject]:
829
832
  user = Env.get().user
830
833
  conn = Env.get().conn
831
834
 
@@ -854,9 +857,7 @@ class Catalog:
854
857
  )
855
858
  tbl_id = conn.execute(q).scalar_one_or_none()
856
859
  if tbl_id is not None:
857
- if tbl_id not in self._tbls:
858
- _ = self._load_tbl(tbl_id)
859
- return self._tbls[tbl_id]
860
+ return self.get_table_by_id(tbl_id, version)
860
861
 
861
862
  return None
862
863
 
@@ -872,7 +873,7 @@ class Catalog:
872
873
  """Return the schema object at the given path, or None if it doesn't exist.
873
874
 
874
875
  Raises Error if
875
- - the parent directory doesn't exist'
876
+ - the parent directory doesn't exist
876
877
  - raise_if_exists is True and the path exists
877
878
  - raise_if_not_exists is True and the path does not exist
878
879
  - expected is not None and the existing object has a different type
@@ -892,7 +893,7 @@ class Catalog:
892
893
  parent_dir = self._get_dir(parent_path, lock_dir=lock_parent)
893
894
  if parent_dir is None:
894
895
  raise excs.Error(f'Directory {parent_path!r} does not exist.')
895
- obj = self._get_dir_entry(parent_dir.id, path.name, lock_entry=lock_obj)
896
+ obj = self._get_dir_entry(parent_dir.id, path.name, path.version, lock_entry=lock_obj)
896
897
 
897
898
  if obj is None and raise_if_not_exists:
898
899
  raise excs.Error(f'Path {path!r} does not exist.')
@@ -903,18 +904,14 @@ class Catalog:
903
904
  raise excs.Error(f'{path!r} needs to be a {expected_name} but is a {obj._display_name()}.')
904
905
  return obj
905
906
 
906
- def get_table_by_id(self, tbl_id: UUID) -> Optional[Table]:
907
+ def get_table_by_id(self, tbl_id: UUID, version: Optional[int] = None) -> Optional[Table]:
907
908
  """Must be executed inside a transaction. Might raise PendingTableOpsError."""
908
- if tbl_id in self._tbls:
909
- return self._tbls[tbl_id]
910
- tbl = self._load_tbl(tbl_id)
911
- # # if this is a mutable table, we also need to have its mutable views loaded, in order to track column
912
- # # dependencies
913
- # tbl_version = tbl._tbl_version.get()
914
- # if tbl_version.is_mutable:
915
- # for v in tbl_version.mutable_views:
916
- # _ = self.get_table_by_id(v.id)
917
- return tbl
909
+ if (tbl_id, version) not in self._tbls:
910
+ if version is None:
911
+ self._load_tbl(tbl_id)
912
+ else:
913
+ self._load_tbl_at_version(tbl_id, version)
914
+ return self._tbls.get((tbl_id, version))
918
915
 
919
916
  @retry_loop(for_write=True)
920
917
  def create_table(
@@ -946,7 +943,7 @@ class Catalog:
946
943
  comment=comment,
947
944
  media_validation=media_validation,
948
945
  )
949
- self._tbls[tbl._id] = tbl
946
+ self._tbls[tbl._id, None] = tbl
950
947
  return tbl
951
948
 
952
949
  def create_view(
@@ -1045,12 +1042,12 @@ class Catalog:
1045
1042
  )
1046
1043
 
1047
1044
  # Ensure that the system directory exists.
1048
- self._create_dir(Path('_system', allow_system_paths=True), if_exists=IfExistsParam.IGNORE, parents=False)
1045
+ self._create_dir(Path.parse('_system', allow_system_path=True), if_exists=IfExistsParam.IGNORE, parents=False)
1049
1046
 
1050
1047
  # Now check to see if this table already exists in the catalog.
1051
1048
  existing = self.get_table_by_id(tbl_id)
1052
1049
  if existing is not None:
1053
- existing_path = Path(existing._path(), allow_system_paths=True)
1050
+ existing_path = Path.parse(existing._path(), allow_system_path=True)
1054
1051
  if existing_path != path:
1055
1052
  # It does exist, under a different path from the specified one.
1056
1053
  if not existing_path.is_system_path:
@@ -1073,12 +1070,12 @@ class Catalog:
1073
1070
  replica_path: Path
1074
1071
  if replica is None:
1075
1072
  # We've never seen this table before. Create a new anonymous system table for it.
1076
- replica_path = Path(f'_system.replica_{ancestor_id.hex}', allow_system_paths=True)
1073
+ replica_path = Path.parse(f'_system.replica_{ancestor_id.hex}', allow_system_path=True)
1077
1074
  else:
1078
1075
  # The table already exists in the catalog. The existing path might be a system path (if the table
1079
1076
  # was created as an anonymous base table of some other table), or it might not (if it's a snapshot
1080
1077
  # that was directly replicated by the user at some point). In either case, use the existing path.
1081
- replica_path = Path(replica._path(), allow_system_paths=True)
1078
+ replica_path = Path.parse(replica._path(), allow_system_path=True)
1082
1079
 
1083
1080
  # Store the metadata; it could be a new version (in which case a new record will be created), or a known
1084
1081
  # version (in which case the newly received metadata will be validated as identical).
@@ -1138,7 +1135,7 @@ class Catalog:
1138
1135
  q = (
1139
1136
  sql.select(schema.TableVersion.md)
1140
1137
  .where(schema.TableVersion.tbl_id == tbl_id)
1141
- .where(sql.text(f"({schema.TableVersion.__table__}.md->>'version')::int = {md.version_md.version}"))
1138
+ .where(schema.TableVersion.md['version'].cast(sql.Integer) == md.version_md.version)
1142
1139
  )
1143
1140
  existing_version_md_row = conn.execute(q).one_or_none()
1144
1141
  if existing_version_md_row is None:
@@ -1157,10 +1154,7 @@ class Catalog:
1157
1154
  sql.select(schema.TableSchemaVersion.md)
1158
1155
  .where(schema.TableSchemaVersion.tbl_id == tbl_id)
1159
1156
  .where(
1160
- sql.text(
1161
- f"({schema.TableSchemaVersion.__table__}.md->>'schema_version')::int = "
1162
- f'{md.schema_version_md.schema_version}'
1163
- )
1157
+ schema.TableSchemaVersion.md['schema_version'].cast(sql.Integer) == md.schema_version_md.schema_version
1164
1158
  )
1165
1159
  )
1166
1160
  existing_schema_version_md_row = conn.execute(q).one_or_none()
@@ -1274,8 +1268,10 @@ class Catalog:
1274
1268
  tv.drop()
1275
1269
 
1276
1270
  self.delete_tbl_md(tbl._id)
1277
- assert tbl._id in self._tbls
1278
- del self._tbls[tbl._id]
1271
+ assert (tbl._id, None) in self._tbls
1272
+ versions = [k[1] for k in self._tbls if k[0] == tbl._id]
1273
+ for version in versions:
1274
+ del self._tbls[tbl._id, version]
1279
1275
  _logger.info(f'Dropped table `{tbl._path()}`.')
1280
1276
 
1281
1277
  @retry_loop(for_write=True)
@@ -1363,7 +1359,7 @@ class Catalog:
1363
1359
  tbl_count = conn.execute(q).scalar()
1364
1360
  if tbl_count == 0:
1365
1361
  raise excs.Error(self._dropped_tbl_error_msg(tbl_id))
1366
- q = sql.select(schema.Table.id).where(sql.text(f"md->'view_md'->'base_versions'->0->>0 = {tbl_id.hex!r}"))
1362
+ q = sql.select(schema.Table.id).where(schema.Table.md['view_md']['base_versions'][0][0].astext == tbl_id.hex)
1367
1363
  if for_update:
1368
1364
  q = q.with_for_update()
1369
1365
  result = [r[0] for r in conn.execute(q).all()]
@@ -1462,7 +1458,7 @@ class Catalog:
1462
1458
  row = conn.execute(q).one_or_none()
1463
1459
  return schema.Dir(**row._mapping) if row is not None else None
1464
1460
 
1465
- def _load_tbl(self, tbl_id: UUID) -> Optional[Table]:
1461
+ def _load_tbl(self, tbl_id: UUID) -> None:
1466
1462
  """Loads metadata for the table with the given id and caches it."""
1467
1463
  _logger.info(f'Loading table {tbl_id}')
1468
1464
  from .insertable_table import InsertableTable
@@ -1480,12 +1476,8 @@ class Catalog:
1480
1476
  sql.select(schema.Table, schema.TableSchemaVersion)
1481
1477
  .join(schema.TableSchemaVersion)
1482
1478
  .where(schema.Table.id == schema.TableSchemaVersion.tbl_id)
1483
- # Table.md['current_schema_version'] == TableSchemaVersion.schema_version
1484
1479
  .where(
1485
- sql.text(
1486
- f"({schema.Table.__table__}.md->>'current_schema_version')::int = "
1487
- f'{schema.TableSchemaVersion.__table__}.{schema.TableSchemaVersion.schema_version.name}'
1488
- )
1480
+ schema.Table.md['current_schema_version'].cast(sql.Integer) == schema.TableSchemaVersion.schema_version
1489
1481
  )
1490
1482
  .where(schema.Table.id == tbl_id)
1491
1483
  )
@@ -1501,8 +1493,8 @@ class Catalog:
1501
1493
  if (tbl_id, None) not in self._tbl_versions:
1502
1494
  _ = self._load_tbl_version(tbl_id, None)
1503
1495
  tbl = InsertableTable(tbl_record.dir_id, TableVersionHandle(tbl_id, None))
1504
- self._tbls[tbl_id] = tbl
1505
- return tbl
1496
+ self._tbls[tbl_id, None] = tbl
1497
+ return
1506
1498
 
1507
1499
  # this is a view; determine the sequence of TableVersions to load
1508
1500
  tbl_version_path: list[tuple[UUID, Optional[int]]] = []
@@ -1526,8 +1518,68 @@ class Catalog:
1526
1518
  view_path = TableVersionPath(TableVersionHandle(id, effective_version), base=base_path)
1527
1519
  base_path = view_path
1528
1520
  view = View(tbl_id, tbl_record.dir_id, tbl_md.name, view_path, snapshot_only=tbl_md.is_pure_snapshot)
1529
- self._tbls[tbl_id] = view
1530
- return view
1521
+ self._tbls[tbl_id, None] = view
1522
+
1523
+ def _load_tbl_at_version(self, tbl_id: UUID, version: int) -> None:
1524
+ from .view import View
1525
+
1526
+ # Load the specified TableMd and TableVersionMd records from the db.
1527
+ conn = Env.get().conn
1528
+ q: sql.Executable = (
1529
+ sql.select(schema.Table, schema.TableVersion)
1530
+ .join(schema.TableVersion)
1531
+ .where(schema.Table.id == tbl_id)
1532
+ .where(schema.Table.id == schema.TableVersion.tbl_id)
1533
+ .where(schema.TableVersion.version == version)
1534
+ )
1535
+ row = conn.execute(q).one_or_none()
1536
+ if row is None:
1537
+ return None
1538
+ tbl_record, version_record = _unpack_row(row, [schema.Table, schema.TableVersion])
1539
+ tbl_md = schema.md_from_dict(schema.TableMd, tbl_record.md)
1540
+ version_md = schema.md_from_dict(schema.TableVersionMd, version_record.md)
1541
+
1542
+ # Reconstruct the TableVersionPath for the specified TableVersion. We do this by examining the created_at
1543
+ # timestamps of this table and all its ancestors.
1544
+ # TODO: Store the relevant TableVersionPaths in the database, so that we don't need to rely on timestamps
1545
+ # (which might be nondeterministic in the future).
1546
+
1547
+ # Build the list of ancestor versions, starting with the given table and traversing back to the base table.
1548
+ # For each proper ancestor, we use the version whose created_at timestamp equals or most nearly precedes the
1549
+ # given TableVersion's created_at timestamp.
1550
+ ancestors: list[tuple[UUID, Optional[int]]] = [(tbl_id, version)]
1551
+ if tbl_md.view_md is not None:
1552
+ for ancestor_id, _ in tbl_md.view_md.base_versions:
1553
+ q = (
1554
+ sql.select(schema.TableVersion)
1555
+ .where(schema.TableVersion.tbl_id == ancestor_id)
1556
+ .where(schema.TableVersion.md['created_at'].cast(sql.Float) <= version_md.created_at)
1557
+ .order_by(schema.TableVersion.md['created_at'].cast(sql.Float).desc())
1558
+ .limit(1)
1559
+ )
1560
+ row = conn.execute(q).one_or_none()
1561
+ if row is None:
1562
+ # This can happen if an ancestor version is garbage collected; it can also happen in
1563
+ # rare circumstances involving table versions created specifically with Pixeltable 0.4.3.
1564
+ _logger.info(f'Ancestor {ancestor_id} not found for table {tbl_id}:{version}')
1565
+ raise excs.Error('The specified table version is no longer valid and cannot be retrieved.')
1566
+ ancestor_version_record = _unpack_row(row, [schema.TableVersion])[0]
1567
+ ancestor_version_md = schema.md_from_dict(schema.TableVersionMd, ancestor_version_record.md)
1568
+ assert ancestor_version_md.created_at <= version_md.created_at
1569
+ ancestors.append((UUID(ancestor_id), ancestor_version_md.version))
1570
+
1571
+ # Force any ancestors to be loaded (base table first).
1572
+ for anc_id, anc_version in ancestors[::-1]:
1573
+ if (anc_id, anc_version) not in self._tbl_versions:
1574
+ _ = self._load_tbl_version(anc_id, anc_version)
1575
+
1576
+ # Now reconstruct the relevant TableVersionPath instance from the ancestor versions.
1577
+ tvp: Optional[TableVersionPath] = None
1578
+ for anc_id, anc_version in ancestors[::-1]:
1579
+ tvp = TableVersionPath(TableVersionHandle(anc_id, anc_version), base=tvp)
1580
+
1581
+ view = View(tbl_id, tbl_record.dir_id, tbl_md.name, tvp, snapshot_only=True)
1582
+ self._tbls[tbl_id, version] = view
1531
1583
 
1532
1584
  @retry_loop(for_write=False)
1533
1585
  def collect_tbl_history(self, tbl_id: UUID, n: Optional[int]) -> list[schema.FullTableMd]:
@@ -1547,8 +1599,7 @@ class Catalog:
1547
1599
  .select_from(schema.TableVersion)
1548
1600
  .join(
1549
1601
  schema.TableSchemaVersion,
1550
- sql.cast(schema.TableVersion.md['schema_version'], sql.Integer)
1551
- == schema.TableSchemaVersion.schema_version,
1602
+ schema.TableVersion.md['schema_version'].cast(sql.Integer) == schema.TableSchemaVersion.schema_version,
1552
1603
  )
1553
1604
  .where(schema.TableVersion.tbl_id == tbl_id)
1554
1605
  .where(schema.TableSchemaVersion.tbl_id == tbl_id)
@@ -1590,13 +1641,9 @@ class Catalog:
1590
1641
  # JOIN TableVersion tv ON (tv.tbl_id = tbl_id AND tv.version = effective_version)
1591
1642
  # JOIN TableSchemaVersion tsv ON (tsv.tbl_id = tbl_id AND tv.md.schema_version = tsv.schema_version)
1592
1643
  # WHERE t.id = tbl_id
1593
- q = q.where(sql.text(f"({schema.TableVersion.__table__}.md->>'version')::int = {effective_version}")).where(
1594
- sql.text(
1595
- (
1596
- f"({schema.TableVersion.__table__}.md->>'schema_version')::int = "
1597
- f'{schema.TableSchemaVersion.__table__}.{schema.TableSchemaVersion.schema_version.name}'
1598
- )
1599
- )
1644
+ q = q.where(
1645
+ schema.TableVersion.md['version'].cast(sql.Integer) == effective_version,
1646
+ schema.TableVersion.md['schema_version'].cast(sql.Integer) == schema.TableSchemaVersion.schema_version,
1600
1647
  )
1601
1648
  else:
1602
1649
  # we are loading the current version
@@ -1606,17 +1653,8 @@ class Catalog:
1606
1653
  # JOIN TableSchemaVersion tsv ON (tsv.tbl_id = tbl_id AND t.current_schema_version = tsv.schema_version)
1607
1654
  # WHERE t.id = tbl_id
1608
1655
  q = q.where(
1609
- sql.text(
1610
- f"({schema.Table.__table__}.md->>'current_version')::int = "
1611
- f'{schema.TableVersion.__table__}.{schema.TableVersion.version.name}'
1612
- )
1613
- ).where(
1614
- sql.text(
1615
- (
1616
- f"({schema.Table.__table__}.md->>'current_schema_version')::int = "
1617
- f'{schema.TableSchemaVersion.__table__}.{schema.TableSchemaVersion.schema_version.name}'
1618
- )
1619
- )
1656
+ schema.Table.md['current_version'].cast(sql.Integer) == schema.TableVersion.version,
1657
+ schema.Table.md['current_schema_version'].cast(sql.Integer) == schema.TableSchemaVersion.schema_version,
1620
1658
  )
1621
1659
 
1622
1660
  row = conn.execute(q).one_or_none()
@@ -1719,9 +1757,7 @@ class Catalog:
1719
1757
  stmt = (
1720
1758
  sql.update(schema.TableVersion)
1721
1759
  .where(schema.TableVersion.tbl_id == tbl_id, schema.TableVersion.version == version)
1722
- .values(
1723
- md=schema.TableVersion.md.op('||')({'additional_md': {'update_status': dataclasses.asdict(status)}})
1724
- )
1760
+ .values(md=schema.TableVersion.md.op('||')({'update_status': dataclasses.asdict(status)}))
1725
1761
  )
1726
1762
 
1727
1763
  res = conn.execute(stmt)
@@ -1798,11 +1834,10 @@ class Catalog:
1798
1834
  # This is presumably a source of bugs, because it ignores schema version changes (eg, column renames).
1799
1835
  # TODO: retarget the value_expr_dict when instantiating Columns for a particular TV instance.
1800
1836
  if effective_version is None and not tbl_md.is_replica:
1801
- q = sql.select(schema.Table.id).where(
1802
- sql.text(
1803
- f"md->'view_md'->'base_versions'->0->>0 = {tbl_id.hex!r} "
1804
- "AND md->'view_md'->'base_versions'->0->>1 IS NULL"
1805
- )
1837
+ q = (
1838
+ sql.select(schema.Table.id)
1839
+ .where(schema.Table.md['view_md']['base_versions'][0][0].astext == tbl_id.hex)
1840
+ .where(schema.Table.md['view_md']['base_versions'][0][1].astext == None)
1806
1841
  )
1807
1842
  mutable_view_ids = [r[0] for r in conn.execute(q).all()]
1808
1843
 
@@ -1839,9 +1874,9 @@ class Catalog:
1839
1874
  version_md,
1840
1875
  effective_version,
1841
1876
  schema_version_md,
1877
+ mutable_views,
1842
1878
  base_path=base_path,
1843
1879
  base=base,
1844
- mutable_views=mutable_views,
1845
1880
  )
1846
1881
 
1847
1882
  # register the instance before init()
@@ -1,20 +1,57 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import logging
4
- from typing import Iterator
4
+ from typing import Optional
5
5
 
6
6
  from pixeltable import exceptions as excs
7
7
 
8
- from .globals import is_valid_path
8
+ from .globals import is_valid_identifier
9
9
 
10
10
  _logger = logging.getLogger('pixeltable')
11
11
 
12
12
 
13
13
  class Path:
14
- def __init__(self, path: str, empty_is_valid: bool = False, allow_system_paths: bool = False):
15
- if not is_valid_path(path, empty_is_valid, allow_system_paths):
16
- raise excs.Error(f"Invalid path format: '{path}'")
17
- self.components = path.split('.')
14
+ components: list[str]
15
+ version: Optional[int]
16
+
17
+ def __init__(self, components: list[str], version: Optional[int] = None) -> None:
18
+ assert len(components) > 0
19
+ self.components = components
20
+ self.version = version
21
+
22
+ @classmethod
23
+ def parse(
24
+ cls,
25
+ path: str,
26
+ allow_empty_path: bool = False,
27
+ allow_system_path: bool = False,
28
+ allow_versioned_path: bool = False,
29
+ ) -> Path:
30
+ components: list[str]
31
+ version: Optional[int]
32
+ if ':' in path:
33
+ parts = path.split(':')
34
+ if len(parts) != 2:
35
+ raise excs.Error(f'Invalid path: {path}')
36
+ try:
37
+ components = parts[0].split('.')
38
+ version = int(parts[1])
39
+ except ValueError:
40
+ raise excs.Error(f'Invalid path: {path}') from None
41
+ else:
42
+ components = path.split('.')
43
+ version = None
44
+
45
+ if components == [''] and not allow_empty_path:
46
+ raise excs.Error(f'Invalid path: {path}')
47
+
48
+ if components != [''] and not all(is_valid_identifier(c, allow_system_path) for c in components):
49
+ raise excs.Error(f'Invalid path: {path}')
50
+
51
+ if version is not None and not allow_versioned_path:
52
+ raise excs.Error(f'Versioned path not allowed here: {path}')
53
+
54
+ return Path(components, version)
18
55
 
19
56
  @property
20
57
  def len(self) -> int:
@@ -22,7 +59,6 @@ class Path:
22
59
 
23
60
  @property
24
61
  def name(self) -> str:
25
- assert len(self.components) > 0
26
62
  return self.components[-1]
27
63
 
28
64
  @property
@@ -36,18 +72,15 @@ class Path:
36
72
  @property
37
73
  def parent(self) -> Path:
38
74
  if len(self.components) == 1:
39
- if self.is_root:
40
- return self
41
- else:
42
- return Path('', empty_is_valid=True, allow_system_paths=True)
75
+ return ROOT_PATH # Includes the case of the root path, which is its own parent.
43
76
  else:
44
- return Path('.'.join(self.components[:-1]), allow_system_paths=True)
77
+ return Path(self.components[:-1])
45
78
 
46
79
  def append(self, name: str) -> Path:
47
80
  if self.is_root:
48
- return Path(name, allow_system_paths=True)
81
+ return Path([name])
49
82
  else:
50
- return Path(f'{self}.{name}', allow_system_paths=True)
83
+ return Path([*self.components, name])
51
84
 
52
85
  def is_ancestor(self, other: Path, is_parent: bool = False) -> bool:
53
86
  """
@@ -60,22 +93,25 @@ class Path:
60
93
  is_prefix = self.components == other.components[: self.len]
61
94
  return is_prefix and (self.len == (other.len - 1) or not is_parent)
62
95
 
63
- def ancestors(self) -> Iterator[Path]:
96
+ def ancestors(self) -> list[Path]:
64
97
  """
65
- Return all ancestors of this path in top-down order including root.
98
+ Return all proper ancestors of this path in top-down order including root.
66
99
  If this path is for the root directory, which has no parent, then the result is empty.
67
100
  """
68
101
  if self.is_root:
69
- return
102
+ return []
70
103
  else:
71
- for i in range(0, len(self.components)):
72
- yield Path('.'.join(self.components[0:i]), empty_is_valid=True)
104
+ return [Path(self.components[:i]) if i > 0 else ROOT_PATH for i in range(len(self.components))]
73
105
 
74
106
  def __repr__(self) -> str:
75
107
  return repr(str(self))
76
108
 
77
109
  def __str__(self) -> str:
78
- return '.'.join(self.components)
110
+ base = '.'.join(self.components)
111
+ if self.version is not None:
112
+ return f'{base}:{self.version}'
113
+ else:
114
+ return base
79
115
 
80
116
  def __eq__(self, other: object) -> bool:
81
117
  return isinstance(other, Path) and str(self) == str(other)
@@ -85,3 +121,6 @@ class Path:
85
121
 
86
122
  def __lt__(self, other: Path) -> bool:
87
123
  return str(self) < str(other)
124
+
125
+
126
+ ROOT_PATH = Path([''])
@@ -18,6 +18,7 @@ class SchemaObject:
18
18
 
19
19
  def __init__(self, obj_id: UUID, name: str, dir_id: Optional[UUID]):
20
20
  # make these private so they don't collide with column names (id and name are fairly common)
21
+ assert dir_id is None or isinstance(dir_id, UUID), type(dir_id)
21
22
  self._id = obj_id
22
23
  self._name = name
23
24
  self._dir_id = dir_id
@@ -89,6 +89,8 @@ class Table(SchemaObject):
89
89
 
90
90
  ```python
91
91
  {
92
+ 'name': 'my_table',
93
+ 'path': 'my_dir.my_subdir.my_table',
92
94
  'base': None, # If this is a view or snapshot, will contain the name of its base table
93
95
  'schema': {
94
96
  'col1': StringType(),
@@ -96,6 +98,7 @@ class Table(SchemaObject):
96
98
  },
97
99
  'is_replica': False,
98
100
  'version': 22,
101
+ 'version_created': datetime.datetime(...),
99
102
  'schema_version': 1,
100
103
  'comment': '',
101
104
  'num_retained_versions': 10,
@@ -112,6 +115,9 @@ class Table(SchemaObject):
112
115
  md['schema'] = self._get_schema()
113
116
  md['is_replica'] = self._tbl_version_path.is_replica()
114
117
  md['version'] = self._get_version()
118
+ md['version_created'] = datetime.datetime.fromtimestamp(
119
+ self._tbl_version_path.tbl_version.get().created_at, tz=datetime.timezone.utc
120
+ )
115
121
  md['schema_version'] = self._tbl_version_path.schema_version()
116
122
  md['comment'] = self._get_comment()
117
123
  md['num_retained_versions'] = self._get_num_retained_versions()
@@ -773,6 +773,7 @@ class TableVersion:
773
773
  cols_to_add = list(cols)
774
774
  row_count = self.store_tbl.count()
775
775
  for col in cols_to_add:
776
+ assert col.tbl is self
776
777
  if not col.col_type.nullable and not col.is_computed and row_count > 0:
777
778
  raise excs.Error(
778
779
  f'Cannot add non-nullable column {col.name!r} to table {self.name!r} with existing rows'
@@ -1325,7 +1326,7 @@ class TableVersion:
1325
1326
  )
1326
1327
 
1327
1328
  # delete newly-added data
1328
- MediaStore.delete(self.id, version=self.version)
1329
+ MediaStore.delete(self.id, tbl_version=self.version)
1329
1330
  conn.execute(sql.delete(self.store_tbl.sa_tbl).where(self.store_tbl.sa_tbl.c.v_min == self.version))
1330
1331
 
1331
1332
  # revert new deletions
@@ -255,10 +255,20 @@ class View(Table):
255
255
  base=cls._get_snapshot_path(tbl_version_path.base) if tbl_version_path.base is not None else None,
256
256
  )
257
257
 
258
+ def _is_anonymous_snapshot(self) -> bool:
259
+ """
260
+ Returns True if this is an unnamed snapshot (i.e., a snapshot that is not a separate schema object).
261
+ """
262
+ return self._snapshot_only and self._id == self._tbl_version_path.tbl_id
263
+
258
264
  def _get_metadata(self) -> dict[str, Any]:
259
265
  md = super()._get_metadata()
260
266
  md['is_view'] = True
261
267
  md['is_snapshot'] = self._tbl_version_path.is_snapshot()
268
+ if self._is_anonymous_snapshot():
269
+ # Update name and path with version qualifiers.
270
+ md['name'] = f'{self._name}:{self._tbl_version_path.version()}'
271
+ md['path'] = f'{self._path()}:{self._tbl_version_path.version()}'
262
272
  base_tbl = self._get_base_table()
263
273
  if base_tbl is None:
264
274
  md['base'] = None
@@ -284,21 +294,22 @@ class View(Table):
284
294
  raise excs.Error(f'{self._display_str()}: Cannot delete from a {self._display_name()}.')
285
295
 
286
296
  def _get_base_table(self) -> Optional['Table']:
287
- if self._tbl_version_path.base is None and not self._snapshot_only:
288
- return None # this can happen for a replica of a base table
289
- # if this is a pure snapshot, our tbl_version_path only reflects the base (there is no TableVersion instance
290
- # for the snapshot itself)
291
- from pixeltable.catalog import Catalog
292
-
293
- base_id = self._tbl_version_path.tbl_id if self._snapshot_only else self._tbl_version_path.base.tbl_id
294
- with Catalog.get().begin_xact(tbl_id=base_id, for_write=False):
297
+ if self._tbl_version_path.tbl_id != self._id:
298
+ # _tbl_version_path represents a different schema object from this one. This can only happen if this is a
299
+ # named pure snapshot.
300
+ base_id = self._tbl_version_path.tbl_id
301
+ elif self._tbl_version_path.base is None:
302
+ return None
303
+ else:
304
+ base_id = self._tbl_version_path.base.tbl_id
305
+ with catalog.Catalog.get().begin_xact(tbl_id=base_id, for_write=False):
295
306
  return catalog.Catalog.get().get_table_by_id(base_id)
296
307
 
297
308
  @property
298
309
  def _effective_base_versions(self) -> list[Optional[int]]:
299
310
  effective_versions = [tv.effective_version for tv in self._tbl_version_path.get_tbl_versions()]
300
- if self._snapshot_only:
301
- return effective_versions
311
+ if self._snapshot_only and not self._is_anonymous_snapshot():
312
+ return effective_versions # Named pure snapshot
302
313
  else:
303
314
  return effective_versions[1:]
304
315
 
pixeltable/config.py CHANGED
@@ -8,7 +8,7 @@ from typing import Any, ClassVar, Optional, TypeVar
8
8
 
9
9
  import toml
10
10
 
11
- from pixeltable import exceptions as excs
11
+ from pixeltable import env, exceptions as excs
12
12
 
13
13
  _logger = logging.getLogger('pixeltable')
14
14
 
@@ -82,7 +82,11 @@ class Config:
82
82
  return cls.__instance
83
83
 
84
84
  @classmethod
85
- def init(cls, config_overrides: dict[str, Any]) -> None:
85
+ def init(cls, config_overrides: dict[str, Any], reinit: bool = False) -> None:
86
+ if reinit:
87
+ cls.__instance = None
88
+ for cl in env._registered_clients.values():
89
+ cl.client_obj = None
86
90
  if cls.__instance is None:
87
91
  cls.__instance = cls(config_overrides)
88
92
  elif len(config_overrides) > 0:
@@ -102,7 +106,7 @@ class Config:
102
106
  env_var = f'{section.upper()}_{key.upper()}'
103
107
  if override_var in self.__config_overrides:
104
108
  return self.__config_overrides[override_var]
105
- if env_var in os.environ:
109
+ if env_var in os.environ and len(os.environ[env_var]) > 0:
106
110
  return os.environ[env_var]
107
111
  return default
108
112
 
@@ -157,7 +161,11 @@ KNOWN_CONFIG_OPTIONS = {
157
161
  'groq': {'api_key': 'Groq API key'},
158
162
  'label_studio': {'api_key': 'Label Studio API key', 'url': 'Label Studio server URL'},
159
163
  'mistral': {'api_key': 'Mistral API key'},
160
- 'openai': {'api_key': 'OpenAI API key'},
164
+ 'openai': {
165
+ 'api_key': 'OpenAI API key',
166
+ 'base_url': 'OpenAI API base URL',
167
+ 'api_version': 'API version if using Azure OpenAI',
168
+ },
161
169
  'replicate': {'api_token': 'Replicate API token'},
162
170
  'together': {'api_key': 'Together API key'},
163
171
  'pypi': {'api_key': 'PyPI API key (for internal use only)'},