pixeltable 0.4.4__py3-none-any.whl → 0.4.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/catalog.py +106 -71
- pixeltable/catalog/path.py +59 -20
- pixeltable/catalog/schema_object.py +1 -0
- pixeltable/catalog/table.py +6 -0
- pixeltable/catalog/table_version.py +2 -1
- pixeltable/catalog/view.py +21 -10
- pixeltable/config.py +12 -4
- pixeltable/dataframe.py +57 -1
- pixeltable/env.py +25 -13
- pixeltable/exec/aggregation_node.py +1 -1
- pixeltable/exec/cache_prefetch_node.py +2 -6
- pixeltable/exec/component_iteration_node.py +4 -3
- pixeltable/exec/data_row_batch.py +10 -53
- pixeltable/exec/expr_eval/expr_eval_node.py +2 -2
- pixeltable/exec/in_memory_data_node.py +13 -11
- pixeltable/exec/sql_node.py +6 -7
- pixeltable/exprs/data_row.py +13 -13
- pixeltable/exprs/row_builder.py +16 -4
- pixeltable/exprs/string_op.py +1 -1
- pixeltable/func/expr_template_function.py +1 -4
- pixeltable/functions/date.py +1 -1
- pixeltable/functions/math.py +1 -1
- pixeltable/functions/openai.py +8 -4
- pixeltable/functions/timestamp.py +6 -6
- pixeltable/globals.py +14 -10
- pixeltable/metadata/schema.py +1 -1
- pixeltable/plan.py +5 -14
- pixeltable/share/packager.py +13 -13
- pixeltable/store.py +9 -6
- pixeltable/type_system.py +2 -1
- pixeltable/utils/filecache.py +1 -1
- pixeltable/utils/http_server.py +2 -3
- pixeltable/utils/media_store.py +84 -39
- {pixeltable-0.4.4.dist-info → pixeltable-0.4.6.dist-info}/METADATA +40 -41
- {pixeltable-0.4.4.dist-info → pixeltable-0.4.6.dist-info}/RECORD +44 -44
- {pixeltable-0.4.4.dist-info → pixeltable-0.4.6.dist-info}/WHEEL +1 -1
- pixeltable-0.4.6.dist-info/entry_points.txt +2 -0
- pixeltable-0.4.4.dist-info/entry_points.txt +0 -3
- {pixeltable-0.4.4.dist-info → pixeltable-0.4.6.dist-info/licenses}/LICENSE +0 -0
pixeltable/__version__.py
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
# These version placeholders will be replaced during build.
|
|
2
|
-
__version__ = '0.
|
|
3
|
-
__version_tuple__ = (0,
|
|
2
|
+
__version__ = '0.0.0'
|
|
3
|
+
__version_tuple__ = (0, 0, 0)
|
pixeltable/catalog/catalog.py
CHANGED
|
@@ -165,7 +165,7 @@ class Catalog:
|
|
|
165
165
|
# - mutable version of a table: version == None (even though TableVersion.version is set correctly)
|
|
166
166
|
# - snapshot versions: records the version of the snapshot
|
|
167
167
|
_tbl_versions: dict[tuple[UUID, Optional[int]], TableVersion]
|
|
168
|
-
_tbls: dict[UUID, Table]
|
|
168
|
+
_tbls: dict[tuple[UUID, Optional[int]], Table]
|
|
169
169
|
_in_write_xact: bool # True if we're in a write transaction
|
|
170
170
|
_x_locked_tbl_ids: set[UUID] # non-empty for write transactions
|
|
171
171
|
_modified_tvs: set[TableVersionHandle] # TableVersion instances modified in the current transaction
|
|
@@ -698,6 +698,7 @@ class Catalog:
|
|
|
698
698
|
|
|
699
699
|
def get_dir_path(self, dir_id: UUID) -> Path:
|
|
700
700
|
"""Return path for directory with given id"""
|
|
701
|
+
assert isinstance(dir_id, UUID)
|
|
701
702
|
conn = Env.get().conn
|
|
702
703
|
names: list[str] = []
|
|
703
704
|
while True:
|
|
@@ -708,7 +709,7 @@ class Catalog:
|
|
|
708
709
|
break
|
|
709
710
|
names.insert(0, dir.md['name'])
|
|
710
711
|
dir_id = dir.parent_id
|
|
711
|
-
return Path('.'.join(names),
|
|
712
|
+
return Path.parse('.'.join(names), allow_empty_path=True, allow_system_path=True)
|
|
712
713
|
|
|
713
714
|
@dataclasses.dataclass
|
|
714
715
|
class DirEntry:
|
|
@@ -825,7 +826,9 @@ class Catalog:
|
|
|
825
826
|
add_dir_obj = Dir(add_dir.id, add_dir.parent_id, add_dir.md['name']) if add_dir is not None else None
|
|
826
827
|
return add_obj, add_dir_obj, drop_obj
|
|
827
828
|
|
|
828
|
-
def _get_dir_entry(
|
|
829
|
+
def _get_dir_entry(
|
|
830
|
+
self, dir_id: UUID, name: str, version: Optional[int] = None, lock_entry: bool = False
|
|
831
|
+
) -> Optional[SchemaObject]:
|
|
829
832
|
user = Env.get().user
|
|
830
833
|
conn = Env.get().conn
|
|
831
834
|
|
|
@@ -854,9 +857,7 @@ class Catalog:
|
|
|
854
857
|
)
|
|
855
858
|
tbl_id = conn.execute(q).scalar_one_or_none()
|
|
856
859
|
if tbl_id is not None:
|
|
857
|
-
|
|
858
|
-
_ = self._load_tbl(tbl_id)
|
|
859
|
-
return self._tbls[tbl_id]
|
|
860
|
+
return self.get_table_by_id(tbl_id, version)
|
|
860
861
|
|
|
861
862
|
return None
|
|
862
863
|
|
|
@@ -872,7 +873,7 @@ class Catalog:
|
|
|
872
873
|
"""Return the schema object at the given path, or None if it doesn't exist.
|
|
873
874
|
|
|
874
875
|
Raises Error if
|
|
875
|
-
- the parent directory doesn't exist
|
|
876
|
+
- the parent directory doesn't exist
|
|
876
877
|
- raise_if_exists is True and the path exists
|
|
877
878
|
- raise_if_not_exists is True and the path does not exist
|
|
878
879
|
- expected is not None and the existing object has a different type
|
|
@@ -892,7 +893,7 @@ class Catalog:
|
|
|
892
893
|
parent_dir = self._get_dir(parent_path, lock_dir=lock_parent)
|
|
893
894
|
if parent_dir is None:
|
|
894
895
|
raise excs.Error(f'Directory {parent_path!r} does not exist.')
|
|
895
|
-
obj = self._get_dir_entry(parent_dir.id, path.name, lock_entry=lock_obj)
|
|
896
|
+
obj = self._get_dir_entry(parent_dir.id, path.name, path.version, lock_entry=lock_obj)
|
|
896
897
|
|
|
897
898
|
if obj is None and raise_if_not_exists:
|
|
898
899
|
raise excs.Error(f'Path {path!r} does not exist.')
|
|
@@ -903,18 +904,14 @@ class Catalog:
|
|
|
903
904
|
raise excs.Error(f'{path!r} needs to be a {expected_name} but is a {obj._display_name()}.')
|
|
904
905
|
return obj
|
|
905
906
|
|
|
906
|
-
def get_table_by_id(self, tbl_id: UUID) -> Optional[Table]:
|
|
907
|
+
def get_table_by_id(self, tbl_id: UUID, version: Optional[int] = None) -> Optional[Table]:
|
|
907
908
|
"""Must be executed inside a transaction. Might raise PendingTableOpsError."""
|
|
908
|
-
if tbl_id in self._tbls:
|
|
909
|
-
|
|
910
|
-
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
# if tbl_version.is_mutable:
|
|
915
|
-
# for v in tbl_version.mutable_views:
|
|
916
|
-
# _ = self.get_table_by_id(v.id)
|
|
917
|
-
return tbl
|
|
909
|
+
if (tbl_id, version) not in self._tbls:
|
|
910
|
+
if version is None:
|
|
911
|
+
self._load_tbl(tbl_id)
|
|
912
|
+
else:
|
|
913
|
+
self._load_tbl_at_version(tbl_id, version)
|
|
914
|
+
return self._tbls.get((tbl_id, version))
|
|
918
915
|
|
|
919
916
|
@retry_loop(for_write=True)
|
|
920
917
|
def create_table(
|
|
@@ -946,7 +943,7 @@ class Catalog:
|
|
|
946
943
|
comment=comment,
|
|
947
944
|
media_validation=media_validation,
|
|
948
945
|
)
|
|
949
|
-
self._tbls[tbl._id] = tbl
|
|
946
|
+
self._tbls[tbl._id, None] = tbl
|
|
950
947
|
return tbl
|
|
951
948
|
|
|
952
949
|
def create_view(
|
|
@@ -1045,12 +1042,12 @@ class Catalog:
|
|
|
1045
1042
|
)
|
|
1046
1043
|
|
|
1047
1044
|
# Ensure that the system directory exists.
|
|
1048
|
-
self._create_dir(Path('_system',
|
|
1045
|
+
self._create_dir(Path.parse('_system', allow_system_path=True), if_exists=IfExistsParam.IGNORE, parents=False)
|
|
1049
1046
|
|
|
1050
1047
|
# Now check to see if this table already exists in the catalog.
|
|
1051
1048
|
existing = self.get_table_by_id(tbl_id)
|
|
1052
1049
|
if existing is not None:
|
|
1053
|
-
existing_path = Path(existing._path(),
|
|
1050
|
+
existing_path = Path.parse(existing._path(), allow_system_path=True)
|
|
1054
1051
|
if existing_path != path:
|
|
1055
1052
|
# It does exist, under a different path from the specified one.
|
|
1056
1053
|
if not existing_path.is_system_path:
|
|
@@ -1073,12 +1070,12 @@ class Catalog:
|
|
|
1073
1070
|
replica_path: Path
|
|
1074
1071
|
if replica is None:
|
|
1075
1072
|
# We've never seen this table before. Create a new anonymous system table for it.
|
|
1076
|
-
replica_path = Path(f'_system.replica_{ancestor_id.hex}',
|
|
1073
|
+
replica_path = Path.parse(f'_system.replica_{ancestor_id.hex}', allow_system_path=True)
|
|
1077
1074
|
else:
|
|
1078
1075
|
# The table already exists in the catalog. The existing path might be a system path (if the table
|
|
1079
1076
|
# was created as an anonymous base table of some other table), or it might not (if it's a snapshot
|
|
1080
1077
|
# that was directly replicated by the user at some point). In either case, use the existing path.
|
|
1081
|
-
replica_path = Path(replica._path(),
|
|
1078
|
+
replica_path = Path.parse(replica._path(), allow_system_path=True)
|
|
1082
1079
|
|
|
1083
1080
|
# Store the metadata; it could be a new version (in which case a new record will be created), or a known
|
|
1084
1081
|
# version (in which case the newly received metadata will be validated as identical).
|
|
@@ -1138,7 +1135,7 @@ class Catalog:
|
|
|
1138
1135
|
q = (
|
|
1139
1136
|
sql.select(schema.TableVersion.md)
|
|
1140
1137
|
.where(schema.TableVersion.tbl_id == tbl_id)
|
|
1141
|
-
.where(
|
|
1138
|
+
.where(schema.TableVersion.md['version'].cast(sql.Integer) == md.version_md.version)
|
|
1142
1139
|
)
|
|
1143
1140
|
existing_version_md_row = conn.execute(q).one_or_none()
|
|
1144
1141
|
if existing_version_md_row is None:
|
|
@@ -1157,10 +1154,7 @@ class Catalog:
|
|
|
1157
1154
|
sql.select(schema.TableSchemaVersion.md)
|
|
1158
1155
|
.where(schema.TableSchemaVersion.tbl_id == tbl_id)
|
|
1159
1156
|
.where(
|
|
1160
|
-
sql.
|
|
1161
|
-
f"({schema.TableSchemaVersion.__table__}.md->>'schema_version')::int = "
|
|
1162
|
-
f'{md.schema_version_md.schema_version}'
|
|
1163
|
-
)
|
|
1157
|
+
schema.TableSchemaVersion.md['schema_version'].cast(sql.Integer) == md.schema_version_md.schema_version
|
|
1164
1158
|
)
|
|
1165
1159
|
)
|
|
1166
1160
|
existing_schema_version_md_row = conn.execute(q).one_or_none()
|
|
@@ -1274,8 +1268,10 @@ class Catalog:
|
|
|
1274
1268
|
tv.drop()
|
|
1275
1269
|
|
|
1276
1270
|
self.delete_tbl_md(tbl._id)
|
|
1277
|
-
assert tbl._id in self._tbls
|
|
1278
|
-
|
|
1271
|
+
assert (tbl._id, None) in self._tbls
|
|
1272
|
+
versions = [k[1] for k in self._tbls if k[0] == tbl._id]
|
|
1273
|
+
for version in versions:
|
|
1274
|
+
del self._tbls[tbl._id, version]
|
|
1279
1275
|
_logger.info(f'Dropped table `{tbl._path()}`.')
|
|
1280
1276
|
|
|
1281
1277
|
@retry_loop(for_write=True)
|
|
@@ -1363,7 +1359,7 @@ class Catalog:
|
|
|
1363
1359
|
tbl_count = conn.execute(q).scalar()
|
|
1364
1360
|
if tbl_count == 0:
|
|
1365
1361
|
raise excs.Error(self._dropped_tbl_error_msg(tbl_id))
|
|
1366
|
-
q = sql.select(schema.Table.id).where(
|
|
1362
|
+
q = sql.select(schema.Table.id).where(schema.Table.md['view_md']['base_versions'][0][0].astext == tbl_id.hex)
|
|
1367
1363
|
if for_update:
|
|
1368
1364
|
q = q.with_for_update()
|
|
1369
1365
|
result = [r[0] for r in conn.execute(q).all()]
|
|
@@ -1462,7 +1458,7 @@ class Catalog:
|
|
|
1462
1458
|
row = conn.execute(q).one_or_none()
|
|
1463
1459
|
return schema.Dir(**row._mapping) if row is not None else None
|
|
1464
1460
|
|
|
1465
|
-
def _load_tbl(self, tbl_id: UUID) ->
|
|
1461
|
+
def _load_tbl(self, tbl_id: UUID) -> None:
|
|
1466
1462
|
"""Loads metadata for the table with the given id and caches it."""
|
|
1467
1463
|
_logger.info(f'Loading table {tbl_id}')
|
|
1468
1464
|
from .insertable_table import InsertableTable
|
|
@@ -1480,12 +1476,8 @@ class Catalog:
|
|
|
1480
1476
|
sql.select(schema.Table, schema.TableSchemaVersion)
|
|
1481
1477
|
.join(schema.TableSchemaVersion)
|
|
1482
1478
|
.where(schema.Table.id == schema.TableSchemaVersion.tbl_id)
|
|
1483
|
-
# Table.md['current_schema_version'] == TableSchemaVersion.schema_version
|
|
1484
1479
|
.where(
|
|
1485
|
-
sql.
|
|
1486
|
-
f"({schema.Table.__table__}.md->>'current_schema_version')::int = "
|
|
1487
|
-
f'{schema.TableSchemaVersion.__table__}.{schema.TableSchemaVersion.schema_version.name}'
|
|
1488
|
-
)
|
|
1480
|
+
schema.Table.md['current_schema_version'].cast(sql.Integer) == schema.TableSchemaVersion.schema_version
|
|
1489
1481
|
)
|
|
1490
1482
|
.where(schema.Table.id == tbl_id)
|
|
1491
1483
|
)
|
|
@@ -1501,8 +1493,8 @@ class Catalog:
|
|
|
1501
1493
|
if (tbl_id, None) not in self._tbl_versions:
|
|
1502
1494
|
_ = self._load_tbl_version(tbl_id, None)
|
|
1503
1495
|
tbl = InsertableTable(tbl_record.dir_id, TableVersionHandle(tbl_id, None))
|
|
1504
|
-
self._tbls[tbl_id] = tbl
|
|
1505
|
-
return
|
|
1496
|
+
self._tbls[tbl_id, None] = tbl
|
|
1497
|
+
return
|
|
1506
1498
|
|
|
1507
1499
|
# this is a view; determine the sequence of TableVersions to load
|
|
1508
1500
|
tbl_version_path: list[tuple[UUID, Optional[int]]] = []
|
|
@@ -1526,8 +1518,68 @@ class Catalog:
|
|
|
1526
1518
|
view_path = TableVersionPath(TableVersionHandle(id, effective_version), base=base_path)
|
|
1527
1519
|
base_path = view_path
|
|
1528
1520
|
view = View(tbl_id, tbl_record.dir_id, tbl_md.name, view_path, snapshot_only=tbl_md.is_pure_snapshot)
|
|
1529
|
-
self._tbls[tbl_id] = view
|
|
1530
|
-
|
|
1521
|
+
self._tbls[tbl_id, None] = view
|
|
1522
|
+
|
|
1523
|
+
def _load_tbl_at_version(self, tbl_id: UUID, version: int) -> None:
|
|
1524
|
+
from .view import View
|
|
1525
|
+
|
|
1526
|
+
# Load the specified TableMd and TableVersionMd records from the db.
|
|
1527
|
+
conn = Env.get().conn
|
|
1528
|
+
q: sql.Executable = (
|
|
1529
|
+
sql.select(schema.Table, schema.TableVersion)
|
|
1530
|
+
.join(schema.TableVersion)
|
|
1531
|
+
.where(schema.Table.id == tbl_id)
|
|
1532
|
+
.where(schema.Table.id == schema.TableVersion.tbl_id)
|
|
1533
|
+
.where(schema.TableVersion.version == version)
|
|
1534
|
+
)
|
|
1535
|
+
row = conn.execute(q).one_or_none()
|
|
1536
|
+
if row is None:
|
|
1537
|
+
return None
|
|
1538
|
+
tbl_record, version_record = _unpack_row(row, [schema.Table, schema.TableVersion])
|
|
1539
|
+
tbl_md = schema.md_from_dict(schema.TableMd, tbl_record.md)
|
|
1540
|
+
version_md = schema.md_from_dict(schema.TableVersionMd, version_record.md)
|
|
1541
|
+
|
|
1542
|
+
# Reconstruct the TableVersionPath for the specified TableVersion. We do this by examining the created_at
|
|
1543
|
+
# timestamps of this table and all its ancestors.
|
|
1544
|
+
# TODO: Store the relevant TableVersionPaths in the database, so that we don't need to rely on timestamps
|
|
1545
|
+
# (which might be nondeterministic in the future).
|
|
1546
|
+
|
|
1547
|
+
# Build the list of ancestor versions, starting with the given table and traversing back to the base table.
|
|
1548
|
+
# For each proper ancestor, we use the version whose created_at timestamp equals or most nearly precedes the
|
|
1549
|
+
# given TableVersion's created_at timestamp.
|
|
1550
|
+
ancestors: list[tuple[UUID, Optional[int]]] = [(tbl_id, version)]
|
|
1551
|
+
if tbl_md.view_md is not None:
|
|
1552
|
+
for ancestor_id, _ in tbl_md.view_md.base_versions:
|
|
1553
|
+
q = (
|
|
1554
|
+
sql.select(schema.TableVersion)
|
|
1555
|
+
.where(schema.TableVersion.tbl_id == ancestor_id)
|
|
1556
|
+
.where(schema.TableVersion.md['created_at'].cast(sql.Float) <= version_md.created_at)
|
|
1557
|
+
.order_by(schema.TableVersion.md['created_at'].cast(sql.Float).desc())
|
|
1558
|
+
.limit(1)
|
|
1559
|
+
)
|
|
1560
|
+
row = conn.execute(q).one_or_none()
|
|
1561
|
+
if row is None:
|
|
1562
|
+
# This can happen if an ancestor version is garbage collected; it can also happen in
|
|
1563
|
+
# rare circumstances involving table versions created specifically with Pixeltable 0.4.3.
|
|
1564
|
+
_logger.info(f'Ancestor {ancestor_id} not found for table {tbl_id}:{version}')
|
|
1565
|
+
raise excs.Error('The specified table version is no longer valid and cannot be retrieved.')
|
|
1566
|
+
ancestor_version_record = _unpack_row(row, [schema.TableVersion])[0]
|
|
1567
|
+
ancestor_version_md = schema.md_from_dict(schema.TableVersionMd, ancestor_version_record.md)
|
|
1568
|
+
assert ancestor_version_md.created_at <= version_md.created_at
|
|
1569
|
+
ancestors.append((UUID(ancestor_id), ancestor_version_md.version))
|
|
1570
|
+
|
|
1571
|
+
# Force any ancestors to be loaded (base table first).
|
|
1572
|
+
for anc_id, anc_version in ancestors[::-1]:
|
|
1573
|
+
if (anc_id, anc_version) not in self._tbl_versions:
|
|
1574
|
+
_ = self._load_tbl_version(anc_id, anc_version)
|
|
1575
|
+
|
|
1576
|
+
# Now reconstruct the relevant TableVersionPath instance from the ancestor versions.
|
|
1577
|
+
tvp: Optional[TableVersionPath] = None
|
|
1578
|
+
for anc_id, anc_version in ancestors[::-1]:
|
|
1579
|
+
tvp = TableVersionPath(TableVersionHandle(anc_id, anc_version), base=tvp)
|
|
1580
|
+
|
|
1581
|
+
view = View(tbl_id, tbl_record.dir_id, tbl_md.name, tvp, snapshot_only=True)
|
|
1582
|
+
self._tbls[tbl_id, version] = view
|
|
1531
1583
|
|
|
1532
1584
|
@retry_loop(for_write=False)
|
|
1533
1585
|
def collect_tbl_history(self, tbl_id: UUID, n: Optional[int]) -> list[schema.FullTableMd]:
|
|
@@ -1547,8 +1599,7 @@ class Catalog:
|
|
|
1547
1599
|
.select_from(schema.TableVersion)
|
|
1548
1600
|
.join(
|
|
1549
1601
|
schema.TableSchemaVersion,
|
|
1550
|
-
|
|
1551
|
-
== schema.TableSchemaVersion.schema_version,
|
|
1602
|
+
schema.TableVersion.md['schema_version'].cast(sql.Integer) == schema.TableSchemaVersion.schema_version,
|
|
1552
1603
|
)
|
|
1553
1604
|
.where(schema.TableVersion.tbl_id == tbl_id)
|
|
1554
1605
|
.where(schema.TableSchemaVersion.tbl_id == tbl_id)
|
|
@@ -1590,13 +1641,9 @@ class Catalog:
|
|
|
1590
1641
|
# JOIN TableVersion tv ON (tv.tbl_id = tbl_id AND tv.version = effective_version)
|
|
1591
1642
|
# JOIN TableSchemaVersion tsv ON (tsv.tbl_id = tbl_id AND tv.md.schema_version = tsv.schema_version)
|
|
1592
1643
|
# WHERE t.id = tbl_id
|
|
1593
|
-
q = q.where(
|
|
1594
|
-
sql.
|
|
1595
|
-
|
|
1596
|
-
f"({schema.TableVersion.__table__}.md->>'schema_version')::int = "
|
|
1597
|
-
f'{schema.TableSchemaVersion.__table__}.{schema.TableSchemaVersion.schema_version.name}'
|
|
1598
|
-
)
|
|
1599
|
-
)
|
|
1644
|
+
q = q.where(
|
|
1645
|
+
schema.TableVersion.md['version'].cast(sql.Integer) == effective_version,
|
|
1646
|
+
schema.TableVersion.md['schema_version'].cast(sql.Integer) == schema.TableSchemaVersion.schema_version,
|
|
1600
1647
|
)
|
|
1601
1648
|
else:
|
|
1602
1649
|
# we are loading the current version
|
|
@@ -1606,17 +1653,8 @@ class Catalog:
|
|
|
1606
1653
|
# JOIN TableSchemaVersion tsv ON (tsv.tbl_id = tbl_id AND t.current_schema_version = tsv.schema_version)
|
|
1607
1654
|
# WHERE t.id = tbl_id
|
|
1608
1655
|
q = q.where(
|
|
1609
|
-
sql.
|
|
1610
|
-
|
|
1611
|
-
f'{schema.TableVersion.__table__}.{schema.TableVersion.version.name}'
|
|
1612
|
-
)
|
|
1613
|
-
).where(
|
|
1614
|
-
sql.text(
|
|
1615
|
-
(
|
|
1616
|
-
f"({schema.Table.__table__}.md->>'current_schema_version')::int = "
|
|
1617
|
-
f'{schema.TableSchemaVersion.__table__}.{schema.TableSchemaVersion.schema_version.name}'
|
|
1618
|
-
)
|
|
1619
|
-
)
|
|
1656
|
+
schema.Table.md['current_version'].cast(sql.Integer) == schema.TableVersion.version,
|
|
1657
|
+
schema.Table.md['current_schema_version'].cast(sql.Integer) == schema.TableSchemaVersion.schema_version,
|
|
1620
1658
|
)
|
|
1621
1659
|
|
|
1622
1660
|
row = conn.execute(q).one_or_none()
|
|
@@ -1719,9 +1757,7 @@ class Catalog:
|
|
|
1719
1757
|
stmt = (
|
|
1720
1758
|
sql.update(schema.TableVersion)
|
|
1721
1759
|
.where(schema.TableVersion.tbl_id == tbl_id, schema.TableVersion.version == version)
|
|
1722
|
-
.values(
|
|
1723
|
-
md=schema.TableVersion.md.op('||')({'additional_md': {'update_status': dataclasses.asdict(status)}})
|
|
1724
|
-
)
|
|
1760
|
+
.values(md=schema.TableVersion.md.op('||')({'update_status': dataclasses.asdict(status)}))
|
|
1725
1761
|
)
|
|
1726
1762
|
|
|
1727
1763
|
res = conn.execute(stmt)
|
|
@@ -1798,11 +1834,10 @@ class Catalog:
|
|
|
1798
1834
|
# This is presumably a source of bugs, because it ignores schema version changes (eg, column renames).
|
|
1799
1835
|
# TODO: retarget the value_expr_dict when instantiating Columns for a particular TV instance.
|
|
1800
1836
|
if effective_version is None and not tbl_md.is_replica:
|
|
1801
|
-
q =
|
|
1802
|
-
sql.
|
|
1803
|
-
|
|
1804
|
-
|
|
1805
|
-
)
|
|
1837
|
+
q = (
|
|
1838
|
+
sql.select(schema.Table.id)
|
|
1839
|
+
.where(schema.Table.md['view_md']['base_versions'][0][0].astext == tbl_id.hex)
|
|
1840
|
+
.where(schema.Table.md['view_md']['base_versions'][0][1].astext == None)
|
|
1806
1841
|
)
|
|
1807
1842
|
mutable_view_ids = [r[0] for r in conn.execute(q).all()]
|
|
1808
1843
|
|
|
@@ -1839,9 +1874,9 @@ class Catalog:
|
|
|
1839
1874
|
version_md,
|
|
1840
1875
|
effective_version,
|
|
1841
1876
|
schema_version_md,
|
|
1877
|
+
mutable_views,
|
|
1842
1878
|
base_path=base_path,
|
|
1843
1879
|
base=base,
|
|
1844
|
-
mutable_views=mutable_views,
|
|
1845
1880
|
)
|
|
1846
1881
|
|
|
1847
1882
|
# register the instance before init()
|
pixeltable/catalog/path.py
CHANGED
|
@@ -1,20 +1,57 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
|
-
from typing import
|
|
4
|
+
from typing import Optional
|
|
5
5
|
|
|
6
6
|
from pixeltable import exceptions as excs
|
|
7
7
|
|
|
8
|
-
from .globals import
|
|
8
|
+
from .globals import is_valid_identifier
|
|
9
9
|
|
|
10
10
|
_logger = logging.getLogger('pixeltable')
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
class Path:
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
14
|
+
components: list[str]
|
|
15
|
+
version: Optional[int]
|
|
16
|
+
|
|
17
|
+
def __init__(self, components: list[str], version: Optional[int] = None) -> None:
|
|
18
|
+
assert len(components) > 0
|
|
19
|
+
self.components = components
|
|
20
|
+
self.version = version
|
|
21
|
+
|
|
22
|
+
@classmethod
|
|
23
|
+
def parse(
|
|
24
|
+
cls,
|
|
25
|
+
path: str,
|
|
26
|
+
allow_empty_path: bool = False,
|
|
27
|
+
allow_system_path: bool = False,
|
|
28
|
+
allow_versioned_path: bool = False,
|
|
29
|
+
) -> Path:
|
|
30
|
+
components: list[str]
|
|
31
|
+
version: Optional[int]
|
|
32
|
+
if ':' in path:
|
|
33
|
+
parts = path.split(':')
|
|
34
|
+
if len(parts) != 2:
|
|
35
|
+
raise excs.Error(f'Invalid path: {path}')
|
|
36
|
+
try:
|
|
37
|
+
components = parts[0].split('.')
|
|
38
|
+
version = int(parts[1])
|
|
39
|
+
except ValueError:
|
|
40
|
+
raise excs.Error(f'Invalid path: {path}') from None
|
|
41
|
+
else:
|
|
42
|
+
components = path.split('.')
|
|
43
|
+
version = None
|
|
44
|
+
|
|
45
|
+
if components == [''] and not allow_empty_path:
|
|
46
|
+
raise excs.Error(f'Invalid path: {path}')
|
|
47
|
+
|
|
48
|
+
if components != [''] and not all(is_valid_identifier(c, allow_system_path) for c in components):
|
|
49
|
+
raise excs.Error(f'Invalid path: {path}')
|
|
50
|
+
|
|
51
|
+
if version is not None and not allow_versioned_path:
|
|
52
|
+
raise excs.Error(f'Versioned path not allowed here: {path}')
|
|
53
|
+
|
|
54
|
+
return Path(components, version)
|
|
18
55
|
|
|
19
56
|
@property
|
|
20
57
|
def len(self) -> int:
|
|
@@ -22,7 +59,6 @@ class Path:
|
|
|
22
59
|
|
|
23
60
|
@property
|
|
24
61
|
def name(self) -> str:
|
|
25
|
-
assert len(self.components) > 0
|
|
26
62
|
return self.components[-1]
|
|
27
63
|
|
|
28
64
|
@property
|
|
@@ -36,18 +72,15 @@ class Path:
|
|
|
36
72
|
@property
|
|
37
73
|
def parent(self) -> Path:
|
|
38
74
|
if len(self.components) == 1:
|
|
39
|
-
|
|
40
|
-
return self
|
|
41
|
-
else:
|
|
42
|
-
return Path('', empty_is_valid=True, allow_system_paths=True)
|
|
75
|
+
return ROOT_PATH # Includes the case of the root path, which is its own parent.
|
|
43
76
|
else:
|
|
44
|
-
return Path(
|
|
77
|
+
return Path(self.components[:-1])
|
|
45
78
|
|
|
46
79
|
def append(self, name: str) -> Path:
|
|
47
80
|
if self.is_root:
|
|
48
|
-
return Path(name
|
|
81
|
+
return Path([name])
|
|
49
82
|
else:
|
|
50
|
-
return Path(
|
|
83
|
+
return Path([*self.components, name])
|
|
51
84
|
|
|
52
85
|
def is_ancestor(self, other: Path, is_parent: bool = False) -> bool:
|
|
53
86
|
"""
|
|
@@ -60,22 +93,25 @@ class Path:
|
|
|
60
93
|
is_prefix = self.components == other.components[: self.len]
|
|
61
94
|
return is_prefix and (self.len == (other.len - 1) or not is_parent)
|
|
62
95
|
|
|
63
|
-
def ancestors(self) ->
|
|
96
|
+
def ancestors(self) -> list[Path]:
|
|
64
97
|
"""
|
|
65
|
-
Return all ancestors of this path in top-down order including root.
|
|
98
|
+
Return all proper ancestors of this path in top-down order including root.
|
|
66
99
|
If this path is for the root directory, which has no parent, then None is returned.
|
|
67
100
|
"""
|
|
68
101
|
if self.is_root:
|
|
69
|
-
return
|
|
102
|
+
return []
|
|
70
103
|
else:
|
|
71
|
-
for i in range(
|
|
72
|
-
yield Path('.'.join(self.components[0:i]), empty_is_valid=True)
|
|
104
|
+
return [Path(self.components[:i]) if i > 0 else ROOT_PATH for i in range(len(self.components))]
|
|
73
105
|
|
|
74
106
|
def __repr__(self) -> str:
|
|
75
107
|
return repr(str(self))
|
|
76
108
|
|
|
77
109
|
def __str__(self) -> str:
|
|
78
|
-
|
|
110
|
+
base = '.'.join(self.components)
|
|
111
|
+
if self.version is not None:
|
|
112
|
+
return f'{base}:{self.version}'
|
|
113
|
+
else:
|
|
114
|
+
return base
|
|
79
115
|
|
|
80
116
|
def __eq__(self, other: object) -> bool:
|
|
81
117
|
return isinstance(other, Path) and str(self) == str(other)
|
|
@@ -85,3 +121,6 @@ class Path:
|
|
|
85
121
|
|
|
86
122
|
def __lt__(self, other: Path) -> bool:
|
|
87
123
|
return str(self) < str(other)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
ROOT_PATH = Path([''])
|
|
@@ -18,6 +18,7 @@ class SchemaObject:
|
|
|
18
18
|
|
|
19
19
|
def __init__(self, obj_id: UUID, name: str, dir_id: Optional[UUID]):
|
|
20
20
|
# make these private so they don't collide with column names (id and name are fairly common)
|
|
21
|
+
assert dir_id is None or isinstance(dir_id, UUID), type(dir_id)
|
|
21
22
|
self._id = obj_id
|
|
22
23
|
self._name = name
|
|
23
24
|
self._dir_id = dir_id
|
pixeltable/catalog/table.py
CHANGED
|
@@ -89,6 +89,8 @@ class Table(SchemaObject):
|
|
|
89
89
|
|
|
90
90
|
```python
|
|
91
91
|
{
|
|
92
|
+
'name': 'my_table',
|
|
93
|
+
'path': 'my_dir.my_subdir.my_table',
|
|
92
94
|
'base': None, # If this is a view or snapshot, will contain the name of its base table
|
|
93
95
|
'schema': {
|
|
94
96
|
'col1': StringType(),
|
|
@@ -96,6 +98,7 @@ class Table(SchemaObject):
|
|
|
96
98
|
},
|
|
97
99
|
'is_replica': False,
|
|
98
100
|
'version': 22,
|
|
101
|
+
'version_created': datetime.datetime(...),
|
|
99
102
|
'schema_version': 1,
|
|
100
103
|
'comment': '',
|
|
101
104
|
'num_retained_versions': 10,
|
|
@@ -112,6 +115,9 @@ class Table(SchemaObject):
|
|
|
112
115
|
md['schema'] = self._get_schema()
|
|
113
116
|
md['is_replica'] = self._tbl_version_path.is_replica()
|
|
114
117
|
md['version'] = self._get_version()
|
|
118
|
+
md['version_created'] = datetime.datetime.fromtimestamp(
|
|
119
|
+
self._tbl_version_path.tbl_version.get().created_at, tz=datetime.timezone.utc
|
|
120
|
+
)
|
|
115
121
|
md['schema_version'] = self._tbl_version_path.schema_version()
|
|
116
122
|
md['comment'] = self._get_comment()
|
|
117
123
|
md['num_retained_versions'] = self._get_num_retained_versions()
|
|
@@ -773,6 +773,7 @@ class TableVersion:
|
|
|
773
773
|
cols_to_add = list(cols)
|
|
774
774
|
row_count = self.store_tbl.count()
|
|
775
775
|
for col in cols_to_add:
|
|
776
|
+
assert col.tbl is self
|
|
776
777
|
if not col.col_type.nullable and not col.is_computed and row_count > 0:
|
|
777
778
|
raise excs.Error(
|
|
778
779
|
f'Cannot add non-nullable column {col.name!r} to table {self.name!r} with existing rows'
|
|
@@ -1325,7 +1326,7 @@ class TableVersion:
|
|
|
1325
1326
|
)
|
|
1326
1327
|
|
|
1327
1328
|
# delete newly-added data
|
|
1328
|
-
MediaStore.delete(self.id,
|
|
1329
|
+
MediaStore.delete(self.id, tbl_version=self.version)
|
|
1329
1330
|
conn.execute(sql.delete(self.store_tbl.sa_tbl).where(self.store_tbl.sa_tbl.c.v_min == self.version))
|
|
1330
1331
|
|
|
1331
1332
|
# revert new deletions
|
pixeltable/catalog/view.py
CHANGED
|
@@ -255,10 +255,20 @@ class View(Table):
|
|
|
255
255
|
base=cls._get_snapshot_path(tbl_version_path.base) if tbl_version_path.base is not None else None,
|
|
256
256
|
)
|
|
257
257
|
|
|
258
|
+
def _is_anonymous_snapshot(self) -> bool:
|
|
259
|
+
"""
|
|
260
|
+
Returns True if this is an unnamed snapshot (i.e., a snapshot that is not a separate schema object).
|
|
261
|
+
"""
|
|
262
|
+
return self._snapshot_only and self._id == self._tbl_version_path.tbl_id
|
|
263
|
+
|
|
258
264
|
def _get_metadata(self) -> dict[str, Any]:
|
|
259
265
|
md = super()._get_metadata()
|
|
260
266
|
md['is_view'] = True
|
|
261
267
|
md['is_snapshot'] = self._tbl_version_path.is_snapshot()
|
|
268
|
+
if self._is_anonymous_snapshot():
|
|
269
|
+
# Update name and path with version qualifiers.
|
|
270
|
+
md['name'] = f'{self._name}:{self._tbl_version_path.version()}'
|
|
271
|
+
md['path'] = f'{self._path()}:{self._tbl_version_path.version()}'
|
|
262
272
|
base_tbl = self._get_base_table()
|
|
263
273
|
if base_tbl is None:
|
|
264
274
|
md['base'] = None
|
|
@@ -284,21 +294,22 @@ class View(Table):
|
|
|
284
294
|
raise excs.Error(f'{self._display_str()}: Cannot delete from a {self._display_name()}.')
|
|
285
295
|
|
|
286
296
|
def _get_base_table(self) -> Optional['Table']:
|
|
287
|
-
if self._tbl_version_path.
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
297
|
+
if self._tbl_version_path.tbl_id != self._id:
|
|
298
|
+
# _tbl_version_path represents a different schema object from this one. This can only happen if this is a
|
|
299
|
+
# named pure snapshot.
|
|
300
|
+
base_id = self._tbl_version_path.tbl_id
|
|
301
|
+
elif self._tbl_version_path.base is None:
|
|
302
|
+
return None
|
|
303
|
+
else:
|
|
304
|
+
base_id = self._tbl_version_path.base.tbl_id
|
|
305
|
+
with catalog.Catalog.get().begin_xact(tbl_id=base_id, for_write=False):
|
|
295
306
|
return catalog.Catalog.get().get_table_by_id(base_id)
|
|
296
307
|
|
|
297
308
|
@property
|
|
298
309
|
def _effective_base_versions(self) -> list[Optional[int]]:
|
|
299
310
|
effective_versions = [tv.effective_version for tv in self._tbl_version_path.get_tbl_versions()]
|
|
300
|
-
if self._snapshot_only:
|
|
301
|
-
return effective_versions
|
|
311
|
+
if self._snapshot_only and not self._is_anonymous_snapshot():
|
|
312
|
+
return effective_versions # Named pure snapshot
|
|
302
313
|
else:
|
|
303
314
|
return effective_versions[1:]
|
|
304
315
|
|
pixeltable/config.py
CHANGED
|
@@ -8,7 +8,7 @@ from typing import Any, ClassVar, Optional, TypeVar
|
|
|
8
8
|
|
|
9
9
|
import toml
|
|
10
10
|
|
|
11
|
-
from pixeltable import exceptions as excs
|
|
11
|
+
from pixeltable import env, exceptions as excs
|
|
12
12
|
|
|
13
13
|
_logger = logging.getLogger('pixeltable')
|
|
14
14
|
|
|
@@ -82,7 +82,11 @@ class Config:
|
|
|
82
82
|
return cls.__instance
|
|
83
83
|
|
|
84
84
|
@classmethod
|
|
85
|
-
def init(cls, config_overrides: dict[str, Any]) -> None:
|
|
85
|
+
def init(cls, config_overrides: dict[str, Any], reinit: bool = False) -> None:
|
|
86
|
+
if reinit:
|
|
87
|
+
cls.__instance = None
|
|
88
|
+
for cl in env._registered_clients.values():
|
|
89
|
+
cl.client_obj = None
|
|
86
90
|
if cls.__instance is None:
|
|
87
91
|
cls.__instance = cls(config_overrides)
|
|
88
92
|
elif len(config_overrides) > 0:
|
|
@@ -102,7 +106,7 @@ class Config:
|
|
|
102
106
|
env_var = f'{section.upper()}_{key.upper()}'
|
|
103
107
|
if override_var in self.__config_overrides:
|
|
104
108
|
return self.__config_overrides[override_var]
|
|
105
|
-
if env_var in os.environ:
|
|
109
|
+
if env_var in os.environ and len(os.environ[env_var]) > 0:
|
|
106
110
|
return os.environ[env_var]
|
|
107
111
|
return default
|
|
108
112
|
|
|
@@ -157,7 +161,11 @@ KNOWN_CONFIG_OPTIONS = {
|
|
|
157
161
|
'groq': {'api_key': 'Groq API key'},
|
|
158
162
|
'label_studio': {'api_key': 'Label Studio API key', 'url': 'Label Studio server URL'},
|
|
159
163
|
'mistral': {'api_key': 'Mistral API key'},
|
|
160
|
-
'openai': {
|
|
164
|
+
'openai': {
|
|
165
|
+
'api_key': 'OpenAI API key',
|
|
166
|
+
'base_url': 'OpenAI API base URL',
|
|
167
|
+
'api_version': 'API version if using Azure OpenAI',
|
|
168
|
+
},
|
|
161
169
|
'replicate': {'api_token': 'Replicate API token'},
|
|
162
170
|
'together': {'api_key': 'Together API key'},
|
|
163
171
|
'pypi': {'api_key': 'PyPI API key (for internal use only)'},
|