pixeltable 0.4.3__py3-none-any.whl → 0.4.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +1 -1
- pixeltable/catalog/catalog.py +526 -197
- pixeltable/catalog/dir.py +1 -2
- pixeltable/catalog/insertable_table.py +9 -9
- pixeltable/catalog/schema_object.py +9 -4
- pixeltable/catalog/table.py +45 -53
- pixeltable/catalog/table_version.py +214 -155
- pixeltable/catalog/table_version_path.py +1 -1
- pixeltable/catalog/tbl_ops.py +44 -0
- pixeltable/catalog/view.py +47 -60
- pixeltable/dataframe.py +18 -5
- pixeltable/env.py +21 -4
- pixeltable/exec/data_row_batch.py +3 -1
- pixeltable/exec/in_memory_data_node.py +6 -7
- pixeltable/exprs/column_ref.py +2 -1
- pixeltable/functions/gemini.py +4 -4
- pixeltable/functions/openai.py +1 -2
- pixeltable/functions/video.py +2 -6
- pixeltable/globals.py +50 -25
- pixeltable/io/datarows.py +2 -1
- pixeltable/io/pandas.py +1 -0
- pixeltable/io/table_data_conduit.py +12 -13
- pixeltable/iterators/audio.py +17 -8
- pixeltable/iterators/image.py +5 -2
- pixeltable/metadata/schema.py +38 -1
- pixeltable/store.py +22 -1
- pixeltable/utils/media_store.py +11 -0
- {pixeltable-0.4.3.dist-info → pixeltable-0.4.4.dist-info}/METADATA +1 -1
- {pixeltable-0.4.3.dist-info → pixeltable-0.4.4.dist-info}/RECORD +33 -32
- {pixeltable-0.4.3.dist-info → pixeltable-0.4.4.dist-info}/LICENSE +0 -0
- {pixeltable-0.4.3.dist-info → pixeltable-0.4.4.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.3.dist-info → pixeltable-0.4.4.dist-info}/entry_points.txt +0 -0
pixeltable/catalog/dir.py
CHANGED
pixeltable/catalog/insertable_table.py
CHANGED
|
@@ -54,8 +54,8 @@ class InsertableTable(Table):
|
|
|
54
54
|
super().__init__(tbl_version.id, dir_id, tbl_version.get().name, tbl_version_path)
|
|
55
55
|
self._tbl_version = tbl_version
|
|
56
56
|
|
|
57
|
-
|
|
58
|
-
|
|
57
|
+
def _display_name(self) -> str:
|
|
58
|
+
assert not self._tbl_version_path.is_replica()
|
|
59
59
|
return 'table'
|
|
60
60
|
|
|
61
61
|
@classmethod
|
|
@@ -75,10 +75,10 @@ class InsertableTable(Table):
|
|
|
75
75
|
column_names = [col.name for col in columns]
|
|
76
76
|
for pk_col in primary_key:
|
|
77
77
|
if pk_col not in column_names:
|
|
78
|
-
raise excs.Error(f'Primary key column {pk_col} not found in table schema')
|
|
78
|
+
raise excs.Error(f'Primary key column {pk_col!r} not found in table schema.')
|
|
79
79
|
col = columns[column_names.index(pk_col)]
|
|
80
80
|
if col.col_type.nullable:
|
|
81
|
-
raise excs.Error(f'Primary key column {pk_col} cannot be nullable')
|
|
81
|
+
raise excs.Error(f'Primary key column {pk_col!r} cannot be nullable.')
|
|
82
82
|
col.is_pk = True
|
|
83
83
|
|
|
84
84
|
_, tbl_version = TableVersion.create(
|
|
@@ -101,8 +101,8 @@ class InsertableTable(Table):
|
|
|
101
101
|
tbl_version.insert(None, df, fail_on_exception=True)
|
|
102
102
|
session.commit()
|
|
103
103
|
|
|
104
|
-
_logger.info(f'Created table
|
|
105
|
-
Env.get().console_logger.info(f'Created table
|
|
104
|
+
_logger.info(f'Created table {name!r}, id={tbl_version.id}')
|
|
105
|
+
Env.get().console_logger.info(f'Created table {name!r}.')
|
|
106
106
|
return tbl
|
|
107
107
|
|
|
108
108
|
def _get_metadata(self) -> dict[str, Any]:
|
|
@@ -204,9 +204,9 @@ class InsertableTable(Table):
|
|
|
204
204
|
|
|
205
205
|
for col_name, val in row.items():
|
|
206
206
|
if col_name not in valid_col_names:
|
|
207
|
-
raise excs.Error(f'Unknown column name {col_name} in row {row}')
|
|
207
|
+
raise excs.Error(f'Unknown column name {col_name!r} in row {row}')
|
|
208
208
|
if col_name in computed_col_names:
|
|
209
|
-
raise excs.Error(f'Value for computed column {col_name} in row {row}')
|
|
209
|
+
raise excs.Error(f'Value for computed column {col_name!r} in row {row}')
|
|
210
210
|
|
|
211
211
|
# validate data
|
|
212
212
|
col = self._tbl_version_path.get_column(col_name)
|
|
@@ -246,4 +246,4 @@ class InsertableTable(Table):
|
|
|
246
246
|
return []
|
|
247
247
|
|
|
248
248
|
def _table_descriptor(self) -> str:
|
|
249
|
-
return
|
|
249
|
+
return self._display_str()
|
|
pixeltable/catalog/schema_object.py
CHANGED
|
@@ -42,22 +42,27 @@ class SchemaObject:
|
|
|
42
42
|
|
|
43
43
|
def get_metadata(self) -> dict[str, Any]:
|
|
44
44
|
"""Returns metadata associated with this schema object."""
|
|
45
|
-
from pixeltable.catalog import
|
|
45
|
+
from pixeltable.catalog import retry_loop
|
|
46
46
|
|
|
47
|
-
|
|
47
|
+
@retry_loop(for_write=False)
|
|
48
|
+
def op() -> dict[str, Any]:
|
|
48
49
|
return self._get_metadata()
|
|
49
50
|
|
|
51
|
+
return op()
|
|
52
|
+
|
|
50
53
|
def _get_metadata(self) -> dict[str, Any]:
|
|
51
54
|
return {'name': self._name, 'path': self._path()}
|
|
52
55
|
|
|
53
|
-
@classmethod
|
|
54
56
|
@abstractmethod
|
|
55
|
-
def _display_name(
|
|
57
|
+
def _display_name(self) -> str:
|
|
56
58
|
"""
|
|
57
59
|
Return name displayed in error messages.
|
|
58
60
|
"""
|
|
59
61
|
pass
|
|
60
62
|
|
|
63
|
+
def _display_str(self) -> str:
|
|
64
|
+
return f'{self._display_name()} {self._path()!r}'
|
|
65
|
+
|
|
61
66
|
def _move(self, new_name: str, new_dir_id: UUID) -> None:
|
|
62
67
|
"""Subclasses need to override this to make the change persistent"""
|
|
63
68
|
self._name = new_name
|
pixeltable/catalog/table.py
CHANGED
|
@@ -147,11 +147,15 @@ class Table(SchemaObject):
|
|
|
147
147
|
Returns:
|
|
148
148
|
A list of view paths.
|
|
149
149
|
"""
|
|
150
|
-
from pixeltable.catalog import
|
|
150
|
+
from pixeltable.catalog import retry_loop
|
|
151
151
|
|
|
152
|
-
|
|
152
|
+
# we need retry_loop() here, because we end up loading Tables for the views
|
|
153
|
+
@retry_loop(tbl=self._tbl_version_path, for_write=False)
|
|
154
|
+
def op() -> list[str]:
|
|
153
155
|
return [t._path() for t in self._get_views(recursive=recursive)]
|
|
154
156
|
|
|
157
|
+
return op()
|
|
158
|
+
|
|
155
159
|
def _get_views(self, *, recursive: bool = True, include_snapshots: bool = True) -> list['Table']:
|
|
156
160
|
cat = catalog.Catalog.get()
|
|
157
161
|
view_ids = cat.get_view_ids(self._id)
|
|
@@ -178,7 +182,7 @@ class Table(SchemaObject):
|
|
|
178
182
|
"""
|
|
179
183
|
from pixeltable.catalog import Catalog
|
|
180
184
|
|
|
181
|
-
with Catalog.get().begin_xact(for_write=False):
|
|
185
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=False):
|
|
182
186
|
return self._df().select(*items, **named_items)
|
|
183
187
|
|
|
184
188
|
def where(self, pred: 'exprs.Expr') -> 'pxt.DataFrame':
|
|
@@ -188,7 +192,7 @@ class Table(SchemaObject):
|
|
|
188
192
|
"""
|
|
189
193
|
from pixeltable.catalog import Catalog
|
|
190
194
|
|
|
191
|
-
with Catalog.get().begin_xact(for_write=False):
|
|
195
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=False):
|
|
192
196
|
return self._df().where(pred)
|
|
193
197
|
|
|
194
198
|
def join(
|
|
@@ -201,7 +205,7 @@ class Table(SchemaObject):
|
|
|
201
205
|
"""Join this table with another table."""
|
|
202
206
|
from pixeltable.catalog import Catalog
|
|
203
207
|
|
|
204
|
-
with Catalog.get().begin_xact(for_write=False):
|
|
208
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=False):
|
|
205
209
|
return self._df().join(other, on=on, how=how)
|
|
206
210
|
|
|
207
211
|
def order_by(self, *items: 'exprs.Expr', asc: bool = True) -> 'pxt.DataFrame':
|
|
@@ -211,7 +215,7 @@ class Table(SchemaObject):
|
|
|
211
215
|
"""
|
|
212
216
|
from pixeltable.catalog import Catalog
|
|
213
217
|
|
|
214
|
-
with Catalog.get().begin_xact(for_write=False):
|
|
218
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=False):
|
|
215
219
|
return self._df().order_by(*items, asc=asc)
|
|
216
220
|
|
|
217
221
|
def group_by(self, *items: 'exprs.Expr') -> 'pxt.DataFrame':
|
|
@@ -221,7 +225,7 @@ class Table(SchemaObject):
|
|
|
221
225
|
"""
|
|
222
226
|
from pixeltable.catalog import Catalog
|
|
223
227
|
|
|
224
|
-
with Catalog.get().begin_xact(for_write=False):
|
|
228
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=False):
|
|
225
229
|
return self._df().group_by(*items)
|
|
226
230
|
|
|
227
231
|
def distinct(self) -> 'pxt.DataFrame':
|
|
@@ -277,10 +281,7 @@ class Table(SchemaObject):
|
|
|
277
281
|
return {c.name: c.col_type for c in self._tbl_version_path.columns()}
|
|
278
282
|
|
|
279
283
|
def get_base_table(self) -> Optional['Table']:
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
with Catalog.get().begin_xact(for_write=False):
|
|
283
|
-
return self._get_base_table()
|
|
284
|
+
return self._get_base_table()
|
|
284
285
|
|
|
285
286
|
@abc.abstractmethod
|
|
286
287
|
def _get_base_table(self) -> Optional['Table']:
|
|
@@ -321,7 +322,7 @@ class Table(SchemaObject):
|
|
|
321
322
|
"""
|
|
322
323
|
from pixeltable.catalog import Catalog
|
|
323
324
|
|
|
324
|
-
with Catalog.get().begin_xact(for_write=False):
|
|
325
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=False):
|
|
325
326
|
helper = DescriptionHelper()
|
|
326
327
|
helper.append(self._table_descriptor())
|
|
327
328
|
helper.append(self._col_descriptor())
|
|
@@ -492,8 +493,7 @@ class Table(SchemaObject):
|
|
|
492
493
|
|
|
493
494
|
# lock_mutable_tree=True: we might end up having to drop existing columns, which requires locking the tree
|
|
494
495
|
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
495
|
-
|
|
496
|
-
raise excs.Error('Cannot add column to a snapshot.')
|
|
496
|
+
self.__check_mutable('add columns to')
|
|
497
497
|
col_schema = {
|
|
498
498
|
col_name: {'type': ts.ColumnType.normalize_type(spec, nullable_default=True, allow_builtin_types=False)}
|
|
499
499
|
for col_name, spec in schema.items()
|
|
@@ -553,24 +553,18 @@ class Table(SchemaObject):
|
|
|
553
553
|
|
|
554
554
|
>>> tbl.add_columns({'new_col': pxt.Int})
|
|
555
555
|
"""
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
col_type = next(iter(kwargs.values()))
|
|
569
|
-
if not isinstance(col_type, (ts.ColumnType, type, _GenericAlias)):
|
|
570
|
-
raise excs.Error(
|
|
571
|
-
'The argument to add_column() must be a type; did you intend to use add_computed_column() instead?'
|
|
572
|
-
)
|
|
573
|
-
return self.add_columns(kwargs, if_exists=if_exists)
|
|
556
|
+
# verify kwargs and construct column schema dict
|
|
557
|
+
if len(kwargs) != 1:
|
|
558
|
+
raise excs.Error(
|
|
559
|
+
f'add_column() requires exactly one keyword argument of the form "col_name=col_type"; '
|
|
560
|
+
f'got {len(kwargs)} instead ({", ".join(kwargs.keys())})'
|
|
561
|
+
)
|
|
562
|
+
col_type = next(iter(kwargs.values()))
|
|
563
|
+
if not isinstance(col_type, (ts.ColumnType, type, _GenericAlias)):
|
|
564
|
+
raise excs.Error(
|
|
565
|
+
'The argument to add_column() must be a type; did you intend to use add_computed_column() instead?'
|
|
566
|
+
)
|
|
567
|
+
return self.add_columns(kwargs, if_exists=if_exists)
|
|
574
568
|
|
|
575
569
|
def add_computed_column(
|
|
576
570
|
self,
|
|
@@ -622,8 +616,7 @@ class Table(SchemaObject):
|
|
|
622
616
|
from pixeltable.catalog import Catalog
|
|
623
617
|
|
|
624
618
|
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
625
|
-
|
|
626
|
-
raise excs.Error('Cannot add column to a snapshot.')
|
|
619
|
+
self.__check_mutable('add columns to')
|
|
627
620
|
if len(kwargs) != 1:
|
|
628
621
|
raise excs.Error(
|
|
629
622
|
f'add_computed_column() requires exactly one keyword argument of the form '
|
|
@@ -808,10 +801,10 @@ class Table(SchemaObject):
|
|
|
808
801
|
from pixeltable.catalog import Catalog
|
|
809
802
|
|
|
810
803
|
cat = Catalog.get()
|
|
804
|
+
|
|
811
805
|
# lock_mutable_tree=True: we need to be able to see whether any transitive view has column dependents
|
|
812
806
|
with cat.begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
813
|
-
|
|
814
|
-
raise excs.Error('Cannot drop column from a snapshot.')
|
|
807
|
+
self.__check_mutable('drop columns from')
|
|
815
808
|
col: Column = None
|
|
816
809
|
if_not_exists_ = IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
|
|
817
810
|
|
|
@@ -835,7 +828,7 @@ class Table(SchemaObject):
|
|
|
835
828
|
dependent_user_cols = [c for c in cat.get_column_dependents(col.tbl.id, col.id) if c.name is not None]
|
|
836
829
|
if len(dependent_user_cols) > 0:
|
|
837
830
|
raise excs.Error(
|
|
838
|
-
f'Cannot drop column
|
|
831
|
+
f'Cannot drop column {col.name!r} because the following columns depend on it:\n'
|
|
839
832
|
f'{", ".join(c.name for c in dependent_user_cols)}'
|
|
840
833
|
)
|
|
841
834
|
|
|
@@ -989,8 +982,7 @@ class Table(SchemaObject):
|
|
|
989
982
|
from pixeltable.catalog import Catalog
|
|
990
983
|
|
|
991
984
|
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
992
|
-
|
|
993
|
-
raise excs.Error('Cannot add an index to a snapshot')
|
|
985
|
+
self.__check_mutable('add an index to')
|
|
994
986
|
col = self._resolve_column_parameter(column)
|
|
995
987
|
|
|
996
988
|
if idx_name is not None and idx_name in self._tbl_version.get().idxs_by_name:
|
|
@@ -1174,8 +1166,7 @@ class Table(SchemaObject):
|
|
|
1174
1166
|
) -> None:
|
|
1175
1167
|
from pixeltable.catalog import Catalog
|
|
1176
1168
|
|
|
1177
|
-
|
|
1178
|
-
raise excs.Error('Cannot drop an index from a snapshot')
|
|
1169
|
+
self.__check_mutable('drop an index from')
|
|
1179
1170
|
assert (col is None) != (idx_name is None)
|
|
1180
1171
|
|
|
1181
1172
|
if idx_name is not None:
|
|
@@ -1347,8 +1338,7 @@ class Table(SchemaObject):
|
|
|
1347
1338
|
from pixeltable.catalog import Catalog
|
|
1348
1339
|
|
|
1349
1340
|
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
1350
|
-
|
|
1351
|
-
raise excs.Error('Cannot update a snapshot')
|
|
1341
|
+
self.__check_mutable('update')
|
|
1352
1342
|
result = self._tbl_version.get().update(value_spec, where, cascade)
|
|
1353
1343
|
FileCache.get().emit_eviction_warnings()
|
|
1354
1344
|
return result
|
|
@@ -1387,8 +1377,7 @@ class Table(SchemaObject):
|
|
|
1387
1377
|
from pixeltable.catalog import Catalog
|
|
1388
1378
|
|
|
1389
1379
|
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
1390
|
-
|
|
1391
|
-
raise excs.Error('Cannot update a snapshot')
|
|
1380
|
+
self.__check_mutable('update')
|
|
1392
1381
|
rows = list(rows)
|
|
1393
1382
|
|
|
1394
1383
|
row_updates: list[dict[Column, exprs.Expr]] = []
|
|
@@ -1456,8 +1445,7 @@ class Table(SchemaObject):
|
|
|
1456
1445
|
cat = Catalog.get()
|
|
1457
1446
|
# lock_mutable_tree=True: we need to be able to see whether any transitive view has column dependents
|
|
1458
1447
|
with cat.begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
1459
|
-
|
|
1460
|
-
raise excs.Error('Cannot recompute columns of a snapshot.')
|
|
1448
|
+
self.__check_mutable('recompute columns of')
|
|
1461
1449
|
if len(columns) == 0:
|
|
1462
1450
|
raise excs.Error('At least one column must be specified to recompute')
|
|
1463
1451
|
if errors_only and len(columns) > 1:
|
|
@@ -1514,8 +1502,7 @@ class Table(SchemaObject):
|
|
|
1514
1502
|
from pixeltable.catalog import Catalog
|
|
1515
1503
|
|
|
1516
1504
|
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
1517
|
-
|
|
1518
|
-
raise excs.Error('Cannot revert a snapshot')
|
|
1505
|
+
self.__check_mutable('revert')
|
|
1519
1506
|
self._tbl_version.get().revert()
|
|
1520
1507
|
# remove cached md in order to force a reload on the next operation
|
|
1521
1508
|
self._tbl_version_path.clear_cached_md()
|
|
@@ -1530,8 +1517,7 @@ class Table(SchemaObject):
|
|
|
1530
1517
|
from pixeltable.catalog import Catalog
|
|
1531
1518
|
|
|
1532
1519
|
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=False):
|
|
1533
|
-
|
|
1534
|
-
raise excs.Error(f'Table `{self._name}` is a snapshot, so it cannot be linked to an external store.')
|
|
1520
|
+
self.__check_mutable('link an external store to')
|
|
1535
1521
|
if store.name in self.external_stores():
|
|
1536
1522
|
raise excs.Error(f'Table `{self._name}` already has an external store with that name: {store.name}')
|
|
1537
1523
|
_logger.info(f'Linking external store `{store.name}` to table `{self._name}`')
|
|
@@ -1560,7 +1546,7 @@ class Table(SchemaObject):
|
|
|
1560
1546
|
"""
|
|
1561
1547
|
from pixeltable.catalog import Catalog
|
|
1562
1548
|
|
|
1563
|
-
if self._tbl_version_path.
|
|
1549
|
+
if not self._tbl_version_path.is_mutable():
|
|
1564
1550
|
return
|
|
1565
1551
|
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=False):
|
|
1566
1552
|
all_stores = self.external_stores()
|
|
@@ -1600,7 +1586,7 @@ class Table(SchemaObject):
|
|
|
1600
1586
|
"""
|
|
1601
1587
|
from pixeltable.catalog import Catalog
|
|
1602
1588
|
|
|
1603
|
-
if self._tbl_version_path.
|
|
1589
|
+
if not self._tbl_version_path.is_mutable():
|
|
1604
1590
|
return UpdateStatus()
|
|
1605
1591
|
# we lock the entire tree starting at the root base table in order to ensure that all synced columns can
|
|
1606
1592
|
# have their updates propagated down the tree
|
|
@@ -1711,3 +1697,9 @@ class Table(SchemaObject):
|
|
|
1711
1697
|
report_lines.append(report_line)
|
|
1712
1698
|
|
|
1713
1699
|
return pxt.dataframe.DataFrameResultSet(report_lines, self._REPORT_SCHEMA)
|
|
1700
|
+
|
|
1701
|
+
def __check_mutable(self, op_descr: str) -> None:
|
|
1702
|
+
if self._tbl_version_path.is_snapshot():
|
|
1703
|
+
raise excs.Error(f'{self._display_str()}: Cannot {op_descr} a snapshot.')
|
|
1704
|
+
if self._tbl_version_path.is_replica():
|
|
1705
|
+
raise excs.Error(f'{self._display_str()}: Cannot {op_descr} a {self._display_name()}.')
|