pixeltable 0.2.24__py3-none-any.whl → 0.2.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/table.py +247 -83
- pixeltable/catalog/view.py +5 -2
- pixeltable/dataframe.py +240 -92
- pixeltable/exec/__init__.py +1 -1
- pixeltable/exec/exec_node.py +6 -7
- pixeltable/exec/sql_node.py +91 -44
- pixeltable/exprs/__init__.py +1 -0
- pixeltable/exprs/arithmetic_expr.py +1 -1
- pixeltable/exprs/array_slice.py +1 -1
- pixeltable/exprs/column_property_ref.py +1 -1
- pixeltable/exprs/column_ref.py +29 -2
- pixeltable/exprs/comparison.py +1 -1
- pixeltable/exprs/compound_predicate.py +1 -1
- pixeltable/exprs/expr.py +11 -5
- pixeltable/exprs/expr_set.py +8 -0
- pixeltable/exprs/function_call.py +14 -11
- pixeltable/exprs/in_predicate.py +1 -1
- pixeltable/exprs/inline_expr.py +3 -3
- pixeltable/exprs/is_null.py +1 -1
- pixeltable/exprs/json_mapper.py +1 -1
- pixeltable/exprs/json_path.py +1 -1
- pixeltable/exprs/method_ref.py +1 -1
- pixeltable/exprs/rowid_ref.py +1 -1
- pixeltable/exprs/similarity_expr.py +4 -1
- pixeltable/exprs/sql_element_cache.py +4 -0
- pixeltable/exprs/type_cast.py +2 -2
- pixeltable/exprs/variable.py +3 -0
- pixeltable/func/expr_template_function.py +3 -0
- pixeltable/func/function.py +37 -1
- pixeltable/func/signature.py +1 -0
- pixeltable/functions/mistralai.py +0 -2
- pixeltable/functions/ollama.py +4 -4
- pixeltable/globals.py +32 -18
- pixeltable/index/embedding_index.py +6 -1
- pixeltable/io/__init__.py +1 -1
- pixeltable/io/parquet.py +39 -19
- pixeltable/iterators/__init__.py +1 -0
- pixeltable/iterators/image.py +100 -0
- pixeltable/iterators/video.py +7 -8
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_22.py +17 -0
- pixeltable/metadata/notes.py +1 -0
- pixeltable/plan.py +129 -51
- pixeltable/store.py +1 -1
- pixeltable/tool/create_test_db_dump.py +4 -1
- pixeltable/type_system.py +1 -1
- pixeltable/utils/arrow.py +8 -3
- pixeltable/utils/description_helper.py +89 -0
- {pixeltable-0.2.24.dist-info → pixeltable-0.2.26.dist-info}/METADATA +28 -12
- {pixeltable-0.2.24.dist-info → pixeltable-0.2.26.dist-info}/RECORD +54 -51
- {pixeltable-0.2.24.dist-info → pixeltable-0.2.26.dist-info}/WHEEL +1 -1
- {pixeltable-0.2.24.dist-info → pixeltable-0.2.26.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.24.dist-info → pixeltable-0.2.26.dist-info}/entry_points.txt +0 -0
pixeltable/__version__.py
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
# These version placeholders will be replaced during build.
|
|
2
|
-
__version__ = "0.2.24"
|
|
3
|
-
__version_tuple__ = (0, 2, 24)
|
|
2
|
+
__version__ = "0.2.26"
|
|
3
|
+
__version_tuple__ = (0, 2, 26)
|
pixeltable/catalog/table.py
CHANGED
|
@@ -10,7 +10,6 @@ from typing import TYPE_CHECKING, Any, Callable, Iterable, Literal, Optional, Se
|
|
|
10
10
|
from uuid import UUID
|
|
11
11
|
|
|
12
12
|
import pandas as pd
|
|
13
|
-
import pandas.io.formats.style
|
|
14
13
|
import sqlalchemy as sql
|
|
15
14
|
|
|
16
15
|
import pixeltable as pxt
|
|
@@ -21,16 +20,19 @@ import pixeltable.exprs as exprs
|
|
|
21
20
|
import pixeltable.index as index
|
|
22
21
|
import pixeltable.metadata.schema as schema
|
|
23
22
|
import pixeltable.type_system as ts
|
|
24
|
-
from pixeltable.utils.filecache import FileCache
|
|
25
23
|
|
|
24
|
+
from ..exprs import ColumnRef
|
|
25
|
+
from ..utils.description_helper import DescriptionHelper
|
|
26
|
+
from ..utils.filecache import FileCache
|
|
26
27
|
from .column import Column
|
|
27
|
-
from .globals import _ROWID_COLUMN_NAME, UpdateStatus, is_system_column_name, is_valid_identifier
|
|
28
|
+
from .globals import _ROWID_COLUMN_NAME, MediaValidation, UpdateStatus, is_system_column_name, is_valid_identifier
|
|
28
29
|
from .schema_object import SchemaObject
|
|
29
30
|
from .table_version import TableVersion
|
|
30
31
|
from .table_version_path import TableVersionPath
|
|
31
32
|
|
|
32
33
|
if TYPE_CHECKING:
|
|
33
34
|
import torch.utils.data
|
|
35
|
+
import pixeltable.plan
|
|
34
36
|
|
|
35
37
|
_logger = logging.getLogger('pixeltable')
|
|
36
38
|
|
|
@@ -45,7 +47,7 @@ class Table(SchemaObject):
|
|
|
45
47
|
def __init__(self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath):
|
|
46
48
|
super().__init__(id, name, dir_id)
|
|
47
49
|
self._is_dropped = False
|
|
48
|
-
self.
|
|
50
|
+
self.__tbl_version_path = tbl_version_path
|
|
49
51
|
self.__query_scope = self.QueryScope(self)
|
|
50
52
|
|
|
51
53
|
class QueryScope:
|
|
@@ -62,6 +64,7 @@ class Table(SchemaObject):
|
|
|
62
64
|
raise AttributeError(f'Table {self.__table._name!r} has no query with that name: {name!r}')
|
|
63
65
|
|
|
64
66
|
def _move(self, new_name: str, new_dir_id: UUID) -> None:
|
|
67
|
+
self._check_is_dropped()
|
|
65
68
|
super()._move(new_name, new_dir_id)
|
|
66
69
|
with env.Env.get().engine.begin() as conn:
|
|
67
70
|
stmt = sql.text((
|
|
@@ -95,6 +98,7 @@ class Table(SchemaObject):
|
|
|
95
98
|
}
|
|
96
99
|
```
|
|
97
100
|
"""
|
|
101
|
+
self._check_is_dropped()
|
|
98
102
|
md = super().get_metadata()
|
|
99
103
|
md['base'] = self._base._path if self._base is not None else None
|
|
100
104
|
md['schema'] = self._schema
|
|
@@ -115,6 +119,12 @@ class Table(SchemaObject):
|
|
|
115
119
|
"""Return TableVersion for just this table."""
|
|
116
120
|
return self._tbl_version_path.tbl_version
|
|
117
121
|
|
|
122
|
+
@property
|
|
123
|
+
def _tbl_version_path(self) -> TableVersionPath:
|
|
124
|
+
"""Return TableVersionPath for just this table."""
|
|
125
|
+
self._check_is_dropped()
|
|
126
|
+
return self.__tbl_version_path
|
|
127
|
+
|
|
118
128
|
def __hash__(self) -> int:
|
|
119
129
|
return hash(self._tbl_version.id)
|
|
120
130
|
|
|
@@ -152,6 +162,7 @@ class Table(SchemaObject):
|
|
|
152
162
|
Returns:
|
|
153
163
|
A list of view paths.
|
|
154
164
|
"""
|
|
165
|
+
self._check_is_dropped()
|
|
155
166
|
return [t._path for t in self._get_views(recursive=recursive)]
|
|
156
167
|
|
|
157
168
|
def _get_views(self, *, recursive: bool = True) -> list['Table']:
|
|
@@ -165,7 +176,8 @@ class Table(SchemaObject):
|
|
|
165
176
|
"""Return a DataFrame for this table.
|
|
166
177
|
"""
|
|
167
178
|
# local import: avoid circular imports
|
|
168
|
-
|
|
179
|
+
from pixeltable.plan import FromClause
|
|
180
|
+
return pxt.DataFrame(FromClause(tbls=[self._tbl_version_path]))
|
|
169
181
|
|
|
170
182
|
@property
|
|
171
183
|
def queries(self) -> 'Table.QueryScope':
|
|
@@ -179,6 +191,13 @@ class Table(SchemaObject):
|
|
|
179
191
|
"""Return a [`DataFrame`][pixeltable.DataFrame] for this table."""
|
|
180
192
|
return self._df().where(pred)
|
|
181
193
|
|
|
194
|
+
def join(
|
|
195
|
+
self, other: 'Table', *, on: Optional['exprs.Expr'] = None,
|
|
196
|
+
how: 'pixeltable.plan.JoinType.LiteralType' = 'inner'
|
|
197
|
+
) -> 'pxt.DataFrame':
|
|
198
|
+
"""Return a [`DataFrame`][pixeltable.DataFrame] for this table."""
|
|
199
|
+
return self._df().join(other, on=on, how=how)
|
|
200
|
+
|
|
182
201
|
def order_by(self, *items: 'exprs.Expr', asc: bool = True) -> 'pxt.DataFrame':
|
|
183
202
|
"""Return a [`DataFrame`][pixeltable.DataFrame] for this table."""
|
|
184
203
|
return self._df().order_by(*items, asc=asc)
|
|
@@ -244,6 +263,18 @@ class Table(SchemaObject):
|
|
|
244
263
|
base_id = self._tbl_version_path.base.tbl_version.id
|
|
245
264
|
return catalog.Catalog.get().tbls[base_id]
|
|
246
265
|
|
|
266
|
+
@property
|
|
267
|
+
def _bases(self) -> list['Table']:
|
|
268
|
+
"""
|
|
269
|
+
The ancestor list of bases of this table, starting with its immediate base.
|
|
270
|
+
"""
|
|
271
|
+
bases = []
|
|
272
|
+
base = self._base
|
|
273
|
+
while base is not None:
|
|
274
|
+
bases.append(base)
|
|
275
|
+
base = base._base
|
|
276
|
+
return bases
|
|
277
|
+
|
|
247
278
|
@property
|
|
248
279
|
def _comment(self) -> str:
|
|
249
280
|
return self._tbl_version.comment
|
|
@@ -256,48 +287,103 @@ class Table(SchemaObject):
|
|
|
256
287
|
def _media_validation(self) -> MediaValidation:
|
|
257
288
|
return self._tbl_version.media_validation
|
|
258
289
|
|
|
259
|
-
def
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
.
|
|
275
|
-
|
|
290
|
+
def __repr__(self) -> str:
|
|
291
|
+
return self._descriptors().to_string()
|
|
292
|
+
|
|
293
|
+
def _repr_html_(self) -> str:
|
|
294
|
+
return self._descriptors().to_html()
|
|
295
|
+
|
|
296
|
+
def _descriptors(self) -> DescriptionHelper:
|
|
297
|
+
"""
|
|
298
|
+
Constructs a list of descriptors for this table that can be pretty-printed.
|
|
299
|
+
"""
|
|
300
|
+
helper = DescriptionHelper()
|
|
301
|
+
helper.append(self._title_descriptor())
|
|
302
|
+
helper.append(self._col_descriptor())
|
|
303
|
+
idxs = self._index_descriptor()
|
|
304
|
+
if not idxs.empty:
|
|
305
|
+
helper.append(idxs)
|
|
306
|
+
stores = self._external_store_descriptor()
|
|
307
|
+
if not stores.empty:
|
|
308
|
+
helper.append(stores)
|
|
309
|
+
if self._comment:
|
|
310
|
+
helper.append(f'COMMENT: {self._comment}')
|
|
311
|
+
return helper
|
|
312
|
+
|
|
313
|
+
def _title_descriptor(self) -> str:
|
|
314
|
+
title: str
|
|
315
|
+
if self._base is None:
|
|
316
|
+
title = f'Table\n{self._path!r}'
|
|
317
|
+
else:
|
|
318
|
+
title = f'View\n{self._path!r}'
|
|
319
|
+
title += f'\n(of {self.__bases_to_desc()})'
|
|
320
|
+
return title
|
|
321
|
+
|
|
322
|
+
def _col_descriptor(self, columns: Optional[list[str]] = None) -> pd.DataFrame:
|
|
323
|
+
return pd.DataFrame(
|
|
324
|
+
{
|
|
325
|
+
'Column Name': col.name,
|
|
326
|
+
'Type': col.col_type._to_str(as_schema=True),
|
|
327
|
+
'Computed With': col.value_expr.display_str(inline=False) if col.value_expr is not None else ''
|
|
328
|
+
}
|
|
329
|
+
for col in self.__tbl_version_path.columns()
|
|
330
|
+
if columns is None or col.name in columns
|
|
276
331
|
)
|
|
277
332
|
|
|
333
|
+
def __bases_to_desc(self) -> str:
|
|
334
|
+
bases = self._bases
|
|
335
|
+
assert len(bases) >= 1
|
|
336
|
+
if len(bases) <= 2:
|
|
337
|
+
return ', '.join(repr(b._path) for b in bases)
|
|
338
|
+
else:
|
|
339
|
+
return f'{bases[0]._path!r}, ..., {bases[-1]._path!r}'
|
|
340
|
+
|
|
341
|
+
def _index_descriptor(self, columns: Optional[list[str]] = None) -> pd.DataFrame:
|
|
342
|
+
from pixeltable import index
|
|
343
|
+
|
|
344
|
+
pd_rows = []
|
|
345
|
+
for name, info in self._tbl_version.idxs_by_name.items():
|
|
346
|
+
if isinstance(info.idx, index.EmbeddingIndex) and (columns is None or info.col.name in columns):
|
|
347
|
+
display_embed = info.idx.string_embed if info.col.col_type.is_string_type() else info.idx.image_embed
|
|
348
|
+
if info.idx.string_embed is not None and info.idx.image_embed is not None:
|
|
349
|
+
embed_str = f'{display_embed} (+1)'
|
|
350
|
+
else:
|
|
351
|
+
embed_str = str(display_embed)
|
|
352
|
+
row = {
|
|
353
|
+
'Index Name': name,
|
|
354
|
+
'Column': info.col.name,
|
|
355
|
+
'Metric': str(info.idx.metric.name.lower()),
|
|
356
|
+
'Embedding': embed_str,
|
|
357
|
+
}
|
|
358
|
+
pd_rows.append(row)
|
|
359
|
+
return pd.DataFrame(pd_rows)
|
|
360
|
+
|
|
361
|
+
def _external_store_descriptor(self) -> pd.DataFrame:
|
|
362
|
+
pd_rows = []
|
|
363
|
+
for name, store in self._tbl_version.external_stores.items():
|
|
364
|
+
row = {
|
|
365
|
+
'External Store': name,
|
|
366
|
+
'Type': type(store).__name__,
|
|
367
|
+
}
|
|
368
|
+
pd_rows.append(row)
|
|
369
|
+
return pd.DataFrame(pd_rows)
|
|
370
|
+
|
|
278
371
|
def describe(self) -> None:
|
|
279
372
|
"""
|
|
280
373
|
Print the table schema.
|
|
281
374
|
"""
|
|
375
|
+
self._check_is_dropped()
|
|
282
376
|
if getattr(builtins, '__IPYTHON__', False):
|
|
283
377
|
from IPython.display import display
|
|
284
|
-
display(self.
|
|
378
|
+
display(self._repr_html_())
|
|
285
379
|
else:
|
|
286
380
|
print(repr(self))
|
|
287
381
|
|
|
288
|
-
# TODO: Display comments in _repr_html()
|
|
289
|
-
def __repr__(self) -> str:
|
|
290
|
-
description_str = self._description().to_string(index=False)
|
|
291
|
-
if self._comment is None:
|
|
292
|
-
comment = ''
|
|
293
|
-
else:
|
|
294
|
-
comment = f'{self._comment}\n'
|
|
295
|
-
return f'{self._display_name()} \'{self._name}\'\n{comment}{description_str}'
|
|
296
|
-
|
|
297
|
-
def _repr_html_(self) -> str:
|
|
298
|
-
return self._description_html()._repr_html_() # type: ignore[attr-defined]
|
|
299
|
-
|
|
300
382
|
def _drop(self) -> None:
|
|
383
|
+
cat = catalog.Catalog.get()
|
|
384
|
+
# verify all dependents are deleted by now
|
|
385
|
+
for dep in cat.tbl_dependents[self._id]:
|
|
386
|
+
assert dep._is_dropped
|
|
301
387
|
self._check_is_dropped()
|
|
302
388
|
self._tbl_version.drop()
|
|
303
389
|
self._is_dropped = True
|
|
@@ -331,6 +417,7 @@ class Table(SchemaObject):
|
|
|
331
417
|
|
|
332
418
|
For details, see the documentation for [`add_column()`][pixeltable.catalog.Table.add_column].
|
|
333
419
|
"""
|
|
420
|
+
self._check_is_dropped()
|
|
334
421
|
if not isinstance(col_name, str):
|
|
335
422
|
raise excs.Error(f'Column name must be a string, got {type(col_name)}')
|
|
336
423
|
if not isinstance(spec, (ts.ColumnType, exprs.Expr, type, _GenericAlias)):
|
|
@@ -598,31 +685,49 @@ class Table(SchemaObject):
|
|
|
598
685
|
cls._verify_column(col, column_names)
|
|
599
686
|
column_names.add(col.name)
|
|
600
687
|
|
|
601
|
-
def
|
|
688
|
+
def __check_column_name_exists(self, column_name: str, include_bases: bool = False) -> None:
|
|
689
|
+
col = self._tbl_version_path.get_column(column_name, include_bases)
|
|
690
|
+
if col is None:
|
|
691
|
+
raise excs.Error(f'Column {column_name!r} unknown')
|
|
692
|
+
|
|
693
|
+
def __check_column_ref_exists(self, col_ref: ColumnRef, include_bases: bool = False) -> None:
|
|
694
|
+
exists = self._tbl_version_path.has_column(col_ref.col, include_bases)
|
|
695
|
+
if not exists:
|
|
696
|
+
raise excs.Error(f'Unknown column: {col_ref.col.qualified_name}')
|
|
697
|
+
|
|
698
|
+
def drop_column(self, column: Union[str, ColumnRef]) -> None:
|
|
602
699
|
"""Drop a column from the table.
|
|
603
700
|
|
|
604
701
|
Args:
|
|
605
|
-
|
|
702
|
+
column: The name or reference of the column to drop.
|
|
606
703
|
|
|
607
704
|
Raises:
|
|
608
705
|
Error: If the column does not exist or if it is referenced by a dependent computed column.
|
|
609
706
|
|
|
610
707
|
Examples:
|
|
611
|
-
Drop the column `col` from the table `my_table`:
|
|
708
|
+
Drop the column `col` from the table `my_table` by column name:
|
|
612
709
|
|
|
613
710
|
>>> tbl = pxt.get_table('my_table')
|
|
614
711
|
... tbl.drop_column('col')
|
|
712
|
+
|
|
713
|
+
Drop the column `col` from the table `my_table` by column reference:
|
|
714
|
+
|
|
715
|
+
>>> tbl = pxt.get_table('my_table')
|
|
716
|
+
... tbl.drop_column(tbl.col)
|
|
615
717
|
"""
|
|
616
718
|
self._check_is_dropped()
|
|
617
|
-
|
|
618
|
-
if
|
|
619
|
-
|
|
620
|
-
|
|
719
|
+
col: Column = None
|
|
720
|
+
if isinstance(column, str):
|
|
721
|
+
self.__check_column_name_exists(column)
|
|
722
|
+
col = self._tbl_version.cols_by_name[column]
|
|
723
|
+
else:
|
|
724
|
+
self.__check_column_ref_exists(column)
|
|
725
|
+
col = column.col
|
|
621
726
|
|
|
622
727
|
dependent_user_cols = [c for c in col.dependent_cols if c.name is not None]
|
|
623
728
|
if len(dependent_user_cols) > 0:
|
|
624
729
|
raise excs.Error(
|
|
625
|
-
f'Cannot drop column `{name}` because the following columns depend on it:\n'
|
|
730
|
+
f'Cannot drop column `{col.name}` because the following columns depend on it:\n'
|
|
626
731
|
f'{", ".join(c.name for c in dependent_user_cols)}'
|
|
627
732
|
)
|
|
628
733
|
|
|
@@ -640,7 +745,7 @@ class Table(SchemaObject):
|
|
|
640
745
|
for view, store in dependent_stores
|
|
641
746
|
]
|
|
642
747
|
raise excs.Error(
|
|
643
|
-
f'Cannot drop column `{name}` because the following external stores depend on it:\n'
|
|
748
|
+
f'Cannot drop column `{col.name}` because the following external stores depend on it:\n'
|
|
644
749
|
f'{", ".join(dependent_store_names)}'
|
|
645
750
|
)
|
|
646
751
|
|
|
@@ -662,11 +767,10 @@ class Table(SchemaObject):
|
|
|
662
767
|
>>> tbl = pxt.get_table('my_table')
|
|
663
768
|
... tbl.rename_column('col1', 'col2')
|
|
664
769
|
"""
|
|
665
|
-
self._check_is_dropped()
|
|
666
770
|
self._tbl_version.rename_column(old_name, new_name)
|
|
667
771
|
|
|
668
772
|
def add_embedding_index(
|
|
669
|
-
self,
|
|
773
|
+
self, column: Union[str, ColumnRef], *, idx_name: Optional[str] = None,
|
|
670
774
|
string_embed: Optional[pxt.Function] = None, image_embed: Optional[pxt.Function] = None,
|
|
671
775
|
metric: str = 'cosine'
|
|
672
776
|
) -> None:
|
|
@@ -680,7 +784,7 @@ class Table(SchemaObject):
|
|
|
680
784
|
of text over an image column.
|
|
681
785
|
|
|
682
786
|
Args:
|
|
683
|
-
|
|
787
|
+
column: The name of, or reference to, the column to index; must be a `String` or `Image` column.
|
|
684
788
|
idx_name: The name of index. If not specified, a name such as `'idx0'` will be generated automatically.
|
|
685
789
|
If specified, the name must be unique for this table.
|
|
686
790
|
string_embed: A function to embed text; required if the column is a `String` column.
|
|
@@ -692,11 +796,15 @@ class Table(SchemaObject):
|
|
|
692
796
|
Error: If an index with that name already exists for the table, or if the specified column does not exist.
|
|
693
797
|
|
|
694
798
|
Examples:
|
|
695
|
-
Add an index to the `img` column of the table `my_table`:
|
|
799
|
+
Add an index to the `img` column of the table `my_table` by column name:
|
|
696
800
|
|
|
697
801
|
>>> tbl = pxt.get_table('my_table')
|
|
698
802
|
... tbl.add_embedding_index('img', image_embed=my_image_func)
|
|
699
803
|
|
|
804
|
+
Add an index to the `img` column of the table `my_table` by column reference:
|
|
805
|
+
>>> tbl = pxt.get_table('my_table')
|
|
806
|
+
... tbl.add_embedding_index(tbl.img, image_embed=my_image_func)
|
|
807
|
+
|
|
700
808
|
Add another index to the `img` column, using the inner product as the distance metric,
|
|
701
809
|
and with a specific name; `string_embed` is also specified in order to search with text:
|
|
702
810
|
|
|
@@ -707,13 +815,27 @@ class Table(SchemaObject):
|
|
|
707
815
|
... string_embed=my_string_func,
|
|
708
816
|
... metric='ip'
|
|
709
817
|
... )
|
|
818
|
+
|
|
819
|
+
Alternatively:
|
|
820
|
+
|
|
821
|
+
>>> tbl.add_embedding_index(
|
|
822
|
+
... tbl.img,
|
|
823
|
+
... idx_name='clip_idx',
|
|
824
|
+
... image_embed=my_image_func,
|
|
825
|
+
... string_embed=my_string_func,
|
|
826
|
+
... metric='ip'
|
|
827
|
+
... )
|
|
710
828
|
"""
|
|
711
829
|
if self._tbl_version_path.is_snapshot():
|
|
712
830
|
raise excs.Error('Cannot add an index to a snapshot')
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
831
|
+
col: Column
|
|
832
|
+
if isinstance(column, str):
|
|
833
|
+
self.__check_column_name_exists(column, include_bases=True)
|
|
834
|
+
col = self._tbl_version_path.get_column(column, include_bases=True)
|
|
835
|
+
else:
|
|
836
|
+
self.__check_column_ref_exists(column, include_bases=True)
|
|
837
|
+
col = column.col
|
|
838
|
+
|
|
717
839
|
if idx_name is not None and idx_name in self._tbl_version.idxs_by_name:
|
|
718
840
|
raise excs.Error(f'Duplicate index name: {idx_name}')
|
|
719
841
|
from pixeltable.index import EmbeddingIndex
|
|
@@ -724,82 +846,128 @@ class Table(SchemaObject):
|
|
|
724
846
|
# TODO: how to deal with exceptions here? drop the index and raise?
|
|
725
847
|
FileCache.get().emit_eviction_warnings()
|
|
726
848
|
|
|
727
|
-
def drop_embedding_index(
|
|
849
|
+
def drop_embedding_index(
|
|
850
|
+
self, *,
|
|
851
|
+
column: Union[str, ColumnRef, None] = None,
|
|
852
|
+
idx_name: Optional[str] = None) -> None:
|
|
728
853
|
"""
|
|
729
854
|
Drop an embedding index from the table. Either a column name or an index name (but not both) must be
|
|
730
|
-
specified. If a column name is specified, it must be a column containing exactly one
|
|
731
|
-
otherwise the specific index name must be provided instead.
|
|
855
|
+
specified. If a column name or reference is specified, it must be a column containing exactly one
|
|
856
|
+
embedding index; otherwise the specific index name must be provided instead.
|
|
732
857
|
|
|
733
858
|
Args:
|
|
734
|
-
|
|
735
|
-
|
|
859
|
+
column: The name of, or reference to, the column from which to drop the index.
|
|
860
|
+
The column must have only one embedding index.
|
|
736
861
|
idx_name: The name of the index to drop.
|
|
737
862
|
|
|
738
863
|
Raises:
|
|
739
|
-
Error: If `
|
|
864
|
+
Error: If `column` is specified, but the column does not exist, or it contains no embedding
|
|
740
865
|
indices or multiple embedding indices.
|
|
741
866
|
Error: If `idx_name` is specified, but the index does not exist or is not an embedding index.
|
|
742
867
|
|
|
743
868
|
Examples:
|
|
744
|
-
Drop the embedding index on the `img` column of the table `my_table`:
|
|
869
|
+
Drop the embedding index on the `img` column of the table `my_table` by column name:
|
|
745
870
|
|
|
746
871
|
>>> tbl = pxt.get_table('my_table')
|
|
747
|
-
... tbl.drop_embedding_index(
|
|
748
|
-
|
|
749
|
-
|
|
872
|
+
... tbl.drop_embedding_index(column='img')
|
|
873
|
+
|
|
874
|
+
Drop the embedding index on the `img` column of the table `my_table` by column reference:
|
|
875
|
+
|
|
876
|
+
>>> tbl = pxt.get_table('my_table')
|
|
877
|
+
... tbl.drop_embedding_index(column=tbl.img)
|
|
878
|
+
|
|
879
|
+
Drop the embedding index `idx1` of the table `my_table` by index name:
|
|
880
|
+
>>> tbl = pxt.get_table('my_table')
|
|
881
|
+
... tbl.drop_embedding_index(idx_name='idx1')
|
|
750
882
|
|
|
751
|
-
|
|
883
|
+
"""
|
|
884
|
+
if (column is None) == (idx_name is None):
|
|
885
|
+
raise excs.Error("Exactly one of 'column' or 'idx_name' must be provided")
|
|
886
|
+
|
|
887
|
+
col: Column = None
|
|
888
|
+
if idx_name is None:
|
|
889
|
+
if isinstance(column, str):
|
|
890
|
+
self.__check_column_name_exists(column, include_bases=True)
|
|
891
|
+
col = self._tbl_version_path.get_column(column, include_bases=True)
|
|
892
|
+
else:
|
|
893
|
+
self.__check_column_ref_exists(column, include_bases=True)
|
|
894
|
+
col = column.col
|
|
895
|
+
assert col is not None
|
|
896
|
+
self._drop_index(col=col, idx_name=idx_name, _idx_class=index.EmbeddingIndex)
|
|
897
|
+
|
|
898
|
+
def drop_index(
|
|
899
|
+
self, *,
|
|
900
|
+
column: Union[str, ColumnRef, None] = None,
|
|
901
|
+
idx_name: Optional[str] = None) -> None:
|
|
752
902
|
"""
|
|
753
903
|
Drop an index from the table. Either a column name or an index name (but not both) must be
|
|
754
|
-
specified. If a column name is specified, it must be a column containing exactly one index;
|
|
904
|
+
specified. If a column name or reference is specified, it must be a column containing exactly one index;
|
|
755
905
|
otherwise the specific index name must be provided instead.
|
|
756
906
|
|
|
757
907
|
Args:
|
|
758
|
-
|
|
759
|
-
|
|
908
|
+
column: The name of, or reference to, the column from which to drop the index.
|
|
909
|
+
The column must have only one embedding index.
|
|
760
910
|
idx_name: The name of the index to drop.
|
|
761
911
|
|
|
762
912
|
Raises:
|
|
763
|
-
Error: If `
|
|
913
|
+
Error: If `column` is specified, but the column does not exist, or it contains no
|
|
764
914
|
indices or multiple indices.
|
|
765
915
|
Error: If `idx_name` is specified, but the index does not exist.
|
|
766
916
|
|
|
767
917
|
Examples:
|
|
768
|
-
Drop the index on the `img` column of the table `my_table`:
|
|
918
|
+
Drop the index on the `img` column of the table `my_table` by column name:
|
|
769
919
|
|
|
770
920
|
>>> tbl = pxt.get_table('my_table')
|
|
771
921
|
... tbl.drop_index(column_name='img')
|
|
922
|
+
|
|
923
|
+
Drop the index on the `img` column of the table `my_table` by column reference:
|
|
924
|
+
|
|
925
|
+
>>> tbl = pxt.get_table('my_table')
|
|
926
|
+
... tbl.drop_index(tbl.img)
|
|
927
|
+
|
|
928
|
+
Drop the index `idx1` of the table `my_table` by index name:
|
|
929
|
+
>>> tbl = pxt.get_table('my_table')
|
|
930
|
+
... tbl.drop_index(idx_name='idx1')
|
|
931
|
+
|
|
772
932
|
"""
|
|
773
|
-
|
|
933
|
+
if (column is None) == (idx_name is None):
|
|
934
|
+
raise excs.Error("Exactly one of 'column' or 'idx_name' must be provided")
|
|
935
|
+
|
|
936
|
+
col: Column = None
|
|
937
|
+
if idx_name is None:
|
|
938
|
+
if isinstance(column, str):
|
|
939
|
+
self.__check_column_name_exists(column, include_bases=True)
|
|
940
|
+
col = self._tbl_version_path.get_column(column, include_bases=True)
|
|
941
|
+
else:
|
|
942
|
+
self.__check_column_ref_exists(column, include_bases=True)
|
|
943
|
+
col = column.col
|
|
944
|
+
assert col is not None
|
|
945
|
+
self._drop_index(col=col, idx_name=idx_name)
|
|
774
946
|
|
|
775
947
|
def _drop_index(
|
|
776
|
-
self, *,
|
|
948
|
+
self, *, col: Optional[Column] = None,
|
|
949
|
+
idx_name: Optional[str] = None,
|
|
777
950
|
_idx_class: Optional[type[index.IndexBase]] = None
|
|
778
951
|
) -> None:
|
|
779
952
|
if self._tbl_version_path.is_snapshot():
|
|
780
953
|
raise excs.Error('Cannot drop an index from a snapshot')
|
|
781
|
-
|
|
782
|
-
if (column_name is None) == (idx_name is None):
|
|
783
|
-
raise excs.Error("Exactly one of 'column_name' or 'idx_name' must be provided")
|
|
954
|
+
assert (col is None) != (idx_name is None)
|
|
784
955
|
|
|
785
956
|
if idx_name is not None:
|
|
786
957
|
if idx_name not in self._tbl_version.idxs_by_name:
|
|
787
958
|
raise excs.Error(f'Index {idx_name!r} does not exist')
|
|
788
959
|
idx_id = self._tbl_version.idxs_by_name[idx_name].id
|
|
789
960
|
else:
|
|
790
|
-
col = self._tbl_version_path.get_column(column_name, include_bases=True)
|
|
791
|
-
if col is None:
|
|
792
|
-
raise excs.Error(f'Column {column_name!r} unknown')
|
|
793
961
|
if col.tbl.id != self._tbl_version.id:
|
|
794
962
|
raise excs.Error(
|
|
795
|
-
f'Column {
|
|
963
|
+
f'Column {col.name!r}: cannot drop index from column that belongs to base ({col.tbl.name}!r)')
|
|
796
964
|
idx_info = [info for info in self._tbl_version.idxs_by_name.values() if info.col.id == col.id]
|
|
797
965
|
if _idx_class is not None:
|
|
798
966
|
idx_info = [info for info in idx_info if isinstance(info.idx, _idx_class)]
|
|
799
967
|
if len(idx_info) == 0:
|
|
800
|
-
raise excs.Error(f'Column {
|
|
968
|
+
raise excs.Error(f'Column {col.name!r} does not have an index')
|
|
801
969
|
if len(idx_info) > 1:
|
|
802
|
-
raise excs.Error(f"Column {
|
|
970
|
+
raise excs.Error(f"Column {col.name!r} has multiple indices; specify 'idx_name' instead")
|
|
803
971
|
idx_id = idx_info[0].id
|
|
804
972
|
self._tbl_version.drop_index(idx_id)
|
|
805
973
|
|
|
@@ -919,7 +1087,6 @@ class Table(SchemaObject):
|
|
|
919
1087
|
|
|
920
1088
|
>>> tbl.update({'int_col': tbl.int_col + 1}, where=tbl.int_col == 0)
|
|
921
1089
|
"""
|
|
922
|
-
self._check_is_dropped()
|
|
923
1090
|
status = self._tbl_version.update(value_spec, where, cascade)
|
|
924
1091
|
FileCache.get().emit_eviction_warnings()
|
|
925
1092
|
return status
|
|
@@ -955,7 +1122,6 @@ class Table(SchemaObject):
|
|
|
955
1122
|
"""
|
|
956
1123
|
if self._tbl_version_path.is_snapshot():
|
|
957
1124
|
raise excs.Error('Cannot update a snapshot')
|
|
958
|
-
self._check_is_dropped()
|
|
959
1125
|
rows = list(rows)
|
|
960
1126
|
|
|
961
1127
|
row_updates: list[dict[Column, exprs.Expr]] = []
|
|
@@ -1010,7 +1176,6 @@ class Table(SchemaObject):
|
|
|
1010
1176
|
"""
|
|
1011
1177
|
if self._tbl_version_path.is_snapshot():
|
|
1012
1178
|
raise excs.Error('Cannot revert a snapshot')
|
|
1013
|
-
self._check_is_dropped()
|
|
1014
1179
|
self._tbl_version.revert()
|
|
1015
1180
|
|
|
1016
1181
|
@overload
|
|
@@ -1060,7 +1225,6 @@ class Table(SchemaObject):
|
|
|
1060
1225
|
"""
|
|
1061
1226
|
if self._tbl_version.is_snapshot:
|
|
1062
1227
|
raise excs.Error(f'Table `{self._name}` is a snapshot, so it cannot be linked to an external store.')
|
|
1063
|
-
self._check_is_dropped()
|
|
1064
1228
|
if store.name in self.external_stores:
|
|
1065
1229
|
raise excs.Error(f'Table `{self._name}` already has an external store with that name: {store.name}')
|
|
1066
1230
|
_logger.info(f'Linking external store `{store.name}` to table `{self._name}`')
|
pixeltable/catalog/view.py
CHANGED
|
@@ -59,7 +59,7 @@ class View(Table):
|
|
|
59
59
|
|
|
60
60
|
# verify that filter can be evaluated in the context of the base
|
|
61
61
|
if predicate is not None:
|
|
62
|
-
if not predicate.is_bound_by(base):
|
|
62
|
+
if not predicate.is_bound_by([base]):
|
|
63
63
|
raise excs.Error(f'Filter cannot be computed in the context of the base {base.tbl_name()}')
|
|
64
64
|
# create a copy that we can modify and store
|
|
65
65
|
predicate = predicate.copy()
|
|
@@ -69,7 +69,7 @@ class View(Table):
|
|
|
69
69
|
if not col.is_computed:
|
|
70
70
|
continue
|
|
71
71
|
# make sure that the value can be computed in the context of the base
|
|
72
|
-
if col.value_expr is not None and not col.value_expr.is_bound_by(base):
|
|
72
|
+
if col.value_expr is not None and not col.value_expr.is_bound_by([base]):
|
|
73
73
|
raise excs.Error(
|
|
74
74
|
f'Column {col.name}: value expression cannot be computed in the context of the base {base.tbl_name()}')
|
|
75
75
|
|
|
@@ -194,6 +194,9 @@ class View(Table):
|
|
|
194
194
|
|
|
195
195
|
def _drop(self) -> None:
|
|
196
196
|
cat = catalog.Catalog.get()
|
|
197
|
+
# verify all dependents are deleted by now
|
|
198
|
+
for dep in cat.tbl_dependents[self._id]:
|
|
199
|
+
assert dep._is_dropped
|
|
197
200
|
if self._snapshot_only:
|
|
198
201
|
# there is not TableVersion to drop
|
|
199
202
|
self._check_is_dropped()
|