pixeltable 0.2.11__py3-none-any.whl → 0.2.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of pixeltable was flagged as possibly problematic by the registry.
- pixeltable/__init__.py +2 -2
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/insertable_table.py +2 -2
- pixeltable/catalog/schema_object.py +28 -2
- pixeltable/catalog/table.py +68 -30
- pixeltable/catalog/table_version.py +14 -43
- pixeltable/catalog/view.py +2 -2
- pixeltable/dataframe.py +8 -7
- pixeltable/exec/expr_eval_node.py +8 -1
- pixeltable/exec/sql_scan_node.py +1 -1
- pixeltable/exprs/__init__.py +0 -1
- pixeltable/exprs/column_ref.py +2 -7
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +12 -12
- pixeltable/exprs/expr.py +32 -0
- pixeltable/exprs/in_predicate.py +3 -3
- pixeltable/exprs/is_null.py +5 -5
- pixeltable/exprs/similarity_expr.py +27 -16
- pixeltable/func/aggregate_function.py +10 -4
- pixeltable/func/callable_function.py +4 -0
- pixeltable/func/function_registry.py +2 -0
- pixeltable/functions/globals.py +36 -1
- pixeltable/functions/huggingface.py +62 -4
- pixeltable/functions/image.py +17 -0
- pixeltable/functions/openai.py +1 -1
- pixeltable/functions/string.py +622 -7
- pixeltable/functions/video.py +26 -8
- pixeltable/globals.py +54 -50
- pixeltable/index/embedding_index.py +28 -27
- pixeltable/io/external_store.py +2 -2
- pixeltable/io/globals.py +54 -5
- pixeltable/io/label_studio.py +45 -5
- pixeltable/io/pandas.py +18 -7
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_17.py +26 -0
- pixeltable/plan.py +6 -6
- pixeltable/tool/create_test_db_dump.py +2 -2
- pixeltable/tool/doc_plugins/griffe.py +77 -0
- pixeltable/tool/doc_plugins/mkdocstrings.py +6 -0
- pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +135 -0
- pixeltable/utils/s3.py +1 -1
- pixeltable-0.2.13.dist-info/METADATA +206 -0
- {pixeltable-0.2.11.dist-info → pixeltable-0.2.13.dist-info}/RECORD +46 -42
- pixeltable-0.2.13.dist-info/entry_points.txt +3 -0
- pixeltable/exprs/predicate.py +0 -44
- pixeltable-0.2.11.dist-info/METADATA +0 -137
- {pixeltable-0.2.11.dist-info → pixeltable-0.2.13.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.11.dist-info → pixeltable-0.2.13.dist-info}/WHEEL +0 -0
pixeltable/__init__.py
CHANGED

@@ -3,8 +3,8 @@ from .dataframe import DataFrame
 from .exceptions import Error
 from .exprs import RELATIVE_PATH_ROOT
 from .func import Function, udf, Aggregator, uda, expr_udf
-from .globals import init, create_table, create_view, get_table, move, drop_table, list_tables, create_dir, \
-    list_dirs, list_functions,
+from .globals import init, create_table, create_view, get_table, move, drop_table, list_tables, create_dir, drop_dir, \
+    list_dirs, list_functions, configure_logging
 from .type_system import (
     ColumnType,
     StringType,
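As a quick orientation, here is a minimal, hypothetical usage sketch of the names this import list now re-exports at the top level; the directory name is made up, and configure_logging is called with defaults only because its signature does not appear in this diff:

import pixeltable as pxt

pxt.configure_logging()            # exported from pixeltable.globals; arguments omitted here
pxt.drop_dir('tmp_experiments')    # exported alongside it; the directory name is hypothetical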
pixeltable/__version__.py
CHANGED

@@ -1,3 +1,3 @@
 # These version placeholders will be replaced during build.
-__version__ = "0.2.11"
-__version_tuple__ = (0, 2, 11)
+__version__ = "0.2.13"
+__version_tuple__ = (0, 2, 13)
pixeltable/catalog/insertable_table.py
CHANGED

@@ -129,11 +129,11 @@ class InsertableTable(Table):
                 msg = str(e)
                 raise excs.Error(f'Error in column {col.name}: {msg[0].lower() + msg[1:]}\nRow: {row}')

-    def delete(self, where: Optional['pixeltable.exprs.Predicate'] = None) -> UpdateStatus:
+    def delete(self, where: Optional['pixeltable.exprs.Expr'] = None) -> UpdateStatus:
         """Delete rows in this table.

         Args:
-            where: a Predicate to filter rows to delete.
+            where: a predicate to filter rows to delete.

         Examples:
             Delete all rows in a table:
pixeltable/catalog/schema_object.py
CHANGED

@@ -1,7 +1,10 @@
 from abc import abstractmethod
-from typing import Optional
+from typing import TYPE_CHECKING, Optional
 from uuid import UUID

+if TYPE_CHECKING:
+    from pixeltable import catalog
+

 class SchemaObject:
     """

@@ -17,9 +20,32 @@ class SchemaObject:
     def _get_id(self) -> UUID:
         return self._id

-    def get_name(self) -> str:
+    @property
+    def name(self) -> str:
+        """Returns the name of this schema object."""
         return self._name

+    @property
+    def parent(self) -> Optional['catalog.Dir']:
+        """Returns the parent directory of this schema object."""
+        from pixeltable import catalog
+        if self._dir_id is None:
+            return None
+        dir = catalog.Catalog.get().paths.get_schema_obj(self._dir_id)
+        assert isinstance(dir, catalog.Dir)
+        return dir
+
+    @property
+    def path(self) -> str:
+        """Returns the path to this schema object."""
+        parent = self.parent
+        if parent is None or parent.parent is None:
+            # Either this is the root directory, with empty path, or its parent is the
+            # root directory. Either way, we return just the name.
+            return self.name
+        else:
+            return f'{parent.path}.{self.name}'
+
     @classmethod
     @abstractmethod
     def display_name(cls) -> str:
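A short, hypothetical sketch of the new read-only SchemaObject properties (the directory and table names below are assumptions, not taken from this diff):

import pixeltable as pxt

t = pxt.get_table('experiments.videos')
t.name    # 'videos'
t.path    # 'experiments.videos' -- dotted path built by walking parent directories
t.parent  # the Dir object for 'experiments'; None only for the root directory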
pixeltable/catalog/table.py
CHANGED

@@ -82,14 +82,22 @@ class Table(SchemaObject):
             return self._queries[index]
         return self._tbl_version_path.__getitem__(index)

-    def
+    def list_views(self, *, recursive: bool = True) -> list[str]:
         """
-
+        Returns a list of all views and snapshots of this `Table`.
+
+        Args:
+            recursive: If `False`, returns only the immediate successor views of this `Table`. If `True`, returns
+                all sub-views (including views of views, etc.)
         """
+        return [t.path for t in self._get_views(recursive=recursive)]
+
+    def _get_views(self, *, recursive: bool = True) -> list['Table']:
+        dependents = catalog.Catalog.get().tbl_dependents[self._get_id()]
         if recursive:
-            return
+            return dependents + [t for view in dependents for t in view._get_views(recursive=True)]
         else:
-            return
+            return dependents

     def _df(self) -> 'pixeltable.dataframe.DataFrame':
         """Return a DataFrame for this table.
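For context, a minimal usage sketch of the new list_views API (the table name is hypothetical):

import pixeltable as pxt

tbl = pxt.get_table('films')
tbl.list_views()                 # paths of all views and snapshots of this table, recursively
tbl.list_views(recursive=False)  # only the immediate views, not views of views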
@@ -105,7 +113,7 @@
         from pixeltable.dataframe import DataFrame
         return DataFrame(self._tbl_version_path).select(*items, **named_items)

-    def where(self, pred: 'exprs.Predicate') -> 'pixeltable.dataframe.DataFrame':
+    def where(self, pred: 'exprs.Expr') -> 'pixeltable.dataframe.DataFrame':
         """Return a DataFrame for this table.
         """
         # local import: avoid circular imports

@@ -500,7 +508,37 @@
             >>> tbl.drop_column('factorial')
         """
         self._check_is_dropped()
-        self._tbl_version.drop_column(name)
+
+        if name not in self._tbl_version.cols_by_name:
+            raise excs.Error(f'Unknown column: {name}')
+        col = self._tbl_version.cols_by_name[name]
+
+        dependent_user_cols = [c for c in col.dependent_cols if c.name is not None]
+        if len(dependent_user_cols) > 0:
+            raise excs.Error(
+                f'Cannot drop column `{name}` because the following columns depend on it:\n'
+                f'{", ".join(c.name for c in dependent_user_cols)}'
+            )
+
+        # See if this column has a dependent store. We need to look through all stores in all
+        # (transitive) views of this table.
+        dependent_stores = [
+            (view, store)
+            for view in [self] + self._get_views(recursive=True)
+            for store in view._tbl_version.external_stores.values()
+            if col in store.get_local_columns()
+        ]
+        if len(dependent_stores) > 0:
+            dependent_store_names = [
+                store.name if view._get_id() == self._get_id() else f'{store.name} (in view `{view.name}`)'
+                for view, store in dependent_stores
+            ]
+            raise excs.Error(
+                f'Cannot drop column `{name}` because the following external stores depend on it:\n'
+                f'{", ".join(dependent_store_names)}'
+            )
+
+        self._tbl_version.drop_column(col)

     def rename_column(self, old_name: str, new_name: str) -> None:
         """Rename a column.
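A sketch of the dependency checks drop_column now performs at the Table level. Assume a hypothetical table with a column 'width' and a computed column 'area' defined from it, and no linked external stores:

tbl.drop_column('missing')   # raises excs.Error: Unknown column: missing
tbl.drop_column('width')     # raises excs.Error: the dependent computed column 'area' blocks the drop
tbl.drop_column('area')      # succeeds: no user-visible columns or external stores depend on it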
@@ -522,15 +560,15 @@

     def add_embedding_index(
             self, col_name: str, *, idx_name: Optional[str] = None,
-
+            string_embed: Optional[pixeltable.Function] = None, image_embed: Optional[pixeltable.Function] = None,
             metric: str = 'cosine'
     ) -> None:
         """Add an index to the table.
         Args:
             col_name: name of column to index
             idx_name: name of index, which needs to be unique for the table; if not provided, a name will be generated
-
-
+            string_embed: function to embed text; required if the column is a text column
+            image_embed: function to embed images; required if the column is an image column
             metric: distance metric to use for the index; one of 'cosine', 'ip', 'l2'; default is 'cosine'

         Raises:

@@ -539,13 +577,13 @@
         Examples:
             Add an index to the ``img`` column:

-            >>> tbl.add_embedding_index('img',
+            >>> tbl.add_embedding_index('img', image_embed=...)

             Add another index to the ``img`` column, using the inner product as the distance metric,
-            and with a specific name; ``
+            and with a specific name; ``string_embed`` is also specified in order to search with text:

             >>> tbl.add_embedding_index(
-                'img', idx_name='clip_idx',
+                'img', idx_name='clip_idx', image_embed=..., string_embed=..., metric='ip')
         """
         if self._tbl_version_path.is_snapshot():
             raise excs.Error('Cannot add an index to a snapshot')
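A hedged sketch of the new keyword-only embedding arguments; embed_text and embed_image stand in for any Pixeltable UDFs that map a string or an image to a fixed-length vector (hypothetical names, not defined in this diff):

tbl.add_embedding_index('img', image_embed=embed_image)
tbl.add_embedding_index(
    'img', idx_name='clip_idx',
    image_embed=embed_image, string_embed=embed_text,   # string_embed enables text queries against the index
    metric='ip',
)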
@@ -557,7 +595,7 @@
             raise excs.Error(f'Duplicate index name: {idx_name}')
         from pixeltable.index import EmbeddingIndex
         # create the EmbeddingIndex instance to verify args
-        idx = EmbeddingIndex(col, metric=metric,
+        idx = EmbeddingIndex(col, metric=metric, string_embed=string_embed, image_embed=image_embed)
         status = self._tbl_version.add_index(col, idx_name=idx_name, idx=idx)
         # TODO: how to deal with exceptions here? drop the index and raise?

@@ -604,26 +642,26 @@
             raise excs.Error('Cannot drop an index from a snapshot')
         self._check_is_dropped()
         if (column_name is None) == (idx_name is None):
-            raise excs.Error(
+            raise excs.Error("Exactly one of 'column_name' or 'idx_name' must be provided")

         if idx_name is not None:
             if idx_name not in self._tbl_version.idxs_by_name:
-                raise excs.Error(f'Index {idx_name} does not exist')
+                raise excs.Error(f'Index {idx_name!r} does not exist')
             idx_id = self._tbl_version.idxs_by_name[idx_name].id
         else:
             col = self._tbl_version_path.get_column(column_name, include_bases=True)
             if col is None:
-                raise excs.Error(f'Column {column_name} unknown')
+                raise excs.Error(f'Column {column_name!r} unknown')
             if col.tbl.id != self._tbl_version.id:
                 raise excs.Error(
-                    f'Column {column_name}: cannot drop index from column that belongs to base ({col.tbl.name})')
+                    f'Column {column_name!r}: cannot drop index from column that belongs to base ({col.tbl.name}!r)')
             idx_info = [info for info in self._tbl_version.idxs_by_name.values() if info.col.id == col.id]
             if _idx_class is not None:
                 idx_info = [info for info in idx_info if isinstance(info.idx, _idx_class)]
             if len(idx_info) == 0:
-                raise excs.Error(f'Column {column_name} does not have an index')
+                raise excs.Error(f'Column {column_name!r} does not have an index')
             if len(idx_info) > 1:
-                raise excs.Error(f
+                raise excs.Error(f"Column {column_name!r} has multiple indices; specify 'idx_name' instead")
             idx_id = idx_info[0].id
         self._tbl_version.drop_index(idx_id)

@@ -678,13 +716,13 @@
         raise NotImplementedError

     def update(
-            self, value_spec: dict[str, Any], where: Optional['pixeltable.exprs.Predicate'] = None, cascade: bool = True
+            self, value_spec: dict[str, Any], where: Optional['pixeltable.exprs.Expr'] = None, cascade: bool = True
     ) -> UpdateStatus:
         """Update rows in this table.

         Args:
             value_spec: a dictionary mapping column names to literal values or Pixeltable expressions.
-            where: a Predicate to filter rows to update.
+            where: a predicate to filter rows to update.
             cascade: if True, also update all computed columns that transitively depend on the updated columns.

         Examples:
@@ -748,11 +786,11 @@
             row_updates.append(col_vals)
         return self._tbl_version.batch_update(row_updates, rowids, cascade)

-    def delete(self, where: Optional['pixeltable.exprs.Predicate'] = None) -> UpdateStatus:
+    def delete(self, where: Optional['pixeltable.exprs.Expr'] = None) -> UpdateStatus:
         """Delete rows in this table.

         Args:
-            where: a Predicate to filter rows to delete.
+            where: a predicate to filter rows to delete.

         Examples:
             Delete all rows in a table:
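Sketch of delete() with the relaxed where argument: any bool-valued expression now serves as the filter (the column name is hypothetical):

tbl.delete(where=tbl.num_views < 10)   # delete only the matching rows
tbl.delete()                           # delete all rows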
@@ -823,13 +861,13 @@
         Links the specified `ExternalStore` to this table.
         """
         if self._tbl_version.is_snapshot:
-            raise excs.Error(f'Table `{self.
+            raise excs.Error(f'Table `{self.name}` is a snapshot, so it cannot be linked to an external store.')
         self._check_is_dropped()
         if store.name in self.external_stores:
-            raise excs.Error(f'Table `{self.
-        _logger.info(f'Linking external store `{store.name}` to table `{self.
+            raise excs.Error(f'Table `{self.name}` already has an external store with that name: {store.name}')
+        _logger.info(f'Linking external store `{store.name}` to table `{self.name}`')
         self._tbl_version.link_external_store(store)
-        print(f'Linked external store `{store.name}` to table `{self.
+        print(f'Linked external store `{store.name}` to table `{self.name}`.')

     def unlink_external_stores(
         self,

@@ -861,11 +899,11 @@
         if not ignore_errors:
             for store in stores:
                 if store not in all_stores:
-                    raise excs.Error(f'Table `{self.
+                    raise excs.Error(f'Table `{self.name}` has no external store with that name: {store}')

         for store in stores:
             self._tbl_version.unlink_external_store(store, delete_external_data=delete_external_data)
-            print(f'Unlinked external store from table `{self.
+            print(f'Unlinked external store from table `{self.name}`: {store}')

     def sync(
         self,

@@ -893,7 +931,7 @@

         for store in stores:
             if store not in all_stores:
-                raise excs.Error(f'Table `{self.
+                raise excs.Error(f'Table `{self.name}` has no external store with that name: {store}')

         from pixeltable.io import SyncStatus

pixeltable/catalog/table_version.py
CHANGED

@@ -540,39 +540,12 @@ class TableVersion:
             num_rows=row_count, num_computed_values=row_count, num_excs=num_excs,
             cols_with_excs=[f'{col.tbl.name}.{col.name}'for col in cols_with_excs if col.name is not None])

-    def drop_column(self,
+    def drop_column(self, col: Column) -> None:
         """Drop a column from the table.
         """
         from pixeltable.catalog import Catalog

         assert not self.is_snapshot
-        if name not in self.cols_by_name:
-            raise excs.Error(f'Unknown column: {name}')
-        col = self.cols_by_name[name]
-        dependent_user_cols = [c for c in col.dependent_cols if c.name is not None]
-        if len(dependent_user_cols) > 0:
-            raise excs.Error(
-                f'Cannot drop column `{name}` because the following columns depend on it:\n'
-                f'{", ".join(c.name for c in dependent_user_cols)}'
-            )
-        # See if this column has a dependent store. We need to look through all stores in all
-        # (transitive) views of this table.
-        transitive_views = Catalog.get().tbls[self.id].get_views(recursive=True)
-        dependent_stores = [
-            (view, store)
-            for view in transitive_views
-            for store in view._tbl_version.external_stores.values()
-            if col in store.get_local_columns()
-        ]
-        if len(dependent_stores) > 0:
-            dependent_store_names = [
-                store.name if view._get_id() == self.id else f'{store.name} (in view `{view.get_name()}`)'
-                for view, store in dependent_stores
-            ]
-            raise excs.Error(
-                f'Cannot drop column `{name}` because the following external stores depend on it:\n'
-                f'{", ".join(dependent_store_names)}'
-            )

         # we're creating a new schema version
         self.version += 1
@@ -596,7 +569,7 @@ class TableVersion:
         del self.idxs_by_name[idx_name]
         self._drop_columns(dropped_cols)
         self._update_md(time.time(), conn, preceding_schema_version=preceding_schema_version)
-        _logger.info(f'Dropped column {name} from table {self.name}, new version: {self.version}')
+        _logger.info(f'Dropped column {col.name} from table {self.name}, new version: {self.version}')

     def _drop_columns(self, cols: Iterable[Column]) -> None:
         """Mark columns as dropped"""
@@ -705,12 +678,12 @@ class TableVersion:
         return result

     def update(
-            self, value_spec: dict[str, Any], where: Optional['exprs.Predicate'] = None, cascade: bool = True
+            self, value_spec: dict[str, Any], where: Optional['exprs.Expr'] = None, cascade: bool = True
     ) -> UpdateStatus:
         """Update rows in this TableVersionPath.
         Args:
             value_spec: a list of (column, value) pairs specifying the columns to update and their new values.
-            where: a Predicate to filter rows to update.
+            where: a predicate to filter rows to update.
             cascade: if True, also update all computed columns that transitively depend on the updated columns,
                 including within views.
         """

@@ -721,8 +694,8 @@ class TableVersion:

         update_spec = self._validate_update_spec(value_spec, allow_pk=False, allow_exprs=True)
         if where is not None:
-            if not isinstance(where, exprs.Predicate):
-                raise excs.Error(f"'where' argument must be a Predicate, got {type(where)}")
+            if not isinstance(where, exprs.Expr):
+                raise excs.Error(f"'where' argument must be a predicate, got {type(where)}")
             analysis_info = Planner.analyze(self.path, where)
             # for now we require that the updated rows can be identified via SQL, rather than via a Python filter
             if analysis_info.filter is not None:
@@ -784,7 +757,7 @@ class TableVersion:

     def _update(
             self, conn: sql.engine.Connection, update_targets: dict[Column, 'pixeltable.exprs.Expr'],
-            where_clause: Optional['pixeltable.exprs.Predicate'] = None, cascade: bool = True,
+            where_clause: Optional['pixeltable.exprs.Expr'] = None, cascade: bool = True,
             show_progress: bool = True
     ) -> UpdateStatus:
         from pixeltable.plan import Planner

@@ -816,8 +789,6 @@ class TableVersion:
             raise excs.Error(f'Column {col_name} is computed and cannot be updated')
         if col.is_pk and not allow_pk:
             raise excs.Error(f'Column {col_name} is a primary key column and cannot be updated')
-        if col.col_type.is_media_type():
-            raise excs.Error(f'Column {col_name} has type image/video/audio/document and cannot be updated')

         # make sure that the value is compatible with the column type
         try:
@@ -875,17 +846,17 @@ class TableVersion:
         result.cols_with_excs = list(dict.fromkeys(result.cols_with_excs).keys()) # remove duplicates
         return result

-    def delete(self, where: Optional['exprs.Predicate'] = None) -> UpdateStatus:
+    def delete(self, where: Optional['exprs.Expr'] = None) -> UpdateStatus:
         """Delete rows in this table.
         Args:
-            where: a Predicate to filter rows to delete.
+            where: a predicate to filter rows to delete.
         """
         assert self.is_insertable()
-        from pixeltable.exprs import Predicate
+        from pixeltable.exprs import Expr
         from pixeltable.plan import Planner
         if where is not None:
-            if not isinstance(where, Predicate):
-                raise excs.Error(f"'where' argument must be a Predicate, got {type(where)}")
+            if not isinstance(where, Expr):
+                raise excs.Error(f"'where' argument must be a predicate, got {type(where)}")
             analysis_info = Planner.analyze(self.path, where)
             # for now we require that the updated rows can be identified via SQL, rather than via a Python filter
             if analysis_info.filter is not None:
@@ -899,11 +870,11 @@ class TableVersion:
         return status

     def propagate_delete(
-            self, where: Optional['exprs.Predicate'], base_versions: List[Optional[int]],
+            self, where: Optional['exprs.Expr'], base_versions: List[Optional[int]],
             conn: sql.engine.Connection, timestamp: float) -> int:
         """Delete rows in this table and propagate to views.
         Args:
-            where: a Predicate to filter rows to delete.
+            where: a predicate to filter rows to delete.
         Returns:
             number of deleted rows
         """
pixeltable/catalog/view.py
CHANGED

@@ -51,7 +51,7 @@ class View(Table):
     @classmethod
     def create(
             cls, dir_id: UUID, name: str, base: TableVersionPath, schema: Dict[str, Any],
-            predicate: 'pxt.exprs.Predicate', is_snapshot: bool, num_retained_versions: int, comment: str,
+            predicate: 'pxt.exprs.Expr', is_snapshot: bool, num_retained_versions: int, comment: str,
             iterator_cls: Optional[Type[ComponentIterator]], iterator_args: Optional[Dict]
     ) -> View:
         columns = cls._create_columns(schema)

@@ -213,5 +213,5 @@ class View(Table):
     ) -> UpdateStatus:
         raise excs.Error(f'{self.display_name()} {self._name!r}: cannot insert into view')

-    def delete(self, where: Optional['pixeltable.exprs.Predicate'] = None) -> UpdateStatus:
+    def delete(self, where: Optional['pixeltable.exprs.Expr'] = None) -> UpdateStatus:
         raise excs.Error(f'{self.display_name()} {self._name!r}: cannot delete from view')
pixeltable/dataframe.py
CHANGED

@@ -153,7 +153,7 @@ class DataFrame:
         self,
         tbl: catalog.TableVersionPath,
         select_list: Optional[List[Tuple[exprs.Expr, Optional[str]]]] = None,
-        where_clause: Optional[exprs.Predicate] = None,
+        where_clause: Optional[exprs.Expr] = None,
         group_by_clause: Optional[List[exprs.Expr]] = None,
         grouping_tbl: Optional[catalog.TableVersion] = None,
         order_by_clause: Optional[List[Tuple[exprs.Expr, bool]]] = None, # List[(expr, asc)]

@@ -530,7 +530,11 @@ class DataFrame:
             limit=self.limit_val,
         )

-    def where(self, pred: exprs.Predicate) -> DataFrame:
+    def where(self, pred: exprs.Expr) -> DataFrame:
+        if not isinstance(pred, exprs.Expr):
+            raise excs.Error(f'Where() requires a Pixeltable expression, but instead got {type(pred)}')
+        if not pred.col_type.is_bool_type():
+            raise excs.Error(f'Where(): expression needs to return bool, but instead returns {pred.col_type}')
         return DataFrame(
             self.tbl,
             select_list=self.select_list,
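Sketch of the new where() validation (column names are hypothetical): any bool-typed expression is accepted, anything else now raises immediately instead of failing later:

tbl.where(tbl.num_views > 1000).collect()                          # ok: comparison yields a bool expression
tbl.where((tbl.lang == 'en') & (tbl.num_views > 1000)).collect()   # ok: compound predicate, still bool-typed
tbl.where(tbl.num_views + 1)                                       # raises: expression returns an Int, not bool
tbl.where('num_views > 1000')                                      # raises: a str is not a Pixeltable expression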
@@ -558,7 +562,7 @@ class DataFrame:
             # we need to make sure that the grouping table is a base of self.tbl
             base = self.tbl.find_tbl_version(item._tbl_version_path.tbl_id())
             if base is None or base.id == self.tbl.tbl_id():
-                raise excs.Error(f'group_by(): {item.
+                raise excs.Error(f'group_by(): {item.name} is not a base table of {self.tbl.tbl_name()}')
             grouping_tbl = item._tbl_version_path.tbl_version
             break
         if not isinstance(item, exprs.Expr):

@@ -628,12 +632,9 @@ class DataFrame:
     def __getitem__(self, index: object) -> DataFrame:
         """
         Allowed:
-        - [<Predicate>]: filter operation
         - [List[Expr]]/[Tuple[Expr]]: setting the select list
         - [Expr]: setting a single-col select list
         """
-        if isinstance(index, exprs.Predicate):
-            return self.where(index)
         if isinstance(index, tuple):
             index = list(index)
         if isinstance(index, exprs.Expr):

@@ -668,7 +669,7 @@ class DataFrame:
         tbl = catalog.TableVersionPath.from_dict(d['tbl'])
         select_list = [(exprs.Expr.from_dict(e), name) for e, name in d['select_list']] \
             if d['select_list'] is not None else None
-        where_clause = exprs.Predicate.from_dict(d['where_clause']) \
+        where_clause = exprs.Expr.from_dict(d['where_clause']) \
             if d['where_clause'] is not None else None
         group_by_clause = [exprs.Expr.from_dict(e) for e in d['group_by_clause']] \
             if d['group_by_clause'] is not None else None
pixeltable/exec/expr_eval_node.py
CHANGED

@@ -50,7 +50,14 @@ class ExprEvalNode(ExecNode):

     def _open(self) -> None:
         warnings.simplefilter("ignore", category=TqdmWarning)
-        if self.ctx.show_pbar:
+        # This is a temporary hack. When B-tree indices on string columns were implemented (via computed columns
+        # that invoke the `BtreeIndex.str_filter` udf), it resulted in frivolous progress bars appearing on every
+        # insertion. This special-cases the `str_filter` call to suppress the corresponding progress bar.
+        # TODO(aaron-siegel) Remove this hack once we clean up progress bars more generally.
+        is_str_filter_node = all(
+            isinstance(expr, exprs.FunctionCall) and expr.fn.name == 'str_filter' for expr in self.output_exprs
+        )
+        if self.ctx.show_pbar and not is_str_filter_node:
             self.pbar = tqdm(
                 total=len(self.target_exprs) * self.ctx.num_rows,
                 desc='Computing cells',
pixeltable/exec/sql_scan_node.py
CHANGED

@@ -19,7 +19,7 @@ class SqlScanNode(ExecNode):
     def __init__(
             self, tbl: catalog.TableVersionPath, row_builder: exprs.RowBuilder,
             select_list: Iterable[exprs.Expr],
-            where_clause: Optional[exprs.Expr] = None, filter: Optional[exprs.Predicate] = None,
+            where_clause: Optional[exprs.Expr] = None, filter: Optional[exprs.Expr] = None,
             order_by_items: Optional[List[Tuple[exprs.Expr, bool]]] = None,
             limit: int = 0, set_pk: bool = False, exact_version_only: Optional[List[catalog.TableVersion]] = None
     ):
pixeltable/exprs/__init__.py
CHANGED

@@ -17,7 +17,6 @@ from .json_mapper import JsonMapper
 from .json_path import RELATIVE_PATH_ROOT, JsonPath
 from .literal import Literal
 from .object_ref import ObjectRef
-from .predicate import Predicate
 from .row_builder import RowBuilder, ColumnSlotIdx, ExecProfile
 from .rowid_ref import RowidRef
 from .similarity_expr import SimilarityExpr
pixeltable/exprs/column_ref.py
CHANGED

@@ -63,14 +63,9 @@ class ColumnRef(Expr):

         return super().__getattr__(name)

-    def similarity(self,
-        # if isinstance(other, Expr):
-        #     raise excs.Error(f'similarity(): requires a string or a PIL.Image.Image object, not an expression')
-        item = Expr.from_object(other)
-        if item is None or not(item.col_type.is_string_type() or item.col_type.is_image_type()):
-            raise excs.Error(f'similarity(): requires a string or a PIL.Image.Image object, not a {type(other)}')
+    def similarity(self, item: Any, *, idx: Optional[str] = None) -> Expr:
         from .similarity_expr import SimilarityExpr
-        return SimilarityExpr(self, item)
+        return SimilarityExpr(self, item, idx_name=idx)

     def default_column_name(self) -> Optional[str]:
         return str(self)
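Sketch of the updated similarity() signature, which now takes an optional index name so a specific embedding index can be selected (the column, index name, and query text are hypothetical):

sim = tbl.img.similarity('a photo of a dog', idx='clip_idx')
tbl.order_by(sim, asc=False).limit(5).collect()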
pixeltable/exprs/comparison.py
CHANGED

@@ -1,6 +1,6 @@
 from __future__ import annotations

-from typing import Optional, List, Any, Dict
+from typing import Optional, List, Any, Dict

 import sqlalchemy as sql

@@ -9,15 +9,15 @@ from .data_row import DataRow
 from .expr import Expr
 from .globals import ComparisonOperator
 from .literal import Literal
-from .predicate import Predicate
 from .row_builder import RowBuilder
 import pixeltable.exceptions as excs
 import pixeltable.index as index
+import pixeltable.type_system as ts


-class Comparison(Predicate):
+class Comparison(Expr):
     def __init__(self, operator: ComparisonOperator, op1: Expr, op2: Expr):
-        super().__init__()
+        super().__init__(ts.BoolType())
         self.operator = operator

         # if this is a comparison of a column to a literal (ie, could be used as a search argument in an index lookup),

@@ -50,7 +50,7 @@ class Comparison(Predicate):
     def _equals(self, other: Comparison) -> bool:
         return self.operator == other.operator

-    def _id_attrs(self) ->
+    def _id_attrs(self) -> list[tuple[str, Any]]:
         return super()._id_attrs() + [('operator', self.operator.value)]

     @property
pixeltable/exprs/compound_predicate.py
CHANGED

@@ -1,20 +1,20 @@
 from __future__ import annotations
-
+
 import operator
+from typing import Optional, List, Any, Dict, Callable

 import sqlalchemy as sql

+from .data_row import DataRow
 from .expr import Expr
 from .globals import LogicalOperator
-from .predicate import Predicate
-from .data_row import DataRow
 from .row_builder import RowBuilder
-import pixeltable.
+import pixeltable.type_system as ts


-class CompoundPredicate(Predicate):
-    def __init__(self, operator: LogicalOperator, operands: List[Predicate]):
-        super().__init__()
+class CompoundPredicate(Expr):
+    def __init__(self, operator: LogicalOperator, operands: List[Expr]):
+        super().__init__(ts.BoolType())
         self.operator = operator
         # operands are stored in self.components
         if self.operator == LogicalOperator.NOT:

@@ -22,7 +22,7 @@ class CompoundPredicate(Predicate):
             self.components = operands
         else:
             assert len(operands) > 1
-            self.operands: List[Predicate] = []
+            self.operands: List[Expr] = []
             for operand in operands:
                 self._merge_operand(operand)

@@ -34,14 +34,14 @@ class CompoundPredicate(Predicate):
         return f' {self.operator} '.join([f'({e})' for e in self.components])

     @classmethod
-    def make_conjunction(cls, operands: List[Predicate]) -> Optional[Predicate]:
+    def make_conjunction(cls, operands: List[Expr]) -> Optional[Expr]:
         if len(operands) == 0:
             return None
         if len(operands) == 1:
             return operands[0]
         return CompoundPredicate(LogicalOperator.AND, operands)

-    def _merge_operand(self, operand: Predicate) -> None:
+    def _merge_operand(self, operand: Expr) -> None:
         """
         Merge this operand, if possible, otherwise simply record it.
         """
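With the Predicate class gone, logical combinations are ordinary bool-typed expressions; a short sketch with hypothetical column names:

pred = (tbl.lang == 'en') & (tbl.num_views > 1000)   # CompoundPredicate, now just an Expr of BoolType
tbl.where(pred).collect()
tbl.delete(where=pred)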
@@ -55,11 +55,11 @@ class CompoundPredicate(Predicate):
     def _equals(self, other: CompoundPredicate) -> bool:
         return self.operator == other.operator

-    def _id_attrs(self) ->
+    def _id_attrs(self) -> list[tuple[str, Any]]:
         return super()._id_attrs() + [('operator', self.operator.value)]

     def split_conjuncts(
-            self, condition: Callable[[
+            self, condition: Callable[[Expr], bool]) -> tuple[list[Expr], Optional[Expr]]:
         if self.operator == LogicalOperator.OR or self.operator == LogicalOperator.NOT:
             return super().split_conjuncts(condition)
         matches = [op for op in self.components if condition(op)]