pixeltable 0.4.14__py3-none-any.whl → 0.4.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +6 -1
- pixeltable/catalog/catalog.py +107 -45
- pixeltable/catalog/column.py +7 -2
- pixeltable/catalog/table.py +1 -0
- pixeltable/catalog/table_metadata.py +5 -0
- pixeltable/catalog/table_version.py +100 -106
- pixeltable/catalog/table_version_handle.py +4 -1
- pixeltable/catalog/update_status.py +12 -0
- pixeltable/config.py +6 -0
- pixeltable/dataframe.py +11 -5
- pixeltable/env.py +52 -19
- pixeltable/exec/__init__.py +2 -0
- pixeltable/exec/cell_materialization_node.py +231 -0
- pixeltable/exec/cell_reconstruction_node.py +135 -0
- pixeltable/exec/exec_node.py +1 -1
- pixeltable/exec/expr_eval/evaluators.py +1 -0
- pixeltable/exec/expr_eval/expr_eval_node.py +14 -0
- pixeltable/exec/expr_eval/globals.py +2 -0
- pixeltable/exec/globals.py +32 -0
- pixeltable/exec/object_store_save_node.py +1 -4
- pixeltable/exec/row_update_node.py +16 -9
- pixeltable/exec/sql_node.py +107 -14
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/arithmetic_expr.py +10 -11
- pixeltable/exprs/column_property_ref.py +10 -10
- pixeltable/exprs/column_ref.py +2 -2
- pixeltable/exprs/data_row.py +106 -37
- pixeltable/exprs/expr.py +9 -0
- pixeltable/exprs/expr_set.py +14 -7
- pixeltable/exprs/inline_expr.py +2 -19
- pixeltable/exprs/json_path.py +45 -12
- pixeltable/exprs/row_builder.py +54 -22
- pixeltable/functions/__init__.py +1 -0
- pixeltable/functions/bedrock.py +7 -0
- pixeltable/functions/deepseek.py +11 -4
- pixeltable/functions/llama_cpp.py +7 -0
- pixeltable/functions/math.py +1 -1
- pixeltable/functions/ollama.py +7 -0
- pixeltable/functions/openai.py +4 -4
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/video.py +123 -9
- pixeltable/functions/whisperx.py +2 -0
- pixeltable/functions/yolox.py +2 -0
- pixeltable/globals.py +56 -31
- pixeltable/io/__init__.py +1 -0
- pixeltable/io/globals.py +16 -15
- pixeltable/io/table_data_conduit.py +46 -21
- pixeltable/iterators/__init__.py +1 -0
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/notes.py +1 -0
- pixeltable/plan.py +175 -46
- pixeltable/share/publish.py +0 -1
- pixeltable/store.py +2 -2
- pixeltable/type_system.py +5 -3
- pixeltable/utils/console_output.py +4 -1
- pixeltable/utils/exception_handler.py +5 -28
- pixeltable/utils/image.py +7 -0
- pixeltable/utils/misc.py +5 -0
- {pixeltable-0.4.14.dist-info → pixeltable-0.4.16.dist-info}/METADATA +2 -1
- {pixeltable-0.4.14.dist-info → pixeltable-0.4.16.dist-info}/RECORD +64 -57
- {pixeltable-0.4.14.dist-info → pixeltable-0.4.16.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.14.dist-info → pixeltable-0.4.16.dist-info}/entry_points.txt +0 -0
- {pixeltable-0.4.14.dist-info → pixeltable-0.4.16.dist-info}/licenses/LICENSE +0 -0
pixeltable/catalog/table_version.py
CHANGED
|
@@ -11,6 +11,7 @@ from uuid import UUID
|
|
|
11
11
|
|
|
12
12
|
import jsonschema.exceptions
|
|
13
13
|
import sqlalchemy as sql
|
|
14
|
+
from sqlalchemy import exc as sql_exc
|
|
14
15
|
|
|
15
16
|
import pixeltable as pxt
|
|
16
17
|
import pixeltable.exceptions as excs
|
|
@@ -18,24 +19,19 @@ from pixeltable import exprs, index
|
|
|
18
19
|
from pixeltable.env import Env
|
|
19
20
|
from pixeltable.iterators import ComponentIterator
|
|
20
21
|
from pixeltable.metadata import schema
|
|
21
|
-
from pixeltable.utils.exception_handler import run_cleanup_on_exception
|
|
22
22
|
from pixeltable.utils.filecache import FileCache
|
|
23
23
|
from pixeltable.utils.object_stores import ObjectOps
|
|
24
24
|
|
|
25
|
-
from .tbl_ops import TableOp
|
|
26
|
-
|
|
27
|
-
if TYPE_CHECKING:
|
|
28
|
-
from pixeltable.plan import SampleClause
|
|
29
|
-
|
|
30
25
|
from ..func.globals import resolve_symbol
|
|
31
26
|
from .column import Column
|
|
32
27
|
from .globals import _POS_COLUMN_NAME, _ROWID_COLUMN_NAME, MediaValidation, is_valid_identifier
|
|
28
|
+
from .tbl_ops import TableOp
|
|
33
29
|
from .update_status import RowCountStats, UpdateStatus
|
|
34
30
|
|
|
35
31
|
if TYPE_CHECKING:
|
|
36
32
|
from pixeltable import exec, store
|
|
37
|
-
|
|
38
|
-
from .
|
|
33
|
+
from pixeltable.catalog.table_version_handle import TableVersionHandle
|
|
34
|
+
from pixeltable.plan import SampleClause
|
|
39
35
|
|
|
40
36
|
_logger = logging.getLogger('pixeltable')
|
|
41
37
|
|
|
@@ -295,7 +291,14 @@ class TableVersion:
|
|
|
295
291
|
cat = pxt.catalog.Catalog.get()
|
|
296
292
|
|
|
297
293
|
tbl_id = UUID(hex=inital_md.tbl_md.tbl_id)
|
|
294
|
+
assert (tbl_id, None) not in cat._tbl_versions
|
|
298
295
|
tbl_version = cls(tbl_id, inital_md.tbl_md, inital_md.version_md, None, inital_md.schema_version_md, [])
|
|
296
|
+
|
|
297
|
+
@cat.register_undo_action
|
|
298
|
+
def _() -> None:
|
|
299
|
+
if (tbl_id, None) in cat._tbl_versions:
|
|
300
|
+
del cat._tbl_versions[tbl_id, None]
|
|
301
|
+
|
|
299
302
|
# TODO: break this up, so that Catalog.create_table() registers tbl_version
|
|
300
303
|
cat._tbl_versions[tbl_id, None] = tbl_version
|
|
301
304
|
tbl_version.init()
|
|
@@ -508,9 +511,7 @@ class TableVersion:
|
|
|
508
511
|
|
|
509
512
|
def add_index(self, col: Column, idx_name: Optional[str], idx: index.IndexBase) -> UpdateStatus:
|
|
510
513
|
# we're creating a new schema version
|
|
511
|
-
self.
|
|
512
|
-
self.created_at = time.time()
|
|
513
|
-
self.schema_version = self.version
|
|
514
|
+
self.bump_version(bump_schema_version=True)
|
|
514
515
|
status = self._add_index(col, idx_name, idx)
|
|
515
516
|
self._write_md(new_version=True, new_schema_version=True)
|
|
516
517
|
_logger.info(f'Added index {idx_name} on column {col.name} to table {self.name}')
|
|
@@ -604,18 +605,7 @@ class TableVersion:
|
|
|
604
605
|
idx_info = self.IndexInfo(id=idx_id, name=idx_name, idx=idx, col=col, val_col=val_col, undo_col=undo_col)
|
|
605
606
|
self._tbl_md.index_md[idx_id] = idx_md
|
|
606
607
|
self.idxs_by_name[idx_name] = idx_info
|
|
607
|
-
|
|
608
|
-
idx.create_index(self._store_idx_name(idx_id), val_col)
|
|
609
|
-
finally:
|
|
610
|
-
|
|
611
|
-
def cleanup_index() -> None:
|
|
612
|
-
"""Delete the newly added in-memory index structure"""
|
|
613
|
-
del self.idxs_by_name[idx_name]
|
|
614
|
-
del self._tbl_md.index_md[idx_id]
|
|
615
|
-
self.next_idx_id = idx_id
|
|
616
|
-
|
|
617
|
-
# Run cleanup only if there has been an exception; otherwise, skip cleanup.
|
|
618
|
-
run_cleanup_on_exception(cleanup_index)
|
|
608
|
+
idx.create_index(self._store_idx_name(idx_id), val_col)
|
|
619
609
|
|
|
620
610
|
def _add_index(self, col: Column, idx_name: Optional[str], idx: index.IndexBase) -> UpdateStatus:
|
|
621
611
|
val_col, undo_vol = self._create_index_columns(idx)
|
|
@@ -632,9 +622,7 @@ class TableVersion:
|
|
|
632
622
|
assert idx_id in self._tbl_md.index_md
|
|
633
623
|
|
|
634
624
|
# we're creating a new schema version
|
|
635
|
-
self.
|
|
636
|
-
self.created_at = time.time()
|
|
637
|
-
self.schema_version = self.version
|
|
625
|
+
self.bump_version(bump_schema_version=True)
|
|
638
626
|
idx_md = self._tbl_md.index_md[idx_id]
|
|
639
627
|
idx_md.schema_version_drop = self.schema_version
|
|
640
628
|
assert idx_md.name in self.idxs_by_name
|
|
@@ -663,9 +651,7 @@ class TableVersion:
|
|
|
663
651
|
self.next_col_id += 1
|
|
664
652
|
|
|
665
653
|
# we're creating a new schema version
|
|
666
|
-
self.
|
|
667
|
-
self.created_at = time.time()
|
|
668
|
-
self.schema_version = self.version
|
|
654
|
+
self.bump_version(bump_schema_version=True)
|
|
669
655
|
index_cols: dict[Column, tuple[index.BtreeIndex, Column, Column]] = {}
|
|
670
656
|
all_cols: list[Column] = []
|
|
671
657
|
for col in cols:
|
|
@@ -697,7 +683,11 @@ class TableVersion:
|
|
|
697
683
|
self, cols: Iterable[Column], print_stats: bool, on_error: Literal['abort', 'ignore']
|
|
698
684
|
) -> UpdateStatus:
|
|
699
685
|
"""Add and populate columns within the current transaction"""
|
|
686
|
+
from pixeltable.catalog import Catalog
|
|
687
|
+
from pixeltable.plan import Planner
|
|
688
|
+
|
|
700
689
|
cols_to_add = list(cols)
|
|
690
|
+
|
|
701
691
|
row_count = self.store_tbl.count()
|
|
702
692
|
for col in cols_to_add:
|
|
703
693
|
assert col.tbl is self
|
|
@@ -734,40 +724,27 @@ class TableVersion:
|
|
|
734
724
|
continue
|
|
735
725
|
|
|
736
726
|
# populate the column
|
|
737
|
-
from pixeltable.plan import Planner
|
|
738
|
-
|
|
739
727
|
plan = Planner.create_add_column_plan(self.path, col)
|
|
740
728
|
plan.ctx.num_rows = row_count
|
|
741
729
|
try:
|
|
742
730
|
plan.open()
|
|
743
731
|
try:
|
|
744
732
|
excs_per_col = self.store_tbl.load_column(col, plan, on_error == 'abort')
|
|
745
|
-
except
|
|
746
|
-
|
|
747
|
-
|
|
733
|
+
except sql_exc.DBAPIError as exc:
|
|
734
|
+
Catalog.get().convert_sql_exc(exc, self.id, self.handle, convert_db_excs=True)
|
|
735
|
+
# If it wasn't converted, re-raise as a generic Pixeltable error
|
|
736
|
+
# (this means it's not a known concurrency error; it's something else)
|
|
737
|
+
raise excs.Error(
|
|
738
|
+
f'Unexpected SQL error during execution of computed column {col.name!r}:\n{exc}'
|
|
739
|
+
) from exc
|
|
748
740
|
if excs_per_col > 0:
|
|
749
741
|
cols_with_excs.append(col)
|
|
750
742
|
num_excs += excs_per_col
|
|
751
743
|
computed_values += plan.ctx.num_computed_exprs * row_count
|
|
752
744
|
finally:
|
|
753
|
-
# Ensure cleanup occurs if an exception or keyboard interruption happens during `load_column()`.
|
|
754
|
-
def cleanup_on_error() -> None:
|
|
755
|
-
"""Delete columns that are added as part of current add_columns operation and re-initialize
|
|
756
|
-
the sqlalchemy schema"""
|
|
757
|
-
self.cols = [col for col in self.cols if col not in cols_to_add]
|
|
758
|
-
for col in cols_to_add:
|
|
759
|
-
# remove columns that we already added
|
|
760
|
-
if col.id in self.cols_by_id:
|
|
761
|
-
del self.cols_by_id[col.id]
|
|
762
|
-
if col.name is not None and col.name in self.cols_by_name:
|
|
763
|
-
del self.cols_by_name[col.name]
|
|
764
|
-
self.store_tbl.create_sa_tbl()
|
|
765
|
-
|
|
766
|
-
# Run cleanup only if there has been an exception; otherwise, skip cleanup.
|
|
767
|
-
run_cleanup_on_exception(cleanup_on_error)
|
|
768
745
|
plan.close()
|
|
769
746
|
|
|
770
|
-
|
|
747
|
+
Catalog.get().record_column_dependencies(self)
|
|
771
748
|
|
|
772
749
|
if print_stats:
|
|
773
750
|
plan.ctx.profile.print(num_rows=row_count)
|
|
@@ -787,9 +764,7 @@ class TableVersion:
|
|
|
787
764
|
assert self.is_mutable
|
|
788
765
|
|
|
789
766
|
# we're creating a new schema version
|
|
790
|
-
self.
|
|
791
|
-
self.created_at = time.time()
|
|
792
|
-
self.schema_version = self.version
|
|
767
|
+
self.bump_version(bump_schema_version=True)
|
|
793
768
|
|
|
794
769
|
# drop this column and all dependent index columns and indices
|
|
795
770
|
dropped_cols = [col]
|
|
@@ -853,9 +828,7 @@ class TableVersion:
|
|
|
853
828
|
self._schema_version_md.columns[col.id].name = new_name
|
|
854
829
|
|
|
855
830
|
# we're creating a new schema version
|
|
856
|
-
self.
|
|
857
|
-
self.created_at = time.time()
|
|
858
|
-
self.schema_version = self.version
|
|
831
|
+
self.bump_version(bump_schema_version=True)
|
|
859
832
|
|
|
860
833
|
self._write_md(new_version=True, new_schema_version=True)
|
|
861
834
|
_logger.info(f'Renamed column {old_name} to {new_name} in table {self.name}, new version: {self.version}')
|
|
@@ -875,9 +848,7 @@ class TableVersion:
|
|
|
875
848
|
|
|
876
849
|
def _create_schema_version(self) -> None:
|
|
877
850
|
# we're creating a new schema version
|
|
878
|
-
self.
|
|
879
|
-
self.created_at = time.time()
|
|
880
|
-
self.schema_version = self.version
|
|
851
|
+
self.bump_version(bump_schema_version=True)
|
|
881
852
|
self._write_md(new_version=True, new_schema_version=True)
|
|
882
853
|
_logger.info(f'[{self.name}] Updating table schema to version: {self.version}')
|
|
883
854
|
|
|
@@ -924,8 +895,7 @@ class TableVersion:
|
|
|
924
895
|
) -> UpdateStatus:
|
|
925
896
|
"""Insert rows produced by exec_plan and propagate to views"""
|
|
926
897
|
# we're creating a new version
|
|
927
|
-
self.
|
|
928
|
-
self.created_at = timestamp
|
|
898
|
+
self.bump_version(timestamp, bump_schema_version=False)
|
|
929
899
|
cols_with_excs, row_counts = self.store_tbl.insert_rows(
|
|
930
900
|
exec_plan, v_min=self.version, rowids=rowids, abort_on_exc=abort_on_exc
|
|
931
901
|
)
|
|
@@ -960,10 +930,11 @@ class TableVersion:
|
|
|
960
930
|
cascade: if True, also update all computed columns that transitively depend on the updated columns,
|
|
961
931
|
including within views.
|
|
962
932
|
"""
|
|
963
|
-
|
|
964
|
-
|
|
933
|
+
from pixeltable.exprs import SqlElementCache
|
|
965
934
|
from pixeltable.plan import Planner
|
|
966
935
|
|
|
936
|
+
assert self.is_mutable
|
|
937
|
+
|
|
967
938
|
update_spec = self._validate_update_spec(value_spec, allow_pk=False, allow_exprs=True, allow_media=True)
|
|
968
939
|
if where is not None:
|
|
969
940
|
if not isinstance(where, exprs.Expr):
|
|
@@ -974,7 +945,6 @@ class TableVersion:
|
|
|
974
945
|
raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
|
|
975
946
|
|
|
976
947
|
plan, updated_cols, recomputed_cols = Planner.create_update_plan(self.path, update_spec, [], where, cascade)
|
|
977
|
-
from pixeltable.exprs import SqlElementCache
|
|
978
948
|
|
|
979
949
|
result = self.propagate_update(
|
|
980
950
|
plan,
|
|
@@ -1001,11 +971,11 @@ class TableVersion:
|
|
|
1001
971
|
batch: one dict per row, each mapping Columns to LiteralExprs representing the new values
|
|
1002
972
|
rowids: if not empty, one tuple per row, each containing the rowid values for the corresponding row in batch
|
|
1003
973
|
"""
|
|
974
|
+
from pixeltable.plan import Planner
|
|
975
|
+
|
|
1004
976
|
# if we do lookups of rowids, we must have one for each row in the batch
|
|
1005
977
|
assert len(rowids) == 0 or len(rowids) == len(batch)
|
|
1006
978
|
|
|
1007
|
-
from pixeltable.plan import Planner
|
|
1008
|
-
|
|
1009
979
|
plan, row_update_node, delete_where_clause, updated_cols, recomputed_cols = Planner.create_batch_update_plan(
|
|
1010
980
|
self.path, batch, rowids, cascade=cascade
|
|
1011
981
|
)
|
|
@@ -1077,14 +1047,14 @@ class TableVersion:
|
|
|
1077
1047
|
def recompute_columns(
|
|
1078
1048
|
self, col_names: list[str], where: exprs.Expr | None = None, errors_only: bool = False, cascade: bool = True
|
|
1079
1049
|
) -> UpdateStatus:
|
|
1050
|
+
from pixeltable.exprs import CompoundPredicate, SqlElementCache
|
|
1051
|
+
from pixeltable.plan import Planner
|
|
1052
|
+
|
|
1080
1053
|
assert self.is_mutable
|
|
1081
1054
|
assert all(name in self.cols_by_name for name in col_names)
|
|
1082
1055
|
assert len(col_names) > 0
|
|
1083
1056
|
assert len(col_names) == 1 or not errors_only
|
|
1084
1057
|
|
|
1085
|
-
from pixeltable.exprs import CompoundPredicate
|
|
1086
|
-
from pixeltable.plan import Planner
|
|
1087
|
-
|
|
1088
1058
|
target_columns = [self.cols_by_name[name] for name in col_names]
|
|
1089
1059
|
where_clause: Optional[exprs.Expr] = None
|
|
1090
1060
|
if where is not None:
|
|
@@ -1099,7 +1069,6 @@ class TableVersion:
|
|
|
1099
1069
|
plan, updated_cols, recomputed_cols = Planner.create_update_plan(
|
|
1100
1070
|
self.path, update_targets={}, recompute_targets=target_columns, where_clause=where_clause, cascade=cascade
|
|
1101
1071
|
)
|
|
1102
|
-
from pixeltable.exprs import SqlElementCache
|
|
1103
1072
|
|
|
1104
1073
|
result = self.propagate_update(
|
|
1105
1074
|
plan,
|
|
@@ -1123,11 +1092,14 @@ class TableVersion:
|
|
|
1123
1092
|
cascade: bool,
|
|
1124
1093
|
show_progress: bool = True,
|
|
1125
1094
|
) -> UpdateStatus:
|
|
1095
|
+
from pixeltable.catalog import Catalog
|
|
1096
|
+
from pixeltable.plan import Planner
|
|
1097
|
+
|
|
1098
|
+
Catalog.get().mark_modified_tvs(self.handle)
|
|
1126
1099
|
result = UpdateStatus()
|
|
1127
1100
|
create_new_table_version = plan is not None
|
|
1128
1101
|
if create_new_table_version:
|
|
1129
|
-
self.
|
|
1130
|
-
self.created_at = timestamp
|
|
1102
|
+
self.bump_version(timestamp, bump_schema_version=False)
|
|
1131
1103
|
cols_with_excs, row_counts = self.store_tbl.insert_rows(
|
|
1132
1104
|
plan, v_min=self.version, show_progress=show_progress
|
|
1133
1105
|
)
|
|
@@ -1146,8 +1118,6 @@ class TableVersion:
|
|
|
1146
1118
|
recomputed_cols = [col for col in recomputed_view_cols if col.tbl.id == view.id]
|
|
1147
1119
|
plan = None
|
|
1148
1120
|
if len(recomputed_cols) > 0:
|
|
1149
|
-
from pixeltable.plan import Planner
|
|
1150
|
-
|
|
1151
1121
|
plan = Planner.create_view_update_plan(view.get().path, recompute_targets=recomputed_cols)
|
|
1152
1122
|
status = view.get().propagate_update(
|
|
1153
1123
|
plan, None, recomputed_view_cols, base_versions=base_versions, timestamp=timestamp, cascade=True
|
|
@@ -1182,6 +1152,10 @@ class TableVersion:
|
|
|
1182
1152
|
self, where: Optional[exprs.Expr], base_versions: list[Optional[int]], timestamp: float
|
|
1183
1153
|
) -> UpdateStatus:
|
|
1184
1154
|
"""Delete rows in this table and propagate to views"""
|
|
1155
|
+
from pixeltable.catalog import Catalog
|
|
1156
|
+
|
|
1157
|
+
Catalog.get().mark_modified_tvs(self.handle)
|
|
1158
|
+
|
|
1185
1159
|
# print(f'calling sql_expr()')
|
|
1186
1160
|
sql_where_clause = where.sql_expr(exprs.SqlElementCache()) if where is not None else None
|
|
1187
1161
|
# #print(f'sql_where_clause={str(sql_where_clause) if sql_where_clause is not None else None}')
|
|
@@ -1198,8 +1172,7 @@ class TableVersion:
|
|
|
1198
1172
|
result = UpdateStatus(row_count_stats=row_counts)
|
|
1199
1173
|
if del_rows > 0:
|
|
1200
1174
|
# we're creating a new version
|
|
1201
|
-
self.
|
|
1202
|
-
self.created_at = timestamp
|
|
1175
|
+
self.bump_version(timestamp, bump_schema_version=False)
|
|
1203
1176
|
for view in self.mutable_views:
|
|
1204
1177
|
status = view.get().propagate_delete(
|
|
1205
1178
|
where=None, base_versions=[self.version, *base_versions], timestamp=timestamp
|
|
@@ -1225,6 +1198,8 @@ class TableVersion:
|
|
|
1225
1198
|
Doesn't attempt to revert the in-memory metadata, but instead invalidates this TableVersion instance
|
|
1226
1199
|
and relies on Catalog to reload it
|
|
1227
1200
|
"""
|
|
1201
|
+
from pixeltable.catalog import Catalog
|
|
1202
|
+
|
|
1228
1203
|
conn = Env.get().conn
|
|
1229
1204
|
# make sure we don't have a snapshot referencing this version
|
|
1230
1205
|
# (unclear how to express this with sqlalchemy)
|
|
@@ -1244,8 +1219,6 @@ class TableVersion:
|
|
|
1244
1219
|
)
|
|
1245
1220
|
)
|
|
1246
1221
|
|
|
1247
|
-
# delete newly-added data
|
|
1248
|
-
self.delete_media(tbl_version=self.version)
|
|
1249
1222
|
conn.execute(sql.delete(self.store_tbl.sa_tbl).where(self.store_tbl.sa_tbl.c.v_min == self.version))
|
|
1250
1223
|
|
|
1251
1224
|
# revert new deletions
|
|
@@ -1260,6 +1233,8 @@ class TableVersion:
|
|
|
1260
1233
|
# revert schema changes:
|
|
1261
1234
|
# - undo changes to self._tbl_md and write that back
|
|
1262
1235
|
# - delete newly-added TableVersion/TableSchemaVersion records
|
|
1236
|
+
Catalog.get().mark_modified_tvs(self.handle)
|
|
1237
|
+
old_version = self.version
|
|
1263
1238
|
if self.version == self.schema_version:
|
|
1264
1239
|
# physically delete newly-added columns and remove them from the stored md
|
|
1265
1240
|
added_cols = [col for col in self.cols if col.schema_version_add == self.schema_version]
|
|
@@ -1306,7 +1281,8 @@ class TableVersion:
|
|
|
1306
1281
|
.where(schema.TableVersion.version == self.version)
|
|
1307
1282
|
)
|
|
1308
1283
|
|
|
1309
|
-
self.version
|
|
1284
|
+
self._tbl_md.current_version = self._version_md.version = self.version - 1
|
|
1285
|
+
|
|
1310
1286
|
self._write_md(new_version=False, new_schema_version=False)
|
|
1311
1287
|
|
|
1312
1288
|
# propagate to views
|
|
@@ -1317,7 +1293,12 @@ class TableVersion:
|
|
|
1317
1293
|
|
|
1318
1294
|
# force reload on next operation
|
|
1319
1295
|
self.is_validated = False
|
|
1320
|
-
|
|
1296
|
+
Catalog.get().remove_tbl_version(self)
|
|
1297
|
+
|
|
1298
|
+
# delete newly-added data
|
|
1299
|
+
# Do this at the end, after all DB operations have completed.
|
|
1300
|
+
# TODO: The transaction could still fail. Really this should be done via PendingTableOps.
|
|
1301
|
+
self.delete_media(tbl_version=old_version)
|
|
1321
1302
|
_logger.info(f'TableVersion {self.name}: reverted to version {self.version}')
|
|
1322
1303
|
|
|
1323
1304
|
def _init_external_stores(self) -> None:
|
|
@@ -1328,9 +1309,7 @@ class TableVersion:
|
|
|
1328
1309
|
self.external_stores[store.name] = store
|
|
1329
1310
|
|
|
1330
1311
|
def link_external_store(self, store: pxt.io.ExternalStore) -> None:
|
|
1331
|
-
self.
|
|
1332
|
-
self.created_at = time.time()
|
|
1333
|
-
self.schema_version = self.version
|
|
1312
|
+
self.bump_version(bump_schema_version=True)
|
|
1334
1313
|
|
|
1335
1314
|
self.external_stores[store.name] = store
|
|
1336
1315
|
self._tbl_md.external_stores.append(
|
|
@@ -1340,9 +1319,7 @@ class TableVersion:
|
|
|
1340
1319
|
|
|
1341
1320
|
def unlink_external_store(self, store: pxt.io.ExternalStore) -> None:
|
|
1342
1321
|
del self.external_stores[store.name]
|
|
1343
|
-
self.
|
|
1344
|
-
self.created_at = time.time()
|
|
1345
|
-
self.schema_version = self.version
|
|
1322
|
+
self.bump_version(bump_schema_version=True)
|
|
1346
1323
|
idx = next(i for i, store_md in enumerate(self._tbl_md.external_stores) if store_md['md']['name'] == store.name)
|
|
1347
1324
|
self._tbl_md.external_stores.pop(idx)
|
|
1348
1325
|
self._write_md(new_version=True, new_schema_version=True)
|
|
@@ -1398,35 +1375,52 @@ class TableVersion:
|
|
|
1398
1375
|
# if this is a snapshot instance, we need to ignore current_version
|
|
1399
1376
|
return self._tbl_md.current_version if self.effective_version is None else self.effective_version
|
|
1400
1377
|
|
|
1401
|
-
@version.setter
|
|
1402
|
-
def version(self, version: int) -> None:
|
|
1403
|
-
assert self.effective_version is None
|
|
1404
|
-
self._tbl_md.current_version = version
|
|
1405
|
-
self._version_md.version = version
|
|
1406
|
-
|
|
1407
1378
|
@property
|
|
1408
1379
|
def created_at(self) -> float:
|
|
1409
1380
|
return self._version_md.created_at
|
|
1410
1381
|
|
|
1411
|
-
@created_at.setter
|
|
1412
|
-
def created_at(self, ts: float) -> None:
|
|
1413
|
-
assert self.effective_version is None
|
|
1414
|
-
self._version_md.created_at = ts
|
|
1415
|
-
|
|
1416
1382
|
@property
|
|
1417
1383
|
def schema_version(self) -> int:
|
|
1418
1384
|
return self._schema_version_md.schema_version
|
|
1419
1385
|
|
|
1420
|
-
|
|
1421
|
-
|
|
1386
|
+
def bump_version(self, timestamp: Optional[float] = None, *, bump_schema_version: bool) -> None:
|
|
1387
|
+
"""
|
|
1388
|
+
Increments the table version and adjusts all associated metadata. This will *not* trigger a database action;
|
|
1389
|
+
_write_md() must be called separately to persist the changes.
|
|
1390
|
+
|
|
1391
|
+
Args:
|
|
1392
|
+
timestamp: the creation time for the new version. Can be used to synchronize multiple metadata changes
|
|
1393
|
+
to the same timestamp. If `None`, then defaults to `time.time()`.
|
|
1394
|
+
bump_schema_version: if True, also adjusts the schema version (setting it equal to the new version)
|
|
1395
|
+
and associated metadata.
|
|
1396
|
+
"""
|
|
1397
|
+
from pixeltable.catalog import Catalog
|
|
1398
|
+
|
|
1422
1399
|
assert self.effective_version is None
|
|
1423
|
-
|
|
1424
|
-
|
|
1425
|
-
|
|
1426
|
-
|
|
1400
|
+
|
|
1401
|
+
if timestamp is None:
|
|
1402
|
+
timestamp = time.time()
|
|
1403
|
+
|
|
1404
|
+
Catalog.get().mark_modified_tvs(self.handle)
|
|
1405
|
+
|
|
1406
|
+
old_version = self._tbl_md.current_version
|
|
1407
|
+
assert self._version_md.version == old_version
|
|
1408
|
+
new_version = old_version + 1
|
|
1409
|
+
self._tbl_md.current_version = new_version
|
|
1410
|
+
self._version_md.version = new_version
|
|
1411
|
+
self._version_md.created_at = timestamp
|
|
1412
|
+
|
|
1413
|
+
if bump_schema_version:
|
|
1414
|
+
old_schema_version = self._tbl_md.current_schema_version
|
|
1415
|
+
assert self._version_md.schema_version == old_schema_version
|
|
1416
|
+
assert self._schema_version_md.schema_version == old_schema_version
|
|
1417
|
+
self._tbl_md.current_schema_version = new_version
|
|
1418
|
+
self._version_md.schema_version = new_version
|
|
1419
|
+
self._schema_version_md.preceding_schema_version = old_schema_version
|
|
1420
|
+
self._schema_version_md.schema_version = new_version
|
|
1427
1421
|
|
|
1428
1422
|
@property
|
|
1429
|
-
def preceding_schema_version(self) -> int:
|
|
1423
|
+
def preceding_schema_version(self) -> Optional[int]:
|
|
1430
1424
|
return self._schema_version_md.preceding_schema_version
|
|
1431
1425
|
|
|
1432
1426
|
@property
|
|
@@ -1558,8 +1552,8 @@ class TableVersion:
|
|
|
1558
1552
|
|
|
1559
1553
|
@classmethod
|
|
1560
1554
|
def from_dict(cls, d: dict) -> TableVersion:
|
|
1561
|
-
from pixeltable import
|
|
1555
|
+
from pixeltable.catalog import Catalog
|
|
1562
1556
|
|
|
1563
1557
|
id = UUID(d['id'])
|
|
1564
1558
|
effective_version = d['effective_version']
|
|
1565
|
-
return
|
|
1559
|
+
return Catalog.get().get_tbl_version(id, effective_version)
|
pixeltable/catalog/table_version_handle.py
CHANGED
|
@@ -37,6 +37,9 @@ class TableVersionHandle:
|
|
|
37
37
|
def __hash__(self) -> int:
|
|
38
38
|
return hash((self.id, self.effective_version))
|
|
39
39
|
|
|
40
|
+
def __repr__(self) -> str:
|
|
41
|
+
return f'TableVersionHandle(id={self.id!r}, effective_version={self.effective_version})'
|
|
42
|
+
|
|
40
43
|
@property
|
|
41
44
|
def is_snapshot(self) -> bool:
|
|
42
45
|
return self.effective_version is not None
|
|
@@ -81,7 +84,7 @@ class ColumnHandle:
|
|
|
81
84
|
if self.col_id not in self.tbl_version.get().cols_by_id:
|
|
82
85
|
schema_version_drop = self.tbl_version.get()._tbl_md.column_md[self.col_id].schema_version_drop
|
|
83
86
|
raise excs.Error(
|
|
84
|
-
f'Column
|
|
87
|
+
f'Column was dropped (no record for column ID {self.col_id} in table '
|
|
85
88
|
f'{self.tbl_version.get().versioned_name!r}; it was dropped in table version {schema_version_drop})'
|
|
86
89
|
)
|
|
87
90
|
return self.tbl_version.get().cols_by_id[self.col_id]
|
pixeltable/catalog/update_status.py
CHANGED
|
@@ -57,27 +57,35 @@ class UpdateStatus:
|
|
|
57
57
|
"""
|
|
58
58
|
|
|
59
59
|
updated_cols: list[str] = field(default_factory=list)
|
|
60
|
+
"""Columns that were updated."""
|
|
60
61
|
cols_with_excs: list[str] = field(default_factory=list)
|
|
62
|
+
"""Columns that encountered exceptions."""
|
|
61
63
|
|
|
62
64
|
# stats for the rows affected by the operation
|
|
63
65
|
row_count_stats: RowCountStats = field(default_factory=RowCountStats)
|
|
66
|
+
"""Row count statistics for rows affected by this operation."""
|
|
64
67
|
|
|
65
68
|
# stats for changes cascaded to other tables
|
|
66
69
|
cascade_row_count_stats: RowCountStats = field(default_factory=RowCountStats)
|
|
70
|
+
"""Row count statistics for changes cascaded to other tables."""
|
|
67
71
|
|
|
68
72
|
# stats for the rows affected by the operation in an external store
|
|
69
73
|
ext_row_count_stats: RowCountStats = field(default_factory=RowCountStats)
|
|
74
|
+
"""Row count statistics for rows affected in an external store."""
|
|
70
75
|
|
|
71
76
|
@property
|
|
72
77
|
def num_rows(self) -> int:
|
|
78
|
+
"""Total number of rows affected (including cascaded changes)."""
|
|
73
79
|
return self.row_count_stats.num_rows + self.cascade_row_count_stats.num_rows
|
|
74
80
|
|
|
75
81
|
@property
|
|
76
82
|
def num_excs(self) -> int:
|
|
83
|
+
"""Total number of exceptions encountered (including cascaded changes)."""
|
|
77
84
|
return self.row_count_stats.num_excs + self.cascade_row_count_stats.num_excs
|
|
78
85
|
|
|
79
86
|
@property
|
|
80
87
|
def num_computed_values(self) -> int:
|
|
88
|
+
"""Total number of computed values affected (including cascaded changes)."""
|
|
81
89
|
return self.row_count_stats.computed_values + self.cascade_row_count_stats.computed_values
|
|
82
90
|
|
|
83
91
|
def insert_to_update(self) -> 'UpdateStatus':
|
|
@@ -164,16 +172,20 @@ class UpdateStatus:
|
|
|
164
172
|
|
|
165
173
|
@property
|
|
166
174
|
def external_rows_updated(self) -> int:
|
|
175
|
+
"""Number of rows updated in an external store."""
|
|
167
176
|
return self.ext_row_count_stats.upd_rows
|
|
168
177
|
|
|
169
178
|
@property
|
|
170
179
|
def external_rows_created(self) -> int:
|
|
180
|
+
"""Number of rows created in an external store."""
|
|
171
181
|
return self.ext_row_count_stats.ins_rows
|
|
172
182
|
|
|
173
183
|
@property
|
|
174
184
|
def external_rows_deleted(self) -> int:
|
|
185
|
+
"""Number of rows deleted from an external store."""
|
|
175
186
|
return self.ext_row_count_stats.del_rows
|
|
176
187
|
|
|
177
188
|
@property
|
|
178
189
|
def ext_num_rows(self) -> int:
|
|
190
|
+
"""Total number of rows affected in an external store."""
|
|
179
191
|
return self.ext_row_count_stats.num_rows
|
pixeltable/config.py
CHANGED
|
@@ -181,6 +181,12 @@ KNOWN_CONFIG_OPTIONS = {
|
|
|
181
181
|
'api_version': 'API version if using Azure OpenAI',
|
|
182
182
|
'rate_limits': 'Per-model rate limits for OpenAI API requests',
|
|
183
183
|
},
|
|
184
|
+
'openrouter': {
|
|
185
|
+
'api_key': 'OpenRouter API key',
|
|
186
|
+
'site_url': 'Optional URL for your application (for OpenRouter analytics)',
|
|
187
|
+
'app_name': 'Optional name for your application (for OpenRouter analytics)',
|
|
188
|
+
'rate_limit': 'Rate limit for OpenRouter API requests',
|
|
189
|
+
},
|
|
184
190
|
'replicate': {'api_token': 'Replicate API token'},
|
|
185
191
|
'together': {
|
|
186
192
|
'api_key': 'Together API key',
|
pixeltable/dataframe.py
CHANGED
|
@@ -23,7 +23,7 @@ from typing import (
|
|
|
23
23
|
|
|
24
24
|
import pandas as pd
|
|
25
25
|
import pydantic
|
|
26
|
-
import sqlalchemy as
|
|
26
|
+
import sqlalchemy.exc as sql_exc
|
|
27
27
|
|
|
28
28
|
from pixeltable import catalog, exceptions as excs, exec, exprs, plan, type_system as ts
|
|
29
29
|
from pixeltable.catalog import Catalog, is_valid_identifier
|
|
@@ -186,6 +186,8 @@ class DataFrameResultSet:
|
|
|
186
186
|
|
|
187
187
|
|
|
188
188
|
class DataFrame:
|
|
189
|
+
"""Represents a query for retrieving and transforming data from Pixeltable tables."""
|
|
190
|
+
|
|
189
191
|
_from_clause: plan.FromClause
|
|
190
192
|
_select_list_exprs: list[exprs.Expr]
|
|
191
193
|
_schema: dict[str, ts.ColumnType]
|
|
@@ -456,6 +458,7 @@ class DataFrame:
|
|
|
456
458
|
|
|
457
459
|
@property
|
|
458
460
|
def schema(self) -> dict[str, ColumnType]:
|
|
461
|
+
"""Column names and types in this DataFrame."""
|
|
459
462
|
return self._schema
|
|
460
463
|
|
|
461
464
|
def bind(self, args: dict[str, Any]) -> DataFrame:
|
|
@@ -538,20 +541,23 @@ class DataFrame:
|
|
|
538
541
|
yield [data_row[e.slot_idx] for e in self._select_list_exprs]
|
|
539
542
|
except excs.ExprEvalError as e:
|
|
540
543
|
self._raise_expr_eval_err(e)
|
|
541
|
-
except
|
|
542
|
-
|
|
544
|
+
except (sql_exc.DBAPIError, sql_exc.OperationalError, sql_exc.InternalError) as e:
|
|
545
|
+
Catalog.get().convert_sql_exc(e, tbl=(single_tbl.tbl_version if single_tbl is not None else None))
|
|
546
|
+
raise # just re-raise if not converted to a Pixeltable error
|
|
543
547
|
|
|
544
548
|
def collect(self) -> DataFrameResultSet:
|
|
545
549
|
return DataFrameResultSet(list(self._output_row_iterator()), self.schema)
|
|
546
550
|
|
|
547
551
|
async def _acollect(self) -> DataFrameResultSet:
|
|
552
|
+
single_tbl = self._first_tbl if len(self._from_clause.tbls) == 1 else None
|
|
548
553
|
try:
|
|
549
554
|
result = [[row[e.slot_idx] for e in self._select_list_exprs] async for row in self._aexec()]
|
|
550
555
|
return DataFrameResultSet(result, self.schema)
|
|
551
556
|
except excs.ExprEvalError as e:
|
|
552
557
|
self._raise_expr_eval_err(e)
|
|
553
|
-
except
|
|
554
|
-
|
|
558
|
+
except (sql_exc.DBAPIError, sql_exc.OperationalError, sql_exc.InternalError) as e:
|
|
559
|
+
Catalog.get().convert_sql_exc(e, tbl=(single_tbl.tbl_version if single_tbl is not None else None))
|
|
560
|
+
raise # just re-raise if not converted to a Pixeltable error
|
|
555
561
|
|
|
556
562
|
def count(self) -> int:
|
|
557
563
|
"""Return the number of rows in the DataFrame.
|