pixeltable 0.4.15__py3-none-any.whl → 0.4.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +4 -0
- pixeltable/catalog/catalog.py +105 -51
- pixeltable/catalog/column.py +7 -2
- pixeltable/catalog/table.py +1 -0
- pixeltable/catalog/table_metadata.py +4 -0
- pixeltable/catalog/table_version.py +99 -78
- pixeltable/catalog/table_version_handle.py +4 -1
- pixeltable/config.py +6 -0
- pixeltable/dataframe.py +10 -5
- pixeltable/env.py +48 -19
- pixeltable/exec/__init__.py +2 -0
- pixeltable/exec/cell_materialization_node.py +231 -0
- pixeltable/exec/cell_reconstruction_node.py +135 -0
- pixeltable/exec/exec_node.py +1 -1
- pixeltable/exec/expr_eval/evaluators.py +1 -0
- pixeltable/exec/expr_eval/expr_eval_node.py +3 -0
- pixeltable/exec/expr_eval/globals.py +2 -0
- pixeltable/exec/globals.py +32 -0
- pixeltable/exec/object_store_save_node.py +1 -4
- pixeltable/exec/row_update_node.py +16 -9
- pixeltable/exec/sql_node.py +107 -14
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/arithmetic_expr.py +10 -11
- pixeltable/exprs/column_property_ref.py +10 -10
- pixeltable/exprs/column_ref.py +2 -2
- pixeltable/exprs/data_row.py +106 -37
- pixeltable/exprs/expr.py +9 -0
- pixeltable/exprs/expr_set.py +14 -7
- pixeltable/exprs/inline_expr.py +2 -19
- pixeltable/exprs/json_path.py +45 -12
- pixeltable/exprs/row_builder.py +54 -22
- pixeltable/functions/__init__.py +1 -0
- pixeltable/functions/bedrock.py +7 -0
- pixeltable/functions/deepseek.py +11 -4
- pixeltable/functions/llama_cpp.py +7 -0
- pixeltable/functions/math.py +1 -1
- pixeltable/functions/ollama.py +7 -0
- pixeltable/functions/openai.py +4 -4
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/globals.py +10 -4
- pixeltable/io/globals.py +16 -15
- pixeltable/io/table_data_conduit.py +46 -21
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/notes.py +1 -0
- pixeltable/plan.py +175 -46
- pixeltable/store.py +1 -1
- pixeltable/type_system.py +5 -3
- pixeltable/utils/console_output.py +4 -1
- pixeltable/utils/exception_handler.py +5 -28
- pixeltable/utils/image.py +7 -0
- pixeltable/utils/misc.py +5 -0
- {pixeltable-0.4.15.dist-info → pixeltable-0.4.16.dist-info}/METADATA +2 -1
- {pixeltable-0.4.15.dist-info → pixeltable-0.4.16.dist-info}/RECORD +57 -50
- {pixeltable-0.4.15.dist-info → pixeltable-0.4.16.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.15.dist-info → pixeltable-0.4.16.dist-info}/entry_points.txt +0 -0
- {pixeltable-0.4.15.dist-info → pixeltable-0.4.16.dist-info}/licenses/LICENSE +0 -0
|
@@ -11,6 +11,7 @@ from uuid import UUID
|
|
|
11
11
|
|
|
12
12
|
import jsonschema.exceptions
|
|
13
13
|
import sqlalchemy as sql
|
|
14
|
+
from sqlalchemy import exc as sql_exc
|
|
14
15
|
|
|
15
16
|
import pixeltable as pxt
|
|
16
17
|
import pixeltable.exceptions as excs
|
|
@@ -21,20 +22,16 @@ from pixeltable.metadata import schema
|
|
|
21
22
|
from pixeltable.utils.filecache import FileCache
|
|
22
23
|
from pixeltable.utils.object_stores import ObjectOps
|
|
23
24
|
|
|
24
|
-
from .tbl_ops import TableOp
|
|
25
|
-
|
|
26
|
-
if TYPE_CHECKING:
|
|
27
|
-
from pixeltable.plan import SampleClause
|
|
28
|
-
|
|
29
25
|
from ..func.globals import resolve_symbol
|
|
30
26
|
from .column import Column
|
|
31
27
|
from .globals import _POS_COLUMN_NAME, _ROWID_COLUMN_NAME, MediaValidation, is_valid_identifier
|
|
28
|
+
from .tbl_ops import TableOp
|
|
32
29
|
from .update_status import RowCountStats, UpdateStatus
|
|
33
30
|
|
|
34
31
|
if TYPE_CHECKING:
|
|
35
32
|
from pixeltable import exec, store
|
|
36
|
-
|
|
37
|
-
from .
|
|
33
|
+
from pixeltable.catalog.table_version_handle import TableVersionHandle
|
|
34
|
+
from pixeltable.plan import SampleClause
|
|
38
35
|
|
|
39
36
|
_logger = logging.getLogger('pixeltable')
|
|
40
37
|
|
|
@@ -294,7 +291,14 @@ class TableVersion:
|
|
|
294
291
|
cat = pxt.catalog.Catalog.get()
|
|
295
292
|
|
|
296
293
|
tbl_id = UUID(hex=inital_md.tbl_md.tbl_id)
|
|
294
|
+
assert (tbl_id, None) not in cat._tbl_versions
|
|
297
295
|
tbl_version = cls(tbl_id, inital_md.tbl_md, inital_md.version_md, None, inital_md.schema_version_md, [])
|
|
296
|
+
|
|
297
|
+
@cat.register_undo_action
|
|
298
|
+
def _() -> None:
|
|
299
|
+
if (tbl_id, None) in cat._tbl_versions:
|
|
300
|
+
del cat._tbl_versions[tbl_id, None]
|
|
301
|
+
|
|
298
302
|
# TODO: break this up, so that Catalog.create_table() registers tbl_version
|
|
299
303
|
cat._tbl_versions[tbl_id, None] = tbl_version
|
|
300
304
|
tbl_version.init()
|
|
@@ -507,9 +511,7 @@ class TableVersion:
|
|
|
507
511
|
|
|
508
512
|
def add_index(self, col: Column, idx_name: Optional[str], idx: index.IndexBase) -> UpdateStatus:
|
|
509
513
|
# we're creating a new schema version
|
|
510
|
-
self.
|
|
511
|
-
self.created_at = time.time()
|
|
512
|
-
self.schema_version = self.version
|
|
514
|
+
self.bump_version(bump_schema_version=True)
|
|
513
515
|
status = self._add_index(col, idx_name, idx)
|
|
514
516
|
self._write_md(new_version=True, new_schema_version=True)
|
|
515
517
|
_logger.info(f'Added index {idx_name} on column {col.name} to table {self.name}')
|
|
@@ -620,9 +622,7 @@ class TableVersion:
|
|
|
620
622
|
assert idx_id in self._tbl_md.index_md
|
|
621
623
|
|
|
622
624
|
# we're creating a new schema version
|
|
623
|
-
self.
|
|
624
|
-
self.created_at = time.time()
|
|
625
|
-
self.schema_version = self.version
|
|
625
|
+
self.bump_version(bump_schema_version=True)
|
|
626
626
|
idx_md = self._tbl_md.index_md[idx_id]
|
|
627
627
|
idx_md.schema_version_drop = self.schema_version
|
|
628
628
|
assert idx_md.name in self.idxs_by_name
|
|
@@ -651,9 +651,7 @@ class TableVersion:
|
|
|
651
651
|
self.next_col_id += 1
|
|
652
652
|
|
|
653
653
|
# we're creating a new schema version
|
|
654
|
-
self.
|
|
655
|
-
self.created_at = time.time()
|
|
656
|
-
self.schema_version = self.version
|
|
654
|
+
self.bump_version(bump_schema_version=True)
|
|
657
655
|
index_cols: dict[Column, tuple[index.BtreeIndex, Column, Column]] = {}
|
|
658
656
|
all_cols: list[Column] = []
|
|
659
657
|
for col in cols:
|
|
@@ -685,7 +683,11 @@ class TableVersion:
|
|
|
685
683
|
self, cols: Iterable[Column], print_stats: bool, on_error: Literal['abort', 'ignore']
|
|
686
684
|
) -> UpdateStatus:
|
|
687
685
|
"""Add and populate columns within the current transaction"""
|
|
686
|
+
from pixeltable.catalog import Catalog
|
|
687
|
+
from pixeltable.plan import Planner
|
|
688
|
+
|
|
688
689
|
cols_to_add = list(cols)
|
|
690
|
+
|
|
689
691
|
row_count = self.store_tbl.count()
|
|
690
692
|
for col in cols_to_add:
|
|
691
693
|
assert col.tbl is self
|
|
@@ -722,17 +724,19 @@ class TableVersion:
|
|
|
722
724
|
continue
|
|
723
725
|
|
|
724
726
|
# populate the column
|
|
725
|
-
from pixeltable.plan import Planner
|
|
726
|
-
|
|
727
727
|
plan = Planner.create_add_column_plan(self.path, col)
|
|
728
728
|
plan.ctx.num_rows = row_count
|
|
729
729
|
try:
|
|
730
730
|
plan.open()
|
|
731
731
|
try:
|
|
732
732
|
excs_per_col = self.store_tbl.load_column(col, plan, on_error == 'abort')
|
|
733
|
-
except
|
|
734
|
-
|
|
735
|
-
|
|
733
|
+
except sql_exc.DBAPIError as exc:
|
|
734
|
+
Catalog.get().convert_sql_exc(exc, self.id, self.handle, convert_db_excs=True)
|
|
735
|
+
# If it wasn't converted, re-raise as a generic Pixeltable error
|
|
736
|
+
# (this means it's not a known concurrency error; it's something else)
|
|
737
|
+
raise excs.Error(
|
|
738
|
+
f'Unexpected SQL error during execution of computed column {col.name!r}:\n{exc}'
|
|
739
|
+
) from exc
|
|
736
740
|
if excs_per_col > 0:
|
|
737
741
|
cols_with_excs.append(col)
|
|
738
742
|
num_excs += excs_per_col
|
|
@@ -740,7 +744,7 @@ class TableVersion:
|
|
|
740
744
|
finally:
|
|
741
745
|
plan.close()
|
|
742
746
|
|
|
743
|
-
|
|
747
|
+
Catalog.get().record_column_dependencies(self)
|
|
744
748
|
|
|
745
749
|
if print_stats:
|
|
746
750
|
plan.ctx.profile.print(num_rows=row_count)
|
|
@@ -760,9 +764,7 @@ class TableVersion:
|
|
|
760
764
|
assert self.is_mutable
|
|
761
765
|
|
|
762
766
|
# we're creating a new schema version
|
|
763
|
-
self.
|
|
764
|
-
self.created_at = time.time()
|
|
765
|
-
self.schema_version = self.version
|
|
767
|
+
self.bump_version(bump_schema_version=True)
|
|
766
768
|
|
|
767
769
|
# drop this column and all dependent index columns and indices
|
|
768
770
|
dropped_cols = [col]
|
|
@@ -826,9 +828,7 @@ class TableVersion:
|
|
|
826
828
|
self._schema_version_md.columns[col.id].name = new_name
|
|
827
829
|
|
|
828
830
|
# we're creating a new schema version
|
|
829
|
-
self.
|
|
830
|
-
self.created_at = time.time()
|
|
831
|
-
self.schema_version = self.version
|
|
831
|
+
self.bump_version(bump_schema_version=True)
|
|
832
832
|
|
|
833
833
|
self._write_md(new_version=True, new_schema_version=True)
|
|
834
834
|
_logger.info(f'Renamed column {old_name} to {new_name} in table {self.name}, new version: {self.version}')
|
|
@@ -848,9 +848,7 @@ class TableVersion:
|
|
|
848
848
|
|
|
849
849
|
def _create_schema_version(self) -> None:
|
|
850
850
|
# we're creating a new schema version
|
|
851
|
-
self.
|
|
852
|
-
self.created_at = time.time()
|
|
853
|
-
self.schema_version = self.version
|
|
851
|
+
self.bump_version(bump_schema_version=True)
|
|
854
852
|
self._write_md(new_version=True, new_schema_version=True)
|
|
855
853
|
_logger.info(f'[{self.name}] Updating table schema to version: {self.version}')
|
|
856
854
|
|
|
@@ -897,8 +895,7 @@ class TableVersion:
|
|
|
897
895
|
) -> UpdateStatus:
|
|
898
896
|
"""Insert rows produced by exec_plan and propagate to views"""
|
|
899
897
|
# we're creating a new version
|
|
900
|
-
self.
|
|
901
|
-
self.created_at = timestamp
|
|
898
|
+
self.bump_version(timestamp, bump_schema_version=False)
|
|
902
899
|
cols_with_excs, row_counts = self.store_tbl.insert_rows(
|
|
903
900
|
exec_plan, v_min=self.version, rowids=rowids, abort_on_exc=abort_on_exc
|
|
904
901
|
)
|
|
@@ -933,10 +930,11 @@ class TableVersion:
|
|
|
933
930
|
cascade: if True, also update all computed columns that transitively depend on the updated columns,
|
|
934
931
|
including within views.
|
|
935
932
|
"""
|
|
936
|
-
|
|
937
|
-
|
|
933
|
+
from pixeltable.exprs import SqlElementCache
|
|
938
934
|
from pixeltable.plan import Planner
|
|
939
935
|
|
|
936
|
+
assert self.is_mutable
|
|
937
|
+
|
|
940
938
|
update_spec = self._validate_update_spec(value_spec, allow_pk=False, allow_exprs=True, allow_media=True)
|
|
941
939
|
if where is not None:
|
|
942
940
|
if not isinstance(where, exprs.Expr):
|
|
@@ -947,7 +945,6 @@ class TableVersion:
|
|
|
947
945
|
raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
|
|
948
946
|
|
|
949
947
|
plan, updated_cols, recomputed_cols = Planner.create_update_plan(self.path, update_spec, [], where, cascade)
|
|
950
|
-
from pixeltable.exprs import SqlElementCache
|
|
951
948
|
|
|
952
949
|
result = self.propagate_update(
|
|
953
950
|
plan,
|
|
@@ -974,11 +971,11 @@ class TableVersion:
|
|
|
974
971
|
batch: one dict per row, each mapping Columns to LiteralExprs representing the new values
|
|
975
972
|
rowids: if not empty, one tuple per row, each containing the rowid values for the corresponding row in batch
|
|
976
973
|
"""
|
|
974
|
+
from pixeltable.plan import Planner
|
|
975
|
+
|
|
977
976
|
# if we do lookups of rowids, we must have one for each row in the batch
|
|
978
977
|
assert len(rowids) == 0 or len(rowids) == len(batch)
|
|
979
978
|
|
|
980
|
-
from pixeltable.plan import Planner
|
|
981
|
-
|
|
982
979
|
plan, row_update_node, delete_where_clause, updated_cols, recomputed_cols = Planner.create_batch_update_plan(
|
|
983
980
|
self.path, batch, rowids, cascade=cascade
|
|
984
981
|
)
|
|
@@ -1050,14 +1047,14 @@ class TableVersion:
|
|
|
1050
1047
|
def recompute_columns(
|
|
1051
1048
|
self, col_names: list[str], where: exprs.Expr | None = None, errors_only: bool = False, cascade: bool = True
|
|
1052
1049
|
) -> UpdateStatus:
|
|
1050
|
+
from pixeltable.exprs import CompoundPredicate, SqlElementCache
|
|
1051
|
+
from pixeltable.plan import Planner
|
|
1052
|
+
|
|
1053
1053
|
assert self.is_mutable
|
|
1054
1054
|
assert all(name in self.cols_by_name for name in col_names)
|
|
1055
1055
|
assert len(col_names) > 0
|
|
1056
1056
|
assert len(col_names) == 1 or not errors_only
|
|
1057
1057
|
|
|
1058
|
-
from pixeltable.exprs import CompoundPredicate
|
|
1059
|
-
from pixeltable.plan import Planner
|
|
1060
|
-
|
|
1061
1058
|
target_columns = [self.cols_by_name[name] for name in col_names]
|
|
1062
1059
|
where_clause: Optional[exprs.Expr] = None
|
|
1063
1060
|
if where is not None:
|
|
@@ -1072,7 +1069,6 @@ class TableVersion:
|
|
|
1072
1069
|
plan, updated_cols, recomputed_cols = Planner.create_update_plan(
|
|
1073
1070
|
self.path, update_targets={}, recompute_targets=target_columns, where_clause=where_clause, cascade=cascade
|
|
1074
1071
|
)
|
|
1075
|
-
from pixeltable.exprs import SqlElementCache
|
|
1076
1072
|
|
|
1077
1073
|
result = self.propagate_update(
|
|
1078
1074
|
plan,
|
|
@@ -1096,11 +1092,14 @@ class TableVersion:
|
|
|
1096
1092
|
cascade: bool,
|
|
1097
1093
|
show_progress: bool = True,
|
|
1098
1094
|
) -> UpdateStatus:
|
|
1095
|
+
from pixeltable.catalog import Catalog
|
|
1096
|
+
from pixeltable.plan import Planner
|
|
1097
|
+
|
|
1098
|
+
Catalog.get().mark_modified_tvs(self.handle)
|
|
1099
1099
|
result = UpdateStatus()
|
|
1100
1100
|
create_new_table_version = plan is not None
|
|
1101
1101
|
if create_new_table_version:
|
|
1102
|
-
self.
|
|
1103
|
-
self.created_at = timestamp
|
|
1102
|
+
self.bump_version(timestamp, bump_schema_version=False)
|
|
1104
1103
|
cols_with_excs, row_counts = self.store_tbl.insert_rows(
|
|
1105
1104
|
plan, v_min=self.version, show_progress=show_progress
|
|
1106
1105
|
)
|
|
@@ -1119,8 +1118,6 @@ class TableVersion:
|
|
|
1119
1118
|
recomputed_cols = [col for col in recomputed_view_cols if col.tbl.id == view.id]
|
|
1120
1119
|
plan = None
|
|
1121
1120
|
if len(recomputed_cols) > 0:
|
|
1122
|
-
from pixeltable.plan import Planner
|
|
1123
|
-
|
|
1124
1121
|
plan = Planner.create_view_update_plan(view.get().path, recompute_targets=recomputed_cols)
|
|
1125
1122
|
status = view.get().propagate_update(
|
|
1126
1123
|
plan, None, recomputed_view_cols, base_versions=base_versions, timestamp=timestamp, cascade=True
|
|
@@ -1155,6 +1152,10 @@ class TableVersion:
|
|
|
1155
1152
|
self, where: Optional[exprs.Expr], base_versions: list[Optional[int]], timestamp: float
|
|
1156
1153
|
) -> UpdateStatus:
|
|
1157
1154
|
"""Delete rows in this table and propagate to views"""
|
|
1155
|
+
from pixeltable.catalog import Catalog
|
|
1156
|
+
|
|
1157
|
+
Catalog.get().mark_modified_tvs(self.handle)
|
|
1158
|
+
|
|
1158
1159
|
# print(f'calling sql_expr()')
|
|
1159
1160
|
sql_where_clause = where.sql_expr(exprs.SqlElementCache()) if where is not None else None
|
|
1160
1161
|
# #print(f'sql_where_clause={str(sql_where_clause) if sql_where_clause is not None else None}')
|
|
@@ -1171,8 +1172,7 @@ class TableVersion:
|
|
|
1171
1172
|
result = UpdateStatus(row_count_stats=row_counts)
|
|
1172
1173
|
if del_rows > 0:
|
|
1173
1174
|
# we're creating a new version
|
|
1174
|
-
self.
|
|
1175
|
-
self.created_at = timestamp
|
|
1175
|
+
self.bump_version(timestamp, bump_schema_version=False)
|
|
1176
1176
|
for view in self.mutable_views:
|
|
1177
1177
|
status = view.get().propagate_delete(
|
|
1178
1178
|
where=None, base_versions=[self.version, *base_versions], timestamp=timestamp
|
|
@@ -1198,6 +1198,8 @@ class TableVersion:
|
|
|
1198
1198
|
Doesn't attempt to revert the in-memory metadata, but instead invalidates this TableVersion instance
|
|
1199
1199
|
and relies on Catalog to reload it
|
|
1200
1200
|
"""
|
|
1201
|
+
from pixeltable.catalog import Catalog
|
|
1202
|
+
|
|
1201
1203
|
conn = Env.get().conn
|
|
1202
1204
|
# make sure we don't have a snapshot referencing this version
|
|
1203
1205
|
# (unclear how to express this with sqlalchemy)
|
|
@@ -1217,8 +1219,6 @@ class TableVersion:
|
|
|
1217
1219
|
)
|
|
1218
1220
|
)
|
|
1219
1221
|
|
|
1220
|
-
# delete newly-added data
|
|
1221
|
-
self.delete_media(tbl_version=self.version)
|
|
1222
1222
|
conn.execute(sql.delete(self.store_tbl.sa_tbl).where(self.store_tbl.sa_tbl.c.v_min == self.version))
|
|
1223
1223
|
|
|
1224
1224
|
# revert new deletions
|
|
@@ -1233,6 +1233,8 @@ class TableVersion:
|
|
|
1233
1233
|
# revert schema changes:
|
|
1234
1234
|
# - undo changes to self._tbl_md and write that back
|
|
1235
1235
|
# - delete newly-added TableVersion/TableSchemaVersion records
|
|
1236
|
+
Catalog.get().mark_modified_tvs(self.handle)
|
|
1237
|
+
old_version = self.version
|
|
1236
1238
|
if self.version == self.schema_version:
|
|
1237
1239
|
# physically delete newly-added columns and remove them from the stored md
|
|
1238
1240
|
added_cols = [col for col in self.cols if col.schema_version_add == self.schema_version]
|
|
@@ -1279,7 +1281,8 @@ class TableVersion:
|
|
|
1279
1281
|
.where(schema.TableVersion.version == self.version)
|
|
1280
1282
|
)
|
|
1281
1283
|
|
|
1282
|
-
self.version
|
|
1284
|
+
self._tbl_md.current_version = self._version_md.version = self.version - 1
|
|
1285
|
+
|
|
1283
1286
|
self._write_md(new_version=False, new_schema_version=False)
|
|
1284
1287
|
|
|
1285
1288
|
# propagate to views
|
|
@@ -1290,7 +1293,12 @@ class TableVersion:
|
|
|
1290
1293
|
|
|
1291
1294
|
# force reload on next operation
|
|
1292
1295
|
self.is_validated = False
|
|
1293
|
-
|
|
1296
|
+
Catalog.get().remove_tbl_version(self)
|
|
1297
|
+
|
|
1298
|
+
# delete newly-added data
|
|
1299
|
+
# Do this at the end, after all DB operations have completed.
|
|
1300
|
+
# TODO: The transaction could still fail. Really this should be done via PendingTableOps.
|
|
1301
|
+
self.delete_media(tbl_version=old_version)
|
|
1294
1302
|
_logger.info(f'TableVersion {self.name}: reverted to version {self.version}')
|
|
1295
1303
|
|
|
1296
1304
|
def _init_external_stores(self) -> None:
|
|
@@ -1301,9 +1309,7 @@ class TableVersion:
|
|
|
1301
1309
|
self.external_stores[store.name] = store
|
|
1302
1310
|
|
|
1303
1311
|
def link_external_store(self, store: pxt.io.ExternalStore) -> None:
|
|
1304
|
-
self.
|
|
1305
|
-
self.created_at = time.time()
|
|
1306
|
-
self.schema_version = self.version
|
|
1312
|
+
self.bump_version(bump_schema_version=True)
|
|
1307
1313
|
|
|
1308
1314
|
self.external_stores[store.name] = store
|
|
1309
1315
|
self._tbl_md.external_stores.append(
|
|
@@ -1313,9 +1319,7 @@ class TableVersion:
|
|
|
1313
1319
|
|
|
1314
1320
|
def unlink_external_store(self, store: pxt.io.ExternalStore) -> None:
|
|
1315
1321
|
del self.external_stores[store.name]
|
|
1316
|
-
self.
|
|
1317
|
-
self.created_at = time.time()
|
|
1318
|
-
self.schema_version = self.version
|
|
1322
|
+
self.bump_version(bump_schema_version=True)
|
|
1319
1323
|
idx = next(i for i, store_md in enumerate(self._tbl_md.external_stores) if store_md['md']['name'] == store.name)
|
|
1320
1324
|
self._tbl_md.external_stores.pop(idx)
|
|
1321
1325
|
self._write_md(new_version=True, new_schema_version=True)
|
|
@@ -1371,35 +1375,52 @@ class TableVersion:
|
|
|
1371
1375
|
# if this is a snapshot instance, we need to ignore current_version
|
|
1372
1376
|
return self._tbl_md.current_version if self.effective_version is None else self.effective_version
|
|
1373
1377
|
|
|
1374
|
-
@version.setter
|
|
1375
|
-
def version(self, version: int) -> None:
|
|
1376
|
-
assert self.effective_version is None
|
|
1377
|
-
self._tbl_md.current_version = version
|
|
1378
|
-
self._version_md.version = version
|
|
1379
|
-
|
|
1380
1378
|
@property
|
|
1381
1379
|
def created_at(self) -> float:
|
|
1382
1380
|
return self._version_md.created_at
|
|
1383
1381
|
|
|
1384
|
-
@created_at.setter
|
|
1385
|
-
def created_at(self, ts: float) -> None:
|
|
1386
|
-
assert self.effective_version is None
|
|
1387
|
-
self._version_md.created_at = ts
|
|
1388
|
-
|
|
1389
1382
|
@property
|
|
1390
1383
|
def schema_version(self) -> int:
|
|
1391
1384
|
return self._schema_version_md.schema_version
|
|
1392
1385
|
|
|
1393
|
-
|
|
1394
|
-
|
|
1386
|
+
def bump_version(self, timestamp: Optional[float] = None, *, bump_schema_version: bool) -> None:
|
|
1387
|
+
"""
|
|
1388
|
+
Increments the table version and adjusts all associated metadata. This will *not* trigger a database action;
|
|
1389
|
+
_write_md() must be called separately to persist the changes.
|
|
1390
|
+
|
|
1391
|
+
Args:
|
|
1392
|
+
timestamp: the creation time for the new version. Can be used to synchronize multiple metadata changes
|
|
1393
|
+
to the same timestamp. If `None`, then defaults to `time.time()`.
|
|
1394
|
+
bump_schema_version: if True, also adjusts the schema version (setting it equal to the new version)
|
|
1395
|
+
and associated metadata.
|
|
1396
|
+
"""
|
|
1397
|
+
from pixeltable.catalog import Catalog
|
|
1398
|
+
|
|
1395
1399
|
assert self.effective_version is None
|
|
1396
|
-
|
|
1397
|
-
|
|
1398
|
-
|
|
1399
|
-
|
|
1400
|
+
|
|
1401
|
+
if timestamp is None:
|
|
1402
|
+
timestamp = time.time()
|
|
1403
|
+
|
|
1404
|
+
Catalog.get().mark_modified_tvs(self.handle)
|
|
1405
|
+
|
|
1406
|
+
old_version = self._tbl_md.current_version
|
|
1407
|
+
assert self._version_md.version == old_version
|
|
1408
|
+
new_version = old_version + 1
|
|
1409
|
+
self._tbl_md.current_version = new_version
|
|
1410
|
+
self._version_md.version = new_version
|
|
1411
|
+
self._version_md.created_at = timestamp
|
|
1412
|
+
|
|
1413
|
+
if bump_schema_version:
|
|
1414
|
+
old_schema_version = self._tbl_md.current_schema_version
|
|
1415
|
+
assert self._version_md.schema_version == old_schema_version
|
|
1416
|
+
assert self._schema_version_md.schema_version == old_schema_version
|
|
1417
|
+
self._tbl_md.current_schema_version = new_version
|
|
1418
|
+
self._version_md.schema_version = new_version
|
|
1419
|
+
self._schema_version_md.preceding_schema_version = old_schema_version
|
|
1420
|
+
self._schema_version_md.schema_version = new_version
|
|
1400
1421
|
|
|
1401
1422
|
@property
|
|
1402
|
-
def preceding_schema_version(self) -> int:
|
|
1423
|
+
def preceding_schema_version(self) -> Optional[int]:
|
|
1403
1424
|
return self._schema_version_md.preceding_schema_version
|
|
1404
1425
|
|
|
1405
1426
|
@property
|
|
@@ -1531,8 +1552,8 @@ class TableVersion:
|
|
|
1531
1552
|
|
|
1532
1553
|
@classmethod
|
|
1533
1554
|
def from_dict(cls, d: dict) -> TableVersion:
|
|
1534
|
-
from pixeltable import
|
|
1555
|
+
from pixeltable.catalog import Catalog
|
|
1535
1556
|
|
|
1536
1557
|
id = UUID(d['id'])
|
|
1537
1558
|
effective_version = d['effective_version']
|
|
1538
|
-
return
|
|
1559
|
+
return Catalog.get().get_tbl_version(id, effective_version)
|
|
@@ -37,6 +37,9 @@ class TableVersionHandle:
|
|
|
37
37
|
def __hash__(self) -> int:
|
|
38
38
|
return hash((self.id, self.effective_version))
|
|
39
39
|
|
|
40
|
+
def __repr__(self) -> str:
|
|
41
|
+
return f'TableVersionHandle(id={self.id!r}, effective_version={self.effective_version})'
|
|
42
|
+
|
|
40
43
|
@property
|
|
41
44
|
def is_snapshot(self) -> bool:
|
|
42
45
|
return self.effective_version is not None
|
|
@@ -81,7 +84,7 @@ class ColumnHandle:
|
|
|
81
84
|
if self.col_id not in self.tbl_version.get().cols_by_id:
|
|
82
85
|
schema_version_drop = self.tbl_version.get()._tbl_md.column_md[self.col_id].schema_version_drop
|
|
83
86
|
raise excs.Error(
|
|
84
|
-
f'Column
|
|
87
|
+
f'Column was dropped (no record for column ID {self.col_id} in table '
|
|
85
88
|
f'{self.tbl_version.get().versioned_name!r}; it was dropped in table version {schema_version_drop})'
|
|
86
89
|
)
|
|
87
90
|
return self.tbl_version.get().cols_by_id[self.col_id]
|
pixeltable/config.py
CHANGED
|
@@ -181,6 +181,12 @@ KNOWN_CONFIG_OPTIONS = {
|
|
|
181
181
|
'api_version': 'API version if using Azure OpenAI',
|
|
182
182
|
'rate_limits': 'Per-model rate limits for OpenAI API requests',
|
|
183
183
|
},
|
|
184
|
+
'openrouter': {
|
|
185
|
+
'api_key': 'OpenRouter API key',
|
|
186
|
+
'site_url': 'Optional URL for your application (for OpenRouter analytics)',
|
|
187
|
+
'app_name': 'Optional name for your application (for OpenRouter analytics)',
|
|
188
|
+
'rate_limit': 'Rate limit for OpenRouter API requests',
|
|
189
|
+
},
|
|
184
190
|
'replicate': {'api_token': 'Replicate API token'},
|
|
185
191
|
'together': {
|
|
186
192
|
'api_key': 'Together API key',
|
pixeltable/dataframe.py
CHANGED
|
@@ -23,7 +23,7 @@ from typing import (
|
|
|
23
23
|
|
|
24
24
|
import pandas as pd
|
|
25
25
|
import pydantic
|
|
26
|
-
import sqlalchemy as
|
|
26
|
+
import sqlalchemy.exc as sql_exc
|
|
27
27
|
|
|
28
28
|
from pixeltable import catalog, exceptions as excs, exec, exprs, plan, type_system as ts
|
|
29
29
|
from pixeltable.catalog import Catalog, is_valid_identifier
|
|
@@ -186,6 +186,8 @@ class DataFrameResultSet:
|
|
|
186
186
|
|
|
187
187
|
|
|
188
188
|
class DataFrame:
|
|
189
|
+
"""Represents a query for retrieving and transforming data from Pixeltable tables."""
|
|
190
|
+
|
|
189
191
|
_from_clause: plan.FromClause
|
|
190
192
|
_select_list_exprs: list[exprs.Expr]
|
|
191
193
|
_schema: dict[str, ts.ColumnType]
|
|
@@ -539,20 +541,23 @@ class DataFrame:
|
|
|
539
541
|
yield [data_row[e.slot_idx] for e in self._select_list_exprs]
|
|
540
542
|
except excs.ExprEvalError as e:
|
|
541
543
|
self._raise_expr_eval_err(e)
|
|
542
|
-
except
|
|
543
|
-
|
|
544
|
+
except (sql_exc.DBAPIError, sql_exc.OperationalError, sql_exc.InternalError) as e:
|
|
545
|
+
Catalog.get().convert_sql_exc(e, tbl=(single_tbl.tbl_version if single_tbl is not None else None))
|
|
546
|
+
raise # just re-raise if not converted to a Pixeltable error
|
|
544
547
|
|
|
545
548
|
def collect(self) -> DataFrameResultSet:
|
|
546
549
|
return DataFrameResultSet(list(self._output_row_iterator()), self.schema)
|
|
547
550
|
|
|
548
551
|
async def _acollect(self) -> DataFrameResultSet:
|
|
552
|
+
single_tbl = self._first_tbl if len(self._from_clause.tbls) == 1 else None
|
|
549
553
|
try:
|
|
550
554
|
result = [[row[e.slot_idx] for e in self._select_list_exprs] async for row in self._aexec()]
|
|
551
555
|
return DataFrameResultSet(result, self.schema)
|
|
552
556
|
except excs.ExprEvalError as e:
|
|
553
557
|
self._raise_expr_eval_err(e)
|
|
554
|
-
except
|
|
555
|
-
|
|
558
|
+
except (sql_exc.DBAPIError, sql_exc.OperationalError, sql_exc.InternalError) as e:
|
|
559
|
+
Catalog.get().convert_sql_exc(e, tbl=(single_tbl.tbl_version if single_tbl is not None else None))
|
|
560
|
+
raise # just re-raise if not converted to a Pixeltable error
|
|
556
561
|
|
|
557
562
|
def count(self) -> int:
|
|
558
563
|
"""Return the number of rows in the DataFrame.
|
pixeltable/env.py
CHANGED
|
@@ -27,6 +27,7 @@ from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
|
|
|
27
27
|
import nest_asyncio # type: ignore[import-untyped]
|
|
28
28
|
import pixeltable_pgserver
|
|
29
29
|
import sqlalchemy as sql
|
|
30
|
+
import tzlocal
|
|
30
31
|
from pillow_heif import register_heif_opener # type: ignore[import-untyped]
|
|
31
32
|
from sqlalchemy import orm
|
|
32
33
|
from tenacity import retry, stop_after_attempt, wait_exponential_jitter
|
|
@@ -71,6 +72,7 @@ class Env:
|
|
|
71
72
|
_db_server: Optional[pixeltable_pgserver.PostgresServer] # set only when running in local environment
|
|
72
73
|
_db_url: Optional[str]
|
|
73
74
|
_default_time_zone: Optional[ZoneInfo]
|
|
75
|
+
_verbosity: int
|
|
74
76
|
|
|
75
77
|
# info about optional packages that are utilized by some parts of the code
|
|
76
78
|
__optional_packages: dict[str, PackageInfo]
|
|
@@ -218,10 +220,18 @@ class Env:
|
|
|
218
220
|
"""
|
|
219
221
|
This is not a publicly visible setter; it is only for testing purposes.
|
|
220
222
|
"""
|
|
221
|
-
|
|
223
|
+
if tz is None:
|
|
224
|
+
tz_name = self._get_tz_name()
|
|
225
|
+
else:
|
|
226
|
+
assert isinstance(tz, ZoneInfo)
|
|
227
|
+
tz_name = tz.key
|
|
222
228
|
self.engine.dispose()
|
|
223
229
|
self._create_engine(time_zone_name=tz_name)
|
|
224
230
|
|
|
231
|
+
@property
|
|
232
|
+
def verbosity(self) -> int:
|
|
233
|
+
return self._verbosity
|
|
234
|
+
|
|
225
235
|
@property
|
|
226
236
|
def conn(self) -> Optional[sql.Connection]:
|
|
227
237
|
assert self._current_conn is not None
|
|
@@ -237,6 +247,11 @@ class Env:
|
|
|
237
247
|
assert self._dbms is not None
|
|
238
248
|
return self._dbms
|
|
239
249
|
|
|
250
|
+
@property
|
|
251
|
+
def is_using_cockroachdb(self) -> bool:
|
|
252
|
+
assert self._dbms is not None
|
|
253
|
+
return isinstance(self._dbms, CockroachDbms)
|
|
254
|
+
|
|
240
255
|
@property
|
|
241
256
|
def in_xact(self) -> bool:
|
|
242
257
|
return self._current_conn is not None
|
|
@@ -247,7 +262,7 @@ class Env:
|
|
|
247
262
|
return self._db_server is not None
|
|
248
263
|
|
|
249
264
|
@contextmanager
|
|
250
|
-
def begin_xact(self, for_write: bool = False) -> Iterator[sql.Connection]:
|
|
265
|
+
def begin_xact(self, *, for_write: bool = False) -> Iterator[sql.Connection]:
|
|
251
266
|
"""
|
|
252
267
|
Call Catalog.begin_xact() instead, unless there is a specific reason to call this directly.
|
|
253
268
|
|
|
@@ -350,6 +365,26 @@ class Env:
|
|
|
350
365
|
def console_logger(self) -> ConsoleLogger:
|
|
351
366
|
return self._console_logger
|
|
352
367
|
|
|
368
|
+
def _get_tz_name(self) -> str:
|
|
369
|
+
"""Get the time zone name from the configuration, or the system local time zone if not specified.
|
|
370
|
+
|
|
371
|
+
Returns:
|
|
372
|
+
str: The time zone name.
|
|
373
|
+
"""
|
|
374
|
+
tz_name = Config.get().get_string_value('time_zone')
|
|
375
|
+
if tz_name is not None:
|
|
376
|
+
# Validate tzname
|
|
377
|
+
if not isinstance(tz_name, str):
|
|
378
|
+
self._logger.error('Invalid time zone specified in configuration.')
|
|
379
|
+
else:
|
|
380
|
+
try:
|
|
381
|
+
_ = ZoneInfo(tz_name)
|
|
382
|
+
except ZoneInfoNotFoundError:
|
|
383
|
+
self._logger.error(f'Invalid time zone specified in configuration: {tz_name}')
|
|
384
|
+
else:
|
|
385
|
+
tz_name = tzlocal.get_localzone_name()
|
|
386
|
+
return tz_name
|
|
387
|
+
|
|
353
388
|
def _set_up(self, echo: bool = False, reinit_db: bool = False) -> None:
|
|
354
389
|
if self._initialized:
|
|
355
390
|
return
|
|
@@ -393,10 +428,12 @@ class Env:
|
|
|
393
428
|
warnings.simplefilter('ignore', category=UserWarning)
|
|
394
429
|
warnings.simplefilter('ignore', category=FutureWarning)
|
|
395
430
|
|
|
396
|
-
# Set
|
|
397
|
-
|
|
431
|
+
# Set verbosity level for user visible console messages
|
|
432
|
+
self._verbosity = config.get_int_value('verbosity')
|
|
433
|
+
if self._verbosity is None:
|
|
434
|
+
self._verbosity = 1
|
|
398
435
|
stdout_handler = ConsoleOutputHandler(stream=stdout)
|
|
399
|
-
stdout_handler.setLevel(
|
|
436
|
+
stdout_handler.setLevel(map_level(self._verbosity))
|
|
400
437
|
stdout_handler.addFilter(ConsoleMessageFilter())
|
|
401
438
|
self._logger.addHandler(stdout_handler)
|
|
402
439
|
self._console_logger = ConsoleLogger(self._logger)
|
|
@@ -430,6 +467,7 @@ class Env:
|
|
|
430
467
|
http_logger.propagate = False
|
|
431
468
|
|
|
432
469
|
self.clear_tmp_dir()
|
|
470
|
+
tz_name = self._get_tz_name()
|
|
433
471
|
|
|
434
472
|
# configure pixeltable database
|
|
435
473
|
self._init_db(config)
|
|
@@ -439,22 +477,10 @@ class Env:
|
|
|
439
477
|
'Reinitializing pixeltable database is not supported when running in non-local environment'
|
|
440
478
|
)
|
|
441
479
|
|
|
442
|
-
tz_name = config.get_string_value('time_zone')
|
|
443
|
-
if tz_name is not None:
|
|
444
|
-
# Validate tzname
|
|
445
|
-
if not isinstance(tz_name, str):
|
|
446
|
-
self._logger.error('Invalid time zone specified in configuration.')
|
|
447
|
-
else:
|
|
448
|
-
try:
|
|
449
|
-
_ = ZoneInfo(tz_name)
|
|
450
|
-
except ZoneInfoNotFoundError:
|
|
451
|
-
self._logger.error(f'Invalid time zone specified in configuration: {tz_name}')
|
|
452
|
-
|
|
453
480
|
if reinit_db and self._store_db_exists():
|
|
454
481
|
self._drop_store_db()
|
|
455
482
|
|
|
456
483
|
create_db = not self._store_db_exists()
|
|
457
|
-
|
|
458
484
|
if create_db:
|
|
459
485
|
self._logger.info(f'creating database at: {self.db_url}')
|
|
460
486
|
self._create_store_db()
|
|
@@ -534,13 +560,16 @@ class Env:
|
|
|
534
560
|
metadata.schema.base_metadata.create_all(self._sa_engine, checkfirst=True)
|
|
535
561
|
metadata.create_system_info(self._sa_engine)
|
|
536
562
|
|
|
537
|
-
def _create_engine(self, time_zone_name:
|
|
538
|
-
connect_args = {
|
|
563
|
+
def _create_engine(self, time_zone_name: str, echo: bool = False) -> None:
|
|
564
|
+
connect_args = {'options': f'-c timezone={time_zone_name}'}
|
|
565
|
+
self._logger.info(f'Creating SQLAlchemy engine with connection arguments: {connect_args}')
|
|
539
566
|
self._sa_engine = sql.create_engine(
|
|
540
567
|
self.db_url, echo=echo, isolation_level=self._dbms.transaction_isolation_level, connect_args=connect_args
|
|
541
568
|
)
|
|
542
569
|
|
|
543
570
|
self._logger.info(f'Created SQLAlchemy engine at: {self.db_url}')
|
|
571
|
+
self._logger.info(f'Engine dialect: {self._sa_engine.dialect.name}')
|
|
572
|
+
self._logger.info(f'Engine driver : {self._sa_engine.dialect.driver}')
|
|
544
573
|
|
|
545
574
|
with self.engine.begin() as conn:
|
|
546
575
|
tz_name = conn.execute(sql.text('SHOW TIME ZONE')).scalar()
|
pixeltable/exec/__init__.py
CHANGED
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
from .aggregation_node import AggregationNode
|
|
4
4
|
from .cache_prefetch_node import CachePrefetchNode
|
|
5
|
+
from .cell_materialization_node import CellMaterializationNode
|
|
6
|
+
from .cell_reconstruction_node import CellReconstructionNode
|
|
5
7
|
from .component_iteration_node import ComponentIterationNode
|
|
6
8
|
from .data_row_batch import DataRowBatch
|
|
7
9
|
from .exec_context import ExecContext
|