pixeltable 0.4.15__py3-none-any.whl → 0.4.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic.
- pixeltable/__init__.py +4 -0
- pixeltable/catalog/catalog.py +125 -63
- pixeltable/catalog/column.py +7 -2
- pixeltable/catalog/table.py +1 -0
- pixeltable/catalog/table_metadata.py +4 -0
- pixeltable/catalog/table_version.py +174 -117
- pixeltable/catalog/table_version_handle.py +4 -1
- pixeltable/catalog/table_version_path.py +0 -11
- pixeltable/catalog/view.py +6 -0
- pixeltable/config.py +7 -0
- pixeltable/dataframe.py +10 -5
- pixeltable/env.py +56 -19
- pixeltable/exec/__init__.py +2 -0
- pixeltable/exec/cell_materialization_node.py +231 -0
- pixeltable/exec/cell_reconstruction_node.py +135 -0
- pixeltable/exec/exec_node.py +1 -1
- pixeltable/exec/expr_eval/evaluators.py +1 -0
- pixeltable/exec/expr_eval/expr_eval_node.py +3 -0
- pixeltable/exec/expr_eval/globals.py +2 -0
- pixeltable/exec/globals.py +32 -0
- pixeltable/exec/object_store_save_node.py +1 -4
- pixeltable/exec/row_update_node.py +16 -9
- pixeltable/exec/sql_node.py +107 -14
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/arithmetic_expr.py +23 -18
- pixeltable/exprs/column_property_ref.py +10 -10
- pixeltable/exprs/column_ref.py +2 -2
- pixeltable/exprs/data_row.py +106 -37
- pixeltable/exprs/expr.py +9 -0
- pixeltable/exprs/expr_set.py +14 -7
- pixeltable/exprs/inline_expr.py +2 -19
- pixeltable/exprs/json_path.py +45 -12
- pixeltable/exprs/row_builder.py +54 -22
- pixeltable/functions/__init__.py +1 -0
- pixeltable/functions/bedrock.py +7 -0
- pixeltable/functions/deepseek.py +11 -4
- pixeltable/functions/llama_cpp.py +7 -0
- pixeltable/functions/math.py +1 -1
- pixeltable/functions/ollama.py +7 -0
- pixeltable/functions/openai.py +4 -4
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/video.py +110 -28
- pixeltable/globals.py +10 -4
- pixeltable/io/globals.py +18 -17
- pixeltable/io/parquet.py +1 -1
- pixeltable/io/table_data_conduit.py +47 -22
- pixeltable/iterators/document.py +61 -23
- pixeltable/iterators/video.py +126 -53
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/notes.py +1 -0
- pixeltable/plan.py +175 -46
- pixeltable/share/packager.py +155 -26
- pixeltable/store.py +2 -3
- pixeltable/type_system.py +5 -3
- pixeltable/utils/arrow.py +6 -6
- pixeltable/utils/av.py +65 -0
- pixeltable/utils/console_output.py +4 -1
- pixeltable/utils/exception_handler.py +5 -28
- pixeltable/utils/image.py +7 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +16 -1
- pixeltable/utils/s3_store.py +44 -11
- {pixeltable-0.4.15.dist-info → pixeltable-0.4.17.dist-info}/METADATA +29 -28
- {pixeltable-0.4.15.dist-info → pixeltable-0.4.17.dist-info}/RECORD +68 -61
- {pixeltable-0.4.15.dist-info → pixeltable-0.4.17.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.15.dist-info → pixeltable-0.4.17.dist-info}/entry_points.txt +0 -0
- {pixeltable-0.4.15.dist-info → pixeltable-0.4.17.dist-info}/licenses/LICENSE +0 -0
@@ -11,6 +11,7 @@ from uuid import UUID
 
 import jsonschema.exceptions
 import sqlalchemy as sql
+from sqlalchemy import exc as sql_exc
 
 import pixeltable as pxt
 import pixeltable.exceptions as excs

@@ -21,20 +22,16 @@ from pixeltable.metadata import schema
 from pixeltable.utils.filecache import FileCache
 from pixeltable.utils.object_stores import ObjectOps
 
-from .tbl_ops import TableOp
-
-if TYPE_CHECKING:
-    from pixeltable.plan import SampleClause
-
 from ..func.globals import resolve_symbol
 from .column import Column
-from .globals import _POS_COLUMN_NAME, _ROWID_COLUMN_NAME, MediaValidation, is_valid_identifier
+from .globals import _POS_COLUMN_NAME, _ROWID_COLUMN_NAME, MediaValidation, QColumnId, is_valid_identifier
+from .tbl_ops import TableOp
 from .update_status import RowCountStats, UpdateStatus
 
 if TYPE_CHECKING:
     from pixeltable import exec, store
-
-    from .
+    from pixeltable.catalog.table_version_handle import TableVersionHandle
+    from pixeltable.plan import SampleClause
 
 _logger = logging.getLogger('pixeltable')
 
@@ -193,9 +190,7 @@ class TableVersion:
         """Create a snapshot copy of this TableVersion"""
         assert not self.is_snapshot
         base = self.path.base.tbl_version if self.is_view else None
-        return TableVersion(
-            self.id, self.tbl_md, self.version_md, self.version, self.schema_version_md, mutable_views=[], base=base
-        )
+        return TableVersion(self.id, self.tbl_md, self.version_md, self.version, self.schema_version_md, [], base=base)
 
     @property
     def versioned_name(self) -> str:
@@ -204,6 +199,12 @@ class TableVersion:
         else:
             return f'{self.name}:{self.effective_version}'
 
+    def __repr__(self) -> str:
+        return (
+            f'TableVersion(id={self.id!r}, name={self.name!r}, '
+            f'version={self.version}, effective_version={self.effective_version})'
+        )
+
     @property
     def handle(self) -> 'TableVersionHandle':
         from .table_version_handle import TableVersionHandle
@@ -290,11 +291,18 @@ class TableVersion:
         comment: str,
         media_validation: MediaValidation,
     ) -> tuple[UUID, Optional[TableVersion]]:
-
+        initial_md = cls.create_initial_md(name, cols, num_retained_versions, comment, media_validation, view_md=None)
         cat = pxt.catalog.Catalog.get()
 
-        tbl_id = UUID(hex=
-
+        tbl_id = UUID(hex=initial_md.tbl_md.tbl_id)
+        assert (tbl_id, None) not in cat._tbl_versions
+        tbl_version = cls(tbl_id, initial_md.tbl_md, initial_md.version_md, None, initial_md.schema_version_md, [])
+
+        @cat.register_undo_action
+        def _() -> None:
+            if (tbl_id, None) in cat._tbl_versions:
+                del cat._tbl_versions[tbl_id, None]
+
         # TODO: break this up, so that Catalog.create_table() registers tbl_version
         cat._tbl_versions[tbl_id, None] = tbl_version
         tbl_version.init()
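The `@cat.register_undo_action` usage above registers a cleanup closure that drops the partially-registered TableVersion from the in-memory catalog if the surrounding operation fails. A minimal, self-contained sketch of that registration pattern follows; aside from the decorator-style call shown in the diff, the class and state below are illustrative only and are not Pixeltable's actual API:

from typing import Callable

class UndoLog:
    """Toy stand-in for a catalog's undo-action registry."""

    def __init__(self) -> None:
        self._actions: list[Callable[[], None]] = []

    def register_undo_action(self, fn: Callable[[], None]) -> Callable[[], None]:
        # Used as a decorator: the decorated function is recorded for later rollback, not wrapped.
        self._actions.append(fn)
        return fn

    def rollback(self) -> None:
        # Undo in reverse registration order.
        for fn in reversed(self._actions):
            fn()

log = UndoLog()
registry: dict[str, str] = {'tbl-1': 'new table version'}

@log.register_undo_action
def _() -> None:
    registry.pop('tbl-1', None)

log.rollback()
assert 'tbl-1' not in registry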
@@ -308,8 +316,8 @@ class TableVersion:
             tbl_id=tbl_id,
             dir_id=dir_id,
             tbl_md=tbl_version.tbl_md,
-            version_md=
-            schema_version_md=
+            version_md=initial_md.version_md,
+            schema_version_md=initial_md.schema_version_md,
         )
         return tbl_id, tbl_version
 
@@ -336,11 +344,14 @@ class TableVersion:
 
     @classmethod
     def create_replica(cls, md: schema.FullTableMd) -> TableVersion:
+        from .catalog import TableVersionPath
+
         assert Env.get().in_xact
+        assert md.tbl_md.is_replica
         tbl_id = UUID(md.tbl_md.tbl_id)
         _logger.info(f'Creating replica table version {tbl_id}:{md.version_md.version}.')
         view_md = md.tbl_md.view_md
-        base_path =
+        base_path = TableVersionPath.from_md(view_md.base_versions) if view_md is not None else None
         base = base_path.tbl_version if base_path is not None else None
         tbl_version = cls(
             tbl_id,
@@ -405,8 +416,8 @@ class TableVersion:
     def _init_schema(self) -> None:
         # create columns first, so the indices can reference them
         self._init_cols()
-
-
+        self._init_idxs()
+
         # create the sa schema only after creating the columns and indices
         self._init_sa_schema()
 
@@ -444,39 +455,70 @@ class TableVersion:
         # self._record_refd_columns(col)
 
     def _init_idxs(self) -> None:
-        # self.idx_md = tbl_md.index_md
-        self.idxs_by_name = {}
-        import pixeltable.index as index_module
-
         for md in self.tbl_md.index_md.values():
-
-
-            ):
-                # index not visible in this schema version
-                continue
-
-            # instantiate index object
+            # Instantiate index object. This needs to be done for all indices, even those that are not active in this
+            # TableVersion, so that we can make appropriate adjustments to the SA schema.
             cls_name = md.class_fqn.rsplit('.', 1)[-1]
-            cls = getattr(
-            idx_col
-
-            # this is a reference to one of our columns: avoid TVP.get_column_by_id() here, because we're not fully
-            # initialized yet
-                idx_col = self.cols_by_id[md.indexed_col_id]
-            else:
-                assert self.path.base is not None
-                idx_col = self.path.base.get_column_by_id(UUID(md.indexed_col_tbl_id), md.indexed_col_id)
+            cls = getattr(index, cls_name)
+            idx_col = self._lookup_column(QColumnId(UUID(md.indexed_col_tbl_id), md.indexed_col_id))
+            assert idx_col is not None
             idx = cls.from_dict(idx_col, md.init_args)
 
             # fix up the sa column type of the index value and undo columns
-
+            # we need to do this for all indices, not just those that are active in this TableVersion, to ensure we get
+            # the correct SA schema in the StoreTable.
+            val_col = next(col for col in self.cols if col.id == md.index_val_col_id)
             val_col.sa_col_type = idx.index_sa_type()
-
-            undo_col = self.cols_by_id[md.index_val_undo_col_id]
+            undo_col = next(col for col in self.cols if col.id == md.index_val_undo_col_id)
             undo_col.sa_col_type = idx.index_sa_type()
+            if not isinstance(idx, index.EmbeddingIndex):
+                # Historically, the intent has been not to store cellmd data, even for embedding indices. However,
+                # the cellmd columns get created anyway, even if stores_cellmd is set to `False` here, due to the
+                # timing of index column creation. In order to ensure that SA schemas align with what is actually in
+                # the physical tables, we keep this `True` for embedding indices.
+                # TODO: Decide whether index columns should store cellmd data.
+                # - If not, set to `False`, fix the column creation timing issue, and add a migration script to
+                #   remedy existing cellmd columns.
+                # - If so, remove this TODO.
+                val_col._stores_cellmd = False
                 undo_col._stores_cellmd = False
-
-
+
+            # The index is active in this TableVersion provided that:
+            # (i) the TableVersion supports indices (either it's not a snapshot, or it's a replica at
+            #     the head version); and
+            # (ii) the index was created on or before the schema version of this TableVersion; and
+            # (iii) the index was not dropped on or before the schema version of this TableVersion.
+            supports_idxs = self.effective_version is None or (
+                self.tbl_md.is_replica and self.effective_version == self.tbl_md.current_version
+            )
+            if (
+                supports_idxs
+                and md.schema_version_add <= self.schema_version
+                and (md.schema_version_drop is None or md.schema_version_drop > self.schema_version)
+            ):
+                # Since the index is present in this TableVersion, its associated columns must be as well.
+                # Sanity-check this.
+                assert md.indexed_col_id in self.cols_by_id
+                assert md.index_val_col_id in self.cols_by_id
+                assert md.index_val_undo_col_id in self.cols_by_id
+                idx_info = self.IndexInfo(
+                    id=md.id, name=md.name, idx=idx, col=idx_col, val_col=val_col, undo_col=undo_col
+                )
+                self.idxs_by_name[md.name] = idx_info
+
+    def _lookup_column(self, id: QColumnId) -> Column | None:
+        """
+        Look up the column with the given table id and column id, searching through the ancestors of this TableVersion
+        to find it. We avoid referencing TableVersionPath in order to work properly with snapshots as well.
+
+        This will search through *all* known columns, including columns that are not visible in this TableVersion.
+        """
+        if id.tbl_id == self.id:
+            return next(col for col in self.cols if col.id == id.col_id)
+        elif self.base is not None:
+            return self.base.get()._lookup_column(id)
+        else:
+            return None
 
     def _init_sa_schema(self) -> None:
         # create the sqlalchemy schema; do this after instantiating columns, in order to determine whether they
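The comment block added in this hunk spells out when an index is considered active in a given TableVersion. A self-contained restatement of that rule (not taken from the diff; the parameter names simply mirror the index metadata fields used above) may help when reading the condition:

from typing import Optional

def index_is_active(
    effective_version: Optional[int],
    is_replica: bool,
    current_version: int,
    schema_version: int,
    schema_version_add: int,
    schema_version_drop: Optional[int],
) -> bool:
    # (i) the version supports indices: mutable head, or a replica pinned at its head version
    supports_idxs = effective_version is None or (is_replica and effective_version == current_version)
    # (ii) the index was added at or before this schema version, and (iii) not yet dropped at it
    return (
        supports_idxs
        and schema_version_add <= schema_version
        and (schema_version_drop is None or schema_version_drop > schema_version)
    )

# e.g. a snapshot (effective_version set) of a non-replica table never exposes indices:
assert not index_is_active(5, False, 7, 5, 3, None)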
@@ -507,9 +549,7 @@ class TableVersion:
 
     def add_index(self, col: Column, idx_name: Optional[str], idx: index.IndexBase) -> UpdateStatus:
         # we're creating a new schema version
-        self.
-        self.created_at = time.time()
-        self.schema_version = self.version
+        self.bump_version(bump_schema_version=True)
         status = self._add_index(col, idx_name, idx)
         self._write_md(new_version=True, new_schema_version=True)
         _logger.info(f'Added index {idx_name} on column {col.name} to table {self.name}')
@@ -620,9 +660,7 @@ class TableVersion:
         assert idx_id in self._tbl_md.index_md
 
         # we're creating a new schema version
-        self.
-        self.created_at = time.time()
-        self.schema_version = self.version
+        self.bump_version(bump_schema_version=True)
         idx_md = self._tbl_md.index_md[idx_id]
         idx_md.schema_version_drop = self.schema_version
         assert idx_md.name in self.idxs_by_name
@@ -651,9 +689,7 @@ class TableVersion:
         self.next_col_id += 1
 
         # we're creating a new schema version
-        self.
-        self.created_at = time.time()
-        self.schema_version = self.version
+        self.bump_version(bump_schema_version=True)
         index_cols: dict[Column, tuple[index.BtreeIndex, Column, Column]] = {}
         all_cols: list[Column] = []
         for col in cols:
@@ -685,7 +721,11 @@ class TableVersion:
         self, cols: Iterable[Column], print_stats: bool, on_error: Literal['abort', 'ignore']
     ) -> UpdateStatus:
         """Add and populate columns within the current transaction"""
+        from pixeltable.catalog import Catalog
+        from pixeltable.plan import Planner
+
         cols_to_add = list(cols)
+
         row_count = self.store_tbl.count()
         for col in cols_to_add:
             assert col.tbl is self
@@ -722,17 +762,19 @@ class TableVersion:
                 continue
 
             # populate the column
-            from pixeltable.plan import Planner
-
             plan = Planner.create_add_column_plan(self.path, col)
             plan.ctx.num_rows = row_count
             try:
                 plan.open()
                 try:
                     excs_per_col = self.store_tbl.load_column(col, plan, on_error == 'abort')
-                except
-
-
+                except sql_exc.DBAPIError as exc:
+                    Catalog.get().convert_sql_exc(exc, self.id, self.handle, convert_db_excs=True)
+                    # If it wasn't converted, re-raise as a generic Pixeltable error
+                    # (this means it's not a known concurrency error; it's something else)
+                    raise excs.Error(
+                        f'Unexpected SQL error during execution of computed column {col.name!r}:\n{exc}'
+                    ) from exc
                 if excs_per_col > 0:
                     cols_with_excs.append(col)
                     num_excs += excs_per_col
@@ -740,7 +782,7 @@ class TableVersion:
             finally:
                 plan.close()
 
-
+        Catalog.get().record_column_dependencies(self)
 
         if print_stats:
             plan.ctx.profile.print(num_rows=row_count)
@@ -760,9 +802,7 @@ class TableVersion:
         assert self.is_mutable
 
         # we're creating a new schema version
-        self.
-        self.created_at = time.time()
-        self.schema_version = self.version
+        self.bump_version(bump_schema_version=True)
 
         # drop this column and all dependent index columns and indices
         dropped_cols = [col]
@@ -826,9 +866,7 @@ class TableVersion:
         self._schema_version_md.columns[col.id].name = new_name
 
         # we're creating a new schema version
-        self.
-        self.created_at = time.time()
-        self.schema_version = self.version
+        self.bump_version(bump_schema_version=True)
 
         self._write_md(new_version=True, new_schema_version=True)
         _logger.info(f'Renamed column {old_name} to {new_name} in table {self.name}, new version: {self.version}')
@@ -848,9 +886,7 @@ class TableVersion:
 
     def _create_schema_version(self) -> None:
         # we're creating a new schema version
-        self.
-        self.created_at = time.time()
-        self.schema_version = self.version
+        self.bump_version(bump_schema_version=True)
         self._write_md(new_version=True, new_schema_version=True)
         _logger.info(f'[{self.name}] Updating table schema to version: {self.version}')
 
@@ -897,8 +933,7 @@ class TableVersion:
     ) -> UpdateStatus:
         """Insert rows produced by exec_plan and propagate to views"""
         # we're creating a new version
-        self.
-        self.created_at = timestamp
+        self.bump_version(timestamp, bump_schema_version=False)
         cols_with_excs, row_counts = self.store_tbl.insert_rows(
             exec_plan, v_min=self.version, rowids=rowids, abort_on_exc=abort_on_exc
         )
@@ -933,10 +968,11 @@ class TableVersion:
             cascade: if True, also update all computed columns that transitively depend on the updated columns,
                 including within views.
         """
-
-
+        from pixeltable.exprs import SqlElementCache
        from pixeltable.plan import Planner
 
+        assert self.is_mutable
+
         update_spec = self._validate_update_spec(value_spec, allow_pk=False, allow_exprs=True, allow_media=True)
         if where is not None:
             if not isinstance(where, exprs.Expr):
@@ -947,7 +983,6 @@ class TableVersion:
             raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
 
         plan, updated_cols, recomputed_cols = Planner.create_update_plan(self.path, update_spec, [], where, cascade)
-        from pixeltable.exprs import SqlElementCache
 
         result = self.propagate_update(
             plan,
@@ -974,11 +1009,11 @@ class TableVersion:
             batch: one dict per row, each mapping Columns to LiteralExprs representing the new values
             rowids: if not empty, one tuple per row, each containing the rowid values for the corresponding row in batch
         """
+        from pixeltable.plan import Planner
+
         # if we do lookups of rowids, we must have one for each row in the batch
         assert len(rowids) == 0 or len(rowids) == len(batch)
 
-        from pixeltable.plan import Planner
-
         plan, row_update_node, delete_where_clause, updated_cols, recomputed_cols = Planner.create_batch_update_plan(
             self.path, batch, rowids, cascade=cascade
         )
@@ -1050,14 +1085,14 @@ class TableVersion:
     def recompute_columns(
         self, col_names: list[str], where: exprs.Expr | None = None, errors_only: bool = False, cascade: bool = True
     ) -> UpdateStatus:
+        from pixeltable.exprs import CompoundPredicate, SqlElementCache
+        from pixeltable.plan import Planner
+
         assert self.is_mutable
         assert all(name in self.cols_by_name for name in col_names)
         assert len(col_names) > 0
         assert len(col_names) == 1 or not errors_only
 
-        from pixeltable.exprs import CompoundPredicate
-        from pixeltable.plan import Planner
-
         target_columns = [self.cols_by_name[name] for name in col_names]
         where_clause: Optional[exprs.Expr] = None
         if where is not None:
@@ -1072,7 +1107,6 @@ class TableVersion:
         plan, updated_cols, recomputed_cols = Planner.create_update_plan(
             self.path, update_targets={}, recompute_targets=target_columns, where_clause=where_clause, cascade=cascade
         )
-        from pixeltable.exprs import SqlElementCache
 
         result = self.propagate_update(
             plan,
@@ -1096,11 +1130,14 @@ class TableVersion:
         cascade: bool,
         show_progress: bool = True,
     ) -> UpdateStatus:
+        from pixeltable.catalog import Catalog
+        from pixeltable.plan import Planner
+
+        Catalog.get().mark_modified_tvs(self.handle)
         result = UpdateStatus()
         create_new_table_version = plan is not None
         if create_new_table_version:
-            self.
-            self.created_at = timestamp
+            self.bump_version(timestamp, bump_schema_version=False)
             cols_with_excs, row_counts = self.store_tbl.insert_rows(
                 plan, v_min=self.version, show_progress=show_progress
             )
@@ -1119,8 +1156,6 @@ class TableVersion:
             recomputed_cols = [col for col in recomputed_view_cols if col.tbl.id == view.id]
             plan = None
             if len(recomputed_cols) > 0:
-                from pixeltable.plan import Planner
-
                 plan = Planner.create_view_update_plan(view.get().path, recompute_targets=recomputed_cols)
             status = view.get().propagate_update(
                 plan, None, recomputed_view_cols, base_versions=base_versions, timestamp=timestamp, cascade=True
@@ -1155,6 +1190,10 @@ class TableVersion:
         self, where: Optional[exprs.Expr], base_versions: list[Optional[int]], timestamp: float
     ) -> UpdateStatus:
         """Delete rows in this table and propagate to views"""
+        from pixeltable.catalog import Catalog
+
+        Catalog.get().mark_modified_tvs(self.handle)
+
         # print(f'calling sql_expr()')
         sql_where_clause = where.sql_expr(exprs.SqlElementCache()) if where is not None else None
         # #print(f'sql_where_clause={str(sql_where_clause) if sql_where_clause is not None else None}')
@@ -1171,8 +1210,7 @@ class TableVersion:
         result = UpdateStatus(row_count_stats=row_counts)
         if del_rows > 0:
             # we're creating a new version
-            self.
-            self.created_at = timestamp
+            self.bump_version(timestamp, bump_schema_version=False)
         for view in self.mutable_views:
             status = view.get().propagate_delete(
                 where=None, base_versions=[self.version, *base_versions], timestamp=timestamp
@@ -1198,6 +1236,8 @@ class TableVersion:
         Doesn't attempt to revert the in-memory metadata, but instead invalidates this TableVersion instance
         and relies on Catalog to reload it
         """
+        from pixeltable.catalog import Catalog
+
         conn = Env.get().conn
         # make sure we don't have a snapshot referencing this version
         # (unclear how to express this with sqlalchemy)
@@ -1217,8 +1257,6 @@ class TableVersion:
             )
         )
 
-        # delete newly-added data
-        self.delete_media(tbl_version=self.version)
         conn.execute(sql.delete(self.store_tbl.sa_tbl).where(self.store_tbl.sa_tbl.c.v_min == self.version))
 
         # revert new deletions
@@ -1233,6 +1271,8 @@ class TableVersion:
         # revert schema changes:
         # - undo changes to self._tbl_md and write that back
         # - delete newly-added TableVersion/TableSchemaVersion records
+        Catalog.get().mark_modified_tvs(self.handle)
+        old_version = self.version
         if self.version == self.schema_version:
             # physically delete newly-added columns and remove them from the stored md
             added_cols = [col for col in self.cols if col.schema_version_add == self.schema_version]
@@ -1279,18 +1319,22 @@ class TableVersion:
             .where(schema.TableVersion.version == self.version)
         )
 
-        self.version
+        self._tbl_md.current_version = self._version_md.version = self.version - 1
+
         self._write_md(new_version=False, new_schema_version=False)
 
         # propagate to views
-        views_str = ', '.join([str(v.id) for v in self.mutable_views])
-        print(f'revert(): mutable_views={views_str}')
         for view in self.mutable_views:
             view.get()._revert()
 
         # force reload on next operation
         self.is_validated = False
-
+        Catalog.get().remove_tbl_version(self)
+
+        # delete newly-added data
+        # Do this at the end, after all DB operations have completed.
+        # TODO: The transaction could still fail. Really this should be done via PendingTableOps.
+        self.delete_media(tbl_version=old_version)
         _logger.info(f'TableVersion {self.name}: reverted to version {self.version}')
 
     def _init_external_stores(self) -> None:
@@ -1301,9 +1345,7 @@ class TableVersion:
             self.external_stores[store.name] = store
 
     def link_external_store(self, store: pxt.io.ExternalStore) -> None:
-        self.
-        self.created_at = time.time()
-        self.schema_version = self.version
+        self.bump_version(bump_schema_version=True)
 
         self.external_stores[store.name] = store
         self._tbl_md.external_stores.append(
@@ -1313,9 +1355,7 @@ class TableVersion:
 
     def unlink_external_store(self, store: pxt.io.ExternalStore) -> None:
         del self.external_stores[store.name]
-        self.
-        self.created_at = time.time()
-        self.schema_version = self.version
+        self.bump_version(bump_schema_version=True)
         idx = next(i for i, store_md in enumerate(self._tbl_md.external_stores) if store_md['md']['name'] == store.name)
         self._tbl_md.external_stores.pop(idx)
         self._write_md(new_version=True, new_schema_version=True)
@@ -1371,35 +1411,52 @@ class TableVersion:
         # if this is a snapshot instance, we need to ignore current_version
         return self._tbl_md.current_version if self.effective_version is None else self.effective_version
 
-    @version.setter
-    def version(self, version: int) -> None:
-        assert self.effective_version is None
-        self._tbl_md.current_version = version
-        self._version_md.version = version
-
     @property
     def created_at(self) -> float:
         return self._version_md.created_at
 
-    @created_at.setter
-    def created_at(self, ts: float) -> None:
-        assert self.effective_version is None
-        self._version_md.created_at = ts
-
     @property
     def schema_version(self) -> int:
         return self._schema_version_md.schema_version
 
-
-
+    def bump_version(self, timestamp: Optional[float] = None, *, bump_schema_version: bool) -> None:
+        """
+        Increments the table version and adjusts all associated metadata. This will *not* trigger a database action;
+        _write_md() must be called separately to persist the changes.
+
+        Args:
+            timestamp: the creation time for the new version. Can be used to synchronize multiple metadata changes
+                to the same timestamp. If `None`, then defaults to `time.time()`.
+            bump_schema_version: if True, also adjusts the schema version (setting it equal to the new version)
+                and associated metadata.
+        """
+        from pixeltable.catalog import Catalog
+
         assert self.effective_version is None
-
-
-
-
+
+        if timestamp is None:
+            timestamp = time.time()
+
+        Catalog.get().mark_modified_tvs(self.handle)
+
+        old_version = self._tbl_md.current_version
+        assert self._version_md.version == old_version
+        new_version = old_version + 1
+        self._tbl_md.current_version = new_version
+        self._version_md.version = new_version
+        self._version_md.created_at = timestamp
+
+        if bump_schema_version:
+            old_schema_version = self._tbl_md.current_schema_version
+            assert self._version_md.schema_version == old_schema_version
+            assert self._schema_version_md.schema_version == old_schema_version
+            self._tbl_md.current_schema_version = new_version
+            self._version_md.schema_version = new_version
+            self._schema_version_md.preceding_schema_version = old_schema_version
+            self._schema_version_md.schema_version = new_version
 
     @property
-    def preceding_schema_version(self) -> int:
+    def preceding_schema_version(self) -> Optional[int]:
         return self._schema_version_md.preceding_schema_version
 
     @property
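The new bump_version() consolidates the version/created_at/schema_version bookkeeping that was previously repeated at each call site. A minimal, self-contained sketch of its semantics follows; the real method mutates three separate metadata records (tbl_md, version_md, schema_version_md), and the toy dataclass below is illustrative only:

import time
from dataclasses import dataclass
from typing import Optional

@dataclass
class ToyVersionMd:
    current_version: int = 0
    current_schema_version: int = 0
    preceding_schema_version: Optional[int] = None
    created_at: float = 0.0

def bump_version(md: ToyVersionMd, timestamp: Optional[float] = None, *, bump_schema_version: bool) -> None:
    # mirror of the diff's logic: bump the table version, stamp the creation time,
    # and optionally move the schema version up to the new table version
    if timestamp is None:
        timestamp = time.time()
    md.current_version += 1
    md.created_at = timestamp
    if bump_schema_version:
        md.preceding_schema_version = md.current_schema_version
        md.current_schema_version = md.current_version

md = ToyVersionMd()
bump_version(md, bump_schema_version=True)   # schema change (e.g. add_index): both versions move
bump_version(md, bump_schema_version=False)  # data change (e.g. insert): only the table version moves
assert (md.current_version, md.current_schema_version, md.preceding_schema_version) == (2, 1, 0)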
@@ -1531,8 +1588,8 @@ class TableVersion:
 
     @classmethod
     def from_dict(cls, d: dict) -> TableVersion:
-        from pixeltable import
+        from pixeltable.catalog import Catalog
 
         id = UUID(d['id'])
         effective_version = d['effective_version']
-        return
+        return Catalog.get().get_tbl_version(id, effective_version)
pixeltable/catalog/table_version_handle.py
CHANGED

@@ -37,6 +37,9 @@ class TableVersionHandle:
     def __hash__(self) -> int:
         return hash((self.id, self.effective_version))
 
+    def __repr__(self) -> str:
+        return f'TableVersionHandle(id={self.id!r}, effective_version={self.effective_version})'
+
     @property
     def is_snapshot(self) -> bool:
         return self.effective_version is not None
@@ -81,7 +84,7 @@ class ColumnHandle:
         if self.col_id not in self.tbl_version.get().cols_by_id:
             schema_version_drop = self.tbl_version.get()._tbl_md.column_md[self.col_id].schema_version_drop
             raise excs.Error(
-                f'Column
+                f'Column was dropped (no record for column ID {self.col_id} in table '
                 f'{self.tbl_version.get().versioned_name!r}; it was dropped in table version {schema_version_drop})'
             )
         return self.tbl_version.get().cols_by_id[self.col_id]
pixeltable/catalog/table_version_path.py
CHANGED

@@ -195,17 +195,6 @@ class TableVersionPath:
         else:
             return None
 
-    def get_column_by_id(self, tbl_id: UUID, col_id: int) -> Optional[Column]:
-        """Return the column for the given tbl/col id"""
-        self.refresh_cached_md()
-        if self.tbl_version.id == tbl_id:
-            assert col_id in self._cached_tbl_version.cols_by_id
-            return self._cached_tbl_version.cols_by_id[col_id]
-        elif self.base is not None:
-            return self.base.get_column_by_id(tbl_id, col_id)
-        else:
-            return None
-
     def has_column(self, col: Column) -> bool:
         """Return True if this table has the given column."""
         assert col.tbl is not None
pixeltable/catalog/view.py
CHANGED
@@ -252,6 +252,12 @@ class View(Table):
             base=cls._get_snapshot_path(tbl_version_path.base) if tbl_version_path.base is not None else None,
         )
 
+    def _is_named_pure_snapshot(self) -> bool:
+        """
+        Returns True if this is a named pure snapshot (i.e., a pure snapshot that is a separate schema object).
+        """
+        return self._id != self._tbl_version_path.tbl_id
+
     def _is_anonymous_snapshot(self) -> bool:
         """
        Returns True if this is an unnamed snapshot (i.e., a snapshot that is not a separate schema object).
pixeltable/config.py
CHANGED
@@ -163,6 +163,7 @@ KNOWN_CONFIG_OPTIONS = {
         'api_key': 'API key for Pixeltable cloud',
         'r2_profile': 'AWS config profile name used to access R2 storage',
         's3_profile': 'AWS config profile name used to access S3 storage',
+        'b2_profile': 'S3-compatible profile name used to access Backblaze B2 storage',
     },
     'anthropic': {'api_key': 'Anthropic API key'},
     'bedrock': {'api_key': 'AWS Bedrock API key'},

@@ -181,6 +182,12 @@ KNOWN_CONFIG_OPTIONS = {
         'api_version': 'API version if using Azure OpenAI',
         'rate_limits': 'Per-model rate limits for OpenAI API requests',
     },
+    'openrouter': {
+        'api_key': 'OpenRouter API key',
+        'site_url': 'Optional URL for your application (for OpenRouter analytics)',
+        'app_name': 'Optional name for your application (for OpenRouter analytics)',
+        'rate_limit': 'Rate limit for OpenRouter API requests',
+    },
     'replicate': {'api_token': 'Replicate API token'},
     'together': {
         'api_key': 'Together API key',