pixeltable 0.3.10__py3-none-any.whl → 0.3.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +2 -2
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +2 -1
- pixeltable/catalog/catalog.py +370 -93
- pixeltable/catalog/column.py +6 -4
- pixeltable/catalog/dir.py +5 -5
- pixeltable/catalog/globals.py +14 -16
- pixeltable/catalog/insertable_table.py +6 -8
- pixeltable/catalog/path.py +14 -7
- pixeltable/catalog/table.py +72 -62
- pixeltable/catalog/table_version.py +137 -107
- pixeltable/catalog/table_version_handle.py +3 -0
- pixeltable/catalog/table_version_path.py +1 -1
- pixeltable/catalog/view.py +10 -14
- pixeltable/dataframe.py +5 -3
- pixeltable/env.py +108 -42
- pixeltable/exec/__init__.py +2 -0
- pixeltable/exec/aggregation_node.py +6 -8
- pixeltable/exec/cache_prefetch_node.py +4 -7
- pixeltable/exec/component_iteration_node.py +1 -3
- pixeltable/exec/data_row_batch.py +1 -2
- pixeltable/exec/exec_context.py +1 -1
- pixeltable/exec/exec_node.py +1 -2
- pixeltable/exec/expr_eval/__init__.py +2 -0
- pixeltable/exec/expr_eval/evaluators.py +137 -20
- pixeltable/exec/expr_eval/expr_eval_node.py +43 -64
- pixeltable/exec/expr_eval/globals.py +68 -7
- pixeltable/exec/expr_eval/schedulers.py +25 -23
- pixeltable/exec/in_memory_data_node.py +8 -6
- pixeltable/exec/row_update_node.py +3 -4
- pixeltable/exec/sql_node.py +16 -18
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/column_property_ref.py +1 -1
- pixeltable/exprs/column_ref.py +3 -3
- pixeltable/exprs/compound_predicate.py +1 -1
- pixeltable/exprs/data_row.py +17 -1
- pixeltable/exprs/expr.py +12 -12
- pixeltable/exprs/function_call.py +34 -2
- pixeltable/exprs/json_mapper.py +95 -48
- pixeltable/exprs/json_path.py +4 -9
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +33 -6
- pixeltable/exprs/similarity_expr.py +1 -1
- pixeltable/exprs/sql_element_cache.py +1 -1
- pixeltable/exprs/string_op.py +2 -2
- pixeltable/ext/__init__.py +1 -1
- pixeltable/ext/functions/__init__.py +1 -1
- pixeltable/ext/functions/whisperx.py +1 -1
- pixeltable/ext/functions/yolox.py +1 -1
- pixeltable/func/__init__.py +1 -1
- pixeltable/func/aggregate_function.py +2 -2
- pixeltable/func/callable_function.py +3 -6
- pixeltable/func/expr_template_function.py +24 -4
- pixeltable/func/function.py +7 -9
- pixeltable/func/function_registry.py +1 -1
- pixeltable/func/query_template_function.py +87 -4
- pixeltable/func/signature.py +1 -1
- pixeltable/func/tools.py +1 -1
- pixeltable/func/udf.py +2 -2
- pixeltable/functions/__init__.py +1 -1
- pixeltable/functions/anthropic.py +2 -2
- pixeltable/functions/audio.py +1 -1
- pixeltable/functions/deepseek.py +1 -1
- pixeltable/functions/fireworks.py +1 -1
- pixeltable/functions/globals.py +6 -6
- pixeltable/functions/huggingface.py +1 -1
- pixeltable/functions/image.py +1 -1
- pixeltable/functions/json.py +1 -1
- pixeltable/functions/llama_cpp.py +1 -1
- pixeltable/functions/math.py +1 -1
- pixeltable/functions/mistralai.py +1 -1
- pixeltable/functions/ollama.py +1 -1
- pixeltable/functions/openai.py +2 -2
- pixeltable/functions/replicate.py +1 -1
- pixeltable/functions/string.py +1 -1
- pixeltable/functions/timestamp.py +1 -1
- pixeltable/functions/together.py +1 -1
- pixeltable/functions/util.py +1 -1
- pixeltable/functions/video.py +2 -2
- pixeltable/functions/vision.py +2 -2
- pixeltable/globals.py +7 -2
- pixeltable/index/embedding_index.py +12 -1
- pixeltable/io/__init__.py +5 -3
- pixeltable/io/fiftyone.py +6 -7
- pixeltable/io/label_studio.py +21 -20
- pixeltable/io/pandas.py +6 -5
- pixeltable/iterators/__init__.py +1 -1
- pixeltable/metadata/__init__.py +6 -4
- pixeltable/metadata/converters/convert_24.py +3 -3
- pixeltable/metadata/converters/convert_25.py +1 -1
- pixeltable/metadata/converters/convert_29.py +1 -1
- pixeltable/metadata/converters/convert_31.py +11 -0
- pixeltable/metadata/converters/convert_32.py +15 -0
- pixeltable/metadata/converters/convert_33.py +17 -0
- pixeltable/metadata/notes.py +3 -0
- pixeltable/metadata/schema.py +26 -1
- pixeltable/plan.py +2 -3
- pixeltable/share/packager.py +8 -24
- pixeltable/share/publish.py +20 -9
- pixeltable/store.py +9 -6
- pixeltable/type_system.py +19 -7
- pixeltable/utils/console_output.py +3 -2
- pixeltable/utils/coroutine.py +3 -3
- pixeltable/utils/dbms.py +66 -0
- pixeltable/utils/documents.py +61 -67
- pixeltable/utils/exception_handler.py +59 -0
- pixeltable/utils/filecache.py +1 -1
- pixeltable/utils/http_server.py +3 -2
- pixeltable/utils/pytorch.py +1 -1
- pixeltable/utils/sql.py +1 -1
- pixeltable-0.3.12.dist-info/METADATA +436 -0
- pixeltable-0.3.12.dist-info/RECORD +183 -0
- pixeltable/catalog/path_dict.py +0 -169
- pixeltable-0.3.10.dist-info/METADATA +0 -382
- pixeltable-0.3.10.dist-info/RECORD +0 -179
- {pixeltable-0.3.10.dist-info → pixeltable-0.3.12.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.10.dist-info → pixeltable-0.3.12.dist-info}/WHEEL +0 -0
- {pixeltable-0.3.10.dist-info → pixeltable-0.3.12.dist-info}/entry_points.txt +0 -0
|
@@ -5,7 +5,7 @@ import importlib
|
|
|
5
5
|
import logging
|
|
6
6
|
import time
|
|
7
7
|
import uuid
|
|
8
|
-
from typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal, Optional
|
|
8
|
+
from typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal, Optional, Tuple
|
|
9
9
|
from uuid import UUID
|
|
10
10
|
|
|
11
11
|
import jsonschema.exceptions
|
|
@@ -13,12 +13,12 @@ import sqlalchemy as sql
|
|
|
13
13
|
|
|
14
14
|
import pixeltable as pxt
|
|
15
15
|
import pixeltable.exceptions as excs
|
|
16
|
-
import pixeltable.exprs as exprs
|
|
17
|
-
import pixeltable.index as index
|
|
18
16
|
import pixeltable.type_system as ts
|
|
17
|
+
from pixeltable import exprs, index
|
|
19
18
|
from pixeltable.env import Env
|
|
20
19
|
from pixeltable.iterators import ComponentIterator
|
|
21
20
|
from pixeltable.metadata import schema
|
|
21
|
+
from pixeltable.utils.exception_handler import run_cleanup_on_exception
|
|
22
22
|
from pixeltable.utils.filecache import FileCache
|
|
23
23
|
from pixeltable.utils.media_store import MediaStore
|
|
24
24
|
|
|
@@ -54,7 +54,9 @@ class TableVersion:
|
|
|
54
54
|
|
|
55
55
|
id: UUID
|
|
56
56
|
name: str
|
|
57
|
+
user: Optional[str]
|
|
57
58
|
effective_version: Optional[int]
|
|
59
|
+
is_replica: bool
|
|
58
60
|
version: int
|
|
59
61
|
comment: str
|
|
60
62
|
media_validation: MediaValidation
|
|
@@ -108,8 +110,10 @@ class TableVersion:
|
|
|
108
110
|
):
|
|
109
111
|
self.id = id
|
|
110
112
|
self.name = tbl_md.name
|
|
113
|
+
self.user = tbl_md.user
|
|
111
114
|
self.effective_version = effective_version
|
|
112
115
|
self.version = tbl_md.current_version if effective_version is None else effective_version
|
|
116
|
+
self.is_replica = tbl_md.is_replica
|
|
113
117
|
self.comment = schema_version_md.comment
|
|
114
118
|
self.num_retained_versions = schema_version_md.num_retained_versions
|
|
115
119
|
self.schema_version = schema_version_md.schema_version
|
|
@@ -211,6 +215,7 @@ class TableVersion:
|
|
|
211
215
|
view_md: Optional[schema.ViewMd] = None,
|
|
212
216
|
) -> tuple[UUID, Optional[TableVersion]]:
|
|
213
217
|
session = Env.get().session
|
|
218
|
+
user = Env.get().user
|
|
214
219
|
|
|
215
220
|
# assign ids
|
|
216
221
|
cols_by_name: dict[str, Column] = {}
|
|
@@ -229,7 +234,8 @@ class TableVersion:
|
|
|
229
234
|
table_md = schema.TableMd(
|
|
230
235
|
tbl_id=str(tbl_id),
|
|
231
236
|
name=name,
|
|
232
|
-
user=
|
|
237
|
+
user=user,
|
|
238
|
+
is_replica=False,
|
|
233
239
|
current_version=0,
|
|
234
240
|
current_schema_version=0,
|
|
235
241
|
next_col_id=len(cols),
|
|
@@ -308,24 +314,16 @@ class TableVersion:
|
|
|
308
314
|
session.add(schema_version_record)
|
|
309
315
|
return tbl_record.id, tbl_version
|
|
310
316
|
|
|
311
|
-
@classmethod
|
|
312
|
-
def delete_md(cls, tbl_id: UUID) -> None:
|
|
313
|
-
conn = Env.get().conn
|
|
314
|
-
conn.execute(sql.delete(schema.TableSchemaVersion.__table__).where(schema.TableSchemaVersion.tbl_id == tbl_id))
|
|
315
|
-
conn.execute(sql.delete(schema.TableVersion.__table__).where(schema.TableVersion.tbl_id == tbl_id))
|
|
316
|
-
conn.execute(sql.delete(schema.Table.__table__).where(schema.Table.id == tbl_id))
|
|
317
|
-
|
|
318
317
|
def drop(self) -> None:
|
|
318
|
+
from .catalog import Catalog
|
|
319
|
+
|
|
320
|
+
cat = Catalog.get()
|
|
319
321
|
# delete this table and all associated data
|
|
320
322
|
MediaStore.delete(self.id)
|
|
321
323
|
FileCache.get().clear(tbl_id=self.id)
|
|
322
|
-
|
|
324
|
+
cat.delete_tbl_md(self.id)
|
|
323
325
|
self.store_tbl.drop()
|
|
324
|
-
|
|
325
326
|
# de-register table version from catalog
|
|
326
|
-
from .catalog import Catalog
|
|
327
|
-
|
|
328
|
-
cat = Catalog.get()
|
|
329
327
|
cat.remove_tbl_version(self)
|
|
330
328
|
|
|
331
329
|
def _init_schema(self, tbl_md: schema.TableMd, schema_version_md: schema.TableSchemaVersionMd) -> None:
|
|
@@ -341,8 +339,11 @@ class TableVersion:
|
|
|
341
339
|
self.cols = []
|
|
342
340
|
self.cols_by_name = {}
|
|
343
341
|
self.cols_by_id = {}
|
|
344
|
-
|
|
345
|
-
|
|
342
|
+
# Sort columns in column_md by the position specified in col_md.id to guarantee that all references
|
|
343
|
+
# point backward.
|
|
344
|
+
sorted_column_md = sorted(tbl_md.column_md.values(), key=lambda item: item.id)
|
|
345
|
+
for col_md in sorted_column_md:
|
|
346
|
+
schema_col_md = schema_version_md.columns.get(col_md.id)
|
|
346
347
|
col_name = schema_col_md.name if schema_col_md is not None else None
|
|
347
348
|
media_val = (
|
|
348
349
|
MediaValidation[schema_col_md.media_validation.upper()]
|
|
@@ -376,7 +377,7 @@ class TableVersion:
|
|
|
376
377
|
|
|
377
378
|
# make sure to traverse columns ordered by position = order in which cols were created;
|
|
378
379
|
# this guarantees that references always point backwards
|
|
379
|
-
if col_md.value_expr is not None:
|
|
380
|
+
if not self.is_snapshot and col_md.value_expr is not None:
|
|
380
381
|
self._record_refd_columns(col)
|
|
381
382
|
|
|
382
383
|
def _init_idxs(self, tbl_md: schema.TableMd) -> None:
|
|
@@ -385,10 +386,8 @@ class TableVersion:
|
|
|
385
386
|
import pixeltable.index as index_module
|
|
386
387
|
|
|
387
388
|
for md in tbl_md.index_md.values():
|
|
388
|
-
if (
|
|
389
|
-
md.
|
|
390
|
-
or md.schema_version_drop is not None
|
|
391
|
-
and md.schema_version_drop <= self.schema_version
|
|
389
|
+
if md.schema_version_add > self.schema_version or (
|
|
390
|
+
md.schema_version_drop is not None and md.schema_version_drop <= self.schema_version
|
|
392
391
|
):
|
|
393
392
|
# index not visible in this schema version
|
|
394
393
|
continue
|
|
@@ -434,29 +433,15 @@ class TableVersion:
|
|
|
434
433
|
specified preceding schema version
|
|
435
434
|
"""
|
|
436
435
|
assert update_tbl_version or preceding_schema_version is None
|
|
436
|
+
from pixeltable.catalog import Catalog
|
|
437
437
|
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
.where(schema.Table.id == self.id)
|
|
438
|
+
tbl_md = self._create_tbl_md()
|
|
439
|
+
version_md = self._create_version_md(timestamp) if update_tbl_version else None
|
|
440
|
+
schema_version_md = (
|
|
441
|
+
self._create_schema_version_md(preceding_schema_version) if preceding_schema_version is not None else None
|
|
443
442
|
)
|
|
444
443
|
|
|
445
|
-
|
|
446
|
-
version_md = self._create_version_md(timestamp)
|
|
447
|
-
conn.execute(
|
|
448
|
-
sql.insert(schema.TableVersion.__table__).values(
|
|
449
|
-
tbl_id=self.id, version=self.version, md=dataclasses.asdict(version_md)
|
|
450
|
-
)
|
|
451
|
-
)
|
|
452
|
-
|
|
453
|
-
if preceding_schema_version is not None:
|
|
454
|
-
schema_version_md = self._create_schema_version_md(preceding_schema_version)
|
|
455
|
-
conn.execute(
|
|
456
|
-
sql.insert(schema.TableSchemaVersion.__table__).values(
|
|
457
|
-
tbl_id=self.id, schema_version=self.schema_version, md=dataclasses.asdict(schema_version_md)
|
|
458
|
-
)
|
|
459
|
-
)
|
|
444
|
+
Catalog.get().store_tbl_md(self.id, tbl_md, version_md, schema_version_md)
|
|
460
445
|
|
|
461
446
|
def ensure_md_loaded(self) -> None:
|
|
462
447
|
"""Ensure that table metadata is loaded."""
|
|
@@ -477,33 +462,36 @@ class TableVersion:
|
|
|
477
462
|
_logger.info(f'Added index {idx_name} on column {col.name} to table {self.name}')
|
|
478
463
|
return status
|
|
479
464
|
|
|
480
|
-
def
|
|
481
|
-
"""Add a B-tree index on this column if it has a compatible type"""
|
|
465
|
+
def _is_btree_indexable(self, col: Column) -> bool:
|
|
482
466
|
if not col.stored:
|
|
483
467
|
# if the column is intentionally not stored, we want to avoid the overhead of an index
|
|
484
|
-
return
|
|
468
|
+
return False
|
|
485
469
|
# Skip index for stored media columns produced by an iterator
|
|
486
470
|
if col.col_type.is_media_type() and self.is_iterator_column(col):
|
|
487
|
-
return
|
|
471
|
+
return False
|
|
488
472
|
if not col.col_type.is_scalar_type() and not (col.col_type.is_media_type() and not col.is_computed):
|
|
489
473
|
# wrong type for a B-tree
|
|
490
|
-
return
|
|
491
|
-
if col.col_type.is_bool_type():
|
|
474
|
+
return False
|
|
475
|
+
if col.col_type.is_bool_type(): # noqa : SIM103 Supress `Return the negated condition directly` check
|
|
492
476
|
# B-trees on bools aren't useful
|
|
477
|
+
return False
|
|
478
|
+
return True
|
|
479
|
+
|
|
480
|
+
def _add_default_index(self, col: Column) -> Optional[UpdateStatus]:
|
|
481
|
+
"""Add a B-tree index on this column if it has a compatible type"""
|
|
482
|
+
if not self._is_btree_indexable(col):
|
|
493
483
|
return None
|
|
494
484
|
status = self._add_index(col, idx_name=None, idx=index.BtreeIndex(col))
|
|
495
485
|
return status
|
|
496
486
|
|
|
497
|
-
def
|
|
487
|
+
def _create_index_columns(self, idx: index.IndexBase) -> Tuple[Column, Column]:
|
|
488
|
+
"""Create value and undo columns for the given index.
|
|
489
|
+
Args:
|
|
490
|
+
idx: index for which columns will be created.
|
|
491
|
+
Returns:
|
|
492
|
+
A tuple containing the value column and the undo column.
|
|
493
|
+
"""
|
|
498
494
|
assert not self.is_snapshot
|
|
499
|
-
idx_id = self.next_idx_id
|
|
500
|
-
self.next_idx_id += 1
|
|
501
|
-
if idx_name is None:
|
|
502
|
-
idx_name = f'idx{idx_id}'
|
|
503
|
-
else:
|
|
504
|
-
assert is_valid_identifier(idx_name)
|
|
505
|
-
assert idx_name not in [i.name for i in self.idx_md.values()]
|
|
506
|
-
|
|
507
495
|
# add the index value and undo columns (which need to be nullable)
|
|
508
496
|
val_col = Column(
|
|
509
497
|
col_id=self.next_col_id,
|
|
@@ -532,7 +520,19 @@ class TableVersion:
|
|
|
532
520
|
undo_col.tbl = self.create_handle()
|
|
533
521
|
undo_col.col_type = undo_col.col_type.copy(nullable=True)
|
|
534
522
|
self.next_col_id += 1
|
|
523
|
+
return val_col, undo_col
|
|
535
524
|
|
|
525
|
+
def _create_index(
|
|
526
|
+
self, col: Column, val_col: Column, undo_col: Column, idx_name: Optional[str], idx: index.IndexBase
|
|
527
|
+
) -> None:
|
|
528
|
+
"""Create the given index along with index md"""
|
|
529
|
+
idx_id = self.next_idx_id
|
|
530
|
+
self.next_idx_id += 1
|
|
531
|
+
if idx_name is None:
|
|
532
|
+
idx_name = f'idx{idx_id}'
|
|
533
|
+
else:
|
|
534
|
+
assert is_valid_identifier(idx_name)
|
|
535
|
+
assert idx_name not in [i.name for i in self.idx_md.values()]
|
|
536
536
|
# create and register the index metadata
|
|
537
537
|
idx_cls = type(idx)
|
|
538
538
|
idx_md = schema.IndexMd(
|
|
@@ -550,14 +550,27 @@ class TableVersion:
|
|
|
550
550
|
idx_info = self.IndexInfo(id=idx_id, name=idx_name, idx=idx, col=col, val_col=val_col, undo_col=undo_col)
|
|
551
551
|
self.idx_md[idx_id] = idx_md
|
|
552
552
|
self.idxs_by_name[idx_name] = idx_info
|
|
553
|
+
try:
|
|
554
|
+
idx.create_index(self._store_idx_name(idx_id), val_col)
|
|
555
|
+
finally:
|
|
556
|
+
|
|
557
|
+
def cleanup_index() -> None:
|
|
558
|
+
"""Delete the newly added in-memory index structure"""
|
|
559
|
+
del self.idxs_by_name[idx_name]
|
|
560
|
+
del self.idx_md[idx_id]
|
|
561
|
+
self.next_idx_id = idx_id
|
|
553
562
|
|
|
563
|
+
# Run cleanup only if there has been an exception; otherwise, skip cleanup.
|
|
564
|
+
run_cleanup_on_exception(cleanup_index)
|
|
565
|
+
|
|
566
|
+
def _add_index(self, col: Column, idx_name: Optional[str], idx: index.IndexBase) -> UpdateStatus:
|
|
567
|
+
val_col, undo_vol = self._create_index_columns(idx)
|
|
554
568
|
# add the columns and update the metadata
|
|
555
569
|
# TODO support on_error='abort' for indices; it's tricky because of the way metadata changes are entangled
|
|
556
570
|
# with the database operations
|
|
557
|
-
status = self._add_columns([val_col,
|
|
571
|
+
status = self._add_columns([val_col, undo_vol], print_stats=False, on_error='ignore')
|
|
558
572
|
# now create the index structure
|
|
559
|
-
|
|
560
|
-
|
|
573
|
+
self._create_index(col, val_col, undo_vol, idx_name, idx)
|
|
561
574
|
return status
|
|
562
575
|
|
|
563
576
|
def drop_index(self, idx_id: int) -> None:
|
|
@@ -598,9 +611,21 @@ class TableVersion:
|
|
|
598
611
|
self.version += 1
|
|
599
612
|
preceding_schema_version = self.schema_version
|
|
600
613
|
self.schema_version = self.version
|
|
601
|
-
|
|
614
|
+
index_cols: dict[Column, tuple[index.BtreeIndex, Column, Column]] = {}
|
|
615
|
+
all_cols: list[Column] = []
|
|
602
616
|
for col in cols:
|
|
603
|
-
|
|
617
|
+
all_cols.append(col)
|
|
618
|
+
if self._is_btree_indexable(col):
|
|
619
|
+
idx = index.BtreeIndex(col)
|
|
620
|
+
val_col, undo_col = self._create_index_columns(idx)
|
|
621
|
+
index_cols[col] = (idx, val_col, undo_col)
|
|
622
|
+
all_cols.append(val_col)
|
|
623
|
+
all_cols.append(undo_col)
|
|
624
|
+
# Add all columns
|
|
625
|
+
status = self._add_columns(all_cols, print_stats=print_stats, on_error=on_error)
|
|
626
|
+
# Create indices and their mds
|
|
627
|
+
for col, (idx, val_col, undo_col) in index_cols.items():
|
|
628
|
+
self._create_index(col, val_col, undo_col, idx_name=None, idx=idx)
|
|
604
629
|
self._update_md(time.time(), preceding_schema_version=preceding_schema_version)
|
|
605
630
|
_logger.info(f'Added columns {[col.name for col in cols]} to table {self.name}, new version: {self.version}')
|
|
606
631
|
|
|
@@ -616,18 +641,18 @@ class TableVersion:
|
|
|
616
641
|
self, cols: Iterable[Column], print_stats: bool, on_error: Literal['abort', 'ignore']
|
|
617
642
|
) -> UpdateStatus:
|
|
618
643
|
"""Add and populate columns within the current transaction"""
|
|
619
|
-
|
|
644
|
+
cols_to_add = list(cols)
|
|
620
645
|
row_count = self.store_tbl.count()
|
|
621
|
-
for col in
|
|
622
|
-
if not col.col_type.nullable and not col.is_computed:
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
)
|
|
646
|
+
for col in cols_to_add:
|
|
647
|
+
if not col.col_type.nullable and not col.is_computed and row_count > 0:
|
|
648
|
+
raise excs.Error(
|
|
649
|
+
f'Cannot add non-nullable column {col.name!r} to table {self.name!r} with existing rows'
|
|
650
|
+
)
|
|
627
651
|
|
|
628
652
|
num_excs = 0
|
|
629
653
|
cols_with_excs: list[Column] = []
|
|
630
|
-
for col in
|
|
654
|
+
for col in cols_to_add:
|
|
655
|
+
excs_per_col = 0
|
|
631
656
|
col.schema_version_add = self.schema_version
|
|
632
657
|
# add the column to the lookup structures now, rather than after the store changes executed successfully,
|
|
633
658
|
# because it might be referenced by the next column's value_expr
|
|
@@ -650,29 +675,32 @@ class TableVersion:
|
|
|
650
675
|
|
|
651
676
|
plan, value_expr_slot_idx = Planner.create_add_column_plan(self.path, col)
|
|
652
677
|
plan.ctx.num_rows = row_count
|
|
653
|
-
|
|
654
678
|
try:
|
|
655
679
|
plan.open()
|
|
656
680
|
try:
|
|
657
|
-
|
|
681
|
+
excs_per_col = self.store_tbl.load_column(col, plan, value_expr_slot_idx, on_error)
|
|
658
682
|
except sql.exc.DBAPIError as exc:
|
|
659
683
|
# Wrap the DBAPIError in an excs.Error to unify processing in the subsequent except block
|
|
660
684
|
raise excs.Error(f'SQL error during execution of computed column `{col.name}`:\n{exc}') from exc
|
|
661
|
-
if
|
|
685
|
+
if excs_per_col > 0:
|
|
662
686
|
cols_with_excs.append(col)
|
|
663
|
-
|
|
664
|
-
self.cols.pop()
|
|
665
|
-
for col in cols:
|
|
666
|
-
# remove columns that we already added
|
|
667
|
-
if col.id not in self.cols_by_id:
|
|
668
|
-
continue
|
|
669
|
-
if col.name is not None:
|
|
670
|
-
del self.cols_by_name[col.name]
|
|
671
|
-
del self.cols_by_id[col.id]
|
|
672
|
-
# we need to re-initialize the sqlalchemy schema
|
|
673
|
-
self.store_tbl.create_sa_tbl()
|
|
674
|
-
raise exc
|
|
687
|
+
num_excs += excs_per_col
|
|
675
688
|
finally:
|
|
689
|
+
# Ensure cleanup occurs if an exception or keyboard interruption happens during `load_column()`.
|
|
690
|
+
def cleanup_on_error() -> None:
|
|
691
|
+
"""Delete columns that are added as part of current add_columns operation and re-initialize
|
|
692
|
+
the sqlalchemy schema"""
|
|
693
|
+
self.cols = [col for col in self.cols if col not in cols_to_add]
|
|
694
|
+
for col in cols_to_add:
|
|
695
|
+
# remove columns that we already added
|
|
696
|
+
if col.id in self.cols_by_id:
|
|
697
|
+
del self.cols_by_id[col.id]
|
|
698
|
+
if col.name is not None and col.name in self.cols_by_name:
|
|
699
|
+
del self.cols_by_name[col.name]
|
|
700
|
+
self.store_tbl.create_sa_tbl()
|
|
701
|
+
|
|
702
|
+
# Run cleanup only if there has been an exception; otherwise, skip cleanup.
|
|
703
|
+
run_cleanup_on_exception(cleanup_on_error)
|
|
676
704
|
plan.close()
|
|
677
705
|
|
|
678
706
|
if print_stats:
|
|
@@ -756,19 +784,20 @@ class TableVersion:
|
|
|
756
784
|
self._update_md(time.time(), preceding_schema_version=preceding_schema_version)
|
|
757
785
|
_logger.info(f'Renamed column {old_name} to {new_name} in table {self.name}, new version: {self.version}')
|
|
758
786
|
|
|
759
|
-
def set_comment(self, new_comment: Optional[str]):
|
|
787
|
+
def set_comment(self, new_comment: Optional[str]) -> None:
|
|
760
788
|
_logger.info(f'[{self.name}] Updating comment: {new_comment}')
|
|
761
789
|
self.comment = new_comment
|
|
762
790
|
self._create_schema_version()
|
|
763
791
|
|
|
764
|
-
def set_num_retained_versions(self, new_num_retained_versions: int):
|
|
792
|
+
def set_num_retained_versions(self, new_num_retained_versions: int) -> None:
|
|
765
793
|
_logger.info(
|
|
766
|
-
f'[{self.name}] Updating num_retained_versions: {new_num_retained_versions}
|
|
794
|
+
f'[{self.name}] Updating num_retained_versions: {new_num_retained_versions} '
|
|
795
|
+
f'(was {self.num_retained_versions})'
|
|
767
796
|
)
|
|
768
797
|
self.num_retained_versions = new_num_retained_versions
|
|
769
798
|
self._create_schema_version()
|
|
770
799
|
|
|
771
|
-
def _create_schema_version(self):
|
|
800
|
+
def _create_schema_version(self) -> None:
|
|
772
801
|
# we're creating a new schema version
|
|
773
802
|
self.version += 1
|
|
774
803
|
preceding_schema_version = self.schema_version
|
|
@@ -858,7 +887,7 @@ class TableVersion:
|
|
|
858
887
|
|
|
859
888
|
from pixeltable.plan import Planner
|
|
860
889
|
|
|
861
|
-
update_spec = self._validate_update_spec(value_spec, allow_pk=False, allow_exprs=True)
|
|
890
|
+
update_spec = self._validate_update_spec(value_spec, allow_pk=False, allow_exprs=True, allow_media=True)
|
|
862
891
|
if where is not None:
|
|
863
892
|
if not isinstance(where, exprs.Expr):
|
|
864
893
|
raise excs.Error(f"'where' argument must be a predicate, got {type(where)}")
|
|
@@ -897,7 +926,6 @@ class TableVersion:
|
|
|
897
926
|
"""
|
|
898
927
|
# if we do lookups of rowids, we must have one for each row in the batch
|
|
899
928
|
assert len(rowids) == 0 or len(rowids) == len(batch)
|
|
900
|
-
cols_with_excs: set[str] = set()
|
|
901
929
|
|
|
902
930
|
from pixeltable.plan import Planner
|
|
903
931
|
|
|
@@ -919,7 +947,7 @@ class TableVersion:
|
|
|
919
947
|
return result
|
|
920
948
|
|
|
921
949
|
def _validate_update_spec(
|
|
922
|
-
self, value_spec: dict[str, Any], allow_pk: bool, allow_exprs: bool
|
|
950
|
+
self, value_spec: dict[str, Any], allow_pk: bool, allow_exprs: bool, allow_media: bool
|
|
923
951
|
) -> dict[Column, exprs.Expr]:
|
|
924
952
|
update_targets: dict[Column, exprs.Expr] = {}
|
|
925
953
|
for col_name, val in value_spec.items():
|
|
@@ -939,27 +967,31 @@ class TableVersion:
|
|
|
939
967
|
raise excs.Error(f'Column {col_name} is computed and cannot be updated')
|
|
940
968
|
if col.is_pk and not allow_pk:
|
|
941
969
|
raise excs.Error(f'Column {col_name} is a primary key column and cannot be updated')
|
|
970
|
+
if col.col_type.is_media_type() and not allow_media:
|
|
971
|
+
raise excs.Error(f'Column {col_name} is a media column and cannot be updated')
|
|
942
972
|
|
|
943
973
|
# make sure that the value is compatible with the column type
|
|
944
974
|
value_expr: exprs.Expr
|
|
945
975
|
try:
|
|
946
976
|
# check if this is a literal
|
|
947
977
|
value_expr = exprs.Literal(val, col_type=col.col_type)
|
|
948
|
-
except (TypeError, jsonschema.exceptions.ValidationError):
|
|
978
|
+
except (TypeError, jsonschema.exceptions.ValidationError) as exc:
|
|
949
979
|
if not allow_exprs:
|
|
950
980
|
raise excs.Error(
|
|
951
981
|
f'Column {col_name}: value {val!r} is not a valid literal for this column '
|
|
952
982
|
f'(expected {col.col_type})'
|
|
953
|
-
)
|
|
983
|
+
) from exc
|
|
954
984
|
# it's not a literal, let's try to create an expr from it
|
|
955
985
|
value_expr = exprs.Expr.from_object(val)
|
|
956
986
|
if value_expr is None:
|
|
957
|
-
raise excs.Error(
|
|
987
|
+
raise excs.Error(
|
|
988
|
+
f'Column {col_name}: value {val!r} is not a recognized literal or expression'
|
|
989
|
+
) from exc
|
|
958
990
|
if not col.col_type.is_supertype_of(value_expr.col_type, ignore_nullable=True):
|
|
959
991
|
raise excs.Error(
|
|
960
992
|
f'Type of value {val!r} ({value_expr.col_type}) is not compatible with the type of column '
|
|
961
993
|
f'{col_name} ({col.col_type})'
|
|
962
|
-
)
|
|
994
|
+
) from exc
|
|
963
995
|
update_targets[col] = value_expr
|
|
964
996
|
|
|
965
997
|
return update_targets
|
|
@@ -988,7 +1020,7 @@ class TableVersion:
|
|
|
988
1020
|
self._update_md(timestamp)
|
|
989
1021
|
|
|
990
1022
|
if cascade:
|
|
991
|
-
base_versions = [None if plan is None else self.version
|
|
1023
|
+
base_versions = [None if plan is None else self.version, *base_versions] # don't update in place
|
|
992
1024
|
# propagate to views
|
|
993
1025
|
for view in self.mutable_views:
|
|
994
1026
|
recomputed_cols = [col for col in recomputed_view_cols if col.tbl == view]
|
|
@@ -1048,11 +1080,9 @@ class TableVersion:
|
|
|
1048
1080
|
# we're creating a new version
|
|
1049
1081
|
self.version += 1
|
|
1050
1082
|
self._update_md(timestamp)
|
|
1051
|
-
else:
|
|
1052
|
-
pass
|
|
1053
1083
|
for view in self.mutable_views:
|
|
1054
1084
|
num_rows += view.get().propagate_delete(
|
|
1055
|
-
where=None, base_versions=[self.version
|
|
1085
|
+
where=None, base_versions=[self.version, *base_versions], timestamp=timestamp
|
|
1056
1086
|
)
|
|
1057
1087
|
return num_rows
|
|
1058
1088
|
|
|
@@ -1232,9 +1262,7 @@ class TableVersion:
|
|
|
1232
1262
|
|
|
1233
1263
|
def is_system_column(self, col: Column) -> bool:
|
|
1234
1264
|
"""Return True if column was created by Pixeltable"""
|
|
1235
|
-
|
|
1236
|
-
return True
|
|
1237
|
-
return False
|
|
1265
|
+
return col.name == _POS_COLUMN_NAME and self.is_component_view
|
|
1238
1266
|
|
|
1239
1267
|
def user_columns(self) -> list[Column]:
|
|
1240
1268
|
"""Return all non-system columns"""
|
|
@@ -1262,7 +1290,7 @@ class TableVersion:
|
|
|
1262
1290
|
|
|
1263
1291
|
def _record_refd_columns(self, col: Column) -> None:
|
|
1264
1292
|
"""Update Column.dependent_cols for all cols referenced in col.value_expr."""
|
|
1265
|
-
|
|
1293
|
+
from pixeltable import exprs
|
|
1266
1294
|
|
|
1267
1295
|
if col.value_expr_dict is not None:
|
|
1268
1296
|
# if we have a value_expr_dict, use that instead of instantiating the value_expr
|
|
@@ -1296,6 +1324,7 @@ class TableVersion:
|
|
|
1296
1324
|
column_md: dict[int, schema.ColumnMd] = {}
|
|
1297
1325
|
for col in cols:
|
|
1298
1326
|
value_expr_dict = col.value_expr.as_dict() if col.value_expr is not None else None
|
|
1327
|
+
assert col.is_pk is not None
|
|
1299
1328
|
column_md[col.id] = schema.ColumnMd(
|
|
1300
1329
|
id=col.id,
|
|
1301
1330
|
col_type=col.col_type.as_dict(),
|
|
@@ -1317,7 +1346,8 @@ class TableVersion:
|
|
|
1317
1346
|
return schema.TableMd(
|
|
1318
1347
|
tbl_id=str(self.id),
|
|
1319
1348
|
name=self.name,
|
|
1320
|
-
user=
|
|
1349
|
+
user=self.user,
|
|
1350
|
+
is_replica=self.is_replica,
|
|
1321
1351
|
current_version=self.version,
|
|
1322
1352
|
current_schema_version=self.schema_version,
|
|
1323
1353
|
next_col_id=self.next_col_id,
|
|
@@ -1364,7 +1394,7 @@ class TableVersion:
|
|
|
1364
1394
|
|
|
1365
1395
|
@classmethod
|
|
1366
1396
|
def from_dict(cls, d: dict) -> TableVersion:
|
|
1367
|
-
|
|
1397
|
+
from pixeltable import catalog
|
|
1368
1398
|
|
|
1369
1399
|
id = UUID(d['id'])
|
|
1370
1400
|
effective_version = d['effective_version']
|
|
@@ -31,6 +31,9 @@ class TableVersionHandle:
|
|
|
31
31
|
return False
|
|
32
32
|
return self.id == other.id and self.effective_version == other.effective_version
|
|
33
33
|
|
|
34
|
+
def __hash__(self) -> int:
|
|
35
|
+
return hash((self.id, self.effective_version))
|
|
36
|
+
|
|
34
37
|
@classmethod
|
|
35
38
|
def create(cls, tbl_version: TableVersion) -> TableVersionHandle:
|
|
36
39
|
return cls(tbl_version.id, tbl_version.effective_version, tbl_version)
|
|
@@ -82,7 +82,7 @@ class TableVersionPath:
|
|
|
82
82
|
"""Return all tbl versions"""
|
|
83
83
|
if self.base is None:
|
|
84
84
|
return [self.tbl_version]
|
|
85
|
-
return [self.tbl_version
|
|
85
|
+
return [self.tbl_version, *self.base.get_tbl_versions()]
|
|
86
86
|
|
|
87
87
|
def get_bases(self) -> list[TableVersionHandle]:
|
|
88
88
|
"""Return all tbl versions"""
|
pixeltable/catalog/view.py
CHANGED
|
@@ -2,7 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import inspect
|
|
4
4
|
import logging
|
|
5
|
-
from typing import TYPE_CHECKING, Any,
|
|
5
|
+
from typing import TYPE_CHECKING, Any, List, Literal, Optional
|
|
6
6
|
from uuid import UUID
|
|
7
7
|
|
|
8
8
|
import pixeltable.exceptions as excs
|
|
@@ -20,7 +20,7 @@ from .table_version_handle import TableVersionHandle
|
|
|
20
20
|
from .table_version_path import TableVersionPath
|
|
21
21
|
|
|
22
22
|
if TYPE_CHECKING:
|
|
23
|
-
|
|
23
|
+
from pixeltable.globals import TableDataSource
|
|
24
24
|
|
|
25
25
|
_logger = logging.getLogger('pixeltable')
|
|
26
26
|
|
|
@@ -65,7 +65,7 @@ class View(Table):
|
|
|
65
65
|
base: TableVersionPath,
|
|
66
66
|
select_list: Optional[list[tuple[exprs.Expr, Optional[str]]]],
|
|
67
67
|
additional_columns: dict[str, Any],
|
|
68
|
-
predicate: Optional['
|
|
68
|
+
predicate: Optional['exprs.Expr'],
|
|
69
69
|
is_snapshot: bool,
|
|
70
70
|
num_retained_versions: int,
|
|
71
71
|
comment: str,
|
|
@@ -98,7 +98,8 @@ class View(Table):
|
|
|
98
98
|
# make sure that the value can be computed in the context of the base
|
|
99
99
|
if col.value_expr is not None and not col.value_expr.is_bound_by([base]):
|
|
100
100
|
raise excs.Error(
|
|
101
|
-
f'Column {col.name}: value expression cannot be computed in the context of the
|
|
101
|
+
f'Column {col.name}: value expression cannot be computed in the context of the '
|
|
102
|
+
f'base {base.tbl_name()}'
|
|
102
103
|
)
|
|
103
104
|
|
|
104
105
|
if iterator_cls is not None:
|
|
@@ -111,8 +112,8 @@ class View(Table):
|
|
|
111
112
|
bound_args: dict[str, Any]
|
|
112
113
|
try:
|
|
113
114
|
bound_args = py_signature.bind(None, **iterator_args).arguments # None: arg for self
|
|
114
|
-
except TypeError as
|
|
115
|
-
raise excs.Error(f'Invalid iterator arguments: {
|
|
115
|
+
except TypeError as exc:
|
|
116
|
+
raise excs.Error(f'Invalid iterator arguments: {exc}') from exc
|
|
116
117
|
# we ignore 'self'
|
|
117
118
|
first_param_name = next(iter(py_signature.parameters)) # can't guarantee it's actually 'self'
|
|
118
119
|
del bound_args[first_param_name]
|
|
@@ -203,8 +204,8 @@ class View(Table):
|
|
|
203
204
|
|
|
204
205
|
from pixeltable.plan import Planner
|
|
205
206
|
|
|
206
|
-
plan,
|
|
207
|
-
num_rows, num_excs,
|
|
207
|
+
plan, _ = Planner.create_view_load_plan(view._tbl_version_path)
|
|
208
|
+
num_rows, num_excs, _ = tbl_version.store_tbl.insert_rows(plan, v_min=tbl_version.version)
|
|
208
209
|
Env.get().console_logger.info(f'Created view `{name}` with {num_rows} rows, {num_excs} exceptions.')
|
|
209
210
|
|
|
210
211
|
session.commit()
|
|
@@ -241,7 +242,7 @@ class View(Table):
|
|
|
241
242
|
# there is not TableVersion to drop
|
|
242
243
|
self._check_is_dropped()
|
|
243
244
|
self.is_dropped = True
|
|
244
|
-
|
|
245
|
+
catalog.Catalog.get().delete_tbl_md(self._id)
|
|
245
246
|
else:
|
|
246
247
|
super()._drop()
|
|
247
248
|
|
|
@@ -251,11 +252,6 @@ class View(Table):
|
|
|
251
252
|
md['is_snapshot'] = self._tbl_version_path.is_snapshot()
|
|
252
253
|
return md
|
|
253
254
|
|
|
254
|
-
if TYPE_CHECKING:
|
|
255
|
-
import datasets # type: ignore[import-untyped]
|
|
256
|
-
|
|
257
|
-
from pixeltable.globals import RowData, TableDataSource
|
|
258
|
-
|
|
259
255
|
def insert(
|
|
260
256
|
self,
|
|
261
257
|
source: Optional[TableDataSource] = None,
|
pixeltable/dataframe.py
CHANGED
|
@@ -88,12 +88,12 @@ class DataFrameResultSet:
|
|
|
88
88
|
def __iter__(self) -> Iterator[dict[str, Any]]:
|
|
89
89
|
return (self._row_to_dict(i) for i in range(len(self)))
|
|
90
90
|
|
|
91
|
-
def __eq__(self, other):
|
|
91
|
+
def __eq__(self, other: object) -> bool:
|
|
92
92
|
if not isinstance(other, DataFrameResultSet):
|
|
93
93
|
return False
|
|
94
94
|
return self.to_pandas().equals(other.to_pandas())
|
|
95
95
|
|
|
96
|
-
def __hash__(self):
|
|
96
|
+
def __hash__(self) -> int:
|
|
97
97
|
return hash(self.to_pandas())
|
|
98
98
|
|
|
99
99
|
|
|
@@ -571,7 +571,7 @@ class DataFrame:
|
|
|
571
571
|
expr = exprs.Expr.from_object(raw_expr)
|
|
572
572
|
if expr is None:
|
|
573
573
|
raise excs.Error(f'Invalid expression: {raw_expr}')
|
|
574
|
-
if expr.col_type.is_invalid_type():
|
|
574
|
+
if expr.col_type.is_invalid_type() and not (isinstance(expr, exprs.Literal) and expr.val is None):
|
|
575
575
|
raise excs.Error(f'Invalid type: {raw_expr}')
|
|
576
576
|
if not expr.is_bound_by(self._from_clause.tbls):
|
|
577
577
|
raise excs.Error(
|
|
@@ -624,6 +624,8 @@ class DataFrame:
|
|
|
624
624
|
|
|
625
625
|
>>> df = person.where(t.age > 30)
|
|
626
626
|
"""
|
|
627
|
+
if self.where_clause is not None:
|
|
628
|
+
raise excs.Error('Where clause already specified')
|
|
627
629
|
if not isinstance(pred, exprs.Expr):
|
|
628
630
|
raise excs.Error(f'Where() requires a Pixeltable expression, but instead got {type(pred)}')
|
|
629
631
|
if not pred.col_type.is_bool_type():
|