pixeltable 0.4.0rc3__py3-none-any.whl → 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +1 -1
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +11 -2
- pixeltable/catalog/catalog.py +407 -119
- pixeltable/catalog/column.py +38 -26
- pixeltable/catalog/globals.py +130 -15
- pixeltable/catalog/insertable_table.py +10 -9
- pixeltable/catalog/schema_object.py +6 -0
- pixeltable/catalog/table.py +245 -119
- pixeltable/catalog/table_version.py +142 -116
- pixeltable/catalog/table_version_handle.py +30 -2
- pixeltable/catalog/table_version_path.py +28 -4
- pixeltable/catalog/view.py +14 -20
- pixeltable/config.py +4 -0
- pixeltable/dataframe.py +10 -9
- pixeltable/env.py +5 -11
- pixeltable/exceptions.py +6 -0
- pixeltable/exec/exec_node.py +2 -0
- pixeltable/exec/expr_eval/expr_eval_node.py +4 -4
- pixeltable/exec/sql_node.py +47 -30
- pixeltable/exprs/column_property_ref.py +2 -10
- pixeltable/exprs/column_ref.py +24 -21
- pixeltable/exprs/data_row.py +9 -0
- pixeltable/exprs/expr.py +4 -4
- pixeltable/exprs/row_builder.py +44 -13
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +4 -2
- pixeltable/func/tools.py +12 -2
- pixeltable/func/udf.py +2 -2
- pixeltable/functions/__init__.py +1 -0
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +8 -6
- pixeltable/functions/mistralai.py +2 -13
- pixeltable/functions/openai.py +1 -6
- pixeltable/functions/replicate.py +2 -2
- pixeltable/functions/util.py +6 -1
- pixeltable/globals.py +0 -2
- pixeltable/io/external_store.py +81 -54
- pixeltable/io/globals.py +1 -1
- pixeltable/io/label_studio.py +49 -45
- pixeltable/io/table_data_conduit.py +1 -1
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/notes.py +2 -0
- pixeltable/metadata/schema.py +5 -0
- pixeltable/metadata/utils.py +78 -0
- pixeltable/plan.py +59 -139
- pixeltable/share/packager.py +2 -2
- pixeltable/store.py +114 -103
- pixeltable/type_system.py +30 -0
- {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/METADATA +1 -1
- {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/RECORD +57 -53
- pixeltable/utils/sample.py +0 -25
- {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/LICENSE +0 -0
- {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/entry_points.txt +0 -0
|
@@ -29,7 +29,14 @@ if TYPE_CHECKING:
|
|
|
29
29
|
|
|
30
30
|
from ..func.globals import resolve_symbol
|
|
31
31
|
from .column import Column
|
|
32
|
-
from .globals import
|
|
32
|
+
from .globals import (
|
|
33
|
+
_POS_COLUMN_NAME,
|
|
34
|
+
_ROWID_COLUMN_NAME,
|
|
35
|
+
MediaValidation,
|
|
36
|
+
RowCountStats,
|
|
37
|
+
UpdateStatus,
|
|
38
|
+
is_valid_identifier,
|
|
39
|
+
)
|
|
33
40
|
|
|
34
41
|
if TYPE_CHECKING:
|
|
35
42
|
from pixeltable import exec, store
|
|
@@ -167,18 +174,6 @@ class TableVersion:
|
|
|
167
174
|
self.idxs_by_name = {}
|
|
168
175
|
self.external_stores = {}
|
|
169
176
|
|
|
170
|
-
def init(self) -> None:
|
|
171
|
-
"""
|
|
172
|
-
Initialize schema-related in-memory metadata separately, now that this TableVersion instance is visible
|
|
173
|
-
in Catalog.
|
|
174
|
-
"""
|
|
175
|
-
from .catalog import Catalog
|
|
176
|
-
|
|
177
|
-
assert (self.id, self.effective_version) in Catalog.get()._tbl_versions
|
|
178
|
-
self._init_schema()
|
|
179
|
-
# init external stores; this needs to happen after the schema is created
|
|
180
|
-
self._init_external_stores()
|
|
181
|
-
|
|
182
177
|
def __hash__(self) -> int:
|
|
183
178
|
return hash(self.id)
|
|
184
179
|
|
|
@@ -195,6 +190,12 @@ class TableVersion:
|
|
|
195
190
|
else:
|
|
196
191
|
return f'{self.name}:{self.effective_version}'
|
|
197
192
|
|
|
193
|
+
@property
|
|
194
|
+
def handle(self) -> 'TableVersionHandle':
|
|
195
|
+
from .table_version_handle import TableVersionHandle
|
|
196
|
+
|
|
197
|
+
return TableVersionHandle(self.id, self.effective_version, self)
|
|
198
|
+
|
|
198
199
|
@classmethod
|
|
199
200
|
def create(
|
|
200
201
|
cls,
|
|
@@ -207,7 +208,6 @@ class TableVersion:
|
|
|
207
208
|
# base_path: Optional[pxt.catalog.TableVersionPath] = None,
|
|
208
209
|
view_md: Optional[schema.ViewMd] = None,
|
|
209
210
|
) -> tuple[UUID, Optional[TableVersion]]:
|
|
210
|
-
session = Env.get().session
|
|
211
211
|
user = Env.get().user
|
|
212
212
|
|
|
213
213
|
# assign ids
|
|
@@ -224,8 +224,9 @@ class TableVersion:
|
|
|
224
224
|
# Column.dependent_cols for existing cols is wrong at this point, but init() will set it correctly
|
|
225
225
|
column_md = cls._create_column_md(cols)
|
|
226
226
|
tbl_id = uuid.uuid4()
|
|
227
|
+
tbl_id_str = str(tbl_id)
|
|
227
228
|
table_md = schema.TableMd(
|
|
228
|
-
tbl_id=
|
|
229
|
+
tbl_id=tbl_id_str,
|
|
229
230
|
name=name,
|
|
230
231
|
user=user,
|
|
231
232
|
is_replica=False,
|
|
@@ -234,22 +235,17 @@ class TableVersion:
|
|
|
234
235
|
next_col_id=len(cols),
|
|
235
236
|
next_idx_id=0,
|
|
236
237
|
next_row_id=0,
|
|
238
|
+
view_sn=0,
|
|
237
239
|
column_md=column_md,
|
|
238
240
|
index_md={},
|
|
239
241
|
external_stores=[],
|
|
240
242
|
view_md=view_md,
|
|
241
243
|
additional_md={},
|
|
242
244
|
)
|
|
243
|
-
# create a schema.Table here, we need it to call our c'tor;
|
|
244
|
-
# don't add it to the session yet, we might add index metadata
|
|
245
|
-
tbl_record = schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(table_md))
|
|
246
245
|
|
|
247
246
|
# create schema.TableVersion
|
|
248
247
|
table_version_md = schema.TableVersionMd(
|
|
249
|
-
tbl_id=
|
|
250
|
-
)
|
|
251
|
-
tbl_version_record = schema.TableVersion(
|
|
252
|
-
tbl_id=tbl_record.id, version=0, md=dataclasses.asdict(table_version_md)
|
|
248
|
+
tbl_id=tbl_id_str, created_at=timestamp, version=0, schema_version=0, additional_md={}
|
|
253
249
|
)
|
|
254
250
|
|
|
255
251
|
# create schema.TableSchemaVersion
|
|
@@ -263,7 +259,7 @@ class TableVersion:
|
|
|
263
259
|
schema_col_md[col.id] = md
|
|
264
260
|
|
|
265
261
|
schema_version_md = schema.TableSchemaVersionMd(
|
|
266
|
-
tbl_id=
|
|
262
|
+
tbl_id=tbl_id_str,
|
|
267
263
|
schema_version=0,
|
|
268
264
|
preceding_schema_version=None,
|
|
269
265
|
columns=schema_col_md,
|
|
@@ -272,9 +268,8 @@ class TableVersion:
|
|
|
272
268
|
media_validation=media_validation.name.lower(),
|
|
273
269
|
additional_md={},
|
|
274
270
|
)
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
)
|
|
271
|
+
|
|
272
|
+
cat = pxt.catalog.Catalog.get()
|
|
278
273
|
|
|
279
274
|
# if this is purely a snapshot (it doesn't require any additional storage for columns and it doesn't have a
|
|
280
275
|
# predicate to apply at runtime), we don't create a physical table and simply use the base's table version path
|
|
@@ -285,22 +280,23 @@ class TableVersion:
|
|
|
285
280
|
and view_md.sample_clause is None
|
|
286
281
|
and len(cols) == 0
|
|
287
282
|
):
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
283
|
+
cat.store_tbl_md(
|
|
284
|
+
tbl_id=tbl_id,
|
|
285
|
+
dir_id=dir_id,
|
|
286
|
+
tbl_md=table_md,
|
|
287
|
+
version_md=table_version_md,
|
|
288
|
+
schema_version_md=schema_version_md,
|
|
289
|
+
)
|
|
290
|
+
return tbl_id, None
|
|
292
291
|
|
|
293
292
|
# assert (base_path is not None) == (view_md is not None)
|
|
294
293
|
is_snapshot = view_md is not None and view_md.is_snapshot
|
|
295
294
|
effective_version = 0 if is_snapshot else None
|
|
296
295
|
base_path = pxt.catalog.TableVersionPath.from_md(view_md.base_versions) if view_md is not None else None
|
|
297
296
|
base = base_path.tbl_version if base_path is not None else None
|
|
298
|
-
tbl_version = cls(
|
|
299
|
-
tbl_record.id, table_md, effective_version, schema_version_md, [], base_path=base_path, base=base
|
|
300
|
-
)
|
|
297
|
+
tbl_version = cls(tbl_id, table_md, effective_version, schema_version_md, [], base_path=base_path, base=base)
|
|
301
298
|
# TODO: break this up, so that Catalog.create_table() registers tbl_version
|
|
302
|
-
cat =
|
|
303
|
-
cat._tbl_versions[tbl_record.id, effective_version] = tbl_version
|
|
299
|
+
cat._tbl_versions[tbl_id, effective_version] = tbl_version
|
|
304
300
|
tbl_version.init()
|
|
305
301
|
tbl_version.store_tbl.create()
|
|
306
302
|
is_mutable = not is_snapshot and not table_md.is_replica
|
|
@@ -317,12 +313,14 @@ class TableVersion:
|
|
|
317
313
|
status = tbl_version._add_default_index(col)
|
|
318
314
|
assert status is None or status.num_excs == 0
|
|
319
315
|
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
316
|
+
cat.store_tbl_md(
|
|
317
|
+
tbl_id=tbl_id,
|
|
318
|
+
dir_id=dir_id,
|
|
319
|
+
tbl_md=tbl_version.tbl_md,
|
|
320
|
+
version_md=table_version_md,
|
|
321
|
+
schema_version_md=schema_version_md,
|
|
322
|
+
)
|
|
323
|
+
return tbl_id, tbl_version
|
|
326
324
|
|
|
327
325
|
@classmethod
|
|
328
326
|
def create_replica(cls, md: schema.FullTableMd) -> TableVersion:
|
|
@@ -342,24 +340,39 @@ class TableVersion:
|
|
|
342
340
|
return tbl_version
|
|
343
341
|
|
|
344
342
|
def drop(self) -> None:
|
|
345
|
-
from .catalog import Catalog
|
|
346
|
-
|
|
347
343
|
if self.is_view and self.is_mutable:
|
|
348
344
|
# update mutable_views
|
|
345
|
+
# TODO: invalidate base to force reload
|
|
349
346
|
from .table_version_handle import TableVersionHandle
|
|
350
347
|
|
|
351
348
|
assert self.base is not None
|
|
352
349
|
if self.base.get().is_mutable:
|
|
353
350
|
self.base.get().mutable_views.remove(TableVersionHandle.create(self))
|
|
354
351
|
|
|
355
|
-
cat = Catalog.get()
|
|
352
|
+
# cat = Catalog.get()
|
|
356
353
|
# delete this table and all associated data
|
|
357
354
|
MediaStore.delete(self.id)
|
|
358
355
|
FileCache.get().clear(tbl_id=self.id)
|
|
359
|
-
cat.delete_tbl_md(self.id)
|
|
356
|
+
# cat.delete_tbl_md(self.id)
|
|
360
357
|
self.store_tbl.drop()
|
|
361
358
|
# de-register table version from catalog
|
|
362
|
-
cat.remove_tbl_version(self)
|
|
359
|
+
# cat.remove_tbl_version(self)
|
|
360
|
+
|
|
361
|
+
def init(self) -> None:
|
|
362
|
+
"""
|
|
363
|
+
Initialize schema-related in-memory metadata separately, now that this TableVersion instance is visible
|
|
364
|
+
in Catalog.
|
|
365
|
+
"""
|
|
366
|
+
from .catalog import Catalog
|
|
367
|
+
|
|
368
|
+
cat = Catalog.get()
|
|
369
|
+
assert (self.id, self.effective_version) in cat._tbl_versions
|
|
370
|
+
self._init_schema()
|
|
371
|
+
if not self.is_snapshot:
|
|
372
|
+
cat.record_column_dependencies(self)
|
|
373
|
+
|
|
374
|
+
# init external stores; this needs to happen after the schema is created
|
|
375
|
+
self._init_external_stores()
|
|
363
376
|
|
|
364
377
|
def _init_schema(self) -> None:
|
|
365
378
|
# create columns first, so the indices can reference them
|
|
@@ -369,6 +382,10 @@ class TableVersion:
|
|
|
369
382
|
# create the sa schema only after creating the columns and indices
|
|
370
383
|
self._init_sa_schema()
|
|
371
384
|
|
|
385
|
+
# created value_exprs after everything else has been initialized
|
|
386
|
+
for col in self.cols_by_id.values():
|
|
387
|
+
col.init_value_expr()
|
|
388
|
+
|
|
372
389
|
def _init_cols(self) -> None:
|
|
373
390
|
"""Initialize self.cols with the columns visible in our effective version"""
|
|
374
391
|
self.cols = []
|
|
@@ -395,6 +412,7 @@ class TableVersion:
|
|
|
395
412
|
schema_version_add=col_md.schema_version_add,
|
|
396
413
|
schema_version_drop=col_md.schema_version_drop,
|
|
397
414
|
value_expr_dict=col_md.value_expr,
|
|
415
|
+
tbl=self,
|
|
398
416
|
)
|
|
399
417
|
col.tbl = self
|
|
400
418
|
self.cols.append(col)
|
|
@@ -410,10 +428,10 @@ class TableVersion:
|
|
|
410
428
|
self.cols_by_name[col.name] = col
|
|
411
429
|
self.cols_by_id[col.id] = col
|
|
412
430
|
|
|
413
|
-
# make sure to traverse columns ordered by position = order in which cols were created;
|
|
414
|
-
# this guarantees that references always point backwards
|
|
415
|
-
if not self.is_snapshot and col_md.value_expr is not None:
|
|
416
|
-
|
|
431
|
+
# # make sure to traverse columns ordered by position = order in which cols were created;
|
|
432
|
+
# # this guarantees that references always point backwards
|
|
433
|
+
# if not self.is_snapshot and col_md.value_expr is not None:
|
|
434
|
+
# self._record_refd_columns(col)
|
|
417
435
|
|
|
418
436
|
def _init_idxs(self) -> None:
|
|
419
437
|
# self.idx_md = tbl_md.index_md
|
|
@@ -479,14 +497,9 @@ class TableVersion:
|
|
|
479
497
|
)
|
|
480
498
|
|
|
481
499
|
Catalog.get().store_tbl_md(
|
|
482
|
-
self.id, self._tbl_md, version_md, self._schema_version_md if new_schema_version else None
|
|
500
|
+
self.id, None, self._tbl_md, version_md, self._schema_version_md if new_schema_version else None
|
|
483
501
|
)
|
|
484
502
|
|
|
485
|
-
def ensure_md_loaded(self) -> None:
|
|
486
|
-
"""Ensure that table metadata is loaded."""
|
|
487
|
-
for col in self.cols_by_id.values():
|
|
488
|
-
_ = col.value_expr
|
|
489
|
-
|
|
490
503
|
def _store_idx_name(self, idx_id: int) -> str:
|
|
491
504
|
"""Return name of index in the store, which needs to be globally unique"""
|
|
492
505
|
return f'idx_{self.id.hex}_{idx_id}'
|
|
@@ -689,6 +702,7 @@ class TableVersion:
|
|
|
689
702
|
f'Cannot add non-nullable column {col.name!r} to table {self.name!r} with existing rows'
|
|
690
703
|
)
|
|
691
704
|
|
|
705
|
+
computed_values = 0
|
|
692
706
|
num_excs = 0
|
|
693
707
|
cols_with_excs: list[Column] = []
|
|
694
708
|
for col in cols_to_add:
|
|
@@ -700,9 +714,6 @@ class TableVersion:
|
|
|
700
714
|
if col.name is not None:
|
|
701
715
|
self.cols_by_name[col.name] = col
|
|
702
716
|
self.cols_by_id[col.id] = col
|
|
703
|
-
if col.value_expr is not None:
|
|
704
|
-
col.check_value_expr()
|
|
705
|
-
self._record_refd_columns(col)
|
|
706
717
|
|
|
707
718
|
# also add to stored md
|
|
708
719
|
self._tbl_md.column_md[col.id] = schema.ColumnMd(
|
|
@@ -730,18 +741,19 @@ class TableVersion:
|
|
|
730
741
|
# populate the column
|
|
731
742
|
from pixeltable.plan import Planner
|
|
732
743
|
|
|
733
|
-
plan
|
|
744
|
+
plan = Planner.create_add_column_plan(self.path, col)
|
|
734
745
|
plan.ctx.num_rows = row_count
|
|
735
746
|
try:
|
|
736
747
|
plan.open()
|
|
737
748
|
try:
|
|
738
|
-
excs_per_col = self.store_tbl.load_column(col, plan,
|
|
749
|
+
excs_per_col = self.store_tbl.load_column(col, plan, on_error == 'abort')
|
|
739
750
|
except sql.exc.DBAPIError as exc:
|
|
740
751
|
# Wrap the DBAPIError in an excs.Error to unify processing in the subsequent except block
|
|
741
752
|
raise excs.Error(f'SQL error during execution of computed column `{col.name}`:\n{exc}') from exc
|
|
742
753
|
if excs_per_col > 0:
|
|
743
754
|
cols_with_excs.append(col)
|
|
744
755
|
num_excs += excs_per_col
|
|
756
|
+
computed_values += plan.ctx.num_computed_exprs * row_count
|
|
745
757
|
finally:
|
|
746
758
|
# Ensure cleanup occurs if an exception or keyboard interruption happens during `load_column()`.
|
|
747
759
|
def cleanup_on_error() -> None:
|
|
@@ -760,14 +772,18 @@ class TableVersion:
|
|
|
760
772
|
run_cleanup_on_exception(cleanup_on_error)
|
|
761
773
|
plan.close()
|
|
762
774
|
|
|
775
|
+
pxt.catalog.Catalog.get().record_column_dependencies(self)
|
|
776
|
+
|
|
763
777
|
if print_stats:
|
|
764
778
|
plan.ctx.profile.print(num_rows=row_count)
|
|
765
|
-
|
|
779
|
+
|
|
780
|
+
# TODO: what to do about system columns with exceptions?
|
|
781
|
+
row_counts = RowCountStats(
|
|
782
|
+
upd_rows=row_count, num_excs=num_excs, computed_values=computed_values
|
|
783
|
+
) # add_columns
|
|
766
784
|
return UpdateStatus(
|
|
767
|
-
num_rows=row_count,
|
|
768
|
-
num_computed_values=row_count,
|
|
769
|
-
num_excs=num_excs,
|
|
770
785
|
cols_with_excs=[f'{col.tbl.name}.{col.name}' for col in cols_with_excs if col.name is not None],
|
|
786
|
+
row_count_stats=row_counts,
|
|
771
787
|
)
|
|
772
788
|
|
|
773
789
|
def drop_column(self, col: Column) -> None:
|
|
@@ -805,13 +821,6 @@ class TableVersion:
|
|
|
805
821
|
assert not self.is_snapshot
|
|
806
822
|
|
|
807
823
|
for col in cols:
|
|
808
|
-
if col.value_expr is not None:
|
|
809
|
-
# update Column.dependent_cols
|
|
810
|
-
for c in self.cols:
|
|
811
|
-
if c == col:
|
|
812
|
-
break
|
|
813
|
-
c.dependent_cols.discard(col)
|
|
814
|
-
|
|
815
824
|
col.schema_version_drop = self.schema_version
|
|
816
825
|
if col.name is not None:
|
|
817
826
|
assert col.name in self.cols_by_name
|
|
@@ -828,6 +837,7 @@ class TableVersion:
|
|
|
828
837
|
schema_col.pos = pos
|
|
829
838
|
|
|
830
839
|
self.store_tbl.create_sa_tbl()
|
|
840
|
+
pxt.catalog.Catalog.get().record_column_dependencies(self)
|
|
831
841
|
|
|
832
842
|
def rename_column(self, old_name: str, new_name: str) -> None:
|
|
833
843
|
"""Rename a column."""
|
|
@@ -913,14 +923,10 @@ class TableVersion:
|
|
|
913
923
|
"""Insert rows produced by exec_plan and propagate to views"""
|
|
914
924
|
# we're creating a new version
|
|
915
925
|
self.version += 1
|
|
916
|
-
result =
|
|
917
|
-
num_rows, num_excs, cols_with_excs = self.store_tbl.insert_rows(
|
|
926
|
+
cols_with_excs, result = self.store_tbl.insert_rows(
|
|
918
927
|
exec_plan, v_min=self.version, rowids=rowids, abort_on_exc=abort_on_exc
|
|
919
928
|
)
|
|
920
|
-
result
|
|
921
|
-
result.num_excs = num_excs
|
|
922
|
-
result.num_computed_values += exec_plan.ctx.num_computed_exprs * num_rows
|
|
923
|
-
result.cols_with_excs = [f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs]
|
|
929
|
+
result += UpdateStatus(cols_with_excs=[f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs])
|
|
924
930
|
self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False)
|
|
925
931
|
|
|
926
932
|
# update views
|
|
@@ -929,14 +935,10 @@ class TableVersion:
|
|
|
929
935
|
|
|
930
936
|
plan, _ = Planner.create_view_load_plan(view.get().path, propagates_insert=True)
|
|
931
937
|
status = view.get()._insert(plan, timestamp, print_stats=print_stats)
|
|
932
|
-
result
|
|
933
|
-
result.num_excs += status.num_excs
|
|
934
|
-
result.num_computed_values += status.num_computed_values
|
|
935
|
-
result.cols_with_excs += status.cols_with_excs
|
|
938
|
+
result += status.to_cascade()
|
|
936
939
|
|
|
937
|
-
result.cols_with_excs = list(dict.fromkeys(result.cols_with_excs).keys()) # remove duplicates
|
|
938
940
|
if print_stats:
|
|
939
|
-
plan.ctx.profile.print(num_rows=num_rows)
|
|
941
|
+
plan.ctx.profile.print(num_rows=result.num_rows) # This is the net rows after all propagations
|
|
940
942
|
_logger.info(f'TableVersion {self.name}: new version {self.version}')
|
|
941
943
|
return result
|
|
942
944
|
|
|
@@ -976,7 +978,7 @@ class TableVersion:
|
|
|
976
978
|
cascade=cascade,
|
|
977
979
|
show_progress=True,
|
|
978
980
|
)
|
|
979
|
-
result
|
|
981
|
+
result += UpdateStatus(updated_cols=updated_cols)
|
|
980
982
|
return result
|
|
981
983
|
|
|
982
984
|
def batch_update(
|
|
@@ -1003,7 +1005,7 @@ class TableVersion:
|
|
|
1003
1005
|
result = self.propagate_update(
|
|
1004
1006
|
plan, delete_where_clause, recomputed_cols, base_versions=[], timestamp=time.time(), cascade=cascade
|
|
1005
1007
|
)
|
|
1006
|
-
result
|
|
1008
|
+
result += UpdateStatus(updated_cols=[c.qualified_name for c in updated_cols])
|
|
1007
1009
|
|
|
1008
1010
|
unmatched_rows = row_update_node.unmatched_rows()
|
|
1009
1011
|
if len(unmatched_rows) > 0:
|
|
@@ -1011,7 +1013,7 @@ class TableVersion:
|
|
|
1011
1013
|
raise excs.Error(f'batch_update(): {len(unmatched_rows)} row(s) not found')
|
|
1012
1014
|
if insert_if_not_exists:
|
|
1013
1015
|
insert_status = self.insert(unmatched_rows, None, print_stats=False, fail_on_exception=False)
|
|
1014
|
-
result += insert_status
|
|
1016
|
+
result += insert_status.to_cascade()
|
|
1015
1017
|
return result
|
|
1016
1018
|
|
|
1017
1019
|
def _validate_update_spec(
|
|
@@ -1064,6 +1066,38 @@ class TableVersion:
|
|
|
1064
1066
|
|
|
1065
1067
|
return update_targets
|
|
1066
1068
|
|
|
1069
|
+
def recompute_columns(self, col_names: list[str], errors_only: bool = False, cascade: bool = True) -> UpdateStatus:
|
|
1070
|
+
assert not self.is_snapshot
|
|
1071
|
+
assert all(name in self.cols_by_name for name in col_names)
|
|
1072
|
+
assert len(col_names) > 0
|
|
1073
|
+
assert len(col_names) == 1 or not errors_only
|
|
1074
|
+
|
|
1075
|
+
from pixeltable.plan import Planner
|
|
1076
|
+
|
|
1077
|
+
target_columns = [self.cols_by_name[name] for name in col_names]
|
|
1078
|
+
where_clause: Optional[exprs.Expr] = None
|
|
1079
|
+
if errors_only:
|
|
1080
|
+
where_clause = (
|
|
1081
|
+
exprs.ColumnPropertyRef(exprs.ColumnRef(target_columns[0]), exprs.ColumnPropertyRef.Property.ERRORTYPE)
|
|
1082
|
+
!= None
|
|
1083
|
+
)
|
|
1084
|
+
plan, updated_cols, recomputed_cols = Planner.create_update_plan(
|
|
1085
|
+
self.path, update_targets={}, recompute_targets=target_columns, where_clause=where_clause, cascade=cascade
|
|
1086
|
+
)
|
|
1087
|
+
from pixeltable.exprs import SqlElementCache
|
|
1088
|
+
|
|
1089
|
+
result = self.propagate_update(
|
|
1090
|
+
plan,
|
|
1091
|
+
where_clause.sql_expr(SqlElementCache()) if where_clause is not None else None,
|
|
1092
|
+
recomputed_cols,
|
|
1093
|
+
base_versions=[],
|
|
1094
|
+
timestamp=time.time(),
|
|
1095
|
+
cascade=cascade,
|
|
1096
|
+
show_progress=True,
|
|
1097
|
+
)
|
|
1098
|
+
result += UpdateStatus(updated_cols=updated_cols)
|
|
1099
|
+
return result
|
|
1100
|
+
|
|
1067
1101
|
def propagate_update(
|
|
1068
1102
|
self,
|
|
1069
1103
|
plan: Optional[exec.ExecNode],
|
|
@@ -1074,18 +1108,20 @@ class TableVersion:
|
|
|
1074
1108
|
cascade: bool,
|
|
1075
1109
|
show_progress: bool = True,
|
|
1076
1110
|
) -> UpdateStatus:
|
|
1077
|
-
result = UpdateStatus()
|
|
1078
1111
|
if plan is not None:
|
|
1079
1112
|
# we're creating a new version
|
|
1080
1113
|
self.version += 1
|
|
1081
|
-
|
|
1082
|
-
|
|
1114
|
+
cols_with_excs, status = self.store_tbl.insert_rows(plan, v_min=self.version, show_progress=show_progress)
|
|
1115
|
+
result = status.insert_to_update()
|
|
1116
|
+
result += UpdateStatus(
|
|
1117
|
+
cols_with_excs=[f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs]
|
|
1083
1118
|
)
|
|
1084
|
-
result.cols_with_excs = [f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs]
|
|
1085
1119
|
self.store_tbl.delete_rows(
|
|
1086
1120
|
self.version, base_versions=base_versions, match_on_vmin=True, where_clause=where_clause
|
|
1087
1121
|
)
|
|
1088
1122
|
self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False)
|
|
1123
|
+
else:
|
|
1124
|
+
result = UpdateStatus()
|
|
1089
1125
|
|
|
1090
1126
|
if cascade:
|
|
1091
1127
|
base_versions = [None if plan is None else self.version, *base_versions] # don't update in place
|
|
@@ -1100,17 +1136,17 @@ class TableVersion:
|
|
|
1100
1136
|
status = view.get().propagate_update(
|
|
1101
1137
|
plan, None, recomputed_view_cols, base_versions=base_versions, timestamp=timestamp, cascade=True
|
|
1102
1138
|
)
|
|
1103
|
-
result
|
|
1104
|
-
result.num_excs += status.num_excs
|
|
1105
|
-
result.cols_with_excs += status.cols_with_excs
|
|
1139
|
+
result += status.to_cascade()
|
|
1106
1140
|
|
|
1107
|
-
result.cols_with_excs = list(dict.fromkeys(result.cols_with_excs).keys()) # remove duplicates
|
|
1108
1141
|
return result
|
|
1109
1142
|
|
|
1110
1143
|
def delete(self, where: Optional[exprs.Expr] = None) -> UpdateStatus:
|
|
1111
1144
|
"""Delete rows in this table.
|
|
1112
1145
|
Args:
|
|
1113
1146
|
where: a predicate to filter rows to delete.
|
|
1147
|
+
|
|
1148
|
+
Returns:
|
|
1149
|
+
UpdateStatus: an object containing the number of deleted rows and other statistics.
|
|
1114
1150
|
"""
|
|
1115
1151
|
assert self.is_insertable
|
|
1116
1152
|
from pixeltable.exprs import Expr
|
|
@@ -1126,14 +1162,12 @@ class TableVersion:
|
|
|
1126
1162
|
raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
|
|
1127
1163
|
sql_where_clause = analysis_info.sql_where_clause
|
|
1128
1164
|
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
status = UpdateStatus(num_rows=num_rows)
|
|
1165
|
+
status = self.propagate_delete(sql_where_clause, base_versions=[], timestamp=time.time())
|
|
1132
1166
|
return status
|
|
1133
1167
|
|
|
1134
1168
|
def propagate_delete(
|
|
1135
1169
|
self, where: Optional[exprs.Expr], base_versions: list[Optional[int]], timestamp: float
|
|
1136
|
-
) ->
|
|
1170
|
+
) -> UpdateStatus:
|
|
1137
1171
|
"""Delete rows in this table and propagate to views.
|
|
1138
1172
|
Args:
|
|
1139
1173
|
where: a predicate to filter rows to delete.
|
|
@@ -1149,18 +1183,21 @@ class TableVersion:
|
|
|
1149
1183
|
# sql.sql.visitors.traverse(sql_where_clause, {}, {'column': collect_cols})
|
|
1150
1184
|
# x = [f'{str(c)}:{hash(c)}:{id(c.table)}' for c in sql_cols]
|
|
1151
1185
|
# print(f'where_clause cols: {x}')
|
|
1152
|
-
|
|
1186
|
+
del_rows = self.store_tbl.delete_rows(
|
|
1153
1187
|
self.version + 1, base_versions=base_versions, match_on_vmin=False, where_clause=sql_where_clause
|
|
1154
1188
|
)
|
|
1155
|
-
|
|
1189
|
+
row_counts = RowCountStats(del_rows=del_rows) # delete
|
|
1190
|
+
result = UpdateStatus(row_count_stats=row_counts)
|
|
1191
|
+
if del_rows > 0:
|
|
1156
1192
|
# we're creating a new version
|
|
1157
1193
|
self.version += 1
|
|
1158
1194
|
self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False)
|
|
1159
1195
|
for view in self.mutable_views:
|
|
1160
|
-
|
|
1196
|
+
status = view.get().propagate_delete(
|
|
1161
1197
|
where=None, base_versions=[self.version, *base_versions], timestamp=timestamp
|
|
1162
1198
|
)
|
|
1163
|
-
|
|
1199
|
+
result += status.to_cascade()
|
|
1200
|
+
return result
|
|
1164
1201
|
|
|
1165
1202
|
def revert(self) -> None:
|
|
1166
1203
|
"""Reverts the table to the previous version."""
|
|
@@ -1458,18 +1495,6 @@ class TableVersion:
|
|
|
1458
1495
|
names = [c.name for c in self.cols_by_name.values() if c.is_computed]
|
|
1459
1496
|
return names
|
|
1460
1497
|
|
|
1461
|
-
def _record_refd_columns(self, col: Column) -> None:
|
|
1462
|
-
"""Update Column.dependent_cols for all cols referenced in col.value_expr."""
|
|
1463
|
-
from pixeltable import exprs
|
|
1464
|
-
|
|
1465
|
-
if col.value_expr_dict is not None:
|
|
1466
|
-
# if we have a value_expr_dict, use that instead of instantiating the value_expr
|
|
1467
|
-
refd_cols = exprs.Expr.get_refd_columns(col.value_expr_dict)
|
|
1468
|
-
else:
|
|
1469
|
-
refd_cols = [e.col for e in col.value_expr.subexprs(expr_class=exprs.ColumnRef)]
|
|
1470
|
-
for refd_col in refd_cols:
|
|
1471
|
-
refd_col.dependent_cols.add(col)
|
|
1472
|
-
|
|
1473
1498
|
def get_idx_val_columns(self, cols: Iterable[Column]) -> set[Column]:
|
|
1474
1499
|
result = {info.val_col for col in cols for info in col.get_idx_info().values()}
|
|
1475
1500
|
return result
|
|
@@ -1478,7 +1503,8 @@ class TableVersion:
|
|
|
1478
1503
|
"""
|
|
1479
1504
|
Return the set of columns that transitively depend on any of the given ones.
|
|
1480
1505
|
"""
|
|
1481
|
-
|
|
1506
|
+
cat = pxt.catalog.Catalog.get()
|
|
1507
|
+
result = set().union(*[cat.get_column_dependents(col.tbl.id, col.id) for col in cols])
|
|
1482
1508
|
if len(result) > 0:
|
|
1483
1509
|
result.update(self.get_dependent_columns(result))
|
|
1484
1510
|
return result
|
|
@@ -1,13 +1,16 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
|
+
from dataclasses import dataclass
|
|
4
5
|
from typing import TYPE_CHECKING, Optional
|
|
5
6
|
from uuid import UUID
|
|
6
7
|
|
|
8
|
+
from pixeltable import exceptions as excs
|
|
9
|
+
|
|
7
10
|
from .table_version import TableVersion
|
|
8
11
|
|
|
9
12
|
if TYPE_CHECKING:
|
|
10
|
-
|
|
13
|
+
from pixeltable.catalog import Column
|
|
11
14
|
|
|
12
15
|
_logger = logging.getLogger('pixeltable')
|
|
13
16
|
|
|
@@ -34,6 +37,10 @@ class TableVersionHandle:
|
|
|
34
37
|
def __hash__(self) -> int:
|
|
35
38
|
return hash((self.id, self.effective_version))
|
|
36
39
|
|
|
40
|
+
@property
|
|
41
|
+
def is_snapshot(self) -> bool:
|
|
42
|
+
return self.effective_version is not None
|
|
43
|
+
|
|
37
44
|
@classmethod
|
|
38
45
|
def create(cls, tbl_version: TableVersion) -> TableVersionHandle:
|
|
39
46
|
return cls(tbl_version.id, tbl_version.effective_version, tbl_version)
|
|
@@ -53,7 +60,6 @@ class TableVersionHandle:
|
|
|
53
60
|
else:
|
|
54
61
|
self._tbl_version = Catalog.get().get_tbl_version(self.id, self.effective_version)
|
|
55
62
|
if self.effective_version is None:
|
|
56
|
-
# make sure we don't see a discarded instance of a live TableVersion
|
|
57
63
|
tvs = list(Catalog.get()._tbl_versions.values())
|
|
58
64
|
assert self._tbl_version in tvs
|
|
59
65
|
return self._tbl_version
|
|
@@ -64,3 +70,25 @@ class TableVersionHandle:
|
|
|
64
70
|
@classmethod
|
|
65
71
|
def from_dict(cls, d: dict) -> TableVersionHandle:
|
|
66
72
|
return cls(UUID(d['id']), d['effective_version'])
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@dataclass(frozen=True)
|
|
76
|
+
class ColumnHandle:
|
|
77
|
+
tbl_version: TableVersionHandle
|
|
78
|
+
col_id: int
|
|
79
|
+
|
|
80
|
+
def get(self) -> 'Column':
|
|
81
|
+
if self.col_id not in self.tbl_version.get().cols_by_id:
|
|
82
|
+
schema_version_drop = self.tbl_version.get()._tbl_md.column_md[self.col_id].schema_version_drop
|
|
83
|
+
raise excs.Error(
|
|
84
|
+
f'Column has been dropped (no record for column ID {self.col_id} in table '
|
|
85
|
+
f'{self.tbl_version.get().versioned_name!r}; it was dropped in table version {schema_version_drop})'
|
|
86
|
+
)
|
|
87
|
+
return self.tbl_version.get().cols_by_id[self.col_id]
|
|
88
|
+
|
|
89
|
+
def as_dict(self) -> dict:
|
|
90
|
+
return {'tbl_version': self.tbl_version.as_dict(), 'col_id': self.col_id}
|
|
91
|
+
|
|
92
|
+
@classmethod
|
|
93
|
+
def from_dict(cls, d: dict) -> ColumnHandle:
|
|
94
|
+
return cls(tbl_version=TableVersionHandle.from_dict(d['tbl_version']), col_id=d['col_id'])
|
|
@@ -8,6 +8,7 @@ from pixeltable.env import Env
|
|
|
8
8
|
from pixeltable.metadata import schema
|
|
9
9
|
|
|
10
10
|
from .column import Column
|
|
11
|
+
from .globals import MediaValidation
|
|
11
12
|
from .table_version import TableVersion
|
|
12
13
|
from .table_version_handle import TableVersionHandle
|
|
13
14
|
|
|
@@ -83,6 +84,7 @@ class TableVersionPath:
|
|
|
83
84
|
if self.base is not None:
|
|
84
85
|
self.base.clear_cached_md()
|
|
85
86
|
|
|
87
|
+
@property
|
|
86
88
|
def tbl_id(self) -> UUID:
|
|
87
89
|
"""Return the id of the table/view that this path represents"""
|
|
88
90
|
return self.tbl_version.id
|
|
@@ -92,6 +94,11 @@ class TableVersionPath:
|
|
|
92
94
|
self.refresh_cached_md()
|
|
93
95
|
return self._cached_tbl_version.version
|
|
94
96
|
|
|
97
|
+
def schema_version(self) -> int:
|
|
98
|
+
"""Return the version of the table/view that this path represents"""
|
|
99
|
+
self.refresh_cached_md()
|
|
100
|
+
return self._cached_tbl_version.schema_version
|
|
101
|
+
|
|
95
102
|
def tbl_name(self) -> str:
|
|
96
103
|
"""Return the name of the table/view that this path represents"""
|
|
97
104
|
self.refresh_cached_md()
|
|
@@ -103,10 +110,7 @@ class TableVersionPath:
|
|
|
103
110
|
|
|
104
111
|
def is_snapshot(self) -> bool:
|
|
105
112
|
"""Return True if this is a path of snapshot versions"""
|
|
106
|
-
self.
|
|
107
|
-
if not self._cached_tbl_version.is_snapshot:
|
|
108
|
-
return False
|
|
109
|
-
return self.base.is_snapshot() if self.base is not None else True
|
|
113
|
+
return self.tbl_version.is_snapshot
|
|
110
114
|
|
|
111
115
|
def is_view(self) -> bool:
|
|
112
116
|
self.refresh_cached_md()
|
|
@@ -116,10 +120,30 @@ class TableVersionPath:
|
|
|
116
120
|
self.refresh_cached_md()
|
|
117
121
|
return self._cached_tbl_version.is_component_view
|
|
118
122
|
|
|
123
|
+
def is_replica(self) -> bool:
|
|
124
|
+
self.refresh_cached_md()
|
|
125
|
+
return self._cached_tbl_version.is_replica
|
|
126
|
+
|
|
127
|
+
def is_mutable(self) -> bool:
|
|
128
|
+
self.refresh_cached_md()
|
|
129
|
+
return self._cached_tbl_version.is_mutable
|
|
130
|
+
|
|
119
131
|
def is_insertable(self) -> bool:
|
|
120
132
|
self.refresh_cached_md()
|
|
121
133
|
return self._cached_tbl_version.is_insertable
|
|
122
134
|
|
|
135
|
+
def comment(self) -> str:
|
|
136
|
+
self.refresh_cached_md()
|
|
137
|
+
return self._cached_tbl_version.comment
|
|
138
|
+
|
|
139
|
+
def num_retained_versions(self) -> int:
|
|
140
|
+
self.refresh_cached_md()
|
|
141
|
+
return self._cached_tbl_version.num_retained_versions
|
|
142
|
+
|
|
143
|
+
def media_validation(self) -> MediaValidation:
|
|
144
|
+
self.refresh_cached_md()
|
|
145
|
+
return self._cached_tbl_version.media_validation
|
|
146
|
+
|
|
123
147
|
def get_tbl_versions(self) -> list[TableVersionHandle]:
|
|
124
148
|
"""Return all tbl versions"""
|
|
125
149
|
if self.base is None:
|