pixeltable 0.4.15__py3-none-any.whl → 0.4.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +4 -0
- pixeltable/catalog/catalog.py +125 -63
- pixeltable/catalog/column.py +7 -2
- pixeltable/catalog/table.py +1 -0
- pixeltable/catalog/table_metadata.py +4 -0
- pixeltable/catalog/table_version.py +174 -117
- pixeltable/catalog/table_version_handle.py +4 -1
- pixeltable/catalog/table_version_path.py +0 -11
- pixeltable/catalog/view.py +6 -0
- pixeltable/config.py +7 -0
- pixeltable/dataframe.py +10 -5
- pixeltable/env.py +56 -19
- pixeltable/exec/__init__.py +2 -0
- pixeltable/exec/cell_materialization_node.py +231 -0
- pixeltable/exec/cell_reconstruction_node.py +135 -0
- pixeltable/exec/exec_node.py +1 -1
- pixeltable/exec/expr_eval/evaluators.py +1 -0
- pixeltable/exec/expr_eval/expr_eval_node.py +3 -0
- pixeltable/exec/expr_eval/globals.py +2 -0
- pixeltable/exec/globals.py +32 -0
- pixeltable/exec/object_store_save_node.py +1 -4
- pixeltable/exec/row_update_node.py +16 -9
- pixeltable/exec/sql_node.py +107 -14
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/arithmetic_expr.py +23 -18
- pixeltable/exprs/column_property_ref.py +10 -10
- pixeltable/exprs/column_ref.py +2 -2
- pixeltable/exprs/data_row.py +106 -37
- pixeltable/exprs/expr.py +9 -0
- pixeltable/exprs/expr_set.py +14 -7
- pixeltable/exprs/inline_expr.py +2 -19
- pixeltable/exprs/json_path.py +45 -12
- pixeltable/exprs/row_builder.py +54 -22
- pixeltable/functions/__init__.py +1 -0
- pixeltable/functions/bedrock.py +7 -0
- pixeltable/functions/deepseek.py +11 -4
- pixeltable/functions/llama_cpp.py +7 -0
- pixeltable/functions/math.py +1 -1
- pixeltable/functions/ollama.py +7 -0
- pixeltable/functions/openai.py +4 -4
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/video.py +110 -28
- pixeltable/globals.py +10 -4
- pixeltable/io/globals.py +18 -17
- pixeltable/io/parquet.py +1 -1
- pixeltable/io/table_data_conduit.py +47 -22
- pixeltable/iterators/document.py +61 -23
- pixeltable/iterators/video.py +126 -53
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/notes.py +1 -0
- pixeltable/plan.py +175 -46
- pixeltable/share/packager.py +155 -26
- pixeltable/store.py +2 -3
- pixeltable/type_system.py +5 -3
- pixeltable/utils/arrow.py +6 -6
- pixeltable/utils/av.py +65 -0
- pixeltable/utils/console_output.py +4 -1
- pixeltable/utils/exception_handler.py +5 -28
- pixeltable/utils/image.py +7 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +16 -1
- pixeltable/utils/s3_store.py +44 -11
- {pixeltable-0.4.15.dist-info → pixeltable-0.4.17.dist-info}/METADATA +29 -28
- {pixeltable-0.4.15.dist-info → pixeltable-0.4.17.dist-info}/RECORD +68 -61
- {pixeltable-0.4.15.dist-info → pixeltable-0.4.17.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.15.dist-info → pixeltable-0.4.17.dist-info}/entry_points.txt +0 -0
- {pixeltable-0.4.15.dist-info → pixeltable-0.4.17.dist-info}/licenses/LICENSE +0 -0
pixeltable/__init__.py
CHANGED
pixeltable/catalog/catalog.py
CHANGED
|
@@ -12,11 +12,13 @@ from uuid import UUID
|
|
|
12
12
|
|
|
13
13
|
import psycopg
|
|
14
14
|
import sqlalchemy as sql
|
|
15
|
+
import sqlalchemy.exc as sql_exc
|
|
15
16
|
|
|
16
17
|
from pixeltable import exceptions as excs
|
|
17
18
|
from pixeltable.env import Env
|
|
18
19
|
from pixeltable.iterators import ComponentIterator
|
|
19
20
|
from pixeltable.metadata import schema
|
|
21
|
+
from pixeltable.utils.exception_handler import run_cleanup
|
|
20
22
|
|
|
21
23
|
from .column import Column
|
|
22
24
|
from .dir import Dir
|
|
@@ -101,7 +103,7 @@ def retry_loop(
|
|
|
101
103
|
except PendingTableOpsError as e:
|
|
102
104
|
Env.get().console_logger.debug(f'retry_loop(): finalizing pending ops for {e.tbl_id}')
|
|
103
105
|
Catalog.get()._finalize_pending_ops(e.tbl_id)
|
|
104
|
-
except (
|
|
106
|
+
except (sql_exc.DBAPIError, sql_exc.OperationalError) as e:
|
|
105
107
|
# TODO: what other exceptions should we be looking for?
|
|
106
108
|
if isinstance(e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)):
|
|
107
109
|
if num_retries < _MAX_RETRIES or _MAX_RETRIES == -1:
|
|
@@ -167,6 +169,7 @@ class Catalog:
|
|
|
167
169
|
_in_write_xact: bool # True if we're in a write transaction
|
|
168
170
|
_x_locked_tbl_ids: set[UUID] # non-empty for write transactions
|
|
169
171
|
_modified_tvs: set[TableVersionHandle] # TableVersion instances modified in the current transaction
|
|
172
|
+
_undo_actions: list[Callable[[], None]]
|
|
170
173
|
_in_retry_loop: bool
|
|
171
174
|
|
|
172
175
|
# cached column dependencies
|
|
@@ -199,6 +202,7 @@ class Catalog:
|
|
|
199
202
|
self._in_write_xact = False
|
|
200
203
|
self._x_locked_tbl_ids = set()
|
|
201
204
|
self._modified_tvs = set()
|
|
205
|
+
self._undo_actions = []
|
|
202
206
|
self._in_retry_loop = False
|
|
203
207
|
self._column_dependencies = {}
|
|
204
208
|
self._column_dependents = None
|
|
@@ -245,6 +249,11 @@ class Catalog:
|
|
|
245
249
|
for v in tbl_version.mutable_views:
|
|
246
250
|
assert v.effective_version is None, f'{v.id}:{v.effective_version}'
|
|
247
251
|
|
|
252
|
+
def mark_modified_tvs(self, *handle: TableVersionHandle) -> None:
|
|
253
|
+
"""Record that the given TableVersion instances were modified in the current transaction"""
|
|
254
|
+
assert Env.get().in_xact
|
|
255
|
+
self._modified_tvs.update(handle)
|
|
256
|
+
|
|
248
257
|
@contextmanager
|
|
249
258
|
def begin_xact(
|
|
250
259
|
self,
|
|
@@ -309,6 +318,7 @@ class Catalog:
|
|
|
309
318
|
self._column_dependents = None
|
|
310
319
|
has_exc = False
|
|
311
320
|
|
|
321
|
+
assert not self._undo_actions
|
|
312
322
|
with Env.get().begin_xact(for_write=for_write) as conn:
|
|
313
323
|
if tbl is not None or tbl_id is not None:
|
|
314
324
|
try:
|
|
@@ -352,7 +362,7 @@ class Catalog:
|
|
|
352
362
|
# raise to abort the transaction
|
|
353
363
|
raise
|
|
354
364
|
|
|
355
|
-
except (
|
|
365
|
+
except (sql_exc.DBAPIError, sql_exc.OperationalError) as e:
|
|
356
366
|
has_exc = True
|
|
357
367
|
if isinstance(
|
|
358
368
|
e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)
|
|
@@ -360,10 +370,12 @@ class Catalog:
|
|
|
360
370
|
num_retries += 1
|
|
361
371
|
_logger.debug(f'Retrying ({num_retries}) after {type(e.orig)}')
|
|
362
372
|
time.sleep(random.uniform(0.1, 0.5))
|
|
373
|
+
assert not self._undo_actions # We should not have any undo actions at this point
|
|
363
374
|
continue
|
|
364
375
|
else:
|
|
365
376
|
raise
|
|
366
377
|
|
|
378
|
+
assert not self._undo_actions
|
|
367
379
|
yield conn
|
|
368
380
|
return
|
|
369
381
|
|
|
@@ -376,49 +388,19 @@ class Catalog:
|
|
|
376
388
|
# we got this exception after getting the initial table locks and therefore need to abort
|
|
377
389
|
raise
|
|
378
390
|
|
|
379
|
-
except (
|
|
380
|
-
has_exc = True
|
|
381
|
-
# we got some db error during the actual operation (not just while trying to get locks on the metadata
|
|
382
|
-
# records): we convert these into Errors, if asked to do so, and abort
|
|
383
|
-
# TODO: what other concurrency-related exceptions should we expect?
|
|
384
|
-
|
|
385
|
-
# we always convert UndefinedTable exceptions (they can't be retried)
|
|
386
|
-
if isinstance(e.orig, psycopg.errors.UndefinedTable):
|
|
387
|
-
# the table got dropped in the middle of the table operation
|
|
388
|
-
tbl_name = tbl.tbl_name() if tbl is not None else str(tbl_id) if tbl_id is not None else '?'
|
|
389
|
-
_logger.debug(f'Exception: undefined table ({tbl_name}): Caught {type(e.orig)}: {e!r}')
|
|
390
|
-
assert tbl is not None
|
|
391
|
-
raise excs.Error(f'Table was dropped: {tbl_name}') from None
|
|
392
|
-
elif isinstance(e.orig, psycopg.errors.SerializationFailure) and convert_db_excs:
|
|
393
|
-
# we still got a serialization error, despite getting x-locks at the beginning
|
|
394
|
-
msg: str
|
|
395
|
-
if tbl is not None:
|
|
396
|
-
msg = f'{tbl.tbl_name()} ({tbl.tbl_id})'
|
|
397
|
-
elif tbl_id is not None:
|
|
398
|
-
msg = f'{tbl_id}'
|
|
399
|
-
else:
|
|
400
|
-
msg = ''
|
|
401
|
-
_logger.debug(f'Exception: serialization failure: {msg} ({e})')
|
|
402
|
-
raise excs.Error(
|
|
403
|
-
'That Pixeltable operation could not be completed because it conflicted with another '
|
|
404
|
-
'operation that was run on a different process.\n'
|
|
405
|
-
'Please re-run the operation.'
|
|
406
|
-
) from None
|
|
407
|
-
else:
|
|
408
|
-
raise
|
|
409
|
-
|
|
410
|
-
except KeyboardInterrupt:
|
|
391
|
+
except (sql_exc.DBAPIError, sql_exc.OperationalError, sql_exc.InternalError) as e:
|
|
411
392
|
has_exc = True
|
|
412
|
-
|
|
413
|
-
raise
|
|
393
|
+
self.convert_sql_exc(e, tbl_id, tbl.tbl_version if tbl is not None else None, convert_db_excs)
|
|
394
|
+
raise # re-raise the error if it didn't convert to a pxt.Error
|
|
414
395
|
|
|
415
|
-
except:
|
|
396
|
+
except (Exception, KeyboardInterrupt) as e:
|
|
416
397
|
has_exc = True
|
|
398
|
+
_logger.debug(f'Caught {e.__class__}')
|
|
417
399
|
raise
|
|
418
400
|
|
|
419
401
|
finally:
|
|
420
402
|
self._in_write_xact = False
|
|
421
|
-
self._x_locked_tbl_ids
|
|
403
|
+
self._x_locked_tbl_ids.clear()
|
|
422
404
|
self._column_dependents = None
|
|
423
405
|
|
|
424
406
|
# invalidate cached current TableVersion instances
|
|
@@ -428,14 +410,75 @@ class Catalog:
|
|
|
428
410
|
tv.is_validated = False
|
|
429
411
|
|
|
430
412
|
if has_exc:
|
|
431
|
-
#
|
|
413
|
+
# Execute undo actions in reverse order (LIFO)
|
|
414
|
+
for hook in reversed(self._undo_actions):
|
|
415
|
+
run_cleanup(hook, raise_error=False)
|
|
416
|
+
# purge all modified TableVersion instances; we can't guarantee they are still consistent with the
|
|
432
417
|
# stored metadata
|
|
433
418
|
for handle in self._modified_tvs:
|
|
434
419
|
self._clear_tv_cache(handle.id, handle.effective_version)
|
|
435
|
-
# Clear potentially corrupted cached metadata
|
|
420
|
+
# Clear potentially corrupted cached metadata
|
|
436
421
|
if tbl is not None:
|
|
437
422
|
tbl.clear_cached_md()
|
|
438
|
-
|
|
423
|
+
|
|
424
|
+
self._undo_actions.clear()
|
|
425
|
+
self._modified_tvs.clear()
|
|
426
|
+
|
|
427
|
+
def register_undo_action(self, func: Callable[[], None]) -> Callable[[], None]:
|
|
428
|
+
"""Registers a function to be called if the current transaction fails.
|
|
429
|
+
|
|
430
|
+
The function is called only if the current transaction fails due to an exception.
|
|
431
|
+
|
|
432
|
+
Rollback functions are called in reverse order of registration (LIFO).
|
|
433
|
+
|
|
434
|
+
The function should not raise exceptions; if it does, they are logged and ignored.
|
|
435
|
+
"""
|
|
436
|
+
assert Env.get().in_xact
|
|
437
|
+
self._undo_actions.append(func)
|
|
438
|
+
return func
|
|
439
|
+
|
|
440
|
+
def convert_sql_exc(
|
|
441
|
+
self,
|
|
442
|
+
e: sql_exc.StatementError,
|
|
443
|
+
tbl_id: UUID | None = None,
|
|
444
|
+
tbl: TableVersionHandle | None = None,
|
|
445
|
+
convert_db_excs: bool = True,
|
|
446
|
+
) -> None:
|
|
447
|
+
# we got some db error during the actual operation (not just while trying to get locks on the metadata
|
|
448
|
+
# records); we convert these into pxt.Error exceptions if appropriate
|
|
449
|
+
|
|
450
|
+
# we always convert UndefinedTable exceptions (they can't be retried)
|
|
451
|
+
if isinstance(e.orig, psycopg.errors.UndefinedTable) and tbl is not None:
|
|
452
|
+
# the table got dropped in the middle of the operation
|
|
453
|
+
tbl_name = tbl.get().name
|
|
454
|
+
_logger.debug(f'Exception: undefined table ({tbl_name}): Caught {type(e.orig)}: {e!r}')
|
|
455
|
+
raise excs.Error(f'Table was dropped: {tbl_name}') from None
|
|
456
|
+
elif (
|
|
457
|
+
isinstance(
|
|
458
|
+
e.orig,
|
|
459
|
+
(
|
|
460
|
+
psycopg.errors.SerializationFailure, # serialization error despite getting x-locks
|
|
461
|
+
psycopg.errors.InFailedSqlTransaction, # can happen after tx fails for another reason
|
|
462
|
+
psycopg.errors.DuplicateColumn, # if a different process added a column concurrently
|
|
463
|
+
),
|
|
464
|
+
)
|
|
465
|
+
and convert_db_excs
|
|
466
|
+
):
|
|
467
|
+
msg: str
|
|
468
|
+
if tbl is not None:
|
|
469
|
+
msg = f'{tbl.get().name} ({tbl.id})'
|
|
470
|
+
elif tbl_id is not None:
|
|
471
|
+
msg = f'{tbl_id}'
|
|
472
|
+
else:
|
|
473
|
+
msg = ''
|
|
474
|
+
_logger.debug(f'Exception: {e.orig.__class__}: {msg} ({e})')
|
|
475
|
+
# Suppress the underlying SQL exception unless DEBUG is enabled
|
|
476
|
+
raise_from = e if _logger.isEnabledFor(logging.DEBUG) else None
|
|
477
|
+
raise excs.Error(
|
|
478
|
+
'That Pixeltable operation could not be completed because it conflicted with another '
|
|
479
|
+
'operation that was run on a different process.\n'
|
|
480
|
+
'Please re-run the operation.'
|
|
481
|
+
) from raise_from
|
|
439
482
|
|
|
440
483
|
@property
|
|
441
484
|
def in_write_xact(self) -> bool:
|
|
@@ -601,7 +644,7 @@ class Catalog:
|
|
|
601
644
|
if op.op_sn == op.num_ops - 1:
|
|
602
645
|
conn.execute(reset_has_pending_stmt)
|
|
603
646
|
|
|
604
|
-
except (
|
|
647
|
+
except (sql_exc.DBAPIError, sql_exc.OperationalError) as e:
|
|
605
648
|
# TODO: why are we still seeing these here, instead of them getting taken care of by the retry
|
|
606
649
|
# logic of begin_xact()?
|
|
607
650
|
if isinstance(e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)):
|
|
@@ -928,11 +971,18 @@ class Catalog:
|
|
|
928
971
|
num_retained_versions: int,
|
|
929
972
|
comment: str,
|
|
930
973
|
media_validation: MediaValidation,
|
|
931
|
-
) -> Table:
|
|
974
|
+
) -> tuple[Table, bool]:
|
|
975
|
+
"""
|
|
976
|
+
Creates a new InsertableTable at the given path.
|
|
977
|
+
|
|
978
|
+
If `if_exists == IfExistsParam.IGNORE` and a table `t` already exists at the given path, returns `t, False`.
|
|
979
|
+
|
|
980
|
+
Otherwise, creates a new table `t` and returns `t, True` (or raises an exception if the operation fails).
|
|
981
|
+
"""
|
|
932
982
|
existing = self._handle_path_collision(path, InsertableTable, False, if_exists)
|
|
933
983
|
if existing is not None:
|
|
934
984
|
assert isinstance(existing, Table)
|
|
935
|
-
return existing
|
|
985
|
+
return existing, False
|
|
936
986
|
|
|
937
987
|
dir = self._get_schema_object(path.parent, expected=Dir, raise_if_not_exists=True)
|
|
938
988
|
assert dir is not None
|
|
@@ -948,7 +998,7 @@ class Catalog:
|
|
|
948
998
|
media_validation=media_validation,
|
|
949
999
|
)
|
|
950
1000
|
self._tbls[tbl._id, None] = tbl
|
|
951
|
-
return tbl
|
|
1001
|
+
return tbl, True
|
|
952
1002
|
|
|
953
1003
|
def create_view(
|
|
954
1004
|
self,
|
|
@@ -1203,8 +1253,14 @@ class Catalog:
|
|
|
1203
1253
|
TableVersion.create_replica(md)
|
|
1204
1254
|
|
|
1205
1255
|
@retry_loop(for_write=False)
|
|
1206
|
-
def get_table(self, path: Path) -> Table:
|
|
1207
|
-
obj = Catalog.get()._get_schema_object(
|
|
1256
|
+
def get_table(self, path: Path, if_not_exists: IfNotExistsParam) -> Table | None:
|
|
1257
|
+
obj = Catalog.get()._get_schema_object(
|
|
1258
|
+
path, expected=Table, raise_if_not_exists=(if_not_exists == IfNotExistsParam.ERROR)
|
|
1259
|
+
)
|
|
1260
|
+
if obj is None:
|
|
1261
|
+
_logger.info(f'Skipped table {path!r} (does not exist).')
|
|
1262
|
+
return None
|
|
1263
|
+
|
|
1208
1264
|
assert isinstance(obj, Table)
|
|
1209
1265
|
# We need to clear cached metadata from tbl_version_path, in case the schema has been changed
|
|
1210
1266
|
# by another process.
|
|
@@ -1216,7 +1272,7 @@ class Catalog:
|
|
|
1216
1272
|
tbl = self._get_schema_object(
|
|
1217
1273
|
path,
|
|
1218
1274
|
expected=Table,
|
|
1219
|
-
raise_if_not_exists=if_not_exists == IfNotExistsParam.ERROR and not force,
|
|
1275
|
+
raise_if_not_exists=(if_not_exists == IfNotExistsParam.ERROR and not force),
|
|
1220
1276
|
lock_parent=True,
|
|
1221
1277
|
lock_obj=False,
|
|
1222
1278
|
)
|
|
@@ -1301,7 +1357,7 @@ class Catalog:
|
|
|
1301
1357
|
base_id = tvp.base.tbl_id
|
|
1302
1358
|
base_tv = self.get_tbl_version(base_id, None, validate_initialized=True)
|
|
1303
1359
|
base_tv.tbl_md.view_sn += 1
|
|
1304
|
-
self.
|
|
1360
|
+
self.mark_modified_tvs(base_tv.handle)
|
|
1305
1361
|
result = Env.get().conn.execute(
|
|
1306
1362
|
sql.update(schema.Table.__table__)
|
|
1307
1363
|
.values({schema.Table.md: dataclasses.asdict(base_tv.tbl_md)})
|
|
@@ -1313,7 +1369,7 @@ class Catalog:
|
|
|
1313
1369
|
if not is_pure_snapshot:
|
|
1314
1370
|
# invalidate the TableVersion instance when we're done so that existing references to it can find out it
|
|
1315
1371
|
# has been dropped
|
|
1316
|
-
self.
|
|
1372
|
+
self.mark_modified_tvs(tvp.tbl_version)
|
|
1317
1373
|
tv = tvp.tbl_version.get() if tvp.tbl_version is not None else None
|
|
1318
1374
|
if not is_pure_snapshot:
|
|
1319
1375
|
# drop the store table before deleting the Table record
|
|
@@ -1682,6 +1738,9 @@ class Catalog:
|
|
|
1682
1738
|
|
|
1683
1739
|
@retry_loop(for_write=False)
|
|
1684
1740
|
def collect_tbl_history(self, tbl_id: UUID, n: Optional[int]) -> list[schema.FullTableMd]:
|
|
1741
|
+
return self._collect_tbl_history(tbl_id, n)
|
|
1742
|
+
|
|
1743
|
+
def _collect_tbl_history(self, tbl_id: UUID, n: Optional[int]) -> list[schema.FullTableMd]:
|
|
1685
1744
|
"""
|
|
1686
1745
|
Returns the history of up to n versions of the table with the given UUID.
|
|
1687
1746
|
|
|
@@ -1694,14 +1753,15 @@ class Catalog:
|
|
|
1694
1753
|
Each row contains a TableVersion and a TableSchemaVersion object.
|
|
1695
1754
|
"""
|
|
1696
1755
|
q = (
|
|
1697
|
-
sql.select(schema.TableVersion, schema.TableSchemaVersion)
|
|
1698
|
-
.
|
|
1699
|
-
.join(
|
|
1700
|
-
schema.TableSchemaVersion,
|
|
1701
|
-
schema.TableVersion.md['schema_version'].cast(sql.Integer) == schema.TableSchemaVersion.schema_version,
|
|
1702
|
-
)
|
|
1756
|
+
sql.select(schema.Table, schema.TableVersion, schema.TableSchemaVersion)
|
|
1757
|
+
.where(schema.Table.id == tbl_id)
|
|
1758
|
+
.join(schema.TableVersion)
|
|
1703
1759
|
.where(schema.TableVersion.tbl_id == tbl_id)
|
|
1760
|
+
.join(schema.TableSchemaVersion)
|
|
1704
1761
|
.where(schema.TableSchemaVersion.tbl_id == tbl_id)
|
|
1762
|
+
.where(
|
|
1763
|
+
schema.TableVersion.md['schema_version'].cast(sql.Integer) == schema.TableSchemaVersion.schema_version
|
|
1764
|
+
)
|
|
1705
1765
|
.order_by(schema.TableVersion.version.desc())
|
|
1706
1766
|
)
|
|
1707
1767
|
if n is not None:
|
|
@@ -1709,7 +1769,7 @@ class Catalog:
|
|
|
1709
1769
|
src_rows = Env.get().session.execute(q).fetchall()
|
|
1710
1770
|
return [
|
|
1711
1771
|
schema.FullTableMd(
|
|
1712
|
-
|
|
1772
|
+
schema.md_from_dict(schema.TableMd, row.Table.md),
|
|
1713
1773
|
schema.md_from_dict(schema.TableVersionMd, row.TableVersion.md),
|
|
1714
1774
|
schema.md_from_dict(schema.TableSchemaVersionMd, row.TableSchemaVersion.md),
|
|
1715
1775
|
)
|
|
@@ -1904,11 +1964,13 @@ class Catalog:
|
|
|
1904
1964
|
|
|
1905
1965
|
# If `tbl` is a named pure snapshot, we're not quite done, since the snapshot metadata won't appear in the
|
|
1906
1966
|
# TableVersionPath. We need to prepend it separately.
|
|
1907
|
-
if isinstance(tbl, View) and tbl.
|
|
1967
|
+
if isinstance(tbl, View) and tbl._is_named_pure_snapshot():
|
|
1908
1968
|
snapshot_md = self.load_tbl_md(tbl._id, 0)
|
|
1909
1969
|
md = [snapshot_md, *md]
|
|
1910
1970
|
|
|
1911
|
-
for ancestor_md in md
|
|
1971
|
+
for ancestor_md in md:
|
|
1972
|
+
# Set the `is_replica` flag on every ancestor's TableMd.
|
|
1973
|
+
ancestor_md.tbl_md.is_replica = True
|
|
1912
1974
|
# For replica metadata, we guarantee that the current_version and current_schema_version of TableMd
|
|
1913
1975
|
# match the corresponding values in TableVersionMd and TableSchemaVersionMd. This is to ensure that,
|
|
1914
1976
|
# when the metadata is later stored in the catalog of a different Pixeltable instance, the values of
|
|
@@ -1916,6 +1978,8 @@ class Catalog:
|
|
|
1916
1978
|
# destination catalog.
|
|
1917
1979
|
ancestor_md.tbl_md.current_version = ancestor_md.version_md.version
|
|
1918
1980
|
ancestor_md.tbl_md.current_schema_version = ancestor_md.schema_version_md.schema_version
|
|
1981
|
+
|
|
1982
|
+
for ancestor_md in md[1:]:
|
|
1919
1983
|
# Also, the table version of every proper ancestor is emphemeral; it does not represent a queryable
|
|
1920
1984
|
# table version (the data might be incomplete, since we have only retrieved one of its views, not
|
|
1921
1985
|
# the table itself).
|
|
@@ -1968,9 +2032,7 @@ class Catalog:
|
|
|
1968
2032
|
tbl_version: TableVersion
|
|
1969
2033
|
if view_md is None:
|
|
1970
2034
|
# this is a base table
|
|
1971
|
-
tbl_version = TableVersion(
|
|
1972
|
-
tbl_id, tbl_md, version_md, effective_version, schema_version_md, mutable_views=mutable_views
|
|
1973
|
-
)
|
|
2035
|
+
tbl_version = TableVersion(tbl_id, tbl_md, version_md, effective_version, schema_version_md, mutable_views)
|
|
1974
2036
|
else:
|
|
1975
2037
|
assert len(view_md.base_versions) > 0 # a view needs to have a base
|
|
1976
2038
|
# TODO: add TableVersionMd.is_pure_snapshot() and use that
|
|
@@ -2005,7 +2067,7 @@ class Catalog:
|
|
|
2005
2067
|
self._tbl_versions[tbl_id, effective_version] = tbl_version
|
|
2006
2068
|
# register this instance as modified, so that it gets purged if the transaction fails, it may not be
|
|
2007
2069
|
# fully initialized
|
|
2008
|
-
self.
|
|
2070
|
+
self.mark_modified_tvs(tbl_version.handle)
|
|
2009
2071
|
tbl_version.init()
|
|
2010
2072
|
return tbl_version
|
|
2011
2073
|
|
pixeltable/catalog/column.py
CHANGED
|
@@ -48,7 +48,7 @@ class Column:
|
|
|
48
48
|
- if None: the system chooses for you (at present, this is always False, but this may change in the future)
|
|
49
49
|
"""
|
|
50
50
|
|
|
51
|
-
name: str
|
|
51
|
+
name: Optional[str]
|
|
52
52
|
id: Optional[int]
|
|
53
53
|
col_type: ts.ColumnType
|
|
54
54
|
stored: bool
|
|
@@ -259,7 +259,12 @@ class Column:
|
|
|
259
259
|
# default: record errors for computed and media columns
|
|
260
260
|
if self._stores_cellmd is not None:
|
|
261
261
|
return self._stores_cellmd
|
|
262
|
-
return self.is_stored and (
|
|
262
|
+
return self.is_stored and (
|
|
263
|
+
self.is_computed
|
|
264
|
+
or self.col_type.is_media_type()
|
|
265
|
+
or self.col_type.is_json_type()
|
|
266
|
+
or self.col_type.is_array_type()
|
|
267
|
+
)
|
|
263
268
|
|
|
264
269
|
@property
|
|
265
270
|
def qualified_name(self) -> str:
|
pixeltable/catalog/table.py
CHANGED
|
@@ -117,6 +117,7 @@ class Table(SchemaObject):
|
|
|
117
117
|
is_primary_key=col.is_pk,
|
|
118
118
|
media_validation=col.media_validation.name.lower() if col.media_validation is not None else None, # type: ignore[typeddict-item]
|
|
119
119
|
computed_with=col.value_expr.display_str(inline=False) if col.value_expr is not None else None,
|
|
120
|
+
defined_in=col.tbl.name,
|
|
120
121
|
)
|
|
121
122
|
# Pure snapshots have no indices
|
|
122
123
|
indices = self._tbl_version.get().idxs_by_name.values() if self._tbl_version is not None else {}
|
|
@@ -19,6 +19,10 @@ class ColumnMetadata(TypedDict):
|
|
|
19
19
|
"""The media validation policy for this column."""
|
|
20
20
|
computed_with: Optional[str]
|
|
21
21
|
"""Expression used to compute this column; `None` if this is not a computed column."""
|
|
22
|
+
defined_in: Optional[str]
|
|
23
|
+
"""Name of the table where this column was originally defined.
|
|
24
|
+
|
|
25
|
+
If the current table is a view, then `defined_in` may differ from the current table name."""
|
|
22
26
|
|
|
23
27
|
|
|
24
28
|
class EmbeddingIndexParams(TypedDict):
|