pixeltable 0.3.11__py3-none-any.whl → 0.3.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of pixeltable has been flagged by the registry diff service.
- pixeltable/__init__.py +1 -1
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/catalog.py +309 -59
- pixeltable/catalog/globals.py +5 -5
- pixeltable/catalog/insertable_table.py +2 -1
- pixeltable/catalog/path.py +13 -6
- pixeltable/catalog/table.py +8 -6
- pixeltable/catalog/table_version.py +100 -72
- pixeltable/catalog/view.py +4 -9
- pixeltable/exec/sql_node.py +0 -1
- pixeltable/exprs/json_path.py +1 -5
- pixeltable/func/__init__.py +1 -1
- pixeltable/func/aggregate_function.py +1 -1
- pixeltable/func/callable_function.py +1 -1
- pixeltable/func/expr_template_function.py +2 -2
- pixeltable/func/function.py +3 -4
- pixeltable/func/query_template_function.py +87 -4
- pixeltable/func/tools.py +1 -1
- pixeltable/globals.py +7 -2
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_31.py +11 -0
- pixeltable/metadata/converters/convert_32.py +15 -0
- pixeltable/metadata/converters/convert_33.py +17 -0
- pixeltable/metadata/notes.py +3 -0
- pixeltable/metadata/schema.py +26 -1
- pixeltable/plan.py +2 -3
- pixeltable/share/packager.py +8 -24
- pixeltable/share/publish.py +20 -9
- pixeltable/store.py +7 -4
- pixeltable/utils/exception_handler.py +59 -0
- {pixeltable-0.3.11.dist-info → pixeltable-0.3.12.dist-info}/METADATA +1 -1
- {pixeltable-0.3.11.dist-info → pixeltable-0.3.12.dist-info}/RECORD +35 -31
- {pixeltable-0.3.11.dist-info → pixeltable-0.3.12.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.11.dist-info → pixeltable-0.3.12.dist-info}/WHEEL +0 -0
- {pixeltable-0.3.11.dist-info → pixeltable-0.3.12.dist-info}/entry_points.txt +0 -0
pixeltable/__init__.py
CHANGED
@@ -4,7 +4,7 @@ from .__version__ import __version__, __version_tuple__
 from .catalog import Column, InsertableTable, Table, UpdateStatus, View
 from .dataframe import DataFrame
 from .exceptions import Error, ExprEvalError, PixeltableWarning
-from .func import Aggregator, Function, expr_udf, query, uda, udf
+from .func import Aggregator, Function, expr_udf, query, retrieval_udf, uda, udf
 from .globals import (
     array,
     configure_logging,
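The only functional change in `__init__.py` is that `retrieval_udf` joins the top-level exports. A minimal check of what that enables (nothing assumed beyond the import line shown above):

```python
# New in 0.3.12: retrieval_udf is re-exported at the package root.
from pixeltable import retrieval_udf

# The defining module continues to work as before:
from pixeltable.func import retrieval_udf  # noqa: F811
```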
pixeltable/__version__.py
CHANGED
@@ -1,3 +1,3 @@
 # These version placeholders will be replaced during build.
-__version__ = '0.3.11'
-__version_tuple__ = (0, 3, 11)
+__version__ = '0.3.12'
+__version_tuple__ = (0, 3, 12)
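A quick way to confirm which build is active after upgrading; the two attributes are exactly the ones stamped in `__version__.py` above:

```python
import pixeltable as pxt

print(pxt.__version__)        # '0.3.12'
print(pxt.__version_tuple__)  # (0, 3, 12)
```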
pixeltable/catalog/catalog.py
CHANGED
@@ -33,16 +33,6 @@ if TYPE_CHECKING:
 _logger = logging.getLogger('pixeltable')


-def _lock_str(for_update: bool) -> str:
-    return 'X' if for_update else 'S'
-
-
-# TODO: remove once the concurrent update behavior has been debugged
-# def _debug_print(for_update: bool, msg: str) -> None:
-#     return
-#     print(f'{datetime.datetime.now()}: {_lock_str(for_update)}: {msg}')
-
-
 def _unpack_row(
     row: Optional[sql.engine.Row], entities: list[type[sql.orm.decl_api.DeclarativeBase]]
 ) -> Optional[list[Any]]:
@@ -82,11 +72,14 @@ def _retry_loop(op: Callable[..., T]) -> Callable[..., T]:
                 with Env.get().begin_xact():
                     return op(*args, **kwargs)
             except sql.exc.DBAPIError as e:
-                if isinstance(e.orig, psycopg.errors.SerializationFailure)
-                    num_remaining_retries
-
-
-
+                if isinstance(e.orig, psycopg.errors.SerializationFailure):
+                    if num_remaining_retries > 0:
+                        num_remaining_retries -= 1
+                        # print(f'serialization failure:\n{e}')
+                        # print('retrying ************************************************************')
+                        time.sleep(1)
+                    else:
+                        raise excs.Error(f'Serialization retry limit ({_MAX_RETRIES}) exceeded') from e
                 else:
                     raise

@@ -123,20 +116,39 @@ class Catalog:
         self._tbls = {}  # don't use a defaultdict here, it doesn't cooperate with the debugger
         self._init_store()

+    @classmethod
+    def _lock_dir(cls, parent_id: Optional[UUID], dir_id: Optional[UUID], dir_name: Optional[str]) -> None:
+        """Update directory record(s) to sequentialize thread access. Lock is released when transaction commits.
+        If dir_id is present, then all other conditions are ignored.
+        Note that (parent_id==None) is a valid where condition.
+        If dir_id is not specified, the user from the environment is added to the directory filters.
+        """
+        user = Env.get().user
+        conn = Env.get().conn
+        q = sql.update(schema.Dir).values(lock_dummy=1)
+        if dir_id is not None:
+            q = q.where(schema.Dir.id == dir_id)
+        else:
+            q = q.where(schema.Dir.parent_id == parent_id)
+            if dir_name is not None:
+                q = q.where(schema.Dir.md['name'].astext == dir_name)
+            if user is not None:
+                q = q.where(schema.Dir.md['user'].astext == user)
+        conn.execute(q)
+
     def get_dir_path(self, dir_id: UUID) -> Path:
         """Return path for directory with given id"""
         conn = Env.get().conn
         names: list[str] = []
         while True:
             q = sql.select(schema.Dir).where(schema.Dir.id == dir_id)
-            # _debug_print(for_update=False, msg=f'dir id={dir_id}')
             row = conn.execute(q).one()
             dir = schema.Dir(**row._mapping)
             if dir.md['name'] == '':
                 break
             names.insert(0, dir.md['name'])
             dir_id = dir.parent_id
-        return Path('.'.join(names), empty_is_valid=True)
+        return Path('.'.join(names), empty_is_valid=True, allow_system_paths=True)

     @dataclasses.dataclass
     class DirEntry:
@@ -155,7 +167,6 @@ class Catalog:
         result: dict[str, Catalog.DirEntry] = {}

         q = sql.select(schema.Dir).where(schema.Dir.parent_id == dir_id)
-        # _debug_print(for_update=False, msg=f'dirs parent_id={dir_id}')
         rows = conn.execute(q).all()
         for row in rows:
             dir = schema.Dir(**row._mapping)
@@ -165,7 +176,6 @@ class Catalog:
             result[dir.md['name']] = self.DirEntry(dir=dir, dir_entries=dir_contents, table=None)

         q = sql.select(schema.Table).where(schema.Table.dir_id == dir_id)
-        # _debug_print(for_update=False, msg=f'tbls parent_id={dir_id}')
         rows = conn.execute(q).all()
         for row in rows:
             tbl = schema.Table(**row._mapping)
@@ -175,6 +185,9 @@ class Catalog:

     @_retry_loop
     def move(self, path: Path, new_path: Path) -> None:
+        self._move(path, new_path)
+
+    def _move(self, path: Path, new_path: Path) -> None:
         _, dest_dir, src_obj = self._prepare_dir_op(
             add_dir_path=new_path.parent,
             add_name=new_path.name,
@@ -258,17 +271,14 @@ class Catalog:
         conn = Env.get().conn

         # check for subdirectory
+        if for_update:
+            self._lock_dir(dir_id, None, name)
         q = sql.select(schema.Dir).where(
             schema.Dir.parent_id == dir_id, schema.Dir.md['name'].astext == name, schema.Dir.md['user'].astext == user
         )
-        if for_update:
-            q = q.with_for_update()
-        # _debug_print(for_update, f'dir name={name!r} parent={dir_id}')
-        # row = conn.execute(q).one_or_none()
-        # if row is not None:
-        #     dir_record = schema.Dir(**row._mapping)
-        #     return Dir(dir_record.id, dir_record.parent_id, name)
         rows = conn.execute(q).all()
+        # The condition below can occur if there is a synchronization failure across multiple processes
+        # It indicates database inconsistency.
         if len(rows) > 1:
             raise AssertionError(rows)
         if len(rows) == 1:
@@ -283,7 +293,6 @@ class Catalog:
         )
         if for_update:
             q = q.with_for_update()
-        # _debug_print(for_update, f'table name={name!r} parent={dir_id}')
         tbl_id = conn.execute(q).scalar_one_or_none()
         if tbl_id is not None:
             if tbl_id not in self._tbls:
@@ -422,6 +431,155 @@ class Catalog:
         self._tbls[view._id] = view
         return view

+    @_retry_loop
+    def create_replica(self, path: Path, md: list[schema.FullTableMd], if_exists: IfExistsParam) -> Table:
+        """
+        Creates table, table_version, and table_schema_version records for a replica with the given metadata.
+        The metadata should be presented in standard "ancestor order", with the table being replicated at
+        list position 0 and the (root) base table at list position -1.
+        """
+        tbl_id = UUID(md[0].tbl_md.tbl_id)
+
+        # First handle path collisions (if_exists='ignore' or 'replace' or etc).
+        existing = self._handle_path_collision(path, View, False, if_exists)
+        if existing is not None:
+            if existing._id != tbl_id:
+                raise excs.Error(
+                    f"An attempt was made to create a replica table at {path!r} with if_exists='ignore', "
+                    'but a different table already exists at that location.'
+                )
+            assert isinstance(existing, View)
+            return existing
+
+        # Ensure that the system directory exists.
+        self._create_dir(Path('_system', allow_system_paths=True), if_exists=IfExistsParam.IGNORE, parents=False)
+
+        # Now check to see if this table already exists in the catalog.
+        # TODO: Handle concurrency in create_replica()
+        existing = Catalog.get().get_table_by_id(tbl_id)
+        if existing is not None:
+            existing_path = Path(existing._path(), allow_system_paths=True)
+            # It does exist. If it's a non-system table, that's an error: it's already been replicated.
+            if not existing_path.is_system_path:
+                raise excs.Error(
+                    f'That table has already been replicated as {existing._path()!r}. \n'
+                    f'Drop the existing replica if you wish to re-create it.'
+                )
+            # If it's a system table, then this means it was created at some point as the ancestor of some other
+            # table (a snapshot-over-snapshot scenario). In that case, we simply move it to the new (named) location.
+            self._move(existing_path, path)
+
+        # Now store the metadata for this replica. In the case where the table already exists (and was just moved
+        # into a named location), this will be a no-op, but it still serves to validate that the newly received
+        # metadata is identical to what's in the catalog.
+        self.__store_replica_md(path, md[0])
+
+        # Now store the metadata for all of this table's proper ancestors. If one or more proper ancestors
+        # do not yet exist in the store, they will be created as anonymous system tables.
+        for ancestor_md in md[1:]:
+            ancestor_id = UUID(ancestor_md.tbl_md.tbl_id)
+            replica = Catalog.get().get_table_by_id(ancestor_id)
+            replica_path: Path
+            if replica is None:
+                # We've never seen this table before. Create a new anonymous system table for it.
+                replica_path = Path(f'_system.replica_{ancestor_id.hex}', allow_system_paths=True)
+            else:
+                # The table already exists in the catalog. The existing path might be a system path (if the table
+                # was created as an anonymous base table of some other table), or it might not (if it's a snapshot
+                # that was directly replicated by the user at some point). In either case, use the existing path.
+                replica_path = Path(replica._path(), allow_system_paths=True)
+
+            # Store the metadata; it could be a new version (in which case a new record will be created) or a
+            # known version (in which case the newly received metadata will be validated as identical).
+            self.__store_replica_md(replica_path, ancestor_md)
+
+        # Update the catalog (as a final step, after all DB operations completed successfully).
+        # Only the table being replicated is actually made visible in the catalog.
+        self._tbls[tbl_id] = self._load_tbl(tbl_id)
+        return self._tbls[tbl_id]
+
+    def __store_replica_md(self, path: Path, md: schema.FullTableMd) -> None:
+        _logger.info(f'Creating replica table at {path!r} with ID: {md.tbl_md.tbl_id}')
+        # TODO: Handle concurrency
+        dir = self._get_schema_object(path.parent, expected=Dir, raise_if_not_exists=True)
+        assert dir is not None
+
+        conn = Env.get().conn
+        tbl_id = md.tbl_md.tbl_id
+
+        new_tbl_md: Optional[schema.TableMd] = None
+        new_version_md: Optional[schema.TableVersionMd] = None
+        new_schema_version_md: Optional[schema.TableSchemaVersionMd] = None
+
+        # We need to ensure that the table metadata in the catalog always reflects the latest observed version of
+        # this table. (In particular, if this is a base table, then its table metadata need to be consistent
+        # with the latest version of this table having a replicated view somewhere in the catalog.)
+        q: sql.Executable = sql.select(schema.Table.md).where(schema.Table.id == tbl_id)
+        existing_md_row = conn.execute(q).one_or_none()
+
+        if existing_md_row is None:
+            # No existing table, so create a new record.
+            q = sql.insert(schema.Table.__table__).values(
+                id=tbl_id,
+                dir_id=dir._id,
+                md=dataclasses.asdict(
+                    dataclasses.replace(md.tbl_md, name=path.name, user=Env.get().user, is_replica=True)
+                ),
+            )
+            conn.execute(q)
+        else:
+            assert existing_md_row.md['is_replica']
+            if md.tbl_md.current_version > existing_md_row.md['current_version']:
+                # New metadata is more recent than the metadata currently stored in the DB; we'll update the record
+                # in place in the DB.
+                new_tbl_md = dataclasses.replace(md.tbl_md, name=path.name, user=Env.get().user, is_replica=True)
+
+        # Now see if a TableVersion record already exists in the DB for this table version. If not, insert it. If
+        # it already exists, check that the existing record is identical to the new one.
+        q = (
+            sql.select(schema.TableVersion.md)
+            .where(schema.TableVersion.tbl_id == tbl_id)
+            .where(sql.text(f"({schema.TableVersion.__table__}.md->>'version')::int = {md.version_md.version}"))
+        )
+        existing_version_md_row = conn.execute(q).one_or_none()
+        if existing_version_md_row is None:
+            new_version_md = md.version_md
+        else:
+            existing_version_md = schema.md_from_dict(schema.TableVersionMd, existing_version_md_row.md)
+            if existing_version_md != md.version_md:
+                raise excs.Error(
+                    f'The version metadata for the replica {path!r}:{md.version_md.version} is inconsistent with '
+                    'the metadata recorded from a prior replica.\n'
+                    'This is likely due to data corruption in the replicated table.'
+                )
+
+        # Do the same thing for TableSchemaVersion.
+        q = (
+            sql.select(schema.TableSchemaVersion.md)
+            .where(schema.TableSchemaVersion.tbl_id == tbl_id)
+            .where(
+                sql.text(
+                    f"({schema.TableSchemaVersion.__table__}.md->>'schema_version')::int = "
+                    f'{md.schema_version_md.schema_version}'
+                )
+            )
+        )
+        existing_schema_version_md_row = conn.execute(q).one_or_none()
+        if existing_schema_version_md_row is None:
+            new_schema_version_md = md.schema_version_md
+        else:
+            existing_schema_version_md = schema.md_from_dict(
+                schema.TableSchemaVersionMd, existing_schema_version_md_row.md
+            )
+            if existing_schema_version_md != md.schema_version_md:
+                raise excs.Error(
+                    f'The schema version metadata for the replica {path!r}:{md.schema_version_md.schema_version} '
+                    'is inconsistent with the metadata recorded from a prior replica.\n'
+                    'This is likely due to data corruption in the replicated table.'
+                )
+
+        self.store_tbl_md(UUID(tbl_id), new_tbl_md, new_version_md, new_schema_version_md)
+
     @_retry_loop
     def get_table(self, path: Path) -> Table:
         obj = Catalog.get()._get_schema_object(path, expected=Table, raise_if_not_exists=True)
@@ -480,6 +638,9 @@ class Catalog:

     @_retry_loop
     def create_dir(self, path: Path, if_exists: IfExistsParam, parents: bool) -> Dir:
+        return self._create_dir(path, if_exists, parents)
+
+    def _create_dir(self, path: Path, if_exists: IfExistsParam, parents: bool) -> Dir:
         # existing = self._handle_path_collision(path, Dir, False, if_exists)
         # if existing is not None:
         #     assert isinstance(existing, Dir)
@@ -535,7 +696,8 @@ class Catalog:
             raise excs.Error(f'Directory {str(dir_path)!r} is not empty.')

         # drop existing subdirs
-
+        self._lock_dir(dir_id, None, None)
+        dir_q = sql.select(schema.Dir).where(schema.Dir.parent_id == dir_id)
         for row in conn.execute(dir_q).all():
             self._drop_dir(row.id, dir_path.append(row.md['name']), force=True)

@@ -548,7 +710,6 @@ class Catalog:
             self._drop_tbl(tbl, force=True, is_replace=False)

         # self.drop_dir(dir_id)
-        # _debug_print(for_update=True, msg=f'drop dir id={dir_id}')
         conn.execute(sql.delete(schema.Dir).where(schema.Dir.id == dir_id))
         _logger.info(f'Removed directory {str(dir_path)!r}.')

@@ -558,7 +719,6 @@ class Catalog:
         q = sql.select(schema.Table.id).where(sql.text(f"md->'view_md'->'base_versions'->0->>0 = {tbl_id.hex!r}"))
         if for_update:
             q = q.with_for_update()
-        # _debug_print(for_update=False, msg=f'views of tbl id={tbl_id}')
         result = [r[0] for r in conn.execute(q).all()]
         return result

@@ -582,10 +742,9 @@ class Catalog:
     def get_dir(self, dir_id: UUID, for_update: bool = False) -> Optional[Dir]:
         """Return the Dir with the given id, or None if it doesn't exist"""
         conn = Env.get().conn
-        q = sql.select(schema.Dir).where(schema.Dir.id == dir_id)
         if for_update:
-
-
+            self._lock_dir(None, dir_id, None)
+        q = sql.select(schema.Dir).where(schema.Dir.id == dir_id)
         row = conn.execute(q).one_or_none()
         if row is None:
             return None
@@ -594,31 +753,27 @@ class Catalog:

     def _get_dir(self, path: Path, for_update: bool = False) -> Optional[schema.Dir]:
         """
-        Locking protocol:
-        - S locks on all ancestors
-        - X lock on dir if for_update == True, otherwise also an S lock
+        Locking protocol: X locks on all ancestors
         """
         user = Env.get().user
         conn = Env.get().conn
         if path.is_root:
-            q = sql.select(schema.Dir).where(schema.Dir.parent_id.is_(None), schema.Dir.md['user'].astext == user)
             if for_update:
-
-
+                self._lock_dir(parent_id=None, dir_id=None, dir_name='')
+            q = sql.select(schema.Dir).where(schema.Dir.parent_id.is_(None), schema.Dir.md['user'].astext == user)
             row = conn.execute(q).one_or_none()
             return schema.Dir(**row._mapping) if row is not None else None
         else:
             parent_dir = self._get_dir(path.parent, for_update=False)
             if parent_dir is None:
                 return None
+            if for_update:
+                self._lock_dir(parent_id=parent_dir.id, dir_id=None, dir_name=path.name)
             q = sql.select(schema.Dir).where(
                 schema.Dir.parent_id == parent_dir.id,
                 schema.Dir.md['name'].astext == path.name,
                 schema.Dir.md['user'].astext == user,
             )
-            if for_update:
-                q = q.with_for_update()
-            # _debug_print(for_update, f'dir {str(path)}')
             row = conn.execute(q).one_or_none()
             return schema.Dir(**row._mapping) if row is not None else None

@@ -641,7 +796,6 @@ class Catalog:
             )
             .where(schema.Table.id == tbl_id)
         )
-        # _debug_print(for_update=False, msg=f'load table id={tbl_id!r}')
         row = conn.execute(q).one_or_none()
         if row is None:
             return None
@@ -680,13 +834,19 @@ class Catalog:
         # TODO: also load mutable views
         return view

-    def
-
+    def load_tbl_md(self, tbl_id: UUID, effective_version: Optional[int]) -> schema.FullTableMd:
+        """
+        Loads metadata from the store for a given table UUID and version.
+        """
+        _logger.info(f'Loading metadata for table version: {tbl_id}:{effective_version}')
         conn = Env.get().conn
+
         q = (
-            sql.select(schema.Table, schema.TableSchemaVersion)
+            sql.select(schema.Table, schema.TableVersion, schema.TableSchemaVersion)
             .select_from(schema.Table)
             .where(schema.Table.id == tbl_id)
+            .join(schema.TableVersion)
+            .where(schema.TableVersion.tbl_id == tbl_id)
             .join(schema.TableSchemaVersion)
             .where(schema.TableSchemaVersion.tbl_id == tbl_id)
         )
@@ -698,16 +858,11 @@ class Catalog:
             # JOIN TableVersion tv ON (tv.tbl_id = tbl_id AND tv.version = effective_version)
             # JOIN TableSchemaVersion tsv ON (tsv.tbl_id = tbl_id AND tv.md.schema_version = tsv.schema_version)
             # WHERE t.id = tbl_id
-            q = (
-
-
-
-
-                sql.text(
-                    (
-                        f"({schema.TableVersion.__table__}.md->>'schema_version')::int = "
-                        f'{schema.TableSchemaVersion.__table__}.{schema.TableSchemaVersion.schema_version.name}'
-                    )
+            q = q.where(sql.text(f"({schema.TableVersion.__table__}.md->>'version')::int = {effective_version}")).where(
+                sql.text(
+                    (
+                        f"({schema.TableVersion.__table__}.md->>'schema_version')::int = "
+                        f'{schema.TableSchemaVersion.__table__}.{schema.TableSchemaVersion.schema_version.name}'
                     )
                 )
             )
@@ -715,9 +870,15 @@ class Catalog:
             # we are loading the current version
             # SELECT *
             # FROM Table t
+            # JOIN TableVersion tv ON (tv.tbl_id = tbl_id AND t.current_version = tv.version)
             # JOIN TableSchemaVersion tsv ON (tsv.tbl_id = tbl_id AND t.current_schema_version = tsv.schema_version)
             # WHERE t.id = tbl_id
             q = q.where(
+                sql.text(
+                    f"({schema.Table.__table__}.md->>'current_version')::int = "
+                    f'{schema.TableVersion.__table__}.{schema.TableVersion.version.name}'
+                )
+            ).where(
                 sql.text(
                     (
                         f"({schema.Table.__table__}.md->>'current_schema_version')::int = "
@@ -727,11 +888,100 @@ class Catalog:
         )

         row = conn.execute(q).one_or_none()
-
+        assert row is not None, f'Table record not found: {tbl_id}:{effective_version}'
+        tbl_record, version_record, schema_version_record = _unpack_row(
+            row, [schema.Table, schema.TableVersion, schema.TableSchemaVersion]
+        )
+        assert tbl_record.id == tbl_id
         tbl_md = schema.md_from_dict(schema.TableMd, tbl_record.md)
+        version_md = schema.md_from_dict(schema.TableVersionMd, version_record.md)
         schema_version_md = schema.md_from_dict(schema.TableSchemaVersionMd, schema_version_record.md)
+
+        return schema.FullTableMd(tbl_md, version_md, schema_version_md)
+
+    def store_tbl_md(
+        self,
+        tbl_id: UUID,
+        tbl_md: Optional[schema.TableMd],
+        version_md: Optional[schema.TableVersionMd],
+        schema_version_md: Optional[schema.TableSchemaVersionMd],
+    ) -> None:
+        """
+        Stores metadata to the DB. If specified, `tbl_md` will be updated in place (only one such record can exist
+        per UUID); `version_md` and `schema_version_md` will be inserted as new records.
+
+        If inserting `version_md` or `schema_version_md` would be a primary key violation, an exception will be raised.
+        """
+        conn = Env.get().conn
+
+        if tbl_md is not None:
+            result = conn.execute(
+                sql.update(schema.Table.__table__)
+                .values({schema.Table.md: dataclasses.asdict(tbl_md)})
+                .where(schema.Table.id == tbl_id)
+            )
+            assert result.rowcount == 1, result.rowcount
+
+        if version_md is not None:
+            conn.execute(
+                sql.insert(schema.TableVersion.__table__).values(
+                    tbl_id=tbl_id, version=version_md.version, md=dataclasses.asdict(version_md)
+                )
+            )
+
+        if schema_version_md is not None:
+            conn.execute(
+                sql.insert(schema.TableSchemaVersion.__table__).values(
+                    tbl_id=tbl_id,
+                    schema_version=schema_version_md.schema_version,
+                    md=dataclasses.asdict(schema_version_md),
+                )
+            )
+
+    def delete_tbl_md(self, tbl_id: UUID) -> None:
+        """
+        Deletes all table metadata from the store for the given table UUID.
+        """
+        conn = Env.get().conn
+        conn.execute(sql.delete(schema.TableSchemaVersion.__table__).where(schema.TableSchemaVersion.tbl_id == tbl_id))
+        conn.execute(sql.delete(schema.TableVersion.__table__).where(schema.TableVersion.tbl_id == tbl_id))
+        conn.execute(sql.delete(schema.Table.__table__).where(schema.Table.id == tbl_id))
+
+    def load_replica_md(self, tbl: Table) -> list[schema.FullTableMd]:
+        """
+        Load metadata for the given table along with all its ancestors. The values of TableMd.current_version and
+        TableMd.current_schema_version will be adjusted to ensure that the metadata represent a valid (internally
+        consistent) table state.
+        """
+        # TODO: First acquire X-locks for all relevant metadata entries
+
+        # Load metadata for every table in the TableVersionPath for `tbl`.
+        md = [self.load_tbl_md(tv.id, tv.effective_version) for tv in tbl._tbl_version_path.get_tbl_versions()]
+
+        # If `tbl` is a named pure snapshot, we're not quite done, since the snapshot metadata won't appear in the
+        # TableVersionPath. We need to prepend it separately.
+        if tbl._id != tbl._tbl_version.id:
+            snapshot_md = self.load_tbl_md(tbl._id, 0)
+            md = [snapshot_md, *md]
+
+        for ancestor_md in md[1:]:
+            # For replica metadata, we guarantee that the current_version and current_schema_version of TableMd
+            # match the corresponding values in TableVersionMd and TableSchemaVersionMd. This is to ensure that,
+            # when the metadata is later stored in the catalog of a different Pixeltable instance, the values of
+            # current_version and current_schema_version will always point to versions that are known to the
+            # destination catalog.
+            ancestor_md.tbl_md.current_version = ancestor_md.version_md.version
+            ancestor_md.tbl_md.current_schema_version = ancestor_md.schema_version_md.schema_version
+
+        return md
+
+    def _load_tbl_version(self, tbl_id: UUID, effective_version: Optional[int]) -> Optional[TableVersion]:
+        tbl_md, _, schema_version_md = self.load_tbl_md(tbl_id, effective_version)
         view_md = tbl_md.view_md

+        _logger.info(f'Loading table version: {tbl_id}:{effective_version}')
+        conn = Env.get().conn
+
         # load mutable view ids
         q = sql.select(schema.Table.id).where(
             sql.text(
@@ -745,7 +995,7 @@ class Catalog:
         if view_md is None:
             # this is a base table
             tbl_version = TableVersion(
-
+                tbl_id, tbl_md, effective_version, schema_version_md, mutable_views=mutable_views
             )
             return tbl_version

@@ -762,7 +1012,7 @@ class Catalog:
         base = base_path.tbl_version

         tbl_version = TableVersion(
-
+            tbl_id,
             tbl_md,
             effective_version,
             schema_version_md,
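The reworked `_retry_loop` above now bounds the number of serialization-failure retries instead of failing on the first one. A minimal, self-contained sketch of that pattern follows; the decorator name, retry limit, and 1-second backoff are illustrative assumptions rather than the module's actual constants, and error handling is simplified to a plain `RuntimeError`:

```python
import functools
import time
from typing import Any, Callable, TypeVar

import psycopg
import sqlalchemy as sql

T = TypeVar('T')
MAX_RETRIES = 3  # assumed value for illustration


def retry_on_serialization_failure(op: Callable[..., T]) -> Callable[..., T]:
    """Re-run `op` when Postgres aborts its transaction with a serialization failure."""

    @functools.wraps(op)
    def wrapper(*args: Any, **kwargs: Any) -> T:
        remaining = MAX_RETRIES
        while True:
            try:
                # each attempt is expected to run in its own transaction
                return op(*args, **kwargs)
            except sql.exc.DBAPIError as e:
                if not isinstance(e.orig, psycopg.errors.SerializationFailure):
                    raise
                if remaining == 0:
                    raise RuntimeError(f'Serialization retry limit ({MAX_RETRIES}) exceeded') from e
                remaining -= 1
                time.sleep(1)  # brief backoff before retrying the whole operation

    return wrapper
```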
pixeltable/catalog/globals.py
CHANGED
@@ -100,14 +100,14 @@ class IfNotExistsParam(enum.Enum):
         raise excs.Error(f'{param_name} must be one of: [{val_strs}]') from None


-def is_valid_identifier(name: str) -> bool:
-    return name.isidentifier() and not name.startswith('_')
+def is_valid_identifier(name: str, allow_system_identifiers: bool = False) -> bool:
+    return name.isidentifier() and (allow_system_identifiers or not name.startswith('_'))


-def is_valid_path(path: str, empty_is_valid: bool) -> bool:
-    if
+def is_valid_path(path: str, empty_is_valid: bool, allow_system_paths: bool = False) -> bool:
+    if path == '':
         return empty_is_valid
-    return all(is_valid_identifier(part) for part in path.split('.'))
+    return all(is_valid_identifier(part, allow_system_paths) for part in path.split('.'))


 def is_system_column_name(name: str) -> bool:
pixeltable/catalog/insertable_table.py
CHANGED
@@ -17,6 +17,7 @@ from .table_version_handle import TableVersionHandle
 from .table_version_path import TableVersionPath

 if TYPE_CHECKING:
+    from pixeltable import exprs
     from pixeltable.globals import TableDataSource
     from pixeltable.io.table_data_conduit import TableDataConduit

@@ -210,7 +211,7 @@ class InsertableTable(Table):
                 msg = str(e)
                 raise excs.Error(f'Error in column {col.name}: {msg[0].lower() + msg[1:]}\nRow: {row}') from e

-    def delete(self, where: Optional['
+    def delete(self, where: Optional['exprs.Expr'] = None) -> UpdateStatus:
         """Delete rows in this table.

         Args:
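To make the new flags concrete, here is a small usage sketch against the two helpers shown in the `globals.py` hunk; the path string is made up, and only the leading-underscore rule comes from the code:

```python
from pixeltable.catalog.globals import is_valid_identifier, is_valid_path

# Names starting with '_' are still rejected by default, but accepted as system identifiers.
assert not is_valid_identifier('_system')
assert is_valid_identifier('_system', allow_system_identifiers=True)

# The same gate applies to every component of a path.
assert not is_valid_path('_system.replica_0abc', empty_is_valid=False)
assert is_valid_path('_system.replica_0abc', empty_is_valid=False, allow_system_paths=True)
```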
pixeltable/catalog/path.py
CHANGED
@@ -11,8 +11,8 @@ _logger = logging.getLogger('pixeltable')


 class Path:
-    def __init__(self, path: str, empty_is_valid: bool = False):
-        if not is_valid_path(path, empty_is_valid):
+    def __init__(self, path: str, empty_is_valid: bool = False, allow_system_paths: bool = False):
+        if not is_valid_path(path, empty_is_valid, allow_system_paths):
             raise excs.Error(f"Invalid path format: '{path}'")
         self.components = path.split('.')

@@ -29,21 +29,25 @@ class Path:
     def is_root(self) -> bool:
         return not self.components[0]

+    @property
+    def is_system_path(self) -> bool:
+        return self.components[0].startswith('_')
+
     @property
     def parent(self) -> Path:
         if len(self.components) == 1:
             if self.is_root:
                 return self
             else:
-                return Path('', empty_is_valid=True)
+                return Path('', empty_is_valid=True, allow_system_paths=True)
         else:
-            return Path('.'.join(self.components[:-1]))
+            return Path('.'.join(self.components[:-1]), allow_system_paths=True)

     def append(self, name: str) -> Path:
         if self.is_root:
-            return Path(name)
+            return Path(name, allow_system_paths=True)
         else:
-            return Path(f'{self
+            return Path(f'{self}.{name}', allow_system_paths=True)

     def is_ancestor(self, other: Path, is_parent: bool = False) -> bool:
         """
@@ -67,6 +71,9 @@ class Path:
         for i in range(0, len(self.components)):
             yield Path('.'.join(self.components[0:i]), empty_is_valid=True)

+    def __repr__(self) -> str:
+        return repr(str(self))
+
     def __str__(self) -> str:
         return '.'.join(self.components)

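A matching sketch for the `Path` additions (internal API; the concrete path string is hypothetical):

```python
from pixeltable.catalog.path import Path

p = Path('_system.replica_0abc', allow_system_paths=True)
assert p.is_system_path                           # first component starts with '_'
assert str(p.parent) == '_system'                 # parent/append now pass allow_system_paths=True
assert repr(p) == repr('_system.replica_0abc')    # __repr__ delegates to the string form
```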