pixeltable 0.4.0rc3__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (52) hide show
  1. pixeltable/__init__.py +1 -1
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +9 -1
  4. pixeltable/catalog/catalog.py +333 -99
  5. pixeltable/catalog/column.py +28 -26
  6. pixeltable/catalog/globals.py +12 -0
  7. pixeltable/catalog/insertable_table.py +8 -8
  8. pixeltable/catalog/schema_object.py +6 -0
  9. pixeltable/catalog/table.py +111 -116
  10. pixeltable/catalog/table_version.py +36 -50
  11. pixeltable/catalog/table_version_handle.py +4 -1
  12. pixeltable/catalog/table_version_path.py +28 -4
  13. pixeltable/catalog/view.py +10 -18
  14. pixeltable/config.py +4 -0
  15. pixeltable/dataframe.py +10 -9
  16. pixeltable/env.py +5 -11
  17. pixeltable/exceptions.py +6 -0
  18. pixeltable/exec/exec_node.py +2 -0
  19. pixeltable/exec/expr_eval/expr_eval_node.py +4 -4
  20. pixeltable/exec/sql_node.py +47 -30
  21. pixeltable/exprs/column_property_ref.py +2 -1
  22. pixeltable/exprs/column_ref.py +7 -6
  23. pixeltable/exprs/expr.py +4 -4
  24. pixeltable/func/__init__.py +1 -0
  25. pixeltable/func/mcp.py +74 -0
  26. pixeltable/func/query_template_function.py +4 -2
  27. pixeltable/func/tools.py +12 -2
  28. pixeltable/func/udf.py +2 -2
  29. pixeltable/functions/__init__.py +1 -0
  30. pixeltable/functions/groq.py +108 -0
  31. pixeltable/functions/huggingface.py +8 -6
  32. pixeltable/functions/mistralai.py +2 -13
  33. pixeltable/functions/openai.py +1 -6
  34. pixeltable/functions/replicate.py +2 -2
  35. pixeltable/functions/util.py +6 -1
  36. pixeltable/globals.py +0 -2
  37. pixeltable/io/external_store.py +2 -2
  38. pixeltable/io/label_studio.py +4 -4
  39. pixeltable/io/table_data_conduit.py +1 -1
  40. pixeltable/metadata/__init__.py +1 -1
  41. pixeltable/metadata/converters/convert_37.py +15 -0
  42. pixeltable/metadata/notes.py +1 -0
  43. pixeltable/metadata/schema.py +5 -0
  44. pixeltable/plan.py +37 -121
  45. pixeltable/share/packager.py +2 -2
  46. pixeltable/type_system.py +30 -0
  47. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.1.dist-info}/METADATA +1 -1
  48. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.1.dist-info}/RECORD +51 -49
  49. pixeltable/utils/sample.py +0 -25
  50. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.1.dist-info}/LICENSE +0 -0
  51. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.1.dist-info}/WHEEL +0 -0
  52. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.1.dist-info}/entry_points.txt +0 -0
@@ -5,6 +5,7 @@ import functools
5
5
  import logging
6
6
  import random
7
7
  import time
8
+ from collections import defaultdict
8
9
  from contextlib import contextmanager
9
10
  from typing import TYPE_CHECKING, Any, Callable, Iterator, Optional, TypeVar
10
11
  from uuid import UUID
@@ -17,10 +18,9 @@ from pixeltable.env import Env
17
18
  from pixeltable.iterators import ComponentIterator
18
19
  from pixeltable.metadata import schema
19
20
 
20
- if TYPE_CHECKING:
21
- from pixeltable.plan import SampleClause
21
+ from .column import Column
22
22
  from .dir import Dir
23
- from .globals import IfExistsParam, IfNotExistsParam, MediaValidation
23
+ from .globals import IfExistsParam, IfNotExistsParam, MediaValidation, QColumnId
24
24
  from .insertable_table import InsertableTable
25
25
  from .path import Path
26
26
  from .schema_object import SchemaObject
@@ -31,6 +31,8 @@ from .table_version_path import TableVersionPath
31
31
  from .view import View
32
32
 
33
33
  if TYPE_CHECKING:
34
+ from pixeltable.plan import SampleClause
35
+
34
36
  from .. import DataFrame, exprs
35
37
 
36
38
 
@@ -60,9 +62,10 @@ def _unpack_row(
60
62
  return result
61
63
 
62
64
 
65
+ # -1: unlimited
63
66
  # for now, we don't limit the number of retries, because we haven't seen situations where the actual number of retries
64
67
  # grows uncontrollably
65
- _MAX_RETRIES = 0
68
+ _MAX_RETRIES = -1
66
69
 
67
70
  T = TypeVar('T')
68
71
 
@@ -71,25 +74,29 @@ def _retry_loop(*, for_write: bool) -> Callable[[Callable[..., T]], Callable[...
71
74
  def decorator(op: Callable[..., T]) -> Callable[..., T]:
72
75
  @functools.wraps(op)
73
76
  def loop(*args: Any, **kwargs: Any) -> T:
74
- num_remaining_retries = _MAX_RETRIES
77
+ num_retries = 0
75
78
  while True:
76
79
  try:
77
80
  # in order for retry to work, we need to make sure that there aren't any prior db updates
78
81
  # that are part of an ongoing transaction
79
82
  assert not Env.get().in_xact
80
- with Catalog.get().begin_xact(for_write=for_write):
83
+ with Catalog.get().begin_xact(for_write=for_write, convert_db_excs=False):
81
84
  return op(*args, **kwargs)
82
85
  except sql.exc.DBAPIError as e:
83
86
  # TODO: what other exceptions should we be looking for?
84
- if isinstance(e.orig, psycopg.errors.SerializationFailure):
85
- if num_remaining_retries > 0:
86
- num_remaining_retries -= 1
87
- _logger.debug(f'Serialization failure, retrying ({num_remaining_retries} retries left)')
87
+ if isinstance(e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)):
88
+ if num_retries < _MAX_RETRIES or _MAX_RETRIES == -1:
89
+ num_retries += 1
90
+ _logger.debug(f'Retrying ({num_retries}) after {type(e.orig)}')
88
91
  time.sleep(random.uniform(0.1, 0.5))
89
92
  else:
90
93
  raise excs.Error(f'Serialization retry limit ({_MAX_RETRIES}) exceeded') from e
91
94
  else:
92
95
  raise
96
+ except Exception as e:
97
+ # for informational/debugging purposes
98
+ _logger.debug(f'retry_loop(): passing along {e}')
99
+ raise
93
100
 
94
101
  return loop
95
102
 
@@ -109,8 +116,8 @@ class Catalog:
109
116
  duplicate references to that table in the From clause (ie, incorrect Cartesian products)
110
117
  - in order to allow multiple concurrent Python processes to perform updates (data and/or schema) against a shared
111
118
  Pixeltable instance, Catalog needs to reload metadata from the store when there are changes
112
- - concurrent changes are detected by comparing TableVersion.version with the stored current version
113
- (TableMd.current_version)
119
+ - concurrent changes are detected by comparing TableVersion.version/view_sn with the stored current version
120
+ (TableMd.current_version/view_sn)
114
121
  - cached live TableVersion instances (those with effective_version == None) are validated against the stored
115
122
  metadata on transaction boundaries; this is recorded in TableVersion.is_validated
116
123
  - metadata validation is only needed for live TableVersion instances (snapshot instances are immutable)
@@ -118,13 +125,22 @@ class Catalog:
118
125
 
119
126
  _instance: Optional[Catalog] = None
120
127
 
121
- # key: [id, version]
128
+ # cached TableVersion instances; key: [id, version]
122
129
  # - mutable version of a table: version == None (even though TableVersion.version is set correctly)
123
130
  # - snapshot versions: records the version of the snapshot
124
131
  _tbl_versions: dict[tuple[UUID, Optional[int]], TableVersion]
125
132
  _tbls: dict[UUID, Table]
126
133
  _in_write_xact: bool # True if we're in a write transaction
127
- _x_locked_tbl_id: Optional[UUID] # set if begin_xact() was asked to write-lock a table
134
+ _x_locked_tbl_ids: set[UUID] # non-empty for write transactions
135
+
136
+ # cached column dependencies
137
+ # - key: table id, value: mapping from column id to its dependencies
138
+ # - only maintained for dependencies between non-snapshot table versions
139
+ # - can contain stale entries (stemming from invalidated TV instances)
140
+ _column_dependencies: dict[UUID, dict[QColumnId, set[QColumnId]]]
141
+
142
+ # column dependents are recomputed at the beginning of every write transaction and only reflect the locked tree
143
+ _column_dependents: Optional[dict[QColumnId, set[QColumnId]]]
128
144
 
129
145
  @classmethod
130
146
  def get(cls) -> Catalog:
@@ -147,9 +163,14 @@ class Catalog:
147
163
  self._tbl_versions = {}
148
164
  self._tbls = {} # don't use a defaultdict here, it doesn't cooperate with the debugger
149
165
  self._in_write_xact = False
150
- self._x_locked_tbl_id = None
166
+ self._x_locked_tbl_ids = set()
167
+ self._column_dependencies = {}
168
+ self._column_dependents = None
151
169
  self._init_store()
152
170
 
171
+ def _dropped_tbl_error_msg(self, tbl_id: UUID) -> str:
172
+ return f'Table was dropped (no record found for {tbl_id})'
173
+
153
174
  def validate(self) -> None:
154
175
  """Validate structural consistency of cached metadata"""
155
176
  for (tbl_id, effective_version), tbl_version in self._tbl_versions.items():
@@ -164,13 +185,24 @@ class Catalog:
164
185
  f'snapshot_id={tbl_version.id} mutable_views={tbl_version.mutable_views}'
165
186
  )
166
187
 
167
- if tbl_version.is_view and tbl_version.is_mutable:
188
+ if tbl_version.is_view and tbl_version.is_mutable and tbl_version.is_validated:
168
189
  # make sure this mutable view is recorded in a mutable base
169
190
  base = tbl_version.base
170
191
  assert base is not None
171
192
  if base.effective_version is None:
172
193
  assert (base.id, None) in self._tbl_versions
173
- assert TableVersionHandle.create(tbl_version) in self._tbl_versions[base.id, None].mutable_views
194
+ base_tv = self._tbl_versions[base.id, None]
195
+ if not base_tv.is_validated:
196
+ continue
197
+ mutable_view_ids = ', '.join(str(tv.id) for tv in self._tbl_versions[base.id, None].mutable_views)
198
+ mutable_view_names = ', '.join(
199
+ tv._tbl_version.name
200
+ for tv in self._tbl_versions[base.id, None].mutable_views
201
+ if tv._tbl_version is not None
202
+ )
203
+ assert TableVersionHandle.create(tbl_version) in self._tbl_versions[base.id, None].mutable_views, (
204
+ f'{tbl_version.name} ({tbl_version.id}) missing in {mutable_view_ids} ({mutable_view_names})'
205
+ )
174
206
 
175
207
  if len(tbl_version.mutable_views) > 0:
176
208
  # make sure we also loaded mutable view metadata, which is needed to detect column dependencies
@@ -178,24 +210,37 @@ class Catalog:
178
210
  assert v.effective_version is None, f'{v.id}:{v.effective_version}'
179
211
 
180
212
  @contextmanager
181
- def begin_xact(self, *, tbl_id: Optional[UUID] = None, for_write: bool = False) -> Iterator[sql.Connection]:
213
+ def begin_xact(
214
+ self,
215
+ *,
216
+ tbl: Optional[TableVersionPath] = None,
217
+ for_write: bool = False,
218
+ lock_mutable_tree: bool = False,
219
+ convert_db_excs: bool = True,
220
+ ) -> Iterator[sql.Connection]:
182
221
  """
183
222
  Return a context manager that yields a connection to the database. Idempotent.
184
223
 
185
224
  It is mandatory to call this method, not Env.begin_xact(), if the transaction accesses any table data
186
225
  or metadata.
187
226
 
188
- Lock acquisition:
189
- - x-locks Table records by updating Table.lock_dummy
227
+ If tbl != None, follows this locking protocol:
228
+ - validates/reloads the TableVersion instances of tbl's ancestors (in the hope that this reduces potential
229
+ SerializationErrors later on)
230
+ - if for_write == True, x-locks Table record (by updating Table.lock_dummy; see _acquire_tbl_xlock())
231
+ - if for_write == False, validates TableVersion instance
232
+ - if lock_mutable_tree == True, also x-locks all mutable views of the table
190
233
  - this needs to be done in a retry loop, because Postgres can decide to abort the transaction
191
234
  (SerializationFailure, LockNotAvailable)
192
235
  - for that reason, we do all lock acquisition prior to doing any real work (eg, compute column values),
193
- to minimize (maybe avoid altogether) loosing that work
236
+ to minimize the probability of loosing that work due to a forced abort
237
+
238
+ If convert_db_excs == True, converts DBAPIErrors into excs.Errors.
194
239
  """
195
240
  if Env.get().in_xact:
196
- if tbl_id is not None and for_write:
241
+ if tbl is not None and for_write:
197
242
  # make sure that we requested the required table lock at the beginning of the transaction
198
- assert tbl_id == self._x_locked_tbl_id, f'{tbl_id} != {self._x_locked_tbl_id}'
243
+ assert tbl.tbl_id in self._x_locked_tbl_ids, f'{tbl.tbl_id} not in {self._x_locked_tbl_ids}'
199
244
  yield Env.get().conn
200
245
  return
201
246
 
@@ -209,30 +254,68 @@ class Catalog:
209
254
  num_retries = 0
210
255
  while True:
211
256
  try:
257
+ self._in_write_xact = False
258
+ self._x_locked_tbl_ids = set()
259
+ self._column_dependents = None
260
+
212
261
  with Env.get().begin_xact() as conn:
213
- if tbl_id is not None and for_write:
214
- # X-lock Table record
215
- conn.execute(
216
- sql.select(schema.Table).where(schema.Table.id == tbl_id).with_for_update(nowait=True)
217
- )
218
- conn.execute(sql.update(schema.Table).values(lock_dummy=1).where(schema.Table.id == tbl_id))
219
- self._x_locked_tbl_id = tbl_id
262
+ if tbl is not None:
263
+ try:
264
+ if not self._acquire_path_locks(
265
+ tbl=tbl, for_write=for_write, lock_mutable_tree=lock_mutable_tree
266
+ ):
267
+ # this is a snapshot
268
+ yield conn
269
+ return
270
+
271
+ if for_write:
272
+ if lock_mutable_tree:
273
+ self._x_locked_tbl_ids = self._get_mutable_tree(tbl.tbl_id)
274
+ self._compute_column_dependents(self._x_locked_tbl_ids)
275
+ else:
276
+ self._x_locked_tbl_ids = {tbl.tbl_id}
277
+ if _logger.isEnabledFor(logging.DEBUG):
278
+ # validate only when we don't see errors
279
+ self.validate()
280
+
281
+ except sql.exc.DBAPIError as e:
282
+ if isinstance(
283
+ e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)
284
+ ) and (num_retries < _MAX_RETRIES or _MAX_RETRIES == -1):
285
+ num_retries += 1
286
+ _logger.debug(f'Retrying ({num_retries}) after {type(e.orig)}')
287
+ time.sleep(random.uniform(0.1, 0.5))
288
+ continue
289
+ else:
290
+ raise
220
291
 
221
292
  self._in_write_xact = for_write
222
293
  yield conn
223
294
  return
295
+
224
296
  except sql.exc.DBAPIError as e:
225
- if isinstance(e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)) and (
226
- num_retries < _MAX_RETRIES or _MAX_RETRIES == 0
227
- ):
228
- num_retries += 1
229
- _logger.debug(f'Retrying ({num_retries}) after {type(e.orig)}')
230
- time.sleep(random.uniform(0.1, 0.5))
297
+ # we got some db error during the actual operation (not just while trying to get locks on the metadata
298
+ # records): we convert these into Errors, if asked to do so, and abort
299
+ # TODO: what other concurrency-related exceptions should we expect?
300
+
301
+ # we always convert UndefinedTable exceptions (they can't be retried)
302
+ if isinstance(e.orig, psycopg.errors.UndefinedTable):
303
+ # the table got dropped in the middle of the table operation
304
+ _logger.debug(f'Exception: undefined table ({tbl.tbl_name()}): Caught {type(e.orig)}: {e!r}')
305
+ assert tbl is not None
306
+ raise excs.Error(f'Table was dropped: {tbl.tbl_name()}') from None
307
+ elif isinstance(e.orig, psycopg.errors.SerializationFailure) and convert_db_excs:
308
+ # we still got a serialization error, despite getting x-locks at the beginning
309
+ msg = f'{tbl.tbl_name()} ({tbl.tbl_id})' if tbl is not None else ''
310
+ _logger.debug(f'Exception: serialization failure: {msg} ({e})')
311
+ raise excs.Error('Serialization failure. Please re-run the operation.') from None
231
312
  else:
232
313
  raise
314
+
233
315
  finally:
234
316
  self._in_write_xact = False
235
- self._x_locked_tbl_id = None
317
+ self._x_locked_tbl_ids = set()
318
+ self._column_dependents = None
236
319
 
237
320
  # invalidate cached current TableVersion instances
238
321
  for tv in self._tbl_versions.values():
@@ -240,20 +323,117 @@ class Catalog:
240
323
  _logger.debug(f'invalidating table version {tv.id}:None (tv={id(tv):x})')
241
324
  tv.is_validated = False
242
325
 
243
- if _logger.isEnabledFor(logging.DEBUG):
244
- self.validate()
245
-
246
326
  @property
247
327
  def in_write_xact(self) -> bool:
248
328
  return self._in_write_xact
249
329
 
250
- def _acquire_dir_xlock(self, parent_id: Optional[UUID], dir_id: Optional[UUID], dir_name: Optional[str]) -> None:
330
+ def _acquire_path_locks(
331
+ self, *, tbl: TableVersionPath, for_write: bool = False, lock_mutable_tree: bool = False
332
+ ) -> bool:
333
+ """
334
+ Path locking protocol:
335
+ - refresh cached TableVersions of ancestors (we need those even during inserts, for computed columns that
336
+ reference the base tables)
337
+ - refresh cached TableVersion of tbl or get X-lock, depending on for_write
338
+ - if lock_mutable_tree, also X-lock all mutable views of tbl
339
+
340
+ Returns False if trying to lock a pure snapshot with for_write == True
341
+ Raises Error if tbl doesn't exist.
342
+ """
343
+ start_idx = 1 if for_write else 0
344
+ for handle in tbl.get_tbl_versions()[start_idx::-1]:
345
+ _ = self.get_tbl_version(handle.id, handle.effective_version)
346
+ if not for_write:
347
+ return True # nothing left to lock
348
+ return self._acquire_tbl_xlock(tbl_id=tbl.tbl_id, lock_mutable_tree=lock_mutable_tree, raise_if_not_exists=True)
349
+
350
+ def _acquire_tbl_xlock(
351
+ self,
352
+ *,
353
+ tbl_id: Optional[UUID] = None,
354
+ dir_id: Optional[UUID] = None,
355
+ tbl_name: Optional[str] = None,
356
+ lock_mutable_tree: bool = False,
357
+ raise_if_not_exists: bool = False,
358
+ ) -> bool:
359
+ """Force acquisition of an X-lock on a Table record via a blind update
360
+
361
+ Either tbl_id or dir_id/tbl_name need to be specified.
362
+ Returns True if the table was locked, False if it was a snapshot or not found.
363
+ If lock_mutable_tree, recursively locks all mutable views of the table.
364
+
365
+ Returns False if the table is a snapshot or not found and !raise_if_not_exists.
366
+ """
367
+ where_clause: sql.ColumnElement
368
+ if tbl_id is not None:
369
+ where_clause = schema.Table.id == tbl_id
370
+ else:
371
+ where_clause = sql.and_(schema.Table.dir_id == dir_id, schema.Table.md['name'].astext == tbl_name)
372
+ user = Env.get().user
373
+ if user is not None:
374
+ where_clause = sql.and_(where_clause, schema.Table.md['user'].astext == Env.get().user)
375
+
376
+ conn = Env.get().conn
377
+ row = conn.execute(sql.select(schema.Table).where(where_clause).with_for_update(nowait=True)).one_or_none()
378
+ if row is None:
379
+ if raise_if_not_exists:
380
+ raise excs.Error(self._dropped_tbl_error_msg(tbl_id))
381
+ return False # nothing to lock
382
+ if row.md['view_md'] is not None and row.md['view_md']['is_snapshot']:
383
+ return False # nothing to lock
384
+ conn.execute(sql.update(schema.Table).values(lock_dummy=1).where(where_clause))
385
+
386
+ if not lock_mutable_tree:
387
+ return True
388
+ # also lock mutable views
389
+ tv = self.get_tbl_version(tbl_id, None)
390
+ for view in tv.mutable_views:
391
+ self._acquire_tbl_xlock(tbl_id=view.id, lock_mutable_tree=True, raise_if_not_exists=raise_if_not_exists)
392
+ return True
393
+
394
+ def _get_mutable_tree(self, tbl_id: UUID) -> set[UUID]:
395
+ """Returns ids of all tables that form the tree of mutable views starting at tbl_id; includes the root."""
396
+ tv = self.get_tbl_version(tbl_id, None)
397
+ result: set[UUID] = {tv.id}
398
+ for view in tv.mutable_views:
399
+ result.update(self._get_mutable_tree(view.id))
400
+ return result
401
+
402
+ def _compute_column_dependents(self, mutable_tree: set[UUID]) -> None:
403
+ """Populate self._column_dependents for all tables in mutable_tree"""
404
+ assert self._column_dependents is None
405
+ self._column_dependents = defaultdict(set)
406
+ for tbl_id in mutable_tree:
407
+ assert tbl_id in self._column_dependencies
408
+ for col, dependencies in self._column_dependencies[tbl_id].items():
409
+ for dependency in dependencies:
410
+ if dependency.tbl_id not in mutable_tree:
411
+ continue
412
+ dependents = self._column_dependents[dependency]
413
+ dependents.add(col)
414
+
415
+ def get_column_dependents(self, tbl_id: UUID, col_id: int) -> set[Column]:
416
+ """Return all Columns that transitively depend on the given column."""
417
+ assert self._column_dependents is not None
418
+ dependents = self._column_dependents[QColumnId(tbl_id, col_id)]
419
+ result: set[Column] = set()
420
+ for dependent in dependents:
421
+ tv = self.get_tbl_version(dependent.tbl_id, None)
422
+ col = tv.cols_by_id[dependent.col_id]
423
+ result.add(col)
424
+ return result
425
+
426
+ def _acquire_dir_xlock(
427
+ self, *, parent_id: Optional[UUID] = None, dir_id: Optional[UUID] = None, dir_name: Optional[str] = None
428
+ ) -> None:
251
429
  """Force acquisition of an X-lock on a Dir record via a blind update.
252
430
 
253
431
  If dir_id is present, then all other conditions are ignored.
254
432
  Note that (parent_id==None) is a valid where condition.
255
433
  If dir_id is not specified, the user from the environment is added to the directory filters.
256
434
  """
435
+ assert (dir_name is None) != (dir_id is None)
436
+ assert not (parent_id is not None and dir_name is None)
257
437
  user = Env.get().user
258
438
  assert self._in_write_xact
259
439
  q = sql.update(schema.Dir).values(lock_dummy=1)
@@ -367,7 +547,7 @@ class Catalog:
367
547
  add_dir: Optional[schema.Dir] = None
368
548
  drop_dir: Optional[schema.Dir] = None
369
549
  for p in sorted(dir_paths):
370
- dir = self._get_dir(p, for_update=True)
550
+ dir = self._get_dir(p, lock_dir=True)
371
551
  if dir is None:
372
552
  raise excs.Error(f'Directory {str(p)!r} does not exist.')
373
553
  if p == add_dir_path:
@@ -377,7 +557,7 @@ class Catalog:
377
557
 
378
558
  add_obj: Optional[SchemaObject] = None
379
559
  if add_dir is not None:
380
- add_obj = self._get_dir_entry(add_dir.id, add_name, for_update=True)
560
+ add_obj = self._get_dir_entry(add_dir.id, add_name, lock_entry=True)
381
561
  if add_obj is not None and raise_if_exists:
382
562
  add_path = add_dir_path.append(add_name)
383
563
  raise excs.Error(f'Path {str(add_path)!r} already exists.')
@@ -385,7 +565,7 @@ class Catalog:
385
565
  drop_obj: Optional[SchemaObject] = None
386
566
  if drop_dir is not None:
387
567
  drop_path = drop_dir_path.append(drop_name)
388
- drop_obj = self._get_dir_entry(drop_dir.id, drop_name, for_update=True)
568
+ drop_obj = self._get_dir_entry(drop_dir.id, drop_name, lock_entry=True)
389
569
  if drop_obj is None and raise_if_not_exists:
390
570
  raise excs.Error(f'Path {str(drop_path)!r} does not exist.')
391
571
  if drop_obj is not None and drop_expected is not None and not isinstance(drop_obj, drop_expected):
@@ -397,13 +577,13 @@ class Catalog:
397
577
  add_dir_obj = Dir(add_dir.id, add_dir.parent_id, add_dir.md['name']) if add_dir is not None else None
398
578
  return add_obj, add_dir_obj, drop_obj
399
579
 
400
- def _get_dir_entry(self, dir_id: UUID, name: str, for_update: bool = False) -> Optional[SchemaObject]:
580
+ def _get_dir_entry(self, dir_id: UUID, name: str, lock_entry: bool = False) -> Optional[SchemaObject]:
401
581
  user = Env.get().user
402
582
  conn = Env.get().conn
403
583
 
404
584
  # check for subdirectory
405
- if for_update:
406
- self._acquire_dir_xlock(dir_id, None, name)
585
+ if lock_entry:
586
+ self._acquire_dir_xlock(parent_id=dir_id, dir_id=None, dir_name=name)
407
587
  q = sql.select(schema.Dir).where(
408
588
  schema.Dir.parent_id == dir_id, schema.Dir.md['name'].astext == name, schema.Dir.md['user'].astext == user
409
589
  )
@@ -417,13 +597,13 @@ class Catalog:
417
597
  return Dir(dir_record.id, dir_record.parent_id, name)
418
598
 
419
599
  # check for table
600
+ if lock_entry:
601
+ self._acquire_tbl_xlock(dir_id=dir_id, tbl_name=name)
420
602
  q = sql.select(schema.Table.id).where(
421
603
  schema.Table.dir_id == dir_id,
422
604
  schema.Table.md['name'].astext == name,
423
605
  schema.Table.md['user'].astext == user,
424
606
  )
425
- if for_update:
426
- q = q.with_for_update()
427
607
  tbl_id = conn.execute(q).scalar_one_or_none()
428
608
  if tbl_id is not None:
429
609
  if tbl_id not in self._tbls:
@@ -438,7 +618,8 @@ class Catalog:
438
618
  expected: Optional[type[SchemaObject]] = None,
439
619
  raise_if_exists: bool = False,
440
620
  raise_if_not_exists: bool = False,
441
- for_update: bool = False,
621
+ lock_parent: bool = False,
622
+ lock_obj: bool = False,
442
623
  ) -> Optional[SchemaObject]:
443
624
  """Return the schema object at the given path, or None if it doesn't exist.
444
625
 
@@ -454,16 +635,16 @@ class Catalog:
454
635
  raise excs.Error(
455
636
  f'{str(path)!r} needs to be a {expected._display_name()} but is a {Dir._display_name()}'
456
637
  )
457
- dir = self._get_dir(path, for_update=for_update)
638
+ dir = self._get_dir(path, lock_dir=lock_obj)
458
639
  if dir is None:
459
640
  raise excs.Error(f'Unknown user: {Env.get().user}')
460
641
  return Dir(dir.id, dir.parent_id, dir.md['name'])
461
642
 
462
643
  parent_path = path.parent
463
- parent_dir = self._get_dir(parent_path, for_update=False)
644
+ parent_dir = self._get_dir(parent_path, lock_dir=lock_parent)
464
645
  if parent_dir is None:
465
646
  raise excs.Error(f'Directory {str(parent_path)!r} does not exist.')
466
- obj = self._get_dir_entry(parent_dir.id, path.name, for_update=for_update)
647
+ obj = self._get_dir_entry(parent_dir.id, path.name, lock_entry=lock_obj)
467
648
 
468
649
  if obj is None and raise_if_not_exists:
469
650
  raise excs.Error(f'Path {str(path)!r} does not exist.')
@@ -480,12 +661,12 @@ class Catalog:
480
661
  tbl = self._load_tbl(tbl_id)
481
662
  if tbl is None:
482
663
  return None
483
- # if this is a mutable table, we also need to have its mutable views loaded, in order to track column
484
- # dependencies
485
- tbl_version = tbl._tbl_version.get()
486
- if tbl_version.is_mutable:
487
- for v in tbl_version.mutable_views:
488
- _ = self.get_table_by_id(v.id)
664
+ # # if this is a mutable table, we also need to have its mutable views loaded, in order to track column
665
+ # # dependencies
666
+ # tbl_version = tbl._tbl_version.get()
667
+ # if tbl_version.is_mutable:
668
+ # for v in tbl_version.mutable_views:
669
+ # _ = self.get_table_by_id(v.id)
489
670
  return self._tbls[tbl_id]
490
671
 
491
672
  @_retry_loop(for_write=True)
@@ -539,6 +720,18 @@ class Catalog:
539
720
  ) -> Table:
540
721
  from pixeltable.utils.filecache import FileCache
541
722
 
723
+ if not is_snapshot and not base.is_snapshot():
724
+ # this is a mutable view of a mutable base; X-lock the base and advance its view_sn before adding the view
725
+ self._acquire_tbl_xlock(tbl_id=base.tbl_id)
726
+ base_tv = self.get_tbl_version(base.tbl_id, None)
727
+ base_tv.tbl_md.view_sn += 1
728
+ result = Env.get().conn.execute(
729
+ sql.update(schema.Table)
730
+ .values({schema.Table.md: dataclasses.asdict(base_tv.tbl_md)})
731
+ .where(schema.Table.id == base.tbl_id)
732
+ )
733
+ assert result.rowcount == 1, result.rowcount
734
+
542
735
  existing = self._handle_path_collision(path, View, is_snapshot, if_exists)
543
736
  if existing is not None:
544
737
  assert isinstance(existing, View)
@@ -722,34 +915,31 @@ class Catalog:
722
915
 
723
916
  @_retry_loop(for_write=False)
724
917
  def get_table(self, path: Path) -> Table:
725
- obj = self._get_table(path)
726
- return obj
727
-
728
- def _get_table(self, path: Path) -> Table:
729
918
  obj = Catalog.get()._get_schema_object(path, expected=Table, raise_if_not_exists=True)
730
919
  assert isinstance(obj, Table)
731
- tbl_version = obj._tbl_version.get()
732
- # TODO: instead of calling this here, move the logic into TableVersion.init(), which is called after
733
- # registering the instance in _tbl_versions
734
- tbl_version.ensure_md_loaded()
735
- # if this table has mutable views, we need to load those as well, in order to record column dependencies
736
- for v in tbl_version.mutable_views:
737
- self.get_table_by_id(v.id)
738
920
  return obj
739
921
 
740
922
  @_retry_loop(for_write=True)
741
923
  def drop_table(self, path: Path, if_not_exists: IfNotExistsParam, force: bool) -> None:
742
- _, _, src_obj = self._prepare_dir_op(
743
- drop_dir_path=path.parent,
744
- drop_name=path.name,
745
- drop_expected=Table,
924
+ tbl = self._get_schema_object(
925
+ path,
926
+ expected=Table,
746
927
  raise_if_not_exists=if_not_exists == IfNotExistsParam.ERROR and not force,
928
+ lock_parent=True,
929
+ lock_obj=False,
747
930
  )
748
- if src_obj is None:
931
+ if tbl is None:
749
932
  _logger.info(f'Skipped table {str(path)!r} (does not exist).')
750
933
  return
751
- assert isinstance(src_obj, Table)
752
- self._drop_tbl(src_obj, force=force, is_replace=False)
934
+ assert isinstance(tbl, Table)
935
+
936
+ if isinstance(tbl, View) and tbl._tbl_version_path.is_mutable() and tbl._tbl_version_path.base.is_mutable():
937
+ # this is a mutable view of a mutable base;
938
+ # lock the base before the view, in order to avoid deadlocks with concurrent inserts/updates
939
+ base_id = tbl._tbl_version_path.base.tbl_id
940
+ self._acquire_tbl_xlock(tbl_id=base_id, lock_mutable_tree=False)
941
+
942
+ self._drop_tbl(tbl, force=force, is_replace=False)
753
943
 
754
944
  def _drop_tbl(self, tbl: Table, force: bool, is_replace: bool) -> None:
755
945
  """
@@ -759,8 +949,11 @@ class Catalog:
759
949
  - X-lock base before X-locking any view
760
950
  - deadlock-free wrt to TableVersion.insert() (insert propagation also proceeds top-down)
761
951
  - X-locks parent dir prior to calling TableVersion.drop(): prevent concurrent creation of another SchemaObject
762
- in the same directory with the same name (which could lead to duplicate names if we get rolled back)
952
+ in the same directory with the same name (which could lead to duplicate names if we get aborted)
763
953
  """
954
+ self._acquire_dir_xlock(dir_id=tbl._dir_id)
955
+ self._acquire_tbl_xlock(tbl_id=tbl._id, lock_mutable_tree=False)
956
+
764
957
  view_ids = self.get_view_ids(tbl._id, for_update=True)
765
958
  if len(view_ids) > 0:
766
959
  if not force:
@@ -780,12 +973,34 @@ class Catalog:
780
973
  view = self.get_table_by_id(view_id)
781
974
  self._drop_tbl(view, force=force, is_replace=is_replace)
782
975
 
783
- _ = self.get_dir(tbl._dir_id, for_update=True) # X-lock the parent directory
784
- tbl._drop()
976
+ # if this is a mutable view of a mutable base, advance the base's view_sn
977
+ if isinstance(tbl, View) and tbl._tbl_version_path.is_mutable() and tbl._tbl_version_path.base.is_mutable():
978
+ base_id = tbl._tbl_version_path.base.tbl_id
979
+ base_tv = self.get_tbl_version(base_id, None)
980
+ base_tv.tbl_md.view_sn += 1
981
+ result = Env.get().conn.execute(
982
+ sql.update(schema.Table.__table__)
983
+ .values({schema.Table.md: dataclasses.asdict(base_tv.tbl_md)})
984
+ .where(schema.Table.id == base_id)
985
+ )
986
+ assert result.rowcount == 1, result.rowcount
987
+
988
+ tv = tbl._tbl_version.get() if tbl._tbl_version is not None else None
989
+ if tv is not None:
990
+ tv = tbl._tbl_version.get()
991
+ # invalidate the TableVersion instance so that existing references to it can find out it has been dropped
992
+ tv.is_validated = False
993
+
994
+ self.delete_tbl_md(tbl._id)
785
995
  assert tbl._id in self._tbls
786
996
  del self._tbls[tbl._id]
787
997
  _logger.info(f'Dropped table `{tbl._path()}`.')
788
998
 
999
+ if tv is not None:
1000
+ tv.drop()
1001
+ assert (tv.id, tv.effective_version) in self._tbl_versions
1002
+ del self._tbl_versions[tv.id, tv.effective_version]
1003
+
789
1004
  @_retry_loop(for_write=True)
790
1005
  def create_dir(self, path: Path, if_exists: IfExistsParam, parents: bool) -> Dir:
791
1006
  return self._create_dir(path, if_exists, parents)
@@ -846,7 +1061,7 @@ class Catalog:
846
1061
  raise excs.Error(f'Directory {str(dir_path)!r} is not empty.')
847
1062
 
848
1063
  # drop existing subdirs
849
- self._acquire_dir_xlock(dir_id, None, None)
1064
+ self._acquire_dir_xlock(dir_id=dir_id)
850
1065
  dir_q = sql.select(schema.Dir).where(schema.Dir.parent_id == dir_id)
851
1066
  for row in conn.execute(dir_q).all():
852
1067
  self._drop_dir(row.id, dir_path.append(row.md['name']), force=True)
@@ -866,6 +1081,11 @@ class Catalog:
866
1081
  def get_view_ids(self, tbl_id: UUID, for_update: bool = False) -> list[UUID]:
867
1082
  """Return the ids of views that directly reference the given table"""
868
1083
  conn = Env.get().conn
1084
+ # check whether this table still exists
1085
+ q = sql.select(sql.func.count()).select_from(schema.Table).where(schema.Table.id == tbl_id)
1086
+ tbl_count = conn.execute(q).scalar()
1087
+ if tbl_count == 0:
1088
+ raise excs.Error(self._dropped_tbl_error_msg(tbl_id))
869
1089
  q = sql.select(schema.Table.id).where(sql.text(f"md->'view_md'->'base_versions'->0->>0 = {tbl_id.hex!r}"))
870
1090
  if for_update:
871
1091
  q = q.with_for_update()
@@ -875,27 +1095,29 @@ class Catalog:
875
1095
  def get_tbl_version(self, tbl_id: UUID, effective_version: Optional[int]) -> Optional[TableVersion]:
876
1096
  # we need a transaction here, if we're not already in one; if this starts a new transaction,
877
1097
  # the returned TableVersion instance will not be validated
878
- with self.begin_xact(tbl_id=tbl_id, for_write=False) as conn:
1098
+ with self.begin_xact(for_write=False) as conn:
879
1099
  tv = self._tbl_versions.get((tbl_id, effective_version))
880
1100
  if tv is None:
881
1101
  tv = self._load_tbl_version(tbl_id, effective_version)
882
1102
  elif not tv.is_validated:
883
1103
  # only live instances are invalidated
884
1104
  assert effective_version is None
885
- # we validate live instances by comparing our cached version number to the stored current version
1105
+ # we validate live instances by comparing our cached TableMd.current_version/view_sn to what's stored
886
1106
  # _logger.debug(f'validating metadata for table {tbl_id}:{tv.version} ({id(tv):x})')
887
1107
  q = sql.select(schema.Table.md).where(schema.Table.id == tbl_id)
888
- row = conn.execute(q).one()
889
- current_version = row.md['current_version']
1108
+ row = conn.execute(q).one_or_none()
1109
+ if row is None:
1110
+ raise excs.Error(self._dropped_tbl_error_msg(tbl_id))
1111
+ current_version, view_sn = row.md['current_version'], row.md['view_sn']
890
1112
 
891
1113
  # the stored version can be behind TableVersion.version, because we don't roll back the in-memory
892
1114
  # metadata changes after a failed update operation
893
- if current_version != tv.version:
1115
+ if current_version != tv.version or view_sn != tv.tbl_md.view_sn:
894
1116
  # the cached metadata is invalid
895
1117
  _logger.debug(
896
1118
  f'reloading metadata for table {tbl_id} '
897
- f'(cached version: {tv.version}, current version: {current_version}'
898
- # f', id: {id(tv):x})'
1119
+ f'(cached/current version: {tv.version}/{current_version}, '
1120
+ f'cached/current view_sn: {tv.tbl_md.view_sn}/{view_sn})'
899
1121
  )
900
1122
  tv = self._load_tbl_version(tbl_id, None)
901
1123
  else:
@@ -913,7 +1135,7 @@ class Catalog:
913
1135
  """Return the Dir with the given id, or None if it doesn't exist"""
914
1136
  conn = Env.get().conn
915
1137
  if for_update:
916
- self._acquire_dir_xlock(None, dir_id, None)
1138
+ self._acquire_dir_xlock(dir_id=dir_id)
917
1139
  q = sql.select(schema.Dir).where(schema.Dir.id == dir_id)
918
1140
  row = conn.execute(q).one_or_none()
919
1141
  if row is None:
@@ -921,24 +1143,24 @@ class Catalog:
921
1143
  dir_record = schema.Dir(**row._mapping)
922
1144
  return Dir(dir_record.id, dir_record.parent_id, dir_record.md['name'])
923
1145
 
924
- def _get_dir(self, path: Path, for_update: bool = False) -> Optional[schema.Dir]:
1146
+ def _get_dir(self, path: Path, lock_dir: bool = False) -> Optional[schema.Dir]:
925
1147
  """
926
- Locking protocol: X locks on all ancestors
1148
+ lock_dir: if True, X-locks target (but not the ancestors)
927
1149
  """
928
1150
  user = Env.get().user
929
1151
  conn = Env.get().conn
930
1152
  if path.is_root:
931
- if for_update:
932
- self._acquire_dir_xlock(parent_id=None, dir_id=None, dir_name='')
1153
+ if lock_dir:
1154
+ self._acquire_dir_xlock(dir_name='')
933
1155
  q = sql.select(schema.Dir).where(schema.Dir.parent_id.is_(None), schema.Dir.md['user'].astext == user)
934
1156
  row = conn.execute(q).one_or_none()
935
1157
  return schema.Dir(**row._mapping) if row is not None else None
936
1158
  else:
937
- parent_dir = self._get_dir(path.parent, for_update=False)
1159
+ parent_dir = self._get_dir(path.parent, lock_dir=False)
938
1160
  if parent_dir is None:
939
1161
  return None
940
- if for_update:
941
- self._acquire_dir_xlock(parent_id=parent_dir.id, dir_id=None, dir_name=path.name)
1162
+ if lock_dir:
1163
+ self._acquire_dir_xlock(parent_id=parent_dir.id, dir_name=path.name)
942
1164
  q = sql.select(schema.Dir).where(
943
1165
  schema.Dir.parent_id == parent_dir.id,
944
1166
  schema.Dir.md['name'].astext == path.name,
@@ -1060,7 +1282,8 @@ class Catalog:
1060
1282
  )
1061
1283
 
1062
1284
  row = conn.execute(q).one_or_none()
1063
- assert row is not None, f'Table record not found: {tbl_id}:{effective_version}'
1285
+ if row is None:
1286
+ raise excs.Error(self._dropped_tbl_error_msg(tbl_id))
1064
1287
  tbl_record, version_record, schema_version_record = _unpack_row(
1065
1288
  row, [schema.Table, schema.TableVersion, schema.TableSchemaVersion]
1066
1289
  )
@@ -1143,7 +1366,7 @@ class Catalog:
1143
1366
 
1144
1367
  # If `tbl` is a named pure snapshot, we're not quite done, since the snapshot metadata won't appear in the
1145
1368
  # TableVersionPath. We need to prepend it separately.
1146
- if tbl._id != tbl._tbl_version.id:
1369
+ if isinstance(tbl, View) and tbl._snapshot_only:
1147
1370
  snapshot_md = self.load_tbl_md(tbl._id, 0)
1148
1371
  md = [snapshot_md, *md]
1149
1372
 
@@ -1212,9 +1435,20 @@ class Catalog:
1212
1435
 
1213
1436
  self._tbl_versions[tbl_id, effective_version] = tbl_version
1214
1437
  tbl_version.init()
1215
-
1216
1438
  return tbl_version
1217
1439
 
1440
+ def record_column_dependencies(self, tbl_version: TableVersion) -> None:
1441
+ """Update self._column_dependencies. Only valid for non-snapshot versions."""
1442
+ from pixeltable.exprs import Expr
1443
+
1444
+ assert not tbl_version.is_snapshot
1445
+ dependencies: dict[QColumnId, set[QColumnId]] = {}
1446
+ for col in tbl_version.cols_by_id.values():
1447
+ if col.value_expr_dict is None:
1448
+ continue
1449
+ dependencies[QColumnId(tbl_version.id, col.id)] = Expr.get_refd_column_ids(col.value_expr_dict)
1450
+ self._column_dependencies[tbl_version.id] = dependencies
1451
+
1218
1452
  def _init_store(self) -> None:
1219
1453
  """One-time initialization of the stored catalog. Idempotent."""
1220
1454
  self.create_user(None)