pixeltable 0.3.7__py3-none-any.whl → 0.3.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (68) hide show
  1. pixeltable/__version__.py +2 -2
  2. pixeltable/catalog/catalog.py +509 -103
  3. pixeltable/catalog/column.py +1 -0
  4. pixeltable/catalog/dir.py +15 -6
  5. pixeltable/catalog/path.py +15 -0
  6. pixeltable/catalog/schema_object.py +7 -12
  7. pixeltable/catalog/table.py +3 -12
  8. pixeltable/catalog/table_version.py +5 -0
  9. pixeltable/catalog/view.py +0 -4
  10. pixeltable/env.py +14 -8
  11. pixeltable/exprs/__init__.py +2 -0
  12. pixeltable/exprs/arithmetic_expr.py +7 -11
  13. pixeltable/exprs/array_slice.py +1 -1
  14. pixeltable/exprs/column_property_ref.py +3 -3
  15. pixeltable/exprs/column_ref.py +5 -6
  16. pixeltable/exprs/comparison.py +2 -5
  17. pixeltable/exprs/compound_predicate.py +4 -4
  18. pixeltable/exprs/expr.py +32 -19
  19. pixeltable/exprs/expr_dict.py +3 -3
  20. pixeltable/exprs/expr_set.py +1 -1
  21. pixeltable/exprs/function_call.py +28 -41
  22. pixeltable/exprs/globals.py +3 -3
  23. pixeltable/exprs/in_predicate.py +1 -1
  24. pixeltable/exprs/inline_expr.py +3 -3
  25. pixeltable/exprs/is_null.py +1 -1
  26. pixeltable/exprs/json_mapper.py +5 -5
  27. pixeltable/exprs/json_path.py +27 -15
  28. pixeltable/exprs/literal.py +1 -1
  29. pixeltable/exprs/method_ref.py +2 -2
  30. pixeltable/exprs/row_builder.py +3 -5
  31. pixeltable/exprs/rowid_ref.py +4 -7
  32. pixeltable/exprs/similarity_expr.py +5 -5
  33. pixeltable/exprs/sql_element_cache.py +1 -1
  34. pixeltable/exprs/type_cast.py +2 -3
  35. pixeltable/exprs/variable.py +2 -2
  36. pixeltable/ext/__init__.py +2 -0
  37. pixeltable/ext/functions/__init__.py +2 -0
  38. pixeltable/ext/functions/yolox.py +3 -3
  39. pixeltable/func/__init__.py +2 -0
  40. pixeltable/func/aggregate_function.py +9 -9
  41. pixeltable/func/callable_function.py +7 -5
  42. pixeltable/func/expr_template_function.py +6 -16
  43. pixeltable/func/function.py +10 -8
  44. pixeltable/func/function_registry.py +1 -3
  45. pixeltable/func/query_template_function.py +8 -24
  46. pixeltable/func/signature.py +23 -22
  47. pixeltable/func/tools.py +3 -3
  48. pixeltable/func/udf.py +5 -3
  49. pixeltable/globals.py +118 -260
  50. pixeltable/share/__init__.py +2 -0
  51. pixeltable/share/packager.py +3 -3
  52. pixeltable/share/publish.py +3 -5
  53. pixeltable/utils/coco.py +4 -4
  54. pixeltable/utils/console_output.py +1 -3
  55. pixeltable/utils/coroutine.py +41 -0
  56. pixeltable/utils/description_helper.py +1 -1
  57. pixeltable/utils/documents.py +3 -3
  58. pixeltable/utils/filecache.py +18 -8
  59. pixeltable/utils/formatter.py +2 -3
  60. pixeltable/utils/media_store.py +1 -1
  61. pixeltable/utils/pytorch.py +1 -1
  62. pixeltable/utils/sql.py +4 -4
  63. pixeltable/utils/transactional_directory.py +2 -1
  64. {pixeltable-0.3.7.dist-info → pixeltable-0.3.9.dist-info}/METADATA +1 -1
  65. {pixeltable-0.3.7.dist-info → pixeltable-0.3.9.dist-info}/RECORD +68 -67
  66. {pixeltable-0.3.7.dist-info → pixeltable-0.3.9.dist-info}/LICENSE +0 -0
  67. {pixeltable-0.3.7.dist-info → pixeltable-0.3.9.dist-info}/WHEEL +0 -0
  68. {pixeltable-0.3.7.dist-info → pixeltable-0.3.9.dist-info}/entry_points.txt +0 -0
@@ -1,32 +1,96 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import dataclasses
4
+ import functools
4
5
  import logging
5
- from typing import Optional, Type
6
+ import time
7
+ from typing import TYPE_CHECKING, Any, Callable, Optional, TypeVar
6
8
  from uuid import UUID
7
9
 
10
+ import psycopg
8
11
  import sqlalchemy as sql
9
12
 
10
- import pixeltable.env as env
11
13
  import pixeltable.exceptions as excs
12
14
  import pixeltable.metadata.schema as schema
13
15
  from pixeltable.env import Env
16
+ from pixeltable.iterators import ComponentIterator
14
17
 
15
18
  from .dir import Dir
19
+ from .globals import IfExistsParam, IfNotExistsParam, MediaValidation
20
+ from .insertable_table import InsertableTable
21
+ from .path import Path
16
22
  from .schema_object import SchemaObject
17
23
  from .table import Table
18
24
  from .table_version import TableVersion
19
25
  from .table_version_handle import TableVersionHandle
20
26
  from .table_version_path import TableVersionPath
27
+ from .view import View
28
+
29
+ if TYPE_CHECKING:
30
+ from .. import DataFrame, exprs
21
31
 
22
- # from .. import InsertableTable
23
32
 
24
33
  _logger = logging.getLogger('pixeltable')
25
34
 
26
35
 
27
- def _join_path(path: str, name: str) -> str:
28
- """Append name to path, if path is not empty."""
29
- return name if path == '' else f'{path}.{name}'
36
+ def _lock_str(for_update: bool) -> str:
37
+ return 'X' if for_update else 'S'
38
+
39
+
40
+ # TODO: remove once the concurrent update behavior has been debugged
41
+ # def _debug_print(for_update: bool, msg: str) -> None:
42
+ # return
43
+ # print(f'{datetime.datetime.now()}: {_lock_str(for_update)}: {msg}')
44
+
45
+
46
+ def _unpack_row(
47
+ row: Optional[sql.engine.Row], entities: list[type[sql.orm.decl_api.DeclarativeBase]]
48
+ ) -> Optional[list[Any]]:
49
+ """Convert a Row result into a list of entity instances.
50
+
51
+ Assumes that the query contains a select() of exactly those entities.
52
+ """
53
+ if row is None:
54
+ return None
55
+
56
+ result: list[sql.orm.decl_api.DeclarativeBase] = []
57
+ column_offset = 0
58
+
59
+ for entity in entities:
60
+ num_cols = len(entity.__table__.columns)
61
+ data = {name: row[column_offset + i] for i, name in enumerate(entity.__table__.columns.keys())}
62
+ inst = entity(**data)
63
+ result.append(inst)
64
+ column_offset += num_cols
65
+
66
+ return result
67
+
68
+
69
+ _MAX_RETRIES = 3
70
+ T = TypeVar('T')
71
+
72
+
73
+ def _retry_loop(op: Callable[..., T]) -> Callable[..., T]:
74
+ @functools.wraps(op)
75
+ def loop(*args: Any, **kwargs: Any) -> T:
76
+ num_remaining_retries = _MAX_RETRIES
77
+ while True:
78
+ try:
79
+ # in order for retry to work, we need to make sure that there aren't any prior db updates
80
+ # that are part of an ongoing transaction
81
+ assert not Env.get().in_xact()
82
+ with Env.get().begin_xact() as conn:
83
+ return op(*args, **kwargs)
84
+ except sql.exc.DBAPIError as e:
85
+ if isinstance(e.orig, psycopg.errors.SerializationFailure) and num_remaining_retries > 0:
86
+ num_remaining_retries -= 1
87
+ print(f'serialization failure:\n{e}')
88
+ print('retrying ************************************************************')
89
+ time.sleep(1)
90
+ else:
91
+ raise
92
+
93
+ return loop
30
94
 
31
95
 
32
96
  class Catalog:
@@ -59,25 +123,20 @@ class Catalog:
59
123
  self._tbls = {} # don't use a defaultdict here, it doesn't cooperate with the debugger
60
124
  self._init_store()
61
125
 
62
- def get_dir_path(self, dir_id: UUID) -> str:
126
+ def get_dir_path(self, dir_id: UUID) -> Path:
63
127
  """Return path for directory with given id"""
64
- session = env.Env.get().session
128
+ conn = Env.get().conn
65
129
  names: list[str] = []
66
130
  while True:
67
- dir = session.query(schema.Dir).filter(schema.Dir.id == dir_id).one()
131
+ q = sql.select(schema.Dir).where(schema.Dir.id == dir_id)
132
+ # _debug_print(for_update=False, msg=f'dir id={dir_id}')
133
+ row = conn.execute(q).one()
134
+ dir = schema.Dir(**row._mapping)
68
135
  if dir.md['name'] == '':
69
136
  break
70
137
  names.insert(0, dir.md['name'])
71
138
  dir_id = dir.parent_id
72
- assert dir_id is not None
73
- return '.'.join(names)
74
-
75
- def get_tbl_path(self, tbl_id: UUID) -> str:
76
- """Return path for table with given id"""
77
- session = env.Env.get().session
78
- tbl = session.query(schema.Table).filter(schema.Table.id == tbl_id).one()
79
- dir_path = self.get_dir_path(tbl.dir_id)
80
- return _join_path(dir_path, tbl.md['name'])
139
+ return Path('.'.join(names), empty_is_valid=True)
81
140
 
82
141
  @dataclasses.dataclass
83
142
  class DirEntry:
@@ -85,35 +144,154 @@ class Catalog:
85
144
  dir_entries: dict[str, Catalog.DirEntry]
86
145
  table: Optional[schema.Table]
87
146
 
88
- def get_dir_contents(self, dir_id: UUID, recursive: bool = False) -> dict[str, DirEntry]:
147
+ @_retry_loop
148
+ def get_dir_contents(self, dir_path: Path, recursive: bool = False) -> dict[str, DirEntry]:
149
+ dir = self._get_schema_object(dir_path, expected=Dir, raise_if_not_exists=True)
150
+ return self._get_dir_contents(dir._id, recursive=recursive)
151
+
152
+ def _get_dir_contents(self, dir_id: UUID, recursive: bool = False) -> dict[str, DirEntry]:
89
153
  """Returns a dict mapping the entry names to DirEntry objects"""
90
- session = env.Env.get().session
154
+ conn = Env.get().conn
91
155
  result: dict[str, Catalog.DirEntry] = {}
92
156
 
93
- dirs = session.query(schema.Dir).filter(schema.Dir.parent_id == dir_id).all()
94
- for dir in dirs:
157
+ q = sql.select(schema.Dir).where(schema.Dir.parent_id == dir_id)
158
+ # _debug_print(for_update=False, msg=f'dirs parent_id={dir_id}')
159
+ rows = conn.execute(q).all()
160
+ for row in rows:
161
+ dir = schema.Dir(**row._mapping)
95
162
  dir_contents: dict[str, Catalog.DirEntry] = {}
96
163
  if recursive:
97
- dir_contents = self.get_dir_contents(dir.id, recursive=True)
164
+ dir_contents = self._get_dir_contents(dir.id, recursive=True)
98
165
  result[dir.md['name']] = self.DirEntry(dir=dir, dir_entries=dir_contents, table=None)
99
166
 
100
- tbls = session.query(schema.Table).filter(schema.Table.dir_id == dir_id).all()
101
- for tbl in tbls:
167
+ q = sql.select(schema.Table).where(schema.Table.dir_id == dir_id)
168
+ # _debug_print(for_update=False, msg=f'tbls parent_id={dir_id}')
169
+ rows = conn.execute(q).all()
170
+ for row in rows:
171
+ tbl = schema.Table(**row._mapping)
102
172
  result[tbl.md['name']] = self.DirEntry(dir=None, dir_entries={}, table=tbl)
103
173
 
104
174
  return result
105
175
 
106
- def drop_dir(self, dir_id: UUID) -> None:
107
- """Delete the directory with the given id"""
108
- session = env.Env.get().session
109
- session.query(schema.Dir).filter(schema.Dir.id == dir_id).delete()
176
+ @_retry_loop
177
+ def move(self, path: Path, new_path: Path) -> None:
178
+ _, dest_dir, src_obj = self._prepare_dir_op(
179
+ add_dir_path=new_path.parent,
180
+ add_name=new_path.name,
181
+ drop_dir_path=path.parent,
182
+ drop_name=path.name,
183
+ raise_if_exists=True,
184
+ raise_if_not_exists=True,
185
+ )
186
+ src_obj._move(new_path.name, dest_dir._id)
187
+
188
+ def _prepare_dir_op(
189
+ self,
190
+ add_dir_path: Optional[Path] = None,
191
+ add_name: Optional[str] = None,
192
+ drop_dir_path: Optional[Path] = None,
193
+ drop_name: Optional[str] = None,
194
+ drop_expected: Optional[type[SchemaObject]] = None,
195
+ raise_if_exists: bool = False,
196
+ raise_if_not_exists: bool = False,
197
+ ) -> tuple[Optional[SchemaObject], Optional[SchemaObject], Optional[SchemaObject]]:
198
+ """
199
+ Validates paths and acquires locks needed for a directory operation, ie, add/drop/rename (add + drop) of a
200
+ directory entry.
201
+
202
+ The target entry is either a table or directory. The directory operation can include
203
+ - adding an entry (<add_dir_path>.<add_name>)
204
+ - dropping an entry (<drop_dir_path>.<drop_name>)
110
205
 
111
- def get_schema_object(
206
+ Returns: (existing SchemaObject of add path, Dir of add path, existing SchemaObject of drop path)
207
+
208
+ Locking protocol:
209
+ - X locks on the immediate parent directories of the added/dropped entries; this prevents concurrent
210
+ modifications of the parent
211
+ - lock parent before child
212
+ - if both add and drop (= two directories are involved), lock the directories in a pre-determined order
213
+ (in this case, by name) in order to prevent deadlocks between concurrent directory modifications
214
+ """
215
+ assert (add_dir_path is None) == (add_name is None)
216
+ assert (drop_dir_path is None) == (drop_name is None)
217
+ dir_paths: set[Path] = set()
218
+ if add_dir_path is not None:
219
+ dir_paths.add(add_dir_path)
220
+ if drop_dir_path is not None:
221
+ dir_paths.add(drop_dir_path)
222
+
223
+ add_dir: Optional[schema.Dir] = None
224
+ drop_dir: Optional[schema.Dir] = None
225
+ for p in sorted(list(dir_paths)):
226
+ dir = self._get_dir(p, for_update=True)
227
+ if dir is None:
228
+ raise excs.Error(f'Directory {str(p)!r} does not exist')
229
+ if p == add_dir_path:
230
+ add_dir = dir
231
+ if p == drop_dir_path:
232
+ drop_dir = dir
233
+
234
+ add_obj: Optional[SchemaObject] = None
235
+ if add_dir is not None:
236
+ add_obj = self._get_dir_entry(add_dir.id, add_name, for_update=True)
237
+ if add_obj is not None and raise_if_exists:
238
+ add_path = add_dir_path.append(add_name)
239
+ raise excs.Error(f'Path {str(add_path)!r} already exists')
240
+
241
+ drop_obj: Optional[SchemaObject] = None
242
+ if drop_dir is not None:
243
+ drop_path = drop_dir_path.append(drop_name)
244
+ drop_obj = self._get_dir_entry(drop_dir.id, drop_name, for_update=True)
245
+ if drop_obj is None and raise_if_not_exists:
246
+ raise excs.Error(f'Path {str(drop_path)!r} does not exist')
247
+ if drop_obj is not None and drop_expected is not None and not isinstance(drop_obj, drop_expected):
248
+ raise excs.Error(
249
+ f'{str(drop_path)!r} needs to be a {drop_expected._display_name()} '
250
+ f'but is a {type(drop_obj)._display_name()}'
251
+ )
252
+
253
+ add_dir_obj = Dir(add_dir.id, add_dir.parent_id, add_dir.md['name']) if add_dir is not None else None
254
+ return add_obj, add_dir_obj, drop_obj
255
+
256
+ def _get_dir_entry(self, dir_id: UUID, name: str, for_update: bool = False) -> Optional[SchemaObject]:
257
+ conn = Env.get().conn
258
+
259
+ # check for subdirectory
260
+ q = sql.select(schema.Dir).where(schema.Dir.parent_id == dir_id, schema.Dir.md['name'].astext == name)
261
+ if for_update:
262
+ q = q.with_for_update()
263
+ # _debug_print(for_update, f'dir name={name!r} parent={dir_id}')
264
+ # row = conn.execute(q).one_or_none()
265
+ # if row is not None:
266
+ # dir_record = schema.Dir(**row._mapping)
267
+ # return Dir(dir_record.id, dir_record.parent_id, name)
268
+ rows = conn.execute(q).all()
269
+ if len(rows) > 1:
270
+ assert False, rows
271
+ if len(rows) == 1:
272
+ dir_record = schema.Dir(**rows[0]._mapping)
273
+ return Dir(dir_record.id, dir_record.parent_id, name)
274
+
275
+ # check for table
276
+ q = sql.select(schema.Table.id).where(schema.Table.dir_id == dir_id, schema.Table.md['name'].astext == name)
277
+ if for_update:
278
+ q = q.with_for_update()
279
+ # _debug_print(for_update, f'table name={name!r} parent={dir_id}')
280
+ tbl_id = conn.execute(q).scalar_one_or_none()
281
+ if tbl_id is not None:
282
+ if not tbl_id in self._tbls:
283
+ self._tbls[tbl_id] = self._load_tbl(tbl_id)
284
+ return self._tbls[tbl_id]
285
+
286
+ return None
287
+
288
+ def _get_schema_object(
112
289
  self,
113
- path: str,
114
- expected: Optional[Type[SchemaObject]] = None,
290
+ path: Path,
291
+ expected: Optional[type[SchemaObject]] = None,
115
292
  raise_if_exists: bool = False,
116
293
  raise_if_not_exists: bool = False,
294
+ for_update: bool = False,
117
295
  ) -> Optional[SchemaObject]:
118
296
  """Return the schema object at the given path, or None if it doesn't exist.
119
297
 
@@ -123,42 +301,18 @@ class Catalog:
123
301
  - raise_if_not_exists is True and the path does not exist
124
302
  - expected is not None and the existing object has a different type
125
303
  """
126
- session = env.Env.get().session
127
- if path == '':
304
+ if path.is_root:
128
305
  # the root dir
129
306
  if expected is not None and expected is not Dir:
130
307
  raise excs.Error(f'{path!r} needs to be a {expected._display_name()} but is a {Dir._display_name()}')
131
- dir = self._get_dir(path)
308
+ dir = self._get_dir(path, for_update=for_update)
132
309
  return Dir(dir.id, dir.parent_id, dir.md['name'])
133
310
 
134
- components = path.split('.')
135
- parent_path = '.'.join(components[:-1])
136
- parent_dir = self._get_dir('.'.join(components[:-1]))
311
+ parent_path = path.parent
312
+ parent_dir = self._get_dir(parent_path, for_update=False)
137
313
  if parent_dir is None:
138
314
  raise excs.Error(f'Directory {parent_path!r} does not exist')
139
- name = components[-1]
140
-
141
- # check if path points to a directory
142
- obj: Optional[SchemaObject] = None
143
- dir = (
144
- session.query(schema.Dir)
145
- .filter(schema.Dir.parent_id == parent_dir.id, schema.Dir.md['name'].astext == name)
146
- .one_or_none()
147
- )
148
- if dir is not None:
149
- obj = Dir(dir.id, dir.parent_id, dir.md['name'])
150
- else:
151
- # check if it's a table
152
- row = (
153
- session.query(schema.Table.id)
154
- .filter(schema.Table.dir_id == parent_dir.id, schema.Table.md['name'].astext == name)
155
- .one_or_none()
156
- )
157
- if row is not None:
158
- tbl_id = row[0]
159
- if not tbl_id in self._tbls:
160
- self._tbls[tbl_id] = self._load_tbl(tbl_id)
161
- obj = self._tbls[tbl_id]
315
+ obj = self._get_dir_entry(parent_dir.id, path.name, for_update=for_update)
162
316
 
163
317
  if obj is None and raise_if_not_exists:
164
318
  raise excs.Error(f'Path {path!r} does not exist')
@@ -168,7 +322,7 @@ class Catalog:
168
322
  raise excs.Error(f'{path!r} needs to be a {expected._display_name()} but is a {type(obj)._display_name()}')
169
323
  return obj
170
324
 
171
- def get_tbl(self, tbl_id: UUID) -> Optional[Table]:
325
+ def get_table_by_id(self, tbl_id: UUID) -> Optional[Table]:
172
326
  if not tbl_id in self._tbls:
173
327
  tbl = self._load_tbl(tbl_id)
174
328
  if tbl is None:
@@ -176,21 +330,225 @@ class Catalog:
176
330
  self._tbls[tbl_id] = tbl
177
331
  return self._tbls[tbl_id]
178
332
 
179
- def add_tbl(self, tbl: Table) -> None:
180
- """Explicitly add a Table"""
333
+ @_retry_loop
334
+ def create_table(
335
+ self,
336
+ path: Path,
337
+ schema: dict[str, Any],
338
+ df: 'DataFrame',
339
+ if_exists: IfExistsParam,
340
+ primary_key: Optional[list[str]],
341
+ num_retained_versions: int,
342
+ comment: str,
343
+ media_validation: MediaValidation,
344
+ ) -> Table:
345
+ existing = self._handle_path_collision(path, InsertableTable, False, if_exists)
346
+ if existing is not None:
347
+ assert isinstance(existing, Table)
348
+ return existing
349
+
350
+ dir = self._get_schema_object(path.parent, expected=Dir, raise_if_not_exists=True)
351
+ assert dir is not None
352
+
353
+ tbl = InsertableTable._create(
354
+ dir._id,
355
+ path.name,
356
+ schema,
357
+ df,
358
+ primary_key=primary_key,
359
+ num_retained_versions=num_retained_versions,
360
+ comment=comment,
361
+ media_validation=media_validation,
362
+ )
181
363
  self._tbls[tbl._id] = tbl
364
+ return tbl
365
+
366
+ @_retry_loop
367
+ def create_view(
368
+ self,
369
+ path: Path,
370
+ base: TableVersionPath,
371
+ select_list: Optional[list[tuple[exprs.Expr, Optional[str]]]],
372
+ where: Optional[exprs.Expr],
373
+ additional_columns: Optional[dict[str, Any]],
374
+ is_snapshot: bool,
375
+ iterator: Optional[tuple[type[ComponentIterator], dict[str, Any]]],
376
+ num_retained_versions: int,
377
+ comment: str,
378
+ media_validation: MediaValidation,
379
+ if_exists: IfExistsParam,
380
+ ) -> Table:
381
+ from pixeltable.utils.filecache import FileCache
382
+
383
+ existing = self._handle_path_collision(path, View, is_snapshot, if_exists)
384
+ if existing is not None:
385
+ assert isinstance(existing, View)
386
+ return existing
387
+
388
+ dir = self._get_schema_object(path.parent, expected=Dir, raise_if_not_exists=True)
389
+ assert dir is not None
390
+ if iterator is None:
391
+ iterator_class, iterator_args = None, None
392
+ else:
393
+ iterator_class, iterator_args = iterator
394
+ view = View._create(
395
+ dir._id,
396
+ path.name,
397
+ base=base,
398
+ select_list=select_list,
399
+ additional_columns=additional_columns,
400
+ predicate=where,
401
+ is_snapshot=is_snapshot,
402
+ iterator_cls=iterator_class,
403
+ iterator_args=iterator_args,
404
+ num_retained_versions=num_retained_versions,
405
+ comment=comment,
406
+ media_validation=media_validation,
407
+ )
408
+ FileCache.get().emit_eviction_warnings()
409
+ self._tbls[view._id] = view
410
+ return view
411
+
412
+ @_retry_loop
413
+ def get_table(self, path: Path) -> Table:
414
+ obj = Catalog.get()._get_schema_object(path, expected=Table, raise_if_not_exists=True)
415
+ assert isinstance(obj, Table)
416
+ obj._tbl_version.get().ensure_md_loaded()
417
+ return obj
418
+
419
+ @_retry_loop
420
+ def drop_table(self, path: Path, if_not_exists: IfNotExistsParam, force: bool) -> None:
421
+ _, _, src_obj = self._prepare_dir_op(
422
+ drop_dir_path=path.parent,
423
+ drop_name=path.name,
424
+ drop_expected=Table,
425
+ raise_if_not_exists=if_not_exists == IfNotExistsParam.ERROR and not force,
426
+ )
427
+ if src_obj is None:
428
+ _logger.info(f'Skipped table {str(path)!r} (does not exist).')
429
+ return
430
+ assert isinstance(src_obj, Table)
431
+ self._drop_tbl(src_obj, force=force, is_replace=False)
432
+
433
+ def _drop_tbl(self, tbl: Table, force: bool, is_replace: bool) -> None:
434
+ """
435
+ Drop the table (and recursively its views, if force == True).
182
436
 
183
- def get_views(self, tbl_id: UUID) -> list[UUID]:
437
+ Locking protocol:
438
+ - X-lock base before X-locking any view
439
+ - deadlock-free wrt to TableVersion.insert() (insert propagation also proceeds top-down)
440
+ - X-locks parent dir prior to calling TableVersion.drop(): prevent concurrent creation of another SchemaObject
441
+ in the same directory with the same name (which could lead to duplicate names if we get rolled back)
442
+ """
443
+ view_ids = self.get_view_ids(tbl._id, for_update=True)
444
+ if len(view_ids) > 0:
445
+ if not force:
446
+ is_snapshot = tbl._tbl_version_path.is_snapshot()
447
+ obj_type_str = 'Snapshot' if is_snapshot else tbl._display_name().capitalize()
448
+ msg: str
449
+ if is_replace:
450
+ msg = (
451
+ f'{obj_type_str} {tbl._path()} already exists and has dependents. '
452
+ "Use `if_exists='replace_force'` to replace it."
453
+ )
454
+ else:
455
+ msg = f'{obj_type_str} {tbl._path()} has dependents.'
456
+ raise excs.Error(msg)
457
+
458
+ for view_id in view_ids:
459
+ view = self.get_table_by_id(view_id)
460
+ self._drop_tbl(view, force=force, is_replace=is_replace)
461
+
462
+ _ = self.get_dir(tbl._dir_id, for_update=True) # X-lock the parent directory
463
+ tbl._drop()
464
+ assert tbl._id in self._tbls
465
+ del self._tbls[tbl._id]
466
+ _logger.info(f'Dropped table `{tbl._path()}`.')
467
+
468
+ @_retry_loop
469
+ def create_dir(self, path: Path, if_exists: IfExistsParam, parents: bool) -> Dir:
470
+ # existing = self._handle_path_collision(path, Dir, False, if_exists)
471
+ # if existing is not None:
472
+ # assert isinstance(existing, Dir)
473
+ # return existing
474
+ #
475
+ # parent = self._get_schema_object(path.parent)
476
+ # assert parent is not None
477
+ # dir = Dir._create(parent._id, path.name)
478
+ # Env.get().console_logger.info(f'Created directory {path!r}.')
479
+ # return dir
480
+
481
+ if parents:
482
+ # start walking down from the root
483
+ last_parent: Optional[SchemaObject] = None
484
+ for ancestor in path.ancestors():
485
+ ancestor_obj = self._get_schema_object(ancestor, expected=Dir)
486
+ assert ancestor_obj is not None or last_parent is not None
487
+ last_parent = Dir._create(last_parent._id, ancestor.name) if ancestor_obj is None else ancestor_obj
488
+ parent = last_parent
489
+ else:
490
+ parent = self._get_schema_object(path.parent)
491
+ existing = self._handle_path_collision(path, Dir, False, if_exists)
492
+ if existing is not None:
493
+ assert isinstance(existing, Dir)
494
+ return existing
495
+ assert parent is not None
496
+ dir = Dir._create(parent._id, path.name)
497
+ Env.get().console_logger.info(f'Created directory {str(path)!r}.')
498
+ return dir
499
+
500
+ @_retry_loop
501
+ def drop_dir(self, path: Path, if_not_exists: IfNotExistsParam, force: bool) -> None:
502
+ _, _, schema_obj = self._prepare_dir_op(
503
+ drop_dir_path=path.parent,
504
+ drop_name=path.name,
505
+ drop_expected=Dir,
506
+ raise_if_not_exists=if_not_exists == IfNotExistsParam.ERROR and not force,
507
+ )
508
+ if schema_obj is None:
509
+ _logger.info(f'Directory {str(path)!r} does not exist; skipped drop_dir().')
510
+ return
511
+ self._drop_dir(schema_obj._id, path, force=force)
512
+
513
+ def _drop_dir(self, dir_id: UUID, dir_path: Path, force: bool = False) -> None:
514
+ conn = Env.get().conn
515
+ if not force:
516
+ # check for existing entries
517
+ q = sql.select(sql.func.count()).select_from(schema.Dir).where(schema.Dir.parent_id == dir_id)
518
+ num_subdirs = conn.execute(q).scalar()
519
+ q = sql.select(sql.func.count()).select_from(schema.Table).where(schema.Table.dir_id == dir_id)
520
+ num_tbls = conn.execute(q).scalar()
521
+ if num_subdirs + num_tbls > 0:
522
+ raise excs.Error(f'Directory {dir_path!r} is not empty.')
523
+
524
+ # drop existing subdirs
525
+ dir_q = sql.select(schema.Dir).where(schema.Dir.parent_id == dir_id).with_for_update()
526
+ for row in conn.execute(dir_q).all():
527
+ self._drop_dir(row.id, dir_path.append(row.md['name']), force=True)
528
+
529
+ # drop existing tables
530
+ tbl_q = sql.select(schema.Table).where(schema.Table.dir_id == dir_id).with_for_update()
531
+ for row in conn.execute(tbl_q).all():
532
+ tbl = self.get_table_by_id(row.id)
533
+ # this table would have been dropped already if it's a view of a base we dropped earlier
534
+ if tbl is not None:
535
+ self._drop_tbl(tbl, force=True, is_replace=False)
536
+
537
+ # self.drop_dir(dir_id)
538
+ # _debug_print(for_update=True, msg=f'drop dir id={dir_id}')
539
+ conn.execute(sql.delete(schema.Dir).where(schema.Dir.id == dir_id))
540
+ _logger.info(f'Removed directory {str(dir_path)!r}.')
541
+
542
+ def get_view_ids(self, tbl_id: UUID, for_update: bool = False) -> list[UUID]:
184
543
  """Return the ids of views that directly reference the given table"""
185
- session = env.Env.get().session
186
- q = session.query(schema.Table.id).filter(sql.text(f"md->'view_md'->'base_versions'->0->>0 = {tbl_id.hex!r}"))
187
- result = [r[0] for r in q.all()]
544
+ conn = Env.get().conn
545
+ q = sql.select(schema.Table.id).where(sql.text(f"md->'view_md'->'base_versions'->0->>0 = {tbl_id.hex!r}"))
546
+ if for_update:
547
+ q = q.with_for_update()
548
+ # _debug_print(for_update=False, msg=f'views of tbl id={tbl_id}')
549
+ result = [r[0] for r in conn.execute(q).all()]
188
550
  return result
189
551
 
190
- def remove_tbl(self, tbl_id: UUID) -> None:
191
- assert tbl_id in self._tbls
192
- del self._tbls[tbl_id]
193
-
194
552
  def get_tbl_version(self, tbl_id: UUID, effective_version: Optional[int]) -> Optional[TableVersion]:
195
553
  if (tbl_id, effective_version) not in self._tbl_versions:
196
554
  self._tbl_versions[(tbl_id, effective_version)] = self._load_tbl_version(tbl_id, effective_version)
@@ -208,41 +566,54 @@ class Catalog:
208
566
  assert (tbl_version.id, tbl_version.effective_version) in self._tbl_versions
209
567
  del self._tbl_versions[(tbl_version.id, tbl_version.effective_version)]
210
568
 
211
- def get_dir(self, dir_id: UUID) -> Optional[Dir]:
569
+ def get_dir(self, dir_id: UUID, for_update: bool = False) -> Optional[Dir]:
212
570
  """Return the Dir with the given id, or None if it doesn't exist"""
213
- session = env.Env.get().session
214
- dir_record = session.query(schema.Dir).filter(schema.Dir.id == dir_id).one_or_none()
215
- if dir_record is None:
571
+ conn = Env.get().conn
572
+ q = sql.select(schema.Dir).where(schema.Dir.id == dir_id)
573
+ if for_update:
574
+ q = q.with_for_update()
575
+ # _debug_print(for_update=False, msg=f'dir id={dir_id!r}')
576
+ row = conn.execute(q).one_or_none()
577
+ if row is None:
216
578
  return None
579
+ dir_record = schema.Dir(**row._mapping)
217
580
  return Dir(dir_record.id, dir_record.parent_id, dir_record.md['name'])
218
581
 
219
- def _get_dir(self, path: str) -> Optional[schema.Dir]:
220
- session = env.Env.get().session
221
- assert session is not None
222
- if path == '':
223
- return session.query(schema.Dir).filter(schema.Dir.parent_id.is_(None)).one()
582
+ def _get_dir(self, path: Path, for_update: bool = False) -> Optional[schema.Dir]:
583
+ """
584
+ Locking protocol:
585
+ - S locks on all ancestors
586
+ - X lock on dir if for_update == True, otherwise also an S lock
587
+ """
588
+ conn = Env.get().conn
589
+ if path.is_root:
590
+ q = sql.select(schema.Dir).where(schema.Dir.parent_id.is_(None))
591
+ if for_update:
592
+ q = q.with_for_update()
593
+ # _debug_print(for_update, 'root dir')
594
+ row = conn.execute(q).one()
595
+ return schema.Dir(**row._mapping)
224
596
  else:
225
- components = path.split('.')
226
- parent_path = '.'.join(components[:-1])
227
- parent_dir = self._get_dir(parent_path)
597
+ parent_dir = self._get_dir(path.parent, for_update=False)
228
598
  if parent_dir is None:
229
599
  return None
230
- name = components[-1]
231
- dir = (
232
- session.query(schema.Dir)
233
- .filter(schema.Dir.parent_id == parent_dir.id, schema.Dir.md['name'].astext == name)
234
- .one_or_none()
600
+ q = sql.select(schema.Dir).where(
601
+ schema.Dir.parent_id == parent_dir.id, schema.Dir.md['name'].astext == path.name
235
602
  )
236
- return dir
603
+ if for_update:
604
+ q = q.with_for_update()
605
+ # _debug_print(for_update, f'dir {str(path)}')
606
+ row = conn.execute(q).one_or_none()
607
+ return schema.Dir(**row._mapping) if row is not None else None
237
608
 
238
609
  def _load_tbl(self, tbl_id: UUID) -> Optional[Table]:
239
610
  _logger.info(f'Loading table {tbl_id}')
240
611
  from .insertable_table import InsertableTable
241
612
  from .view import View
242
613
 
243
- session = env.Env.get().session
244
- tbl_record, schema_version_record = (
245
- session.query(schema.Table, schema.TableSchemaVersion)
614
+ conn = Env.get().conn
615
+ q = (
616
+ sql.select(schema.Table, schema.TableSchemaVersion)
246
617
  .join(schema.TableSchemaVersion)
247
618
  .where(schema.Table.id == schema.TableSchemaVersion.tbl_id)
248
619
  # Table.md['current_schema_version'] == TableSchemaVersion.schema_version
@@ -253,10 +624,12 @@ class Catalog:
253
624
  )
254
625
  )
255
626
  .where(schema.Table.id == tbl_id)
256
- .one_or_none()
257
627
  )
258
- if tbl_record is None:
628
+ # _debug_print(for_update=False, msg=f'load table id={tbl_id!r}')
629
+ row = conn.execute(q).one_or_none()
630
+ if row is None:
259
631
  return None
632
+ tbl_record, schema_version_record = _unpack_row(row, [schema.Table, schema.TableSchemaVersion])
260
633
 
261
634
  tbl_md = schema.md_from_dict(schema.TableMd, tbl_record.md)
262
635
  view_md = tbl_md.view_md
@@ -293,9 +666,9 @@ class Catalog:
293
666
 
294
667
  def _load_tbl_version(self, tbl_id: UUID, effective_version: Optional[int]) -> Optional[TableVersion]:
295
668
  _logger.info(f'Loading table version: {tbl_id}:{effective_version}')
296
- session = env.Env.get().session
669
+ conn = Env.get().conn
297
670
  q = (
298
- session.query(schema.Table, schema.TableSchemaVersion)
671
+ sql.select(schema.Table, schema.TableSchemaVersion)
299
672
  .select_from(schema.Table)
300
673
  .where(schema.Table.id == tbl_id)
301
674
  .join(schema.TableSchemaVersion)
@@ -337,19 +710,20 @@ class Catalog:
337
710
  )
338
711
  )
339
712
 
340
- tbl_record, schema_version_record = q.one_or_none()
713
+ row = conn.execute(q).one_or_none()
714
+ tbl_record, schema_version_record = _unpack_row(row, [schema.Table, schema.TableSchemaVersion])
341
715
  tbl_md = schema.md_from_dict(schema.TableMd, tbl_record.md)
342
716
  schema_version_md = schema.md_from_dict(schema.TableSchemaVersionMd, schema_version_record.md)
343
717
  view_md = tbl_md.view_md
344
718
 
345
719
  # load mutable view ids
346
- q = session.query(schema.Table.id).filter(
720
+ q = sql.select(schema.Table.id).where(
347
721
  sql.text(
348
722
  f"md->'view_md'->'base_versions'->0->>0 = {tbl_id.hex!r} "
349
723
  "AND md->'view_md'->'base_versions'->0->1 IS NULL"
350
724
  )
351
725
  )
352
- mutable_view_ids = [r[0] for r in q.all()]
726
+ mutable_view_ids = [r[0] for r in conn.execute(q).all()]
353
727
  mutable_views = [TableVersionHandle(id, None) for id in mutable_view_ids]
354
728
 
355
729
  if view_md is None:
@@ -384,8 +758,8 @@ class Catalog:
384
758
 
385
759
  def _init_store(self) -> None:
386
760
  """One-time initialization of the stored catalog. Idempotent."""
387
- with env.Env.get().begin_xact():
388
- session = env.Env.get().session
761
+ with Env.get().begin_xact():
762
+ session = Env.get().session
389
763
  if session.query(sql.func.count(schema.Dir.id)).scalar() > 0:
390
764
  return
391
765
  # create a top-level directory, so that every schema object has a directory
@@ -393,5 +767,37 @@ class Catalog:
393
767
  dir_record = schema.Dir(parent_id=None, md=dataclasses.asdict(dir_md))
394
768
  session.add(dir_record)
395
769
  session.flush()
396
- session.commit()
397
770
  _logger.info(f'Initialized catalog')
771
+
772
+ def _handle_path_collision(
773
+ self, path: Path, expected_obj_type: type[SchemaObject], expected_snapshot: bool, if_exists: IfExistsParam
774
+ ) -> Optional[SchemaObject]:
775
+ obj, _, _ = self._prepare_dir_op(add_dir_path=path.parent, add_name=path.name)
776
+
777
+ if if_exists == IfExistsParam.ERROR and obj is not None:
778
+ raise excs.Error(f'Path {path!r} is an existing {type(obj)._display_name()}')
779
+ else:
780
+ is_snapshot = isinstance(obj, View) and obj._tbl_version_path.is_snapshot()
781
+ if obj is not None and (not isinstance(obj, expected_obj_type) or (expected_snapshot and not is_snapshot)):
782
+ obj_type_str = 'snapshot' if expected_snapshot else expected_obj_type._display_name()
783
+ raise excs.Error(
784
+ f'Path {path!r} already exists but is not a {obj_type_str}. Cannot {if_exists.name.lower()} it.'
785
+ )
786
+
787
+ if obj is None:
788
+ return None
789
+ if if_exists == IfExistsParam.IGNORE:
790
+ return obj
791
+
792
+ # drop the existing schema object
793
+ if isinstance(obj, Dir):
794
+ dir_contents = self._get_dir_contents(obj._id)
795
+ if len(dir_contents) > 0 and if_exists == IfExistsParam.REPLACE:
796
+ raise excs.Error(
797
+ f'Directory {path!r} already exists and is not empty. Use `if_exists="replace_force"` to replace it.'
798
+ )
799
+ self._drop_dir(obj._id, path, force=True)
800
+ else:
801
+ assert isinstance(obj, Table)
802
+ self._drop_tbl(obj, force=if_exists == IfExistsParam.REPLACE_FORCE, is_replace=True)
803
+ return None