pixeltable 0.3.11__py3-none-any.whl → 0.3.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (53)
  1. pixeltable/__init__.py +2 -27
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/catalog.py +309 -59
  4. pixeltable/catalog/globals.py +5 -5
  5. pixeltable/catalog/insertable_table.py +13 -1
  6. pixeltable/catalog/path.py +13 -6
  7. pixeltable/catalog/table.py +28 -41
  8. pixeltable/catalog/table_version.py +100 -72
  9. pixeltable/catalog/view.py +35 -9
  10. pixeltable/dataframe.py +2 -2
  11. pixeltable/exceptions.py +20 -2
  12. pixeltable/exec/expr_eval/evaluators.py +0 -4
  13. pixeltable/exec/expr_eval/expr_eval_node.py +0 -1
  14. pixeltable/exec/sql_node.py +3 -3
  15. pixeltable/exprs/json_path.py +1 -5
  16. pixeltable/func/__init__.py +1 -1
  17. pixeltable/func/aggregate_function.py +1 -1
  18. pixeltable/func/callable_function.py +1 -1
  19. pixeltable/func/expr_template_function.py +2 -2
  20. pixeltable/func/function.py +3 -4
  21. pixeltable/func/query_template_function.py +87 -4
  22. pixeltable/func/tools.py +1 -1
  23. pixeltable/func/udf.py +1 -1
  24. pixeltable/functions/__init__.py +1 -0
  25. pixeltable/functions/anthropic.py +1 -1
  26. pixeltable/functions/bedrock.py +130 -0
  27. pixeltable/functions/huggingface.py +7 -6
  28. pixeltable/functions/image.py +15 -16
  29. pixeltable/functions/mistralai.py +3 -2
  30. pixeltable/functions/openai.py +9 -8
  31. pixeltable/functions/together.py +4 -3
  32. pixeltable/globals.py +7 -2
  33. pixeltable/io/datarows.py +4 -3
  34. pixeltable/io/label_studio.py +17 -17
  35. pixeltable/io/pandas.py +13 -12
  36. pixeltable/io/table_data_conduit.py +8 -2
  37. pixeltable/metadata/__init__.py +1 -1
  38. pixeltable/metadata/converters/convert_19.py +2 -2
  39. pixeltable/metadata/converters/convert_31.py +11 -0
  40. pixeltable/metadata/converters/convert_32.py +15 -0
  41. pixeltable/metadata/converters/convert_33.py +17 -0
  42. pixeltable/metadata/notes.py +3 -0
  43. pixeltable/metadata/schema.py +26 -1
  44. pixeltable/plan.py +2 -3
  45. pixeltable/share/packager.py +9 -25
  46. pixeltable/share/publish.py +20 -9
  47. pixeltable/store.py +7 -4
  48. pixeltable/utils/exception_handler.py +59 -0
  49. {pixeltable-0.3.11.dist-info → pixeltable-0.3.13.dist-info}/METADATA +1 -1
  50. {pixeltable-0.3.11.dist-info → pixeltable-0.3.13.dist-info}/RECORD +53 -48
  51. {pixeltable-0.3.11.dist-info → pixeltable-0.3.13.dist-info}/WHEEL +1 -1
  52. {pixeltable-0.3.11.dist-info → pixeltable-0.3.13.dist-info}/LICENSE +0 -0
  53. {pixeltable-0.3.11.dist-info → pixeltable-0.3.13.dist-info}/entry_points.txt +0 -0
pixeltable/__init__.py CHANGED
@@ -4,7 +4,7 @@ from .__version__ import __version__, __version_tuple__
  from .catalog import Column, InsertableTable, Table, UpdateStatus, View
  from .dataframe import DataFrame
  from .exceptions import Error, ExprEvalError, PixeltableWarning
- from .func import Aggregator, Function, expr_udf, query, uda, udf
+ from .func import Aggregator, Function, expr_udf, query, retrieval_udf, uda, udf
  from .globals import (
      array,
      configure_logging,
@@ -24,32 +24,7 @@ from .globals import (
      tool,
      tools,
  )
- from .type_system import (
-     Array,
-     ArrayType,
-     Audio,
-     AudioType,
-     Bool,
-     BoolType,
-     ColumnType,
-     Document,
-     DocumentType,
-     Float,
-     FloatType,
-     Image,
-     ImageType,
-     Int,
-     IntType,
-     Json,
-     JsonType,
-     Required,
-     String,
-     StringType,
-     Timestamp,
-     TimestampType,
-     Video,
-     VideoType,
- )
+ from .type_system import Array, Audio, Bool, Document, Float, Image, Int, Json, Required, String, Timestamp, Video
 
  # This import must go last to avoid circular imports.
  from . import ext, functions, io, iterators # isort: skip
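
Note: after this change only the value-style type names (Array, Audio, Bool, ..., Video) remain re-exported from the package root; the *Type classes and ColumnType are dropped from the top-level namespace (presumably still importable from pixeltable.type_system), and retrieval_udf is newly re-exported from pixeltable.func. A minimal usage sketch under those assumptions, with a hypothetical table name and schema:

    import pixeltable as pxt

    # The value-style names double as column type specs in a schema dict.
    films = pxt.create_table('films', {'title': pxt.String, 'year': pxt.Int, 'poster': pxt.Image})
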
pixeltable/__version__.py CHANGED
@@ -1,3 +1,3 @@
  # These version placeholders will be replaced during build.
- __version__ = '0.3.11'
- __version_tuple__ = (0, 3, 11)
+ __version__ = '0.3.13'
+ __version_tuple__ = (0, 3, 13)
@@ -33,16 +33,6 @@ if TYPE_CHECKING:
  _logger = logging.getLogger('pixeltable')
 
 
- def _lock_str(for_update: bool) -> str:
-     return 'X' if for_update else 'S'
-
-
- # TODO: remove once the concurrent update behavior has been debugged
- # def _debug_print(for_update: bool, msg: str) -> None:
- #     return
- #     print(f'{datetime.datetime.now()}: {_lock_str(for_update)}: {msg}')
-
-
  def _unpack_row(
      row: Optional[sql.engine.Row], entities: list[type[sql.orm.decl_api.DeclarativeBase]]
  ) -> Optional[list[Any]]:
@@ -82,11 +72,14 @@ def _retry_loop(op: Callable[..., T]) -> Callable[..., T]:
              with Env.get().begin_xact():
                  return op(*args, **kwargs)
          except sql.exc.DBAPIError as e:
-             if isinstance(e.orig, psycopg.errors.SerializationFailure) and num_remaining_retries > 0:
-                 num_remaining_retries -= 1
-                 print(f'serialization failure:\n{e}')
-                 print('retrying ************************************************************')
-                 time.sleep(1)
+             if isinstance(e.orig, psycopg.errors.SerializationFailure):
+                 if num_remaining_retries > 0:
+                     num_remaining_retries -= 1
+                     # print(f'serialization failure:\n{e}')
+                     # print('retrying ************************************************************')
+                     time.sleep(1)
+                 else:
+                     raise excs.Error(f'Serialization retry limit ({_MAX_RETRIES}) exceeded') from e
              else:
                  raise
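
Note: the reworked _retry_loop now retries serialization failures a bounded number of times and then raises excs.Error, instead of printing and silently giving up once retries are exhausted. A minimal sketch of that retry pattern, using a stand-in exception class rather than psycopg/SQLAlchemy (illustration only, not the package's actual decorator):

    import functools
    import time

    class SerializationConflict(Exception):
        """Stand-in for psycopg.errors.SerializationFailure (illustration only)."""

    _MAX_RETRIES = 3

    def retry_on_conflict(op):
        """Bounded-retry wrapper mirroring the behavior of _retry_loop above."""
        @functools.wraps(op)
        def wrapper(*args, **kwargs):
            remaining = _MAX_RETRIES
            while True:
                try:
                    return op(*args, **kwargs)
                except SerializationConflict as e:
                    if remaining > 0:
                        remaining -= 1
                        time.sleep(1)  # back off, then re-run the whole transaction
                    else:
                        raise RuntimeError(f'Serialization retry limit ({_MAX_RETRIES}) exceeded') from e
        return wrapper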
 
@@ -123,20 +116,39 @@ class Catalog:
          self._tbls = {} # don't use a defaultdict here, it doesn't cooperate with the debugger
          self._init_store()
 
+     @classmethod
+     def _lock_dir(cls, parent_id: Optional[UUID], dir_id: Optional[UUID], dir_name: Optional[str]) -> None:
+         """Update directory record(s) to sequentialize thread access. Lock is released when transaction commits.
+         If dir_id is present, then all other conditions are ignored.
+         Note that (parent_id==None) is a valid where condition.
+         If dir_id is not specified, the user from the environment is added to the directory filters.
+         """
+         user = Env.get().user
+         conn = Env.get().conn
+         q = sql.update(schema.Dir).values(lock_dummy=1)
+         if dir_id is not None:
+             q = q.where(schema.Dir.id == dir_id)
+         else:
+             q = q.where(schema.Dir.parent_id == parent_id)
+             if dir_name is not None:
+                 q = q.where(schema.Dir.md['name'].astext == dir_name)
+             if user is not None:
+                 q = q.where(schema.Dir.md['user'].astext == user)
+         conn.execute(q)
+
      def get_dir_path(self, dir_id: UUID) -> Path:
          """Return path for directory with given id"""
          conn = Env.get().conn
          names: list[str] = []
          while True:
              q = sql.select(schema.Dir).where(schema.Dir.id == dir_id)
-             # _debug_print(for_update=False, msg=f'dir id={dir_id}')
              row = conn.execute(q).one()
              dir = schema.Dir(**row._mapping)
              if dir.md['name'] == '':
                  break
              names.insert(0, dir.md['name'])
              dir_id = dir.parent_id
-         return Path('.'.join(names), empty_is_valid=True)
+         return Path('.'.join(names), empty_is_valid=True, allow_system_paths=True)
 
      @dataclasses.dataclass
      class DirEntry:
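
Note: _lock_dir serializes concurrent catalog writers with a no-op UPDATE (lock_dummy=1) on the matching Dir rows; on Postgres this takes row-level exclusive locks that are held until the surrounding transaction commits. A minimal sketch of the same pattern against a hypothetical table, rendering the statement only (no database needed):

    import sqlalchemy as sa

    # Hypothetical 'dirs' table standing in for schema.Dir; illustration only.
    metadata = sa.MetaData()
    dirs = sa.Table(
        'dirs', metadata,
        sa.Column('id', sa.Integer, primary_key=True),
        sa.Column('parent_id', sa.Integer),
        sa.Column('lock_dummy', sa.Integer),
    )

    # The dummy write locks every matching row until commit, sequentializing other writers.
    stmt = sa.update(dirs).values(lock_dummy=1).where(dirs.c.parent_id == 42)
    print(stmt.compile(compile_kwargs={'literal_binds': True}))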
@@ -155,7 +167,6 @@ class Catalog:
          result: dict[str, Catalog.DirEntry] = {}
 
          q = sql.select(schema.Dir).where(schema.Dir.parent_id == dir_id)
-         # _debug_print(for_update=False, msg=f'dirs parent_id={dir_id}')
          rows = conn.execute(q).all()
          for row in rows:
              dir = schema.Dir(**row._mapping)
@@ -165,7 +176,6 @@
              result[dir.md['name']] = self.DirEntry(dir=dir, dir_entries=dir_contents, table=None)
 
          q = sql.select(schema.Table).where(schema.Table.dir_id == dir_id)
-         # _debug_print(for_update=False, msg=f'tbls parent_id={dir_id}')
          rows = conn.execute(q).all()
          for row in rows:
              tbl = schema.Table(**row._mapping)
@@ -175,6 +185,9 @@
 
      @_retry_loop
      def move(self, path: Path, new_path: Path) -> None:
+         self._move(path, new_path)
+
+     def _move(self, path: Path, new_path: Path) -> None:
          _, dest_dir, src_obj = self._prepare_dir_op(
              add_dir_path=new_path.parent,
              add_name=new_path.name,
@@ -258,17 +271,14 @@
          conn = Env.get().conn
 
          # check for subdirectory
+         if for_update:
+             self._lock_dir(dir_id, None, name)
          q = sql.select(schema.Dir).where(
              schema.Dir.parent_id == dir_id, schema.Dir.md['name'].astext == name, schema.Dir.md['user'].astext == user
          )
-         if for_update:
-             q = q.with_for_update()
-         # _debug_print(for_update, f'dir name={name!r} parent={dir_id}')
-         # row = conn.execute(q).one_or_none()
-         # if row is not None:
-         #     dir_record = schema.Dir(**row._mapping)
-         #     return Dir(dir_record.id, dir_record.parent_id, name)
          rows = conn.execute(q).all()
+         # The condition below can occur if there is a synchronization failure across multiple processes
+         # It indicates database inconsistency.
          if len(rows) > 1:
              raise AssertionError(rows)
          if len(rows) == 1:
@@ -283,7 +293,6 @@
          )
          if for_update:
              q = q.with_for_update()
-         # _debug_print(for_update, f'table name={name!r} parent={dir_id}')
          tbl_id = conn.execute(q).scalar_one_or_none()
          if tbl_id is not None:
              if tbl_id not in self._tbls:
@@ -422,6 +431,155 @@
          self._tbls[view._id] = view
          return view
 
+     @_retry_loop
+     def create_replica(self, path: Path, md: list[schema.FullTableMd], if_exists: IfExistsParam) -> Table:
+         """
+         Creates table, table_version, and table_schema_version records for a replica with the given metadata.
+         The metadata should be presented in standard "ancestor order", with the table being replicated at
+         list position 0 and the (root) base table at list position -1.
+         """
+         tbl_id = UUID(md[0].tbl_md.tbl_id)
+
+         # First handle path collisions (if_exists='ignore' or 'replace' or etc).
+         existing = self._handle_path_collision(path, View, False, if_exists)
+         if existing is not None:
+             if existing._id != tbl_id:
+                 raise excs.Error(
+                     f"An attempt was made to create a replica table at {path!r} with if_exists='ignore', "
+                     'but a different table already exists at that location.'
+                 )
+             assert isinstance(existing, View)
+             return existing
+
+         # Ensure that the system directory exists.
+         self._create_dir(Path('_system', allow_system_paths=True), if_exists=IfExistsParam.IGNORE, parents=False)
+
+         # Now check to see if this table already exists in the catalog.
+         # TODO: Handle concurrency in create_replica()
+         existing = Catalog.get().get_table_by_id(tbl_id)
+         if existing is not None:
+             existing_path = Path(existing._path(), allow_system_paths=True)
+             # It does exist. If it's a non-system table, that's an error: it's already been replicated.
+             if not existing_path.is_system_path:
+                 raise excs.Error(
+                     f'That table has already been replicated as {existing._path()!r}. \n'
+                     f'Drop the existing replica if you wish to re-create it.'
+                 )
+             # If it's a system table, then this means it was created at some point as the ancestor of some other
+             # table (a snapshot-over-snapshot scenario). In that case, we simply move it to the new (named) location.
+             self._move(existing_path, path)
+
+         # Now store the metadata for this replica. In the case where the table already exists (and was just moved
+         # into a named location), this will be a no-op, but it still serves to validate that the newly received
+         # metadata is identical to what's in the catalog.
+         self.__store_replica_md(path, md[0])
+
+         # Now store the metadata for all of this table's proper ancestors. If one or more proper ancestors
+         # do not yet exist in the store, they will be created as anonymous system tables.
+         for ancestor_md in md[1:]:
+             ancestor_id = UUID(ancestor_md.tbl_md.tbl_id)
+             replica = Catalog.get().get_table_by_id(ancestor_id)
+             replica_path: Path
+             if replica is None:
+                 # We've never seen this table before. Create a new anonymous system table for it.
+                 replica_path = Path(f'_system.replica_{ancestor_id.hex}', allow_system_paths=True)
+             else:
+                 # The table already exists in the catalog. The existing path might be a system path (if the table
+                 # was created as an anonymous base table of some other table), or it might not (if it's a snapshot
+                 # that was directly replicated by the user at some point). In either case, use the existing path.
+                 replica_path = Path(replica._path(), allow_system_paths=True)
+
+             # Store the metadata; it could be a new version (in which case a new record will be created) or a
+             # known version (in which case the newly received metadata will be validated as identical).
+             self.__store_replica_md(replica_path, ancestor_md)
+
+         # Update the catalog (as a final step, after all DB operations completed successfully).
+         # Only the table being replicated is actually made visible in the catalog.
+         self._tbls[tbl_id] = self._load_tbl(tbl_id)
+         return self._tbls[tbl_id]
+
+     def __store_replica_md(self, path: Path, md: schema.FullTableMd) -> None:
+         _logger.info(f'Creating replica table at {path!r} with ID: {md.tbl_md.tbl_id}')
+         # TODO: Handle concurrency
+         dir = self._get_schema_object(path.parent, expected=Dir, raise_if_not_exists=True)
+         assert dir is not None
+
+         conn = Env.get().conn
+         tbl_id = md.tbl_md.tbl_id
+
+         new_tbl_md: Optional[schema.TableMd] = None
+         new_version_md: Optional[schema.TableVersionMd] = None
+         new_schema_version_md: Optional[schema.TableSchemaVersionMd] = None
+
+         # We need to ensure that the table metadata in the catalog always reflects the latest observed version of
+         # this table. (In particular, if this is a base table, then its table metadata need to be consistent
+         # with the latest version of this table having a replicated view somewhere in the catalog.)
+         q: sql.Executable = sql.select(schema.Table.md).where(schema.Table.id == tbl_id)
+         existing_md_row = conn.execute(q).one_or_none()
+
+         if existing_md_row is None:
+             # No existing table, so create a new record.
+             q = sql.insert(schema.Table.__table__).values(
+                 id=tbl_id,
+                 dir_id=dir._id,
+                 md=dataclasses.asdict(
+                     dataclasses.replace(md.tbl_md, name=path.name, user=Env.get().user, is_replica=True)
+                 ),
+             )
+             conn.execute(q)
+         else:
+             assert existing_md_row.md['is_replica']
+             if md.tbl_md.current_version > existing_md_row.md['current_version']:
+                 # New metadata is more recent than the metadata currently stored in the DB; we'll update the record
+                 # in place in the DB.
+                 new_tbl_md = dataclasses.replace(md.tbl_md, name=path.name, user=Env.get().user, is_replica=True)
+
+         # Now see if a TableVersion record already exists in the DB for this table version. If not, insert it. If
+         # it already exists, check that the existing record is identical to the new one.
+         q = (
+             sql.select(schema.TableVersion.md)
+             .where(schema.TableVersion.tbl_id == tbl_id)
+             .where(sql.text(f"({schema.TableVersion.__table__}.md->>'version')::int = {md.version_md.version}"))
+         )
+         existing_version_md_row = conn.execute(q).one_or_none()
+         if existing_version_md_row is None:
+             new_version_md = md.version_md
+         else:
+             existing_version_md = schema.md_from_dict(schema.TableVersionMd, existing_version_md_row.md)
+             if existing_version_md != md.version_md:
+                 raise excs.Error(
+                     f'The version metadata for the replica {path!r}:{md.version_md.version} is inconsistent with '
+                     'the metadata recorded from a prior replica.\n'
+                     'This is likely due to data corruption in the replicated table.'
+                 )
+
+         # Do the same thing for TableSchemaVersion.
+         q = (
+             sql.select(schema.TableSchemaVersion.md)
+             .where(schema.TableSchemaVersion.tbl_id == tbl_id)
+             .where(
+                 sql.text(
+                     f"({schema.TableSchemaVersion.__table__}.md->>'schema_version')::int = "
+                     f'{md.schema_version_md.schema_version}'
+                 )
+             )
+         )
+         existing_schema_version_md_row = conn.execute(q).one_or_none()
+         if existing_schema_version_md_row is None:
+             new_schema_version_md = md.schema_version_md
+         else:
+             existing_schema_version_md = schema.md_from_dict(
+                 schema.TableSchemaVersionMd, existing_schema_version_md_row.md
+             )
+             if existing_schema_version_md != md.schema_version_md:
+                 raise excs.Error(
+                     f'The schema version metadata for the replica {path!r}:{md.schema_version_md.schema_version} '
+                     'is inconsistent with the metadata recorded from a prior replica.\n'
+                     'This is likely due to data corruption in the replicated table.'
+                 )
+
+         self.store_tbl_md(UUID(tbl_id), new_tbl_md, new_version_md, new_schema_version_md)
+
      @_retry_loop
      def get_table(self, path: Path) -> Table:
          obj = Catalog.get()._get_schema_object(path, expected=Table, raise_if_not_exists=True)
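
Note: the md argument to create_replica is expected in "ancestor order": the table being replicated first, the root base table last. An illustrative sketch of that convention, using a hypothetical stand-in for schema.FullTableMd:

    from dataclasses import dataclass

    @dataclass
    class FakeFullTableMd:  # hypothetical stand-in, not the real schema.FullTableMd
        tbl_id: str
        name: str

    snapshot_md = FakeFullTableMd('c3d4...', 'my_snapshot')  # the table being replicated
    base_md = FakeFullTableMd('a1b2...', 'base_tbl')         # its (root) base table
    md = [snapshot_md, base_md]  # position 0: replicated table; position -1: root base
    assert md[0] is snapshot_md and md[-1] is base_md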
@@ -480,6 +638,9 @@
 
      @_retry_loop
      def create_dir(self, path: Path, if_exists: IfExistsParam, parents: bool) -> Dir:
+         return self._create_dir(path, if_exists, parents)
+
+     def _create_dir(self, path: Path, if_exists: IfExistsParam, parents: bool) -> Dir:
          # existing = self._handle_path_collision(path, Dir, False, if_exists)
          # if existing is not None:
          #     assert isinstance(existing, Dir)
@@ -535,7 +696,8 @@
              raise excs.Error(f'Directory {str(dir_path)!r} is not empty.')
 
          # drop existing subdirs
-         dir_q = sql.select(schema.Dir).where(schema.Dir.parent_id == dir_id).with_for_update()
+         self._lock_dir(dir_id, None, None)
+         dir_q = sql.select(schema.Dir).where(schema.Dir.parent_id == dir_id)
          for row in conn.execute(dir_q).all():
              self._drop_dir(row.id, dir_path.append(row.md['name']), force=True)
 
@@ -548,7 +710,6 @@
              self._drop_tbl(tbl, force=True, is_replace=False)
 
          # self.drop_dir(dir_id)
-         # _debug_print(for_update=True, msg=f'drop dir id={dir_id}')
          conn.execute(sql.delete(schema.Dir).where(schema.Dir.id == dir_id))
          _logger.info(f'Removed directory {str(dir_path)!r}.')
 
@@ -558,7 +719,6 @@
          q = sql.select(schema.Table.id).where(sql.text(f"md->'view_md'->'base_versions'->0->>0 = {tbl_id.hex!r}"))
          if for_update:
              q = q.with_for_update()
-         # _debug_print(for_update=False, msg=f'views of tbl id={tbl_id}')
          result = [r[0] for r in conn.execute(q).all()]
          return result
 
@@ -582,10 +742,9 @@
      def get_dir(self, dir_id: UUID, for_update: bool = False) -> Optional[Dir]:
          """Return the Dir with the given id, or None if it doesn't exist"""
          conn = Env.get().conn
-         q = sql.select(schema.Dir).where(schema.Dir.id == dir_id)
          if for_update:
-             q = q.with_for_update()
-         # _debug_print(for_update=False, msg=f'dir id={dir_id!r}')
+             self._lock_dir(None, dir_id, None)
+         q = sql.select(schema.Dir).where(schema.Dir.id == dir_id)
          row = conn.execute(q).one_or_none()
          if row is None:
              return None
@@ -594,31 +753,27 @@
 
      def _get_dir(self, path: Path, for_update: bool = False) -> Optional[schema.Dir]:
          """
-         Locking protocol:
-         - S locks on all ancestors
-         - X lock on dir if for_update == True, otherwise also an S lock
+         Locking protocol: X locks on all ancestors
          """
          user = Env.get().user
          conn = Env.get().conn
          if path.is_root:
-             q = sql.select(schema.Dir).where(schema.Dir.parent_id.is_(None), schema.Dir.md['user'].astext == user)
              if for_update:
-                 q = q.with_for_update()
-             # _debug_print(for_update, 'root dir')
+                 self._lock_dir(parent_id=None, dir_id=None, dir_name='')
+             q = sql.select(schema.Dir).where(schema.Dir.parent_id.is_(None), schema.Dir.md['user'].astext == user)
              row = conn.execute(q).one_or_none()
              return schema.Dir(**row._mapping) if row is not None else None
          else:
              parent_dir = self._get_dir(path.parent, for_update=False)
              if parent_dir is None:
                  return None
+             if for_update:
+                 self._lock_dir(parent_id=parent_dir.id, dir_id=None, dir_name=path.name)
              q = sql.select(schema.Dir).where(
                  schema.Dir.parent_id == parent_dir.id,
                  schema.Dir.md['name'].astext == path.name,
                  schema.Dir.md['user'].astext == user,
              )
-             if for_update:
-                 q = q.with_for_update()
-             # _debug_print(for_update, f'dir {str(path)}')
              row = conn.execute(q).one_or_none()
              return schema.Dir(**row._mapping) if row is not None else None
 
@@ -641,7 +796,6 @@
              )
              .where(schema.Table.id == tbl_id)
          )
-         # _debug_print(for_update=False, msg=f'load table id={tbl_id!r}')
          row = conn.execute(q).one_or_none()
          if row is None:
              return None
@@ -680,13 +834,19 @@
          # TODO: also load mutable views
          return view
 
-     def _load_tbl_version(self, tbl_id: UUID, effective_version: Optional[int]) -> Optional[TableVersion]:
-         _logger.info(f'Loading table version: {tbl_id}:{effective_version}')
+     def load_tbl_md(self, tbl_id: UUID, effective_version: Optional[int]) -> schema.FullTableMd:
+         """
+         Loads metadata from the store for a given table UUID and version.
+         """
+         _logger.info(f'Loading metadata for table version: {tbl_id}:{effective_version}')
          conn = Env.get().conn
+
          q = (
-             sql.select(schema.Table, schema.TableSchemaVersion)
+             sql.select(schema.Table, schema.TableVersion, schema.TableSchemaVersion)
              .select_from(schema.Table)
              .where(schema.Table.id == tbl_id)
+             .join(schema.TableVersion)
+             .where(schema.TableVersion.tbl_id == tbl_id)
              .join(schema.TableSchemaVersion)
              .where(schema.TableSchemaVersion.tbl_id == tbl_id)
          )
@@ -698,16 +858,11 @@
          # JOIN TableVersion tv ON (tv.tbl_id = tbl_id AND tv.version = effective_version)
          # JOIN TableSchemaVersion tsv ON (tsv.tbl_id = tbl_id AND tv.md.schema_version = tsv.schema_version)
          # WHERE t.id = tbl_id
-         q = (
-             q.join(schema.TableVersion)
-             .where(schema.TableVersion.tbl_id == tbl_id)
-             .where(sql.text(f"({schema.TableVersion.__table__}.md->>'version')::int = {effective_version}"))
-             .where(
-                 sql.text(
-                     (
-                         f"({schema.TableVersion.__table__}.md->>'schema_version')::int = "
-                         f'{schema.TableSchemaVersion.__table__}.{schema.TableSchemaVersion.schema_version.name}'
-                     )
+         q = q.where(sql.text(f"({schema.TableVersion.__table__}.md->>'version')::int = {effective_version}")).where(
+             sql.text(
+                 (
+                     f"({schema.TableVersion.__table__}.md->>'schema_version')::int = "
+                     f'{schema.TableSchemaVersion.__table__}.{schema.TableSchemaVersion.schema_version.name}'
                  )
              )
          )
@@ -715,9 +870,15 @@
          # we are loading the current version
          # SELECT *
          # FROM Table t
+         # JOIN TableVersion tv ON (tv.tbl_id = tbl_id AND t.current_version = tv.version)
          # JOIN TableSchemaVersion tsv ON (tsv.tbl_id = tbl_id AND t.current_schema_version = tsv.schema_version)
          # WHERE t.id = tbl_id
          q = q.where(
+             sql.text(
+                 f"({schema.Table.__table__}.md->>'current_version')::int = "
+                 f'{schema.TableVersion.__table__}.{schema.TableVersion.version.name}'
+             )
+         ).where(
              sql.text(
                  (
                      f"({schema.Table.__table__}.md->>'current_schema_version')::int = "
@@ -727,11 +888,100 @@
          )
 
          row = conn.execute(q).one_or_none()
-         tbl_record, schema_version_record = _unpack_row(row, [schema.Table, schema.TableSchemaVersion])
+         assert row is not None, f'Table record not found: {tbl_id}:{effective_version}'
+         tbl_record, version_record, schema_version_record = _unpack_row(
+             row, [schema.Table, schema.TableVersion, schema.TableSchemaVersion]
+         )
+         assert tbl_record.id == tbl_id
          tbl_md = schema.md_from_dict(schema.TableMd, tbl_record.md)
+         version_md = schema.md_from_dict(schema.TableVersionMd, version_record.md)
          schema_version_md = schema.md_from_dict(schema.TableSchemaVersionMd, schema_version_record.md)
+
+         return schema.FullTableMd(tbl_md, version_md, schema_version_md)
+
+     def store_tbl_md(
+         self,
+         tbl_id: UUID,
+         tbl_md: Optional[schema.TableMd],
+         version_md: Optional[schema.TableVersionMd],
+         schema_version_md: Optional[schema.TableSchemaVersionMd],
+     ) -> None:
+         """
+         Stores metadata to the DB. If specified, `tbl_md` will be updated in place (only one such record can exist
+         per UUID); `version_md` and `schema_version_md` will be inserted as new records.
+
+         If inserting `version_md` or `schema_version_md` would be a primary key violation, an exception will be raised.
+         """
+         conn = Env.get().conn
+
+         if tbl_md is not None:
+             result = conn.execute(
+                 sql.update(schema.Table.__table__)
+                 .values({schema.Table.md: dataclasses.asdict(tbl_md)})
+                 .where(schema.Table.id == tbl_id)
+             )
+             assert result.rowcount == 1, result.rowcount
+
+         if version_md is not None:
+             conn.execute(
+                 sql.insert(schema.TableVersion.__table__).values(
+                     tbl_id=tbl_id, version=version_md.version, md=dataclasses.asdict(version_md)
+                 )
+             )
+
+         if schema_version_md is not None:
+             conn.execute(
+                 sql.insert(schema.TableSchemaVersion.__table__).values(
+                     tbl_id=tbl_id,
+                     schema_version=schema_version_md.schema_version,
+                     md=dataclasses.asdict(schema_version_md),
+                 )
+             )
+
+     def delete_tbl_md(self, tbl_id: UUID) -> None:
+         """
+         Deletes all table metadata from the store for the given table UUID.
+         """
+         conn = Env.get().conn
+         conn.execute(sql.delete(schema.TableSchemaVersion.__table__).where(schema.TableSchemaVersion.tbl_id == tbl_id))
+         conn.execute(sql.delete(schema.TableVersion.__table__).where(schema.TableVersion.tbl_id == tbl_id))
+         conn.execute(sql.delete(schema.Table.__table__).where(schema.Table.id == tbl_id))
+
+     def load_replica_md(self, tbl: Table) -> list[schema.FullTableMd]:
+         """
+         Load metadata for the given table along with all its ancestors. The values of TableMd.current_version and
+         TableMd.current_schema_version will be adjusted to ensure that the metadata represent a valid (internally
+         consistent) table state.
+         """
+         # TODO: First acquire X-locks for all relevant metadata entries
+
+         # Load metadata for every table in the TableVersionPath for `tbl`.
+         md = [self.load_tbl_md(tv.id, tv.effective_version) for tv in tbl._tbl_version_path.get_tbl_versions()]
+
+         # If `tbl` is a named pure snapshot, we're not quite done, since the snapshot metadata won't appear in the
+         # TableVersionPath. We need to prepend it separately.
+         if tbl._id != tbl._tbl_version.id:
+             snapshot_md = self.load_tbl_md(tbl._id, 0)
+             md = [snapshot_md, *md]
+
+         for ancestor_md in md[1:]:
+             # For replica metadata, we guarantee that the current_version and current_schema_version of TableMd
+             # match the corresponding values in TableVersionMd and TableSchemaVersionMd. This is to ensure that,
+             # when the metadata is later stored in the catalog of a different Pixeltable instance, the values of
+             # current_version and current_schema_version will always point to versions that are known to the
+             # destination catalog.
+             ancestor_md.tbl_md.current_version = ancestor_md.version_md.version
+             ancestor_md.tbl_md.current_schema_version = ancestor_md.schema_version_md.schema_version
+
+         return md
+
+     def _load_tbl_version(self, tbl_id: UUID, effective_version: Optional[int]) -> Optional[TableVersion]:
+         tbl_md, _, schema_version_md = self.load_tbl_md(tbl_id, effective_version)
          view_md = tbl_md.view_md
 
+         _logger.info(f'Loading table version: {tbl_id}:{effective_version}')
+         conn = Env.get().conn
+
          # load mutable view ids
          q = sql.select(schema.Table.id).where(
              sql.text(
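
Note: store_tbl_md treats the three metadata kinds differently: tbl_md is updated in place (one record per table UUID), while version_md and schema_version_md are insert-only, so writing a duplicate (tbl_id, version) key fails. A toy in-memory sketch of that contract, with hypothetical names rather than the real schema classes:

    from typing import Optional

    table_md: dict = {}    # tbl_id -> TableMd-like dict (updated in place)
    version_md: dict = {}  # (tbl_id, version) -> TableVersionMd-like dict (insert-only)

    def store_md_sketch(tbl_id: str, tbl: Optional[dict], version: Optional[dict]) -> None:
        if tbl is not None:
            table_md[tbl_id] = tbl  # overwrite: only one record per table UUID
        if version is not None:
            key = (tbl_id, version['version'])
            if key in version_md:
                raise KeyError(f'duplicate version record: {key}')  # analogous to a PK violation
            version_md[key] = version

    store_md_sketch('t1', {'current_version': 3}, {'version': 3})
    store_md_sketch('t1', {'current_version': 4}, {'version': 4})  # new version inserted, table md updated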
@@ -745,7 +995,7 @@
          if view_md is None:
              # this is a base table
              tbl_version = TableVersion(
-                 tbl_record.id, tbl_md, effective_version, schema_version_md, mutable_views=mutable_views
+                 tbl_id, tbl_md, effective_version, schema_version_md, mutable_views=mutable_views
              )
              return tbl_version
 
@@ -762,7 +1012,7 @@
          base = base_path.tbl_version
 
          tbl_version = TableVersion(
-             tbl_record.id,
+             tbl_id,
              tbl_md,
              effective_version,
              schema_version_md,
@@ -100,14 +100,14 @@ class IfNotExistsParam(enum.Enum):
              raise excs.Error(f'{param_name} must be one of: [{val_strs}]') from None
 
 
- def is_valid_identifier(name: str) -> bool:
-     return name.isidentifier() and not name.startswith('_')
+ def is_valid_identifier(name: str, allow_system_identifiers: bool = False) -> bool:
+     return name.isidentifier() and (allow_system_identifiers or not name.startswith('_'))
 
 
- def is_valid_path(path: str, empty_is_valid: bool) -> bool:
-     if not path:
+ def is_valid_path(path: str, empty_is_valid: bool, allow_system_paths: bool = False) -> bool:
+     if path == '':
          return empty_is_valid
-     return all(is_valid_identifier(part) for part in path.split('.'))
+     return all(is_valid_identifier(part, allow_system_paths) for part in path.split('.'))
 
 
  def is_system_column_name(name: str) -> bool:
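
Note: the new allow_system_identifiers / allow_system_paths flags let internal callers accept leading-underscore ("system") components such as _system.replica_<id>, which remain invalid for user-supplied paths. The two helpers above, copied standalone with a couple of asserts to show the effect:

    def is_valid_identifier(name: str, allow_system_identifiers: bool = False) -> bool:
        return name.isidentifier() and (allow_system_identifiers or not name.startswith('_'))

    def is_valid_path(path: str, empty_is_valid: bool, allow_system_paths: bool = False) -> bool:
        if path == '':
            return empty_is_valid
        return all(is_valid_identifier(part, allow_system_paths) for part in path.split('.'))

    # rejected for user paths, accepted for internal (system) paths
    assert not is_valid_path('_system.replica_abc123', empty_is_valid=False)
    assert is_valid_path('_system.replica_abc123', empty_is_valid=False, allow_system_paths=True)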
@@ -17,6 +17,7 @@ from .table_version_handle import TableVersionHandle
  from .table_version_path import TableVersionPath
 
  if TYPE_CHECKING:
+     from pixeltable import exprs
      from pixeltable.globals import TableDataSource
      from pixeltable.io.table_data_conduit import TableDataConduit
 
@@ -210,7 +211,7 @@ class InsertableTable(Table):
              msg = str(e)
              raise excs.Error(f'Error in column {col.name}: {msg[0].lower() + msg[1:]}\nRow: {row}') from e
 
-     def delete(self, where: Optional['pxt.exprs.Expr'] = None) -> UpdateStatus:
+     def delete(self, where: Optional['exprs.Expr'] = None) -> UpdateStatus:
          """Delete rows in this table.
 
          Args:
@@ -227,3 +228,14 @@
          """
          with Env.get().begin_xact():
              return self._tbl_version.get().delete(where=where)
+
+     @property
+     def _base_table(self) -> Optional['Table']:
+         return None
+
+     @property
+     def _effective_base_versions(self) -> list[Optional[int]]:
+         return []
+
+     def _table_descriptor(self) -> str:
+         return f'Table {self._path()!r}'
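
Note: the delete() change is annotation-only (the 'pxt.exprs.Expr' forward reference becomes 'exprs.Expr' via the new TYPE_CHECKING import); runtime behavior is unchanged. An illustrative call, assuming a hypothetical table with an Int column:

    import pixeltable as pxt

    t = pxt.create_table('demo', {'name': pxt.String, 'score': pxt.Int})
    t.insert([{'name': 'a', 'score': 1}, {'name': 'b', 'score': 42}])
    t.delete(where=t.score > 10)  # removes only the rows matching the predicate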