pixeltable 0.4.17__py3-none-any.whl → 0.4.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (153)
  1. pixeltable/__init__.py +1 -1
  2. pixeltable/_version.py +1 -0
  3. pixeltable/catalog/catalog.py +144 -118
  4. pixeltable/catalog/column.py +104 -115
  5. pixeltable/catalog/globals.py +1 -2
  6. pixeltable/catalog/insertable_table.py +44 -49
  7. pixeltable/catalog/path.py +3 -4
  8. pixeltable/catalog/schema_object.py +4 -4
  9. pixeltable/catalog/table.py +139 -124
  10. pixeltable/catalog/table_metadata.py +6 -6
  11. pixeltable/catalog/table_version.py +315 -246
  12. pixeltable/catalog/table_version_handle.py +4 -4
  13. pixeltable/catalog/table_version_path.py +9 -10
  14. pixeltable/catalog/tbl_ops.py +9 -3
  15. pixeltable/catalog/view.py +34 -28
  16. pixeltable/config.py +14 -10
  17. pixeltable/dataframe.py +69 -78
  18. pixeltable/env.py +78 -64
  19. pixeltable/exec/aggregation_node.py +6 -6
  20. pixeltable/exec/cache_prefetch_node.py +10 -10
  21. pixeltable/exec/data_row_batch.py +3 -3
  22. pixeltable/exec/exec_context.py +16 -4
  23. pixeltable/exec/exec_node.py +5 -5
  24. pixeltable/exec/expr_eval/evaluators.py +6 -6
  25. pixeltable/exec/expr_eval/expr_eval_node.py +8 -7
  26. pixeltable/exec/expr_eval/globals.py +6 -6
  27. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  28. pixeltable/exec/expr_eval/schedulers.py +11 -11
  29. pixeltable/exec/in_memory_data_node.py +2 -2
  30. pixeltable/exec/object_store_save_node.py +14 -17
  31. pixeltable/exec/sql_node.py +28 -27
  32. pixeltable/exprs/arithmetic_expr.py +4 -4
  33. pixeltable/exprs/array_slice.py +2 -2
  34. pixeltable/exprs/column_property_ref.py +3 -3
  35. pixeltable/exprs/column_ref.py +61 -74
  36. pixeltable/exprs/comparison.py +5 -5
  37. pixeltable/exprs/compound_predicate.py +3 -3
  38. pixeltable/exprs/data_row.py +12 -12
  39. pixeltable/exprs/expr.py +41 -31
  40. pixeltable/exprs/expr_dict.py +3 -3
  41. pixeltable/exprs/expr_set.py +3 -3
  42. pixeltable/exprs/function_call.py +14 -14
  43. pixeltable/exprs/in_predicate.py +4 -4
  44. pixeltable/exprs/inline_expr.py +8 -8
  45. pixeltable/exprs/is_null.py +1 -3
  46. pixeltable/exprs/json_mapper.py +8 -8
  47. pixeltable/exprs/json_path.py +6 -6
  48. pixeltable/exprs/literal.py +5 -5
  49. pixeltable/exprs/method_ref.py +2 -2
  50. pixeltable/exprs/object_ref.py +2 -2
  51. pixeltable/exprs/row_builder.py +14 -14
  52. pixeltable/exprs/rowid_ref.py +8 -8
  53. pixeltable/exprs/similarity_expr.py +50 -25
  54. pixeltable/exprs/sql_element_cache.py +4 -4
  55. pixeltable/exprs/string_op.py +2 -2
  56. pixeltable/exprs/type_cast.py +3 -5
  57. pixeltable/func/aggregate_function.py +8 -8
  58. pixeltable/func/callable_function.py +9 -9
  59. pixeltable/func/expr_template_function.py +3 -3
  60. pixeltable/func/function.py +15 -17
  61. pixeltable/func/function_registry.py +6 -7
  62. pixeltable/func/globals.py +2 -3
  63. pixeltable/func/mcp.py +2 -2
  64. pixeltable/func/query_template_function.py +16 -16
  65. pixeltable/func/signature.py +14 -14
  66. pixeltable/func/tools.py +11 -11
  67. pixeltable/func/udf.py +16 -18
  68. pixeltable/functions/__init__.py +1 -0
  69. pixeltable/functions/anthropic.py +7 -7
  70. pixeltable/functions/audio.py +76 -0
  71. pixeltable/functions/bedrock.py +6 -6
  72. pixeltable/functions/deepseek.py +4 -4
  73. pixeltable/functions/fireworks.py +2 -2
  74. pixeltable/functions/gemini.py +6 -6
  75. pixeltable/functions/globals.py +12 -12
  76. pixeltable/functions/groq.py +4 -4
  77. pixeltable/functions/huggingface.py +1033 -6
  78. pixeltable/functions/image.py +7 -10
  79. pixeltable/functions/llama_cpp.py +7 -7
  80. pixeltable/functions/math.py +2 -3
  81. pixeltable/functions/mistralai.py +3 -3
  82. pixeltable/functions/ollama.py +9 -9
  83. pixeltable/functions/openai.py +21 -21
  84. pixeltable/functions/openrouter.py +7 -7
  85. pixeltable/functions/string.py +21 -28
  86. pixeltable/functions/timestamp.py +7 -8
  87. pixeltable/functions/together.py +4 -6
  88. pixeltable/functions/twelvelabs.py +92 -0
  89. pixeltable/functions/video.py +36 -31
  90. pixeltable/functions/vision.py +6 -6
  91. pixeltable/functions/whisper.py +7 -7
  92. pixeltable/functions/whisperx.py +16 -16
  93. pixeltable/globals.py +75 -40
  94. pixeltable/index/base.py +12 -8
  95. pixeltable/index/btree.py +19 -22
  96. pixeltable/index/embedding_index.py +30 -39
  97. pixeltable/io/datarows.py +3 -3
  98. pixeltable/io/external_store.py +13 -16
  99. pixeltable/io/fiftyone.py +5 -5
  100. pixeltable/io/globals.py +5 -5
  101. pixeltable/io/hf_datasets.py +4 -4
  102. pixeltable/io/label_studio.py +12 -12
  103. pixeltable/io/pandas.py +6 -6
  104. pixeltable/io/parquet.py +2 -2
  105. pixeltable/io/table_data_conduit.py +12 -12
  106. pixeltable/io/utils.py +2 -2
  107. pixeltable/iterators/audio.py +2 -2
  108. pixeltable/iterators/document.py +88 -57
  109. pixeltable/iterators/video.py +66 -37
  110. pixeltable/metadata/converters/convert_18.py +2 -2
  111. pixeltable/metadata/converters/convert_19.py +2 -2
  112. pixeltable/metadata/converters/convert_20.py +2 -2
  113. pixeltable/metadata/converters/convert_21.py +2 -2
  114. pixeltable/metadata/converters/convert_22.py +2 -2
  115. pixeltable/metadata/converters/convert_24.py +2 -2
  116. pixeltable/metadata/converters/convert_25.py +2 -2
  117. pixeltable/metadata/converters/convert_26.py +2 -2
  118. pixeltable/metadata/converters/convert_29.py +4 -4
  119. pixeltable/metadata/converters/convert_34.py +2 -2
  120. pixeltable/metadata/converters/convert_36.py +2 -2
  121. pixeltable/metadata/converters/convert_38.py +2 -2
  122. pixeltable/metadata/converters/convert_39.py +1 -2
  123. pixeltable/metadata/converters/util.py +11 -13
  124. pixeltable/metadata/schema.py +22 -21
  125. pixeltable/metadata/utils.py +2 -6
  126. pixeltable/mypy/mypy_plugin.py +5 -5
  127. pixeltable/plan.py +32 -34
  128. pixeltable/share/packager.py +7 -7
  129. pixeltable/share/publish.py +3 -3
  130. pixeltable/store.py +126 -41
  131. pixeltable/type_system.py +43 -46
  132. pixeltable/utils/__init__.py +1 -2
  133. pixeltable/utils/arrow.py +4 -4
  134. pixeltable/utils/av.py +74 -38
  135. pixeltable/utils/azure_store.py +305 -0
  136. pixeltable/utils/code.py +1 -2
  137. pixeltable/utils/dbms.py +15 -19
  138. pixeltable/utils/description_helper.py +2 -3
  139. pixeltable/utils/documents.py +5 -6
  140. pixeltable/utils/exception_handler.py +2 -2
  141. pixeltable/utils/filecache.py +5 -5
  142. pixeltable/utils/formatter.py +4 -6
  143. pixeltable/utils/gcs_store.py +9 -9
  144. pixeltable/utils/local_store.py +17 -17
  145. pixeltable/utils/object_stores.py +59 -43
  146. pixeltable/utils/s3_store.py +35 -30
  147. {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/METADATA +4 -4
  148. pixeltable-0.4.19.dist-info/RECORD +213 -0
  149. pixeltable/__version__.py +0 -3
  150. pixeltable-0.4.17.dist-info/RECORD +0 -211
  151. {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/WHEEL +0 -0
  152. {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/entry_points.txt +0 -0
  153. {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/licenses/LICENSE +0 -0
pixeltable/__init__.py CHANGED
@@ -4,7 +4,7 @@ Core Pixeltable API for table operations, data processing, and UDF management.
4
4
 
5
5
  # ruff: noqa: F401
6
6
 
7
- from .__version__ import __version__, __version_tuple__
7
+ from ._version import __version__
8
8
  from .catalog import (
9
9
  Column,
10
10
  ColumnMetadata,
pixeltable/_version.py ADDED
@@ -0,0 +1 @@
1
+ __version__: str = '0.4.19'
@@ -7,7 +7,7 @@ import random
7
7
  import time
8
8
  from collections import defaultdict
9
9
  from contextlib import contextmanager
10
- from typing import TYPE_CHECKING, Any, Callable, Iterator, Optional, TypeVar
10
+ from typing import TYPE_CHECKING, Any, Callable, Iterator, TypeVar
11
11
  from uuid import UUID
12
12
 
13
13
  import psycopg
@@ -37,15 +37,13 @@ from .view import View
37
37
  if TYPE_CHECKING:
38
38
  from pixeltable.plan import SampleClause
39
39
 
40
- from .. import DataFrame, exprs
40
+ from .. import exprs
41
41
 
42
42
 
43
43
  _logger = logging.getLogger('pixeltable')
44
44
 
45
45
 
46
- def _unpack_row(
47
- row: Optional[sql.engine.Row], entities: list[type[sql.orm.decl_api.DeclarativeBase]]
48
- ) -> Optional[list[Any]]:
46
+ def _unpack_row(row: sql.engine.Row | None, entities: list[type[sql.orm.decl_api.DeclarativeBase]]) -> list[Any] | None:
49
47
  """Convert a Row result into a list of entity instances.
50
48
 
51
49
  Assumes that the query contains a select() of exactly those entities.
@@ -75,7 +73,7 @@ T = TypeVar('T')
75
73
 
76
74
 
77
75
  def retry_loop(
78
- *, tbl: Optional[TableVersionPath] = None, for_write: bool, lock_mutable_tree: bool = False
76
+ *, tbl: TableVersionPath | None = None, for_write: bool, lock_mutable_tree: bool = False
79
77
  ) -> Callable[[Callable[..., T]], Callable[..., T]]:
80
78
  def decorator(op: Callable[..., T]) -> Callable[..., T]:
81
79
  @functools.wraps(op)
@@ -159,13 +157,13 @@ class Catalog:
159
157
  - metadata validation is only needed for live TableVersion instances (snapshot instances are immutable)
160
158
  """
161
159
 
162
- _instance: Optional[Catalog] = None
160
+ _instance: Catalog | None = None
163
161
 
164
162
  # cached TableVersion instances; key: [id, version]
165
163
  # - mutable version of a table: version == None (even though TableVersion.version is set correctly)
166
164
  # - snapshot versions: records the version of the snapshot
167
- _tbl_versions: dict[tuple[UUID, Optional[int]], TableVersion]
168
- _tbls: dict[tuple[UUID, Optional[int]], Table]
165
+ _tbl_versions: dict[tuple[UUID, int | None], TableVersion]
166
+ _tbls: dict[tuple[UUID, int | None], Table]
169
167
  _in_write_xact: bool # True if we're in a write transaction
170
168
  _x_locked_tbl_ids: set[UUID] # non-empty for write transactions
171
169
  _modified_tvs: set[TableVersionHandle] # TableVersion instances modified in the current transaction
@@ -179,7 +177,7 @@ class Catalog:
179
177
  _column_dependencies: dict[UUID, dict[QColumnId, set[QColumnId]]]
180
178
 
181
179
  # column dependents are recomputed at the beginning of every write transaction and only reflect the locked tree
182
- _column_dependents: Optional[dict[QColumnId, set[QColumnId]]]
180
+ _column_dependents: dict[QColumnId, set[QColumnId]] | None
183
181
 
184
182
  @classmethod
185
183
  def get(cls) -> Catalog:
@@ -258,8 +256,8 @@ class Catalog:
258
256
  def begin_xact(
259
257
  self,
260
258
  *,
261
- tbl: Optional[TableVersionPath] = None,
262
- tbl_id: Optional[UUID] = None,
259
+ tbl: TableVersionPath | None = None,
260
+ tbl_id: UUID | None = None,
263
261
  for_write: bool = False,
264
262
  lock_mutable_tree: bool = False,
265
263
  convert_db_excs: bool = True,
@@ -280,7 +278,7 @@ class Catalog:
280
278
  - this needs to be done in a retry loop, because Postgres can decide to abort the transaction
281
279
  (SerializationFailure, LockNotAvailable)
282
280
  - for that reason, we do all lock acquisition prior to doing any real work (eg, compute column values),
283
- to minimize the probability of loosing that work due to a forced abort
281
+ to minimize the probability of losing that work due to a forced abort
284
282
 
285
283
  If convert_db_excs == True, converts DBAPIErrors into excs.Errors.
286
284
  """
@@ -303,7 +301,7 @@ class Catalog:
303
301
  # )
304
302
  # _logger.debug(f'begin_xact(): {tv_msg}')
305
303
  num_retries = 0
306
- pending_ops_tbl_id: Optional[UUID] = None
304
+ pending_ops_tbl_id: UUID | None = None
307
305
  has_exc = False # True if we exited the 'with ...begin_xact()' block with an exception
308
306
  while True:
309
307
  if pending_ops_tbl_id is not None:
@@ -322,7 +320,7 @@ class Catalog:
322
320
  with Env.get().begin_xact(for_write=for_write) as conn:
323
321
  if tbl is not None or tbl_id is not None:
324
322
  try:
325
- target: Optional[TableVersionHandle] = None
323
+ target: TableVersionHandle | None = None
326
324
  if tbl is not None:
327
325
  if self._acquire_path_locks(
328
326
  tbl=tbl,
@@ -433,7 +431,7 @@ class Catalog:
433
431
 
434
432
  The function should not raise exceptions; if it does, they are logged and ignored.
435
433
  """
436
- assert Env.get().in_xact
434
+ assert self.in_write_xact
437
435
  self._undo_actions.append(func)
438
436
  return func
439
437
 
@@ -451,7 +449,7 @@ class Catalog:
451
449
  if isinstance(e.orig, psycopg.errors.UndefinedTable) and tbl is not None:
452
450
  # the table got dropped in the middle of the operation
453
451
  tbl_name = tbl.get().name
454
- _logger.debug(f'Exception: undefined table ({tbl_name}): Caught {type(e.orig)}: {e!r}')
452
+ _logger.debug(f'Exception: undefined table {tbl_name!r}: Caught {type(e.orig)}: {e!r}')
455
453
  raise excs.Error(f'Table was dropped: {tbl_name}') from None
456
454
  elif (
457
455
  isinstance(
@@ -490,7 +488,7 @@ class Catalog:
490
488
  tbl: TableVersionPath,
491
489
  for_write: bool = False,
492
490
  lock_mutable_tree: bool = False,
493
- check_pending_ops: Optional[bool] = None,
491
+ check_pending_ops: bool | None = None,
494
492
  ) -> bool:
495
493
  """
496
494
  Path locking protocol:
@@ -524,13 +522,13 @@ class Catalog:
524
522
  self,
525
523
  *,
526
524
  for_write: bool,
527
- tbl_id: Optional[UUID] = None,
528
- dir_id: Optional[UUID] = None,
529
- tbl_name: Optional[str] = None,
525
+ tbl_id: UUID | None = None,
526
+ dir_id: UUID | None = None,
527
+ tbl_name: str | None = None,
530
528
  lock_mutable_tree: bool = False,
531
529
  raise_if_not_exists: bool = True,
532
- check_pending_ops: Optional[bool] = None,
533
- ) -> Optional[TableVersionHandle]:
530
+ check_pending_ops: bool | None = None,
531
+ ) -> TableVersionHandle | None:
534
532
  """
535
533
  For writes: force acquisition of an X-lock on a Table record via a blind update.
536
534
 
@@ -594,7 +592,7 @@ class Catalog:
594
592
  while True:
595
593
  try:
596
594
  tbl_version: int
597
- op: Optional[TableOp] = None
595
+ op: TableOp | None = None
598
596
  delete_next_op_stmt: sql.Delete
599
597
  reset_has_pending_stmt: sql.Update
600
598
  with self.begin_xact(
@@ -613,7 +611,10 @@ class Catalog:
613
611
  row = conn.execute(q).one_or_none()
614
612
  if row is None:
615
613
  return
616
- tbl_version = row.md.get('current_version')
614
+ view_md = row.md.get('view_md')
615
+ is_snapshot = False if view_md is None else view_md.get('is_snapshot')
616
+ assert is_snapshot is not None
617
+ tbl_version = row.md.get('current_version') if is_snapshot else None
617
618
  op = schema.md_from_dict(TableOp, row.op)
618
619
  delete_next_op_stmt = sql.delete(schema.PendingTableOp).where(
619
620
  schema.PendingTableOp.tbl_id == tbl_id, schema.PendingTableOp.op_sn == row.op_sn
@@ -720,7 +721,7 @@ class Catalog:
720
721
  return result
721
722
 
722
723
  def _acquire_dir_xlock(
723
- self, *, parent_id: Optional[UUID] = None, dir_id: Optional[UUID] = None, dir_name: Optional[str] = None
724
+ self, *, parent_id: UUID | None = None, dir_id: UUID | None = None, dir_name: str | None = None
724
725
  ) -> None:
725
726
  """Force acquisition of an X-lock on a Dir record via a blind update.
726
727
 
@@ -760,9 +761,9 @@ class Catalog:
760
761
 
761
762
  @dataclasses.dataclass
762
763
  class DirEntry:
763
- dir: Optional[schema.Dir]
764
+ dir: schema.Dir | None
764
765
  dir_entries: dict[str, Catalog.DirEntry]
765
- table: Optional[schema.Table]
766
+ table: schema.Table | None
766
767
 
767
768
  @retry_loop(for_write=False)
768
769
  def get_dir_contents(self, dir_path: Path, recursive: bool = False) -> dict[str, DirEntry]:
@@ -792,30 +793,36 @@ class Catalog:
792
793
  return result
793
794
 
794
795
  @retry_loop(for_write=True)
795
- def move(self, path: Path, new_path: Path) -> None:
796
- self._move(path, new_path)
796
+ def move(self, path: Path, new_path: Path, if_exists: IfExistsParam, if_not_exists: IfNotExistsParam) -> None:
797
+ self._move(path, new_path, if_exists, if_not_exists)
797
798
 
798
- def _move(self, path: Path, new_path: Path) -> None:
799
- _, dest_dir, src_obj = self._prepare_dir_op(
799
+ def _move(self, path: Path, new_path: Path, if_exists: IfExistsParam, if_not_exists: IfNotExistsParam) -> None:
800
+ dest_obj, dest_dir, src_obj = self._prepare_dir_op(
800
801
  add_dir_path=new_path.parent,
801
802
  add_name=new_path.name,
802
803
  drop_dir_path=path.parent,
803
804
  drop_name=path.name,
804
- raise_if_exists=True,
805
- raise_if_not_exists=True,
805
+ raise_if_exists=(if_exists == IfExistsParam.ERROR),
806
+ raise_if_not_exists=(if_not_exists == IfNotExistsParam.ERROR),
806
807
  )
807
- src_obj._move(new_path.name, dest_dir._id)
808
+ assert dest_obj is None or if_exists == IfExistsParam.IGNORE
809
+ assert src_obj is not None or if_not_exists == IfNotExistsParam.IGNORE
810
+ if dest_obj is None and src_obj is not None:
811
+ # If dest_obj is not None, it means `if_exists='ignore'` and the destination already exists.
812
+ # If src_obj is None, it means `if_not_exists='ignore'` and the source doesn't exist.
813
+ # If dest_obj is None and src_obj is not None, then we can proceed with the move.
814
+ src_obj._move(new_path.name, dest_dir._id)
808
815
 
809
816
  def _prepare_dir_op(
810
817
  self,
811
- add_dir_path: Optional[Path] = None,
812
- add_name: Optional[str] = None,
813
- drop_dir_path: Optional[Path] = None,
814
- drop_name: Optional[str] = None,
815
- drop_expected: Optional[type[SchemaObject]] = None,
818
+ add_dir_path: Path | None = None,
819
+ add_name: str | None = None,
820
+ drop_dir_path: Path | None = None,
821
+ drop_name: str | None = None,
822
+ drop_expected: type[SchemaObject] | None = None,
816
823
  raise_if_exists: bool = False,
817
824
  raise_if_not_exists: bool = False,
818
- ) -> tuple[Optional[SchemaObject], Optional[SchemaObject], Optional[SchemaObject]]:
825
+ ) -> tuple[SchemaObject | None, Dir | None, SchemaObject | None]:
819
826
  """
820
827
  Validates paths and acquires locks needed for a directory operation, ie, add/drop/rename (add + drop) of a
821
828
  directory entry.
@@ -842,25 +849,29 @@ class Catalog:
842
849
  if drop_dir_path is not None:
843
850
  dir_paths.add(drop_dir_path)
844
851
 
845
- add_dir: Optional[schema.Dir] = None
846
- drop_dir: Optional[schema.Dir] = None
852
+ add_dir: schema.Dir | None = None
853
+ drop_dir: schema.Dir | None = None
847
854
  for p in sorted(dir_paths):
848
855
  dir = self._get_dir(p, lock_dir=True)
849
856
  if dir is None:
850
- raise excs.Error(f'Directory {p!r} does not exist.')
857
+ # Dir does not exist; raise an appropriate error.
858
+ if add_dir_path is not None or add_name is not None:
859
+ raise excs.Error(f'Directory {p!r} does not exist. Create it first with:\npxt.create_dir({p!r})')
860
+ else:
861
+ raise excs.Error(f'Directory {p!r} does not exist.')
851
862
  if p == add_dir_path:
852
863
  add_dir = dir
853
864
  if p == drop_dir_path:
854
865
  drop_dir = dir
855
866
 
856
- add_obj: Optional[SchemaObject] = None
867
+ add_obj: SchemaObject | None = None
857
868
  if add_dir is not None:
858
869
  add_obj = self._get_dir_entry(add_dir.id, add_name, lock_entry=True)
859
870
  if add_obj is not None and raise_if_exists:
860
871
  add_path = add_dir_path.append(add_name)
861
872
  raise excs.Error(f'Path {add_path!r} already exists.')
862
873
 
863
- drop_obj: Optional[SchemaObject] = None
874
+ drop_obj: SchemaObject | None = None
864
875
  if drop_dir is not None:
865
876
  drop_path = drop_dir_path.append(drop_name)
866
877
  drop_obj = self._get_dir_entry(drop_dir.id, drop_name, lock_entry=True)
@@ -874,8 +885,8 @@ class Catalog:
874
885
  return add_obj, add_dir_obj, drop_obj
875
886
 
876
887
  def _get_dir_entry(
877
- self, dir_id: UUID, name: str, version: Optional[int] = None, lock_entry: bool = False
878
- ) -> Optional[SchemaObject]:
888
+ self, dir_id: UUID, name: str, version: int | None = None, lock_entry: bool = False
889
+ ) -> SchemaObject | None:
879
890
  user = Env.get().user
880
891
  conn = Env.get().conn
881
892
 
@@ -902,21 +913,22 @@ class Catalog:
902
913
  schema.Table.md['name'].astext == name,
903
914
  schema.Table.md['user'].astext == user,
904
915
  )
905
- tbl_id = conn.execute(q).scalar_one_or_none()
906
- if tbl_id is not None:
907
- return self.get_table_by_id(tbl_id, version)
916
+ tbl_id = conn.execute(q).scalars().all()
917
+ assert len(tbl_id) <= 1, name
918
+ if len(tbl_id) == 1:
919
+ return self.get_table_by_id(tbl_id[0], version)
908
920
 
909
921
  return None
910
922
 
911
923
  def _get_schema_object(
912
924
  self,
913
925
  path: Path,
914
- expected: Optional[type[SchemaObject]] = None,
926
+ expected: type[SchemaObject] | None = None,
915
927
  raise_if_exists: bool = False,
916
928
  raise_if_not_exists: bool = False,
917
929
  lock_parent: bool = False,
918
930
  lock_obj: bool = False,
919
- ) -> Optional[SchemaObject]:
931
+ ) -> SchemaObject | None:
920
932
  """Return the schema object at the given path, or None if it doesn't exist.
921
933
 
922
934
  Raises Error if
@@ -951,7 +963,7 @@ class Catalog:
951
963
  raise excs.Error(f'{path!r} needs to be a {expected_name} but is a {obj._display_name()}.')
952
964
  return obj
953
965
 
954
- def get_table_by_id(self, tbl_id: UUID, version: Optional[int] = None) -> Optional[Table]:
966
+ def get_table_by_id(self, tbl_id: UUID, version: int | None = None) -> Table | None:
955
967
  """Must be executed inside a transaction. Might raise PendingTableOpsError."""
956
968
  if (tbl_id, version) not in self._tbls:
957
969
  if version is None:
@@ -960,17 +972,16 @@ class Catalog:
960
972
  return self._load_tbl_at_version(tbl_id, version)
961
973
  return self._tbls.get((tbl_id, version))
962
974
 
963
- @retry_loop(for_write=True)
964
975
  def create_table(
965
976
  self,
966
977
  path: Path,
967
978
  schema: dict[str, Any],
968
- df: 'DataFrame',
969
979
  if_exists: IfExistsParam,
970
- primary_key: Optional[list[str]],
980
+ primary_key: list[str] | None,
971
981
  num_retained_versions: int,
972
982
  comment: str,
973
983
  media_validation: MediaValidation,
984
+ create_default_idxs: bool,
974
985
  ) -> tuple[Table, bool]:
975
986
  """
976
987
  Creates a new InsertableTable at the given path.
@@ -979,37 +990,49 @@ class Catalog:
979
990
 
980
991
  Otherwise, creates a new table `t` and returns `t, True` (or raises an exception if the operation fails).
981
992
  """
982
- existing = self._handle_path_collision(path, InsertableTable, False, if_exists)
983
- if existing is not None:
984
- assert isinstance(existing, Table)
985
- return existing, False
986
993
 
987
- dir = self._get_schema_object(path.parent, expected=Dir, raise_if_not_exists=True)
988
- assert dir is not None
994
+ @retry_loop(for_write=True)
995
+ def create_fn() -> tuple[UUID, bool]:
996
+ existing = self._handle_path_collision(path, InsertableTable, False, if_exists)
997
+ if existing is not None:
998
+ assert isinstance(existing, Table)
999
+ return existing._id, False
989
1000
 
990
- tbl = InsertableTable._create(
991
- dir._id,
992
- path.name,
993
- schema,
994
- df,
995
- primary_key=primary_key,
996
- num_retained_versions=num_retained_versions,
997
- comment=comment,
998
- media_validation=media_validation,
999
- )
1000
- self._tbls[tbl._id, None] = tbl
1001
- return tbl, True
1001
+ dir = self._get_schema_object(path.parent, expected=Dir, raise_if_not_exists=True)
1002
+ assert dir is not None
1003
+
1004
+ md, ops = InsertableTable._create(
1005
+ path.name,
1006
+ schema,
1007
+ primary_key=primary_key,
1008
+ num_retained_versions=num_retained_versions,
1009
+ comment=comment,
1010
+ media_validation=media_validation,
1011
+ create_default_idxs=create_default_idxs,
1012
+ )
1013
+ tbl_id = UUID(md.tbl_md.tbl_id)
1014
+ self.store_tbl_md(tbl_id, dir._id, md.tbl_md, md.version_md, md.schema_version_md, ops)
1015
+ return tbl_id, True
1016
+
1017
+ tbl_id, is_created = create_fn()
1018
+ # finalize pending ops
1019
+ with self.begin_xact(tbl_id=tbl_id, for_write=True, finalize_pending_ops=True):
1020
+ tbl = self.get_table_by_id(tbl_id)
1021
+ _logger.info(f'Created table {tbl._name!r}, id={tbl._id}')
1022
+ Env.get().console_logger.info(f'Created table {tbl._name!r}.')
1023
+ return tbl, is_created
1002
1024
 
1003
1025
  def create_view(
1004
1026
  self,
1005
1027
  path: Path,
1006
1028
  base: TableVersionPath,
1007
- select_list: Optional[list[tuple[exprs.Expr, Optional[str]]]],
1008
- where: Optional[exprs.Expr],
1009
- sample_clause: Optional['SampleClause'],
1010
- additional_columns: Optional[dict[str, Any]],
1029
+ select_list: list[tuple[exprs.Expr, str | None]] | None,
1030
+ where: exprs.Expr | None,
1031
+ sample_clause: 'SampleClause' | None,
1032
+ additional_columns: dict[str, Any] | None,
1011
1033
  is_snapshot: bool,
1012
- iterator: Optional[tuple[type[ComponentIterator], dict[str, Any]]],
1034
+ create_default_idxs: bool,
1035
+ iterator: tuple[type[ComponentIterator], dict[str, Any]] | None,
1013
1036
  num_retained_versions: int,
1014
1037
  comment: str,
1015
1038
  media_validation: MediaValidation,
@@ -1050,6 +1073,7 @@ class Catalog:
1050
1073
  predicate=where,
1051
1074
  sample_clause=sample_clause,
1052
1075
  is_snapshot=is_snapshot,
1076
+ create_default_idxs=create_default_idxs,
1053
1077
  iterator_cls=iterator_class,
1054
1078
  iterator_args=iterator_args,
1055
1079
  num_retained_versions=num_retained_versions,
@@ -1072,7 +1096,7 @@ class Catalog:
1072
1096
  with self.begin_xact(tbl_id=view_id, for_write=True, finalize_pending_ops=True):
1073
1097
  return self.get_table_by_id(view_id)
1074
1098
 
1075
- def _clear_tv_cache(self, tbl_id: UUID, effective_version: Optional[int]) -> None:
1099
+ def _clear_tv_cache(self, tbl_id: UUID, effective_version: int | None) -> None:
1076
1100
  if (tbl_id, effective_version) in self._tbl_versions:
1077
1101
  tv = self._tbl_versions[tbl_id, effective_version]
1078
1102
  tv.is_validated = False
@@ -1084,7 +1108,7 @@ class Catalog:
1084
1108
  The metadata should be presented in standard "ancestor order", with the table being replicated at
1085
1109
  list position 0 and the (root) base table at list position -1.
1086
1110
  """
1087
- assert Env.get().in_xact
1111
+ assert self.in_write_xact
1088
1112
 
1089
1113
  tbl_id = UUID(md[0].tbl_md.tbl_id)
1090
1114
 
@@ -1150,11 +1174,11 @@ class Catalog:
1150
1174
  # We need to do this at the end, since `existing_path` needs to first have a non-fragment table version in
1151
1175
  # order to be instantiated as a schema object.
1152
1176
  existing = self.get_table_by_id(tbl_id)
1153
- if existing is not None:
1154
- existing_path = Path.parse(existing._path(), allow_system_path=True)
1155
- if existing_path != path:
1156
- assert existing_path.is_system_path
1157
- self._move(existing_path, path)
1177
+ assert existing is not None
1178
+ existing_path = Path.parse(existing._path(), allow_system_path=True)
1179
+ if existing_path != path:
1180
+ assert existing_path.is_system_path
1181
+ self._move(existing_path, path, IfExistsParam.ERROR, IfNotExistsParam.ERROR)
1158
1182
 
1159
1183
  def __ensure_system_dir_exists(self) -> Dir:
1160
1184
  system_path = Path.parse('_system', allow_system_path=True)
@@ -1169,9 +1193,9 @@ class Catalog:
1169
1193
  conn = Env.get().conn
1170
1194
  tbl_id = md.tbl_md.tbl_id
1171
1195
 
1172
- new_tbl_md: Optional[schema.TableMd] = None
1173
- new_version_md: Optional[schema.TableVersionMd] = None
1174
- new_schema_version_md: Optional[schema.TableSchemaVersionMd] = None
1196
+ new_tbl_md: schema.TableMd | None = None
1197
+ new_version_md: schema.TableVersionMd | None = None
1198
+ new_schema_version_md: schema.TableSchemaVersionMd | None = None
1175
1199
  is_new_tbl_version: bool = False
1176
1200
 
1177
1201
  # We need to ensure that the table metadata in the catalog always reflects the latest observed version of
@@ -1345,11 +1369,11 @@ class Catalog:
1345
1369
  msg: str
1346
1370
  if is_replace:
1347
1371
  msg = (
1348
- f'{tbl._display_name()} {tbl._path()!r} already exists and has dependents. '
1372
+ f'{tbl._display_str()} already exists and has dependents. '
1349
1373
  "Use `if_exists='replace_force'` to replace it."
1350
1374
  )
1351
1375
  else:
1352
- msg = f'{tbl._display_name()} {tbl._path()!r} has dependents.'
1376
+ msg = f'{tbl._display_str()} has dependents.'
1353
1377
  raise excs.Error(msg)
1354
1378
 
1355
1379
  # if this is a mutable view of a mutable base, advance the base's view_sn
@@ -1424,7 +1448,7 @@ class Catalog:
1424
1448
 
1425
1449
  if parents:
1426
1450
  # start walking down from the root
1427
- last_parent: Optional[SchemaObject] = None
1451
+ last_parent: SchemaObject | None = None
1428
1452
  for ancestor in path.ancestors():
1429
1453
  ancestor_obj = self._get_schema_object(ancestor, expected=Dir)
1430
1454
  assert ancestor_obj is not None or last_parent is not None
@@ -1500,10 +1524,10 @@ class Catalog:
1500
1524
  def get_tbl_version(
1501
1525
  self,
1502
1526
  tbl_id: UUID,
1503
- effective_version: Optional[int],
1504
- check_pending_ops: Optional[bool] = None,
1527
+ effective_version: int | None,
1528
+ check_pending_ops: bool | None = None,
1505
1529
  validate_initialized: bool = False,
1506
- ) -> Optional[TableVersion]:
1530
+ ) -> TableVersion | None:
1507
1531
  """
1508
1532
  Returns the TableVersion instance for the given table and version and updates the cache.
1509
1533
 
@@ -1552,7 +1576,7 @@ class Catalog:
1552
1576
  assert (tbl_version.id, tbl_version.effective_version) in self._tbl_versions
1553
1577
  del self._tbl_versions[tbl_version.id, tbl_version.effective_version]
1554
1578
 
1555
- def get_dir(self, dir_id: UUID, for_update: bool = False) -> Optional[Dir]:
1579
+ def get_dir(self, dir_id: UUID, for_update: bool = False) -> Dir | None:
1556
1580
  """Return the Dir with the given id, or None if it doesn't exist"""
1557
1581
  conn = Env.get().conn
1558
1582
  if for_update:
@@ -1564,7 +1588,7 @@ class Catalog:
1564
1588
  dir_record = schema.Dir(**row._mapping)
1565
1589
  return Dir(dir_record.id, dir_record.parent_id, dir_record.md['name'])
1566
1590
 
1567
- def _get_dir(self, path: Path, lock_dir: bool = False) -> Optional[schema.Dir]:
1591
+ def _get_dir(self, path: Path, lock_dir: bool = False) -> schema.Dir | None:
1568
1592
  """
1569
1593
  lock_dir: if True, X-locks target (but not the ancestors)
1570
1594
  """
@@ -1590,7 +1614,7 @@ class Catalog:
1590
1614
  row = conn.execute(q).one_or_none()
1591
1615
  return schema.Dir(**row._mapping) if row is not None else None
1592
1616
 
1593
- def _load_tbl(self, tbl_id: UUID) -> Optional[Table]:
1617
+ def _load_tbl(self, tbl_id: UUID) -> Table | None:
1594
1618
  """Loads metadata for the table with the given id and caches it."""
1595
1619
  _logger.info(f'Loading table {tbl_id}')
1596
1620
  from .insertable_table import InsertableTable
@@ -1650,7 +1674,7 @@ class Catalog:
1650
1674
  return tbl
1651
1675
 
1652
1676
  # this is a view; determine the sequence of TableVersions to load
1653
- tbl_version_path: list[tuple[UUID, Optional[int]]] = []
1677
+ tbl_version_path: list[tuple[UUID, int | None]] = []
1654
1678
  if tbl_md.is_pure_snapshot:
1655
1679
  # this is a pure snapshot, without a physical table backing it; we only need the bases
1656
1680
  pass
@@ -1663,8 +1687,8 @@ class Catalog:
1663
1687
  tbl_version_path.extend((UUID(tbl_id), version) for tbl_id, version in view_md.base_versions)
1664
1688
 
1665
1689
  # load TableVersions, starting at the root
1666
- base_path: Optional[TableVersionPath] = None
1667
- view_path: Optional[TableVersionPath] = None
1690
+ base_path: TableVersionPath | None = None
1691
+ view_path: TableVersionPath | None = None
1668
1692
  for id, effective_version in tbl_version_path[::-1]:
1669
1693
  if (id, effective_version) not in self._tbl_versions:
1670
1694
  _ = self._load_tbl_version(id, effective_version)
@@ -1674,7 +1698,7 @@ class Catalog:
1674
1698
  self._tbls[tbl_id, None] = view
1675
1699
  return view
1676
1700
 
1677
- def _load_tbl_at_version(self, tbl_id: UUID, version: int) -> Optional[Table]:
1701
+ def _load_tbl_at_version(self, tbl_id: UUID, version: int) -> Table | None:
1678
1702
  from .view import View
1679
1703
 
1680
1704
  # Load the specified TableMd and TableVersionMd records from the db.
@@ -1701,7 +1725,7 @@ class Catalog:
1701
1725
  # Build the list of ancestor versions, starting with the given table and traversing back to the base table.
1702
1726
  # For each proper ancestor, we use the version whose created_at timestamp equals or most nearly precedes the
1703
1727
  # given TableVersion's created_at timestamp.
1704
- ancestors: list[tuple[UUID, Optional[int]]] = [(tbl_id, version)]
1728
+ ancestors: list[tuple[UUID, int | None]] = [(tbl_id, version)]
1705
1729
  if tbl_md.view_md is not None:
1706
1730
  for ancestor_id, _ in tbl_md.view_md.base_versions:
1707
1731
  q = (
@@ -1728,7 +1752,7 @@ class Catalog:
1728
1752
  _ = self._load_tbl_version(anc_id, anc_version)
1729
1753
 
1730
1754
  # Now reconstruct the relevant TableVersionPath instance from the ancestor versions.
1731
- tvp: Optional[TableVersionPath] = None
1755
+ tvp: TableVersionPath | None = None
1732
1756
  for anc_id, anc_version in ancestors[::-1]:
1733
1757
  tvp = TableVersionPath(TableVersionHandle(anc_id, anc_version), base=tvp)
1734
1758
 
@@ -1737,10 +1761,10 @@ class Catalog:
1737
1761
  return view
1738
1762
 
1739
1763
  @retry_loop(for_write=False)
1740
- def collect_tbl_history(self, tbl_id: UUID, n: Optional[int]) -> list[schema.FullTableMd]:
1764
+ def collect_tbl_history(self, tbl_id: UUID, n: int | None) -> list[schema.FullTableMd]:
1741
1765
  return self._collect_tbl_history(tbl_id, n)
1742
1766
 
1743
- def _collect_tbl_history(self, tbl_id: UUID, n: Optional[int]) -> list[schema.FullTableMd]:
1767
+ def _collect_tbl_history(self, tbl_id: UUID, n: int | None) -> list[schema.FullTableMd]:
1744
1768
  """
1745
1769
  Returns the history of up to n versions of the table with the given UUID.
1746
1770
 
@@ -1776,7 +1800,7 @@ class Catalog:
1776
1800
  for row in src_rows
1777
1801
  ]
1778
1802
 
1779
- def load_tbl_md(self, tbl_id: UUID, effective_version: Optional[int]) -> schema.FullTableMd:
1803
+ def load_tbl_md(self, tbl_id: UUID, effective_version: int | None) -> schema.FullTableMd:
1780
1804
  """
1781
1805
  Loads metadata from the store for a given table UUID and version.
1782
1806
  """
@@ -1832,11 +1856,11 @@ class Catalog:
1832
1856
  def store_tbl_md(
1833
1857
  self,
1834
1858
  tbl_id: UUID,
1835
- dir_id: Optional[UUID],
1836
- tbl_md: Optional[schema.TableMd],
1837
- version_md: Optional[schema.TableVersionMd],
1838
- schema_version_md: Optional[schema.TableSchemaVersionMd],
1839
- pending_ops: Optional[list[TableOp]] = None,
1859
+ dir_id: UUID | None,
1860
+ tbl_md: schema.TableMd | None,
1861
+ version_md: schema.TableVersionMd | None,
1862
+ schema_version_md: schema.TableSchemaVersionMd | None,
1863
+ pending_ops: list[TableOp] | None = None,
1840
1864
  ) -> None:
1841
1865
  """
1842
1866
  Stores metadata to the DB.
@@ -1878,6 +1902,7 @@ class Catalog:
1878
1902
  .values({schema.Table.md: dataclasses.asdict(tbl_md)})
1879
1903
  .where(schema.Table.id == tbl_id)
1880
1904
  )
1905
+ assert isinstance(result, sql.CursorResult)
1881
1906
  assert result.rowcount == 1, result.rowcount
1882
1907
 
1883
1908
  # Construct and insert new table version record if requested.
@@ -1907,6 +1932,7 @@ class Catalog:
1907
1932
  .values({schema.TableVersion.md: dataclasses.asdict(version_md)})
1908
1933
  .where(schema.TableVersion.tbl_id == tbl_id, schema.TableVersion.version == version_md.version)
1909
1934
  )
1935
+ assert isinstance(result, sql.CursorResult)
1910
1936
  assert result.rowcount == 1, result.rowcount
1911
1937
 
1912
1938
  # Construct and insert a new schema version record if requested.
@@ -1988,8 +2014,8 @@ class Catalog:
1988
2014
  return md
1989
2015
 
1990
2016
  def _load_tbl_version(
1991
- self, tbl_id: UUID, effective_version: Optional[int], check_pending_ops: bool = True
1992
- ) -> Optional[TableVersion]:
2017
+ self, tbl_id: UUID, effective_version: int | None, check_pending_ops: bool = True
2018
+ ) -> TableVersion | None:
1993
2019
  """Creates TableVersion instance from stored metadata and registers it in _tbl_versions."""
1994
2020
  tbl_md, version_md, schema_version_md = self.load_tbl_md(tbl_id, effective_version)
1995
2021
  view_md = tbl_md.view_md
@@ -2045,7 +2071,7 @@ class Catalog:
2045
2071
  assert not pure_snapshot # a pure snapshot doesn't have a physical table backing it, no point in loading it
2046
2072
 
2047
2073
  base: TableVersionHandle
2048
- base_path: Optional[TableVersionPath] = None # needed for live view
2074
+ base_path: TableVersionPath | None = None # needed for live view
2049
2075
  if view_md.is_snapshot:
2050
2076
  base = TableVersionHandle(UUID(view_md.base_versions[0][0]), view_md.base_versions[0][1])
2051
2077
  else:
@@ -2076,7 +2102,7 @@ class Catalog:
2076
2102
  self.create_user(None)
2077
2103
  _logger.info('Initialized catalog.')
2078
2104
 
2079
- def create_user(self, user: Optional[str]) -> None:
2105
+ def create_user(self, user: str | None) -> None:
2080
2106
  """
2081
2107
  Creates a catalog record (root directory) for the specified user, if one does not already exist.
2082
2108
  """
@@ -2095,7 +2121,7 @@ class Catalog:
2095
2121
 
2096
2122
  def _handle_path_collision(
2097
2123
  self, path: Path, expected_obj_type: type[SchemaObject], expected_snapshot: bool, if_exists: IfExistsParam
2098
- ) -> Optional[SchemaObject]:
2124
+ ) -> SchemaObject | None:
2099
2125
  obj, _, _ = self._prepare_dir_op(add_dir_path=path.parent, add_name=path.name)
2100
2126
 
2101
2127
  if if_exists == IfExistsParam.ERROR and obj is not None: