pixeltable 0.4.18__py3-none-any.whl → 0.4.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (152)
  1. pixeltable/__init__.py +1 -1
  2. pixeltable/_version.py +1 -0
  3. pixeltable/catalog/catalog.py +119 -100
  4. pixeltable/catalog/column.py +104 -115
  5. pixeltable/catalog/globals.py +1 -2
  6. pixeltable/catalog/insertable_table.py +44 -49
  7. pixeltable/catalog/path.py +3 -4
  8. pixeltable/catalog/schema_object.py +4 -4
  9. pixeltable/catalog/table.py +118 -122
  10. pixeltable/catalog/table_metadata.py +6 -6
  11. pixeltable/catalog/table_version.py +322 -257
  12. pixeltable/catalog/table_version_handle.py +4 -4
  13. pixeltable/catalog/table_version_path.py +9 -10
  14. pixeltable/catalog/tbl_ops.py +9 -3
  15. pixeltable/catalog/view.py +34 -28
  16. pixeltable/config.py +14 -10
  17. pixeltable/dataframe.py +68 -77
  18. pixeltable/env.py +74 -64
  19. pixeltable/exec/aggregation_node.py +6 -6
  20. pixeltable/exec/cache_prefetch_node.py +10 -10
  21. pixeltable/exec/data_row_batch.py +3 -3
  22. pixeltable/exec/exec_context.py +4 -5
  23. pixeltable/exec/exec_node.py +5 -5
  24. pixeltable/exec/expr_eval/evaluators.py +6 -6
  25. pixeltable/exec/expr_eval/expr_eval_node.py +8 -7
  26. pixeltable/exec/expr_eval/globals.py +6 -6
  27. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  28. pixeltable/exec/expr_eval/schedulers.py +11 -11
  29. pixeltable/exec/in_memory_data_node.py +2 -2
  30. pixeltable/exec/object_store_save_node.py +14 -17
  31. pixeltable/exec/sql_node.py +25 -25
  32. pixeltable/exprs/arithmetic_expr.py +4 -4
  33. pixeltable/exprs/array_slice.py +2 -2
  34. pixeltable/exprs/column_property_ref.py +3 -3
  35. pixeltable/exprs/column_ref.py +61 -74
  36. pixeltable/exprs/comparison.py +5 -5
  37. pixeltable/exprs/compound_predicate.py +3 -3
  38. pixeltable/exprs/data_row.py +12 -12
  39. pixeltable/exprs/expr.py +41 -31
  40. pixeltable/exprs/expr_dict.py +3 -3
  41. pixeltable/exprs/expr_set.py +3 -3
  42. pixeltable/exprs/function_call.py +14 -14
  43. pixeltable/exprs/in_predicate.py +4 -4
  44. pixeltable/exprs/inline_expr.py +8 -8
  45. pixeltable/exprs/is_null.py +1 -3
  46. pixeltable/exprs/json_mapper.py +8 -8
  47. pixeltable/exprs/json_path.py +6 -6
  48. pixeltable/exprs/literal.py +5 -5
  49. pixeltable/exprs/method_ref.py +2 -2
  50. pixeltable/exprs/object_ref.py +2 -2
  51. pixeltable/exprs/row_builder.py +14 -14
  52. pixeltable/exprs/rowid_ref.py +8 -8
  53. pixeltable/exprs/similarity_expr.py +50 -25
  54. pixeltable/exprs/sql_element_cache.py +4 -4
  55. pixeltable/exprs/string_op.py +2 -2
  56. pixeltable/exprs/type_cast.py +3 -5
  57. pixeltable/func/aggregate_function.py +8 -8
  58. pixeltable/func/callable_function.py +9 -9
  59. pixeltable/func/expr_template_function.py +3 -3
  60. pixeltable/func/function.py +15 -17
  61. pixeltable/func/function_registry.py +6 -7
  62. pixeltable/func/globals.py +2 -3
  63. pixeltable/func/mcp.py +2 -2
  64. pixeltable/func/query_template_function.py +16 -16
  65. pixeltable/func/signature.py +14 -14
  66. pixeltable/func/tools.py +11 -11
  67. pixeltable/func/udf.py +16 -18
  68. pixeltable/functions/__init__.py +1 -0
  69. pixeltable/functions/anthropic.py +7 -7
  70. pixeltable/functions/audio.py +76 -0
  71. pixeltable/functions/bedrock.py +6 -6
  72. pixeltable/functions/deepseek.py +4 -4
  73. pixeltable/functions/fireworks.py +2 -2
  74. pixeltable/functions/gemini.py +6 -6
  75. pixeltable/functions/globals.py +12 -12
  76. pixeltable/functions/groq.py +4 -4
  77. pixeltable/functions/huggingface.py +18 -20
  78. pixeltable/functions/image.py +7 -10
  79. pixeltable/functions/llama_cpp.py +7 -7
  80. pixeltable/functions/math.py +2 -3
  81. pixeltable/functions/mistralai.py +3 -3
  82. pixeltable/functions/ollama.py +9 -9
  83. pixeltable/functions/openai.py +21 -21
  84. pixeltable/functions/openrouter.py +7 -7
  85. pixeltable/functions/string.py +21 -28
  86. pixeltable/functions/timestamp.py +7 -8
  87. pixeltable/functions/together.py +4 -6
  88. pixeltable/functions/twelvelabs.py +92 -0
  89. pixeltable/functions/video.py +2 -24
  90. pixeltable/functions/vision.py +6 -6
  91. pixeltable/functions/whisper.py +7 -7
  92. pixeltable/functions/whisperx.py +16 -16
  93. pixeltable/globals.py +52 -36
  94. pixeltable/index/base.py +12 -8
  95. pixeltable/index/btree.py +19 -22
  96. pixeltable/index/embedding_index.py +30 -39
  97. pixeltable/io/datarows.py +3 -3
  98. pixeltable/io/external_store.py +13 -16
  99. pixeltable/io/fiftyone.py +5 -5
  100. pixeltable/io/globals.py +5 -5
  101. pixeltable/io/hf_datasets.py +4 -4
  102. pixeltable/io/label_studio.py +12 -12
  103. pixeltable/io/pandas.py +6 -6
  104. pixeltable/io/parquet.py +2 -2
  105. pixeltable/io/table_data_conduit.py +12 -12
  106. pixeltable/io/utils.py +2 -2
  107. pixeltable/iterators/audio.py +2 -2
  108. pixeltable/iterators/video.py +8 -13
  109. pixeltable/metadata/converters/convert_18.py +2 -2
  110. pixeltable/metadata/converters/convert_19.py +2 -2
  111. pixeltable/metadata/converters/convert_20.py +2 -2
  112. pixeltable/metadata/converters/convert_21.py +2 -2
  113. pixeltable/metadata/converters/convert_22.py +2 -2
  114. pixeltable/metadata/converters/convert_24.py +2 -2
  115. pixeltable/metadata/converters/convert_25.py +2 -2
  116. pixeltable/metadata/converters/convert_26.py +2 -2
  117. pixeltable/metadata/converters/convert_29.py +4 -4
  118. pixeltable/metadata/converters/convert_34.py +2 -2
  119. pixeltable/metadata/converters/convert_36.py +2 -2
  120. pixeltable/metadata/converters/convert_38.py +2 -2
  121. pixeltable/metadata/converters/convert_39.py +1 -2
  122. pixeltable/metadata/converters/util.py +11 -13
  123. pixeltable/metadata/schema.py +22 -21
  124. pixeltable/metadata/utils.py +2 -6
  125. pixeltable/mypy/mypy_plugin.py +5 -5
  126. pixeltable/plan.py +30 -28
  127. pixeltable/share/packager.py +7 -7
  128. pixeltable/share/publish.py +3 -3
  129. pixeltable/store.py +125 -61
  130. pixeltable/type_system.py +43 -46
  131. pixeltable/utils/__init__.py +1 -2
  132. pixeltable/utils/arrow.py +4 -4
  133. pixeltable/utils/av.py +8 -0
  134. pixeltable/utils/azure_store.py +305 -0
  135. pixeltable/utils/code.py +1 -2
  136. pixeltable/utils/dbms.py +15 -19
  137. pixeltable/utils/description_helper.py +2 -3
  138. pixeltable/utils/documents.py +5 -6
  139. pixeltable/utils/exception_handler.py +2 -2
  140. pixeltable/utils/filecache.py +5 -5
  141. pixeltable/utils/formatter.py +4 -6
  142. pixeltable/utils/gcs_store.py +9 -9
  143. pixeltable/utils/local_store.py +17 -17
  144. pixeltable/utils/object_stores.py +59 -43
  145. pixeltable/utils/s3_store.py +35 -30
  146. {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/METADATA +1 -1
  147. pixeltable-0.4.19.dist-info/RECORD +213 -0
  148. pixeltable/__version__.py +0 -3
  149. pixeltable-0.4.18.dist-info/RECORD +0 -211
  150. {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/WHEEL +0 -0
  151. {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/entry_points.txt +0 -0
  152. {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/licenses/LICENSE +0 -0
pixeltable/__init__.py CHANGED
@@ -4,7 +4,7 @@ Core Pixeltable API for table operations, data processing, and UDF management.
4
4
 
5
5
  # ruff: noqa: F401
6
6
 
7
- from .__version__ import __version__, __version_tuple__
7
+ from ._version import __version__
8
8
  from .catalog import (
9
9
  Column,
10
10
  ColumnMetadata,
pixeltable/_version.py ADDED
@@ -0,0 +1 @@
1
+ __version__: str = '0.4.19'
pixeltable/catalog/catalog.py CHANGED
@@ -7,7 +7,7 @@ import random
7
7
  import time
8
8
  from collections import defaultdict
9
9
  from contextlib import contextmanager
10
- from typing import TYPE_CHECKING, Any, Callable, Iterator, Optional, TypeVar
10
+ from typing import TYPE_CHECKING, Any, Callable, Iterator, TypeVar
11
11
  from uuid import UUID
12
12
 
13
13
  import psycopg
@@ -37,15 +37,13 @@ from .view import View
37
37
  if TYPE_CHECKING:
38
38
  from pixeltable.plan import SampleClause
39
39
 
40
- from .. import DataFrame, exprs
40
+ from .. import exprs
41
41
 
42
42
 
43
43
  _logger = logging.getLogger('pixeltable')
44
44
 
45
45
 
46
- def _unpack_row(
47
- row: Optional[sql.engine.Row], entities: list[type[sql.orm.decl_api.DeclarativeBase]]
48
- ) -> Optional[list[Any]]:
46
+ def _unpack_row(row: sql.engine.Row | None, entities: list[type[sql.orm.decl_api.DeclarativeBase]]) -> list[Any] | None:
49
47
  """Convert a Row result into a list of entity instances.
50
48
 
51
49
  Assumes that the query contains a select() of exactly those entities.
@@ -75,7 +73,7 @@ T = TypeVar('T')
75
73
 
76
74
 
77
75
  def retry_loop(
78
- *, tbl: Optional[TableVersionPath] = None, for_write: bool, lock_mutable_tree: bool = False
76
+ *, tbl: TableVersionPath | None = None, for_write: bool, lock_mutable_tree: bool = False
79
77
  ) -> Callable[[Callable[..., T]], Callable[..., T]]:
80
78
  def decorator(op: Callable[..., T]) -> Callable[..., T]:
81
79
  @functools.wraps(op)
@@ -159,13 +157,13 @@ class Catalog:
159
157
  - metadata validation is only needed for live TableVersion instances (snapshot instances are immutable)
160
158
  """
161
159
 
162
- _instance: Optional[Catalog] = None
160
+ _instance: Catalog | None = None
163
161
 
164
162
  # cached TableVersion instances; key: [id, version]
165
163
  # - mutable version of a table: version == None (even though TableVersion.version is set correctly)
166
164
  # - snapshot versions: records the version of the snapshot
167
- _tbl_versions: dict[tuple[UUID, Optional[int]], TableVersion]
168
- _tbls: dict[tuple[UUID, Optional[int]], Table]
165
+ _tbl_versions: dict[tuple[UUID, int | None], TableVersion]
166
+ _tbls: dict[tuple[UUID, int | None], Table]
169
167
  _in_write_xact: bool # True if we're in a write transaction
170
168
  _x_locked_tbl_ids: set[UUID] # non-empty for write transactions
171
169
  _modified_tvs: set[TableVersionHandle] # TableVersion instances modified in the current transaction
@@ -179,7 +177,7 @@ class Catalog:
179
177
  _column_dependencies: dict[UUID, dict[QColumnId, set[QColumnId]]]
180
178
 
181
179
  # column dependents are recomputed at the beginning of every write transaction and only reflect the locked tree
182
- _column_dependents: Optional[dict[QColumnId, set[QColumnId]]]
180
+ _column_dependents: dict[QColumnId, set[QColumnId]] | None
183
181
 
184
182
  @classmethod
185
183
  def get(cls) -> Catalog:
@@ -258,8 +256,8 @@ class Catalog:
258
256
  def begin_xact(
259
257
  self,
260
258
  *,
261
- tbl: Optional[TableVersionPath] = None,
262
- tbl_id: Optional[UUID] = None,
259
+ tbl: TableVersionPath | None = None,
260
+ tbl_id: UUID | None = None,
263
261
  for_write: bool = False,
264
262
  lock_mutable_tree: bool = False,
265
263
  convert_db_excs: bool = True,
@@ -303,7 +301,7 @@ class Catalog:
303
301
  # )
304
302
  # _logger.debug(f'begin_xact(): {tv_msg}')
305
303
  num_retries = 0
306
- pending_ops_tbl_id: Optional[UUID] = None
304
+ pending_ops_tbl_id: UUID | None = None
307
305
  has_exc = False # True if we exited the 'with ...begin_xact()' block with an exception
308
306
  while True:
309
307
  if pending_ops_tbl_id is not None:
@@ -322,7 +320,7 @@ class Catalog:
322
320
  with Env.get().begin_xact(for_write=for_write) as conn:
323
321
  if tbl is not None or tbl_id is not None:
324
322
  try:
325
- target: Optional[TableVersionHandle] = None
323
+ target: TableVersionHandle | None = None
326
324
  if tbl is not None:
327
325
  if self._acquire_path_locks(
328
326
  tbl=tbl,
@@ -451,7 +449,7 @@ class Catalog:
451
449
  if isinstance(e.orig, psycopg.errors.UndefinedTable) and tbl is not None:
452
450
  # the table got dropped in the middle of the operation
453
451
  tbl_name = tbl.get().name
454
- _logger.debug(f'Exception: undefined table ({tbl_name}): Caught {type(e.orig)}: {e!r}')
452
+ _logger.debug(f'Exception: undefined table {tbl_name!r}: Caught {type(e.orig)}: {e!r}')
455
453
  raise excs.Error(f'Table was dropped: {tbl_name}') from None
456
454
  elif (
457
455
  isinstance(
@@ -490,7 +488,7 @@ class Catalog:
490
488
  tbl: TableVersionPath,
491
489
  for_write: bool = False,
492
490
  lock_mutable_tree: bool = False,
493
- check_pending_ops: Optional[bool] = None,
491
+ check_pending_ops: bool | None = None,
494
492
  ) -> bool:
495
493
  """
496
494
  Path locking protocol:
@@ -524,13 +522,13 @@ class Catalog:
524
522
  self,
525
523
  *,
526
524
  for_write: bool,
527
- tbl_id: Optional[UUID] = None,
528
- dir_id: Optional[UUID] = None,
529
- tbl_name: Optional[str] = None,
525
+ tbl_id: UUID | None = None,
526
+ dir_id: UUID | None = None,
527
+ tbl_name: str | None = None,
530
528
  lock_mutable_tree: bool = False,
531
529
  raise_if_not_exists: bool = True,
532
- check_pending_ops: Optional[bool] = None,
533
- ) -> Optional[TableVersionHandle]:
530
+ check_pending_ops: bool | None = None,
531
+ ) -> TableVersionHandle | None:
534
532
  """
535
533
  For writes: force acquisition of an X-lock on a Table record via a blind update.
536
534
 
@@ -594,7 +592,7 @@ class Catalog:
594
592
  while True:
595
593
  try:
596
594
  tbl_version: int
597
- op: Optional[TableOp] = None
595
+ op: TableOp | None = None
598
596
  delete_next_op_stmt: sql.Delete
599
597
  reset_has_pending_stmt: sql.Update
600
598
  with self.begin_xact(
@@ -613,7 +611,10 @@ class Catalog:
613
611
  row = conn.execute(q).one_or_none()
614
612
  if row is None:
615
613
  return
616
- tbl_version = row.md.get('current_version')
614
+ view_md = row.md.get('view_md')
615
+ is_snapshot = False if view_md is None else view_md.get('is_snapshot')
616
+ assert is_snapshot is not None
617
+ tbl_version = row.md.get('current_version') if is_snapshot else None
617
618
  op = schema.md_from_dict(TableOp, row.op)
618
619
  delete_next_op_stmt = sql.delete(schema.PendingTableOp).where(
619
620
  schema.PendingTableOp.tbl_id == tbl_id, schema.PendingTableOp.op_sn == row.op_sn
@@ -720,7 +721,7 @@ class Catalog:
720
721
  return result
721
722
 
722
723
  def _acquire_dir_xlock(
723
- self, *, parent_id: Optional[UUID] = None, dir_id: Optional[UUID] = None, dir_name: Optional[str] = None
724
+ self, *, parent_id: UUID | None = None, dir_id: UUID | None = None, dir_name: str | None = None
724
725
  ) -> None:
725
726
  """Force acquisition of an X-lock on a Dir record via a blind update.
726
727
 
@@ -760,9 +761,9 @@ class Catalog:
760
761
 
761
762
  @dataclasses.dataclass
762
763
  class DirEntry:
763
- dir: Optional[schema.Dir]
764
+ dir: schema.Dir | None
764
765
  dir_entries: dict[str, Catalog.DirEntry]
765
- table: Optional[schema.Table]
766
+ table: schema.Table | None
766
767
 
767
768
  @retry_loop(for_write=False)
768
769
  def get_dir_contents(self, dir_path: Path, recursive: bool = False) -> dict[str, DirEntry]:
@@ -814,14 +815,14 @@ class Catalog:
814
815
 
815
816
  def _prepare_dir_op(
816
817
  self,
817
- add_dir_path: Optional[Path] = None,
818
- add_name: Optional[str] = None,
819
- drop_dir_path: Optional[Path] = None,
820
- drop_name: Optional[str] = None,
821
- drop_expected: Optional[type[SchemaObject]] = None,
818
+ add_dir_path: Path | None = None,
819
+ add_name: str | None = None,
820
+ drop_dir_path: Path | None = None,
821
+ drop_name: str | None = None,
822
+ drop_expected: type[SchemaObject] | None = None,
822
823
  raise_if_exists: bool = False,
823
824
  raise_if_not_exists: bool = False,
824
- ) -> tuple[Optional[SchemaObject], Optional[Dir], Optional[SchemaObject]]:
825
+ ) -> tuple[SchemaObject | None, Dir | None, SchemaObject | None]:
825
826
  """
826
827
  Validates paths and acquires locks needed for a directory operation, ie, add/drop/rename (add + drop) of a
827
828
  directory entry.
@@ -848,25 +849,29 @@ class Catalog:
848
849
  if drop_dir_path is not None:
849
850
  dir_paths.add(drop_dir_path)
850
851
 
851
- add_dir: Optional[schema.Dir] = None
852
- drop_dir: Optional[schema.Dir] = None
852
+ add_dir: schema.Dir | None = None
853
+ drop_dir: schema.Dir | None = None
853
854
  for p in sorted(dir_paths):
854
855
  dir = self._get_dir(p, lock_dir=True)
855
856
  if dir is None:
856
- raise excs.Error(f'Directory {p!r} does not exist.')
857
+ # Dir does not exist; raise an appropriate error.
858
+ if add_dir_path is not None or add_name is not None:
859
+ raise excs.Error(f'Directory {p!r} does not exist. Create it first with:\npxt.create_dir({p!r})')
860
+ else:
861
+ raise excs.Error(f'Directory {p!r} does not exist.')
857
862
  if p == add_dir_path:
858
863
  add_dir = dir
859
864
  if p == drop_dir_path:
860
865
  drop_dir = dir
861
866
 
862
- add_obj: Optional[SchemaObject] = None
867
+ add_obj: SchemaObject | None = None
863
868
  if add_dir is not None:
864
869
  add_obj = self._get_dir_entry(add_dir.id, add_name, lock_entry=True)
865
870
  if add_obj is not None and raise_if_exists:
866
871
  add_path = add_dir_path.append(add_name)
867
872
  raise excs.Error(f'Path {add_path!r} already exists.')
868
873
 
869
- drop_obj: Optional[SchemaObject] = None
874
+ drop_obj: SchemaObject | None = None
870
875
  if drop_dir is not None:
871
876
  drop_path = drop_dir_path.append(drop_name)
872
877
  drop_obj = self._get_dir_entry(drop_dir.id, drop_name, lock_entry=True)
@@ -880,8 +885,8 @@ class Catalog:
880
885
  return add_obj, add_dir_obj, drop_obj
881
886
 
882
887
  def _get_dir_entry(
883
- self, dir_id: UUID, name: str, version: Optional[int] = None, lock_entry: bool = False
884
- ) -> Optional[SchemaObject]:
888
+ self, dir_id: UUID, name: str, version: int | None = None, lock_entry: bool = False
889
+ ) -> SchemaObject | None:
885
890
  user = Env.get().user
886
891
  conn = Env.get().conn
887
892
 
@@ -918,12 +923,12 @@ class Catalog:
918
923
  def _get_schema_object(
919
924
  self,
920
925
  path: Path,
921
- expected: Optional[type[SchemaObject]] = None,
926
+ expected: type[SchemaObject] | None = None,
922
927
  raise_if_exists: bool = False,
923
928
  raise_if_not_exists: bool = False,
924
929
  lock_parent: bool = False,
925
930
  lock_obj: bool = False,
926
- ) -> Optional[SchemaObject]:
931
+ ) -> SchemaObject | None:
927
932
  """Return the schema object at the given path, or None if it doesn't exist.
928
933
 
929
934
  Raises Error if
@@ -958,7 +963,7 @@ class Catalog:
958
963
  raise excs.Error(f'{path!r} needs to be a {expected_name} but is a {obj._display_name()}.')
959
964
  return obj
960
965
 
961
- def get_table_by_id(self, tbl_id: UUID, version: Optional[int] = None) -> Optional[Table]:
966
+ def get_table_by_id(self, tbl_id: UUID, version: int | None = None) -> Table | None:
962
967
  """Must be executed inside a transaction. Might raise PendingTableOpsError."""
963
968
  if (tbl_id, version) not in self._tbls:
964
969
  if version is None:
@@ -967,17 +972,16 @@ class Catalog:
967
972
  return self._load_tbl_at_version(tbl_id, version)
968
973
  return self._tbls.get((tbl_id, version))
969
974
 
970
- @retry_loop(for_write=True)
971
975
  def create_table(
972
976
  self,
973
977
  path: Path,
974
978
  schema: dict[str, Any],
975
- df: 'DataFrame',
976
979
  if_exists: IfExistsParam,
977
- primary_key: Optional[list[str]],
980
+ primary_key: list[str] | None,
978
981
  num_retained_versions: int,
979
982
  comment: str,
980
983
  media_validation: MediaValidation,
984
+ create_default_idxs: bool,
981
985
  ) -> tuple[Table, bool]:
982
986
  """
983
987
  Creates a new InsertableTable at the given path.
@@ -986,37 +990,49 @@ class Catalog:
986
990
 
987
991
  Otherwise, creates a new table `t` and returns `t, True` (or raises an exception if the operation fails).
988
992
  """
989
- existing = self._handle_path_collision(path, InsertableTable, False, if_exists)
990
- if existing is not None:
991
- assert isinstance(existing, Table)
992
- return existing, False
993
993
 
994
- dir = self._get_schema_object(path.parent, expected=Dir, raise_if_not_exists=True)
995
- assert dir is not None
994
+ @retry_loop(for_write=True)
995
+ def create_fn() -> tuple[UUID, bool]:
996
+ existing = self._handle_path_collision(path, InsertableTable, False, if_exists)
997
+ if existing is not None:
998
+ assert isinstance(existing, Table)
999
+ return existing._id, False
996
1000
 
997
- tbl = InsertableTable._create(
998
- dir._id,
999
- path.name,
1000
- schema,
1001
- df,
1002
- primary_key=primary_key,
1003
- num_retained_versions=num_retained_versions,
1004
- comment=comment,
1005
- media_validation=media_validation,
1006
- )
1007
- self._tbls[tbl._id, None] = tbl
1008
- return tbl, True
1001
+ dir = self._get_schema_object(path.parent, expected=Dir, raise_if_not_exists=True)
1002
+ assert dir is not None
1003
+
1004
+ md, ops = InsertableTable._create(
1005
+ path.name,
1006
+ schema,
1007
+ primary_key=primary_key,
1008
+ num_retained_versions=num_retained_versions,
1009
+ comment=comment,
1010
+ media_validation=media_validation,
1011
+ create_default_idxs=create_default_idxs,
1012
+ )
1013
+ tbl_id = UUID(md.tbl_md.tbl_id)
1014
+ self.store_tbl_md(tbl_id, dir._id, md.tbl_md, md.version_md, md.schema_version_md, ops)
1015
+ return tbl_id, True
1016
+
1017
+ tbl_id, is_created = create_fn()
1018
+ # finalize pending ops
1019
+ with self.begin_xact(tbl_id=tbl_id, for_write=True, finalize_pending_ops=True):
1020
+ tbl = self.get_table_by_id(tbl_id)
1021
+ _logger.info(f'Created table {tbl._name!r}, id={tbl._id}')
1022
+ Env.get().console_logger.info(f'Created table {tbl._name!r}.')
1023
+ return tbl, is_created
1009
1024
 
1010
1025
  def create_view(
1011
1026
  self,
1012
1027
  path: Path,
1013
1028
  base: TableVersionPath,
1014
- select_list: Optional[list[tuple[exprs.Expr, Optional[str]]]],
1015
- where: Optional[exprs.Expr],
1016
- sample_clause: Optional['SampleClause'],
1017
- additional_columns: Optional[dict[str, Any]],
1029
+ select_list: list[tuple[exprs.Expr, str | None]] | None,
1030
+ where: exprs.Expr | None,
1031
+ sample_clause: 'SampleClause' | None,
1032
+ additional_columns: dict[str, Any] | None,
1018
1033
  is_snapshot: bool,
1019
- iterator: Optional[tuple[type[ComponentIterator], dict[str, Any]]],
1034
+ create_default_idxs: bool,
1035
+ iterator: tuple[type[ComponentIterator], dict[str, Any]] | None,
1020
1036
  num_retained_versions: int,
1021
1037
  comment: str,
1022
1038
  media_validation: MediaValidation,
@@ -1057,6 +1073,7 @@ class Catalog:
1057
1073
  predicate=where,
1058
1074
  sample_clause=sample_clause,
1059
1075
  is_snapshot=is_snapshot,
1076
+ create_default_idxs=create_default_idxs,
1060
1077
  iterator_cls=iterator_class,
1061
1078
  iterator_args=iterator_args,
1062
1079
  num_retained_versions=num_retained_versions,
@@ -1079,7 +1096,7 @@ class Catalog:
1079
1096
  with self.begin_xact(tbl_id=view_id, for_write=True, finalize_pending_ops=True):
1080
1097
  return self.get_table_by_id(view_id)
1081
1098
 
1082
- def _clear_tv_cache(self, tbl_id: UUID, effective_version: Optional[int]) -> None:
1099
+ def _clear_tv_cache(self, tbl_id: UUID, effective_version: int | None) -> None:
1083
1100
  if (tbl_id, effective_version) in self._tbl_versions:
1084
1101
  tv = self._tbl_versions[tbl_id, effective_version]
1085
1102
  tv.is_validated = False
@@ -1176,9 +1193,9 @@ class Catalog:
1176
1193
  conn = Env.get().conn
1177
1194
  tbl_id = md.tbl_md.tbl_id
1178
1195
 
1179
- new_tbl_md: Optional[schema.TableMd] = None
1180
- new_version_md: Optional[schema.TableVersionMd] = None
1181
- new_schema_version_md: Optional[schema.TableSchemaVersionMd] = None
1196
+ new_tbl_md: schema.TableMd | None = None
1197
+ new_version_md: schema.TableVersionMd | None = None
1198
+ new_schema_version_md: schema.TableSchemaVersionMd | None = None
1182
1199
  is_new_tbl_version: bool = False
1183
1200
 
1184
1201
  # We need to ensure that the table metadata in the catalog always reflects the latest observed version of
@@ -1352,11 +1369,11 @@ class Catalog:
1352
1369
  msg: str
1353
1370
  if is_replace:
1354
1371
  msg = (
1355
- f'{tbl._display_name()} {tbl._path()!r} already exists and has dependents. '
1372
+ f'{tbl._display_str()} already exists and has dependents. '
1356
1373
  "Use `if_exists='replace_force'` to replace it."
1357
1374
  )
1358
1375
  else:
1359
- msg = f'{tbl._display_name()} {tbl._path()!r} has dependents.'
1376
+ msg = f'{tbl._display_str()} has dependents.'
1360
1377
  raise excs.Error(msg)
1361
1378
 
1362
1379
  # if this is a mutable view of a mutable base, advance the base's view_sn
@@ -1431,7 +1448,7 @@ class Catalog:
1431
1448
 
1432
1449
  if parents:
1433
1450
  # start walking down from the root
1434
- last_parent: Optional[SchemaObject] = None
1451
+ last_parent: SchemaObject | None = None
1435
1452
  for ancestor in path.ancestors():
1436
1453
  ancestor_obj = self._get_schema_object(ancestor, expected=Dir)
1437
1454
  assert ancestor_obj is not None or last_parent is not None
@@ -1507,10 +1524,10 @@ class Catalog:
1507
1524
  def get_tbl_version(
1508
1525
  self,
1509
1526
  tbl_id: UUID,
1510
- effective_version: Optional[int],
1511
- check_pending_ops: Optional[bool] = None,
1527
+ effective_version: int | None,
1528
+ check_pending_ops: bool | None = None,
1512
1529
  validate_initialized: bool = False,
1513
- ) -> Optional[TableVersion]:
1530
+ ) -> TableVersion | None:
1514
1531
  """
1515
1532
  Returns the TableVersion instance for the given table and version and updates the cache.
1516
1533
 
@@ -1559,7 +1576,7 @@ class Catalog:
1559
1576
  assert (tbl_version.id, tbl_version.effective_version) in self._tbl_versions
1560
1577
  del self._tbl_versions[tbl_version.id, tbl_version.effective_version]
1561
1578
 
1562
- def get_dir(self, dir_id: UUID, for_update: bool = False) -> Optional[Dir]:
1579
+ def get_dir(self, dir_id: UUID, for_update: bool = False) -> Dir | None:
1563
1580
  """Return the Dir with the given id, or None if it doesn't exist"""
1564
1581
  conn = Env.get().conn
1565
1582
  if for_update:
@@ -1571,7 +1588,7 @@ class Catalog:
1571
1588
  dir_record = schema.Dir(**row._mapping)
1572
1589
  return Dir(dir_record.id, dir_record.parent_id, dir_record.md['name'])
1573
1590
 
1574
- def _get_dir(self, path: Path, lock_dir: bool = False) -> Optional[schema.Dir]:
1591
+ def _get_dir(self, path: Path, lock_dir: bool = False) -> schema.Dir | None:
1575
1592
  """
1576
1593
  lock_dir: if True, X-locks target (but not the ancestors)
1577
1594
  """
@@ -1597,7 +1614,7 @@ class Catalog:
1597
1614
  row = conn.execute(q).one_or_none()
1598
1615
  return schema.Dir(**row._mapping) if row is not None else None
1599
1616
 
1600
- def _load_tbl(self, tbl_id: UUID) -> Optional[Table]:
1617
+ def _load_tbl(self, tbl_id: UUID) -> Table | None:
1601
1618
  """Loads metadata for the table with the given id and caches it."""
1602
1619
  _logger.info(f'Loading table {tbl_id}')
1603
1620
  from .insertable_table import InsertableTable
@@ -1657,7 +1674,7 @@ class Catalog:
1657
1674
  return tbl
1658
1675
 
1659
1676
  # this is a view; determine the sequence of TableVersions to load
1660
- tbl_version_path: list[tuple[UUID, Optional[int]]] = []
1677
+ tbl_version_path: list[tuple[UUID, int | None]] = []
1661
1678
  if tbl_md.is_pure_snapshot:
1662
1679
  # this is a pure snapshot, without a physical table backing it; we only need the bases
1663
1680
  pass
@@ -1670,8 +1687,8 @@ class Catalog:
1670
1687
  tbl_version_path.extend((UUID(tbl_id), version) for tbl_id, version in view_md.base_versions)
1671
1688
 
1672
1689
  # load TableVersions, starting at the root
1673
- base_path: Optional[TableVersionPath] = None
1674
- view_path: Optional[TableVersionPath] = None
1690
+ base_path: TableVersionPath | None = None
1691
+ view_path: TableVersionPath | None = None
1675
1692
  for id, effective_version in tbl_version_path[::-1]:
1676
1693
  if (id, effective_version) not in self._tbl_versions:
1677
1694
  _ = self._load_tbl_version(id, effective_version)
@@ -1681,7 +1698,7 @@ class Catalog:
1681
1698
  self._tbls[tbl_id, None] = view
1682
1699
  return view
1683
1700
 
1684
- def _load_tbl_at_version(self, tbl_id: UUID, version: int) -> Optional[Table]:
1701
+ def _load_tbl_at_version(self, tbl_id: UUID, version: int) -> Table | None:
1685
1702
  from .view import View
1686
1703
 
1687
1704
  # Load the specified TableMd and TableVersionMd records from the db.
@@ -1708,7 +1725,7 @@ class Catalog:
1708
1725
  # Build the list of ancestor versions, starting with the given table and traversing back to the base table.
1709
1726
  # For each proper ancestor, we use the version whose created_at timestamp equals or most nearly precedes the
1710
1727
  # given TableVersion's created_at timestamp.
1711
- ancestors: list[tuple[UUID, Optional[int]]] = [(tbl_id, version)]
1728
+ ancestors: list[tuple[UUID, int | None]] = [(tbl_id, version)]
1712
1729
  if tbl_md.view_md is not None:
1713
1730
  for ancestor_id, _ in tbl_md.view_md.base_versions:
1714
1731
  q = (
@@ -1735,7 +1752,7 @@ class Catalog:
1735
1752
  _ = self._load_tbl_version(anc_id, anc_version)
1736
1753
 
1737
1754
  # Now reconstruct the relevant TableVersionPath instance from the ancestor versions.
1738
- tvp: Optional[TableVersionPath] = None
1755
+ tvp: TableVersionPath | None = None
1739
1756
  for anc_id, anc_version in ancestors[::-1]:
1740
1757
  tvp = TableVersionPath(TableVersionHandle(anc_id, anc_version), base=tvp)
1741
1758
 
@@ -1744,10 +1761,10 @@ class Catalog:
1744
1761
  return view
1745
1762
 
1746
1763
  @retry_loop(for_write=False)
1747
- def collect_tbl_history(self, tbl_id: UUID, n: Optional[int]) -> list[schema.FullTableMd]:
1764
+ def collect_tbl_history(self, tbl_id: UUID, n: int | None) -> list[schema.FullTableMd]:
1748
1765
  return self._collect_tbl_history(tbl_id, n)
1749
1766
 
1750
- def _collect_tbl_history(self, tbl_id: UUID, n: Optional[int]) -> list[schema.FullTableMd]:
1767
+ def _collect_tbl_history(self, tbl_id: UUID, n: int | None) -> list[schema.FullTableMd]:
1751
1768
  """
1752
1769
  Returns the history of up to n versions of the table with the given UUID.
1753
1770
 
@@ -1783,7 +1800,7 @@ class Catalog:
1783
1800
  for row in src_rows
1784
1801
  ]
1785
1802
 
1786
- def load_tbl_md(self, tbl_id: UUID, effective_version: Optional[int]) -> schema.FullTableMd:
1803
+ def load_tbl_md(self, tbl_id: UUID, effective_version: int | None) -> schema.FullTableMd:
1787
1804
  """
1788
1805
  Loads metadata from the store for a given table UUID and version.
1789
1806
  """
@@ -1839,11 +1856,11 @@ class Catalog:
1839
1856
  def store_tbl_md(
1840
1857
  self,
1841
1858
  tbl_id: UUID,
1842
- dir_id: Optional[UUID],
1843
- tbl_md: Optional[schema.TableMd],
1844
- version_md: Optional[schema.TableVersionMd],
1845
- schema_version_md: Optional[schema.TableSchemaVersionMd],
1846
- pending_ops: Optional[list[TableOp]] = None,
1859
+ dir_id: UUID | None,
1860
+ tbl_md: schema.TableMd | None,
1861
+ version_md: schema.TableVersionMd | None,
1862
+ schema_version_md: schema.TableSchemaVersionMd | None,
1863
+ pending_ops: list[TableOp] | None = None,
1847
1864
  ) -> None:
1848
1865
  """
1849
1866
  Stores metadata to the DB.
@@ -1885,6 +1902,7 @@ class Catalog:
1885
1902
  .values({schema.Table.md: dataclasses.asdict(tbl_md)})
1886
1903
  .where(schema.Table.id == tbl_id)
1887
1904
  )
1905
+ assert isinstance(result, sql.CursorResult)
1888
1906
  assert result.rowcount == 1, result.rowcount
1889
1907
 
1890
1908
  # Construct and insert new table version record if requested.
@@ -1914,6 +1932,7 @@ class Catalog:
1914
1932
  .values({schema.TableVersion.md: dataclasses.asdict(version_md)})
1915
1933
  .where(schema.TableVersion.tbl_id == tbl_id, schema.TableVersion.version == version_md.version)
1916
1934
  )
1935
+ assert isinstance(result, sql.CursorResult)
1917
1936
  assert result.rowcount == 1, result.rowcount
1918
1937
 
1919
1938
  # Construct and insert a new schema version record if requested.
@@ -1995,8 +2014,8 @@ class Catalog:
1995
2014
  return md
1996
2015
 
1997
2016
  def _load_tbl_version(
1998
- self, tbl_id: UUID, effective_version: Optional[int], check_pending_ops: bool = True
1999
- ) -> Optional[TableVersion]:
2017
+ self, tbl_id: UUID, effective_version: int | None, check_pending_ops: bool = True
2018
+ ) -> TableVersion | None:
2000
2019
  """Creates TableVersion instance from stored metadata and registers it in _tbl_versions."""
2001
2020
  tbl_md, version_md, schema_version_md = self.load_tbl_md(tbl_id, effective_version)
2002
2021
  view_md = tbl_md.view_md
@@ -2052,7 +2071,7 @@ class Catalog:
2052
2071
  assert not pure_snapshot # a pure snapshot doesn't have a physical table backing it, no point in loading it
2053
2072
 
2054
2073
  base: TableVersionHandle
2055
- base_path: Optional[TableVersionPath] = None # needed for live view
2074
+ base_path: TableVersionPath | None = None # needed for live view
2056
2075
  if view_md.is_snapshot:
2057
2076
  base = TableVersionHandle(UUID(view_md.base_versions[0][0]), view_md.base_versions[0][1])
2058
2077
  else:
@@ -2083,7 +2102,7 @@ class Catalog:
2083
2102
  self.create_user(None)
2084
2103
  _logger.info('Initialized catalog.')
2085
2104
 
2086
- def create_user(self, user: Optional[str]) -> None:
2105
+ def create_user(self, user: str | None) -> None:
2087
2106
  """
2088
2107
  Creates a catalog record (root directory) for the specified user, if one does not already exist.
2089
2108
  """
@@ -2102,7 +2121,7 @@ class Catalog:
2102
2121
 
2103
2122
  def _handle_path_collision(
2104
2123
  self, path: Path, expected_obj_type: type[SchemaObject], expected_snapshot: bool, if_exists: IfExistsParam
2105
- ) -> Optional[SchemaObject]:
2124
+ ) -> SchemaObject | None:
2106
2125
  obj, _, _ = self._prepare_dir_op(add_dir_path=path.parent, add_name=path.name)
2107
2126
 
2108
2127
  if if_exists == IfExistsParam.ERROR and obj is not None: