pixeltable 0.4.18__py3-none-any.whl → 0.4.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Note: this release has been flagged as potentially problematic by the registry scanner; see the registry's advisory page for pixeltable 0.4.19 for details.

Files changed (152):
  1. pixeltable/__init__.py +1 -1
  2. pixeltable/_version.py +1 -0
  3. pixeltable/catalog/catalog.py +119 -100
  4. pixeltable/catalog/column.py +104 -115
  5. pixeltable/catalog/globals.py +1 -2
  6. pixeltable/catalog/insertable_table.py +44 -49
  7. pixeltable/catalog/path.py +3 -4
  8. pixeltable/catalog/schema_object.py +4 -4
  9. pixeltable/catalog/table.py +118 -122
  10. pixeltable/catalog/table_metadata.py +6 -6
  11. pixeltable/catalog/table_version.py +322 -257
  12. pixeltable/catalog/table_version_handle.py +4 -4
  13. pixeltable/catalog/table_version_path.py +9 -10
  14. pixeltable/catalog/tbl_ops.py +9 -3
  15. pixeltable/catalog/view.py +34 -28
  16. pixeltable/config.py +14 -10
  17. pixeltable/dataframe.py +68 -77
  18. pixeltable/env.py +74 -64
  19. pixeltable/exec/aggregation_node.py +6 -6
  20. pixeltable/exec/cache_prefetch_node.py +10 -10
  21. pixeltable/exec/data_row_batch.py +3 -3
  22. pixeltable/exec/exec_context.py +4 -5
  23. pixeltable/exec/exec_node.py +5 -5
  24. pixeltable/exec/expr_eval/evaluators.py +6 -6
  25. pixeltable/exec/expr_eval/expr_eval_node.py +8 -7
  26. pixeltable/exec/expr_eval/globals.py +6 -6
  27. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  28. pixeltable/exec/expr_eval/schedulers.py +11 -11
  29. pixeltable/exec/in_memory_data_node.py +2 -2
  30. pixeltable/exec/object_store_save_node.py +14 -17
  31. pixeltable/exec/sql_node.py +25 -25
  32. pixeltable/exprs/arithmetic_expr.py +4 -4
  33. pixeltable/exprs/array_slice.py +2 -2
  34. pixeltable/exprs/column_property_ref.py +3 -3
  35. pixeltable/exprs/column_ref.py +61 -74
  36. pixeltable/exprs/comparison.py +5 -5
  37. pixeltable/exprs/compound_predicate.py +3 -3
  38. pixeltable/exprs/data_row.py +12 -12
  39. pixeltable/exprs/expr.py +41 -31
  40. pixeltable/exprs/expr_dict.py +3 -3
  41. pixeltable/exprs/expr_set.py +3 -3
  42. pixeltable/exprs/function_call.py +14 -14
  43. pixeltable/exprs/in_predicate.py +4 -4
  44. pixeltable/exprs/inline_expr.py +8 -8
  45. pixeltable/exprs/is_null.py +1 -3
  46. pixeltable/exprs/json_mapper.py +8 -8
  47. pixeltable/exprs/json_path.py +6 -6
  48. pixeltable/exprs/literal.py +5 -5
  49. pixeltable/exprs/method_ref.py +2 -2
  50. pixeltable/exprs/object_ref.py +2 -2
  51. pixeltable/exprs/row_builder.py +14 -14
  52. pixeltable/exprs/rowid_ref.py +8 -8
  53. pixeltable/exprs/similarity_expr.py +50 -25
  54. pixeltable/exprs/sql_element_cache.py +4 -4
  55. pixeltable/exprs/string_op.py +2 -2
  56. pixeltable/exprs/type_cast.py +3 -5
  57. pixeltable/func/aggregate_function.py +8 -8
  58. pixeltable/func/callable_function.py +9 -9
  59. pixeltable/func/expr_template_function.py +3 -3
  60. pixeltable/func/function.py +15 -17
  61. pixeltable/func/function_registry.py +6 -7
  62. pixeltable/func/globals.py +2 -3
  63. pixeltable/func/mcp.py +2 -2
  64. pixeltable/func/query_template_function.py +16 -16
  65. pixeltable/func/signature.py +14 -14
  66. pixeltable/func/tools.py +11 -11
  67. pixeltable/func/udf.py +16 -18
  68. pixeltable/functions/__init__.py +1 -0
  69. pixeltable/functions/anthropic.py +7 -7
  70. pixeltable/functions/audio.py +76 -0
  71. pixeltable/functions/bedrock.py +6 -6
  72. pixeltable/functions/deepseek.py +4 -4
  73. pixeltable/functions/fireworks.py +2 -2
  74. pixeltable/functions/gemini.py +6 -6
  75. pixeltable/functions/globals.py +12 -12
  76. pixeltable/functions/groq.py +4 -4
  77. pixeltable/functions/huggingface.py +18 -20
  78. pixeltable/functions/image.py +7 -10
  79. pixeltable/functions/llama_cpp.py +7 -7
  80. pixeltable/functions/math.py +2 -3
  81. pixeltable/functions/mistralai.py +3 -3
  82. pixeltable/functions/ollama.py +9 -9
  83. pixeltable/functions/openai.py +21 -21
  84. pixeltable/functions/openrouter.py +7 -7
  85. pixeltable/functions/string.py +21 -28
  86. pixeltable/functions/timestamp.py +7 -8
  87. pixeltable/functions/together.py +4 -6
  88. pixeltable/functions/twelvelabs.py +92 -0
  89. pixeltable/functions/video.py +2 -24
  90. pixeltable/functions/vision.py +6 -6
  91. pixeltable/functions/whisper.py +7 -7
  92. pixeltable/functions/whisperx.py +16 -16
  93. pixeltable/globals.py +52 -36
  94. pixeltable/index/base.py +12 -8
  95. pixeltable/index/btree.py +19 -22
  96. pixeltable/index/embedding_index.py +30 -39
  97. pixeltable/io/datarows.py +3 -3
  98. pixeltable/io/external_store.py +13 -16
  99. pixeltable/io/fiftyone.py +5 -5
  100. pixeltable/io/globals.py +5 -5
  101. pixeltable/io/hf_datasets.py +4 -4
  102. pixeltable/io/label_studio.py +12 -12
  103. pixeltable/io/pandas.py +6 -6
  104. pixeltable/io/parquet.py +2 -2
  105. pixeltable/io/table_data_conduit.py +12 -12
  106. pixeltable/io/utils.py +2 -2
  107. pixeltable/iterators/audio.py +2 -2
  108. pixeltable/iterators/video.py +8 -13
  109. pixeltable/metadata/converters/convert_18.py +2 -2
  110. pixeltable/metadata/converters/convert_19.py +2 -2
  111. pixeltable/metadata/converters/convert_20.py +2 -2
  112. pixeltable/metadata/converters/convert_21.py +2 -2
  113. pixeltable/metadata/converters/convert_22.py +2 -2
  114. pixeltable/metadata/converters/convert_24.py +2 -2
  115. pixeltable/metadata/converters/convert_25.py +2 -2
  116. pixeltable/metadata/converters/convert_26.py +2 -2
  117. pixeltable/metadata/converters/convert_29.py +4 -4
  118. pixeltable/metadata/converters/convert_34.py +2 -2
  119. pixeltable/metadata/converters/convert_36.py +2 -2
  120. pixeltable/metadata/converters/convert_38.py +2 -2
  121. pixeltable/metadata/converters/convert_39.py +1 -2
  122. pixeltable/metadata/converters/util.py +11 -13
  123. pixeltable/metadata/schema.py +22 -21
  124. pixeltable/metadata/utils.py +2 -6
  125. pixeltable/mypy/mypy_plugin.py +5 -5
  126. pixeltable/plan.py +30 -28
  127. pixeltable/share/packager.py +7 -7
  128. pixeltable/share/publish.py +3 -3
  129. pixeltable/store.py +125 -61
  130. pixeltable/type_system.py +43 -46
  131. pixeltable/utils/__init__.py +1 -2
  132. pixeltable/utils/arrow.py +4 -4
  133. pixeltable/utils/av.py +8 -0
  134. pixeltable/utils/azure_store.py +305 -0
  135. pixeltable/utils/code.py +1 -2
  136. pixeltable/utils/dbms.py +15 -19
  137. pixeltable/utils/description_helper.py +2 -3
  138. pixeltable/utils/documents.py +5 -6
  139. pixeltable/utils/exception_handler.py +2 -2
  140. pixeltable/utils/filecache.py +5 -5
  141. pixeltable/utils/formatter.py +4 -6
  142. pixeltable/utils/gcs_store.py +9 -9
  143. pixeltable/utils/local_store.py +17 -17
  144. pixeltable/utils/object_stores.py +59 -43
  145. pixeltable/utils/s3_store.py +35 -30
  146. {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/METADATA +1 -1
  147. pixeltable-0.4.19.dist-info/RECORD +213 -0
  148. pixeltable/__version__.py +0 -3
  149. pixeltable-0.4.18.dist-info/RECORD +0 -211
  150. {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/WHEEL +0 -0
  151. {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/entry_points.txt +0 -0
  152. {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/licenses/LICENSE +0 -0
@@ -7,7 +7,7 @@ import json
7
7
  import logging
8
8
  from keyword import iskeyword as is_python_keyword
9
9
  from pathlib import Path
10
- from typing import TYPE_CHECKING, Any, Iterable, Literal, Optional, overload
10
+ from typing import TYPE_CHECKING, Any, Iterable, Literal, overload
11
11
  from uuid import UUID
12
12
 
13
13
  import pandas as pd
@@ -69,7 +69,7 @@ class Table(SchemaObject):
69
69
  _tbl_version_path: TableVersionPath
70
70
 
71
71
  # the physical TableVersion backing this Table; None for pure snapshots
72
- _tbl_version: Optional[TableVersionHandle]
72
+ _tbl_version: TableVersionHandle | None
73
73
 
74
74
  def __init__(self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath):
75
75
  super().__init__(id, name, dir_id)
@@ -128,7 +128,7 @@ class Table(SchemaObject):
128
128
  is_primary_key=col.is_pk,
129
129
  media_validation=col.media_validation.name.lower() if col.media_validation is not None else None, # type: ignore[typeddict-item]
130
130
  computed_with=col.value_expr.display_str(inline=False) if col.value_expr is not None else None,
131
- defined_in=col.tbl.name,
131
+ defined_in=col.get_tbl().name,
132
132
  )
133
133
  # Pure snapshots have no indices
134
134
  indices = self._tbl_version.get().idxs_by_name.values() if self._tbl_version is not None else {}
@@ -178,7 +178,7 @@ class Table(SchemaObject):
178
178
  """Return a ColumnRef for the given name."""
179
179
  col = self._tbl_version_path.get_column(name)
180
180
  if col is None:
181
- raise AttributeError(f'Column {name!r} unknown')
181
+ raise AttributeError(f'Unknown column: {name}')
182
182
  return ColumnRef(col, reference_tbl=self._tbl_version_path)
183
183
 
184
184
  def __getitem__(self, name: str) -> 'exprs.ColumnRef':
@@ -243,11 +243,7 @@ class Table(SchemaObject):
243
243
  return self._df().where(pred)
244
244
 
245
245
  def join(
246
- self,
247
- other: 'Table',
248
- *,
249
- on: Optional['exprs.Expr'] = None,
250
- how: 'pixeltable.plan.JoinType.LiteralType' = 'inner',
246
+ self, other: 'Table', *, on: 'exprs.Expr' | None = None, how: 'pixeltable.plan.JoinType.LiteralType' = 'inner'
251
247
  ) -> 'pxt.DataFrame':
252
248
  """Join this table with another table."""
253
249
  from pixeltable.catalog import Catalog
@@ -284,10 +280,10 @@ class Table(SchemaObject):
284
280
 
285
281
  def sample(
286
282
  self,
287
- n: Optional[int] = None,
288
- n_per_stratum: Optional[int] = None,
289
- fraction: Optional[float] = None,
290
- seed: Optional[int] = None,
283
+ n: int | None = None,
284
+ n_per_stratum: int | None = None,
285
+ fraction: float | None = None,
286
+ seed: int | None = None,
291
287
  stratify_by: Any = None,
292
288
  ) -> pxt.DataFrame:
293
289
  """Choose a shuffled sample of rows
@@ -327,11 +323,11 @@ class Table(SchemaObject):
327
323
  """Return the schema (column names and column types) of this table."""
328
324
  return {c.name: c.col_type for c in self._tbl_version_path.columns()}
329
325
 
330
- def get_base_table(self) -> Optional['Table']:
326
+ def get_base_table(self) -> 'Table' | None:
331
327
  return self._get_base_table()
332
328
 
333
329
  @abc.abstractmethod
334
- def _get_base_table(self) -> Optional['Table']:
330
+ def _get_base_table(self) -> 'Table' | None:
335
331
  """The base's Table instance. Requires a transaction context"""
336
332
 
337
333
  def _get_base_tables(self) -> list['Table']:
@@ -345,7 +341,7 @@ class Table(SchemaObject):
345
341
 
346
342
  @property
347
343
  @abc.abstractmethod
348
- def _effective_base_versions(self) -> list[Optional[int]]:
344
+ def _effective_base_versions(self) -> list[int | None]:
349
345
  """The effective versions of the ancestor bases, starting with its immediate base."""
350
346
 
351
347
  def _get_comment(self) -> str:
@@ -383,7 +379,7 @@ class Table(SchemaObject):
383
379
  helper.append(f'COMMENT: {self._get_comment()}')
384
380
  return helper
385
381
 
386
- def _col_descriptor(self, columns: Optional[list[str]] = None) -> pd.DataFrame:
382
+ def _col_descriptor(self, columns: list[str] | None = None) -> pd.DataFrame:
387
383
  return pd.DataFrame(
388
384
  {
389
385
  'Column Name': col.name,
@@ -394,7 +390,7 @@ class Table(SchemaObject):
394
390
  if columns is None or col.name in columns
395
391
  )
396
392
 
397
- def _index_descriptor(self, columns: Optional[list[str]] = None) -> pd.DataFrame:
393
+ def _index_descriptor(self, columns: list[str] | None = None) -> pd.DataFrame:
398
394
  from pixeltable import index
399
395
 
400
396
  if self._tbl_version is None:
@@ -453,7 +449,7 @@ class Table(SchemaObject):
453
449
  assert col is not None
454
450
  assert col.name in self._get_schema()
455
451
  cat = catalog.Catalog.get()
456
- if any(c.name is not None for c in cat.get_column_dependents(col.tbl.id, col.id)):
452
+ if any(c.name is not None for c in cat.get_column_dependents(col.get_tbl().id, col.id)):
457
453
  return True
458
454
  assert self._tbl_version is not None
459
455
  return any(
@@ -473,7 +469,7 @@ class Table(SchemaObject):
473
469
  for new_col_name in new_col_names:
474
470
  if new_col_name in existing_col_names:
475
471
  if if_exists == IfExistsParam.ERROR:
476
- raise excs.Error(f'Duplicate column name: {new_col_name!r}')
472
+ raise excs.Error(f'Duplicate column name: {new_col_name}')
477
473
  elif if_exists == IfExistsParam.IGNORE:
478
474
  cols_to_ignore.append(new_col_name)
479
475
  elif if_exists in (IfExistsParam.REPLACE, IfExistsParam.REPLACE_FORCE):
@@ -602,8 +598,8 @@ class Table(SchemaObject):
602
598
  # verify kwargs and construct column schema dict
603
599
  if len(kwargs) != 1:
604
600
  raise excs.Error(
605
- f'add_column() requires exactly one keyword argument of the form "col_name=col_type"; '
606
- f'got {len(kwargs)} instead ({", ".join(kwargs.keys())})'
601
+ f'add_column() requires exactly one keyword argument of the form `col_name=col_type`; '
602
+ f'got {len(kwargs)} arguments instead ({", ".join(kwargs.keys())})'
607
603
  )
608
604
  col_type = next(iter(kwargs.values()))
609
605
  if not isinstance(col_type, (ts.ColumnType, type, _GenericAlias)):
@@ -615,8 +611,8 @@ class Table(SchemaObject):
615
611
  def add_computed_column(
616
612
  self,
617
613
  *,
618
- stored: Optional[bool] = None,
619
- destination: Optional[str | Path] = None,
614
+ stored: bool | None = None,
615
+ destination: str | Path | None = None,
620
616
  print_stats: bool = False,
621
617
  on_error: Literal['abort', 'ignore'] = 'abort',
622
618
  if_exists: Literal['error', 'ignore', 'replace'] = 'error',
@@ -668,12 +664,12 @@ class Table(SchemaObject):
668
664
  if len(kwargs) != 1:
669
665
  raise excs.Error(
670
666
  f'add_computed_column() requires exactly one keyword argument of the form '
671
- '"column-name=type|value-expression"; '
672
- f'got {len(kwargs)} arguments instead ({", ".join(list(kwargs.keys()))})'
667
+ '`col_name=col_type` or `col_name=expression`; '
668
+ f'got {len(kwargs)} arguments instead ({", ".join(kwargs.keys())})'
673
669
  )
674
670
  col_name, spec = next(iter(kwargs.items()))
675
671
  if not is_valid_identifier(col_name):
676
- raise excs.Error(f'Invalid column name: {col_name!r}')
672
+ raise excs.Error(f'Invalid column name: {col_name}')
677
673
 
678
674
  col_schema: dict[str, Any] = {'value': spec}
679
675
  if stored is not None:
@@ -720,42 +716,42 @@ class Table(SchemaObject):
720
716
  valid_keys = {'type', 'value', 'stored', 'media_validation', 'destination'}
721
717
  for k in spec:
722
718
  if k not in valid_keys:
723
- raise excs.Error(f'Column {name}: invalid key {k!r}')
719
+ raise excs.Error(f'Column {name!r}: invalid key {k!r}')
724
720
 
725
721
  if 'type' not in spec and 'value' not in spec:
726
- raise excs.Error(f"Column {name}: 'type' or 'value' must be specified")
722
+ raise excs.Error(f"Column {name!r}: 'type' or 'value' must be specified")
727
723
 
728
724
  if 'type' in spec and not isinstance(spec['type'], (ts.ColumnType, type, _GenericAlias)):
729
- raise excs.Error(f'Column {name}: "type" must be a type or ColumnType, got {spec["type"]}')
725
+ raise excs.Error(f"Column {name!r}: 'type' must be a type or ColumnType; got {spec['type']}")
730
726
 
731
727
  if 'value' in spec:
732
728
  value_expr = exprs.Expr.from_object(spec['value'])
733
729
  if value_expr is None:
734
- raise excs.Error(f'Column {name}: value must be a Pixeltable expression.')
730
+ raise excs.Error(f"Column {name!r}: 'value' must be a Pixeltable expression.")
735
731
  if 'type' in spec:
736
- raise excs.Error(f"Column {name}: 'type' is redundant if 'value' is specified")
732
+ raise excs.Error(f"Column {name!r}: 'type' is redundant if 'value' is specified")
737
733
 
738
734
  if 'media_validation' in spec:
739
- _ = catalog.MediaValidation.validated(spec['media_validation'], f'Column {name}: media_validation')
735
+ _ = catalog.MediaValidation.validated(spec['media_validation'], f'Column {name!r}: media_validation')
740
736
 
741
737
  if 'stored' in spec and not isinstance(spec['stored'], bool):
742
- raise excs.Error(f'Column {name}: "stored" must be a bool, got {spec["stored"]}')
738
+ raise excs.Error(f"Column {name!r}: 'stored' must be a bool; got {spec['stored']}")
743
739
 
744
740
  d = spec.get('destination')
745
741
  if d is not None and not isinstance(d, (str, Path)):
746
- raise excs.Error(f'Column {name}: `destination` must be a string or path, got {d}')
742
+ raise excs.Error(f'Column {name!r}: `destination` must be a string or path; got {d}')
747
743
 
748
744
  @classmethod
749
745
  def _create_columns(cls, schema: dict[str, Any]) -> list[Column]:
750
746
  """Construct list of Columns, given schema"""
751
747
  columns: list[Column] = []
752
748
  for name, spec in schema.items():
753
- col_type: Optional[ts.ColumnType] = None
754
- value_expr: Optional[exprs.Expr] = None
749
+ col_type: ts.ColumnType | None = None
750
+ value_expr: exprs.Expr | None = None
755
751
  primary_key: bool = False
756
- media_validation: Optional[catalog.MediaValidation] = None
752
+ media_validation: catalog.MediaValidation | None = None
757
753
  stored = True
758
- destination: Optional[str] = None
754
+ destination: str | None = None
759
755
 
760
756
  if isinstance(spec, (ts.ColumnType, type, _GenericAlias)):
761
757
  col_type = ts.ColumnType.normalize_type(spec, nullable_default=True, allow_builtin_types=False)
@@ -780,8 +776,7 @@ class Table(SchemaObject):
780
776
  media_validation = (
781
777
  catalog.MediaValidation[media_validation_str.upper()] if media_validation_str is not None else None
782
778
  )
783
- if 'destination' in spec:
784
- destination = ObjectOps.validate_destination(spec['destination'], name)
779
+ destination = spec.get('destination')
785
780
  else:
786
781
  raise excs.Error(f'Invalid value for column {name!r}')
787
782
 
@@ -794,34 +789,36 @@ class Table(SchemaObject):
794
789
  media_validation=media_validation,
795
790
  destination=destination,
796
791
  )
792
+ # Validate the column's resolved_destination. This will ensure that if the column uses a default (global)
793
+ # media destination, it gets validated at this time.
794
+ ObjectOps.validate_destination(column.destination, column.name)
797
795
  columns.append(column)
796
+
798
797
  return columns
799
798
 
800
799
  @classmethod
801
800
  def validate_column_name(cls, name: str) -> None:
802
- """Check that a name is usable as a pixeltalbe column name"""
801
+ """Check that a name is usable as a pixeltable column name"""
803
802
  if is_system_column_name(name) or is_python_keyword(name):
804
803
  raise excs.Error(f'{name!r} is a reserved name in Pixeltable; please choose a different column name.')
805
804
  if not is_valid_identifier(name):
806
- raise excs.Error(f'Invalid column name: {name!r}')
805
+ raise excs.Error(f'Invalid column name: {name}')
807
806
 
808
807
  @classmethod
809
808
  def _verify_column(cls, col: Column) -> None:
810
809
  """Check integrity of user-supplied Column and supply defaults"""
811
810
  cls.validate_column_name(col.name)
812
811
  if col.stored is False and not col.is_computed:
813
- raise excs.Error(f'Column {col.name!r}: stored={col.stored} only applies to computed columns')
812
+ raise excs.Error(f'Column {col.name!r}: `stored={col.stored}` only applies to computed columns')
814
813
  if col.stored is False and col.has_window_fn_call():
815
814
  raise excs.Error(
816
815
  (
817
- f'Column {col.name!r}: stored={col.stored} is not valid for image columns computed with a '
816
+ f'Column {col.name!r}: `stored={col.stored}` is not valid for image columns computed with a '
818
817
  f'streaming function'
819
818
  )
820
819
  )
821
- if col.destination is not None and not (col.stored and col.is_computed):
822
- raise excs.Error(
823
- f'Column {col.name!r}: destination={col.destination} only applies to stored computed columns'
824
- )
820
+ if col._explicit_destination is not None and not (col.stored and col.is_computed):
821
+ raise excs.Error(f'Column {col.name!r}: `destination` property only applies to stored computed columns')
825
822
 
826
823
  @classmethod
827
824
  def _verify_schema(cls, schema: list[Column]) -> None:
@@ -873,10 +870,10 @@ class Table(SchemaObject):
873
870
  col = self._tbl_version_path.get_column(column)
874
871
  if col is None:
875
872
  if if_not_exists_ == IfNotExistsParam.ERROR:
876
- raise excs.Error(f'Column {column!r} unknown')
873
+ raise excs.Error(f'Unknown column: {column}')
877
874
  assert if_not_exists_ == IfNotExistsParam.IGNORE
878
875
  return
879
- if col.tbl.id != self._tbl_version_path.tbl_id:
876
+ if col.get_tbl().id != self._tbl_version_path.tbl_id:
880
877
  raise excs.Error(f'Cannot drop base table column {col.name!r}')
881
878
  col = self._tbl_version.get().cols_by_name[column]
882
879
  else:
@@ -887,10 +884,10 @@ class Table(SchemaObject):
887
884
  assert if_not_exists_ == IfNotExistsParam.IGNORE
888
885
  return
889
886
  col = column.col
890
- if col.tbl.id != self._tbl_version_path.tbl_id:
887
+ if col.get_tbl().id != self._tbl_version_path.tbl_id:
891
888
  raise excs.Error(f'Cannot drop base table column {col.name!r}')
892
889
 
893
- dependent_user_cols = [c for c in cat.get_column_dependents(col.tbl.id, col.id) if c.name is not None]
890
+ dependent_user_cols = [c for c in cat.get_column_dependents(col.get_tbl().id, col.id) if c.name is not None]
894
891
  if len(dependent_user_cols) > 0:
895
892
  raise excs.Error(
896
893
  f'Cannot drop column {col.name!r} because the following columns depend on it:\n'
@@ -900,21 +897,21 @@ class Table(SchemaObject):
900
897
  views = self._get_views(recursive=True, mutable_only=True)
901
898
 
902
899
  # See if any view predicates depend on this column
903
- dependent_views = []
900
+ dependent_views: list[tuple[Table, exprs.Expr]] = []
904
901
  for view in views:
905
902
  if view._tbl_version is not None:
906
903
  predicate = view._tbl_version.get().predicate
907
904
  if predicate is not None:
908
905
  for predicate_col in exprs.Expr.get_refd_column_ids(predicate.as_dict()):
909
- if predicate_col.tbl_id == col.tbl.id and predicate_col.col_id == col.id:
906
+ if predicate_col.tbl_id == col.get_tbl().id and predicate_col.col_id == col.id:
910
907
  dependent_views.append((view, predicate))
911
908
 
912
909
  if len(dependent_views) > 0:
913
910
  dependent_views_str = '\n'.join(
914
- f'view: {view._path()}, predicate: {predicate!s}' for view, predicate in dependent_views
911
+ f'view: {view._path()}, predicate: {predicate}' for view, predicate in dependent_views
915
912
  )
916
913
  raise excs.Error(
917
- f'Cannot drop column `{col.name}` because the following views depend on it:\n{dependent_views_str}'
914
+ f'Cannot drop column {col.name!r} because the following views depend on it:\n{dependent_views_str}'
918
915
  )
919
916
 
920
917
  # See if this column has a dependent store. We need to look through all stores in all
@@ -928,17 +925,17 @@ class Table(SchemaObject):
928
925
  ]
929
926
  if len(dependent_stores) > 0:
930
927
  dependent_store_names = [
931
- store.name if view._id == self._id else f'{store.name} (in view `{view._name}`)'
928
+ store.name if view._id == self._id else f'{store.name} (in view {view._name!r})'
932
929
  for view, store in dependent_stores
933
930
  ]
934
931
  raise excs.Error(
935
- f'Cannot drop column `{col.name}` because the following external stores depend on it:\n'
932
+ f'Cannot drop column {col.name!r} because the following external stores depend on it:\n'
936
933
  f'{", ".join(dependent_store_names)}'
937
934
  )
938
935
  all_columns = self.columns()
939
936
  if len(all_columns) == 1 and col.name == all_columns[0]:
940
937
  raise excs.Error(
941
- f'Cannot drop column `{col.name}` because it is the last remaining column in this table.'
938
+ f'Cannot drop column {col.name!r} because it is the last remaining column in this table.'
942
939
  f' Tables must have at least one column.'
943
940
  )
944
941
 
@@ -982,11 +979,11 @@ class Table(SchemaObject):
982
979
  self,
983
980
  column: str | ColumnRef,
984
981
  *,
985
- idx_name: Optional[str] = None,
986
- embedding: Optional[pxt.Function] = None,
987
- string_embed: Optional[pxt.Function] = None,
988
- image_embed: Optional[pxt.Function] = None,
989
- metric: str = 'cosine',
982
+ idx_name: str | None = None,
983
+ embedding: pxt.Function | None = None,
984
+ string_embed: pxt.Function | None = None,
985
+ image_embed: pxt.Function | None = None,
986
+ metric: Literal['cosine', 'ip', 'l2'] = 'cosine',
990
987
  if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
991
988
  ) -> None:
992
989
  """
@@ -994,31 +991,28 @@ class Table(SchemaObject):
994
991
  rows are inserted into the table.
995
992
 
996
993
  To add an embedding index, one must specify, at minimum, the column to be indexed and an embedding UDF.
997
- Only `String` and `Image` columns are currently supported. Here's an example that uses a
998
- [CLIP embedding][pixeltable.functions.huggingface.clip] to index an image column:
994
+ Only `String` and `Image` columns are currently supported.
999
995
 
1000
- ```
1001
- >>> from pixeltable.functions.huggingface import clip
1002
- >>> embedding_fn = clip.using(model_id='openai/clip-vit-base-patch32')
1003
- >>> tbl.add_embedding_index(tbl.img, embedding=embedding_fn)
1004
- ```
996
+ Examples:
997
+ Here's an example that uses a
998
+ [CLIP embedding][pixeltable.functions.huggingface.clip] to index an image column:
1005
999
 
1006
- Once the index is created, similarity lookups can be performed using the `similarity` pseudo-function:
1000
+ >>> from pixeltable.functions.huggingface import clip
1001
+ >>> embedding_fn = clip.using(model_id='openai/clip-vit-base-patch32')
1002
+ >>> tbl.add_embedding_index(tbl.img, embedding=embedding_fn)
1007
1003
 
1008
- ```
1009
- >>> reference_img = PIL.Image.open('my_image.jpg')
1010
- >>> sim = tbl.img.similarity(reference_img)
1011
- >>> tbl.select(tbl.img, sim).order_by(sim, asc=False).limit(5)
1012
- ```
1004
+ Once the index is created, similarity lookups can be performed using the `similarity` pseudo-function:
1013
1005
 
1014
- If the embedding UDF is a multimodal embedding (supporting more than one data type), then lookups may be
1015
- performed using any of its supported types. In our example, CLIP supports both text and images, so we can
1016
- also search for images using a text description:
1006
+ >>> reference_img = PIL.Image.open('my_image.jpg')
1007
+ >>> sim = tbl.img.similarity(reference_img)
1008
+ >>> tbl.select(tbl.img, sim).order_by(sim, asc=False).limit(5)
1017
1009
 
1018
- ```
1019
- >>> sim = tbl.img.similarity('a picture of a train')
1020
- >>> tbl.select(tbl.img, sim).order_by(sim, asc=False).limit(5)
1021
- ```
1010
+ If the embedding UDF is a multimodal embedding (supporting more than one data type), then lookups may be
1011
+ performed using any of its supported types. In our example, CLIP supports both text and images, so we can
1012
+ also search for images using a text description:
1013
+
1014
+ >>> sim = tbl.img.similarity('a picture of a train')
1015
+ >>> tbl.select(tbl.img, sim).order_by(sim, asc=False).limit(5)
1022
1016
 
1023
1017
  Args:
1024
1018
  column: The name of, or reference to, the column to be indexed; must be a `String` or `Image` column.
@@ -1089,7 +1083,7 @@ class Table(SchemaObject):
1089
1083
  raise excs.Error(f'Duplicate index name: {idx_name}')
1090
1084
  if not isinstance(self._tbl_version.get().idxs_by_name[idx_name].idx, index.EmbeddingIndex):
1091
1085
  raise excs.Error(
1092
- f'Index `{idx_name}` is not an embedding index. Cannot {if_exists_.name.lower()} it.'
1086
+ f'Index {idx_name!r} is not an embedding index. Cannot {if_exists_.name.lower()} it.'
1093
1087
  )
1094
1088
  if if_exists_ == IfExistsParam.IGNORE:
1095
1089
  return
@@ -1102,10 +1096,9 @@ class Table(SchemaObject):
1102
1096
  if idx_name is not None:
1103
1097
  Table.validate_column_name(idx_name)
1104
1098
 
1105
- # create the EmbeddingIndex instance to verify args
1106
- idx = EmbeddingIndex(
1107
- col, metric=metric, embed=embedding, string_embed=string_embed, image_embed=image_embed
1108
- )
1099
+ # validate EmbeddingIndex args
1100
+ idx = EmbeddingIndex(metric=metric, embed=embedding, string_embed=string_embed, image_embed=image_embed)
1101
+ _ = idx.create_value_expr(col)
1109
1102
  _ = self._tbl_version.get().add_index(col, idx_name=idx_name, idx=idx)
1110
1103
  # TODO: how to deal with exceptions here? drop the index and raise?
1111
1104
  FileCache.get().emit_eviction_warnings()
@@ -1114,7 +1107,7 @@ class Table(SchemaObject):
1114
1107
  self,
1115
1108
  *,
1116
1109
  column: str | ColumnRef | None = None,
1117
- idx_name: Optional[str] = None,
1110
+ idx_name: str | None = None,
1118
1111
  if_not_exists: Literal['error', 'ignore'] = 'error',
1119
1112
  ) -> None:
1120
1113
  """
@@ -1179,7 +1172,7 @@ class Table(SchemaObject):
1179
1172
  if isinstance(column, str):
1180
1173
  col = self._tbl_version_path.get_column(column)
1181
1174
  if col is None:
1182
- raise excs.Error(f'Column {column!r} unknown')
1175
+ raise excs.Error(f'Unknown column: {column}')
1183
1176
  elif isinstance(column, ColumnRef):
1184
1177
  exists = self._tbl_version_path.has_column(column.col)
1185
1178
  if not exists:
@@ -1193,7 +1186,7 @@ class Table(SchemaObject):
1193
1186
  self,
1194
1187
  *,
1195
1188
  column: str | ColumnRef | None = None,
1196
- idx_name: Optional[str] = None,
1189
+ idx_name: str | None = None,
1197
1190
  if_not_exists: Literal['error', 'ignore'] = 'error',
1198
1191
  ) -> None:
1199
1192
  """
@@ -1255,9 +1248,9 @@ class Table(SchemaObject):
1255
1248
  def _drop_index(
1256
1249
  self,
1257
1250
  *,
1258
- col: Optional[Column] = None,
1259
- idx_name: Optional[str] = None,
1260
- _idx_class: Optional[type[index.IndexBase]] = None,
1251
+ col: Column | None = None,
1252
+ idx_name: str | None = None,
1253
+ _idx_class: type[index.IndexBase] | None = None,
1261
1254
  if_not_exists: Literal['error', 'ignore'] = 'error',
1262
1255
  ) -> None:
1263
1256
  from pixeltable.catalog import Catalog
@@ -1274,9 +1267,10 @@ class Table(SchemaObject):
1274
1267
  return
1275
1268
  idx_info = self._tbl_version.get().idxs_by_name[idx_name]
1276
1269
  else:
1277
- if col.tbl.id != self._tbl_version.id:
1270
+ if col.get_tbl().id != self._tbl_version.id:
1278
1271
  raise excs.Error(
1279
- f'Column {col.name!r}: cannot drop index from column that belongs to base ({col.tbl.name!r})'
1272
+ f'Column {col.name!r}: '
1273
+ f'cannot drop index from column that belongs to base table {col.get_tbl().name!r}'
1280
1274
  )
1281
1275
  idx_info_list = [info for info in self._tbl_version.get().idxs_by_name.values() if info.col.id == col.id]
1282
1276
  if _idx_class is not None:
@@ -1288,17 +1282,17 @@ class Table(SchemaObject):
1288
1282
  assert if_not_exists_ == IfNotExistsParam.IGNORE
1289
1283
  return
1290
1284
  if len(idx_info_list) > 1:
1291
- raise excs.Error(f"Column {col.name!r} has multiple indices; specify 'idx_name' instead")
1285
+ raise excs.Error(f'Column {col.name!r} has multiple indices; specify `idx_name` explicitly to drop one')
1292
1286
  idx_info = idx_info_list[0]
1293
1287
 
1294
1288
  # Find out if anything depends on this index
1295
1289
  val_col = idx_info.val_col
1296
1290
  dependent_user_cols = [
1297
- c for c in Catalog.get().get_column_dependents(val_col.tbl.id, val_col.id) if c.name is not None
1291
+ c for c in Catalog.get().get_column_dependents(val_col.get_tbl().id, val_col.id) if c.name is not None
1298
1292
  ]
1299
1293
  if len(dependent_user_cols) > 0:
1300
1294
  raise excs.Error(
1301
- f'Cannot drop index because the following columns depend on it:\n'
1295
+ f'Cannot drop index {idx_info.name!r} because the following columns depend on it:\n'
1302
1296
  f'{", ".join(c.name for c in dependent_user_cols)}'
1303
1297
  )
1304
1298
  self._tbl_version.get().drop_index(idx_info.id)
@@ -1309,8 +1303,8 @@ class Table(SchemaObject):
1309
1303
  source: TableDataSource,
1310
1304
  /,
1311
1305
  *,
1312
- source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
1313
- schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
1306
+ source_format: Literal['csv', 'excel', 'parquet', 'json'] | None = None,
1307
+ schema_overrides: dict[str, ts.ColumnType] | None = None,
1314
1308
  on_error: Literal['abort', 'ignore'] = 'abort',
1315
1309
  print_stats: bool = False,
1316
1310
  **kwargs: Any,
@@ -1324,11 +1318,11 @@ class Table(SchemaObject):
1324
1318
  @abc.abstractmethod
1325
1319
  def insert(
1326
1320
  self,
1327
- source: Optional[TableDataSource] = None,
1321
+ source: TableDataSource | None = None,
1328
1322
  /,
1329
1323
  *,
1330
- source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
1331
- schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
1324
+ source_format: Literal['csv', 'excel', 'parquet', 'json'] | None = None,
1325
+ schema_overrides: dict[str, ts.ColumnType] | None = None,
1332
1326
  on_error: Literal['abort', 'ignore'] = 'abort',
1333
1327
  print_stats: bool = False,
1334
1328
  **kwargs: Any,
@@ -1413,7 +1407,7 @@ class Table(SchemaObject):
1413
1407
  raise NotImplementedError
1414
1408
 
1415
1409
  def update(
1416
- self, value_spec: dict[str, Any], where: Optional['exprs.Expr'] = None, cascade: bool = True
1410
+ self, value_spec: dict[str, Any], where: 'exprs.Expr' | None = None, cascade: bool = True
1417
1411
  ) -> UpdateStatus:
1418
1412
  """Update rows in this table.
1419
1413
 
@@ -1508,7 +1502,9 @@ class Table(SchemaObject):
1508
1502
  col_names = {col.name for col in col_vals}
1509
1503
  if any(pk_col_name not in col_names for pk_col_name in pk_col_names):
1510
1504
  missing_cols = pk_col_names - {col.name for col in col_vals}
1511
- raise excs.Error(f'Primary key columns ({", ".join(missing_cols)}) missing in {row_spec}')
1505
+ raise excs.Error(
1506
+ f'Primary key column(s) {", ".join(repr(c) for c in missing_cols)} missing in {row_spec}'
1507
+ )
1512
1508
  row_updates.append(col_vals)
1513
1509
 
1514
1510
  result = self._tbl_version.get().batch_update(
@@ -1574,22 +1570,22 @@ class Table(SchemaObject):
1574
1570
  if isinstance(column, str):
1575
1571
  col = self._tbl_version_path.get_column(column)
1576
1572
  if col is None:
1577
- raise excs.Error(f'Unknown column: {column!r}')
1573
+ raise excs.Error(f'Unknown column: {column}')
1578
1574
  col_name = column
1579
1575
  else:
1580
1576
  assert isinstance(column, ColumnRef)
1581
1577
  col = column.col
1582
1578
  if not self._tbl_version_path.has_column(col):
1583
- raise excs.Error(f'Unknown column: {col.name!r}')
1579
+ raise excs.Error(f'Unknown column: {col.name}')
1584
1580
  col_name = col.name
1585
1581
  if not col.is_computed:
1586
1582
  raise excs.Error(f'Column {col_name!r} is not a computed column')
1587
- if col.tbl.id != self._tbl_version_path.tbl_id:
1588
- raise excs.Error(f'Cannot recompute column of a base: {col_name!r}')
1583
+ if col.get_tbl().id != self._tbl_version_path.tbl_id:
1584
+ raise excs.Error(f'Cannot recompute column of a base: {col_name}')
1589
1585
  col_names.append(col_name)
1590
1586
 
1591
1587
  if where is not None and not where.is_bound_by([self._tbl_version_path]):
1592
- raise excs.Error(f"'where' ({where}) not bound by {self._display_str()}")
1588
+ raise excs.Error(f'`where` predicate ({where}) is not bound by {self._display_str()}')
1593
1589
 
1594
1590
  result = self._tbl_version.get().recompute_columns(
1595
1591
  col_names, where=where, errors_only=errors_only, cascade=cascade
@@ -1597,7 +1593,7 @@ class Table(SchemaObject):
1597
1593
  FileCache.get().emit_eviction_warnings()
1598
1594
  return result
1599
1595
 
1600
- def delete(self, where: Optional['exprs.Expr'] = None) -> UpdateStatus:
1596
+ def delete(self, where: 'exprs.Expr' | None = None) -> UpdateStatus:
1601
1597
  """Delete rows in this table.
1602
1598
 
1603
1599
  Args:
@@ -1640,12 +1636,12 @@ class Table(SchemaObject):
1640
1636
  with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=False):
1641
1637
  self.__check_mutable('link an external store to')
1642
1638
  if store.name in self.external_stores():
1643
- raise excs.Error(f'Table `{self._name}` already has an external store with that name: {store.name}')
1644
- _logger.info(f'Linking external store `{store.name}` to table `{self._name}`')
1639
+ raise excs.Error(f'Table {self._name!r} already has an external store with that name: {store.name}')
1640
+ _logger.info(f'Linking external store {store.name!r} to table {self._name!r}.')
1645
1641
 
1646
1642
  store.link(self._tbl_version.get()) # might call tbl_version.add_columns()
1647
1643
  self._tbl_version.get().link_external_store(store)
1648
- env.Env.get().console_logger.info(f'Linked external store `{store.name}` to table `{self._name}`.')
1644
+ env.Env.get().console_logger.info(f'Linked external store {store.name!r} to table {self._name!r}.')
1649
1645
 
1650
1646
  def unlink_external_stores(
1651
1647
  self, stores: str | list[str] | None = None, *, delete_external_data: bool = False, ignore_errors: bool = False
@@ -1677,7 +1673,7 @@ class Table(SchemaObject):
1677
1673
  if not ignore_errors:
1678
1674
  for store_name in stores:
1679
1675
  if store_name not in all_stores:
1680
- raise excs.Error(f'Table `{self._name}` has no external store with that name: {store_name}')
1676
+ raise excs.Error(f'Table {self._name!r} has no external store with that name: {store_name}')
1681
1677
 
1682
1678
  for store_name in stores:
1683
1679
  store = self._tbl_version.get().external_stores[store_name]
@@ -1687,7 +1683,7 @@ class Table(SchemaObject):
1687
1683
  self._tbl_version.get().unlink_external_store(store)
1688
1684
  if delete_external_data and isinstance(store, pxt.io.external_store.Project):
1689
1685
  store.delete()
1690
- env.Env.get().console_logger.info(f'Unlinked external store from table `{self._name}`: {store_str}')
1686
+ env.Env.get().console_logger.info(f'Unlinked external store from table {self._name!r}: {store_str}')
1691
1687
 
1692
1688
  def sync(
1693
1689
  self, stores: str | list[str] | None = None, *, export_data: bool = True, import_data: bool = True
@@ -1718,7 +1714,7 @@ class Table(SchemaObject):
1718
1714
 
1719
1715
  for store in stores:
1720
1716
  if store not in all_stores:
1721
- raise excs.Error(f'Table `{self._name}` has no external store with that name: {store}')
1717
+ raise excs.Error(f'Table {self._name!r} has no external store with that name: {store}')
1722
1718
 
1723
1719
  sync_status = UpdateStatus()
1724
1720
  for store in stores:
@@ -1734,7 +1730,7 @@ class Table(SchemaObject):
1734
1730
  def _ipython_key_completions_(self) -> list[str]:
1735
1731
  return list(self._get_schema().keys())
1736
1732
 
1737
- def get_versions(self, n: Optional[int] = None) -> list[VersionMetadata]:
1733
+ def get_versions(self, n: int | None = None) -> list[VersionMetadata]:
1738
1734
  """
1739
1735
  Returns information about versions of this table, most recent first.
1740
1736
 
@@ -1806,7 +1802,7 @@ class Table(SchemaObject):
1806
1802
 
1807
1803
  return metadata_dicts
1808
1804
 
1809
- def history(self, n: Optional[int] = None) -> pd.DataFrame:
1805
+ def history(self, n: int | None = None) -> pd.DataFrame:
1810
1806
  """
1811
1807
  Returns a human-readable report about versions of this table.
1812
1808