pixeltable 0.4.17__py3-none-any.whl → 0.4.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. See the package registry's advisory page for more details.

Files changed (153)
  1. pixeltable/__init__.py +1 -1
  2. pixeltable/_version.py +1 -0
  3. pixeltable/catalog/catalog.py +144 -118
  4. pixeltable/catalog/column.py +104 -115
  5. pixeltable/catalog/globals.py +1 -2
  6. pixeltable/catalog/insertable_table.py +44 -49
  7. pixeltable/catalog/path.py +3 -4
  8. pixeltable/catalog/schema_object.py +4 -4
  9. pixeltable/catalog/table.py +139 -124
  10. pixeltable/catalog/table_metadata.py +6 -6
  11. pixeltable/catalog/table_version.py +315 -246
  12. pixeltable/catalog/table_version_handle.py +4 -4
  13. pixeltable/catalog/table_version_path.py +9 -10
  14. pixeltable/catalog/tbl_ops.py +9 -3
  15. pixeltable/catalog/view.py +34 -28
  16. pixeltable/config.py +14 -10
  17. pixeltable/dataframe.py +69 -78
  18. pixeltable/env.py +78 -64
  19. pixeltable/exec/aggregation_node.py +6 -6
  20. pixeltable/exec/cache_prefetch_node.py +10 -10
  21. pixeltable/exec/data_row_batch.py +3 -3
  22. pixeltable/exec/exec_context.py +16 -4
  23. pixeltable/exec/exec_node.py +5 -5
  24. pixeltable/exec/expr_eval/evaluators.py +6 -6
  25. pixeltable/exec/expr_eval/expr_eval_node.py +8 -7
  26. pixeltable/exec/expr_eval/globals.py +6 -6
  27. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  28. pixeltable/exec/expr_eval/schedulers.py +11 -11
  29. pixeltable/exec/in_memory_data_node.py +2 -2
  30. pixeltable/exec/object_store_save_node.py +14 -17
  31. pixeltable/exec/sql_node.py +28 -27
  32. pixeltable/exprs/arithmetic_expr.py +4 -4
  33. pixeltable/exprs/array_slice.py +2 -2
  34. pixeltable/exprs/column_property_ref.py +3 -3
  35. pixeltable/exprs/column_ref.py +61 -74
  36. pixeltable/exprs/comparison.py +5 -5
  37. pixeltable/exprs/compound_predicate.py +3 -3
  38. pixeltable/exprs/data_row.py +12 -12
  39. pixeltable/exprs/expr.py +41 -31
  40. pixeltable/exprs/expr_dict.py +3 -3
  41. pixeltable/exprs/expr_set.py +3 -3
  42. pixeltable/exprs/function_call.py +14 -14
  43. pixeltable/exprs/in_predicate.py +4 -4
  44. pixeltable/exprs/inline_expr.py +8 -8
  45. pixeltable/exprs/is_null.py +1 -3
  46. pixeltable/exprs/json_mapper.py +8 -8
  47. pixeltable/exprs/json_path.py +6 -6
  48. pixeltable/exprs/literal.py +5 -5
  49. pixeltable/exprs/method_ref.py +2 -2
  50. pixeltable/exprs/object_ref.py +2 -2
  51. pixeltable/exprs/row_builder.py +14 -14
  52. pixeltable/exprs/rowid_ref.py +8 -8
  53. pixeltable/exprs/similarity_expr.py +50 -25
  54. pixeltable/exprs/sql_element_cache.py +4 -4
  55. pixeltable/exprs/string_op.py +2 -2
  56. pixeltable/exprs/type_cast.py +3 -5
  57. pixeltable/func/aggregate_function.py +8 -8
  58. pixeltable/func/callable_function.py +9 -9
  59. pixeltable/func/expr_template_function.py +3 -3
  60. pixeltable/func/function.py +15 -17
  61. pixeltable/func/function_registry.py +6 -7
  62. pixeltable/func/globals.py +2 -3
  63. pixeltable/func/mcp.py +2 -2
  64. pixeltable/func/query_template_function.py +16 -16
  65. pixeltable/func/signature.py +14 -14
  66. pixeltable/func/tools.py +11 -11
  67. pixeltable/func/udf.py +16 -18
  68. pixeltable/functions/__init__.py +1 -0
  69. pixeltable/functions/anthropic.py +7 -7
  70. pixeltable/functions/audio.py +76 -0
  71. pixeltable/functions/bedrock.py +6 -6
  72. pixeltable/functions/deepseek.py +4 -4
  73. pixeltable/functions/fireworks.py +2 -2
  74. pixeltable/functions/gemini.py +6 -6
  75. pixeltable/functions/globals.py +12 -12
  76. pixeltable/functions/groq.py +4 -4
  77. pixeltable/functions/huggingface.py +1033 -6
  78. pixeltable/functions/image.py +7 -10
  79. pixeltable/functions/llama_cpp.py +7 -7
  80. pixeltable/functions/math.py +2 -3
  81. pixeltable/functions/mistralai.py +3 -3
  82. pixeltable/functions/ollama.py +9 -9
  83. pixeltable/functions/openai.py +21 -21
  84. pixeltable/functions/openrouter.py +7 -7
  85. pixeltable/functions/string.py +21 -28
  86. pixeltable/functions/timestamp.py +7 -8
  87. pixeltable/functions/together.py +4 -6
  88. pixeltable/functions/twelvelabs.py +92 -0
  89. pixeltable/functions/video.py +36 -31
  90. pixeltable/functions/vision.py +6 -6
  91. pixeltable/functions/whisper.py +7 -7
  92. pixeltable/functions/whisperx.py +16 -16
  93. pixeltable/globals.py +75 -40
  94. pixeltable/index/base.py +12 -8
  95. pixeltable/index/btree.py +19 -22
  96. pixeltable/index/embedding_index.py +30 -39
  97. pixeltable/io/datarows.py +3 -3
  98. pixeltable/io/external_store.py +13 -16
  99. pixeltable/io/fiftyone.py +5 -5
  100. pixeltable/io/globals.py +5 -5
  101. pixeltable/io/hf_datasets.py +4 -4
  102. pixeltable/io/label_studio.py +12 -12
  103. pixeltable/io/pandas.py +6 -6
  104. pixeltable/io/parquet.py +2 -2
  105. pixeltable/io/table_data_conduit.py +12 -12
  106. pixeltable/io/utils.py +2 -2
  107. pixeltable/iterators/audio.py +2 -2
  108. pixeltable/iterators/document.py +88 -57
  109. pixeltable/iterators/video.py +66 -37
  110. pixeltable/metadata/converters/convert_18.py +2 -2
  111. pixeltable/metadata/converters/convert_19.py +2 -2
  112. pixeltable/metadata/converters/convert_20.py +2 -2
  113. pixeltable/metadata/converters/convert_21.py +2 -2
  114. pixeltable/metadata/converters/convert_22.py +2 -2
  115. pixeltable/metadata/converters/convert_24.py +2 -2
  116. pixeltable/metadata/converters/convert_25.py +2 -2
  117. pixeltable/metadata/converters/convert_26.py +2 -2
  118. pixeltable/metadata/converters/convert_29.py +4 -4
  119. pixeltable/metadata/converters/convert_34.py +2 -2
  120. pixeltable/metadata/converters/convert_36.py +2 -2
  121. pixeltable/metadata/converters/convert_38.py +2 -2
  122. pixeltable/metadata/converters/convert_39.py +1 -2
  123. pixeltable/metadata/converters/util.py +11 -13
  124. pixeltable/metadata/schema.py +22 -21
  125. pixeltable/metadata/utils.py +2 -6
  126. pixeltable/mypy/mypy_plugin.py +5 -5
  127. pixeltable/plan.py +32 -34
  128. pixeltable/share/packager.py +7 -7
  129. pixeltable/share/publish.py +3 -3
  130. pixeltable/store.py +126 -41
  131. pixeltable/type_system.py +43 -46
  132. pixeltable/utils/__init__.py +1 -2
  133. pixeltable/utils/arrow.py +4 -4
  134. pixeltable/utils/av.py +74 -38
  135. pixeltable/utils/azure_store.py +305 -0
  136. pixeltable/utils/code.py +1 -2
  137. pixeltable/utils/dbms.py +15 -19
  138. pixeltable/utils/description_helper.py +2 -3
  139. pixeltable/utils/documents.py +5 -6
  140. pixeltable/utils/exception_handler.py +2 -2
  141. pixeltable/utils/filecache.py +5 -5
  142. pixeltable/utils/formatter.py +4 -6
  143. pixeltable/utils/gcs_store.py +9 -9
  144. pixeltable/utils/local_store.py +17 -17
  145. pixeltable/utils/object_stores.py +59 -43
  146. pixeltable/utils/s3_store.py +35 -30
  147. {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/METADATA +4 -4
  148. pixeltable-0.4.19.dist-info/RECORD +213 -0
  149. pixeltable/__version__.py +0 -3
  150. pixeltable-0.4.17.dist-info/RECORD +0 -211
  151. {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/WHEEL +0 -0
  152. {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/entry_points.txt +0 -0
  153. {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/licenses/LICENSE +0 -0
@@ -7,7 +7,7 @@ import json
7
7
  import logging
8
8
  from keyword import iskeyword as is_python_keyword
9
9
  from pathlib import Path
10
- from typing import TYPE_CHECKING, Any, Iterable, Literal, Optional, overload
10
+ from typing import TYPE_CHECKING, Any, Iterable, Literal, overload
11
11
  from uuid import UUID
12
12
 
13
13
  import pandas as pd
@@ -69,7 +69,7 @@ class Table(SchemaObject):
69
69
  _tbl_version_path: TableVersionPath
70
70
 
71
71
  # the physical TableVersion backing this Table; None for pure snapshots
72
- _tbl_version: Optional[TableVersionHandle]
72
+ _tbl_version: TableVersionHandle | None
73
73
 
74
74
  def __init__(self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath):
75
75
  super().__init__(id, name, dir_id)
@@ -77,6 +77,17 @@ class Table(SchemaObject):
77
77
  self._tbl_version = None
78
78
 
79
79
  def _move(self, new_name: str, new_dir_id: UUID) -> None:
80
+ old_name = self._name
81
+ old_dir_id = self._dir_id
82
+
83
+ cat = catalog.Catalog.get()
84
+
85
+ @cat.register_undo_action
86
+ def _() -> None:
87
+ # TODO: We should really be invalidating the Table instance and forcing a reload.
88
+ self._name = old_name
89
+ self._dir_id = old_dir_id
90
+
80
91
  super()._move(new_name, new_dir_id)
81
92
  conn = env.Env.get().conn
82
93
  stmt = sql.text(
@@ -117,7 +128,7 @@ class Table(SchemaObject):
117
128
  is_primary_key=col.is_pk,
118
129
  media_validation=col.media_validation.name.lower() if col.media_validation is not None else None, # type: ignore[typeddict-item]
119
130
  computed_with=col.value_expr.display_str(inline=False) if col.value_expr is not None else None,
120
- defined_in=col.tbl.name,
131
+ defined_in=col.get_tbl().name,
121
132
  )
122
133
  # Pure snapshots have no indices
123
134
  indices = self._tbl_version.get().idxs_by_name.values() if self._tbl_version is not None else {}
@@ -167,7 +178,7 @@ class Table(SchemaObject):
167
178
  """Return a ColumnRef for the given name."""
168
179
  col = self._tbl_version_path.get_column(name)
169
180
  if col is None:
170
- raise AttributeError(f'Column {name!r} unknown')
181
+ raise AttributeError(f'Unknown column: {name}')
171
182
  return ColumnRef(col, reference_tbl=self._tbl_version_path)
172
183
 
173
184
  def __getitem__(self, name: str) -> 'exprs.ColumnRef':
@@ -232,11 +243,7 @@ class Table(SchemaObject):
232
243
  return self._df().where(pred)
233
244
 
234
245
  def join(
235
- self,
236
- other: 'Table',
237
- *,
238
- on: Optional['exprs.Expr'] = None,
239
- how: 'pixeltable.plan.JoinType.LiteralType' = 'inner',
246
+ self, other: 'Table', *, on: 'exprs.Expr' | None = None, how: 'pixeltable.plan.JoinType.LiteralType' = 'inner'
240
247
  ) -> 'pxt.DataFrame':
241
248
  """Join this table with another table."""
242
249
  from pixeltable.catalog import Catalog
@@ -273,10 +280,10 @@ class Table(SchemaObject):
273
280
 
274
281
  def sample(
275
282
  self,
276
- n: Optional[int] = None,
277
- n_per_stratum: Optional[int] = None,
278
- fraction: Optional[float] = None,
279
- seed: Optional[int] = None,
283
+ n: int | None = None,
284
+ n_per_stratum: int | None = None,
285
+ fraction: float | None = None,
286
+ seed: int | None = None,
280
287
  stratify_by: Any = None,
281
288
  ) -> pxt.DataFrame:
282
289
  """Choose a shuffled sample of rows
@@ -316,11 +323,11 @@ class Table(SchemaObject):
316
323
  """Return the schema (column names and column types) of this table."""
317
324
  return {c.name: c.col_type for c in self._tbl_version_path.columns()}
318
325
 
319
- def get_base_table(self) -> Optional['Table']:
326
+ def get_base_table(self) -> 'Table' | None:
320
327
  return self._get_base_table()
321
328
 
322
329
  @abc.abstractmethod
323
- def _get_base_table(self) -> Optional['Table']:
330
+ def _get_base_table(self) -> 'Table' | None:
324
331
  """The base's Table instance. Requires a transaction context"""
325
332
 
326
333
  def _get_base_tables(self) -> list['Table']:
@@ -334,7 +341,7 @@ class Table(SchemaObject):
334
341
 
335
342
  @property
336
343
  @abc.abstractmethod
337
- def _effective_base_versions(self) -> list[Optional[int]]:
344
+ def _effective_base_versions(self) -> list[int | None]:
338
345
  """The effective versions of the ancestor bases, starting with its immediate base."""
339
346
 
340
347
  def _get_comment(self) -> str:
@@ -372,7 +379,7 @@ class Table(SchemaObject):
372
379
  helper.append(f'COMMENT: {self._get_comment()}')
373
380
  return helper
374
381
 
375
- def _col_descriptor(self, columns: Optional[list[str]] = None) -> pd.DataFrame:
382
+ def _col_descriptor(self, columns: list[str] | None = None) -> pd.DataFrame:
376
383
  return pd.DataFrame(
377
384
  {
378
385
  'Column Name': col.name,
@@ -383,7 +390,7 @@ class Table(SchemaObject):
383
390
  if columns is None or col.name in columns
384
391
  )
385
392
 
386
- def _index_descriptor(self, columns: Optional[list[str]] = None) -> pd.DataFrame:
393
+ def _index_descriptor(self, columns: list[str] | None = None) -> pd.DataFrame:
387
394
  from pixeltable import index
388
395
 
389
396
  if self._tbl_version is None:
@@ -442,7 +449,7 @@ class Table(SchemaObject):
442
449
  assert col is not None
443
450
  assert col.name in self._get_schema()
444
451
  cat = catalog.Catalog.get()
445
- if any(c.name is not None for c in cat.get_column_dependents(col.tbl.id, col.id)):
452
+ if any(c.name is not None for c in cat.get_column_dependents(col.get_tbl().id, col.id)):
446
453
  return True
447
454
  assert self._tbl_version is not None
448
455
  return any(
@@ -462,7 +469,7 @@ class Table(SchemaObject):
462
469
  for new_col_name in new_col_names:
463
470
  if new_col_name in existing_col_names:
464
471
  if if_exists == IfExistsParam.ERROR:
465
- raise excs.Error(f'Duplicate column name: {new_col_name!r}')
472
+ raise excs.Error(f'Duplicate column name: {new_col_name}')
466
473
  elif if_exists == IfExistsParam.IGNORE:
467
474
  cols_to_ignore.append(new_col_name)
468
475
  elif if_exists in (IfExistsParam.REPLACE, IfExistsParam.REPLACE_FORCE):
@@ -591,8 +598,8 @@ class Table(SchemaObject):
591
598
  # verify kwargs and construct column schema dict
592
599
  if len(kwargs) != 1:
593
600
  raise excs.Error(
594
- f'add_column() requires exactly one keyword argument of the form "col_name=col_type"; '
595
- f'got {len(kwargs)} instead ({", ".join(kwargs.keys())})'
601
+ f'add_column() requires exactly one keyword argument of the form `col_name=col_type`; '
602
+ f'got {len(kwargs)} arguments instead ({", ".join(kwargs.keys())})'
596
603
  )
597
604
  col_type = next(iter(kwargs.values()))
598
605
  if not isinstance(col_type, (ts.ColumnType, type, _GenericAlias)):
@@ -604,8 +611,8 @@ class Table(SchemaObject):
604
611
  def add_computed_column(
605
612
  self,
606
613
  *,
607
- stored: Optional[bool] = None,
608
- destination: Optional[str | Path] = None,
614
+ stored: bool | None = None,
615
+ destination: str | Path | None = None,
609
616
  print_stats: bool = False,
610
617
  on_error: Literal['abort', 'ignore'] = 'abort',
611
618
  if_exists: Literal['error', 'ignore', 'replace'] = 'error',
@@ -625,7 +632,7 @@ class Table(SchemaObject):
625
632
  - `'abort'`: an exception will be raised and the column will not be added.
626
633
  - `'ignore'`: execution will continue and the column will be added. Any rows
627
634
  with errors will have a `None` value for the column, with information about the error stored in the
628
- corresponding `tbl.col_name.errormsg` tbl.col_name.errortype` fields.
635
+ corresponding `tbl.col_name.errormsg` and `tbl.col_name.errortype` fields.
629
636
  if_exists: Determines the behavior if the column already exists. Must be one of the following:
630
637
 
631
638
  - `'error'`: an exception will be raised.
@@ -657,12 +664,12 @@ class Table(SchemaObject):
657
664
  if len(kwargs) != 1:
658
665
  raise excs.Error(
659
666
  f'add_computed_column() requires exactly one keyword argument of the form '
660
- '"column-name=type|value-expression"; '
661
- f'got {len(kwargs)} arguments instead ({", ".join(list(kwargs.keys()))})'
667
+ '`col_name=col_type` or `col_name=expression`; '
668
+ f'got {len(kwargs)} arguments instead ({", ".join(kwargs.keys())})'
662
669
  )
663
670
  col_name, spec = next(iter(kwargs.items()))
664
671
  if not is_valid_identifier(col_name):
665
- raise excs.Error(f'Invalid column name: {col_name!r}')
672
+ raise excs.Error(f'Invalid column name: {col_name}')
666
673
 
667
674
  col_schema: dict[str, Any] = {'value': spec}
668
675
  if stored is not None:
@@ -709,42 +716,42 @@ class Table(SchemaObject):
709
716
  valid_keys = {'type', 'value', 'stored', 'media_validation', 'destination'}
710
717
  for k in spec:
711
718
  if k not in valid_keys:
712
- raise excs.Error(f'Column {name}: invalid key {k!r}')
719
+ raise excs.Error(f'Column {name!r}: invalid key {k!r}')
713
720
 
714
721
  if 'type' not in spec and 'value' not in spec:
715
- raise excs.Error(f"Column {name}: 'type' or 'value' must be specified")
722
+ raise excs.Error(f"Column {name!r}: 'type' or 'value' must be specified")
716
723
 
717
724
  if 'type' in spec and not isinstance(spec['type'], (ts.ColumnType, type, _GenericAlias)):
718
- raise excs.Error(f'Column {name}: "type" must be a type or ColumnType, got {spec["type"]}')
725
+ raise excs.Error(f"Column {name!r}: 'type' must be a type or ColumnType; got {spec['type']}")
719
726
 
720
727
  if 'value' in spec:
721
728
  value_expr = exprs.Expr.from_object(spec['value'])
722
729
  if value_expr is None:
723
- raise excs.Error(f'Column {name}: value must be a Pixeltable expression.')
730
+ raise excs.Error(f"Column {name!r}: 'value' must be a Pixeltable expression.")
724
731
  if 'type' in spec:
725
- raise excs.Error(f"Column {name}: 'type' is redundant if 'value' is specified")
732
+ raise excs.Error(f"Column {name!r}: 'type' is redundant if 'value' is specified")
726
733
 
727
734
  if 'media_validation' in spec:
728
- _ = catalog.MediaValidation.validated(spec['media_validation'], f'Column {name}: media_validation')
735
+ _ = catalog.MediaValidation.validated(spec['media_validation'], f'Column {name!r}: media_validation')
729
736
 
730
737
  if 'stored' in spec and not isinstance(spec['stored'], bool):
731
- raise excs.Error(f'Column {name}: "stored" must be a bool, got {spec["stored"]}')
738
+ raise excs.Error(f"Column {name!r}: 'stored' must be a bool; got {spec['stored']}")
732
739
 
733
740
  d = spec.get('destination')
734
741
  if d is not None and not isinstance(d, (str, Path)):
735
- raise excs.Error(f'Column {name}: `destination` must be a string or path, got {d}')
742
+ raise excs.Error(f'Column {name!r}: `destination` must be a string or path; got {d}')
736
743
 
737
744
  @classmethod
738
745
  def _create_columns(cls, schema: dict[str, Any]) -> list[Column]:
739
746
  """Construct list of Columns, given schema"""
740
747
  columns: list[Column] = []
741
748
  for name, spec in schema.items():
742
- col_type: Optional[ts.ColumnType] = None
743
- value_expr: Optional[exprs.Expr] = None
749
+ col_type: ts.ColumnType | None = None
750
+ value_expr: exprs.Expr | None = None
744
751
  primary_key: bool = False
745
- media_validation: Optional[catalog.MediaValidation] = None
752
+ media_validation: catalog.MediaValidation | None = None
746
753
  stored = True
747
- destination: Optional[str] = None
754
+ destination: str | None = None
748
755
 
749
756
  if isinstance(spec, (ts.ColumnType, type, _GenericAlias)):
750
757
  col_type = ts.ColumnType.normalize_type(spec, nullable_default=True, allow_builtin_types=False)
@@ -769,8 +776,7 @@ class Table(SchemaObject):
769
776
  media_validation = (
770
777
  catalog.MediaValidation[media_validation_str.upper()] if media_validation_str is not None else None
771
778
  )
772
- if 'destination' in spec:
773
- destination = ObjectOps.validate_destination(spec['destination'], name)
779
+ destination = spec.get('destination')
774
780
  else:
775
781
  raise excs.Error(f'Invalid value for column {name!r}')
776
782
 
@@ -783,34 +789,36 @@ class Table(SchemaObject):
783
789
  media_validation=media_validation,
784
790
  destination=destination,
785
791
  )
792
+ # Validate the column's resolved_destination. This will ensure that if the column uses a default (global)
793
+ # media destination, it gets validated at this time.
794
+ ObjectOps.validate_destination(column.destination, column.name)
786
795
  columns.append(column)
796
+
787
797
  return columns
788
798
 
789
799
  @classmethod
790
800
  def validate_column_name(cls, name: str) -> None:
791
- """Check that a name is usable as a pixeltalbe column name"""
801
+ """Check that a name is usable as a pixeltable column name"""
792
802
  if is_system_column_name(name) or is_python_keyword(name):
793
803
  raise excs.Error(f'{name!r} is a reserved name in Pixeltable; please choose a different column name.')
794
804
  if not is_valid_identifier(name):
795
- raise excs.Error(f'Invalid column name: {name!r}')
805
+ raise excs.Error(f'Invalid column name: {name}')
796
806
 
797
807
  @classmethod
798
808
  def _verify_column(cls, col: Column) -> None:
799
809
  """Check integrity of user-supplied Column and supply defaults"""
800
810
  cls.validate_column_name(col.name)
801
811
  if col.stored is False and not col.is_computed:
802
- raise excs.Error(f'Column {col.name!r}: stored={col.stored} only applies to computed columns')
812
+ raise excs.Error(f'Column {col.name!r}: `stored={col.stored}` only applies to computed columns')
803
813
  if col.stored is False and col.has_window_fn_call():
804
814
  raise excs.Error(
805
815
  (
806
- f'Column {col.name!r}: stored={col.stored} is not valid for image columns computed with a '
816
+ f'Column {col.name!r}: `stored={col.stored}` is not valid for image columns computed with a '
807
817
  f'streaming function'
808
818
  )
809
819
  )
810
- if col.destination is not None and not (col.stored and col.is_computed):
811
- raise excs.Error(
812
- f'Column {col.name!r}: destination={col.destination} only applies to stored computed columns'
813
- )
820
+ if col._explicit_destination is not None and not (col.stored and col.is_computed):
821
+ raise excs.Error(f'Column {col.name!r}: `destination` property only applies to stored computed columns')
814
822
 
815
823
  @classmethod
816
824
  def _verify_schema(cls, schema: list[Column]) -> None:
@@ -862,10 +870,10 @@ class Table(SchemaObject):
862
870
  col = self._tbl_version_path.get_column(column)
863
871
  if col is None:
864
872
  if if_not_exists_ == IfNotExistsParam.ERROR:
865
- raise excs.Error(f'Column {column!r} unknown')
873
+ raise excs.Error(f'Unknown column: {column}')
866
874
  assert if_not_exists_ == IfNotExistsParam.IGNORE
867
875
  return
868
- if col.tbl.id != self._tbl_version_path.tbl_id:
876
+ if col.get_tbl().id != self._tbl_version_path.tbl_id:
869
877
  raise excs.Error(f'Cannot drop base table column {col.name!r}')
870
878
  col = self._tbl_version.get().cols_by_name[column]
871
879
  else:
@@ -876,10 +884,10 @@ class Table(SchemaObject):
876
884
  assert if_not_exists_ == IfNotExistsParam.IGNORE
877
885
  return
878
886
  col = column.col
879
- if col.tbl.id != self._tbl_version_path.tbl_id:
887
+ if col.get_tbl().id != self._tbl_version_path.tbl_id:
880
888
  raise excs.Error(f'Cannot drop base table column {col.name!r}')
881
889
 
882
- dependent_user_cols = [c for c in cat.get_column_dependents(col.tbl.id, col.id) if c.name is not None]
890
+ dependent_user_cols = [c for c in cat.get_column_dependents(col.get_tbl().id, col.id) if c.name is not None]
883
891
  if len(dependent_user_cols) > 0:
884
892
  raise excs.Error(
885
893
  f'Cannot drop column {col.name!r} because the following columns depend on it:\n'
@@ -889,21 +897,21 @@ class Table(SchemaObject):
889
897
  views = self._get_views(recursive=True, mutable_only=True)
890
898
 
891
899
  # See if any view predicates depend on this column
892
- dependent_views = []
900
+ dependent_views: list[tuple[Table, exprs.Expr]] = []
893
901
  for view in views:
894
902
  if view._tbl_version is not None:
895
903
  predicate = view._tbl_version.get().predicate
896
904
  if predicate is not None:
897
905
  for predicate_col in exprs.Expr.get_refd_column_ids(predicate.as_dict()):
898
- if predicate_col.tbl_id == col.tbl.id and predicate_col.col_id == col.id:
906
+ if predicate_col.tbl_id == col.get_tbl().id and predicate_col.col_id == col.id:
899
907
  dependent_views.append((view, predicate))
900
908
 
901
909
  if len(dependent_views) > 0:
902
910
  dependent_views_str = '\n'.join(
903
- f'view: {view._path()}, predicate: {predicate!s}' for view, predicate in dependent_views
911
+ f'view: {view._path()}, predicate: {predicate}' for view, predicate in dependent_views
904
912
  )
905
913
  raise excs.Error(
906
- f'Cannot drop column `{col.name}` because the following views depend on it:\n{dependent_views_str}'
914
+ f'Cannot drop column {col.name!r} because the following views depend on it:\n{dependent_views_str}'
907
915
  )
908
916
 
909
917
  # See if this column has a dependent store. We need to look through all stores in all
@@ -917,17 +925,17 @@ class Table(SchemaObject):
917
925
  ]
918
926
  if len(dependent_stores) > 0:
919
927
  dependent_store_names = [
920
- store.name if view._id == self._id else f'{store.name} (in view `{view._name}`)'
928
+ store.name if view._id == self._id else f'{store.name} (in view {view._name!r})'
921
929
  for view, store in dependent_stores
922
930
  ]
923
931
  raise excs.Error(
924
- f'Cannot drop column `{col.name}` because the following external stores depend on it:\n'
932
+ f'Cannot drop column {col.name!r} because the following external stores depend on it:\n'
925
933
  f'{", ".join(dependent_store_names)}'
926
934
  )
927
935
  all_columns = self.columns()
928
936
  if len(all_columns) == 1 and col.name == all_columns[0]:
929
937
  raise excs.Error(
930
- f'Cannot drop column `{col.name}` because it is the last remaining column in this table.'
938
+ f'Cannot drop column {col.name!r} because it is the last remaining column in this table.'
931
939
  f' Tables must have at least one column.'
932
940
  )
933
941
 
@@ -971,11 +979,11 @@ class Table(SchemaObject):
971
979
  self,
972
980
  column: str | ColumnRef,
973
981
  *,
974
- idx_name: Optional[str] = None,
975
- embedding: Optional[pxt.Function] = None,
976
- string_embed: Optional[pxt.Function] = None,
977
- image_embed: Optional[pxt.Function] = None,
978
- metric: str = 'cosine',
982
+ idx_name: str | None = None,
983
+ embedding: pxt.Function | None = None,
984
+ string_embed: pxt.Function | None = None,
985
+ image_embed: pxt.Function | None = None,
986
+ metric: Literal['cosine', 'ip', 'l2'] = 'cosine',
979
987
  if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
980
988
  ) -> None:
981
989
  """
@@ -983,25 +991,28 @@ class Table(SchemaObject):
983
991
  rows are inserted into the table.
984
992
 
985
993
  To add an embedding index, one must specify, at minimum, the column to be indexed and an embedding UDF.
986
- Only `String` and `Image` columns are currently supported. Here's an example that uses a
987
- [CLIP embedding][pixeltable.functions.huggingface.clip] to index an image column:
994
+ Only `String` and `Image` columns are currently supported.
988
995
 
989
- >>> from pixeltable.functions.huggingface import clip
990
- ... embedding_fn = clip.using(model_id='openai/clip-vit-base-patch32')
991
- ... tbl.add_embedding_index(tbl.img, embedding=embedding_fn)
996
+ Examples:
997
+ Here's an example that uses a
998
+ [CLIP embedding][pixeltable.functions.huggingface.clip] to index an image column:
999
+
1000
+ >>> from pixeltable.functions.huggingface import clip
1001
+ >>> embedding_fn = clip.using(model_id='openai/clip-vit-base-patch32')
1002
+ >>> tbl.add_embedding_index(tbl.img, embedding=embedding_fn)
992
1003
 
993
- Once the index is created, similiarity lookups can be performed using the `similarity` pseudo-function.
1004
+ Once the index is created, similarity lookups can be performed using the `similarity` pseudo-function:
994
1005
 
995
- >>> reference_img = PIL.Image.open('my_image.jpg')
996
- ... sim = tbl.img.similarity(reference_img)
997
- ... tbl.select(tbl.img, sim).order_by(sim, asc=False).limit(5)
1006
+ >>> reference_img = PIL.Image.open('my_image.jpg')
1007
+ >>> sim = tbl.img.similarity(reference_img)
1008
+ >>> tbl.select(tbl.img, sim).order_by(sim, asc=False).limit(5)
998
1009
 
999
- If the embedding UDF is a multimodal embedding (supporting more than one data type), then lookups may be
1000
- performed using any of its supported types. In our example, CLIP supports both text and images, so we can
1001
- also search for images using a text description:
1010
+ If the embedding UDF is a multimodal embedding (supporting more than one data type), then lookups may be
1011
+ performed using any of its supported types. In our example, CLIP supports both text and images, so we can
1012
+ also search for images using a text description:
1002
1013
 
1003
- >>> sim = tbl.img.similarity('a picture of a train')
1004
- ... tbl.select(tbl.img, sim).order_by(sim, asc=False).limit(5)
1014
+ >>> sim = tbl.img.similarity('a picture of a train')
1015
+ >>> tbl.select(tbl.img, sim).order_by(sim, asc=False).limit(5)
1005
1016
 
1006
1017
  Args:
1007
1018
  column: The name of, or reference to, the column to be indexed; must be a `String` or `Image` column.
@@ -1032,9 +1043,9 @@ class Table(SchemaObject):
1032
1043
  Add an index to the `img` column of the table `my_table`:
1033
1044
 
1034
1045
  >>> from pixeltable.functions.huggingface import clip
1035
- ... tbl = pxt.get_table('my_table')
1036
- ... embedding_fn = clip.using(model_id='openai/clip-vit-base-patch32')
1037
- ... tbl.add_embedding_index(tbl.img, embedding=embedding_fn)
1046
+ >>> tbl = pxt.get_table('my_table')
1047
+ >>> embedding_fn = clip.using(model_id='openai/clip-vit-base-patch32')
1048
+ >>> tbl.add_embedding_index(tbl.img, embedding=embedding_fn)
1038
1049
 
1039
1050
  Alternatively, the `img` column may be specified by name:
1040
1051
 
@@ -1072,7 +1083,7 @@ class Table(SchemaObject):
1072
1083
  raise excs.Error(f'Duplicate index name: {idx_name}')
1073
1084
  if not isinstance(self._tbl_version.get().idxs_by_name[idx_name].idx, index.EmbeddingIndex):
1074
1085
  raise excs.Error(
1075
- f'Index `{idx_name}` is not an embedding index. Cannot {if_exists_.name.lower()} it.'
1086
+ f'Index {idx_name!r} is not an embedding index. Cannot {if_exists_.name.lower()} it.'
1076
1087
  )
1077
1088
  if if_exists_ == IfExistsParam.IGNORE:
1078
1089
  return
@@ -1085,10 +1096,9 @@ class Table(SchemaObject):
1085
1096
  if idx_name is not None:
1086
1097
  Table.validate_column_name(idx_name)
1087
1098
 
1088
- # create the EmbeddingIndex instance to verify args
1089
- idx = EmbeddingIndex(
1090
- col, metric=metric, embed=embedding, string_embed=string_embed, image_embed=image_embed
1091
- )
1099
+ # validate EmbeddingIndex args
1100
+ idx = EmbeddingIndex(metric=metric, embed=embedding, string_embed=string_embed, image_embed=image_embed)
1101
+ _ = idx.create_value_expr(col)
1092
1102
  _ = self._tbl_version.get().add_index(col, idx_name=idx_name, idx=idx)
1093
1103
  # TODO: how to deal with exceptions here? drop the index and raise?
1094
1104
  FileCache.get().emit_eviction_warnings()
@@ -1097,7 +1107,7 @@ class Table(SchemaObject):
1097
1107
  self,
1098
1108
  *,
1099
1109
  column: str | ColumnRef | None = None,
1100
- idx_name: Optional[str] = None,
1110
+ idx_name: str | None = None,
1101
1111
  if_not_exists: Literal['error', 'ignore'] = 'error',
1102
1112
  ) -> None:
1103
1113
  """
@@ -1162,7 +1172,7 @@ class Table(SchemaObject):
1162
1172
  if isinstance(column, str):
1163
1173
  col = self._tbl_version_path.get_column(column)
1164
1174
  if col is None:
1165
- raise excs.Error(f'Column {column!r} unknown')
1175
+ raise excs.Error(f'Unknown column: {column}')
1166
1176
  elif isinstance(column, ColumnRef):
1167
1177
  exists = self._tbl_version_path.has_column(column.col)
1168
1178
  if not exists:
@@ -1176,7 +1186,7 @@ class Table(SchemaObject):
1176
1186
  self,
1177
1187
  *,
1178
1188
  column: str | ColumnRef | None = None,
1179
- idx_name: Optional[str] = None,
1189
+ idx_name: str | None = None,
1180
1190
  if_not_exists: Literal['error', 'ignore'] = 'error',
1181
1191
  ) -> None:
1182
1192
  """
@@ -1238,9 +1248,9 @@ class Table(SchemaObject):
1238
1248
  def _drop_index(
1239
1249
  self,
1240
1250
  *,
1241
- col: Optional[Column] = None,
1242
- idx_name: Optional[str] = None,
1243
- _idx_class: Optional[type[index.IndexBase]] = None,
1251
+ col: Column | None = None,
1252
+ idx_name: str | None = None,
1253
+ _idx_class: type[index.IndexBase] | None = None,
1244
1254
  if_not_exists: Literal['error', 'ignore'] = 'error',
1245
1255
  ) -> None:
1246
1256
  from pixeltable.catalog import Catalog
@@ -1257,9 +1267,10 @@ class Table(SchemaObject):
1257
1267
  return
1258
1268
  idx_info = self._tbl_version.get().idxs_by_name[idx_name]
1259
1269
  else:
1260
- if col.tbl.id != self._tbl_version.id:
1270
+ if col.get_tbl().id != self._tbl_version.id:
1261
1271
  raise excs.Error(
1262
- f'Column {col.name!r}: cannot drop index from column that belongs to base ({col.tbl.name!r})'
1272
+ f'Column {col.name!r}: '
1273
+ f'cannot drop index from column that belongs to base table {col.get_tbl().name!r}'
1263
1274
  )
1264
1275
  idx_info_list = [info for info in self._tbl_version.get().idxs_by_name.values() if info.col.id == col.id]
1265
1276
  if _idx_class is not None:
@@ -1271,17 +1282,17 @@ class Table(SchemaObject):
1271
1282
  assert if_not_exists_ == IfNotExistsParam.IGNORE
1272
1283
  return
1273
1284
  if len(idx_info_list) > 1:
1274
- raise excs.Error(f"Column {col.name!r} has multiple indices; specify 'idx_name' instead")
1285
+ raise excs.Error(f'Column {col.name!r} has multiple indices; specify `idx_name` explicitly to drop one')
1275
1286
  idx_info = idx_info_list[0]
1276
1287
 
1277
1288
  # Find out if anything depends on this index
1278
1289
  val_col = idx_info.val_col
1279
1290
  dependent_user_cols = [
1280
- c for c in Catalog.get().get_column_dependents(val_col.tbl.id, val_col.id) if c.name is not None
1291
+ c for c in Catalog.get().get_column_dependents(val_col.get_tbl().id, val_col.id) if c.name is not None
1281
1292
  ]
1282
1293
  if len(dependent_user_cols) > 0:
1283
1294
  raise excs.Error(
1284
- f'Cannot drop index because the following columns depend on it:\n'
1295
+ f'Cannot drop index {idx_info.name!r} because the following columns depend on it:\n'
1285
1296
  f'{", ".join(c.name for c in dependent_user_cols)}'
1286
1297
  )
1287
1298
  self._tbl_version.get().drop_index(idx_info.id)
@@ -1292,8 +1303,8 @@ class Table(SchemaObject):
1292
1303
  source: TableDataSource,
1293
1304
  /,
1294
1305
  *,
1295
- source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
1296
- schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
1306
+ source_format: Literal['csv', 'excel', 'parquet', 'json'] | None = None,
1307
+ schema_overrides: dict[str, ts.ColumnType] | None = None,
1297
1308
  on_error: Literal['abort', 'ignore'] = 'abort',
1298
1309
  print_stats: bool = False,
1299
1310
  **kwargs: Any,
@@ -1307,11 +1318,11 @@ class Table(SchemaObject):
1307
1318
  @abc.abstractmethod
1308
1319
  def insert(
1309
1320
  self,
1310
- source: Optional[TableDataSource] = None,
1321
+ source: TableDataSource | None = None,
1311
1322
  /,
1312
1323
  *,
1313
- source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
1314
- schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
1324
+ source_format: Literal['csv', 'excel', 'parquet', 'json'] | None = None,
1325
+ schema_overrides: dict[str, ts.ColumnType] | None = None,
1315
1326
  on_error: Literal['abort', 'ignore'] = 'abort',
1316
1327
  print_stats: bool = False,
1317
1328
  **kwargs: Any,
@@ -1328,7 +1339,8 @@ class Table(SchemaObject):
1328
1339
  on_error: Literal['abort', 'ignore'] = 'abort',
1329
1340
  print_stats: bool = False,
1330
1341
  **kwargs: Any,
1331
- )```
1342
+ )
1343
+ ```
1332
1344
 
1333
1345
  To insert just a single row, you can use the more concise syntax:
1334
1346
 
@@ -1338,7 +1350,8 @@ class Table(SchemaObject):
1338
1350
  on_error: Literal['abort', 'ignore'] = 'abort',
1339
1351
  print_stats: bool = False,
1340
1352
  **kwargs: Any
1341
- )```
1353
+ )
1354
+ ```
1342
1355
 
1343
1356
  Args:
1344
1357
  source: A data source from which data can be imported.
@@ -1394,7 +1407,7 @@ class Table(SchemaObject):
1394
1407
  raise NotImplementedError
1395
1408
 
1396
1409
  def update(
1397
- self, value_spec: dict[str, Any], where: Optional['exprs.Expr'] = None, cascade: bool = True
1410
+ self, value_spec: dict[str, Any], where: 'exprs.Expr' | None = None, cascade: bool = True
1398
1411
  ) -> UpdateStatus:
1399
1412
  """Update rows in this table.
1400
1413
 
@@ -1459,8 +1472,8 @@ class Table(SchemaObject):
1459
1472
  the row with new `id` 3 (assuming this key does not exist):
1460
1473
 
1461
1474
  >>> tbl.update(
1462
- [{'id': 1, 'name': 'Alice', 'age': 30}, {'id': 3, 'name': 'Bob', 'age': 40}],
1463
- if_not_exists='insert')
1475
+ ... [{'id': 1, 'name': 'Alice', 'age': 30}, {'id': 3, 'name': 'Bob', 'age': 40}],
1476
+ ... if_not_exists='insert')
1464
1477
  """
1465
1478
  from pixeltable.catalog import Catalog
1466
1479
 
@@ -1489,7 +1502,9 @@ class Table(SchemaObject):
1489
1502
  col_names = {col.name for col in col_vals}
1490
1503
  if any(pk_col_name not in col_names for pk_col_name in pk_col_names):
1491
1504
  missing_cols = pk_col_names - {col.name for col in col_vals}
1492
- raise excs.Error(f'Primary key columns ({", ".join(missing_cols)}) missing in {row_spec}')
1505
+ raise excs.Error(
1506
+ f'Primary key column(s) {", ".join(repr(c) for c in missing_cols)} missing in {row_spec}'
1507
+ )
1493
1508
  row_updates.append(col_vals)
1494
1509
 
1495
1510
  result = self._tbl_version.get().batch_update(
@@ -1555,22 +1570,22 @@ class Table(SchemaObject):
1555
1570
  if isinstance(column, str):
1556
1571
  col = self._tbl_version_path.get_column(column)
1557
1572
  if col is None:
1558
- raise excs.Error(f'Unknown column: {column!r}')
1573
+ raise excs.Error(f'Unknown column: {column}')
1559
1574
  col_name = column
1560
1575
  else:
1561
1576
  assert isinstance(column, ColumnRef)
1562
1577
  col = column.col
1563
1578
  if not self._tbl_version_path.has_column(col):
1564
- raise excs.Error(f'Unknown column: {col.name!r}')
1579
+ raise excs.Error(f'Unknown column: {col.name}')
1565
1580
  col_name = col.name
1566
1581
  if not col.is_computed:
1567
1582
  raise excs.Error(f'Column {col_name!r} is not a computed column')
1568
- if col.tbl.id != self._tbl_version_path.tbl_id:
1569
- raise excs.Error(f'Cannot recompute column of a base: {col_name!r}')
1583
+ if col.get_tbl().id != self._tbl_version_path.tbl_id:
1584
+ raise excs.Error(f'Cannot recompute column of a base: {col_name}')
1570
1585
  col_names.append(col_name)
1571
1586
 
1572
1587
  if where is not None and not where.is_bound_by([self._tbl_version_path]):
1573
- raise excs.Error(f"'where' ({where}) not bound by {self._display_str()}")
1588
+ raise excs.Error(f'`where` predicate ({where}) is not bound by {self._display_str()}')
1574
1589
 
1575
1590
  result = self._tbl_version.get().recompute_columns(
1576
1591
  col_names, where=where, errors_only=errors_only, cascade=cascade
@@ -1578,7 +1593,7 @@ class Table(SchemaObject):
1578
1593
  FileCache.get().emit_eviction_warnings()
1579
1594
  return result
1580
1595
 
1581
- def delete(self, where: Optional['exprs.Expr'] = None) -> UpdateStatus:
1596
+ def delete(self, where: 'exprs.Expr' | None = None) -> UpdateStatus:
1582
1597
  """Delete rows in this table.
1583
1598
 
1584
1599
  Args:
@@ -1621,12 +1636,12 @@ class Table(SchemaObject):
1621
1636
  with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=False):
1622
1637
  self.__check_mutable('link an external store to')
1623
1638
  if store.name in self.external_stores():
1624
- raise excs.Error(f'Table `{self._name}` already has an external store with that name: {store.name}')
1625
- _logger.info(f'Linking external store `{store.name}` to table `{self._name}`')
1639
+ raise excs.Error(f'Table {self._name!r} already has an external store with that name: {store.name}')
1640
+ _logger.info(f'Linking external store {store.name!r} to table {self._name!r}.')
1626
1641
 
1627
1642
  store.link(self._tbl_version.get()) # might call tbl_version.add_columns()
1628
1643
  self._tbl_version.get().link_external_store(store)
1629
- env.Env.get().console_logger.info(f'Linked external store `{store.name}` to table `{self._name}`.')
1644
+ env.Env.get().console_logger.info(f'Linked external store {store.name!r} to table {self._name!r}.')
1630
1645
 
1631
1646
  def unlink_external_stores(
1632
1647
  self, stores: str | list[str] | None = None, *, delete_external_data: bool = False, ignore_errors: bool = False
@@ -1658,7 +1673,7 @@ class Table(SchemaObject):
1658
1673
  if not ignore_errors:
1659
1674
  for store_name in stores:
1660
1675
  if store_name not in all_stores:
1661
- raise excs.Error(f'Table `{self._name}` has no external store with that name: {store_name}')
1676
+ raise excs.Error(f'Table {self._name!r} has no external store with that name: {store_name}')
1662
1677
 
1663
1678
  for store_name in stores:
1664
1679
  store = self._tbl_version.get().external_stores[store_name]
@@ -1668,7 +1683,7 @@ class Table(SchemaObject):
1668
1683
  self._tbl_version.get().unlink_external_store(store)
1669
1684
  if delete_external_data and isinstance(store, pxt.io.external_store.Project):
1670
1685
  store.delete()
1671
- env.Env.get().console_logger.info(f'Unlinked external store from table `{self._name}`: {store_str}')
1686
+ env.Env.get().console_logger.info(f'Unlinked external store from table {self._name!r}: {store_str}')
1672
1687
 
1673
1688
  def sync(
1674
1689
  self, stores: str | list[str] | None = None, *, export_data: bool = True, import_data: bool = True
@@ -1699,7 +1714,7 @@ class Table(SchemaObject):
1699
1714
 
1700
1715
  for store in stores:
1701
1716
  if store not in all_stores:
1702
- raise excs.Error(f'Table `{self._name}` has no external store with that name: {store}')
1717
+ raise excs.Error(f'Table {self._name!r} has no external store with that name: {store}')
1703
1718
 
1704
1719
  sync_status = UpdateStatus()
1705
1720
  for store in stores:
@@ -1715,7 +1730,7 @@ class Table(SchemaObject):
1715
1730
  def _ipython_key_completions_(self) -> list[str]:
1716
1731
  return list(self._get_schema().keys())
1717
1732
 
1718
- def get_versions(self, n: Optional[int] = None) -> list[VersionMetadata]:
1733
+ def get_versions(self, n: int | None = None) -> list[VersionMetadata]:
1719
1734
  """
1720
1735
  Returns information about versions of this table, most recent first.
1721
1736
 
@@ -1787,7 +1802,7 @@ class Table(SchemaObject):
1787
1802
 
1788
1803
  return metadata_dicts
1789
1804
 
1790
- def history(self, n: Optional[int] = None) -> pd.DataFrame:
1805
+ def history(self, n: int | None = None) -> pd.DataFrame:
1791
1806
  """
1792
1807
  Returns a human-readable report about versions of this table.
1793
1808