pixeltable 0.4.0rc3__py3-none-any.whl → 0.4.20__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.

This release has been flagged as potentially problematic.

Files changed (202)
  1. pixeltable/__init__.py +23 -5
  2. pixeltable/_version.py +1 -0
  3. pixeltable/catalog/__init__.py +5 -3
  4. pixeltable/catalog/catalog.py +1318 -404
  5. pixeltable/catalog/column.py +186 -115
  6. pixeltable/catalog/dir.py +1 -2
  7. pixeltable/catalog/globals.py +11 -43
  8. pixeltable/catalog/insertable_table.py +167 -79
  9. pixeltable/catalog/path.py +61 -23
  10. pixeltable/catalog/schema_object.py +9 -10
  11. pixeltable/catalog/table.py +626 -308
  12. pixeltable/catalog/table_metadata.py +101 -0
  13. pixeltable/catalog/table_version.py +713 -569
  14. pixeltable/catalog/table_version_handle.py +37 -6
  15. pixeltable/catalog/table_version_path.py +42 -29
  16. pixeltable/catalog/tbl_ops.py +50 -0
  17. pixeltable/catalog/update_status.py +191 -0
  18. pixeltable/catalog/view.py +108 -94
  19. pixeltable/config.py +128 -22
  20. pixeltable/dataframe.py +188 -100
  21. pixeltable/env.py +407 -136
  22. pixeltable/exceptions.py +6 -0
  23. pixeltable/exec/__init__.py +3 -0
  24. pixeltable/exec/aggregation_node.py +7 -8
  25. pixeltable/exec/cache_prefetch_node.py +83 -110
  26. pixeltable/exec/cell_materialization_node.py +231 -0
  27. pixeltable/exec/cell_reconstruction_node.py +135 -0
  28. pixeltable/exec/component_iteration_node.py +4 -3
  29. pixeltable/exec/data_row_batch.py +8 -65
  30. pixeltable/exec/exec_context.py +16 -4
  31. pixeltable/exec/exec_node.py +13 -36
  32. pixeltable/exec/expr_eval/evaluators.py +7 -6
  33. pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
  34. pixeltable/exec/expr_eval/globals.py +8 -5
  35. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  36. pixeltable/exec/expr_eval/schedulers.py +190 -30
  37. pixeltable/exec/globals.py +32 -0
  38. pixeltable/exec/in_memory_data_node.py +18 -18
  39. pixeltable/exec/object_store_save_node.py +293 -0
  40. pixeltable/exec/row_update_node.py +16 -9
  41. pixeltable/exec/sql_node.py +206 -101
  42. pixeltable/exprs/__init__.py +1 -1
  43. pixeltable/exprs/arithmetic_expr.py +27 -22
  44. pixeltable/exprs/array_slice.py +3 -3
  45. pixeltable/exprs/column_property_ref.py +34 -30
  46. pixeltable/exprs/column_ref.py +92 -96
  47. pixeltable/exprs/comparison.py +5 -5
  48. pixeltable/exprs/compound_predicate.py +5 -4
  49. pixeltable/exprs/data_row.py +152 -55
  50. pixeltable/exprs/expr.py +62 -43
  51. pixeltable/exprs/expr_dict.py +3 -3
  52. pixeltable/exprs/expr_set.py +17 -10
  53. pixeltable/exprs/function_call.py +75 -37
  54. pixeltable/exprs/globals.py +1 -2
  55. pixeltable/exprs/in_predicate.py +4 -4
  56. pixeltable/exprs/inline_expr.py +10 -27
  57. pixeltable/exprs/is_null.py +1 -3
  58. pixeltable/exprs/json_mapper.py +8 -8
  59. pixeltable/exprs/json_path.py +56 -22
  60. pixeltable/exprs/literal.py +5 -5
  61. pixeltable/exprs/method_ref.py +2 -2
  62. pixeltable/exprs/object_ref.py +2 -2
  63. pixeltable/exprs/row_builder.py +127 -53
  64. pixeltable/exprs/rowid_ref.py +8 -12
  65. pixeltable/exprs/similarity_expr.py +50 -25
  66. pixeltable/exprs/sql_element_cache.py +4 -4
  67. pixeltable/exprs/string_op.py +5 -5
  68. pixeltable/exprs/type_cast.py +3 -5
  69. pixeltable/func/__init__.py +1 -0
  70. pixeltable/func/aggregate_function.py +8 -8
  71. pixeltable/func/callable_function.py +9 -9
  72. pixeltable/func/expr_template_function.py +10 -10
  73. pixeltable/func/function.py +18 -20
  74. pixeltable/func/function_registry.py +6 -7
  75. pixeltable/func/globals.py +2 -3
  76. pixeltable/func/mcp.py +74 -0
  77. pixeltable/func/query_template_function.py +20 -18
  78. pixeltable/func/signature.py +43 -16
  79. pixeltable/func/tools.py +23 -13
  80. pixeltable/func/udf.py +18 -20
  81. pixeltable/functions/__init__.py +6 -0
  82. pixeltable/functions/anthropic.py +93 -33
  83. pixeltable/functions/audio.py +114 -10
  84. pixeltable/functions/bedrock.py +13 -6
  85. pixeltable/functions/date.py +1 -1
  86. pixeltable/functions/deepseek.py +20 -9
  87. pixeltable/functions/fireworks.py +2 -2
  88. pixeltable/functions/gemini.py +28 -11
  89. pixeltable/functions/globals.py +13 -13
  90. pixeltable/functions/groq.py +108 -0
  91. pixeltable/functions/huggingface.py +1046 -23
  92. pixeltable/functions/image.py +9 -18
  93. pixeltable/functions/llama_cpp.py +23 -8
  94. pixeltable/functions/math.py +3 -4
  95. pixeltable/functions/mistralai.py +4 -15
  96. pixeltable/functions/ollama.py +16 -9
  97. pixeltable/functions/openai.py +104 -82
  98. pixeltable/functions/openrouter.py +143 -0
  99. pixeltable/functions/replicate.py +2 -2
  100. pixeltable/functions/reve.py +250 -0
  101. pixeltable/functions/string.py +21 -28
  102. pixeltable/functions/timestamp.py +13 -14
  103. pixeltable/functions/together.py +4 -6
  104. pixeltable/functions/twelvelabs.py +92 -0
  105. pixeltable/functions/util.py +6 -1
  106. pixeltable/functions/video.py +1388 -106
  107. pixeltable/functions/vision.py +7 -7
  108. pixeltable/functions/whisper.py +15 -7
  109. pixeltable/functions/whisperx.py +179 -0
  110. pixeltable/{ext/functions → functions}/yolox.py +2 -4
  111. pixeltable/globals.py +332 -105
  112. pixeltable/index/base.py +13 -22
  113. pixeltable/index/btree.py +23 -22
  114. pixeltable/index/embedding_index.py +32 -44
  115. pixeltable/io/__init__.py +4 -2
  116. pixeltable/io/datarows.py +7 -6
  117. pixeltable/io/external_store.py +49 -77
  118. pixeltable/io/fiftyone.py +11 -11
  119. pixeltable/io/globals.py +29 -28
  120. pixeltable/io/hf_datasets.py +17 -9
  121. pixeltable/io/label_studio.py +70 -66
  122. pixeltable/io/lancedb.py +3 -0
  123. pixeltable/io/pandas.py +12 -11
  124. pixeltable/io/parquet.py +13 -93
  125. pixeltable/io/table_data_conduit.py +71 -47
  126. pixeltable/io/utils.py +3 -3
  127. pixeltable/iterators/__init__.py +2 -1
  128. pixeltable/iterators/audio.py +21 -11
  129. pixeltable/iterators/document.py +116 -55
  130. pixeltable/iterators/image.py +5 -2
  131. pixeltable/iterators/video.py +293 -13
  132. pixeltable/metadata/__init__.py +4 -2
  133. pixeltable/metadata/converters/convert_18.py +2 -2
  134. pixeltable/metadata/converters/convert_19.py +2 -2
  135. pixeltable/metadata/converters/convert_20.py +2 -2
  136. pixeltable/metadata/converters/convert_21.py +2 -2
  137. pixeltable/metadata/converters/convert_22.py +2 -2
  138. pixeltable/metadata/converters/convert_24.py +2 -2
  139. pixeltable/metadata/converters/convert_25.py +2 -2
  140. pixeltable/metadata/converters/convert_26.py +2 -2
  141. pixeltable/metadata/converters/convert_29.py +4 -4
  142. pixeltable/metadata/converters/convert_34.py +2 -2
  143. pixeltable/metadata/converters/convert_36.py +2 -2
  144. pixeltable/metadata/converters/convert_37.py +15 -0
  145. pixeltable/metadata/converters/convert_38.py +39 -0
  146. pixeltable/metadata/converters/convert_39.py +124 -0
  147. pixeltable/metadata/converters/convert_40.py +73 -0
  148. pixeltable/metadata/converters/util.py +13 -12
  149. pixeltable/metadata/notes.py +4 -0
  150. pixeltable/metadata/schema.py +79 -42
  151. pixeltable/metadata/utils.py +74 -0
  152. pixeltable/mypy/__init__.py +3 -0
  153. pixeltable/mypy/mypy_plugin.py +123 -0
  154. pixeltable/plan.py +274 -223
  155. pixeltable/share/__init__.py +1 -1
  156. pixeltable/share/packager.py +259 -129
  157. pixeltable/share/protocol/__init__.py +34 -0
  158. pixeltable/share/protocol/common.py +170 -0
  159. pixeltable/share/protocol/operation_types.py +33 -0
  160. pixeltable/share/protocol/replica.py +109 -0
  161. pixeltable/share/publish.py +213 -57
  162. pixeltable/store.py +238 -175
  163. pixeltable/type_system.py +104 -63
  164. pixeltable/utils/__init__.py +2 -3
  165. pixeltable/utils/arrow.py +108 -13
  166. pixeltable/utils/av.py +298 -0
  167. pixeltable/utils/azure_store.py +305 -0
  168. pixeltable/utils/code.py +3 -3
  169. pixeltable/utils/console_output.py +4 -1
  170. pixeltable/utils/coroutine.py +6 -23
  171. pixeltable/utils/dbms.py +31 -5
  172. pixeltable/utils/description_helper.py +4 -5
  173. pixeltable/utils/documents.py +5 -6
  174. pixeltable/utils/exception_handler.py +7 -30
  175. pixeltable/utils/filecache.py +6 -6
  176. pixeltable/utils/formatter.py +4 -6
  177. pixeltable/utils/gcs_store.py +283 -0
  178. pixeltable/utils/http_server.py +2 -3
  179. pixeltable/utils/iceberg.py +1 -2
  180. pixeltable/utils/image.py +17 -0
  181. pixeltable/utils/lancedb.py +88 -0
  182. pixeltable/utils/local_store.py +316 -0
  183. pixeltable/utils/misc.py +5 -0
  184. pixeltable/utils/object_stores.py +528 -0
  185. pixeltable/utils/pydantic.py +60 -0
  186. pixeltable/utils/pytorch.py +5 -6
  187. pixeltable/utils/s3_store.py +392 -0
  188. pixeltable-0.4.20.dist-info/METADATA +587 -0
  189. pixeltable-0.4.20.dist-info/RECORD +218 -0
  190. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info}/WHEEL +1 -1
  191. pixeltable-0.4.20.dist-info/entry_points.txt +2 -0
  192. pixeltable/__version__.py +0 -3
  193. pixeltable/ext/__init__.py +0 -17
  194. pixeltable/ext/functions/__init__.py +0 -11
  195. pixeltable/ext/functions/whisperx.py +0 -77
  196. pixeltable/utils/media_store.py +0 -77
  197. pixeltable/utils/s3.py +0 -17
  198. pixeltable/utils/sample.py +0 -25
  199. pixeltable-0.4.0rc3.dist-info/METADATA +0 -435
  200. pixeltable-0.4.0rc3.dist-info/RECORD +0 -189
  201. pixeltable-0.4.0rc3.dist-info/entry_points.txt +0 -3
  202. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info/licenses}/LICENSE +0 -0
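
The table.py diff below changes several public `Table` APIs: `get_metadata()` now returns a `TableMetadata` value instead of a plain dict, and `columns` / `base_table` change from properties to the methods `columns()` / `get_base_table()`. A minimal usage sketch under stated assumptions (the table name `films` is a placeholder, and subscript access on the metadata is inferred from the TypedDict-style `# type: ignore[typeddict-item]` hints in the diff):

```python
import pixeltable as pxt

tbl = pxt.get_table('films')  # hypothetical existing table

# 0.4.0rc3: get_metadata() returned a dict with keys like 'schema' and 'version'
# 0.4.20: get_metadata() returns a TableMetadata value (fields per catalog/table_metadata.py)
md = tbl.get_metadata()
print(md['version'], md['schema_version'], list(md['columns']))  # assumes TypedDict-style access

print(tbl.columns())         # was a property (tbl.columns) in 0.4.0rc3
print(tbl.get_base_table())  # was the base_table property in 0.4.0rc3
```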
@@ -2,13 +2,12 @@ from __future__ import annotations
2
2
 
3
3
  import abc
4
4
  import builtins
5
+ import datetime
5
6
  import json
6
7
  import logging
7
8
  from keyword import iskeyword as is_python_keyword
8
9
  from pathlib import Path
9
- from typing import TYPE_CHECKING, Any, Iterable, Literal, Optional, Union, overload
10
-
11
- from typing import _GenericAlias # type: ignore[attr-defined] # isort: skip
10
+ from typing import TYPE_CHECKING, Any, Iterable, Literal, overload
12
11
  from uuid import UUID
13
12
 
14
13
  import pandas as pd
@@ -16,7 +15,16 @@ import sqlalchemy as sql
16
15
 
17
16
  import pixeltable as pxt
18
17
  from pixeltable import catalog, env, exceptions as excs, exprs, index, type_system as ts
18
+ from pixeltable.catalog.table_metadata import (
19
+ ColumnMetadata,
20
+ EmbeddingIndexParams,
21
+ IndexMetadata,
22
+ TableMetadata,
23
+ VersionMetadata,
24
+ )
19
25
  from pixeltable.metadata import schema
26
+ from pixeltable.metadata.utils import MetadataUtils
27
+ from pixeltable.utils.object_stores import ObjectOps
20
28
 
21
29
  from ..exprs import ColumnRef
22
30
  from ..utils.description_helper import DescriptionHelper
@@ -27,13 +35,16 @@ from .globals import (
27
35
  IfExistsParam,
28
36
  IfNotExistsParam,
29
37
  MediaValidation,
30
- UpdateStatus,
31
38
  is_system_column_name,
32
39
  is_valid_identifier,
33
40
  )
34
41
  from .schema_object import SchemaObject
35
42
  from .table_version_handle import TableVersionHandle
36
43
  from .table_version_path import TableVersionPath
44
+ from .update_status import UpdateStatus
45
+
46
+ from typing import _GenericAlias # type: ignore[attr-defined] # isort: skip
47
+
37
48
 
38
49
  if TYPE_CHECKING:
39
50
  import torch.utils.data
@@ -41,6 +52,7 @@ if TYPE_CHECKING:
41
52
  import pixeltable.plan
42
53
  from pixeltable.globals import TableDataSource
43
54
 
55
+
44
56
  _logger = logging.getLogger('pixeltable')
45
57
 
46
58
 
@@ -48,21 +60,34 @@ class Table(SchemaObject):
48
60
  """
49
61
  A handle to a table, view, or snapshot. This class is the primary interface through which table operations
50
62
  (queries, insertions, updates, etc.) are performed in Pixeltable.
63
+
64
+ Every user-invoked operation that runs an ExecNode tree (directly or indirectly) needs to call
65
+ FileCache.emit_eviction_warnings() at the end of the operation.
51
66
  """
52
67
 
53
- # Every user-invoked operation that runs an ExecNode tree (directly or indirectly) needs to call
54
- # FileCache.emit_eviction_warnings() at the end of the operation.
68
+ # the chain of TableVersions needed to run queries and supply metadata (eg, schema)
69
+ _tbl_version_path: TableVersionPath
55
70
 
56
- _is_dropped: bool
57
- __tbl_version_path: TableVersionPath
71
+ # the physical TableVersion backing this Table; None for pure snapshots
72
+ _tbl_version: TableVersionHandle | None
58
73
 
59
74
  def __init__(self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath):
60
75
  super().__init__(id, name, dir_id)
61
- self._is_dropped = False
62
- self.__tbl_version_path = tbl_version_path
76
+ self._tbl_version_path = tbl_version_path
77
+ self._tbl_version = None
63
78
 
64
79
  def _move(self, new_name: str, new_dir_id: UUID) -> None:
65
- self._check_is_dropped()
80
+ old_name = self._name
81
+ old_dir_id = self._dir_id
82
+
83
+ cat = catalog.Catalog.get()
84
+
85
+ @cat.register_undo_action
86
+ def _() -> None:
87
+ # TODO: We should really be invalidating the Table instance and forcing a reload.
88
+ self._name = old_name
89
+ self._dir_id = old_dir_id
90
+
66
91
  super()._move(new_name, new_dir_id)
67
92
  conn = env.Env.get().conn
68
93
  stmt = sql.text(
@@ -75,73 +100,85 @@ class Table(SchemaObject):
75
100
  )
76
101
  conn.execute(stmt, {'new_dir_id': new_dir_id, 'new_name': json.dumps(new_name), 'id': self._id})
77
102
 
78
- def get_metadata(self) -> dict[str, Any]:
103
+ # this is duplicated from SchemaObject so that our API docs show the docstring for Table
104
+ def get_metadata(self) -> 'TableMetadata':
79
105
  """
80
106
  Retrieves metadata associated with this table.
81
107
 
82
108
  Returns:
83
- A dictionary containing the metadata, in the following format:
84
-
85
- ```python
86
- {
87
- 'base': None, # If this is a view or snapshot, will contain the name of its base table
88
- 'schema': {
89
- 'col1': StringType(),
90
- 'col2': IntType(),
91
- },
92
- 'is_replica': False,
93
- 'version': 22,
94
- 'schema_version': 1,
95
- 'comment': '',
96
- 'num_retained_versions': 10,
97
- 'is_view': False,
98
- 'is_snapshot': False,
99
- 'media_validation': 'on_write',
100
- }
101
- ```
109
+ A [TableMetadata][pixeltable.TableMetadata] instance containing this table's metadata.
102
110
  """
103
- from pixeltable.catalog import Catalog
104
-
105
- with Catalog.get().begin_xact(for_write=False):
106
- self._check_is_dropped()
107
- md = super().get_metadata()
108
- md['base'] = self._base_table._path() if self._base_table is not None else None
109
- md['schema'] = self._schema
110
- md['is_replica'] = self._tbl_version.get().is_replica
111
- md['version'] = self._version
112
- md['schema_version'] = self._tbl_version.get().schema_version
113
- md['comment'] = self._comment
114
- md['num_retained_versions'] = self._num_retained_versions
115
- md['media_validation'] = self._media_validation.name.lower()
116
- return md
111
+ from pixeltable.catalog import retry_loop
112
+
113
+ @retry_loop(for_write=False)
114
+ def op() -> 'TableMetadata':
115
+ return self._get_metadata()
116
+
117
+ return op()
118
+
119
+ def _get_metadata(self) -> TableMetadata:
120
+ columns = self._tbl_version_path.columns()
121
+ column_info: dict[str, ColumnMetadata] = {}
122
+ for col in columns:
123
+ column_info[col.name] = ColumnMetadata(
124
+ name=col.name,
125
+ type_=col.col_type._to_str(as_schema=True),
126
+ version_added=col.schema_version_add,
127
+ is_stored=col.is_stored,
128
+ is_primary_key=col.is_pk,
129
+ media_validation=col.media_validation.name.lower() if col.media_validation is not None else None, # type: ignore[typeddict-item]
130
+ computed_with=col.value_expr.display_str(inline=False) if col.value_expr is not None else None,
131
+ defined_in=col.get_tbl().name,
132
+ )
133
+ # Pure snapshots have no indices
134
+ indices = self._tbl_version.get().idxs_by_name.values() if self._tbl_version is not None else {}
135
+ index_info: dict[str, IndexMetadata] = {}
136
+ for info in indices:
137
+ if isinstance(info.idx, index.EmbeddingIndex):
138
+ embeddings: list[str] = []
139
+ if info.idx.string_embed is not None:
140
+ embeddings.append(str(info.idx.string_embed))
141
+ if info.idx.image_embed is not None:
142
+ embeddings.append(str(info.idx.image_embed))
143
+ index_info[info.name] = IndexMetadata(
144
+ name=info.name,
145
+ columns=[info.col.name],
146
+ index_type='embedding',
147
+ parameters=EmbeddingIndexParams(
148
+ metric=info.idx.metric.name.lower(), # type: ignore[typeddict-item]
149
+ embeddings=embeddings,
150
+ ),
151
+ )
152
+ return TableMetadata(
153
+ name=self._name,
154
+ path=self._path(),
155
+ columns=column_info,
156
+ indices=index_info,
157
+ is_replica=self._tbl_version_path.is_replica(),
158
+ is_view=False,
159
+ is_snapshot=False,
160
+ version=self._get_version(),
161
+ version_created=datetime.datetime.fromtimestamp(
162
+ self._tbl_version_path.tbl_version.get().created_at, tz=datetime.timezone.utc
163
+ ),
164
+ schema_version=self._tbl_version_path.schema_version(),
165
+ comment=self._get_comment(),
166
+ media_validation=self._get_media_validation().name.lower(), # type: ignore[typeddict-item]
167
+ base=None,
168
+ )
117
169
 
118
- @property
119
- def _version(self) -> int:
170
+ def _get_version(self) -> int:
120
171
  """Return the version of this table. Used by tests to ascertain version changes."""
121
- return self._tbl_version.get().version
122
-
123
- @property
124
- def _tbl_version(self) -> TableVersionHandle:
125
- """Return TableVersion for just this table."""
126
- return self._tbl_version_path.tbl_version
127
-
128
- @property
129
- def _tbl_version_path(self) -> TableVersionPath:
130
- self._check_is_dropped()
131
- return self.__tbl_version_path
172
+ return self._tbl_version_path.version()
132
173
 
133
174
  def __hash__(self) -> int:
134
- return hash(self._tbl_version.id)
135
-
136
- def _check_is_dropped(self) -> None:
137
- if self._is_dropped:
138
- raise excs.Error(f'{self._display_name()} {self._name} has been dropped')
175
+ return hash(self._tbl_version_path.tbl_id)
139
176
 
140
177
  def __getattr__(self, name: str) -> 'exprs.ColumnRef':
141
178
  """Return a ColumnRef for the given name."""
142
179
  col = self._tbl_version_path.get_column(name)
143
180
  if col is None:
144
- raise AttributeError(f'Column {name!r} unknown')
181
+ raise AttributeError(f'Unknown column: {name}')
145
182
  return ColumnRef(col, reference_tbl=self._tbl_version_path)
146
183
 
147
184
  def __getitem__(self, name: str) -> 'exprs.ColumnRef':
@@ -159,18 +196,23 @@ class Table(SchemaObject):
159
196
  Returns:
160
197
  A list of view paths.
161
198
  """
162
- from pixeltable.catalog import Catalog
199
+ from pixeltable.catalog import retry_loop
163
200
 
164
- with Catalog.get().begin_xact(for_write=False):
165
- self._check_is_dropped()
201
+ # we need retry_loop() here, because we end up loading Tables for the views
202
+ @retry_loop(tbl=self._tbl_version_path, for_write=False)
203
+ def op() -> list[str]:
166
204
  return [t._path() for t in self._get_views(recursive=recursive)]
167
205
 
168
- def _get_views(self, *, recursive: bool = True) -> list['Table']:
206
+ return op()
207
+
208
+ def _get_views(self, *, recursive: bool = True, mutable_only: bool = False) -> list['Table']:
169
209
  cat = catalog.Catalog.get()
170
210
  view_ids = cat.get_view_ids(self._id)
171
211
  views = [cat.get_table_by_id(id) for id in view_ids]
212
+ if mutable_only:
213
+ views = [t for t in views if t._tbl_version_path.is_mutable()]
172
214
  if recursive:
173
- views.extend([t for view in views for t in view._get_views(recursive=True)])
215
+ views.extend(t for view in views for t in view._get_views(recursive=True, mutable_only=mutable_only))
174
216
  return views
175
217
 
176
218
  def _df(self) -> 'pxt.dataframe.DataFrame':
@@ -187,7 +229,7 @@ class Table(SchemaObject):
187
229
  """
188
230
  from pixeltable.catalog import Catalog
189
231
 
190
- with Catalog.get().begin_xact(for_write=False):
232
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=False):
191
233
  return self._df().select(*items, **named_items)
192
234
 
193
235
  def where(self, pred: 'exprs.Expr') -> 'pxt.DataFrame':
@@ -197,20 +239,16 @@ class Table(SchemaObject):
197
239
  """
198
240
  from pixeltable.catalog import Catalog
199
241
 
200
- with Catalog.get().begin_xact(for_write=False):
242
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=False):
201
243
  return self._df().where(pred)
202
244
 
203
245
  def join(
204
- self,
205
- other: 'Table',
206
- *,
207
- on: Optional['exprs.Expr'] = None,
208
- how: 'pixeltable.plan.JoinType.LiteralType' = 'inner',
246
+ self, other: 'Table', *, on: 'exprs.Expr' | None = None, how: 'pixeltable.plan.JoinType.LiteralType' = 'inner'
209
247
  ) -> 'pxt.DataFrame':
210
248
  """Join this table with another table."""
211
249
  from pixeltable.catalog import Catalog
212
250
 
213
- with Catalog.get().begin_xact(for_write=False):
251
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=False):
214
252
  return self._df().join(other, on=on, how=how)
215
253
 
216
254
  def order_by(self, *items: 'exprs.Expr', asc: bool = True) -> 'pxt.DataFrame':
@@ -220,7 +258,7 @@ class Table(SchemaObject):
220
258
  """
221
259
  from pixeltable.catalog import Catalog
222
260
 
223
- with Catalog.get().begin_xact(for_write=False):
261
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=False):
224
262
  return self._df().order_by(*items, asc=asc)
225
263
 
226
264
  def group_by(self, *items: 'exprs.Expr') -> 'pxt.DataFrame':
@@ -230,7 +268,7 @@ class Table(SchemaObject):
230
268
  """
231
269
  from pixeltable.catalog import Catalog
232
270
 
233
- with Catalog.get().begin_xact(for_write=False):
271
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=False):
234
272
  return self._df().group_by(*items)
235
273
 
236
274
  def distinct(self) -> 'pxt.DataFrame':
@@ -242,10 +280,10 @@ class Table(SchemaObject):
242
280
 
243
281
  def sample(
244
282
  self,
245
- n: Optional[int] = None,
246
- n_per_stratum: Optional[int] = None,
247
- fraction: Optional[float] = None,
248
- seed: Optional[int] = None,
283
+ n: int | None = None,
284
+ n_per_stratum: int | None = None,
285
+ fraction: float | None = None,
286
+ seed: int | None = None,
249
287
  stratify_by: Any = None,
250
288
  ) -> pxt.DataFrame:
251
289
  """Choose a shuffled sample of rows
@@ -276,53 +314,44 @@ class Table(SchemaObject):
276
314
  """Return the number of rows in this table."""
277
315
  return self._df().count()
278
316
 
279
- @property
280
317
  def columns(self) -> list[str]:
281
318
  """Return the names of the columns in this table."""
282
319
  cols = self._tbl_version_path.columns()
283
320
  return [c.name for c in cols]
284
321
 
285
- @property
286
- def _schema(self) -> dict[str, ts.ColumnType]:
322
+ def _get_schema(self) -> dict[str, ts.ColumnType]:
287
323
  """Return the schema (column names and column types) of this table."""
288
324
  return {c.name: c.col_type for c in self._tbl_version_path.columns()}
289
325
 
290
- @property
291
- def base_table(self) -> Optional['Table']:
292
- with env.Env.get().begin_xact():
293
- return self._base_table
326
+ def get_base_table(self) -> 'Table' | None:
327
+ return self._get_base_table()
294
328
 
295
- @property
296
329
  @abc.abstractmethod
297
- def _base_table(self) -> Optional['Table']:
298
- """The base's Table instance"""
330
+ def _get_base_table(self) -> 'Table' | None:
331
+ """The base's Table instance. Requires a transaction context"""
299
332
 
300
- @property
301
- def _base_tables(self) -> list['Table']:
302
- """The ancestor list of bases of this table, starting with its immediate base."""
303
- bases = []
304
- base = self._base_table
333
+ def _get_base_tables(self) -> list['Table']:
334
+ """The ancestor list of bases of this table, starting with its immediate base. Requires a transaction context"""
335
+ bases: list[Table] = []
336
+ base = self._get_base_table()
305
337
  while base is not None:
306
338
  bases.append(base)
307
- base = base._base_table
339
+ base = base._get_base_table()
308
340
  return bases
309
341
 
310
342
  @property
311
343
  @abc.abstractmethod
312
- def _effective_base_versions(self) -> list[Optional[int]]:
344
+ def _effective_base_versions(self) -> list[int | None]:
313
345
  """The effective versions of the ancestor bases, starting with its immediate base."""
314
346
 
315
- @property
316
- def _comment(self) -> str:
317
- return self._tbl_version.get().comment
347
+ def _get_comment(self) -> str:
348
+ return self._tbl_version_path.comment()
318
349
 
319
- @property
320
- def _num_retained_versions(self) -> int:
321
- return self._tbl_version.get().num_retained_versions
350
+ def _get_num_retained_versions(self) -> int:
351
+ return self._tbl_version_path.num_retained_versions()
322
352
 
323
- @property
324
- def _media_validation(self) -> MediaValidation:
325
- return self._tbl_version.get().media_validation
353
+ def _get_media_validation(self) -> MediaValidation:
354
+ return self._tbl_version_path.media_validation()
326
355
 
327
356
  def __repr__(self) -> str:
328
357
  return self._descriptors().to_string()
@@ -336,7 +365,7 @@ class Table(SchemaObject):
336
365
  """
337
366
  from pixeltable.catalog import Catalog
338
367
 
339
- with Catalog.get().begin_xact(for_write=False):
368
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=False):
340
369
  helper = DescriptionHelper()
341
370
  helper.append(self._table_descriptor())
342
371
  helper.append(self._col_descriptor())
@@ -346,11 +375,11 @@ class Table(SchemaObject):
346
375
  stores = self._external_store_descriptor()
347
376
  if not stores.empty:
348
377
  helper.append(stores)
349
- if self._comment:
350
- helper.append(f'COMMENT: {self._comment}')
378
+ if self._get_comment():
379
+ helper.append(f'COMMENT: {self._get_comment()}')
351
380
  return helper
352
381
 
353
- def _col_descriptor(self, columns: Optional[list[str]] = None) -> pd.DataFrame:
382
+ def _col_descriptor(self, columns: list[str] | None = None) -> pd.DataFrame:
354
383
  return pd.DataFrame(
355
384
  {
356
385
  'Column Name': col.name,
@@ -361,9 +390,11 @@ class Table(SchemaObject):
361
390
  if columns is None or col.name in columns
362
391
  )
363
392
 
364
- def _index_descriptor(self, columns: Optional[list[str]] = None) -> pd.DataFrame:
393
+ def _index_descriptor(self, columns: list[str] | None = None) -> pd.DataFrame:
365
394
  from pixeltable import index
366
395
 
396
+ if self._tbl_version is None:
397
+ return pd.DataFrame([])
367
398
  pd_rows = []
368
399
  for name, info in self._tbl_version.get().idxs_by_name.items():
369
400
  if isinstance(info.idx, index.EmbeddingIndex) and (columns is None or info.col.name in columns):
@@ -383,7 +414,7 @@ class Table(SchemaObject):
383
414
 
384
415
  def _external_store_descriptor(self) -> pd.DataFrame:
385
416
  pd_rows = []
386
- for name, store in self._tbl_version.get().external_stores.items():
417
+ for name, store in self._tbl_version_path.tbl_version.get().external_stores.items():
387
418
  row = {'External Store': name, 'Type': type(store).__name__}
388
419
  pd_rows.append(row)
389
420
  return pd.DataFrame(pd_rows)
@@ -392,7 +423,6 @@ class Table(SchemaObject):
392
423
  """
393
424
  Print the table schema.
394
425
  """
395
- self._check_is_dropped()
396
426
  if getattr(builtins, '__IPYTHON__', False):
397
427
  from IPython.display import Markdown, display
398
428
 
@@ -400,11 +430,6 @@ class Table(SchemaObject):
400
430
  else:
401
431
  print(repr(self))
402
432
 
403
- def _drop(self) -> None:
404
- self._check_is_dropped()
405
- self._tbl_version.get().drop()
406
- self._is_dropped = True
407
-
408
433
  # TODO Factor this out into a separate module.
409
434
  # The return type is unresolvable, but torch can't be imported since it's an optional dependency.
410
435
  def to_pytorch_dataset(self, image_format: str = 'pt') -> 'torch.utils.data.IterableDataset':
@@ -422,9 +447,11 @@ class Table(SchemaObject):
422
447
  def _column_has_dependents(self, col: Column) -> bool:
423
448
  """Returns True if the column has dependents, False otherwise."""
424
449
  assert col is not None
425
- assert col.name in self._schema
426
- if any(c.name is not None for c in col.dependent_cols):
450
+ assert col.name in self._get_schema()
451
+ cat = catalog.Catalog.get()
452
+ if any(c.name is not None for c in cat.get_column_dependents(col.get_tbl().id, col.id)):
427
453
  return True
454
+ assert self._tbl_version is not None
428
455
  return any(
429
456
  col in store.get_local_columns()
430
457
  for view in (self, *self._get_views(recursive=True))
@@ -436,13 +463,13 @@ class Table(SchemaObject):
436
463
 
437
464
  If `if_exists='ignore'`, returns a list of existing columns, if any, in `new_col_names`.
438
465
  """
439
- assert not self.get_metadata()['is_snapshot']
440
- existing_col_names = set(self._schema.keys())
466
+ assert self._tbl_version is not None
467
+ existing_col_names = set(self._get_schema().keys())
441
468
  cols_to_ignore = []
442
469
  for new_col_name in new_col_names:
443
470
  if new_col_name in existing_col_names:
444
471
  if if_exists == IfExistsParam.ERROR:
445
- raise excs.Error(f'Duplicate column name: {new_col_name!r}')
472
+ raise excs.Error(f'Duplicate column name: {new_col_name}')
446
473
  elif if_exists == IfExistsParam.IGNORE:
447
474
  cols_to_ignore.append(new_col_name)
448
475
  elif if_exists in (IfExistsParam.REPLACE, IfExistsParam.REPLACE_FORCE):
@@ -465,15 +492,14 @@ class Table(SchemaObject):
465
492
 
466
493
  def add_columns(
467
494
  self,
468
- schema: dict[str, Union[ts.ColumnType, builtins.type, _GenericAlias]],
495
+ schema: dict[str, ts.ColumnType | builtins.type | _GenericAlias],
469
496
  if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
470
497
  ) -> UpdateStatus:
471
498
  """
472
499
  Adds multiple columns to the table. The columns must be concrete (non-computed) columns; to add computed
473
500
  columns, use [`add_computed_column()`][pixeltable.catalog.Table.add_computed_column] instead.
474
501
 
475
- The format of the `schema` argument is identical to the format of the schema in a call to
476
- [`create_table()`][pixeltable.globals.create_table].
502
+ The format of the `schema` argument is a dict mapping column names to their types.
477
503
 
478
504
  Args:
479
505
  schema: A dictionary mapping column names to types.
@@ -507,10 +533,9 @@ class Table(SchemaObject):
507
533
  """
508
534
  from pixeltable.catalog import Catalog
509
535
 
510
- with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
511
- self._check_is_dropped()
512
- if self.get_metadata()['is_snapshot']:
513
- raise excs.Error('Cannot add column to a snapshot.')
536
+ # lock_mutable_tree=True: we might end up having to drop existing columns, which requires locking the tree
537
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
538
+ self.__check_mutable('add columns to')
514
539
  col_schema = {
515
540
  col_name: {'type': ts.ColumnType.normalize_type(spec, nullable_default=True, allow_builtin_types=False)}
516
541
  for col_name, spec in schema.items()
@@ -525,20 +550,22 @@ class Table(SchemaObject):
525
550
  for cname in cols_to_ignore:
526
551
  assert cname in col_schema
527
552
  del col_schema[cname]
553
+ result = UpdateStatus()
528
554
  if len(col_schema) == 0:
529
- return UpdateStatus()
555
+ return result
530
556
  new_cols = self._create_columns(col_schema)
531
557
  for new_col in new_cols:
532
558
  self._verify_column(new_col)
533
- status = self._tbl_version.get().add_columns(new_cols, print_stats=False, on_error='abort')
559
+ assert self._tbl_version is not None
560
+ result += self._tbl_version.get().add_columns(new_cols, print_stats=False, on_error='abort')
534
561
  FileCache.get().emit_eviction_warnings()
535
- return status
562
+ return result
536
563
 
537
564
  def add_column(
538
565
  self,
539
566
  *,
540
567
  if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
541
- **kwargs: Union[ts.ColumnType, builtins.type, _GenericAlias, exprs.Expr],
568
+ **kwargs: ts.ColumnType | builtins.type | _GenericAlias | exprs.Expr,
542
569
  ) -> UpdateStatus:
543
570
  """
544
571
  Adds an ordinary (non-computed) column to the table.
@@ -568,30 +595,24 @@ class Table(SchemaObject):
568
595
 
569
596
  >>> tbl.add_columns({'new_col': pxt.Int})
570
597
  """
571
- from pixeltable.catalog import Catalog
572
-
573
- with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
574
- self._check_is_dropped()
575
- # verify kwargs
576
- if self._tbl_version.get().is_snapshot:
577
- raise excs.Error('Cannot add column to a snapshot.')
578
- # verify kwargs and construct column schema dict
579
- if len(kwargs) != 1:
580
- raise excs.Error(
581
- f'add_column() requires exactly one keyword argument of the form "col_name=col_type"; '
582
- f'got {len(kwargs)} instead ({", ".join(kwargs.keys())})'
583
- )
584
- col_type = next(iter(kwargs.values()))
585
- if not isinstance(col_type, (ts.ColumnType, type, _GenericAlias)):
586
- raise excs.Error(
587
- 'The argument to add_column() must be a type; did you intend to use add_computed_column() instead?'
588
- )
589
- return self.add_columns(kwargs, if_exists=if_exists)
598
+ # verify kwargs and construct column schema dict
599
+ if len(kwargs) != 1:
600
+ raise excs.Error(
601
+ f'add_column() requires exactly one keyword argument of the form `col_name=col_type`; '
602
+ f'got {len(kwargs)} arguments instead ({", ".join(kwargs.keys())})'
603
+ )
604
+ col_type = next(iter(kwargs.values()))
605
+ if not isinstance(col_type, (ts.ColumnType, type, _GenericAlias)):
606
+ raise excs.Error(
607
+ 'The argument to add_column() must be a type; did you intend to use add_computed_column() instead?'
608
+ )
609
+ return self.add_columns(kwargs, if_exists=if_exists)
590
610
 
591
611
  def add_computed_column(
592
612
  self,
593
613
  *,
594
- stored: Optional[bool] = None,
614
+ stored: bool | None = None,
615
+ destination: str | Path | None = None,
595
616
  print_stats: bool = False,
596
617
  on_error: Literal['abort', 'ignore'] = 'abort',
597
618
  if_exists: Literal['error', 'ignore', 'replace'] = 'error',
@@ -603,6 +624,7 @@ class Table(SchemaObject):
603
624
  Args:
604
625
  kwargs: Exactly one keyword argument of the form `col_name=expression`.
605
626
  stored: Whether the column is materialized and stored or computed on demand.
627
+ destination: An object store reference for persisting computed files.
606
628
  print_stats: If `True`, print execution metrics during evaluation.
607
629
  on_error: Determines the behavior if an error occurs while evaluating the column expression for at least one
608
630
  row.
@@ -610,7 +632,7 @@ class Table(SchemaObject):
610
632
  - `'abort'`: an exception will be raised and the column will not be added.
611
633
  - `'ignore'`: execution will continue and the column will be added. Any rows
612
634
  with errors will have a `None` value for the column, with information about the error stored in the
613
- corresponding `tbl.col_name.errortype` and `tbl.col_name.errormsg` fields.
635
+ corresponding `tbl.col_name.errormsg` and `tbl.col_name.errortype` fields.
614
636
  if_exists: Determines the behavior if the column already exists. Must be one of the following:
615
637
 
616
638
  - `'error'`: an exception will be raised.
@@ -637,31 +659,32 @@ class Table(SchemaObject):
637
659
  """
638
660
  from pixeltable.catalog import Catalog
639
661
 
640
- with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
641
- self._check_is_dropped()
642
- if self.get_metadata()['is_snapshot']:
643
- raise excs.Error('Cannot add column to a snapshot.')
662
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
663
+ self.__check_mutable('add columns to')
644
664
  if len(kwargs) != 1:
645
665
  raise excs.Error(
646
666
  f'add_computed_column() requires exactly one keyword argument of the form '
647
- '"column-name=type|value-expression"; '
648
- f'got {len(kwargs)} arguments instead ({", ".join(list(kwargs.keys()))})'
667
+ '`col_name=col_type` or `col_name=expression`; '
668
+ f'got {len(kwargs)} arguments instead ({", ".join(kwargs.keys())})'
649
669
  )
650
670
  col_name, spec = next(iter(kwargs.items()))
651
671
  if not is_valid_identifier(col_name):
652
- raise excs.Error(f'Invalid column name: {col_name!r}')
672
+ raise excs.Error(f'Invalid column name: {col_name}')
653
673
 
654
674
  col_schema: dict[str, Any] = {'value': spec}
655
675
  if stored is not None:
656
676
  col_schema['stored'] = stored
657
677
 
678
+ if destination is not None:
679
+ col_schema['destination'] = destination
680
+
658
681
  # Raise an error if the column expression refers to a column error property
659
682
  if isinstance(spec, exprs.Expr):
660
683
  for e in spec.subexprs(expr_class=exprs.ColumnPropertyRef, traverse_matches=False):
661
- if e.is_error_prop():
684
+ if e.is_cellmd_prop():
662
685
  raise excs.Error(
663
- 'Use of a reference to an error property of another column is not allowed in a computed '
664
- f'column. The specified computation for this column contains this reference: `{e!r}`'
686
+ f'Use of a reference to the {e.prop.name.lower()!r} property of another column '
687
+ f'is not allowed in a computed column.'
665
688
  )
666
689
 
667
690
  # handle existing columns based on if_exists parameter
@@ -669,16 +692,18 @@ class Table(SchemaObject):
669
692
  [col_name], IfExistsParam.validated(if_exists, 'if_exists')
670
693
  )
671
694
  # if the column to add already exists and user asked to ignore
672
- # exiting column, there's nothing to do.
695
+ # existing column, there's nothing to do.
696
+ result = UpdateStatus()
673
697
  if len(cols_to_ignore) != 0:
674
698
  assert cols_to_ignore[0] == col_name
675
- return UpdateStatus()
699
+ return result
676
700
 
677
701
  new_col = self._create_columns({col_name: col_schema})[0]
678
702
  self._verify_column(new_col)
679
- status = self._tbl_version.get().add_columns([new_col], print_stats=print_stats, on_error=on_error)
703
+ assert self._tbl_version is not None
704
+ result += self._tbl_version.get().add_columns([new_col], print_stats=print_stats, on_error=on_error)
680
705
  FileCache.get().emit_eviction_warnings()
681
- return status
706
+ return result
682
707
 
683
708
  @classmethod
684
709
  def _validate_column_spec(cls, name: str, spec: dict[str, Any]) -> None:
@@ -688,40 +713,45 @@ class Table(SchemaObject):
688
713
  (on account of containing Python Callables or Exprs).
689
714
  """
690
715
  assert isinstance(spec, dict)
691
- valid_keys = {'type', 'value', 'stored', 'media_validation'}
716
+ valid_keys = {'type', 'value', 'stored', 'media_validation', 'destination'}
692
717
  for k in spec:
693
718
  if k not in valid_keys:
694
- raise excs.Error(f'Column {name}: invalid key {k!r}')
719
+ raise excs.Error(f'Column {name!r}: invalid key {k!r}')
695
720
 
696
721
  if 'type' not in spec and 'value' not in spec:
697
- raise excs.Error(f"Column {name}: 'type' or 'value' must be specified")
722
+ raise excs.Error(f"Column {name!r}: 'type' or 'value' must be specified")
698
723
 
699
724
  if 'type' in spec and not isinstance(spec['type'], (ts.ColumnType, type, _GenericAlias)):
700
- raise excs.Error(f'Column {name}: "type" must be a type or ColumnType, got {spec["type"]}')
725
+ raise excs.Error(f"Column {name!r}: 'type' must be a type or ColumnType; got {spec['type']}")
701
726
 
702
727
  if 'value' in spec:
703
728
  value_expr = exprs.Expr.from_object(spec['value'])
704
729
  if value_expr is None:
705
- raise excs.Error(f'Column {name}: value must be a Pixeltable expression.')
730
+ raise excs.Error(f"Column {name!r}: 'value' must be a Pixeltable expression.")
706
731
  if 'type' in spec:
707
- raise excs.Error(f"Column {name}: 'type' is redundant if 'value' is specified")
732
+ raise excs.Error(f"Column {name!r}: 'type' is redundant if 'value' is specified")
708
733
 
709
734
  if 'media_validation' in spec:
710
- _ = catalog.MediaValidation.validated(spec['media_validation'], f'Column {name}: media_validation')
735
+ _ = catalog.MediaValidation.validated(spec['media_validation'], f'Column {name!r}: media_validation')
711
736
 
712
737
  if 'stored' in spec and not isinstance(spec['stored'], bool):
713
- raise excs.Error(f'Column {name}: "stored" must be a bool, got {spec["stored"]}')
738
+ raise excs.Error(f"Column {name!r}: 'stored' must be a bool; got {spec['stored']}")
739
+
740
+ d = spec.get('destination')
741
+ if d is not None and not isinstance(d, (str, Path)):
742
+ raise excs.Error(f'Column {name!r}: `destination` must be a string or path; got {d}')
714
743
 
715
744
  @classmethod
716
745
  def _create_columns(cls, schema: dict[str, Any]) -> list[Column]:
717
746
  """Construct list of Columns, given schema"""
718
747
  columns: list[Column] = []
719
748
  for name, spec in schema.items():
720
- col_type: Optional[ts.ColumnType] = None
721
- value_expr: Optional[exprs.Expr] = None
749
+ col_type: ts.ColumnType | None = None
750
+ value_expr: exprs.Expr | None = None
722
751
  primary_key: bool = False
723
- media_validation: Optional[catalog.MediaValidation] = None
752
+ media_validation: catalog.MediaValidation | None = None
724
753
  stored = True
754
+ destination: str | None = None
725
755
 
726
756
  if isinstance(spec, (ts.ColumnType, type, _GenericAlias)):
727
757
  col_type = ts.ColumnType.normalize_type(spec, nullable_default=True, allow_builtin_types=False)
@@ -746,6 +776,7 @@ class Table(SchemaObject):
746
776
  media_validation = (
747
777
  catalog.MediaValidation[media_validation_str.upper()] if media_validation_str is not None else None
748
778
  )
779
+ destination = spec.get('destination')
749
780
  else:
750
781
  raise excs.Error(f'Invalid value for column {name!r}')
751
782
 
@@ -756,41 +787,46 @@ class Table(SchemaObject):
756
787
  stored=stored,
757
788
  is_pk=primary_key,
758
789
  media_validation=media_validation,
790
+ destination=destination,
759
791
  )
792
+ # Validate the column's resolved_destination. This will ensure that if the column uses a default (global)
793
+ # media destination, it gets validated at this time.
794
+ ObjectOps.validate_destination(column.destination, column.name)
760
795
  columns.append(column)
796
+
761
797
  return columns
762
798
 
763
799
  @classmethod
764
800
  def validate_column_name(cls, name: str) -> None:
765
- """Check that a name is usable as a pixeltalbe column name"""
801
+ """Check that a name is usable as a pixeltable column name"""
766
802
  if is_system_column_name(name) or is_python_keyword(name):
767
803
  raise excs.Error(f'{name!r} is a reserved name in Pixeltable; please choose a different column name.')
768
804
  if not is_valid_identifier(name):
769
- raise excs.Error(f'Invalid column name: {name!r}')
805
+ raise excs.Error(f'Invalid column name: {name}')
770
806
 
771
807
  @classmethod
772
808
  def _verify_column(cls, col: Column) -> None:
773
809
  """Check integrity of user-supplied Column and supply defaults"""
774
810
  cls.validate_column_name(col.name)
775
811
  if col.stored is False and not col.is_computed:
776
- raise excs.Error(f'Column {col.name!r}: stored={col.stored} only applies to computed columns')
812
+ raise excs.Error(f'Column {col.name!r}: `stored={col.stored}` only applies to computed columns')
777
813
  if col.stored is False and col.has_window_fn_call():
778
814
  raise excs.Error(
779
815
  (
780
- f'Column {col.name!r}: stored={col.stored} is not valid for image columns computed with a '
816
+ f'Column {col.name!r}: `stored={col.stored}` is not valid for image columns computed with a '
781
817
  f'streaming function'
782
818
  )
783
819
  )
820
+ if col._explicit_destination is not None and not (col.stored and col.is_computed):
821
+ raise excs.Error(f'Column {col.name!r}: `destination` property only applies to stored computed columns')
784
822
 
785
823
  @classmethod
786
824
  def _verify_schema(cls, schema: list[Column]) -> None:
787
825
  """Check integrity of user-supplied schema and set defaults"""
788
- column_names: set[str] = set()
789
826
  for col in schema:
790
827
  cls._verify_column(col)
791
- column_names.add(col.name)
792
828
 
793
- def drop_column(self, column: Union[str, ColumnRef], if_not_exists: Literal['error', 'ignore'] = 'error') -> None:
829
+ def drop_column(self, column: str | ColumnRef, if_not_exists: Literal['error', 'ignore'] = 'error') -> None:
794
830
  """Drop a column from the table.
795
831
 
796
832
  Args:
@@ -822,54 +858,86 @@ class Table(SchemaObject):
822
858
  """
823
859
  from pixeltable.catalog import Catalog
824
860
 
825
- with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
826
- self._check_is_dropped()
827
- if self._tbl_version_path.is_snapshot():
828
- raise excs.Error('Cannot drop column from a snapshot.')
861
+ cat = Catalog.get()
862
+
863
+ # lock_mutable_tree=True: we need to be able to see whether any transitive view has column dependents
864
+ with cat.begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
865
+ self.__check_mutable('drop columns from')
829
866
  col: Column = None
830
867
  if_not_exists_ = IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
831
868
 
832
869
  if isinstance(column, str):
833
- col = self._tbl_version_path.get_column(column, include_bases=False)
870
+ col = self._tbl_version_path.get_column(column)
834
871
  if col is None:
835
872
  if if_not_exists_ == IfNotExistsParam.ERROR:
836
- raise excs.Error(f'Column {column!r} unknown')
873
+ raise excs.Error(f'Unknown column: {column}')
837
874
  assert if_not_exists_ == IfNotExistsParam.IGNORE
838
875
  return
876
+ if col.get_tbl().id != self._tbl_version_path.tbl_id:
877
+ raise excs.Error(f'Cannot drop base table column {col.name!r}')
839
878
  col = self._tbl_version.get().cols_by_name[column]
840
879
  else:
841
- exists = self._tbl_version_path.has_column(column.col, include_bases=False)
880
+ exists = self._tbl_version_path.has_column(column.col)
842
881
  if not exists:
843
882
  if if_not_exists_ == IfNotExistsParam.ERROR:
844
883
  raise excs.Error(f'Unknown column: {column.col.qualified_name}')
845
884
  assert if_not_exists_ == IfNotExistsParam.IGNORE
846
885
  return
847
886
  col = column.col
887
+ if col.get_tbl().id != self._tbl_version_path.tbl_id:
888
+ raise excs.Error(f'Cannot drop base table column {col.name!r}')
848
889
 
849
- dependent_user_cols = [c for c in col.dependent_cols if c.name is not None]
890
+ dependent_user_cols = [c for c in cat.get_column_dependents(col.get_tbl().id, col.id) if c.name is not None]
850
891
  if len(dependent_user_cols) > 0:
851
892
  raise excs.Error(
852
- f'Cannot drop column `{col.name}` because the following columns depend on it:\n'
893
+ f'Cannot drop column {col.name!r} because the following columns depend on it:\n'
853
894
  f'{", ".join(c.name for c in dependent_user_cols)}'
854
895
  )
855
896
 
897
+ views = self._get_views(recursive=True, mutable_only=True)
898
+
899
+ # See if any view predicates depend on this column
900
+ dependent_views: list[tuple[Table, exprs.Expr]] = []
901
+ for view in views:
902
+ if view._tbl_version is not None:
903
+ predicate = view._tbl_version.get().predicate
904
+ if predicate is not None:
905
+ for predicate_col in exprs.Expr.get_refd_column_ids(predicate.as_dict()):
906
+ if predicate_col.tbl_id == col.get_tbl().id and predicate_col.col_id == col.id:
907
+ dependent_views.append((view, predicate))
908
+
909
+ if len(dependent_views) > 0:
910
+ dependent_views_str = '\n'.join(
911
+ f'view: {view._path()}, predicate: {predicate}' for view, predicate in dependent_views
912
+ )
913
+ raise excs.Error(
914
+ f'Cannot drop column {col.name!r} because the following views depend on it:\n{dependent_views_str}'
915
+ )
916
+
856
917
  # See if this column has a dependent store. We need to look through all stores in all
857
918
  # (transitive) views of this table.
919
+ col_handle = col.handle
858
920
  dependent_stores = [
859
921
  (view, store)
860
- for view in (self, *self._get_views(recursive=True))
922
+ for view in (self, *views)
861
923
  for store in view._tbl_version.get().external_stores.values()
862
- if col in store.get_local_columns()
924
+ if col_handle in store.get_local_columns()
863
925
  ]
864
926
  if len(dependent_stores) > 0:
865
927
  dependent_store_names = [
866
- store.name if view._id == self._id else f'{store.name} (in view `{view._name}`)'
928
+ store.name if view._id == self._id else f'{store.name} (in view {view._name!r})'
867
929
  for view, store in dependent_stores
868
930
  ]
869
931
  raise excs.Error(
870
- f'Cannot drop column `{col.name}` because the following external stores depend on it:\n'
932
+ f'Cannot drop column {col.name!r} because the following external stores depend on it:\n'
871
933
  f'{", ".join(dependent_store_names)}'
872
934
  )
935
+ all_columns = self.columns()
936
+ if len(all_columns) == 1 and col.name == all_columns[0]:
937
+ raise excs.Error(
938
+ f'Cannot drop column {col.name!r} because it is the last remaining column in this table.'
939
+ f' Tables must have at least one column.'
940
+ )
873
941
 
874
942
  self._tbl_version.get().drop_column(col)
875
943
 
@@ -891,7 +959,7 @@ class Table(SchemaObject):
891
959
  """
892
960
  from pixeltable.catalog import Catalog
893
961
 
894
- with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
962
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=False):
895
963
  self._tbl_version.get().rename_column(old_name, new_name)
896
964
 
897
965
  def _list_index_info_for_test(self) -> list[dict[str, Any]]:
@@ -902,7 +970,6 @@ class Table(SchemaObject):
902
970
  A list of index information, each containing the index's
903
971
  id, name, and the name of the column it indexes.
904
972
  """
905
- assert not self._is_dropped
906
973
  index_info = []
907
974
  for idx_name, idx in self._tbl_version.get().idxs_by_name.items():
908
975
  index_info.append({'_id': idx.id, '_name': idx_name, '_column': idx.col.name})
@@ -910,13 +977,13 @@ class Table(SchemaObject):
910
977
 
911
978
  def add_embedding_index(
912
979
  self,
913
- column: Union[str, ColumnRef],
980
+ column: str | ColumnRef,
914
981
  *,
915
- idx_name: Optional[str] = None,
916
- embedding: Optional[pxt.Function] = None,
917
- string_embed: Optional[pxt.Function] = None,
918
- image_embed: Optional[pxt.Function] = None,
919
- metric: str = 'cosine',
982
+ idx_name: str | None = None,
983
+ embedding: pxt.Function | None = None,
984
+ string_embed: pxt.Function | None = None,
985
+ image_embed: pxt.Function | None = None,
986
+ metric: Literal['cosine', 'ip', 'l2'] = 'cosine',
920
987
  if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
921
988
  ) -> None:
922
989
  """
@@ -924,25 +991,28 @@ class Table(SchemaObject):
924
991
  rows are inserted into the table.
925
992
 
926
993
  To add an embedding index, one must specify, at minimum, the column to be indexed and an embedding UDF.
927
- Only `String` and `Image` columns are currently supported. Here's an example that uses a
928
- [CLIP embedding][pixeltable.functions.huggingface.clip] to index an image column:
994
+ Only `String` and `Image` columns are currently supported.
929
995
 
930
- >>> from pixeltable.functions.huggingface import clip
931
- ... embedding_fn = clip.using(model_id='openai/clip-vit-base-patch32')
932
- ... tbl.add_embedding_index(tbl.img, embedding=embedding_fn)
996
+ Examples:
997
+ Here's an example that uses a
998
+ [CLIP embedding][pixeltable.functions.huggingface.clip] to index an image column:
999
+
1000
+ >>> from pixeltable.functions.huggingface import clip
1001
+ >>> embedding_fn = clip.using(model_id='openai/clip-vit-base-patch32')
1002
+ >>> tbl.add_embedding_index(tbl.img, embedding=embedding_fn)
933
1003
 
934
- Once the index is created, similiarity lookups can be performed using the `similarity` pseudo-function.
1004
+ Once the index is created, similarity lookups can be performed using the `similarity` pseudo-function:
935
1005
 
936
- >>> reference_img = PIL.Image.open('my_image.jpg')
937
- ... sim = tbl.img.similarity(reference_img)
938
- ... tbl.select(tbl.img, sim).order_by(sim, asc=False).limit(5)
1006
+ >>> reference_img = PIL.Image.open('my_image.jpg')
1007
+ >>> sim = tbl.img.similarity(reference_img)
1008
+ >>> tbl.select(tbl.img, sim).order_by(sim, asc=False).limit(5)
939
1009
 
940
- If the embedding UDF is a multimodal embedding (supporting more than one data type), then lookups may be
941
- performed using any of its supported types. In our example, CLIP supports both text and images, so we can
942
- also search for images using a text description:
1010
+ If the embedding UDF is a multimodal embedding (supporting more than one data type), then lookups may be
1011
+ performed using any of its supported types. In our example, CLIP supports both text and images, so we can
1012
+ also search for images using a text description:
943
1013
 
944
- >>> sim = tbl.img.similarity('a picture of a train')
945
- ... tbl.select(tbl.img, sim).order_by(sim, asc=False).limit(5)
1014
+ >>> sim = tbl.img.similarity('a picture of a train')
1015
+ >>> tbl.select(tbl.img, sim).order_by(sim, asc=False).limit(5)
946
1016
 
947
1017
  Args:
948
1018
  column: The name of, or reference to, the column to be indexed; must be a `String` or `Image` column.
@@ -973,9 +1043,9 @@ class Table(SchemaObject):
973
1043
  Add an index to the `img` column of the table `my_table`:
974
1044
 
975
1045
  >>> from pixeltable.functions.huggingface import clip
976
- ... tbl = pxt.get_table('my_table')
977
- ... embedding_fn = clip.using(model_id='openai/clip-vit-base-patch32')
978
- ... tbl.add_embedding_index(tbl.img, embedding=embedding_fn)
1046
+ >>> tbl = pxt.get_table('my_table')
1047
+ >>> embedding_fn = clip.using(model_id='openai/clip-vit-base-patch32')
1048
+ >>> tbl.add_embedding_index(tbl.img, embedding=embedding_fn)
979
1049
 
980
1050
  Alternatively, the `img` column may be specified by name:
981
1051
 
@@ -1001,9 +1071,8 @@ class Table(SchemaObject):
1001
1071
  """
1002
1072
  from pixeltable.catalog import Catalog
1003
1073
 
1004
- with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
1005
- if self._tbl_version_path.is_snapshot():
1006
- raise excs.Error('Cannot add an index to a snapshot')
1074
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
1075
+ self.__check_mutable('add an index to')
1007
1076
  col = self._resolve_column_parameter(column)
1008
1077
 
1009
1078
  if idx_name is not None and idx_name in self._tbl_version.get().idxs_by_name:
@@ -1014,7 +1083,7 @@ class Table(SchemaObject):
1014
1083
  raise excs.Error(f'Duplicate index name: {idx_name}')
1015
1084
  if not isinstance(self._tbl_version.get().idxs_by_name[idx_name].idx, index.EmbeddingIndex):
1016
1085
  raise excs.Error(
1017
- f'Index `{idx_name}` is not an embedding index. Cannot {if_exists_.name.lower()} it.'
1086
+ f'Index {idx_name!r} is not an embedding index. Cannot {if_exists_.name.lower()} it.'
1018
1087
  )
1019
1088
  if if_exists_ == IfExistsParam.IGNORE:
1020
1089
  return
@@ -1027,10 +1096,9 @@ class Table(SchemaObject):
1027
1096
  if idx_name is not None:
1028
1097
  Table.validate_column_name(idx_name)
1029
1098
 
1030
- # create the EmbeddingIndex instance to verify args
1031
- idx = EmbeddingIndex(
1032
- col, metric=metric, embed=embedding, string_embed=string_embed, image_embed=image_embed
1033
- )
1099
+ # validate EmbeddingIndex args
1100
+ idx = EmbeddingIndex(metric=metric, embed=embedding, string_embed=string_embed, image_embed=image_embed)
1101
+ _ = idx.create_value_expr(col)
1034
1102
  _ = self._tbl_version.get().add_index(col, idx_name=idx_name, idx=idx)
1035
1103
  # TODO: how to deal with exceptions here? drop the index and raise?
1036
1104
  FileCache.get().emit_eviction_warnings()
@@ -1038,8 +1106,8 @@ class Table(SchemaObject):
1038
1106
  def drop_embedding_index(
1039
1107
  self,
1040
1108
  *,
1041
- column: Union[str, ColumnRef, None] = None,
1042
- idx_name: Optional[str] = None,
1109
+ column: str | ColumnRef | None = None,
1110
+ idx_name: str | None = None,
1043
1111
  if_not_exists: Literal['error', 'ignore'] = 'error',
1044
1112
  ) -> None:
1045
1113
  """
@@ -1090,7 +1158,7 @@ class Table(SchemaObject):
1090
1158
  if (column is None) == (idx_name is None):
1091
1159
  raise excs.Error("Exactly one of 'column' or 'idx_name' must be provided")
1092
1160
 
1093
- with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
1161
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
1094
1162
  col: Column = None
1095
1163
  if idx_name is None:
1096
1164
  col = self._resolve_column_parameter(column)
@@ -1098,15 +1166,15 @@ class Table(SchemaObject):
1098
1166
 
1099
1167
  self._drop_index(col=col, idx_name=idx_name, _idx_class=index.EmbeddingIndex, if_not_exists=if_not_exists)
1100
1168
 
1101
- def _resolve_column_parameter(self, column: Union[str, ColumnRef]) -> Column:
1169
+ def _resolve_column_parameter(self, column: str | ColumnRef) -> Column:
1102
1170
  """Resolve a column parameter to a Column object"""
1103
1171
  col: Column = None
1104
1172
  if isinstance(column, str):
1105
- col = self._tbl_version_path.get_column(column, include_bases=True)
1173
+ col = self._tbl_version_path.get_column(column)
1106
1174
  if col is None:
1107
- raise excs.Error(f'Column {column!r} unknown')
1175
+ raise excs.Error(f'Unknown column: {column}')
1108
1176
  elif isinstance(column, ColumnRef):
1109
- exists = self._tbl_version_path.has_column(column.col, include_bases=True)
1177
+ exists = self._tbl_version_path.has_column(column.col)
1110
1178
  if not exists:
1111
1179
  raise excs.Error(f'Unknown column: {column.col.qualified_name}')
1112
1180
  col = column.col
@@ -1117,8 +1185,8 @@ class Table(SchemaObject):
1117
1185
  def drop_index(
1118
1186
  self,
1119
1187
  *,
1120
- column: Union[str, ColumnRef, None] = None,
1121
- idx_name: Optional[str] = None,
1188
+ column: str | ColumnRef | None = None,
1189
+ idx_name: str | None = None,
1122
1190
  if_not_exists: Literal['error', 'ignore'] = 'error',
1123
1191
  ) -> None:
1124
1192
  """
@@ -1169,7 +1237,7 @@ class Table(SchemaObject):
1169
1237
  if (column is None) == (idx_name is None):
1170
1238
  raise excs.Error("Exactly one of 'column' or 'idx_name' must be provided")
1171
1239
 
1172
- with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
1240
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=False):
1173
1241
  col: Column = None
1174
1242
  if idx_name is None:
1175
1243
  col = self._resolve_column_parameter(column)
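A sketch of how the two drop entry points are used, given the exactly-one-of check above; the index and column names are assumptions carried over from the earlier examples:

```python
# drop an embedding index by the name it was created under
tbl.drop_embedding_index(idx_name='img_clip_idx')

# or drop the single index on a column, identified by column name; if the column
# carries several indices, 'idx_name' must be used instead
tbl.drop_index(column='img', if_not_exists='ignore')
```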
@@ -1180,13 +1248,14 @@ class Table(SchemaObject):
1180
1248
  def _drop_index(
1181
1249
  self,
1182
1250
  *,
1183
- col: Optional[Column] = None,
1184
- idx_name: Optional[str] = None,
1185
- _idx_class: Optional[type[index.IndexBase]] = None,
1251
+ col: Column | None = None,
1252
+ idx_name: str | None = None,
1253
+ _idx_class: type[index.IndexBase] | None = None,
1186
1254
  if_not_exists: Literal['error', 'ignore'] = 'error',
1187
1255
  ) -> None:
1188
- if self._tbl_version_path.is_snapshot():
1189
- raise excs.Error('Cannot drop an index from a snapshot')
1256
+ from pixeltable.catalog import Catalog
1257
+
1258
+ self.__check_mutable('drop an index from')
1190
1259
  assert (col is None) != (idx_name is None)
1191
1260
 
1192
1261
  if idx_name is not None:
@@ -1198,9 +1267,10 @@ class Table(SchemaObject):
1198
1267
  return
1199
1268
  idx_info = self._tbl_version.get().idxs_by_name[idx_name]
1200
1269
  else:
1201
- if col.tbl.id != self._tbl_version.id:
1270
+ if col.get_tbl().id != self._tbl_version.id:
1202
1271
  raise excs.Error(
1203
- f'Column {col.name!r}: cannot drop index from column that belongs to base ({col.tbl.name!r})'
1272
+ f'Column {col.name!r}: '
1273
+ f'cannot drop index from column that belongs to base table {col.get_tbl().name!r}'
1204
1274
  )
1205
1275
  idx_info_list = [info for info in self._tbl_version.get().idxs_by_name.values() if info.col.id == col.id]
1206
1276
  if _idx_class is not None:
@@ -1212,14 +1282,17 @@ class Table(SchemaObject):
1212
1282
  assert if_not_exists_ == IfNotExistsParam.IGNORE
1213
1283
  return
1214
1284
  if len(idx_info_list) > 1:
1215
- raise excs.Error(f"Column {col.name!r} has multiple indices; specify 'idx_name' instead")
1285
+ raise excs.Error(f'Column {col.name!r} has multiple indices; specify `idx_name` explicitly to drop one')
1216
1286
  idx_info = idx_info_list[0]
1217
1287
 
1218
1288
  # Find out if anything depends on this index
1219
- dependent_user_cols = [c for c in idx_info.val_col.dependent_cols if c.name is not None]
1289
+ val_col = idx_info.val_col
1290
+ dependent_user_cols = [
1291
+ c for c in Catalog.get().get_column_dependents(val_col.get_tbl().id, val_col.id) if c.name is not None
1292
+ ]
1220
1293
  if len(dependent_user_cols) > 0:
1221
1294
  raise excs.Error(
1222
- f'Cannot drop index because the following columns depend on it:\n'
1295
+ f'Cannot drop index {idx_info.name!r} because the following columns depend on it:\n'
1223
1296
  f'{", ".join(c.name for c in dependent_user_cols)}'
1224
1297
  )
1225
1298
  self._tbl_version.get().drop_index(idx_info.id)
@@ -1230,8 +1303,8 @@ class Table(SchemaObject):
1230
1303
  source: TableDataSource,
1231
1304
  /,
1232
1305
  *,
1233
- source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
1234
- schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
1306
+ source_format: Literal['csv', 'excel', 'parquet', 'json'] | None = None,
1307
+ schema_overrides: dict[str, ts.ColumnType] | None = None,
1235
1308
  on_error: Literal['abort', 'ignore'] = 'abort',
1236
1309
  print_stats: bool = False,
1237
1310
  **kwargs: Any,
@@ -1245,11 +1318,11 @@ class Table(SchemaObject):
1245
1318
  @abc.abstractmethod
1246
1319
  def insert(
1247
1320
  self,
1248
- source: Optional[TableDataSource] = None,
1321
+ source: TableDataSource | None = None,
1249
1322
  /,
1250
1323
  *,
1251
- source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
1252
- schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
1324
+ source_format: Literal['csv', 'excel', 'parquet', 'json'] | None = None,
1325
+ schema_overrides: dict[str, ts.ColumnType] | None = None,
1253
1326
  on_error: Literal['abort', 'ignore'] = 'abort',
1254
1327
  print_stats: bool = False,
1255
1328
  **kwargs: Any,
@@ -1266,7 +1339,8 @@ class Table(SchemaObject):
1266
1339
  on_error: Literal['abort', 'ignore'] = 'abort',
1267
1340
  print_stats: bool = False,
1268
1341
  **kwargs: Any,
1269
- )```
1342
+ )
1343
+ ```
1270
1344
 
1271
1345
  To insert just a single row, you can use the more concise syntax:
1272
1346
 
@@ -1276,7 +1350,8 @@ class Table(SchemaObject):
1276
1350
  on_error: Literal['abort', 'ignore'] = 'abort',
1277
1351
  print_stats: bool = False,
1278
1352
  **kwargs: Any
1279
- )```
1353
+ )
1354
+ ```
1280
1355
 
1281
1356
  Args:
1282
1357
  source: A data source from which data can be imported.
@@ -1319,11 +1394,20 @@ class Table(SchemaObject):
1319
1394
  Insert rows from a CSV file:
1320
1395
 
1321
1396
  >>> tbl.insert(source='path/to/file.csv')
1397
+
1398
+ Insert Pydantic model instances into a table with two `pxt.Int` columns `a` and `b`:
1399
+
1400
+ >>> class MyModel(pydantic.BaseModel):
1401
+ ... a: int
1402
+ ... b: int
1403
+ ...
1404
+ >>> models = [MyModel(a=1, b=2), MyModel(a=3, b=4)]
1405
+ >>> tbl.insert(models)
1322
1406
  """
1323
1407
  raise NotImplementedError
1324
1408
 
1325
1409
  def update(
1326
- self, value_spec: dict[str, Any], where: Optional['exprs.Expr'] = None, cascade: bool = True
1410
+ self, value_spec: dict[str, Any], where: 'exprs.Expr' | None = None, cascade: bool = True
1327
1411
  ) -> UpdateStatus:
1328
1412
  """Update rows in this table.
1329
1413
 
@@ -1332,6 +1416,9 @@ class Table(SchemaObject):
1332
1416
  where: a predicate to filter rows to update.
1333
1417
  cascade: if True, also update all computed columns that transitively depend on the updated columns.
1334
1418
 
1419
+ Returns:
1420
+ An [`UpdateStatus`][pixeltable.UpdateStatus] object containing information about the update.
1421
+
1335
1422
  Examples:
1336
1423
  Set column `int_col` to 1 for all rows:
1337
1424
 
@@ -1351,10 +1438,11 @@ class Table(SchemaObject):
1351
1438
  """
1352
1439
  from pixeltable.catalog import Catalog
1353
1440
 
1354
- with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
1355
- status = self._tbl_version.get().update(value_spec, where, cascade)
1441
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
1442
+ self.__check_mutable('update')
1443
+ result = self._tbl_version.get().update(value_spec, where, cascade)
1356
1444
  FileCache.get().emit_eviction_warnings()
1357
- return status
1445
+ return result
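For example, a filtered update that also refreshes dependent computed columns and inspects the returned `UpdateStatus`; the column names are illustrative:

```python
# set 'int_col' to 1 only for matching rows; cascade=True refreshes dependent computed columns
status = tbl.update({'int_col': 1}, where=tbl.other_col == 'foo', cascade=True)
print(status)
```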
1358
1446
 
1359
1447
  def batch_update(
1360
1448
  self,
@@ -1384,14 +1472,13 @@ class Table(SchemaObject):
1384
1472
  the row with new `id` 3 (assuming this key does not exist):
1385
1473
 
1386
1474
  >>> tbl.batch_update(
1387
- [{'id': 1, 'name': 'Alice', 'age': 30}, {'id': 3, 'name': 'Bob', 'age': 40}],
1388
- if_not_exists='insert')
1475
+ ... [{'id': 1, 'name': 'Alice', 'age': 30}, {'id': 3, 'name': 'Bob', 'age': 40}],
1476
+ ... if_not_exists='insert')
1389
1477
  """
1390
1478
  from pixeltable.catalog import Catalog
1391
1479
 
1392
- with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
1393
- if self._tbl_version_path.is_snapshot():
1394
- raise excs.Error('Cannot update a snapshot')
1480
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
1481
+ self.__check_mutable('update')
1395
1482
  rows = list(rows)
1396
1483
 
1397
1484
  row_updates: list[dict[Column, exprs.Expr]] = []
@@ -1415,10 +1502,12 @@ class Table(SchemaObject):
1415
1502
  col_names = {col.name for col in col_vals}
1416
1503
  if any(pk_col_name not in col_names for pk_col_name in pk_col_names):
1417
1504
  missing_cols = pk_col_names - {col.name for col in col_vals}
1418
- raise excs.Error(f'Primary key columns ({", ".join(missing_cols)}) missing in {row_spec}')
1505
+ raise excs.Error(
1506
+ f'Primary key column(s) {", ".join(repr(c) for c in missing_cols)} missing in {row_spec}'
1507
+ )
1419
1508
  row_updates.append(col_vals)
1420
1509
 
1421
- status = self._tbl_version.get().batch_update(
1510
+ result = self._tbl_version.get().batch_update(
1422
1511
  row_updates,
1423
1512
  rowids,
1424
1513
  error_if_not_exists=if_not_exists == 'error',
@@ -1426,9 +1515,85 @@ class Table(SchemaObject):
1426
1515
  cascade=cascade,
1427
1516
  )
1428
1517
  FileCache.get().emit_eviction_warnings()
1429
- return status
1518
+ return result
1430
1519
 
1431
- def delete(self, where: Optional['exprs.Expr'] = None) -> UpdateStatus:
1520
+ def recompute_columns(
1521
+ self,
1522
+ *columns: str | ColumnRef,
1523
+ where: 'exprs.Expr' | None = None,
1524
+ errors_only: bool = False,
1525
+ cascade: bool = True,
1526
+ ) -> UpdateStatus:
1527
+ """Recompute the values in one or more computed columns of this table.
1528
+
1529
+ Args:
1530
+ columns: The names or references of the computed columns to recompute.
1531
+ where: A predicate to filter rows to recompute.
1532
+ errors_only: If True, only run the recomputation for rows that have errors in the column (ie, the column's
1533
+ `errortype` property indicates that an error occurred). Only allowed for recomputing a single column.
1534
+ cascade: if True, also update all computed columns that transitively depend on the recomputed columns.
1535
+
1536
+ Examples:
1537
+ Recompute computed columns `c1` and `c2` for all rows in this table, and everything that transitively
1538
+ depends on them:
1539
+
1540
+ >>> tbl.recompute_columns('c1', 'c2')
1541
+
1542
+ Recompute computed columns `c1` and `c2` for all rows in this table, but don't recompute other columns that
1543
+ depend on them:
1544
+
1545
+ >>> tbl.recompute_columns(tbl.c1, tbl.c2, cascade=False)
1546
+
1547
+ Recompute column `c1` and its dependents, but only for rows with `c2` == 0:
1548
+
1549
+ >>> tbl.recompute_columns('c1', where=tbl.c2 == 0)
1550
+
1551
+ Recompute column `c1` and its dependents, but only for rows that have errors in it:
1552
+
1553
+ >>> tbl.recompute_columns('c1', errors_only=True)
1554
+ """
1555
+ from pixeltable.catalog import Catalog
1556
+
1557
+ cat = Catalog.get()
1558
+ # lock_mutable_tree=True: we need to be able to see whether any transitive view has column dependents
1559
+ with cat.begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
1560
+ self.__check_mutable('recompute columns of')
1561
+ if len(columns) == 0:
1562
+ raise excs.Error('At least one column must be specified to recompute')
1563
+ if errors_only and len(columns) > 1:
1564
+ raise excs.Error('Cannot use errors_only=True with multiple columns')
1565
+
1566
+ col_names: list[str] = []
1567
+ for column in columns:
1568
+ col_name: str
1569
+ col: Column
1570
+ if isinstance(column, str):
1571
+ col = self._tbl_version_path.get_column(column)
1572
+ if col is None:
1573
+ raise excs.Error(f'Unknown column: {column}')
1574
+ col_name = column
1575
+ else:
1576
+ assert isinstance(column, ColumnRef)
1577
+ col = column.col
1578
+ if not self._tbl_version_path.has_column(col):
1579
+ raise excs.Error(f'Unknown column: {col.name}')
1580
+ col_name = col.name
1581
+ if not col.is_computed:
1582
+ raise excs.Error(f'Column {col_name!r} is not a computed column')
1583
+ if col.get_tbl().id != self._tbl_version_path.tbl_id:
1584
+ raise excs.Error(f'Cannot recompute a column of a base table: {col_name}')
1585
+ col_names.append(col_name)
1586
+
1587
+ if where is not None and not where.is_bound_by([self._tbl_version_path]):
1588
+ raise excs.Error(f'`where` predicate ({where}) is not bound by {self._display_str()}')
1589
+
1590
+ result = self._tbl_version.get().recompute_columns(
1591
+ col_names, where=where, errors_only=errors_only, cascade=cascade
1592
+ )
1593
+ FileCache.get().emit_eviction_warnings()
1594
+ return result
1595
+
1596
+ def delete(self, where: 'exprs.Expr' | None = None) -> UpdateStatus:
1432
1597
  """Delete rows in this table.
1433
1598
 
1434
1599
  Args:
@@ -1453,14 +1618,63 @@ class Table(SchemaObject):
1453
1618
  """
1454
1619
  from pixeltable.catalog import Catalog
1455
1620
 
1456
- with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
1457
- if self._tbl_version_path.is_snapshot():
1458
- raise excs.Error('Cannot revert a snapshot')
1621
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
1622
+ self.__check_mutable('revert')
1459
1623
  self._tbl_version.get().revert()
1460
1624
  # remove cached md in order to force a reload on the next operation
1461
- self.__tbl_version_path.clear_cached_md()
1625
+ self._tbl_version_path.clear_cached_md()
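For completeness, the operation guarded here as a one-line sketch; revert is only available on mutable tables, not on snapshots or replicas:

```python
# undo the most recent change to the table
tbl.revert()
```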
1626
+
1627
+ def push(self, *, version: int | None = None) -> None:
1628
+ from pixeltable.share import push_replica
1629
+ from pixeltable.share.protocol import PxtUri
1630
+
1631
+ tbl_version = self._tbl_version.get()
1632
+ pxt_uri = tbl_version.pxt_uri
1633
+
1634
+ if tbl_version.is_replica:
1635
+ raise excs.Error(f'push(): Cannot push replica table {self._name!r}. (Did you mean `pull()`?)')
1636
+ if pxt_uri is None:
1637
+ raise excs.Error(
1638
+ f'push(): Table {self._name!r} has not yet been published to Pixeltable Cloud. '
1639
+ 'To publish it, use `pxt.publish()` instead.'
1640
+ )
1641
+
1642
+ # Parse the pxt URI to extract org/db and create a UUID-based URI for pushing
1643
+ parsed_uri = PxtUri(uri=pxt_uri)
1644
+ uuid_uri_obj = PxtUri.from_components(org=parsed_uri.org, id=self._id, db=parsed_uri.db)
1645
+ uuid_uri = str(uuid_uri_obj)
1646
+
1647
+ if version is None:
1648
+ # Push this version
1649
+ push_replica(uuid_uri, self)
1650
+ else:
1651
+ versioned_path = catalog.Path.parse(self._path())._replace(version=version)
1652
+ versioned_tbl = catalog.Catalog.get().get_table(versioned_path, IfNotExistsParam.IGNORE)
1653
+ if versioned_tbl is None:
1654
+ raise excs.Error(f'Table {self._name!r} has no known version {version}')
1655
+ assert versioned_tbl._id == self._id
1656
+ push_replica(uuid_uri, versioned_tbl)
1657
+
1658
+ def pull(self, *, version: int | None = None) -> None:
1659
+ from pixeltable.share import pull_replica
1660
+ from pixeltable.share.protocol import PxtUri
1661
+
1662
+ tbl_version = self._tbl_version_path.tbl_version.get()
1663
+ pxt_uri = tbl_version.pxt_uri
1664
+
1665
+ if not tbl_version.is_replica:
1666
+ raise excs.Error(
1667
+ f'pull(): Table {self._name!r} is not a replica of a Pixeltable Cloud table (nothing to `pull()`).'
1668
+ )
1669
+ assert pxt_uri is not None
1670
+
1671
+ # Parse the pxt URI to extract org/db and create a UUID-based URI for pulling
1672
+ parsed_uri = PxtUri(uri=pxt_uri)
1673
+ uuid_uri_obj = PxtUri.from_components(org=parsed_uri.org, id=self._id, db=parsed_uri.db, version=version)
1674
+ uuid_uri = str(uuid_uri_obj)
1675
+
1676
+ pull_replica(self._path(), uuid_uri)
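The intended call patterns, as far as they can be inferred from this diff, might look like the following sketch; the table names are placeholders, `push()` presumes a table already published to Pixeltable Cloud, and `pull()` presumes a replica:

```python
import pixeltable as pxt

published = pxt.get_table('my_published_table')
published.push()            # push the current version
published.push(version=12)  # push a specific historical version, if it exists

replica = pxt.get_table('my_replica')
replica.pull()              # pull the latest version from Pixeltable Cloud
replica.pull(version=12)    # or pull a specific version
```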
1462
1677
 
1463
- @property
1464
1678
  def external_stores(self) -> list[str]:
1465
1679
  return list(self._tbl_version.get().external_stores.keys())
1466
1680
 
@@ -1470,23 +1684,18 @@ class Table(SchemaObject):
1470
1684
  """
1471
1685
  from pixeltable.catalog import Catalog
1472
1686
 
1473
- with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
1474
- if self._tbl_version.get().is_snapshot:
1475
- raise excs.Error(f'Table `{self._name}` is a snapshot, so it cannot be linked to an external store.')
1476
- if store.name in self.external_stores:
1477
- raise excs.Error(f'Table `{self._name}` already has an external store with that name: {store.name}')
1478
- _logger.info(f'Linking external store `{store.name}` to table `{self._name}`')
1687
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=False):
1688
+ self.__check_mutable('link an external store to')
1689
+ if store.name in self.external_stores():
1690
+ raise excs.Error(f'Table {self._name!r} already has an external store with that name: {store.name}')
1691
+ _logger.info(f'Linking external store {store.name!r} to table {self._name!r}.')
1479
1692
 
1480
1693
  store.link(self._tbl_version.get()) # might call tbl_version.add_columns()
1481
1694
  self._tbl_version.get().link_external_store(store)
1482
- env.Env.get().console_logger.info(f'Linked external store `{store.name}` to table `{self._name}`.')
1695
+ env.Env.get().console_logger.info(f'Linked external store {store.name!r} to table {self._name!r}.')
1483
1696
 
1484
1697
  def unlink_external_stores(
1485
- self,
1486
- stores: Optional[str | list[str]] = None,
1487
- *,
1488
- delete_external_data: bool = False,
1489
- ignore_errors: bool = False,
1698
+ self, stores: str | list[str] | None = None, *, delete_external_data: bool = False, ignore_errors: bool = False
1490
1699
  ) -> None:
1491
1700
  """
1492
1701
  Unlinks this table's external stores.
@@ -1501,9 +1710,10 @@ class Table(SchemaObject):
1501
1710
  """
1502
1711
  from pixeltable.catalog import Catalog
1503
1712
 
1504
- with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
1505
- self._check_is_dropped()
1506
- all_stores = self.external_stores
1713
+ if not self._tbl_version_path.is_mutable():
1714
+ return
1715
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=False):
1716
+ all_stores = self.external_stores()
1507
1717
 
1508
1718
  if stores is None:
1509
1719
  stores = all_stores
@@ -1514,7 +1724,7 @@ class Table(SchemaObject):
1514
1724
  if not ignore_errors:
1515
1725
  for store_name in stores:
1516
1726
  if store_name not in all_stores:
1517
- raise excs.Error(f'Table `{self._name}` has no external store with that name: {store_name}')
1727
+ raise excs.Error(f'Table {self._name!r} has no external store with that name: {store_name}')
1518
1728
 
1519
1729
  for store_name in stores:
1520
1730
  store = self._tbl_version.get().external_stores[store_name]
@@ -1524,11 +1734,11 @@ class Table(SchemaObject):
1524
1734
  self._tbl_version.get().unlink_external_store(store)
1525
1735
  if delete_external_data and isinstance(store, pxt.io.external_store.Project):
1526
1736
  store.delete()
1527
- env.Env.get().console_logger.info(f'Unlinked external store from table `{self._name}`: {store_str}')
1737
+ env.Env.get().console_logger.info(f'Unlinked external store from table {self._name!r}: {store_str}')
1528
1738
 
1529
1739
  def sync(
1530
- self, stores: Optional[str | list[str]] = None, *, export_data: bool = True, import_data: bool = True
1531
- ) -> 'pxt.io.SyncStatus':
1740
+ self, stores: str | list[str] | None = None, *, export_data: bool = True, import_data: bool = True
1741
+ ) -> UpdateStatus:
1532
1742
  """
1533
1743
  Synchronizes this table with its linked external stores.
1534
1744
 
@@ -1540,9 +1750,13 @@ class Table(SchemaObject):
1540
1750
  """
1541
1751
  from pixeltable.catalog import Catalog
1542
1752
 
1543
- with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
1544
- self._check_is_dropped()
1545
- all_stores = self.external_stores
1753
+ if not self._tbl_version_path.is_mutable():
1754
+ return UpdateStatus()
1755
+ # we lock the entire tree starting at the root base table in order to ensure that all synced columns can
1756
+ # have their updates propagated down the tree
1757
+ base_tv = self._tbl_version_path.get_tbl_versions()[-1]
1758
+ with Catalog.get().begin_xact(tbl=TableVersionPath(base_tv), for_write=True, lock_mutable_tree=True):
1759
+ all_stores = self.external_stores()
1546
1760
 
1547
1761
  if stores is None:
1548
1762
  stores = all_stores
@@ -1551,18 +1765,122 @@ class Table(SchemaObject):
1551
1765
 
1552
1766
  for store in stores:
1553
1767
  if store not in all_stores:
1554
- raise excs.Error(f'Table `{self._name}` has no external store with that name: {store}')
1768
+ raise excs.Error(f'Table {self._name!r} has no external store with that name: {store}')
1555
1769
 
1556
- sync_status = pxt.io.SyncStatus.empty()
1770
+ sync_status = UpdateStatus()
1557
1771
  for store in stores:
1558
1772
  store_obj = self._tbl_version.get().external_stores[store]
1559
1773
  store_sync_status = store_obj.sync(self, export_data=export_data, import_data=import_data)
1560
- sync_status = sync_status.combine(store_sync_status)
1774
+ sync_status += store_sync_status
1561
1775
 
1562
1776
  return sync_status
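A sketch of the store-management calls above; the store name is a placeholder:

```python
# sync every linked external store in both directions; the result is an UpdateStatus
status = tbl.sync()

# export-only sync of a single store
tbl.sync('my_external_store', import_data=False)

# unlink all stores; delete_external_data also removes data held by stores that support deletion
tbl.unlink_external_stores(delete_external_data=True, ignore_errors=True)
```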
1563
1777
 
1564
1778
  def __dir__(self) -> list[str]:
1565
- return list(super().__dir__()) + list(self._schema.keys())
1779
+ return list(super().__dir__()) + list(self._get_schema().keys())
1566
1780
 
1567
1781
  def _ipython_key_completions_(self) -> list[str]:
1568
- return list(self._schema.keys())
1782
+ return list(self._get_schema().keys())
1783
+
1784
+ def get_versions(self, n: int | None = None) -> list[VersionMetadata]:
1785
+ """
1786
+ Returns information about versions of this table, most recent first.
1787
+
1788
+ `get_versions()` is intended for programmatic access to version metadata; for human-readable
1789
+ output, use [`history()`][pixeltable.Table.history] instead.
1790
+
1791
+ Args:
1792
+ n: if specified, will return at most `n` versions
1793
+
1794
+ Returns:
1795
+ A list of [VersionMetadata][pixeltable.VersionMetadata] dictionaries, one per version retrieved, most
1796
+ recent first.
1797
+
1798
+ Examples:
1799
+ Retrieve metadata about all versions of the table `tbl`:
1800
+
1801
+ >>> tbl.get_versions()
1802
+
1803
+ Retrieve metadata about the most recent 5 versions of the table `tbl`:
1804
+
1805
+ >>> tbl.get_versions(n=5)
1806
+ """
1807
+ from pixeltable.catalog import Catalog
1808
+
1809
+ if n is None:
1810
+ n = 1_000_000_000
1811
+ if not isinstance(n, int) or n < 1:
1812
+ raise excs.Error(f'Invalid value for `n`: {n}')
1813
+
1814
+ # Retrieve the table history components from the catalog
1815
+ tbl_id = self._id
1816
+ # Collect an extra version, if available, to allow for computation of the first version's schema change
1817
+ vers_list = Catalog.get().collect_tbl_history(tbl_id, n + 1)
1818
+
1819
+ # Construct the metadata change description dictionary
1820
+ md_list = [(vers_md.version_md.version, vers_md.schema_version_md.columns) for vers_md in vers_list]
1821
+ md_dict = MetadataUtils._create_md_change_dict(md_list)
1822
+
1823
+ # Construct report lines
1824
+ if len(vers_list) > n:
1825
+ assert len(vers_list) == n + 1
1826
+ over_count = 1
1827
+ else:
1828
+ over_count = 0
1829
+
1830
+ metadata_dicts: list[VersionMetadata] = []
1831
+ for vers_md in vers_list[0 : len(vers_list) - over_count]:
1832
+ version = vers_md.version_md.version
1833
+ schema_change = md_dict.get(version, None)
1834
+ update_status = vers_md.version_md.update_status
1835
+ if update_status is None:
1836
+ update_status = UpdateStatus()
1837
+ change_type: Literal['schema', 'data'] = 'schema' if schema_change is not None else 'data'
1838
+ rcs = update_status.row_count_stats + update_status.cascade_row_count_stats
1839
+ metadata_dicts.append(
1840
+ VersionMetadata(
1841
+ version=version,
1842
+ created_at=datetime.datetime.fromtimestamp(vers_md.version_md.created_at, tz=datetime.timezone.utc),
1843
+ user=vers_md.version_md.user,
1844
+ change_type=change_type,
1845
+ inserts=rcs.ins_rows,
1846
+ updates=rcs.upd_rows,
1847
+ deletes=rcs.del_rows,
1848
+ errors=rcs.num_excs,
1849
+ computed=rcs.computed_values,
1850
+ schema_change=schema_change,
1851
+ )
1852
+ )
1853
+
1854
+ return metadata_dicts
1855
+
1856
+ def history(self, n: int | None = None) -> pd.DataFrame:
1857
+ """
1858
+ Returns a human-readable report about versions of this table.
1859
+
1860
+ `history()` is intended for human-readable output of version metadata; for programmatic access,
1861
+ use [`get_versions()`][pixeltable.Table.get_versions] instead.
1862
+
1863
+ Args:
1864
+ n: if specified, will return at most `n` versions
1865
+
1866
+ Returns:
1867
+ A report with information about each version, one per row, most recent first.
1868
+
1869
+ Examples:
1870
+ Report all versions of the table:
1871
+
1872
+ >>> tbl.history()
1873
+
1874
+ Report only the most recent 5 changes to the table:
1875
+
1876
+ >>> tbl.history(n=5)
1877
+ """
1878
+ versions = self.get_versions(n)
1879
+ assert len(versions) > 0
1880
+ return pd.DataFrame([list(v.values()) for v in versions], columns=list(versions[0].keys()))
1881
+
1882
+ def __check_mutable(self, op_descr: str) -> None:
1883
+ if self._tbl_version_path.is_replica():
1884
+ raise excs.Error(f'{self._display_str()}: Cannot {op_descr} a replica.')
1885
+ if self._tbl_version_path.is_snapshot():
1886
+ raise excs.Error(f'{self._display_str()}: Cannot {op_descr} a snapshot.')