pixeltable 0.3.14__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220) hide show
  1. pixeltable/__init__.py +42 -8
  2. pixeltable/{dataframe.py → _query.py} +470 -206
  3. pixeltable/_version.py +1 -0
  4. pixeltable/catalog/__init__.py +5 -4
  5. pixeltable/catalog/catalog.py +1785 -432
  6. pixeltable/catalog/column.py +190 -113
  7. pixeltable/catalog/dir.py +2 -4
  8. pixeltable/catalog/globals.py +19 -46
  9. pixeltable/catalog/insertable_table.py +191 -98
  10. pixeltable/catalog/path.py +63 -23
  11. pixeltable/catalog/schema_object.py +11 -15
  12. pixeltable/catalog/table.py +843 -436
  13. pixeltable/catalog/table_metadata.py +103 -0
  14. pixeltable/catalog/table_version.py +978 -657
  15. pixeltable/catalog/table_version_handle.py +72 -16
  16. pixeltable/catalog/table_version_path.py +112 -43
  17. pixeltable/catalog/tbl_ops.py +53 -0
  18. pixeltable/catalog/update_status.py +191 -0
  19. pixeltable/catalog/view.py +134 -90
  20. pixeltable/config.py +134 -22
  21. pixeltable/env.py +471 -157
  22. pixeltable/exceptions.py +6 -0
  23. pixeltable/exec/__init__.py +4 -1
  24. pixeltable/exec/aggregation_node.py +7 -8
  25. pixeltable/exec/cache_prefetch_node.py +83 -110
  26. pixeltable/exec/cell_materialization_node.py +268 -0
  27. pixeltable/exec/cell_reconstruction_node.py +168 -0
  28. pixeltable/exec/component_iteration_node.py +4 -3
  29. pixeltable/exec/data_row_batch.py +8 -65
  30. pixeltable/exec/exec_context.py +16 -4
  31. pixeltable/exec/exec_node.py +13 -36
  32. pixeltable/exec/expr_eval/evaluators.py +11 -7
  33. pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
  34. pixeltable/exec/expr_eval/globals.py +8 -5
  35. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  36. pixeltable/exec/expr_eval/schedulers.py +106 -56
  37. pixeltable/exec/globals.py +35 -0
  38. pixeltable/exec/in_memory_data_node.py +19 -19
  39. pixeltable/exec/object_store_save_node.py +293 -0
  40. pixeltable/exec/row_update_node.py +16 -9
  41. pixeltable/exec/sql_node.py +351 -84
  42. pixeltable/exprs/__init__.py +1 -1
  43. pixeltable/exprs/arithmetic_expr.py +27 -22
  44. pixeltable/exprs/array_slice.py +3 -3
  45. pixeltable/exprs/column_property_ref.py +36 -23
  46. pixeltable/exprs/column_ref.py +213 -89
  47. pixeltable/exprs/comparison.py +5 -5
  48. pixeltable/exprs/compound_predicate.py +5 -4
  49. pixeltable/exprs/data_row.py +164 -54
  50. pixeltable/exprs/expr.py +70 -44
  51. pixeltable/exprs/expr_dict.py +3 -3
  52. pixeltable/exprs/expr_set.py +17 -10
  53. pixeltable/exprs/function_call.py +100 -40
  54. pixeltable/exprs/globals.py +2 -2
  55. pixeltable/exprs/in_predicate.py +4 -4
  56. pixeltable/exprs/inline_expr.py +18 -32
  57. pixeltable/exprs/is_null.py +7 -3
  58. pixeltable/exprs/json_mapper.py +8 -8
  59. pixeltable/exprs/json_path.py +56 -22
  60. pixeltable/exprs/literal.py +27 -5
  61. pixeltable/exprs/method_ref.py +2 -2
  62. pixeltable/exprs/object_ref.py +2 -2
  63. pixeltable/exprs/row_builder.py +167 -67
  64. pixeltable/exprs/rowid_ref.py +25 -10
  65. pixeltable/exprs/similarity_expr.py +58 -40
  66. pixeltable/exprs/sql_element_cache.py +4 -4
  67. pixeltable/exprs/string_op.py +5 -5
  68. pixeltable/exprs/type_cast.py +3 -5
  69. pixeltable/func/__init__.py +1 -0
  70. pixeltable/func/aggregate_function.py +8 -8
  71. pixeltable/func/callable_function.py +9 -9
  72. pixeltable/func/expr_template_function.py +17 -11
  73. pixeltable/func/function.py +18 -20
  74. pixeltable/func/function_registry.py +6 -7
  75. pixeltable/func/globals.py +2 -3
  76. pixeltable/func/mcp.py +74 -0
  77. pixeltable/func/query_template_function.py +29 -27
  78. pixeltable/func/signature.py +46 -19
  79. pixeltable/func/tools.py +31 -13
  80. pixeltable/func/udf.py +18 -20
  81. pixeltable/functions/__init__.py +16 -0
  82. pixeltable/functions/anthropic.py +123 -77
  83. pixeltable/functions/audio.py +147 -10
  84. pixeltable/functions/bedrock.py +13 -6
  85. pixeltable/functions/date.py +7 -4
  86. pixeltable/functions/deepseek.py +35 -43
  87. pixeltable/functions/document.py +81 -0
  88. pixeltable/functions/fal.py +76 -0
  89. pixeltable/functions/fireworks.py +11 -20
  90. pixeltable/functions/gemini.py +195 -39
  91. pixeltable/functions/globals.py +142 -14
  92. pixeltable/functions/groq.py +108 -0
  93. pixeltable/functions/huggingface.py +1056 -24
  94. pixeltable/functions/image.py +115 -57
  95. pixeltable/functions/json.py +1 -1
  96. pixeltable/functions/llama_cpp.py +28 -13
  97. pixeltable/functions/math.py +67 -5
  98. pixeltable/functions/mistralai.py +18 -55
  99. pixeltable/functions/net.py +70 -0
  100. pixeltable/functions/ollama.py +20 -13
  101. pixeltable/functions/openai.py +240 -226
  102. pixeltable/functions/openrouter.py +143 -0
  103. pixeltable/functions/replicate.py +4 -4
  104. pixeltable/functions/reve.py +250 -0
  105. pixeltable/functions/string.py +239 -69
  106. pixeltable/functions/timestamp.py +16 -16
  107. pixeltable/functions/together.py +24 -84
  108. pixeltable/functions/twelvelabs.py +188 -0
  109. pixeltable/functions/util.py +6 -1
  110. pixeltable/functions/uuid.py +30 -0
  111. pixeltable/functions/video.py +1515 -107
  112. pixeltable/functions/vision.py +8 -8
  113. pixeltable/functions/voyageai.py +289 -0
  114. pixeltable/functions/whisper.py +16 -8
  115. pixeltable/functions/whisperx.py +179 -0
  116. pixeltable/{ext/functions → functions}/yolox.py +2 -4
  117. pixeltable/globals.py +362 -115
  118. pixeltable/index/base.py +17 -21
  119. pixeltable/index/btree.py +28 -22
  120. pixeltable/index/embedding_index.py +100 -118
  121. pixeltable/io/__init__.py +4 -2
  122. pixeltable/io/datarows.py +8 -7
  123. pixeltable/io/external_store.py +56 -105
  124. pixeltable/io/fiftyone.py +13 -13
  125. pixeltable/io/globals.py +31 -30
  126. pixeltable/io/hf_datasets.py +61 -16
  127. pixeltable/io/label_studio.py +74 -70
  128. pixeltable/io/lancedb.py +3 -0
  129. pixeltable/io/pandas.py +21 -12
  130. pixeltable/io/parquet.py +25 -105
  131. pixeltable/io/table_data_conduit.py +250 -123
  132. pixeltable/io/utils.py +4 -4
  133. pixeltable/iterators/__init__.py +2 -1
  134. pixeltable/iterators/audio.py +26 -25
  135. pixeltable/iterators/base.py +9 -3
  136. pixeltable/iterators/document.py +112 -78
  137. pixeltable/iterators/image.py +12 -15
  138. pixeltable/iterators/string.py +11 -4
  139. pixeltable/iterators/video.py +523 -120
  140. pixeltable/metadata/__init__.py +14 -3
  141. pixeltable/metadata/converters/convert_13.py +2 -2
  142. pixeltable/metadata/converters/convert_18.py +2 -2
  143. pixeltable/metadata/converters/convert_19.py +2 -2
  144. pixeltable/metadata/converters/convert_20.py +2 -2
  145. pixeltable/metadata/converters/convert_21.py +2 -2
  146. pixeltable/metadata/converters/convert_22.py +2 -2
  147. pixeltable/metadata/converters/convert_24.py +2 -2
  148. pixeltable/metadata/converters/convert_25.py +2 -2
  149. pixeltable/metadata/converters/convert_26.py +2 -2
  150. pixeltable/metadata/converters/convert_29.py +4 -4
  151. pixeltable/metadata/converters/convert_30.py +34 -21
  152. pixeltable/metadata/converters/convert_34.py +2 -2
  153. pixeltable/metadata/converters/convert_35.py +9 -0
  154. pixeltable/metadata/converters/convert_36.py +38 -0
  155. pixeltable/metadata/converters/convert_37.py +15 -0
  156. pixeltable/metadata/converters/convert_38.py +39 -0
  157. pixeltable/metadata/converters/convert_39.py +124 -0
  158. pixeltable/metadata/converters/convert_40.py +73 -0
  159. pixeltable/metadata/converters/convert_41.py +12 -0
  160. pixeltable/metadata/converters/convert_42.py +9 -0
  161. pixeltable/metadata/converters/convert_43.py +44 -0
  162. pixeltable/metadata/converters/util.py +20 -31
  163. pixeltable/metadata/notes.py +9 -0
  164. pixeltable/metadata/schema.py +140 -53
  165. pixeltable/metadata/utils.py +74 -0
  166. pixeltable/mypy/__init__.py +3 -0
  167. pixeltable/mypy/mypy_plugin.py +123 -0
  168. pixeltable/plan.py +382 -115
  169. pixeltable/share/__init__.py +1 -1
  170. pixeltable/share/packager.py +547 -83
  171. pixeltable/share/protocol/__init__.py +33 -0
  172. pixeltable/share/protocol/common.py +165 -0
  173. pixeltable/share/protocol/operation_types.py +33 -0
  174. pixeltable/share/protocol/replica.py +119 -0
  175. pixeltable/share/publish.py +257 -59
  176. pixeltable/store.py +311 -194
  177. pixeltable/type_system.py +373 -211
  178. pixeltable/utils/__init__.py +2 -3
  179. pixeltable/utils/arrow.py +131 -17
  180. pixeltable/utils/av.py +298 -0
  181. pixeltable/utils/azure_store.py +346 -0
  182. pixeltable/utils/coco.py +6 -6
  183. pixeltable/utils/code.py +3 -3
  184. pixeltable/utils/console_output.py +4 -1
  185. pixeltable/utils/coroutine.py +6 -23
  186. pixeltable/utils/dbms.py +32 -6
  187. pixeltable/utils/description_helper.py +4 -5
  188. pixeltable/utils/documents.py +7 -18
  189. pixeltable/utils/exception_handler.py +7 -30
  190. pixeltable/utils/filecache.py +6 -6
  191. pixeltable/utils/formatter.py +86 -48
  192. pixeltable/utils/gcs_store.py +295 -0
  193. pixeltable/utils/http.py +133 -0
  194. pixeltable/utils/http_server.py +2 -3
  195. pixeltable/utils/iceberg.py +1 -2
  196. pixeltable/utils/image.py +17 -0
  197. pixeltable/utils/lancedb.py +90 -0
  198. pixeltable/utils/local_store.py +322 -0
  199. pixeltable/utils/misc.py +5 -0
  200. pixeltable/utils/object_stores.py +573 -0
  201. pixeltable/utils/pydantic.py +60 -0
  202. pixeltable/utils/pytorch.py +5 -6
  203. pixeltable/utils/s3_store.py +527 -0
  204. pixeltable/utils/sql.py +26 -0
  205. pixeltable/utils/system.py +30 -0
  206. pixeltable-0.5.7.dist-info/METADATA +579 -0
  207. pixeltable-0.5.7.dist-info/RECORD +227 -0
  208. {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
  209. pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
  210. pixeltable/__version__.py +0 -3
  211. pixeltable/catalog/named_function.py +0 -40
  212. pixeltable/ext/__init__.py +0 -17
  213. pixeltable/ext/functions/__init__.py +0 -11
  214. pixeltable/ext/functions/whisperx.py +0 -77
  215. pixeltable/utils/media_store.py +0 -77
  216. pixeltable/utils/s3.py +0 -17
  217. pixeltable-0.3.14.dist-info/METADATA +0 -434
  218. pixeltable-0.3.14.dist-info/RECORD +0 -186
  219. pixeltable-0.3.14.dist-info/entry_points.txt +0 -3
  220. {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
@@ -2,22 +2,30 @@ from __future__ import annotations
2
2
 
3
3
  import abc
4
4
  import builtins
5
+ import datetime
5
6
  import json
6
7
  import logging
7
- from pathlib import Path
8
- from typing import TYPE_CHECKING, Any, Iterable, Literal, Optional, Union, overload
9
-
10
- from typing import _GenericAlias # type: ignore[attr-defined] # isort: skip
11
8
  from keyword import iskeyword as is_python_keyword
9
+ from pathlib import Path
10
+ from typing import TYPE_CHECKING, Any, Iterable, Literal
12
11
  from uuid import UUID
13
12
 
14
13
  import pandas as pd
15
14
  import sqlalchemy as sql
15
+ from typing_extensions import overload
16
16
 
17
17
  import pixeltable as pxt
18
18
  from pixeltable import catalog, env, exceptions as excs, exprs, index, type_system as ts
19
- from pixeltable.env import Env
19
+ from pixeltable.catalog.table_metadata import (
20
+ ColumnMetadata,
21
+ EmbeddingIndexParams,
22
+ IndexMetadata,
23
+ TableMetadata,
24
+ VersionMetadata,
25
+ )
20
26
  from pixeltable.metadata import schema
27
+ from pixeltable.metadata.utils import MetadataUtils
28
+ from pixeltable.utils.object_stores import ObjectOps
21
29
 
22
30
  from ..exprs import ColumnRef
23
31
  from ..utils.description_helper import DescriptionHelper
@@ -28,13 +36,16 @@ from .globals import (
28
36
  IfExistsParam,
29
37
  IfNotExistsParam,
30
38
  MediaValidation,
31
- UpdateStatus,
32
39
  is_system_column_name,
33
40
  is_valid_identifier,
34
41
  )
35
42
  from .schema_object import SchemaObject
36
43
  from .table_version_handle import TableVersionHandle
37
44
  from .table_version_path import TableVersionPath
45
+ from .update_status import UpdateStatus
46
+
47
+ from typing import _GenericAlias # type: ignore[attr-defined] # isort: skip
48
+
38
49
 
39
50
  if TYPE_CHECKING:
40
51
  import torch.utils.data
@@ -42,6 +53,7 @@ if TYPE_CHECKING:
42
53
  import pixeltable.plan
43
54
  from pixeltable.globals import TableDataSource
44
55
 
56
+
45
57
  _logger = logging.getLogger('pixeltable')
46
58
 
47
59
 
@@ -49,26 +61,34 @@ class Table(SchemaObject):
49
61
  """
50
62
  A handle to a table, view, or snapshot. This class is the primary interface through which table operations
51
63
  (queries, insertions, updates, etc.) are performed in Pixeltable.
64
+
65
+ Every user-invoked operation that runs an ExecNode tree (directly or indirectly) needs to call
66
+ FileCache.emit_eviction_warnings() at the end of the operation.
52
67
  """
53
68
 
54
- # Every user-invoked operation that runs an ExecNode tree (directly or indirectly) needs to call
55
- # FileCache.emit_eviction_warnings() at the end of the operation.
69
+ # the chain of TableVersions needed to run queries and supply metadata (eg, schema)
70
+ _tbl_version_path: TableVersionPath
56
71
 
57
- _is_dropped: bool
58
- __tbl_version_path: TableVersionPath
72
+ # the physical TableVersion backing this Table; None for pure snapshots
73
+ _tbl_version: TableVersionHandle | None
59
74
 
60
75
  def __init__(self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath):
61
76
  super().__init__(id, name, dir_id)
62
- self._is_dropped = False
63
- self.__tbl_version_path = tbl_version_path
64
-
65
- # @property
66
- # def _has_dependents(self) -> bool:
67
- # """Returns True if this table has any dependent views, or snapshots."""
68
- # return len(self._get_views(recursive=False)) > 0
77
+ self._tbl_version_path = tbl_version_path
78
+ self._tbl_version = None
69
79
 
70
80
  def _move(self, new_name: str, new_dir_id: UUID) -> None:
71
- self._check_is_dropped()
81
+ old_name = self._name
82
+ old_dir_id = self._dir_id
83
+
84
+ cat = catalog.Catalog.get()
85
+
86
+ @cat.register_undo_action
87
+ def _() -> None:
88
+ # TODO: We should really be invalidating the Table instance and forcing a reload.
89
+ self._name = old_name
90
+ self._dir_id = old_dir_id
91
+
72
92
  super()._move(new_name, new_dir_id)
73
93
  conn = env.Env.get().conn
74
94
  stmt = sql.text(
@@ -81,71 +101,88 @@ class Table(SchemaObject):
81
101
  )
82
102
  conn.execute(stmt, {'new_dir_id': new_dir_id, 'new_name': json.dumps(new_name), 'id': self._id})
83
103
 
84
- def get_metadata(self) -> dict[str, Any]:
104
+ # this is duplicated from SchemaObject so that our API docs show the docstring for Table
105
+ def get_metadata(self) -> 'TableMetadata':
85
106
  """
86
107
  Retrieves metadata associated with this table.
87
108
 
88
109
  Returns:
89
- A dictionary containing the metadata, in the following format:
90
-
91
- ```python
92
- {
93
- 'base': None, # If this is a view or snapshot, will contain the name of its base table
94
- 'schema': {
95
- 'col1': StringType(),
96
- 'col2': IntType(),
97
- },
98
- 'is_replica': False,
99
- 'version': 22,
100
- 'schema_version': 1,
101
- 'comment': '',
102
- 'num_retained_versions': 10,
103
- 'is_view': False,
104
- 'is_snapshot': False,
105
- 'media_validation': 'on_write',
106
- }
107
- ```
110
+ A [TableMetadata][pixeltable.TableMetadata] instance containing this table's metadata.
108
111
  """
109
- self._check_is_dropped()
110
- with env.Env.get().begin_xact():
111
- md = super().get_metadata()
112
- md['base'] = self._base_table._path if self._base_table is not None else None
113
- md['schema'] = self._schema
114
- md['is_replica'] = self._tbl_version.get().is_replica
115
- md['version'] = self._version
116
- md['schema_version'] = self._tbl_version.get().schema_version
117
- md['comment'] = self._comment
118
- md['num_retained_versions'] = self._num_retained_versions
119
- md['media_validation'] = self._media_validation.name.lower()
120
- return md
112
+ from pixeltable.catalog import retry_loop
113
+
114
+ @retry_loop(for_write=False)
115
+ def op() -> 'TableMetadata':
116
+ return self._get_metadata()
117
+
118
+ return op()
119
+
120
+ def _get_metadata(self) -> TableMetadata:
121
+ tvp = self._tbl_version_path
122
+ tv = tvp.tbl_version.get()
123
+ columns = tvp.columns()
124
+ column_info: dict[str, ColumnMetadata] = {}
125
+ for col in columns:
126
+ column_info[col.name] = ColumnMetadata(
127
+ name=col.name,
128
+ type_=col.col_type._to_str(as_schema=True),
129
+ version_added=col.schema_version_add,
130
+ is_stored=col.is_stored,
131
+ is_primary_key=col.is_pk,
132
+ media_validation=col.media_validation.name.lower() if col.media_validation is not None else None, # type: ignore[typeddict-item]
133
+ computed_with=col.value_expr.display_str(inline=False) if col.value_expr is not None else None,
134
+ defined_in=col.get_tbl().name,
135
+ )
121
136
 
122
- @property
123
- def _version(self) -> int:
124
- """Return the version of this table. Used by tests to ascertain version changes."""
125
- return self._tbl_version.get().version
137
+ indices = tv.idxs_by_name.values()
138
+ index_info: dict[str, IndexMetadata] = {}
139
+ for info in indices:
140
+ if isinstance(info.idx, index.EmbeddingIndex):
141
+ col_ref = ColumnRef(info.col)
142
+ embedding = info.idx.embeddings[info.col.col_type._type](col_ref)
143
+ index_info[info.name] = IndexMetadata(
144
+ name=info.name,
145
+ columns=[info.col.name],
146
+ index_type='embedding',
147
+ parameters=EmbeddingIndexParams(
148
+ metric=info.idx.metric.name.lower(), # type: ignore[typeddict-item]
149
+ embedding=str(embedding),
150
+ embedding_functions=[str(fn) for fn in info.idx.embeddings.values()],
151
+ ),
152
+ )
126
153
 
127
- @property
128
- def _tbl_version(self) -> TableVersionHandle:
129
- """Return TableVersion for just this table."""
130
- return self._tbl_version_path.tbl_version
154
+ return TableMetadata(
155
+ name=self._name,
156
+ path=self._path(),
157
+ columns=column_info,
158
+ indices=index_info,
159
+ is_replica=tv.is_replica,
160
+ is_view=False,
161
+ is_snapshot=False,
162
+ version=self._get_version(),
163
+ version_created=datetime.datetime.fromtimestamp(tv.created_at, tz=datetime.timezone.utc),
164
+ schema_version=tvp.schema_version(),
165
+ comment=self._get_comment(),
166
+ media_validation=self._get_media_validation().name.lower(), # type: ignore[typeddict-item]
167
+ base=None,
168
+ )
131
169
 
132
- @property
133
- def _tbl_version_path(self) -> TableVersionPath:
134
- self._check_is_dropped()
135
- return self.__tbl_version_path
170
+ def _get_version(self) -> int:
171
+ """Return the version of this table. Used by tests to ascertain version changes."""
172
+ return self._tbl_version_path.version()
136
173
 
137
- def __hash__(self) -> int:
138
- return hash(self._tbl_version.id)
174
+ def _get_pxt_uri(self) -> str | None:
175
+ with catalog.Catalog.get().begin_xact(tbl_id=self._id):
176
+ return catalog.Catalog.get().get_additional_md(self._id).get('pxt_uri')
139
177
 
140
- def _check_is_dropped(self) -> None:
141
- if self._is_dropped:
142
- raise excs.Error(f'{self._display_name()} {self._name} has been dropped')
178
+ def __hash__(self) -> int:
179
+ return hash(self._tbl_version_path.tbl_id)
143
180
 
144
181
  def __getattr__(self, name: str) -> 'exprs.ColumnRef':
145
182
  """Return a ColumnRef for the given name."""
146
183
  col = self._tbl_version_path.get_column(name)
147
184
  if col is None:
148
- raise AttributeError(f'Column {name!r} unknown')
185
+ raise AttributeError(f'Unknown column: {name}')
149
186
  return ColumnRef(col, reference_tbl=self._tbl_version_path)
150
187
 
151
188
  def __getitem__(self, name: str) -> 'exprs.ColumnRef':
@@ -163,137 +200,160 @@ class Table(SchemaObject):
163
200
  Returns:
164
201
  A list of view paths.
165
202
  """
166
- self._check_is_dropped()
167
- with env.Env.get().begin_xact():
168
- return [t._path for t in self._get_views(recursive=recursive)]
203
+ from pixeltable.catalog import retry_loop
169
204
 
170
- def _get_views(self, *, recursive: bool = True) -> list['Table']:
205
+ # we need retry_loop() here, because we end up loading Tables for the views
206
+ @retry_loop(tbl=self._tbl_version_path, for_write=False)
207
+ def op() -> list[str]:
208
+ return [t._path() for t in self._get_views(recursive=recursive)]
209
+
210
+ return op()
211
+
212
+ def _get_views(self, *, recursive: bool = True, mutable_only: bool = False) -> list['Table']:
171
213
  cat = catalog.Catalog.get()
172
214
  view_ids = cat.get_view_ids(self._id)
173
215
  views = [cat.get_table_by_id(id) for id in view_ids]
216
+ if mutable_only:
217
+ views = [t for t in views if t._tbl_version_path.is_mutable()]
174
218
  if recursive:
175
- views.extend([t for view in views for t in view._get_views(recursive=True)])
219
+ views.extend(t for view in views for t in view._get_views(recursive=True, mutable_only=mutable_only))
176
220
  return views
177
221
 
178
- def _df(self) -> 'pxt.dataframe.DataFrame':
179
- """Return a DataFrame for this table."""
180
- # local import: avoid circular imports
181
- from pixeltable.plan import FromClause
182
-
183
- return pxt.DataFrame(FromClause(tbls=[self._tbl_version_path]))
184
-
185
- def select(self, *items: Any, **named_items: Any) -> 'pxt.DataFrame':
222
+ def select(self, *items: Any, **named_items: Any) -> 'pxt.Query':
186
223
  """Select columns or expressions from this table.
187
224
 
188
- See [`DataFrame.select`][pixeltable.DataFrame.select] for more details.
225
+ See [`Query.select`][pixeltable.Query.select] for more details.
189
226
  """
190
- return self._df().select(*items, **named_items)
227
+ from pixeltable.catalog import Catalog
228
+ from pixeltable.plan import FromClause
229
+
230
+ query = pxt.Query(FromClause(tbls=[self._tbl_version_path]))
231
+ if len(items) == 0 and len(named_items) == 0:
232
+ return query # Select(*); no further processing is necessary
191
233
 
192
- def where(self, pred: 'exprs.Expr') -> 'pxt.DataFrame':
234
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=False):
235
+ return query.select(*items, **named_items)
236
+
237
+ def where(self, pred: 'exprs.Expr') -> 'pxt.Query':
193
238
  """Filter rows from this table based on the expression.
194
239
 
195
- See [`DataFrame.where`][pixeltable.DataFrame.where] for more details.
240
+ See [`Query.where`][pixeltable.Query.where] for more details.
196
241
  """
197
- return self._df().where(pred)
242
+ from pixeltable.catalog import Catalog
243
+
244
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=False):
245
+ return self.select().where(pred)
198
246
 
199
247
  def join(
200
- self,
201
- other: 'Table',
202
- *,
203
- on: Optional['exprs.Expr'] = None,
204
- how: 'pixeltable.plan.JoinType.LiteralType' = 'inner',
205
- ) -> 'pxt.DataFrame':
248
+ self, other: 'Table', *, on: 'exprs.Expr' | None = None, how: 'pixeltable.plan.JoinType.LiteralType' = 'inner'
249
+ ) -> 'pxt.Query':
206
250
  """Join this table with another table."""
207
- return self._df().join(other, on=on, how=how)
251
+ from pixeltable.catalog import Catalog
208
252
 
209
- def order_by(self, *items: 'exprs.Expr', asc: bool = True) -> 'pxt.DataFrame':
253
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=False):
254
+ return self.select().join(other, on=on, how=how)
255
+
256
+ def order_by(self, *items: 'exprs.Expr', asc: bool = True) -> 'pxt.Query':
210
257
  """Order the rows of this table based on the expression.
211
258
 
212
- See [`DataFrame.order_by`][pixeltable.DataFrame.order_by] for more details.
259
+ See [`Query.order_by`][pixeltable.Query.order_by] for more details.
213
260
  """
214
- return self._df().order_by(*items, asc=asc)
261
+ from pixeltable.catalog import Catalog
262
+
263
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=False):
264
+ return self.select().order_by(*items, asc=asc)
215
265
 
216
- def group_by(self, *items: 'exprs.Expr') -> 'pxt.DataFrame':
266
+ def group_by(self, *items: 'exprs.Expr') -> 'pxt.Query':
217
267
  """Group the rows of this table based on the expression.
218
268
 
219
- See [`DataFrame.group_by`][pixeltable.DataFrame.group_by] for more details.
269
+ See [`Query.group_by`][pixeltable.Query.group_by] for more details.
220
270
  """
221
- return self._df().group_by(*items)
271
+ from pixeltable.catalog import Catalog
222
272
 
223
- def distinct(self) -> 'pxt.DataFrame':
273
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=False):
274
+ return self.select().group_by(*items)
275
+
276
+ def distinct(self) -> 'pxt.Query':
224
277
  """Remove duplicate rows from table."""
225
- return self._df().distinct()
278
+ return self.select().distinct()
279
+
280
+ def limit(self, n: int) -> 'pxt.Query':
281
+ return self.select().limit(n)
226
282
 
227
- def limit(self, n: int) -> 'pxt.DataFrame':
228
- return self._df().limit(n)
283
+ def sample(
284
+ self,
285
+ n: int | None = None,
286
+ n_per_stratum: int | None = None,
287
+ fraction: float | None = None,
288
+ seed: int | None = None,
289
+ stratify_by: Any = None,
290
+ ) -> pxt.Query:
291
+ """Choose a shuffled sample of rows
292
+
293
+ See [`Query.sample`][pixeltable.Query.sample] for more details.
294
+ """
295
+ return self.select().sample(
296
+ n=n, n_per_stratum=n_per_stratum, fraction=fraction, seed=seed, stratify_by=stratify_by
297
+ )
229
298
 
230
- def collect(self) -> 'pxt.dataframe.DataFrameResultSet':
299
+ def collect(self) -> 'pxt._query.ResultSet':
231
300
  """Return rows from this table."""
232
- return self._df().collect()
301
+ return self.select().collect()
233
302
 
234
- def show(self, *args: Any, **kwargs: Any) -> 'pxt.dataframe.DataFrameResultSet':
303
+ def show(self, *args: Any, **kwargs: Any) -> 'pxt._query.ResultSet':
235
304
  """Return rows from this table."""
236
- return self._df().show(*args, **kwargs)
305
+ return self.select().show(*args, **kwargs)
237
306
 
238
- def head(self, *args: Any, **kwargs: Any) -> 'pxt.dataframe.DataFrameResultSet':
307
+ def head(self, *args: Any, **kwargs: Any) -> 'pxt._query.ResultSet':
239
308
  """Return the first n rows inserted into this table."""
240
- return self._df().head(*args, **kwargs)
309
+ return self.select().head(*args, **kwargs)
241
310
 
242
- def tail(self, *args: Any, **kwargs: Any) -> 'pxt.dataframe.DataFrameResultSet':
311
+ def tail(self, *args: Any, **kwargs: Any) -> 'pxt._query.ResultSet':
243
312
  """Return the last n rows inserted into this table."""
244
- return self._df().tail(*args, **kwargs)
313
+ return self.select().tail(*args, **kwargs)
245
314
 
246
315
  def count(self) -> int:
247
316
  """Return the number of rows in this table."""
248
- return self._df().count()
317
+ return self.select().count()
249
318
 
250
- @property
251
319
  def columns(self) -> list[str]:
252
320
  """Return the names of the columns in this table."""
253
321
  cols = self._tbl_version_path.columns()
254
322
  return [c.name for c in cols]
255
323
 
256
- @property
257
- def _schema(self) -> dict[str, ts.ColumnType]:
324
+ def _get_schema(self) -> dict[str, ts.ColumnType]:
258
325
  """Return the schema (column names and column types) of this table."""
259
326
  return {c.name: c.col_type for c in self._tbl_version_path.columns()}
260
327
 
261
- @property
262
- def base_table(self) -> Optional['Table']:
263
- with env.Env.get().begin_xact():
264
- return self._base_table
328
+ def get_base_table(self) -> 'Table' | None:
329
+ return self._get_base_table()
265
330
 
266
- @property
267
331
  @abc.abstractmethod
268
- def _base_table(self) -> Optional['Table']:
269
- """The base's Table instance"""
332
+ def _get_base_table(self) -> 'Table' | None:
333
+ """The base's Table instance. Requires a transaction context"""
270
334
 
271
- @property
272
- def _base_tables(self) -> list['Table']:
273
- """The ancestor list of bases of this table, starting with its immediate base."""
274
- bases = []
275
- base = self._base_table
335
+ def _get_base_tables(self) -> list['Table']:
336
+ """The ancestor list of bases of this table, starting with its immediate base. Requires a transaction context"""
337
+ bases: list[Table] = []
338
+ base = self._get_base_table()
276
339
  while base is not None:
277
340
  bases.append(base)
278
- base = base._base_table
341
+ base = base._get_base_table()
279
342
  return bases
280
343
 
281
344
  @property
282
345
  @abc.abstractmethod
283
- def _effective_base_versions(self) -> list[Optional[int]]:
346
+ def _effective_base_versions(self) -> list[int | None]:
284
347
  """The effective versions of the ancestor bases, starting with its immediate base."""
285
348
 
286
- @property
287
- def _comment(self) -> str:
288
- return self._tbl_version.get().comment
349
+ def _get_comment(self) -> str:
350
+ return self._tbl_version_path.comment()
289
351
 
290
- @property
291
- def _num_retained_versions(self) -> int:
292
- return self._tbl_version.get().num_retained_versions
352
+ def _get_num_retained_versions(self) -> int:
353
+ return self._tbl_version_path.num_retained_versions()
293
354
 
294
- @property
295
- def _media_validation(self) -> MediaValidation:
296
- return self._tbl_version.get().media_validation
355
+ def _get_media_validation(self) -> MediaValidation:
356
+ return self._tbl_version_path.media_validation()
297
357
 
298
358
  def __repr__(self) -> str:
299
359
  return self._descriptors().to_string()
@@ -305,20 +365,23 @@ class Table(SchemaObject):
305
365
  """
306
366
  Constructs a list of descriptors for this table that can be pretty-printed.
307
367
  """
308
- helper = DescriptionHelper()
309
- helper.append(self._table_descriptor())
310
- helper.append(self._col_descriptor())
311
- idxs = self._index_descriptor()
312
- if not idxs.empty:
313
- helper.append(idxs)
314
- stores = self._external_store_descriptor()
315
- if not stores.empty:
316
- helper.append(stores)
317
- if self._comment:
318
- helper.append(f'COMMENT: {self._comment}')
319
- return helper
320
-
321
- def _col_descriptor(self, columns: Optional[list[str]] = None) -> pd.DataFrame:
368
+ from pixeltable.catalog import Catalog
369
+
370
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=False):
371
+ helper = DescriptionHelper()
372
+ helper.append(self._table_descriptor())
373
+ helper.append(self._col_descriptor())
374
+ idxs = self._index_descriptor()
375
+ if not idxs.empty:
376
+ helper.append(idxs)
377
+ stores = self._external_store_descriptor()
378
+ if not stores.empty:
379
+ helper.append(stores)
380
+ if self._get_comment():
381
+ helper.append(f'COMMENT: {self._get_comment()}')
382
+ return helper
383
+
384
+ def _col_descriptor(self, columns: list[str] | None = None) -> pd.DataFrame:
322
385
  return pd.DataFrame(
323
386
  {
324
387
  'Column Name': col.name,
@@ -329,29 +392,28 @@ class Table(SchemaObject):
329
392
  if columns is None or col.name in columns
330
393
  )
331
394
 
332
- def _index_descriptor(self, columns: Optional[list[str]] = None) -> pd.DataFrame:
395
+ def _index_descriptor(self, columns: list[str] | None = None) -> pd.DataFrame:
333
396
  from pixeltable import index
334
397
 
398
+ if self._tbl_version is None:
399
+ return pd.DataFrame([])
335
400
  pd_rows = []
336
401
  for name, info in self._tbl_version.get().idxs_by_name.items():
337
402
  if isinstance(info.idx, index.EmbeddingIndex) and (columns is None or info.col.name in columns):
338
- display_embed = info.idx.string_embed if info.col.col_type.is_string_type() else info.idx.image_embed
339
- if info.idx.string_embed is not None and info.idx.image_embed is not None:
340
- embed_str = f'{display_embed} (+1)'
341
- else:
342
- embed_str = str(display_embed)
403
+ col_ref = ColumnRef(info.col)
404
+ embedding = info.idx.embeddings[info.col.col_type._type](col_ref)
343
405
  row = {
344
406
  'Index Name': name,
345
407
  'Column': info.col.name,
346
408
  'Metric': str(info.idx.metric.name.lower()),
347
- 'Embedding': embed_str,
409
+ 'Embedding': str(embedding),
348
410
  }
349
411
  pd_rows.append(row)
350
412
  return pd.DataFrame(pd_rows)
351
413
 
352
414
  def _external_store_descriptor(self) -> pd.DataFrame:
353
415
  pd_rows = []
354
- for name, store in self._tbl_version.get().external_stores.items():
416
+ for name, store in self._tbl_version_path.tbl_version.get().external_stores.items():
355
417
  row = {'External Store': name, 'Type': type(store).__name__}
356
418
  pd_rows.append(row)
357
419
  return pd.DataFrame(pd_rows)
@@ -360,7 +422,6 @@ class Table(SchemaObject):
360
422
  """
361
423
  Print the table schema.
362
424
  """
363
- self._check_is_dropped()
364
425
  if getattr(builtins, '__IPYTHON__', False):
365
426
  from IPython.display import Markdown, display
366
427
 
@@ -368,31 +429,28 @@ class Table(SchemaObject):
368
429
  else:
369
430
  print(repr(self))
370
431
 
371
- def _drop(self) -> None:
372
- self._check_is_dropped()
373
- self._tbl_version.get().drop()
374
- self._is_dropped = True
375
-
376
432
  # TODO Factor this out into a separate module.
377
433
  # The return type is unresolvable, but torch can't be imported since it's an optional dependency.
378
434
  def to_pytorch_dataset(self, image_format: str = 'pt') -> 'torch.utils.data.IterableDataset':
379
435
  """Return a PyTorch Dataset for this table.
380
- See DataFrame.to_pytorch_dataset()
436
+ See Query.to_pytorch_dataset()
381
437
  """
382
- return self._df().to_pytorch_dataset(image_format=image_format)
438
+ return self.select().to_pytorch_dataset(image_format=image_format)
383
439
 
384
440
  def to_coco_dataset(self) -> Path:
385
441
  """Return the path to a COCO json file for this table.
386
- See DataFrame.to_coco_dataset()
442
+ See Query.to_coco_dataset()
387
443
  """
388
- return self._df().to_coco_dataset()
444
+ return self.select().to_coco_dataset()
389
445
 
390
446
  def _column_has_dependents(self, col: Column) -> bool:
391
447
  """Returns True if the column has dependents, False otherwise."""
392
448
  assert col is not None
393
- assert col.name in self._schema
394
- if any(c.name is not None for c in col.dependent_cols):
449
+ assert col.name in self._get_schema()
450
+ cat = catalog.Catalog.get()
451
+ if any(c.name is not None for c in cat.get_column_dependents(col.get_tbl().id, col.id)):
395
452
  return True
453
+ assert self._tbl_version is not None
396
454
  return any(
397
455
  col in store.get_local_columns()
398
456
  for view in (self, *self._get_views(recursive=True))
@@ -404,13 +462,13 @@ class Table(SchemaObject):
404
462
 
405
463
  If `if_exists='ignore'`, returns a list of existing columns, if any, in `new_col_names`.
406
464
  """
407
- assert not self.get_metadata()['is_snapshot']
408
- existing_col_names = set(self._schema.keys())
465
+ assert self._tbl_version is not None
466
+ existing_col_names = set(self._get_schema().keys())
409
467
  cols_to_ignore = []
410
468
  for new_col_name in new_col_names:
411
469
  if new_col_name in existing_col_names:
412
470
  if if_exists == IfExistsParam.ERROR:
413
- raise excs.Error(f'Duplicate column name: {new_col_name!r}')
471
+ raise excs.Error(f'Duplicate column name: {new_col_name}')
414
472
  elif if_exists == IfExistsParam.IGNORE:
415
473
  cols_to_ignore.append(new_col_name)
416
474
  elif if_exists in (IfExistsParam.REPLACE, IfExistsParam.REPLACE_FORCE):
@@ -433,15 +491,14 @@ class Table(SchemaObject):
433
491
 
434
492
  def add_columns(
435
493
  self,
436
- schema: dict[str, Union[ts.ColumnType, builtins.type, _GenericAlias]],
494
+ schema: dict[str, ts.ColumnType | builtins.type | _GenericAlias],
437
495
  if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
438
496
  ) -> UpdateStatus:
439
497
  """
440
498
  Adds multiple columns to the table. The columns must be concrete (non-computed) columns; to add computed
441
499
  columns, use [`add_computed_column()`][pixeltable.catalog.Table.add_computed_column] instead.
442
500
 
443
- The format of the `schema` argument is identical to the format of the schema in a call to
444
- [`create_table()`][pixeltable.globals.create_table].
501
+ The format of the `schema` argument is a dict mapping column names to their types.
445
502
 
446
503
  Args:
447
504
  schema: A dictionary mapping column names to types.
@@ -473,15 +530,16 @@ class Table(SchemaObject):
473
530
  ... }
474
531
  ... tbl.add_columns(schema)
475
532
  """
476
- self._check_is_dropped()
477
- if self.get_metadata()['is_snapshot']:
478
- raise excs.Error('Cannot add column to a snapshot.')
479
- col_schema = {
480
- col_name: {'type': ts.ColumnType.normalize_type(spec, nullable_default=True, allow_builtin_types=False)}
481
- for col_name, spec in schema.items()
482
- }
483
-
484
- with Env.get().begin_xact():
533
+ from pixeltable.catalog import Catalog
534
+
535
+ # lock_mutable_tree=True: we might end up having to drop existing columns, which requires locking the tree
536
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
537
+ self.__check_mutable('add columns to')
538
+ col_schema = {
539
+ col_name: {'type': ts.ColumnType.normalize_type(spec, nullable_default=True, allow_builtin_types=False)}
540
+ for col_name, spec in schema.items()
541
+ }
542
+
485
543
  # handle existing columns based on if_exists parameter
486
544
  cols_to_ignore = self._ignore_or_drop_existing_columns(
487
545
  list(col_schema.keys()), IfExistsParam.validated(if_exists, 'if_exists')
@@ -491,20 +549,22 @@ class Table(SchemaObject):
491
549
  for cname in cols_to_ignore:
492
550
  assert cname in col_schema
493
551
  del col_schema[cname]
552
+ result = UpdateStatus()
494
553
  if len(col_schema) == 0:
495
- return UpdateStatus()
554
+ return result
496
555
  new_cols = self._create_columns(col_schema)
497
556
  for new_col in new_cols:
498
557
  self._verify_column(new_col)
499
- status = self._tbl_version.get().add_columns(new_cols, print_stats=False, on_error='abort')
558
+ assert self._tbl_version is not None
559
+ result += self._tbl_version.get().add_columns(new_cols, print_stats=False, on_error='abort')
500
560
  FileCache.get().emit_eviction_warnings()
501
- return status
561
+ return result
502
562
 
503
563
  def add_column(
504
564
  self,
505
565
  *,
506
566
  if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
507
- **kwargs: Union[ts.ColumnType, builtins.type, _GenericAlias, exprs.Expr],
567
+ **kwargs: ts.ColumnType | builtins.type | _GenericAlias | exprs.Expr,
508
568
  ) -> UpdateStatus:
509
569
  """
510
570
  Adds an ordinary (non-computed) column to the table.
@@ -515,7 +575,7 @@ class Table(SchemaObject):
515
575
 
516
576
  - `'error'`: an exception will be raised.
517
577
  - `'ignore'`: do nothing and return.
518
- - `'replace' or 'replace_force'`: drop the existing column and add the new column, if it has
578
+ - `'replace'` or `'replace_force'`: drop the existing column and add the new column, if it has
519
579
  no dependents.
520
580
 
521
581
  Returns:
@@ -534,15 +594,11 @@ class Table(SchemaObject):
534
594
 
535
595
  >>> tbl.add_columns({'new_col': pxt.Int})
536
596
  """
537
- self._check_is_dropped()
538
- # verify kwargs
539
- if self._tbl_version.get().is_snapshot:
540
- raise excs.Error('Cannot add column to a snapshot.')
541
597
  # verify kwargs and construct column schema dict
542
598
  if len(kwargs) != 1:
543
599
  raise excs.Error(
544
- f'add_column() requires exactly one keyword argument of the form "col_name=col_type"; '
545
- f'got {len(kwargs)} instead ({", ".join(kwargs.keys())})'
600
+ f'add_column() requires exactly one keyword argument of the form `col_name=col_type`; '
601
+ f'got {len(kwargs)} arguments instead ({", ".join(kwargs.keys())})'
546
602
  )
547
603
  col_type = next(iter(kwargs.values()))
548
604
  if not isinstance(col_type, (ts.ColumnType, type, _GenericAlias)):
@@ -554,7 +610,8 @@ class Table(SchemaObject):
554
610
  def add_computed_column(
555
611
  self,
556
612
  *,
557
- stored: Optional[bool] = None,
613
+ stored: bool | None = None,
614
+ destination: str | Path | None = None,
558
615
  print_stats: bool = False,
559
616
  on_error: Literal['abort', 'ignore'] = 'abort',
560
617
  if_exists: Literal['error', 'ignore', 'replace'] = 'error',
@@ -566,6 +623,7 @@ class Table(SchemaObject):
566
623
  Args:
567
624
  kwargs: Exactly one keyword argument of the form `col_name=expression`.
568
625
  stored: Whether the column is materialized and stored or computed on demand.
626
+ destination: An object store reference for persisting computed files.
569
627
  print_stats: If `True`, print execution metrics during evaluation.
570
628
  on_error: Determines the behavior if an error occurs while evaluating the column expression for at least one
571
629
  row.
@@ -573,7 +631,7 @@ class Table(SchemaObject):
573
631
  - `'abort'`: an exception will be raised and the column will not be added.
574
632
  - `'ignore'`: execution will continue and the column will be added. Any rows
575
633
  with errors will have a `None` value for the column, with information about the error stored in the
576
- corresponding `tbl.col_name.errortype` and `tbl.col_name.errormsg` fields.
634
+ corresponding `tbl.col_name.errormsg` and `tbl.col_name.errortype` fields.
577
635
  if_exists: Determines the behavior if the column already exists. Must be one of the following:
578
636
 
579
637
  - `'error'`: an exception will be raised.
@@ -598,48 +656,53 @@ class Table(SchemaObject):
598
656
 
599
657
  >>> tbl.add_computed_column(rotated=tbl.frame.rotate(90), stored=False)
600
658
  """
601
- self._check_is_dropped()
602
- if self.get_metadata()['is_snapshot']:
603
- raise excs.Error('Cannot add column to a snapshot.')
604
- if len(kwargs) != 1:
605
- raise excs.Error(
606
- f'add_computed_column() requires exactly one keyword argument of the form '
607
- '"column-name=type|value-expression"; '
608
- f'got {len(kwargs)} arguments instead ({", ".join(list(kwargs.keys()))})'
609
- )
610
- col_name, spec = next(iter(kwargs.items()))
611
- if not is_valid_identifier(col_name):
612
- raise excs.Error(f'Invalid column name: {col_name!r}')
613
-
614
- col_schema: dict[str, Any] = {'value': spec}
615
- if stored is not None:
616
- col_schema['stored'] = stored
617
-
618
- # Raise an error if the column expression refers to a column error property
619
- if isinstance(spec, exprs.Expr):
620
- for e in spec.subexprs(expr_class=exprs.ColumnPropertyRef, traverse_matches=False):
621
- if e.is_error_prop():
622
- raise excs.Error(
623
- 'Use of a reference to an error property of another column is not allowed in a computed '
624
- f'column. The specified computation for this column contains this reference: `{e!r}`'
625
- )
659
+ from pixeltable.catalog import Catalog
660
+
661
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
662
+ self.__check_mutable('add columns to')
663
+ if len(kwargs) != 1:
664
+ raise excs.Error(
665
+ f'add_computed_column() requires exactly one keyword argument of the form '
666
+ '`col_name=col_type` or `col_name=expression`; '
667
+ f'got {len(kwargs)} arguments instead ({", ".join(kwargs.keys())})'
668
+ )
669
+ col_name, spec = next(iter(kwargs.items()))
670
+ if not is_valid_identifier(col_name):
671
+ raise excs.Error(f'Invalid column name: {col_name}')
672
+
673
+ col_schema: dict[str, Any] = {'value': spec}
674
+ if stored is not None:
675
+ col_schema['stored'] = stored
676
+
677
+ if destination is not None:
678
+ col_schema['destination'] = destination
679
+
680
+ # Raise an error if the column expression refers to a column error property
681
+ if isinstance(spec, exprs.Expr):
682
+ for e in spec.subexprs(expr_class=exprs.ColumnPropertyRef, traverse_matches=False):
683
+ if e.is_cellmd_prop():
684
+ raise excs.Error(
685
+ f'Use of a reference to the {e.prop.name.lower()!r} property of another column '
686
+ f'is not allowed in a computed column.'
687
+ )
626
688
 
627
- with Env.get().begin_xact():
628
689
  # handle existing columns based on if_exists parameter
629
690
  cols_to_ignore = self._ignore_or_drop_existing_columns(
630
691
  [col_name], IfExistsParam.validated(if_exists, 'if_exists')
631
692
  )
632
693
  # if the column to add already exists and user asked to ignore
633
- # exiting column, there's nothing to do.
694
+ # existing column, there's nothing to do.
695
+ result = UpdateStatus()
634
696
  if len(cols_to_ignore) != 0:
635
697
  assert cols_to_ignore[0] == col_name
636
- return UpdateStatus()
698
+ return result
637
699
 
638
700
  new_col = self._create_columns({col_name: col_schema})[0]
639
701
  self._verify_column(new_col)
640
- status = self._tbl_version.get().add_columns([new_col], print_stats=print_stats, on_error=on_error)
702
+ assert self._tbl_version is not None
703
+ result += self._tbl_version.get().add_columns([new_col], print_stats=print_stats, on_error=on_error)
641
704
  FileCache.get().emit_eviction_warnings()
642
- return status
705
+ return result
643
706
 
644
707
  @classmethod
645
708
  def _validate_column_spec(cls, name: str, spec: dict[str, Any]) -> None:
@@ -649,40 +712,45 @@ class Table(SchemaObject):
649
712
  (on account of containing Python Callables or Exprs).
650
713
  """
651
714
  assert isinstance(spec, dict)
652
- valid_keys = {'type', 'value', 'stored', 'media_validation'}
715
+ valid_keys = {'type', 'value', 'stored', 'media_validation', 'destination'}
653
716
  for k in spec:
654
717
  if k not in valid_keys:
655
- raise excs.Error(f'Column {name}: invalid key {k!r}')
718
+ raise excs.Error(f'Column {name!r}: invalid key {k!r}')
656
719
 
657
720
  if 'type' not in spec and 'value' not in spec:
658
- raise excs.Error(f"Column {name}: 'type' or 'value' must be specified")
721
+ raise excs.Error(f"Column {name!r}: 'type' or 'value' must be specified")
659
722
 
660
723
  if 'type' in spec and not isinstance(spec['type'], (ts.ColumnType, type, _GenericAlias)):
661
- raise excs.Error(f'Column {name}: "type" must be a type or ColumnType, got {spec["type"]}')
724
+ raise excs.Error(f"Column {name!r}: 'type' must be a type or ColumnType; got {spec['type']}")
662
725
 
663
726
  if 'value' in spec:
664
727
  value_expr = exprs.Expr.from_object(spec['value'])
665
728
  if value_expr is None:
666
- raise excs.Error(f'Column {name}: value must be a Pixeltable expression.')
729
+ raise excs.Error(f"Column {name!r}: 'value' must be a Pixeltable expression.")
667
730
  if 'type' in spec:
668
- raise excs.Error(f"Column {name}: 'type' is redundant if 'value' is specified")
731
+ raise excs.Error(f"Column {name!r}: 'type' is redundant if 'value' is specified")
669
732
 
670
733
  if 'media_validation' in spec:
671
- _ = catalog.MediaValidation.validated(spec['media_validation'], f'Column {name}: media_validation')
734
+ _ = catalog.MediaValidation.validated(spec['media_validation'], f'Column {name!r}: media_validation')
672
735
 
673
736
  if 'stored' in spec and not isinstance(spec['stored'], bool):
674
- raise excs.Error(f'Column {name}: "stored" must be a bool, got {spec["stored"]}')
737
+ raise excs.Error(f"Column {name!r}: 'stored' must be a bool; got {spec['stored']}")
738
+
739
+ d = spec.get('destination')
740
+ if d is not None and not isinstance(d, (str, Path)):
741
+ raise excs.Error(f'Column {name!r}: `destination` must be a string or path; got {d}')
675
742
 
676
743
  @classmethod
677
744
  def _create_columns(cls, schema: dict[str, Any]) -> list[Column]:
678
745
  """Construct list of Columns, given schema"""
679
746
  columns: list[Column] = []
680
747
  for name, spec in schema.items():
681
- col_type: Optional[ts.ColumnType] = None
682
- value_expr: Optional[exprs.Expr] = None
748
+ col_type: ts.ColumnType | None = None
749
+ value_expr: exprs.Expr | None = None
683
750
  primary_key: bool = False
684
- media_validation: Optional[catalog.MediaValidation] = None
751
+ media_validation: catalog.MediaValidation | None = None
685
752
  stored = True
753
+ destination: str | None = None
686
754
 
687
755
  if isinstance(spec, (ts.ColumnType, type, _GenericAlias)):
688
756
  col_type = ts.ColumnType.normalize_type(spec, nullable_default=True, allow_builtin_types=False)
@@ -707,6 +775,7 @@ class Table(SchemaObject):
707
775
  media_validation = (
708
776
  catalog.MediaValidation[media_validation_str.upper()] if media_validation_str is not None else None
709
777
  )
778
+ destination = spec.get('destination')
710
779
  else:
711
780
  raise excs.Error(f'Invalid value for column {name!r}')
712
781
 
@@ -717,41 +786,46 @@ class Table(SchemaObject):
717
786
  stored=stored,
718
787
  is_pk=primary_key,
719
788
  media_validation=media_validation,
789
+ destination=destination,
720
790
  )
791
+ # Validate the column's resolved_destination. This will ensure that if the column uses a default (global)
792
+ # media destination, it gets validated at this time.
793
+ ObjectOps.validate_destination(column.destination, column.name)
721
794
  columns.append(column)
795
+
722
796
  return columns
723
797
 
724
798
  @classmethod
725
799
  def validate_column_name(cls, name: str) -> None:
726
- """Check that a name is usable as a pixeltalbe column name"""
800
+ """Check that a name is usable as a pixeltable column name"""
727
801
  if is_system_column_name(name) or is_python_keyword(name):
728
802
  raise excs.Error(f'{name!r} is a reserved name in Pixeltable; please choose a different column name.')
729
803
  if not is_valid_identifier(name):
730
- raise excs.Error(f'Invalid column name: {name!r}')
804
+ raise excs.Error(f'Invalid column name: {name}')
731
805
 
732
806
  @classmethod
733
807
  def _verify_column(cls, col: Column) -> None:
734
808
  """Check integrity of user-supplied Column and supply defaults"""
735
809
  cls.validate_column_name(col.name)
736
810
  if col.stored is False and not col.is_computed:
737
- raise excs.Error(f'Column {col.name!r}: stored={col.stored} only applies to computed columns')
811
+ raise excs.Error(f'Column {col.name!r}: `stored={col.stored}` only applies to computed columns')
738
812
  if col.stored is False and col.has_window_fn_call():
739
813
  raise excs.Error(
740
814
  (
741
- f'Column {col.name!r}: stored={col.stored} is not valid for image columns computed with a '
815
+ f'Column {col.name!r}: `stored={col.stored}` is not valid for image columns computed with a '
742
816
  f'streaming function'
743
817
  )
744
818
  )
819
+ if col._explicit_destination is not None and not (col.stored and col.is_computed):
820
+ raise excs.Error(f'Column {col.name!r}: `destination` property only applies to stored computed columns')
745
821
 
746
822
  @classmethod
747
823
  def _verify_schema(cls, schema: list[Column]) -> None:
748
824
  """Check integrity of user-supplied schema and set defaults"""
749
- column_names: set[str] = set()
750
825
  for col in schema:
751
826
  cls._verify_column(col)
752
- column_names.add(col.name)
753
827
 
754
- def drop_column(self, column: Union[str, ColumnRef], if_not_exists: Literal['error', 'ignore'] = 'error') -> None:
828
+ def drop_column(self, column: str | ColumnRef, if_not_exists: Literal['error', 'ignore'] = 'error') -> None:
755
829
  """Drop a column from the table.
756
830
 
757
831
  Args:
@@ -781,53 +855,88 @@ class Table(SchemaObject):
781
855
  >>> tbl = pxt.get_table('my_table')
782
856
  ... tbl.drop_col(tbl.col, if_not_exists='ignore')
783
857
  """
784
- self._check_is_dropped()
785
- if self._tbl_version_path.is_snapshot():
786
- raise excs.Error('Cannot drop column from a snapshot.')
787
- col: Column = None
788
- if_not_exists_ = IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
789
- if isinstance(column, str):
790
- col = self._tbl_version_path.get_column(column, include_bases=False)
791
- if col is None:
792
- if if_not_exists_ == IfNotExistsParam.ERROR:
793
- raise excs.Error(f'Column {column!r} unknown')
794
- assert if_not_exists_ == IfNotExistsParam.IGNORE
795
- return
796
- col = self._tbl_version.get().cols_by_name[column]
797
- else:
798
- exists = self._tbl_version_path.has_column(column.col, include_bases=False)
799
- if not exists:
800
- if if_not_exists_ == IfNotExistsParam.ERROR:
801
- raise excs.Error(f'Unknown column: {column.col.qualified_name}')
802
- assert if_not_exists_ == IfNotExistsParam.IGNORE
803
- return
804
- col = column.col
858
+ from pixeltable.catalog import Catalog
805
859
 
806
- dependent_user_cols = [c for c in col.dependent_cols if c.name is not None]
807
- if len(dependent_user_cols) > 0:
808
- raise excs.Error(
809
- f'Cannot drop column `{col.name}` because the following columns depend on it:\n'
810
- f'{", ".join(c.name for c in dependent_user_cols)}'
811
- )
860
+ cat = Catalog.get()
861
+
862
+ # lock_mutable_tree=True: we need to be able to see whether any transitive view has column dependents
863
+ with cat.begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
864
+ self.__check_mutable('drop columns from')
865
+ col: Column = None
866
+ if_not_exists_ = IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
867
+
868
+ if isinstance(column, str):
869
+ col = self._tbl_version_path.get_column(column)
870
+ if col is None:
871
+ if if_not_exists_ == IfNotExistsParam.ERROR:
872
+ raise excs.Error(f'Unknown column: {column}')
873
+ assert if_not_exists_ == IfNotExistsParam.IGNORE
874
+ return
875
+ if col.get_tbl().id != self._tbl_version_path.tbl_id:
876
+ raise excs.Error(f'Cannot drop base table column {col.name!r}')
877
+ col = self._tbl_version.get().cols_by_name[column]
878
+ else:
879
+ exists = self._tbl_version_path.has_column(column.col)
880
+ if not exists:
881
+ if if_not_exists_ == IfNotExistsParam.ERROR:
882
+ raise excs.Error(f'Unknown column: {column.col.qualified_name}')
883
+ assert if_not_exists_ == IfNotExistsParam.IGNORE
884
+ return
885
+ col = column.col
886
+ if col.get_tbl().id != self._tbl_version_path.tbl_id:
887
+ raise excs.Error(f'Cannot drop base table column {col.name!r}')
888
+
889
+ dependent_user_cols = [c for c in cat.get_column_dependents(col.get_tbl().id, col.id) if c.name is not None]
890
+ if len(dependent_user_cols) > 0:
891
+ raise excs.Error(
892
+ f'Cannot drop column {col.name!r} because the following columns depend on it:\n'
893
+ f'{", ".join(c.name for c in dependent_user_cols)}'
894
+ )
895
+
896
+ views = self._get_views(recursive=True, mutable_only=True)
897
+
898
+ # See if any view predicates depend on this column
899
+ dependent_views: list[tuple[Table, exprs.Expr]] = []
900
+ for view in views:
901
+ if view._tbl_version is not None:
902
+ predicate = view._tbl_version.get().predicate
903
+ if predicate is not None:
904
+ for predicate_col in exprs.Expr.get_refd_column_ids(predicate.as_dict()):
905
+ if predicate_col.tbl_id == col.get_tbl().id and predicate_col.col_id == col.id:
906
+ dependent_views.append((view, predicate))
907
+
908
+ if len(dependent_views) > 0:
909
+ dependent_views_str = '\n'.join(
910
+ f'view: {view._path()}, predicate: {predicate}' for view, predicate in dependent_views
911
+ )
912
+ raise excs.Error(
913
+ f'Cannot drop column {col.name!r} because the following views depend on it:\n{dependent_views_str}'
914
+ )
812
915
 
813
- with Env.get().begin_xact():
814
916
  # See if this column has a dependent store. We need to look through all stores in all
815
917
  # (transitive) views of this table.
918
+ col_handle = col.handle
816
919
  dependent_stores = [
817
920
  (view, store)
818
- for view in (self, *self._get_views(recursive=True))
921
+ for view in (self, *views)
819
922
  for store in view._tbl_version.get().external_stores.values()
820
- if col in store.get_local_columns()
923
+ if col_handle in store.get_local_columns()
821
924
  ]
822
925
  if len(dependent_stores) > 0:
823
926
  dependent_store_names = [
824
- store.name if view._id == self._id else f'{store.name} (in view `{view._name}`)'
927
+ store.name if view._id == self._id else f'{store.name} (in view {view._name!r})'
825
928
  for view, store in dependent_stores
826
929
  ]
827
930
  raise excs.Error(
828
- f'Cannot drop column `{col.name}` because the following external stores depend on it:\n'
931
+ f'Cannot drop column {col.name!r} because the following external stores depend on it:\n'
829
932
  f'{", ".join(dependent_store_names)}'
830
933
  )
934
+ all_columns = self.columns()
935
+ if len(all_columns) == 1 and col.name == all_columns[0]:
936
+ raise excs.Error(
937
+ f'Cannot drop column {col.name!r} because it is the last remaining column in this table.'
938
+ f' Tables must have at least one column.'
939
+ )
831
940
 
832
941
  self._tbl_version.get().drop_column(col)
833
942
 
@@ -847,7 +956,9 @@ class Table(SchemaObject):
847
956
  >>> tbl = pxt.get_table('my_table')
848
957
  ... tbl.rename_column('col1', 'col2')
849
958
  """
850
- with Env.get().begin_xact():
959
+ from pixeltable.catalog import Catalog
960
+
961
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=False):
851
962
  self._tbl_version.get().rename_column(old_name, new_name)
852
963
 
853
964
  def _list_index_info_for_test(self) -> list[dict[str, Any]]:
@@ -858,7 +969,6 @@ class Table(SchemaObject):
858
969
  A list of index information, each containing the index's
859
970
  id, name, and the name of the column it indexes.
860
971
  """
861
- assert not self._is_dropped
862
972
  index_info = []
863
973
  for idx_name, idx in self._tbl_version.get().idxs_by_name.items():
864
974
  index_info.append({'_id': idx.id, '_name': idx_name, '_column': idx.col.name})
@@ -866,13 +976,13 @@ class Table(SchemaObject):
866
976
 
867
977
  def add_embedding_index(
868
978
  self,
869
- column: Union[str, ColumnRef],
979
+ column: str | ColumnRef,
870
980
  *,
871
- idx_name: Optional[str] = None,
872
- embedding: Optional[pxt.Function] = None,
873
- string_embed: Optional[pxt.Function] = None,
874
- image_embed: Optional[pxt.Function] = None,
875
- metric: str = 'cosine',
981
+ idx_name: str | None = None,
982
+ embedding: pxt.Function | None = None,
983
+ string_embed: pxt.Function | None = None,
984
+ image_embed: pxt.Function | None = None,
985
+ metric: Literal['cosine', 'ip', 'l2'] = 'cosine',
876
986
  if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
877
987
  ) -> None:
878
988
  """
@@ -880,25 +990,33 @@ class Table(SchemaObject):
880
990
  rows are inserted into the table.
881
991
 
882
992
  To add an embedding index, one must specify, at minimum, the column to be indexed and an embedding UDF.
883
- Only `String` and `Image` columns are currently supported. Here's an example that uses a
884
- [CLIP embedding][pixeltable.functions.huggingface.clip] to index an image column:
993
+ Only `String` and `Image` columns are currently supported.
994
+
995
+ Examples:
996
+ Here's an example that uses a
997
+ [CLIP embedding][pixeltable.functions.huggingface.clip] to index an image column:
998
+
999
+ >>> from pixeltable.functions.huggingface import clip
1000
+ >>> embedding_fn = clip.using(model_id='openai/clip-vit-base-patch32')
1001
+ >>> tbl.add_embedding_index(tbl.img, embedding=embedding_fn)
1002
+
1003
+ Once the index is created, similarity lookups can be performed using the `similarity` pseudo-function:
885
1004
 
886
- >>> from pixeltable.functions.huggingface import clip
887
- ... embedding_fn = clip.using(model_id='openai/clip-vit-base-patch32')
888
- ... tbl.add_embedding_index(tbl.img, embedding=embedding_fn)
1005
+ >>> reference_img = PIL.Image.open('my_image.jpg')
1006
+ >>> sim = tbl.img.similarity(image=reference_img)
1007
+ >>> tbl.select(tbl.img, sim).order_by(sim, asc=False).limit(5)
889
1008
 
890
- Once the index is created, similiarity lookups can be performed using the `similarity` pseudo-function.
1009
+ If the embedding UDF is a multimodal embedding (supporting more than one data type), then lookups may be
1010
+ performed using any of its supported modalities. In our example, CLIP supports both text and images, so we
1011
+ can also search for images using a text description:
891
1012
 
892
- >>> reference_img = PIL.Image.open('my_image.jpg')
893
- ... sim = tbl.img.similarity(reference_img)
894
- ... tbl.select(tbl.img, sim).order_by(sim, asc=False).limit(5)
1013
+ >>> sim = tbl.img.similarity(string='a picture of a train')
1014
+ >>> tbl.select(tbl.img, sim).order_by(sim, asc=False).limit(5)
895
1015
 
896
- If the embedding UDF is a multimodal embedding (supporting more than one data type), then lookups may be
897
- performed using any of its supported types. In our example, CLIP supports both text and images, so we can
898
- also search for images using a text description:
1016
+ Audio and video lookups would look like this:
899
1017
 
900
- >>> sim = tbl.img.similarity('a picture of a train')
901
- ... tbl.select(tbl.img, sim).order_by(sim, asc=False).limit(5)
1018
+ >>> sim = tbl.img.similarity(audio='/path/to/audio.flac')
1019
+ >>> sim = tbl.img.similarity(video='/path/to/video.mp4')
902
1020
 
903
1021
  Args:
904
1022
  column: The name of, or reference to, the column to be indexed; must be a `String` or `Image` column.
@@ -929,9 +1047,9 @@ class Table(SchemaObject):
929
1047
  Add an index to the `img` column of the table `my_table`:
930
1048
 
931
1049
  >>> from pixeltable.functions.huggingface import clip
932
- ... tbl = pxt.get_table('my_table')
933
- ... embedding_fn = clip.using(model_id='openai/clip-vit-base-patch32')
934
- ... tbl.add_embedding_index(tbl.img, embedding=embedding_fn)
1050
+ >>> tbl = pxt.get_table('my_table')
1051
+ >>> embedding_fn = clip.using(model_id='openai/clip-vit-base-patch32')
1052
+ >>> tbl.add_embedding_index(tbl.img, embedding=embedding_fn)
935
1053
 
936
1054
  Alternatively, the `img` column may be specified by name:
937
1055
 
@@ -955,11 +1073,12 @@ class Table(SchemaObject):
955
1073
  ... image_embed=image_embedding_fn
956
1074
  ... )
957
1075
  """
958
- if self._tbl_version_path.is_snapshot():
959
- raise excs.Error('Cannot add an index to a snapshot')
960
- col = self._resolve_column_parameter(column)
1076
+ from pixeltable.catalog import Catalog
1077
+
1078
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
1079
+ self.__check_mutable('add an index to')
1080
+ col = self._resolve_column_parameter(column)
961
1081
 
962
- with Env.get().begin_xact():
963
1082
  if idx_name is not None and idx_name in self._tbl_version.get().idxs_by_name:
964
1083
  if_exists_ = IfExistsParam.validated(if_exists, 'if_exists')
965
1084
  # An index with the same name already exists.
@@ -968,7 +1087,7 @@ class Table(SchemaObject):
968
1087
  raise excs.Error(f'Duplicate index name: {idx_name}')
969
1088
  if not isinstance(self._tbl_version.get().idxs_by_name[idx_name].idx, index.EmbeddingIndex):
970
1089
  raise excs.Error(
971
- f'Index `{idx_name}` is not an embedding index. Cannot {if_exists_.name.lower()} it.'
1090
+ f'Index {idx_name!r} is not an embedding index. Cannot {if_exists_.name.lower()} it.'
972
1091
  )
973
1092
  if if_exists_ == IfExistsParam.IGNORE:
974
1093
  return
@@ -981,10 +1100,9 @@ class Table(SchemaObject):
981
1100
  if idx_name is not None:
982
1101
  Table.validate_column_name(idx_name)
983
1102
 
984
- # create the EmbeddingIndex instance to verify args
985
- idx = EmbeddingIndex(
986
- col, metric=metric, embed=embedding, string_embed=string_embed, image_embed=image_embed
987
- )
1103
+ # validate EmbeddingIndex args
1104
+ idx = EmbeddingIndex(metric=metric, embed=embedding, string_embed=string_embed, image_embed=image_embed)
1105
+ _ = idx.create_value_expr(col)
988
1106
  _ = self._tbl_version.get().add_index(col, idx_name=idx_name, idx=idx)
989
1107
  # TODO: how to deal with exceptions here? drop the index and raise?
990
1108
  FileCache.get().emit_eviction_warnings()
@@ -992,8 +1110,8 @@ class Table(SchemaObject):
992
1110
  def drop_embedding_index(
993
1111
  self,
994
1112
  *,
995
- column: Union[str, ColumnRef, None] = None,
996
- idx_name: Optional[str] = None,
1113
+ column: str | ColumnRef | None = None,
1114
+ idx_name: str | None = None,
997
1115
  if_not_exists: Literal['error', 'ignore'] = 'error',
998
1116
  ) -> None:
999
1117
  """
@@ -1039,26 +1157,28 @@ class Table(SchemaObject):
1039
1157
  >>> tbl = pxt.get_table('my_table')
1040
1158
  >>> tbl.drop_embedding_index(idx_name='idx1', if_not_exists='ignore')
1041
1159
  """
1160
+ from pixeltable.catalog import Catalog
1161
+
1042
1162
  if (column is None) == (idx_name is None):
1043
1163
  raise excs.Error("Exactly one of 'column' or 'idx_name' must be provided")
1044
1164
 
1045
- col: Column = None
1046
- if idx_name is None:
1047
- col = self._resolve_column_parameter(column)
1048
- assert col is not None
1165
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
1166
+ col: Column = None
1167
+ if idx_name is None:
1168
+ col = self._resolve_column_parameter(column)
1169
+ assert col is not None
1049
1170
 
1050
- with Env.get().begin_xact():
1051
1171
  self._drop_index(col=col, idx_name=idx_name, _idx_class=index.EmbeddingIndex, if_not_exists=if_not_exists)
1052
1172
 
1053
- def _resolve_column_parameter(self, column: Union[str, ColumnRef]) -> Column:
1173
+ def _resolve_column_parameter(self, column: str | ColumnRef) -> Column:
1054
1174
  """Resolve a column parameter to a Column object"""
1055
1175
  col: Column = None
1056
1176
  if isinstance(column, str):
1057
- col = self._tbl_version_path.get_column(column, include_bases=True)
1177
+ col = self._tbl_version_path.get_column(column)
1058
1178
  if col is None:
1059
- raise excs.Error(f'Column {column!r} unknown')
1179
+ raise excs.Error(f'Unknown column: {column}')
1060
1180
  elif isinstance(column, ColumnRef):
1061
- exists = self._tbl_version_path.has_column(column.col, include_bases=True)
1181
+ exists = self._tbl_version_path.has_column(column.col)
1062
1182
  if not exists:
1063
1183
  raise excs.Error(f'Unknown column: {column.col.qualified_name}')
1064
1184
  col = column.col
@@ -1069,8 +1189,8 @@ class Table(SchemaObject):
1069
1189
  def drop_index(
1070
1190
  self,
1071
1191
  *,
1072
- column: Union[str, ColumnRef, None] = None,
1073
- idx_name: Optional[str] = None,
1192
+ column: str | ColumnRef | None = None,
1193
+ idx_name: str | None = None,
1074
1194
  if_not_exists: Literal['error', 'ignore'] = 'error',
1075
1195
  ) -> None:
1076
1196
  """
@@ -1116,27 +1236,30 @@ class Table(SchemaObject):
1116
1236
  >>> tbl.drop_index(idx_name='idx1', if_not_exists='ignore')
1117
1237
 
1118
1238
  """
1239
+ from pixeltable.catalog import Catalog
1240
+
1119
1241
  if (column is None) == (idx_name is None):
1120
1242
  raise excs.Error("Exactly one of 'column' or 'idx_name' must be provided")
1121
1243
 
1122
- col: Column = None
1123
- if idx_name is None:
1124
- col = self._resolve_column_parameter(column)
1125
- assert col is not None
1244
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=False):
1245
+ col: Column = None
1246
+ if idx_name is None:
1247
+ col = self._resolve_column_parameter(column)
1248
+ assert col is not None
1126
1249
 
1127
- with Env.get().begin_xact():
1128
1250
  self._drop_index(col=col, idx_name=idx_name, if_not_exists=if_not_exists)
1129
1251
 
1130
1252
  def _drop_index(
1131
1253
  self,
1132
1254
  *,
1133
- col: Optional[Column] = None,
1134
- idx_name: Optional[str] = None,
1135
- _idx_class: Optional[type[index.IndexBase]] = None,
1255
+ col: Column | None = None,
1256
+ idx_name: str | None = None,
1257
+ _idx_class: type[index.IndexBase] | None = None,
1136
1258
  if_not_exists: Literal['error', 'ignore'] = 'error',
1137
1259
  ) -> None:
1138
- if self._tbl_version_path.is_snapshot():
1139
- raise excs.Error('Cannot drop an index from a snapshot')
1260
+ from pixeltable.catalog import Catalog
1261
+
1262
+ self.__check_mutable('drop an index from')
1140
1263
  assert (col is None) != (idx_name is None)
1141
1264
 
1142
1265
  if idx_name is not None:
@@ -1148,9 +1271,10 @@ class Table(SchemaObject):
1148
1271
  return
1149
1272
  idx_info = self._tbl_version.get().idxs_by_name[idx_name]
1150
1273
  else:
1151
- if col.tbl.id != self._tbl_version.id:
1274
+ if col.get_tbl().id != self._tbl_version.id:
1152
1275
  raise excs.Error(
1153
- f'Column {col.name!r}: cannot drop index from column that belongs to base ({col.tbl.get().name}!r)'
1276
+ f'Column {col.name!r}: '
1277
+ f'cannot drop index from column that belongs to base table {col.get_tbl().name!r}'
1154
1278
  )
1155
1279
  idx_info_list = [info for info in self._tbl_version.get().idxs_by_name.values() if info.col.id == col.id]
1156
1280
  if _idx_class is not None:
@@ -1162,14 +1286,17 @@ class Table(SchemaObject):
1162
1286
  assert if_not_exists_ == IfNotExistsParam.IGNORE
1163
1287
  return
1164
1288
  if len(idx_info_list) > 1:
1165
- raise excs.Error(f"Column {col.name!r} has multiple indices; specify 'idx_name' instead")
1289
+ raise excs.Error(f'Column {col.name!r} has multiple indices; specify `idx_name` explicitly to drop one')
1166
1290
  idx_info = idx_info_list[0]
1167
1291
 
1168
1292
  # Find out if anything depends on this index
1169
- dependent_user_cols = [c for c in idx_info.val_col.dependent_cols if c.name is not None]
1293
+ val_col = idx_info.val_col
1294
+ dependent_user_cols = [
1295
+ c for c in Catalog.get().get_column_dependents(val_col.get_tbl().id, val_col.id) if c.name is not None
1296
+ ]
1170
1297
  if len(dependent_user_cols) > 0:
1171
1298
  raise excs.Error(
1172
- f'Cannot drop index because the following columns depend on it:\n'
1299
+ f'Cannot drop index {idx_info.name!r} because the following columns depend on it:\n'
1173
1300
  f'{", ".join(c.name for c in dependent_user_cols)}'
1174
1301
  )
1175
1302
  self._tbl_version.get().drop_index(idx_info.id)
@@ -1180,8 +1307,8 @@ class Table(SchemaObject):
1180
1307
  source: TableDataSource,
1181
1308
  /,
1182
1309
  *,
1183
- source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
1184
- schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
1310
+ source_format: Literal['csv', 'excel', 'parquet', 'json'] | None = None,
1311
+ schema_overrides: dict[str, ts.ColumnType] | None = None,
1185
1312
  on_error: Literal['abort', 'ignore'] = 'abort',
1186
1313
  print_stats: bool = False,
1187
1314
  **kwargs: Any,
@@ -1195,11 +1322,11 @@ class Table(SchemaObject):
1195
1322
  @abc.abstractmethod
1196
1323
  def insert(
1197
1324
  self,
1198
- source: Optional[TableDataSource] = None,
1325
+ source: TableDataSource | None = None,
1199
1326
  /,
1200
1327
  *,
1201
- source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
1202
- schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
1328
+ source_format: Literal['csv', 'excel', 'parquet', 'json'] | None = None,
1329
+ schema_overrides: dict[str, ts.ColumnType] | None = None,
1203
1330
  on_error: Literal['abort', 'ignore'] = 'abort',
1204
1331
  print_stats: bool = False,
1205
1332
  **kwargs: Any,
@@ -1216,7 +1343,8 @@ class Table(SchemaObject):
1216
1343
  on_error: Literal['abort', 'ignore'] = 'abort',
1217
1344
  print_stats: bool = False,
1218
1345
  **kwargs: Any,
1219
- )```
1346
+ )
1347
+ ```
1220
1348
 
1221
1349
  To insert just a single row, you can use the more concise syntax:
1222
1350
 
@@ -1226,7 +1354,8 @@ class Table(SchemaObject):
1226
1354
  on_error: Literal['abort', 'ignore'] = 'abort',
1227
1355
  print_stats: bool = False,
1228
1356
  **kwargs: Any
1229
- )```
1357
+ )
1358
+ ```
1230
1359
 
1231
1360
  Args:
1232
1361
  source: A data source from which data can be imported.
@@ -1269,11 +1398,20 @@ class Table(SchemaObject):
1269
1398
  Insert rows from a CSV file:
1270
1399
 
1271
1400
  >>> tbl.insert(source='path/to/file.csv')
1401
+
1402
+ Insert Pydantic model instances into a table with two `pxt.Int` columns `a` and `b`:
1403
+
1404
+ >>> class MyModel(pydantic.BaseModel):
1405
+ ... a: int
1406
+ ... b: int
1407
+ ...
1408
+ >>> models = [MyModel(a=1, b=2), MyModel(a=3, b=4)]
1409
+ >>> tbl.insert(models)
1272
1410
  """
1273
1411
  raise NotImplementedError
1274
1412
 
1275
1413
  def update(
1276
- self, value_spec: dict[str, Any], where: Optional['exprs.Expr'] = None, cascade: bool = True
1414
+ self, value_spec: dict[str, Any], where: 'exprs.Expr' | None = None, cascade: bool = True
1277
1415
  ) -> UpdateStatus:
1278
1416
  """Update rows in this table.
1279
1417
 
@@ -1282,6 +1420,9 @@ class Table(SchemaObject):
1282
1420
  where: a predicate to filter rows to update.
1283
1421
  cascade: if True, also update all computed columns that transitively depend on the updated columns.
1284
1422
 
1423
+ Returns:
1424
+ An [`UpdateStatus`][pixeltable.UpdateStatus] object containing information about the update.
1425
+
1285
1426
  Examples:
1286
1427
  Set column `int_col` to 1 for all rows:
1287
1428
 
@@ -1299,10 +1440,13 @@ class Table(SchemaObject):
1299
1440
 
1300
1441
  >>> tbl.update({'int_col': tbl.int_col + 1}, where=tbl.int_col == 0)
1301
1442
  """
1302
- with Env.get().begin_xact():
1303
- status = self._tbl_version.get().update(value_spec, where, cascade)
1443
+ from pixeltable.catalog import Catalog
1444
+
1445
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
1446
+ self.__check_mutable('update')
1447
+ result = self._tbl_version.get().update(value_spec, where, cascade)
1304
1448
  FileCache.get().emit_eviction_warnings()
1305
- return status
1449
+ return result
1306
1450
 
1307
1451
  def batch_update(
1308
1452
  self,
@@ -1326,45 +1470,51 @@ class Table(SchemaObject):
1326
1470
  Update the `name` and `age` columns for the rows with ids 1 and 2 (assuming `id` is the primary key).
1327
1471
  If either row does not exist, this raises an error:
1328
1472
 
1329
- >>> tbl.update([{'id': 1, 'name': 'Alice', 'age': 30}, {'id': 2, 'name': 'Bob', 'age': 40}])
1473
+ >>> tbl.batch_update(
1474
+ ... [{'id': 1, 'name': 'Alice', 'age': 30}, {'id': 2, 'name': 'Bob', 'age': 40}]
1475
+ ... )
1330
1476
 
1331
1477
  Update the `name` and `age` columns for the row with `id` 1 (assuming `id` is the primary key) and insert
1332
1478
  the row with new `id` 3 (assuming this key does not exist):
1333
1479
 
1334
- >>> tbl.update(
1335
- [{'id': 1, 'name': 'Alice', 'age': 30}, {'id': 3, 'name': 'Bob', 'age': 40}],
1336
- if_not_exists='insert')
1480
+ >>> tbl.batch_update(
1481
+ ... [{'id': 1, 'name': 'Alice', 'age': 30}, {'id': 3, 'name': 'Bob', 'age': 40}],
1482
+ ... if_not_exists='insert'
1483
+ ... )
1337
1484
  """
1338
- if self._tbl_version_path.is_snapshot():
1339
- raise excs.Error('Cannot update a snapshot')
1340
- rows = list(rows)
1485
+ from pixeltable.catalog import Catalog
1341
1486
 
1342
- row_updates: list[dict[Column, exprs.Expr]] = []
1343
- pk_col_names = {c.name for c in self._tbl_version.get().primary_key_columns()}
1487
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
1488
+ self.__check_mutable('update')
1489
+ rows = list(rows)
1344
1490
 
1345
- # pseudo-column _rowid: contains the rowid of the row to update and can be used instead of the primary key
1346
- has_rowid = _ROWID_COLUMN_NAME in rows[0]
1347
- rowids: list[tuple[int, ...]] = []
1348
- if len(pk_col_names) == 0 and not has_rowid:
1349
- raise excs.Error('Table must have primary key for batch update')
1491
+ row_updates: list[dict[Column, exprs.Expr]] = []
1492
+ pk_col_names = {c.name for c in self._tbl_version.get().primary_key_columns()}
1350
1493
 
1351
- for row_spec in rows:
1352
- col_vals = self._tbl_version.get()._validate_update_spec(
1353
- row_spec, allow_pk=not has_rowid, allow_exprs=False, allow_media=False
1354
- )
1355
- if has_rowid:
1356
- # we expect the _rowid column to be present for each row
1357
- assert _ROWID_COLUMN_NAME in row_spec
1358
- rowids.append(row_spec[_ROWID_COLUMN_NAME])
1359
- else:
1360
- col_names = {col.name for col in col_vals}
1361
- if any(pk_col_name not in col_names for pk_col_name in pk_col_names):
1362
- missing_cols = pk_col_names - {col.name for col in col_vals}
1363
- raise excs.Error(f'Primary key columns ({", ".join(missing_cols)}) missing in {row_spec}')
1364
- row_updates.append(col_vals)
1365
-
1366
- with Env.get().begin_xact():
1367
- status = self._tbl_version.get().batch_update(
1494
+ # pseudo-column _rowid: contains the rowid of the row to update and can be used instead of the primary key
1495
+ has_rowid = _ROWID_COLUMN_NAME in rows[0]
1496
+ rowids: list[tuple[int, ...]] = []
1497
+ if len(pk_col_names) == 0 and not has_rowid:
1498
+ raise excs.Error('Table must have primary key for batch update')
1499
+
1500
+ for row_spec in rows:
1501
+ col_vals = self._tbl_version.get()._validate_update_spec(
1502
+ row_spec, allow_pk=not has_rowid, allow_exprs=False, allow_media=False
1503
+ )
1504
+ if has_rowid:
1505
+ # we expect the _rowid column to be present for each row
1506
+ assert _ROWID_COLUMN_NAME in row_spec
1507
+ rowids.append(row_spec[_ROWID_COLUMN_NAME])
1508
+ else:
1509
+ col_names = {col.name for col in col_vals}
1510
+ if any(pk_col_name not in col_names for pk_col_name in pk_col_names):
1511
+ missing_cols = pk_col_names - {col.name for col in col_vals}
1512
+ raise excs.Error(
1513
+ f'Primary key column(s) {", ".join(repr(c) for c in missing_cols)} missing in {row_spec}'
1514
+ )
1515
+ row_updates.append(col_vals)
1516
+
1517
+ result = self._tbl_version.get().batch_update(
1368
1518
  row_updates,
1369
1519
  rowids,
1370
1520
  error_if_not_exists=if_not_exists == 'error',
@@ -1372,9 +1522,85 @@ class Table(SchemaObject):
1372
1522
  cascade=cascade,
1373
1523
  )
1374
1524
  FileCache.get().emit_eviction_warnings()
1375
- return status
1525
+ return result
1526
+
1527
+ def recompute_columns(
1528
+ self,
1529
+ *columns: str | ColumnRef,
1530
+ where: 'exprs.Expr' | None = None,
1531
+ errors_only: bool = False,
1532
+ cascade: bool = True,
1533
+ ) -> UpdateStatus:
1534
+ """Recompute the values in one or more computed columns of this table.
1535
+
1536
+ Args:
1537
+ columns: The names or references of the computed columns to recompute.
1538
+ where: A predicate to filter rows to recompute.
1539
+ errors_only: If True, only run the recomputation for rows that have errors in the column (ie, the column's
1540
+ `errortype` property indicates that an error occurred). Only allowed for recomputing a single column.
1541
+ cascade: if True, also update all computed columns that transitively depend on the recomputed columns.
1542
+
1543
+ Examples:
1544
+ Recompute computed columns `c1` and `c2` for all rows in this table, and everything that transitively
1545
+ depends on them:
1546
+
1547
+ >>> tbl.recompute_columns('c1', 'c2')
1548
+
1549
+ Recompute computed column `c1` for all rows in this table, but don't recompute other columns that depend on
1550
+ it:
1551
+
1552
+ >>> tbl.recompute_columns(tbl.c1, cascade=False)
1553
+
1554
+ Recompute column `c1` and its dependents, but only for rows with `c2` == 0:
1555
+
1556
+ >>> tbl.recompute_columns('c1', where=tbl.c2 == 0)
1557
+
1558
+ Recompute column `c1` and its dependents, but only for rows that have errors in it:
1559
+
1560
+ >>> tbl.recompute_columns('c1', errors_only=True)
1561
+ """
1562
+ from pixeltable.catalog import Catalog
1563
+
1564
+ cat = Catalog.get()
1565
+ # lock_mutable_tree=True: we need to be able to see whether any transitive view has column dependents
1566
+ with cat.begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
1567
+ self.__check_mutable('recompute columns of')
1568
+ if len(columns) == 0:
1569
+ raise excs.Error('At least one column must be specified to recompute')
1570
+ if errors_only and len(columns) > 1:
1571
+ raise excs.Error('Cannot use errors_only=True with multiple columns')
1572
+
1573
+ col_names: list[str] = []
1574
+ for column in columns:
1575
+ col_name: str
1576
+ col: Column
1577
+ if isinstance(column, str):
1578
+ col = self._tbl_version_path.get_column(column)
1579
+ if col is None:
1580
+ raise excs.Error(f'Unknown column: {column}')
1581
+ col_name = column
1582
+ else:
1583
+ assert isinstance(column, ColumnRef)
1584
+ col = column.col
1585
+ if not self._tbl_version_path.has_column(col):
1586
+ raise excs.Error(f'Unknown column: {col.name}')
1587
+ col_name = col.name
1588
+ if not col.is_computed:
1589
+ raise excs.Error(f'Column {col_name!r} is not a computed column')
1590
+ if col.get_tbl().id != self._tbl_version_path.tbl_id:
1591
+ raise excs.Error(f'Cannot recompute column of a base: {col_name}')
1592
+ col_names.append(col_name)
1593
+
1594
+ if where is not None and not where.is_bound_by([self._tbl_version_path]):
1595
+ raise excs.Error(f'`where` predicate ({where}) is not bound by {self._display_str()}')
1596
+
1597
+ result = self._tbl_version.get().recompute_columns(
1598
+ col_names, where=where, errors_only=errors_only, cascade=cascade
1599
+ )
1600
+ FileCache.get().emit_eviction_warnings()
1601
+ return result
1376
1602
 
1377
- def delete(self, where: Optional['exprs.Expr'] = None) -> UpdateStatus:
1603
+ def delete(self, where: 'exprs.Expr' | None = None) -> UpdateStatus:
1378
1604
  """Delete rows in this table.
1379
1605
 
1380
1606
  Args:
@@ -1397,12 +1623,75 @@ class Table(SchemaObject):
1397
1623
  .. warning::
1398
1624
  This operation is irreversible.
1399
1625
  """
1400
- if self._tbl_version_path.is_snapshot():
1401
- raise excs.Error('Cannot revert a snapshot')
1402
- with Env.get().begin_xact():
1626
+ with catalog.Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
1627
+ self.__check_mutable('revert')
1403
1628
  self._tbl_version.get().revert()
1629
+ # remove cached md in order to force a reload on the next operation
1630
+ self._tbl_version_path.clear_cached_md()
1631
+
1632
+ def push(self) -> None:
1633
+ from pixeltable.share import push_replica
1634
+ from pixeltable.share.protocol import PxtUri
1635
+
1636
+ pxt_uri = self._get_pxt_uri()
1637
+ tbl_version = self._tbl_version_path.tbl_version.get()
1638
+
1639
+ if tbl_version.is_replica:
1640
+ raise excs.Error(f'push(): Cannot push replica table {self._name!r}. (Did you mean `pull()`?)')
1641
+
1642
+ if pxt_uri is None:
1643
+ raise excs.Error(
1644
+ f'push(): Table {self._name!r} has not yet been published to Pixeltable Cloud. '
1645
+ 'To publish it, use `pxt.publish()` instead.'
1646
+ )
1647
+
1648
+ if isinstance(self, catalog.View) and self._is_anonymous_snapshot():
1649
+ raise excs.Error(
1650
+ f'push(): Cannot push specific-version table handle {tbl_version.versioned_name!r}. '
1651
+ 'To push the latest version instead:\n'
1652
+ f' t = pxt.get_table({self._name!r})\n'
1653
+ f' t.push()'
1654
+ )
1655
+
1656
+ if self._tbl_version is None:
1657
+ # Named snapshots never have new versions to push.
1658
+ env.Env.get().console_logger.info('push(): Everything up to date.')
1659
+ return
1660
+
1661
+ # Parse the pxt URI to extract org/db and create a UUID-based URI for pushing
1662
+ parsed_uri = PxtUri(uri=pxt_uri)
1663
+ uuid_uri_obj = PxtUri.from_components(org=parsed_uri.org, id=self._id, db=parsed_uri.db)
1664
+ uuid_uri = str(uuid_uri_obj)
1665
+
1666
+ push_replica(uuid_uri, self)
1667
+
1668
+ def pull(self) -> None:
1669
+ from pixeltable.share import pull_replica
1670
+ from pixeltable.share.protocol import PxtUri
1671
+
1672
+ pxt_uri = self._get_pxt_uri()
1673
+ tbl_version = self._tbl_version_path.tbl_version.get()
1674
+
1675
+ if not tbl_version.is_replica or pxt_uri is None:
1676
+ raise excs.Error(
1677
+ f'pull(): Table {self._name!r} is not a replica of a Pixeltable Cloud table (nothing to `pull()`).'
1678
+ )
1679
+
1680
+ if isinstance(self, catalog.View) and self._is_anonymous_snapshot():
1681
+ raise excs.Error(
1682
+ f'pull(): Cannot pull specific-version table handle {tbl_version.versioned_name!r}. '
1683
+ 'To pull the latest version instead:\n'
1684
+ f' t = pxt.get_table({self._name!r})\n'
1685
+ f' t.pull()'
1686
+ )
1687
+
1688
+ # Parse the pxt URI to extract org/db and create a UUID-based URI for pulling
1689
+ parsed_uri = PxtUri(uri=pxt_uri)
1690
+ uuid_uri_obj = PxtUri.from_components(org=parsed_uri.org, id=self._id, db=parsed_uri.db)
1691
+ uuid_uri = str(uuid_uri_obj)
1692
+
1693
+ pull_replica(self._path(), uuid_uri)
1404
1694
 
1405
- @property
1406
1695
  def external_stores(self) -> list[str]:
1407
1696
  return list(self._tbl_version.get().external_stores.keys())
1408
1697
 
@@ -1410,21 +1699,20 @@ class Table(SchemaObject):
1410
1699
  """
1411
1700
  Links the specified `ExternalStore` to this table.
1412
1701
  """
1413
- if self._tbl_version.get().is_snapshot:
1414
- raise excs.Error(f'Table `{self._name}` is a snapshot, so it cannot be linked to an external store.')
1415
- if store.name in self.external_stores:
1416
- raise excs.Error(f'Table `{self._name}` already has an external store with that name: {store.name}')
1417
- _logger.info(f'Linking external store `{store.name}` to table `{self._name}`')
1418
- with Env.get().begin_xact():
1702
+ from pixeltable.catalog import Catalog
1703
+
1704
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=False):
1705
+ self.__check_mutable('link an external store to')
1706
+ if store.name in self.external_stores():
1707
+ raise excs.Error(f'Table {self._name!r} already has an external store with that name: {store.name}')
1708
+ _logger.info(f'Linking external store {store.name!r} to table {self._name!r}.')
1709
+
1710
+ store.link(self._tbl_version.get()) # might call tbl_version.add_columns()
1419
1711
  self._tbl_version.get().link_external_store(store)
1420
- env.Env.get().console_logger.info(f'Linked external store `{store.name}` to table `{self._name}`.')
1712
+ env.Env.get().console_logger.info(f'Linked external store {store.name!r} to table {self._name!r}.')
1421
1713
 
1422
1714
  def unlink_external_stores(
1423
- self,
1424
- stores: Optional[str | list[str]] = None,
1425
- *,
1426
- delete_external_data: bool = False,
1427
- ignore_errors: bool = False,
1715
+ self, stores: str | list[str] | None = None, *, delete_external_data: bool = False, ignore_errors: bool = False
1428
1716
  ) -> None:
1429
1717
  """
1430
1718
  Unlinks this table's external stores.
@@ -1437,28 +1725,37 @@ class Table(SchemaObject):
1437
1725
  delete_external_data (bool): If `True`, then the external data store will also be deleted. WARNING: This
1438
1726
  is a destructive operation that will delete data outside Pixeltable, and cannot be undone.
1439
1727
  """
1440
- self._check_is_dropped()
1441
- all_stores = self.external_stores
1442
-
1443
- if stores is None:
1444
- stores = all_stores
1445
- elif isinstance(stores, str):
1446
- stores = [stores]
1447
-
1448
- # Validation
1449
- if not ignore_errors:
1450
- for store in stores:
1451
- if store not in all_stores:
1452
- raise excs.Error(f'Table `{self._name}` has no external store with that name: {store}')
1453
-
1454
- with Env.get().begin_xact():
1455
- for store in stores:
1456
- self._tbl_version.get().unlink_external_store(store, delete_external_data=delete_external_data)
1457
- env.Env.get().console_logger.info(f'Unlinked external store from table `{self._name}`: {store}')
1728
+ from pixeltable.catalog import Catalog
1729
+
1730
+ if not self._tbl_version_path.is_mutable():
1731
+ return
1732
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=False):
1733
+ all_stores = self.external_stores()
1734
+
1735
+ if stores is None:
1736
+ stores = all_stores
1737
+ elif isinstance(stores, str):
1738
+ stores = [stores]
1739
+
1740
+ # Validation
1741
+ if not ignore_errors:
1742
+ for store_name in stores:
1743
+ if store_name not in all_stores:
1744
+ raise excs.Error(f'Table {self._name!r} has no external store with that name: {store_name}')
1745
+
1746
+ for store_name in stores:
1747
+ store = self._tbl_version.get().external_stores[store_name]
1748
+ # get hold of the store's debug string before deleting it
1749
+ store_str = str(store)
1750
+ store.unlink(self._tbl_version.get()) # might call tbl_version.drop_columns()
1751
+ self._tbl_version.get().unlink_external_store(store)
1752
+ if delete_external_data and isinstance(store, pxt.io.external_store.Project):
1753
+ store.delete()
1754
+ env.Env.get().console_logger.info(f'Unlinked external store from table {self._name!r}: {store_str}')
1458
1755
 
1459
1756
  def sync(
1460
- self, stores: Optional[str | list[str]] = None, *, export_data: bool = True, import_data: bool = True
1461
- ) -> 'pxt.io.SyncStatus':
1757
+ self, stores: str | list[str] | None = None, *, export_data: bool = True, import_data: bool = True
1758
+ ) -> UpdateStatus:
1462
1759
  """
1463
1760
  Synchronizes this table with its linked external stores.
1464
1761
 
@@ -1468,29 +1765,139 @@ class Table(SchemaObject):
1468
1765
  export_data: If `True`, data from this table will be exported to the external stores during synchronization.
1469
1766
  import_data: If `True`, data from the external stores will be imported to this table during synchronization.
1470
1767
  """
1471
- self._check_is_dropped()
1472
- all_stores = self.external_stores
1768
+ from pixeltable.catalog import Catalog
1769
+
1770
+ if not self._tbl_version_path.is_mutable():
1771
+ return UpdateStatus()
1772
+ # we lock the entire tree starting at the root base table in order to ensure that all synced columns can
1773
+ # have their updates propagated down the tree
1774
+ base_tv = self._tbl_version_path.get_tbl_versions()[-1]
1775
+ with Catalog.get().begin_xact(tbl=TableVersionPath(base_tv), for_write=True, lock_mutable_tree=True):
1776
+ all_stores = self.external_stores()
1473
1777
 
1474
- if stores is None:
1475
- stores = all_stores
1476
- elif isinstance(stores, str):
1477
- stores = [stores]
1778
+ if stores is None:
1779
+ stores = all_stores
1780
+ elif isinstance(stores, str):
1781
+ stores = [stores]
1478
1782
 
1479
- for store in stores:
1480
- if store not in all_stores:
1481
- raise excs.Error(f'Table `{self._name}` has no external store with that name: {store}')
1783
+ for store in stores:
1784
+ if store not in all_stores:
1785
+ raise excs.Error(f'Table {self._name!r} has no external store with that name: {store}')
1482
1786
 
1483
- sync_status = pxt.io.SyncStatus.empty()
1484
- with Env.get().begin_xact():
1787
+ sync_status = UpdateStatus()
1485
1788
  for store in stores:
1486
1789
  store_obj = self._tbl_version.get().external_stores[store]
1487
1790
  store_sync_status = store_obj.sync(self, export_data=export_data, import_data=import_data)
1488
- sync_status = sync_status.combine(store_sync_status)
1791
+ sync_status += store_sync_status
1489
1792
 
1490
1793
  return sync_status
1491
1794
 
1492
1795
  def __dir__(self) -> list[str]:
1493
- return list(super().__dir__()) + list(self._schema.keys())
1796
+ return list(super().__dir__()) + list(self._get_schema().keys())
1494
1797
 
1495
1798
  def _ipython_key_completions_(self) -> list[str]:
1496
- return list(self._schema.keys())
1799
+ return list(self._get_schema().keys())
1800
+
1801
def get_versions(self, n: int | None = None) -> list[VersionMetadata]:
    """
    Returns information about versions of this table, most recent first.

    `get_versions()` is intended for programmatic access to version metadata; for human-readable
    output, use [`history()`][pixeltable.Table.history] instead.

    Args:
        n: if specified, will return at most `n` versions; must be an integer >= 1

    Returns:
        A list of [VersionMetadata][pixeltable.VersionMetadata] dictionaries, one per version retrieved, most
        recent first.

    Raises:
        excs.Error: if `n` is not an integer or is smaller than 1.

    Examples:
        Retrieve metadata about all versions of the table `tbl`:

        >>> tbl.get_versions()

        Retrieve metadata about the most recent 5 versions of the table `tbl`:

        >>> tbl.get_versions(n=5)
    """
    from pixeltable.catalog import Catalog

    # `None` means "no limit"; a very large bound stands in for it
    n = 1_000_000_000 if n is None else n
    if not isinstance(n, int) or n < 1:
        raise excs.Error(f'Invalid value for `n`: {n}')

    # Retrieve the table history components from the catalog.
    # One extra version is requested, if available, so that the schema change of the oldest reported
    # version can be computed against its predecessor.
    table_id = self._id
    history_entries = Catalog.get().collect_tbl_history(table_id, n + 1)

    # Construct the metadata change description dictionary
    columns_per_version = [
        (entry.version_md.version, entry.schema_version_md.columns) for entry in history_entries
    ]
    schema_changes = MetadataUtils._create_md_change_dict(columns_per_version)

    # Drop the extra look-behind version (if one was returned) from the reported entries
    has_extra_entry = len(history_entries) > n
    if has_extra_entry:
        assert len(history_entries) == n + 1
    reported_entries = history_entries[:-1] if has_extra_entry else history_entries

    result: list[VersionMetadata] = []
    for entry in reported_entries:
        version = entry.version_md.version
        schema_change = schema_changes.get(version)

        # a version without a recorded update status is reported with an empty one
        update_status = entry.version_md.update_status
        if update_status is None:
            update_status = UpdateStatus()

        change_type: Literal['schema', 'data']
        if schema_change is not None:
            change_type = 'schema'
        else:
            change_type = 'data'

        # reported counts combine the version's own row counts with its cascade row counts
        rcs = update_status.row_count_stats + update_status.cascade_row_count_stats
        created_at = datetime.datetime.fromtimestamp(entry.version_md.created_at, tz=datetime.timezone.utc)
        version_metadata = VersionMetadata(
            version=version,
            created_at=created_at,
            user=entry.version_md.user,
            change_type=change_type,
            inserts=rcs.ins_rows,
            updates=rcs.upd_rows,
            deletes=rcs.del_rows,
            errors=rcs.num_excs,
            computed=rcs.computed_values,
            schema_change=schema_change,
        )
        result.append(version_metadata)

    return result
1872
+
1873
+ def history(self, n: int | None = None) -> pd.DataFrame:
1874
+ """
1875
+ Returns a human-readable report about versions of this table.
1876
+
1877
+ `history()` is intended for human-readable output of version metadata; for programmatic access,
1878
+ use [`get_versions()`][pixeltable.Table.get_versions] instead.
1879
+
1880
+ Args:
1881
+ n: if specified, will return at most `n` versions
1882
+
1883
+ Returns:
1884
+ A report with information about each version, one per row, most recent first.
1885
+
1886
+ Examples:
1887
+ Report all versions of the table:
1888
+
1889
+ >>> tbl.history()
1890
+
1891
+ Report only the most recent 5 changes to the table:
1892
+
1893
+ >>> tbl.history(n=5)
1894
+ """
1895
+ versions = self.get_versions(n)
1896
+ assert len(versions) > 0
1897
+ return pd.DataFrame([list(v.values()) for v in versions], columns=list(versions[0].keys()))
1898
+
1899
+ def __check_mutable(self, op_descr: str) -> None:
1900
+ if self._tbl_version_path.is_replica():
1901
+ raise excs.Error(f'{self._display_str()}: Cannot {op_descr} a replica.')
1902
+ if self._tbl_version_path.is_snapshot():
1903
+ raise excs.Error(f'{self._display_str()}: Cannot {op_descr} a snapshot.')