pixeltable 0.3.14__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff compares the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic.
- pixeltable/__init__.py +1 -1
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +9 -1
- pixeltable/catalog/catalog.py +559 -134
- pixeltable/catalog/column.py +36 -32
- pixeltable/catalog/dir.py +1 -2
- pixeltable/catalog/globals.py +12 -0
- pixeltable/catalog/insertable_table.py +30 -25
- pixeltable/catalog/schema_object.py +9 -6
- pixeltable/catalog/table.py +334 -267
- pixeltable/catalog/table_version.py +360 -241
- pixeltable/catalog/table_version_handle.py +18 -2
- pixeltable/catalog/table_version_path.py +86 -23
- pixeltable/catalog/view.py +47 -23
- pixeltable/dataframe.py +198 -19
- pixeltable/env.py +6 -4
- pixeltable/exceptions.py +6 -0
- pixeltable/exec/__init__.py +1 -1
- pixeltable/exec/exec_node.py +2 -0
- pixeltable/exec/expr_eval/evaluators.py +4 -1
- pixeltable/exec/expr_eval/expr_eval_node.py +4 -4
- pixeltable/exec/in_memory_data_node.py +1 -1
- pixeltable/exec/sql_node.py +188 -22
- pixeltable/exprs/column_property_ref.py +16 -6
- pixeltable/exprs/column_ref.py +33 -11
- pixeltable/exprs/comparison.py +1 -1
- pixeltable/exprs/data_row.py +5 -3
- pixeltable/exprs/expr.py +11 -4
- pixeltable/exprs/literal.py +2 -0
- pixeltable/exprs/row_builder.py +4 -6
- pixeltable/exprs/rowid_ref.py +8 -0
- pixeltable/exprs/similarity_expr.py +1 -0
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +5 -3
- pixeltable/func/tools.py +12 -2
- pixeltable/func/udf.py +2 -2
- pixeltable/functions/__init__.py +1 -0
- pixeltable/functions/anthropic.py +19 -45
- pixeltable/functions/deepseek.py +19 -38
- pixeltable/functions/fireworks.py +9 -18
- pixeltable/functions/gemini.py +165 -33
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/llama_cpp.py +6 -6
- pixeltable/functions/math.py +63 -0
- pixeltable/functions/mistralai.py +16 -53
- pixeltable/functions/ollama.py +1 -1
- pixeltable/functions/openai.py +82 -165
- pixeltable/functions/string.py +212 -58
- pixeltable/functions/together.py +22 -80
- pixeltable/globals.py +10 -4
- pixeltable/index/base.py +5 -0
- pixeltable/index/btree.py +5 -0
- pixeltable/index/embedding_index.py +5 -0
- pixeltable/io/external_store.py +10 -31
- pixeltable/io/label_studio.py +5 -5
- pixeltable/io/parquet.py +4 -4
- pixeltable/io/table_data_conduit.py +1 -32
- pixeltable/metadata/__init__.py +11 -2
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_30.py +6 -11
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/util.py +3 -9
- pixeltable/metadata/notes.py +3 -0
- pixeltable/metadata/schema.py +13 -1
- pixeltable/plan.py +135 -12
- pixeltable/share/packager.py +321 -20
- pixeltable/share/publish.py +2 -2
- pixeltable/store.py +31 -13
- pixeltable/type_system.py +30 -0
- pixeltable/utils/dbms.py +1 -1
- pixeltable/utils/formatter.py +64 -42
- {pixeltable-0.3.14.dist-info → pixeltable-0.4.0.dist-info}/METADATA +2 -1
- {pixeltable-0.3.14.dist-info → pixeltable-0.4.0.dist-info}/RECORD +79 -74
- {pixeltable-0.3.14.dist-info → pixeltable-0.4.0.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.14.dist-info → pixeltable-0.4.0.dist-info}/WHEEL +0 -0
- {pixeltable-0.3.14.dist-info → pixeltable-0.4.0.dist-info}/entry_points.txt +0 -0
pixeltable/catalog/table.py
CHANGED
@@ -4,11 +4,11 @@ import abc
 import builtins
 import json
 import logging
+from keyword import iskeyword as is_python_keyword
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Iterable, Literal, Optional, Union, overload

 from typing import _GenericAlias  # type: ignore[attr-defined]  # isort: skip
-from keyword import iskeyword as is_python_keyword
 from uuid import UUID

 import pandas as pd
@@ -16,7 +16,6 @@ import sqlalchemy as sql

 import pixeltable as pxt
 from pixeltable import catalog, env, exceptions as excs, exprs, index, type_system as ts
-from pixeltable.env import Env
 from pixeltable.metadata import schema

 from ..exprs import ColumnRef
@@ -49,26 +48,23 @@ class Table(SchemaObject):
     """
     A handle to a table, view, or snapshot. This class is the primary interface through which table operations
     (queries, insertions, updates, etc.) are performed in Pixeltable.
+
+    Every user-invoked operation that runs an ExecNode tree (directly or indirectly) needs to call
+    FileCache.emit_eviction_warnings() at the end of the operation.
     """

-    #
-
+    # the chain of TableVersions needed to run queries and supply metadata (eg, schema)
+    _tbl_version_path: TableVersionPath

-
-
+    # the physical TableVersion backing this Table; None for pure snapshots
+    _tbl_version: Optional[TableVersionHandle]

     def __init__(self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath):
         super().__init__(id, name, dir_id)
-        self.
-        self.
-
-        # @property
-        # def _has_dependents(self) -> bool:
-        #     """Returns True if this table has any dependent views, or snapshots."""
-        #     return len(self._get_views(recursive=False)) > 0
+        self._tbl_version_path = tbl_version_path
+        self._tbl_version = None

     def _move(self, new_name: str, new_dir_id: UUID) -> None:
-        self._check_is_dropped()
         super()._move(new_name, new_dir_id)
         conn = env.Env.get().conn
         stmt = sql.text(
@@ -81,6 +77,7 @@ class Table(SchemaObject):
         )
         conn.execute(stmt, {'new_dir_id': new_dir_id, 'new_name': json.dumps(new_name), 'id': self._id})

+    # this is duplicated from SchemaObject so that our API docs show the docstring for Table
     def get_metadata(self) -> dict[str, Any]:
         """
         Retrieves metadata associated with this table.
@@ -106,40 +103,27 @@ class Table(SchemaObject):
         }
         ```
         """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        return super().get_metadata()
+
+    def _get_metadata(self) -> dict[str, Any]:
+        md = super()._get_metadata()
+        base = self._get_base_table()
+        md['base'] = base._path() if base is not None else None
+        md['schema'] = self._get_schema()
+        md['is_replica'] = self._tbl_version_path.is_replica()
+        md['version'] = self._get_version()
+        md['schema_version'] = self._tbl_version_path.schema_version()
+        md['comment'] = self._get_comment()
+        md['num_retained_versions'] = self._get_num_retained_versions()
+        md['media_validation'] = self._get_media_validation().name.lower()
+        return md
+
+    def _get_version(self) -> int:
         """Return the version of this table. Used by tests to ascertain version changes."""
-        return self.
-
-    @property
-    def _tbl_version(self) -> TableVersionHandle:
-        """Return TableVersion for just this table."""
-        return self._tbl_version_path.tbl_version
-
-    @property
-    def _tbl_version_path(self) -> TableVersionPath:
-        self._check_is_dropped()
-        return self.__tbl_version_path
+        return self._tbl_version_path.version()

     def __hash__(self) -> int:
-        return hash(self.
-
-    def _check_is_dropped(self) -> None:
-        if self._is_dropped:
-            raise excs.Error(f'{self._display_name()} {self._name} has been dropped')
+        return hash(self._tbl_version_path.tbl_id)

     def __getattr__(self, name: str) -> 'exprs.ColumnRef':
         """Return a ColumnRef for the given name."""
@@ -163,16 +147,21 @@ class Table(SchemaObject):
         Returns:
             A list of view paths.
         """
-
-        with env.Env.get().begin_xact():
-            return [t._path for t in self._get_views(recursive=recursive)]
+        from pixeltable.catalog import Catalog

-
+        with Catalog.get().begin_xact(for_write=False):
+            return [t._path() for t in self._get_views(recursive=recursive)]
+
+    def _get_views(self, *, recursive: bool = True, include_snapshots: bool = True) -> list['Table']:
         cat = catalog.Catalog.get()
         view_ids = cat.get_view_ids(self._id)
         views = [cat.get_table_by_id(id) for id in view_ids]
+        if not include_snapshots:
+            views = [t for t in views if not t._tbl_version_path.is_snapshot()]
         if recursive:
-            views.extend(
+            views.extend(
+                t for view in views for t in view._get_views(recursive=True, include_snapshots=include_snapshots)
+            )
         return views

     def _df(self) -> 'pxt.dataframe.DataFrame':
@@ -187,14 +176,20 @@ class Table(SchemaObject):

         See [`DataFrame.select`][pixeltable.DataFrame.select] for more details.
         """
-
+        from pixeltable.catalog import Catalog
+
+        with Catalog.get().begin_xact(for_write=False):
+            return self._df().select(*items, **named_items)

     def where(self, pred: 'exprs.Expr') -> 'pxt.DataFrame':
         """Filter rows from this table based on the expression.

         See [`DataFrame.where`][pixeltable.DataFrame.where] for more details.
         """
-
+        from pixeltable.catalog import Catalog
+
+        with Catalog.get().begin_xact(for_write=False):
+            return self._df().where(pred)

     def join(
         self,
@@ -204,21 +199,30 @@ class Table(SchemaObject):
         how: 'pixeltable.plan.JoinType.LiteralType' = 'inner',
     ) -> 'pxt.DataFrame':
         """Join this table with another table."""
-
+        from pixeltable.catalog import Catalog
+
+        with Catalog.get().begin_xact(for_write=False):
+            return self._df().join(other, on=on, how=how)

     def order_by(self, *items: 'exprs.Expr', asc: bool = True) -> 'pxt.DataFrame':
         """Order the rows of this table based on the expression.

         See [`DataFrame.order_by`][pixeltable.DataFrame.order_by] for more details.
         """
-
+        from pixeltable.catalog import Catalog
+
+        with Catalog.get().begin_xact(for_write=False):
+            return self._df().order_by(*items, asc=asc)

     def group_by(self, *items: 'exprs.Expr') -> 'pxt.DataFrame':
         """Group the rows of this table based on the expression.

         See [`DataFrame.group_by`][pixeltable.DataFrame.group_by] for more details.
         """
-
+        from pixeltable.catalog import Catalog
+
+        with Catalog.get().begin_xact(for_write=False):
+            return self._df().group_by(*items)

     def distinct(self) -> 'pxt.DataFrame':
         """Remove duplicate rows from table."""
@@ -227,6 +231,22 @@ class Table(SchemaObject):
     def limit(self, n: int) -> 'pxt.DataFrame':
         return self._df().limit(n)

+    def sample(
+        self,
+        n: Optional[int] = None,
+        n_per_stratum: Optional[int] = None,
+        fraction: Optional[float] = None,
+        seed: Optional[int] = None,
+        stratify_by: Any = None,
+    ) -> pxt.DataFrame:
+        """Choose a shuffled sample of rows
+
+        See [`DataFrame.sample`][pixeltable.DataFrame.sample] for more details.
+        """
+        return self._df().sample(
+            n=n, n_per_stratum=n_per_stratum, fraction=fraction, seed=seed, stratify_by=stratify_by
+        )
+
     def collect(self) -> 'pxt.dataframe.DataFrameResultSet':
         """Return rows from this table."""
         return self._df().collect()
@@ -247,35 +267,32 @@ class Table(SchemaObject):
         """Return the number of rows in this table."""
         return self._df().count()

-    @property
     def columns(self) -> list[str]:
         """Return the names of the columns in this table."""
         cols = self._tbl_version_path.columns()
         return [c.name for c in cols]

-
-    def _schema(self) -> dict[str, ts.ColumnType]:
+    def _get_schema(self) -> dict[str, ts.ColumnType]:
         """Return the schema (column names and column types) of this table."""
         return {c.name: c.col_type for c in self._tbl_version_path.columns()}

-
-
-
-
+    def get_base_table(self) -> Optional['Table']:
+        from pixeltable.catalog import Catalog
+
+        with Catalog.get().begin_xact(for_write=False):
+            return self._get_base_table()

-    @property
     @abc.abstractmethod
-    def
-        """The base's Table instance"""
+    def _get_base_table(self) -> Optional['Table']:
+        """The base's Table instance. Requires a transaction context"""

-
-
-
-
-        base = self._base_table
+    def _get_base_tables(self) -> list['Table']:
+        """The ancestor list of bases of this table, starting with its immediate base. Requires a transaction context"""
+        bases: list[Table] = []
+        base = self._get_base_table()
         while base is not None:
             bases.append(base)
-            base = base.
+            base = base._get_base_table()
         return bases

     @property
@@ -283,17 +300,14 @@ class Table(SchemaObject):
     def _effective_base_versions(self) -> list[Optional[int]]:
         """The effective versions of the ancestor bases, starting with its immediate base."""

-
-
-        return self._tbl_version.get().comment
+    def _get_comment(self) -> str:
+        return self._tbl_version_path.comment()

-
-
-        return self._tbl_version.get().num_retained_versions
+    def _get_num_retained_versions(self) -> int:
+        return self._tbl_version_path.num_retained_versions()

-
-
-        return self._tbl_version.get().media_validation
+    def _get_media_validation(self) -> MediaValidation:
+        return self._tbl_version_path.media_validation()

     def __repr__(self) -> str:
         return self._descriptors().to_string()
@@ -305,18 +319,21 @@ class Table(SchemaObject):
         """
         Constructs a list of descriptors for this table that can be pretty-printed.
         """
-
-
-
-
-
-        helper.append(
-
-
-
-
-
-
+        from pixeltable.catalog import Catalog
+
+        with Catalog.get().begin_xact(for_write=False):
+            helper = DescriptionHelper()
+            helper.append(self._table_descriptor())
+            helper.append(self._col_descriptor())
+            idxs = self._index_descriptor()
+            if not idxs.empty:
+                helper.append(idxs)
+            stores = self._external_store_descriptor()
+            if not stores.empty:
+                helper.append(stores)
+            if self._get_comment():
+                helper.append(f'COMMENT: {self._get_comment()}')
+            return helper

     def _col_descriptor(self, columns: Optional[list[str]] = None) -> pd.DataFrame:
         return pd.DataFrame(
@@ -332,6 +349,8 @@ class Table(SchemaObject):
     def _index_descriptor(self, columns: Optional[list[str]] = None) -> pd.DataFrame:
         from pixeltable import index

+        if self._tbl_version is None:
+            return pd.DataFrame([])
         pd_rows = []
         for name, info in self._tbl_version.get().idxs_by_name.items():
             if isinstance(info.idx, index.EmbeddingIndex) and (columns is None or info.col.name in columns):
@@ -351,7 +370,7 @@ class Table(SchemaObject):

     def _external_store_descriptor(self) -> pd.DataFrame:
         pd_rows = []
-        for name, store in self.
+        for name, store in self._tbl_version_path.tbl_version.get().external_stores.items():
             row = {'External Store': name, 'Type': type(store).__name__}
             pd_rows.append(row)
         return pd.DataFrame(pd_rows)
@@ -360,7 +379,6 @@ class Table(SchemaObject):
         """
         Print the table schema.
         """
-        self._check_is_dropped()
         if getattr(builtins, '__IPYTHON__', False):
             from IPython.display import Markdown, display

@@ -368,11 +386,6 @@ class Table(SchemaObject):
         else:
             print(repr(self))

-    def _drop(self) -> None:
-        self._check_is_dropped()
-        self._tbl_version.get().drop()
-        self._is_dropped = True
-
     # TODO Factor this out into a separate module.
     # The return type is unresolvable, but torch can't be imported since it's an optional dependency.
     def to_pytorch_dataset(self, image_format: str = 'pt') -> 'torch.utils.data.IterableDataset':
@@ -390,9 +403,11 @@ class Table(SchemaObject):
     def _column_has_dependents(self, col: Column) -> bool:
         """Returns True if the column has dependents, False otherwise."""
         assert col is not None
-        assert col.name in self.
-
+        assert col.name in self._get_schema()
+        cat = catalog.Catalog.get()
+        if any(c.name is not None for c in cat.get_column_dependents(col.tbl.id, col.id)):
             return True
+        assert self._tbl_version is not None
         return any(
             col in store.get_local_columns()
             for view in (self, *self._get_views(recursive=True))
@@ -404,8 +419,8 @@ class Table(SchemaObject):

         If `if_exists='ignore'`, returns a list of existing columns, if any, in `new_col_names`.
         """
-        assert not
-        existing_col_names = set(self.
+        assert self._tbl_version is not None
+        existing_col_names = set(self._get_schema().keys())
         cols_to_ignore = []
         for new_col_name in new_col_names:
             if new_col_name in existing_col_names:
@@ -473,15 +488,17 @@ class Table(SchemaObject):
         ... }
         ... tbl.add_columns(schema)
         """
-
-
-
-
-
-
-
-
-
+        from pixeltable.catalog import Catalog
+
+        # lock_mutable_tree=True: we might end up having to drop existing columns, which requires locking the tree
+        with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
+            if self._tbl_version_path.is_snapshot():
+                raise excs.Error('Cannot add column to a snapshot.')
+            col_schema = {
+                col_name: {'type': ts.ColumnType.normalize_type(spec, nullable_default=True, allow_builtin_types=False)}
+                for col_name, spec in schema.items()
+            }
+
             # handle existing columns based on if_exists parameter
             cols_to_ignore = self._ignore_or_drop_existing_columns(
                 list(col_schema.keys()), IfExistsParam.validated(if_exists, 'if_exists')
@@ -496,6 +513,7 @@ class Table(SchemaObject):
             new_cols = self._create_columns(col_schema)
             for new_col in new_cols:
                 self._verify_column(new_col)
+            assert self._tbl_version is not None
             status = self._tbl_version.get().add_columns(new_cols, print_stats=False, on_error='abort')
             FileCache.get().emit_eviction_warnings()
             return status
@@ -534,22 +552,24 @@ class Table(SchemaObject):

         >>> tbl.add_columns({'new_col': pxt.Int})
         """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        from pixeltable.catalog import Catalog
+
+        with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
+            # verify kwargs
+            if self._tbl_version_path.is_snapshot():
+                raise excs.Error('Cannot add column to a snapshot.')
+            # verify kwargs and construct column schema dict
+            if len(kwargs) != 1:
+                raise excs.Error(
+                    f'add_column() requires exactly one keyword argument of the form "col_name=col_type"; '
+                    f'got {len(kwargs)} instead ({", ".join(kwargs.keys())})'
+                )
+            col_type = next(iter(kwargs.values()))
+            if not isinstance(col_type, (ts.ColumnType, type, _GenericAlias)):
+                raise excs.Error(
+                    'The argument to add_column() must be a type; did you intend to use add_computed_column() instead?'
+                )
+            return self.add_columns(kwargs, if_exists=if_exists)

     def add_computed_column(
         self,
@@ -598,33 +618,34 @@ class Table(SchemaObject):

         >>> tbl.add_computed_column(rotated=tbl.frame.rotate(90), stored=False)
         """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        col_schema[
-
-
-
-
-
-
-
-
-
+        from pixeltable.catalog import Catalog
+
+        with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
+            if self._tbl_version_path.is_snapshot():
+                raise excs.Error('Cannot add column to a snapshot.')
+            if len(kwargs) != 1:
+                raise excs.Error(
+                    f'add_computed_column() requires exactly one keyword argument of the form '
+                    '"column-name=type|value-expression"; '
+                    f'got {len(kwargs)} arguments instead ({", ".join(list(kwargs.keys()))})'
+                )
+            col_name, spec = next(iter(kwargs.items()))
+            if not is_valid_identifier(col_name):
+                raise excs.Error(f'Invalid column name: {col_name!r}')
+
+            col_schema: dict[str, Any] = {'value': spec}
+            if stored is not None:
+                col_schema['stored'] = stored
+
+            # Raise an error if the column expression refers to a column error property
+            if isinstance(spec, exprs.Expr):
+                for e in spec.subexprs(expr_class=exprs.ColumnPropertyRef, traverse_matches=False):
+                    if e.is_error_prop():
+                        raise excs.Error(
+                            'Use of a reference to an error property of another column is not allowed in a computed '
+                            f'column. The specified computation for this column contains this reference: `{e!r}`'
+                        )

-        with Env.get().begin_xact():
             # handle existing columns based on if_exists parameter
             cols_to_ignore = self._ignore_or_drop_existing_columns(
                 [col_name], IfExistsParam.validated(if_exists, 'if_exists')
@@ -637,6 +658,7 @@ class Table(SchemaObject):

             new_col = self._create_columns({col_name: col_schema})[0]
             self._verify_column(new_col)
+            assert self._tbl_version is not None
             status = self._tbl_version.get().add_columns([new_col], print_stats=print_stats, on_error=on_error)
             FileCache.get().emit_eviction_warnings()
             return status
@@ -781,41 +803,46 @@ class Table(SchemaObject):
         >>> tbl = pxt.get_table('my_table')
         ... tbl.drop_col(tbl.col, if_not_exists='ignore')
         """
-
-
-
-
-
-
-
-
-
-                        raise excs.Error(f'Column {column!r} unknown')
-                    assert if_not_exists_ == IfNotExistsParam.IGNORE
-                    return
-                col = self._tbl_version.get().cols_by_name[column]
-            else:
-                exists = self._tbl_version_path.has_column(column.col, include_bases=False)
-                if not exists:
-                    if if_not_exists_ == IfNotExistsParam.ERROR:
-                        raise excs.Error(f'Unknown column: {column.col.qualified_name}')
-                    assert if_not_exists_ == IfNotExistsParam.IGNORE
-                    return
-                col = column.col
+        from pixeltable.catalog import Catalog
+
+        cat = Catalog.get()
+        # lock_mutable_tree=True: we need to be able to see whether any transitive view has column dependents
+        with cat.begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
+            if self._tbl_version_path.is_snapshot():
+                raise excs.Error('Cannot drop column from a snapshot.')
+            col: Column = None
+            if_not_exists_ = IfNotExistsParam.validated(if_not_exists, 'if_not_exists')

-
-
-
-
-
-
+            if isinstance(column, str):
+                col = self._tbl_version_path.get_column(column, include_bases=False)
+                if col is None:
+                    if if_not_exists_ == IfNotExistsParam.ERROR:
+                        raise excs.Error(f'Column {column!r} unknown')
+                    assert if_not_exists_ == IfNotExistsParam.IGNORE
+                    return
+                col = self._tbl_version.get().cols_by_name[column]
+            else:
+                exists = self._tbl_version_path.has_column(column.col, include_bases=False)
+                if not exists:
+                    if if_not_exists_ == IfNotExistsParam.ERROR:
+                        raise excs.Error(f'Unknown column: {column.col.qualified_name}')
+                    assert if_not_exists_ == IfNotExistsParam.IGNORE
+                    return
+                col = column.col
+
+            dependent_user_cols = [c for c in cat.get_column_dependents(col.tbl.id, col.id) if c.name is not None]
+            if len(dependent_user_cols) > 0:
+                raise excs.Error(
+                    f'Cannot drop column `{col.name}` because the following columns depend on it:\n'
+                    f'{", ".join(c.name for c in dependent_user_cols)}'
+                )

-
+            _ = self._get_views(recursive=True, include_snapshots=False)
             # See if this column has a dependent store. We need to look through all stores in all
             # (transitive) views of this table.
             dependent_stores = [
                 (view, store)
-                for view in (self, *self._get_views(recursive=True))
+                for view in (self, *self._get_views(recursive=True, include_snapshots=False))
                 for store in view._tbl_version.get().external_stores.values()
                 if col in store.get_local_columns()
             ]
@@ -847,7 +874,9 @@ class Table(SchemaObject):
         >>> tbl = pxt.get_table('my_table')
         ... tbl.rename_column('col1', 'col2')
         """
-
+        from pixeltable.catalog import Catalog
+
+        with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=False):
             self._tbl_version.get().rename_column(old_name, new_name)

     def _list_index_info_for_test(self) -> list[dict[str, Any]]:
@@ -858,7 +887,6 @@ class Table(SchemaObject):
             A list of index information, each containing the index's
             id, name, and the name of the column it indexes.
         """
-        assert not self._is_dropped
         index_info = []
         for idx_name, idx in self._tbl_version.get().idxs_by_name.items():
             index_info.append({'_id': idx.id, '_name': idx_name, '_column': idx.col.name})
@@ -955,11 +983,13 @@ class Table(SchemaObject):
         ...     image_embed=image_embedding_fn
         ... )
         """
-
-
-
+        from pixeltable.catalog import Catalog
+
+        with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
+            if self._tbl_version_path.is_snapshot():
+                raise excs.Error('Cannot add an index to a snapshot')
+            col = self._resolve_column_parameter(column)

-        with Env.get().begin_xact():
             if idx_name is not None and idx_name in self._tbl_version.get().idxs_by_name:
                 if_exists_ = IfExistsParam.validated(if_exists, 'if_exists')
                 # An index with the same name already exists.
@@ -1039,15 +1069,17 @@ class Table(SchemaObject):
         >>> tbl = pxt.get_table('my_table')
         ... tbl.drop_embedding_index(idx_name='idx1', if_not_exists='ignore')
         """
+        from pixeltable.catalog import Catalog
+
         if (column is None) == (idx_name is None):
             raise excs.Error("Exactly one of 'column' or 'idx_name' must be provided")

-
-
-
-
+        with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
+            col: Column = None
+            if idx_name is None:
+                col = self._resolve_column_parameter(column)
+                assert col is not None

-        with Env.get().begin_xact():
             self._drop_index(col=col, idx_name=idx_name, _idx_class=index.EmbeddingIndex, if_not_exists=if_not_exists)

     def _resolve_column_parameter(self, column: Union[str, ColumnRef]) -> Column:
@@ -1116,15 +1148,17 @@ class Table(SchemaObject):
         ... tbl.drop_index(idx_name='idx1', if_not_exists='ignore')

         """
+        from pixeltable.catalog import Catalog
+
         if (column is None) == (idx_name is None):
             raise excs.Error("Exactly one of 'column' or 'idx_name' must be provided")

-
-
-
-
+        with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=False):
+            col: Column = None
+            if idx_name is None:
+                col = self._resolve_column_parameter(column)
+                assert col is not None

-        with Env.get().begin_xact():
             self._drop_index(col=col, idx_name=idx_name, if_not_exists=if_not_exists)

     def _drop_index(
@@ -1135,6 +1169,8 @@ class Table(SchemaObject):
         _idx_class: Optional[type[index.IndexBase]] = None,
         if_not_exists: Literal['error', 'ignore'] = 'error',
     ) -> None:
+        from pixeltable.catalog import Catalog
+
         if self._tbl_version_path.is_snapshot():
             raise excs.Error('Cannot drop an index from a snapshot')
         assert (col is None) != (idx_name is None)
@@ -1150,7 +1186,7 @@ class Table(SchemaObject):
         else:
             if col.tbl.id != self._tbl_version.id:
                 raise excs.Error(
-                    f'Column {col.name!r}: cannot drop index from column that belongs to base ({col.tbl.
+                    f'Column {col.name!r}: cannot drop index from column that belongs to base ({col.tbl.name!r})'
                 )
             idx_info_list = [info for info in self._tbl_version.get().idxs_by_name.values() if info.col.id == col.id]
             if _idx_class is not None:
@@ -1166,7 +1202,10 @@ class Table(SchemaObject):
             idx_info = idx_info_list[0]

             # Find out if anything depends on this index
-
+            val_col = idx_info.val_col
+            dependent_user_cols = [
+                c for c in Catalog.get().get_column_dependents(val_col.tbl.id, val_col.id) if c.name is not None
+            ]
             if len(dependent_user_cols) > 0:
                 raise excs.Error(
                     f'Cannot drop index because the following columns depend on it:\n'
@@ -1299,7 +1338,11 @@ class Table(SchemaObject):

         >>> tbl.update({'int_col': tbl.int_col + 1}, where=tbl.int_col == 0)
         """
-
+        from pixeltable.catalog import Catalog
+
+        with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
+            if self._tbl_version_path.is_snapshot():
+                raise excs.Error('Cannot update a snapshot')
             status = self._tbl_version.get().update(value_spec, where, cascade)
             FileCache.get().emit_eviction_warnings()
             return status
@@ -1335,35 +1378,37 @@ class Table(SchemaObject):
             [{'id': 1, 'name': 'Alice', 'age': 30}, {'id': 3, 'name': 'Bob', 'age': 40}],
             if_not_exists='insert')
         """
-
-            raise excs.Error('Cannot update a snapshot')
-        rows = list(rows)
+        from pixeltable.catalog import Catalog

-
-
+        with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
+            if self._tbl_version_path.is_snapshot():
+                raise excs.Error('Cannot update a snapshot')
+            rows = list(rows)

-
-
-        rowids: list[tuple[int, ...]] = []
-        if len(pk_col_names) == 0 and not has_rowid:
-            raise excs.Error('Table must have primary key for batch update')
+            row_updates: list[dict[Column, exprs.Expr]] = []
+            pk_col_names = {c.name for c in self._tbl_version.get().primary_key_columns()}

-
-
-
-        )
-
-
-
-
-
-
-        if
-
-
-
+            # pseudo-column _rowid: contains the rowid of the row to update and can be used instead of the primary key
+            has_rowid = _ROWID_COLUMN_NAME in rows[0]
+            rowids: list[tuple[int, ...]] = []
+            if len(pk_col_names) == 0 and not has_rowid:
+                raise excs.Error('Table must have primary key for batch update')
+
+            for row_spec in rows:
+                col_vals = self._tbl_version.get()._validate_update_spec(
+                    row_spec, allow_pk=not has_rowid, allow_exprs=False, allow_media=False
+                )
+                if has_rowid:
+                    # we expect the _rowid column to be present for each row
+                    assert _ROWID_COLUMN_NAME in row_spec
+                    rowids.append(row_spec[_ROWID_COLUMN_NAME])
+                else:
+                    col_names = {col.name for col in col_vals}
+                    if any(pk_col_name not in col_names for pk_col_name in pk_col_names):
+                        missing_cols = pk_col_names - {col.name for col in col_vals}
+                        raise excs.Error(f'Primary key columns ({", ".join(missing_cols)}) missing in {row_spec}')
+                row_updates.append(col_vals)

-        with Env.get().begin_xact():
             status = self._tbl_version.get().batch_update(
                 row_updates,
                 rowids,
@@ -1397,12 +1442,15 @@ class Table(SchemaObject):
         .. warning::
             This operation is irreversible.
         """
-
-
-        with
+        from pixeltable.catalog import Catalog
+
+        with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
+            if self._tbl_version_path.is_snapshot():
+                raise excs.Error('Cannot revert a snapshot')
             self._tbl_version.get().revert()
+            # remove cached md in order to force a reload on the next operation
+            self._tbl_version_path.clear_cached_md()

-    @property
     def external_stores(self) -> list[str]:
         return list(self._tbl_version.get().external_stores.keys())

@@ -1410,12 +1458,16 @@ class Table(SchemaObject):
         """
         Links the specified `ExternalStore` to this table.
         """
-
-
-
-
-
-
+        from pixeltable.catalog import Catalog
+
+        with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=False):
+            if self._tbl_version_path.is_snapshot():
+                raise excs.Error(f'Table `{self._name}` is a snapshot, so it cannot be linked to an external store.')
+            if store.name in self.external_stores():
+                raise excs.Error(f'Table `{self._name}` already has an external store with that name: {store.name}')
+            _logger.info(f'Linking external store `{store.name}` to table `{self._name}`')
+
+            store.link(self._tbl_version.get())  # might call tbl_version.add_columns()
             self._tbl_version.get().link_external_store(store)
             env.Env.get().console_logger.info(f'Linked external store `{store.name}` to table `{self._name}`.')

@@ -1437,24 +1489,33 @@ class Table(SchemaObject):
             delete_external_data (bool): If `True`, then the external data store will also be deleted. WARNING: This
                 is a destructive operation that will delete data outside Pixeltable, and cannot be undone.
         """
-
-        all_stores = self.external_stores
-
-        if stores is None:
-            stores = all_stores
-        elif isinstance(stores, str):
-            stores = [stores]
-
-        # Validation
-        if not ignore_errors:
-            for store in stores:
-                if store not in all_stores:
-                    raise excs.Error(f'Table `{self._name}` has no external store with that name: {store}')
+        from pixeltable.catalog import Catalog

-
-
-
-
+        if self._tbl_version_path.is_snapshot():
+            return
+        with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=False):
+            all_stores = self.external_stores()
+
+            if stores is None:
+                stores = all_stores
+            elif isinstance(stores, str):
+                stores = [stores]
+
+            # Validation
+            if not ignore_errors:
+                for store_name in stores:
+                    if store_name not in all_stores:
+                        raise excs.Error(f'Table `{self._name}` has no external store with that name: {store_name}')
+
+            for store_name in stores:
+                store = self._tbl_version.get().external_stores[store_name]
+                # get hold of the store's debug string before deleting it
+                store_str = str(store)
+                store.unlink(self._tbl_version.get())  # might call tbl_version.drop_columns()
+                self._tbl_version.get().unlink_external_store(store)
+                if delete_external_data and isinstance(store, pxt.io.external_store.Project):
+                    store.delete()
+                env.Env.get().console_logger.info(f'Unlinked external store from table `{self._name}`: {store_str}')

     def sync(
         self, stores: Optional[str | list[str]] = None, *, export_data: bool = True, import_data: bool = True
@@ -1468,20 +1529,26 @@ class Table(SchemaObject):
             export_data: If `True`, data from this table will be exported to the external stores during synchronization.
             import_data: If `True`, data from the external stores will be imported to this table during synchronization.
         """
-
-        all_stores = self.external_stores
+        from pixeltable.catalog import Catalog

-        if
-
-
-
+        if self._tbl_version_path.is_snapshot():
+            return pxt.io.SyncStatus.empty()
+        # we lock the entire tree starting at the root base table in order to ensure that all synced columns can
+        # have their updates propagated down the tree
+        base_tv = self._tbl_version_path.get_tbl_versions()[-1]
+        with Catalog.get().begin_xact(tbl=TableVersionPath(base_tv), for_write=True, lock_mutable_tree=True):
+            all_stores = self.external_stores()
+
+            if stores is None:
+                stores = all_stores
+            elif isinstance(stores, str):
+                stores = [stores]

-
-
-
+            for store in stores:
+                if store not in all_stores:
+                    raise excs.Error(f'Table `{self._name}` has no external store with that name: {store}')

-
-        with Env.get().begin_xact():
+            sync_status = pxt.io.SyncStatus.empty()
             for store in stores:
                 store_obj = self._tbl_version.get().external_stores[store]
                 store_sync_status = store_obj.sync(self, export_data=export_data, import_data=import_data)
@@ -1490,7 +1557,7 @@ class Table(SchemaObject):
             return sync_status

     def __dir__(self) -> list[str]:
-        return list(super().__dir__()) + list(self.
+        return list(super().__dir__()) + list(self._get_schema().keys())

     def _ipython_key_completions_(self) -> list[str]:
-        return list(self.
+        return list(self._get_schema().keys())