pixeltable 0.3.6__py3-none-any.whl → 0.3.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +5 -3
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +1 -0
- pixeltable/catalog/catalog.py +335 -128
- pixeltable/catalog/column.py +22 -5
- pixeltable/catalog/dir.py +19 -6
- pixeltable/catalog/insertable_table.py +34 -37
- pixeltable/catalog/named_function.py +0 -4
- pixeltable/catalog/schema_object.py +28 -42
- pixeltable/catalog/table.py +193 -158
- pixeltable/catalog/table_version.py +191 -232
- pixeltable/catalog/table_version_handle.py +50 -0
- pixeltable/catalog/table_version_path.py +49 -33
- pixeltable/catalog/view.py +56 -96
- pixeltable/config.py +103 -0
- pixeltable/dataframe.py +89 -89
- pixeltable/env.py +98 -168
- pixeltable/exec/aggregation_node.py +5 -4
- pixeltable/exec/cache_prefetch_node.py +1 -1
- pixeltable/exec/component_iteration_node.py +13 -9
- pixeltable/exec/data_row_batch.py +3 -3
- pixeltable/exec/exec_context.py +0 -4
- pixeltable/exec/exec_node.py +3 -2
- pixeltable/exec/expr_eval/schedulers.py +2 -1
- pixeltable/exec/in_memory_data_node.py +9 -4
- pixeltable/exec/row_update_node.py +1 -2
- pixeltable/exec/sql_node.py +20 -16
- pixeltable/exprs/__init__.py +2 -0
- pixeltable/exprs/arithmetic_expr.py +7 -11
- pixeltable/exprs/array_slice.py +1 -1
- pixeltable/exprs/column_property_ref.py +3 -3
- pixeltable/exprs/column_ref.py +12 -13
- pixeltable/exprs/comparison.py +3 -6
- pixeltable/exprs/compound_predicate.py +4 -4
- pixeltable/exprs/expr.py +31 -22
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +1 -1
- pixeltable/exprs/function_call.py +110 -80
- pixeltable/exprs/globals.py +3 -3
- pixeltable/exprs/in_predicate.py +1 -1
- pixeltable/exprs/inline_expr.py +3 -3
- pixeltable/exprs/is_null.py +1 -1
- pixeltable/exprs/json_mapper.py +2 -2
- pixeltable/exprs/json_path.py +17 -10
- pixeltable/exprs/literal.py +1 -1
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/row_builder.py +8 -17
- pixeltable/exprs/rowid_ref.py +21 -10
- pixeltable/exprs/similarity_expr.py +5 -5
- pixeltable/exprs/sql_element_cache.py +1 -1
- pixeltable/exprs/type_cast.py +2 -3
- pixeltable/exprs/variable.py +2 -2
- pixeltable/ext/__init__.py +2 -0
- pixeltable/ext/functions/__init__.py +2 -0
- pixeltable/ext/functions/yolox.py +3 -3
- pixeltable/func/__init__.py +3 -1
- pixeltable/func/aggregate_function.py +9 -9
- pixeltable/func/callable_function.py +3 -4
- pixeltable/func/expr_template_function.py +6 -16
- pixeltable/func/function.py +48 -14
- pixeltable/func/function_registry.py +1 -3
- pixeltable/func/query_template_function.py +5 -12
- pixeltable/func/signature.py +23 -22
- pixeltable/func/tools.py +3 -3
- pixeltable/func/udf.py +6 -4
- pixeltable/functions/__init__.py +2 -0
- pixeltable/functions/fireworks.py +7 -4
- pixeltable/functions/globals.py +4 -5
- pixeltable/functions/huggingface.py +1 -5
- pixeltable/functions/image.py +17 -7
- pixeltable/functions/llama_cpp.py +1 -1
- pixeltable/functions/mistralai.py +1 -1
- pixeltable/functions/ollama.py +4 -4
- pixeltable/functions/openai.py +19 -19
- pixeltable/functions/string.py +23 -30
- pixeltable/functions/timestamp.py +11 -6
- pixeltable/functions/together.py +14 -12
- pixeltable/functions/util.py +1 -1
- pixeltable/functions/video.py +5 -4
- pixeltable/functions/vision.py +6 -9
- pixeltable/functions/whisper.py +3 -3
- pixeltable/globals.py +246 -260
- pixeltable/index/__init__.py +2 -0
- pixeltable/index/base.py +1 -1
- pixeltable/index/btree.py +3 -1
- pixeltable/index/embedding_index.py +11 -5
- pixeltable/io/external_store.py +11 -12
- pixeltable/io/label_studio.py +4 -3
- pixeltable/io/parquet.py +57 -56
- pixeltable/iterators/__init__.py +4 -2
- pixeltable/iterators/audio.py +11 -11
- pixeltable/iterators/document.py +10 -10
- pixeltable/iterators/string.py +1 -2
- pixeltable/iterators/video.py +14 -15
- pixeltable/metadata/__init__.py +9 -5
- pixeltable/metadata/converters/convert_10.py +0 -1
- pixeltable/metadata/converters/convert_15.py +0 -2
- pixeltable/metadata/converters/convert_23.py +0 -2
- pixeltable/metadata/converters/convert_24.py +3 -3
- pixeltable/metadata/converters/convert_25.py +1 -1
- pixeltable/metadata/converters/convert_27.py +0 -2
- pixeltable/metadata/converters/convert_28.py +0 -2
- pixeltable/metadata/converters/convert_29.py +7 -8
- pixeltable/metadata/converters/util.py +7 -7
- pixeltable/metadata/schema.py +27 -19
- pixeltable/plan.py +68 -40
- pixeltable/share/__init__.py +2 -0
- pixeltable/share/packager.py +15 -12
- pixeltable/share/publish.py +3 -5
- pixeltable/store.py +37 -38
- pixeltable/type_system.py +41 -28
- pixeltable/utils/coco.py +4 -4
- pixeltable/utils/console_output.py +1 -3
- pixeltable/utils/description_helper.py +1 -1
- pixeltable/utils/documents.py +3 -3
- pixeltable/utils/filecache.py +20 -9
- pixeltable/utils/formatter.py +2 -3
- pixeltable/utils/media_store.py +1 -1
- pixeltable/utils/pytorch.py +1 -1
- pixeltable/utils/sql.py +4 -4
- pixeltable/utils/transactional_directory.py +2 -1
- {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/METADATA +1 -1
- pixeltable-0.3.8.dist-info/RECORD +174 -0
- pixeltable-0.3.6.dist-info/RECORD +0 -172
- {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/WHEEL +0 -0
- {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/entry_points.txt +0 -0
pixeltable/catalog/table.py
CHANGED
@@ -5,9 +5,9 @@ import builtins
 import json
 import logging
 from pathlib import Path
+from typing import TYPE_CHECKING, Any, Iterable, Literal, Optional, Union, overload

 from typing import _GenericAlias # type: ignore[attr-defined] # isort: skip
-from typing import TYPE_CHECKING, Any, Callable, Iterable, Literal, Optional, Sequence, Union, overload
 from uuid import UUID

 import pandas as pd
@@ -21,6 +21,7 @@ import pixeltable.exprs as exprs
 import pixeltable.index as index
 import pixeltable.metadata.schema as schema
 import pixeltable.type_system as ts
+from pixeltable.env import Env

 from ..exprs import ColumnRef
 from ..utils.description_helper import DescriptionHelper
@@ -37,6 +38,7 @@ from .globals import (
 )
 from .schema_object import SchemaObject
 from .table_version import TableVersion
+from .table_version_handle import TableVersionHandle
 from .table_version_path import TableVersionPath

 if TYPE_CHECKING:
@@ -56,29 +58,32 @@ class Table(SchemaObject):
     # Every user-invoked operation that runs an ExecNode tree (directly or indirectly) needs to call
     # FileCache.emit_eviction_warnings() at the end of the operation.

+    _is_dropped: bool
+    __tbl_version_path: TableVersionPath
+
     def __init__(self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath):
         super().__init__(id, name, dir_id)
         self._is_dropped = False
         self.__tbl_version_path = tbl_version_path

-    @property
-    def _has_dependents(self) -> bool:
-
-
+    # @property
+    # def _has_dependents(self) -> bool:
+    #     """Returns True if this table has any dependent views, or snapshots."""
+    #     return len(self._get_views(recursive=False)) > 0

     def _move(self, new_name: str, new_dir_id: UUID) -> None:
         self._check_is_dropped()
         super()._move(new_name, new_dir_id)
-
-
-
-
-
-
-
-            )
+        conn = env.Env.get().conn
+        stmt = sql.text(
+            (
+                f'UPDATE {schema.Table.__table__} '
+                f'SET {schema.Table.dir_id.name} = :new_dir_id, '
+                f" {schema.Table.md.name}['name'] = :new_name "
+                f'WHERE {schema.Table.id.name} = :id'
             )
-
+        )
+        conn.execute(stmt, {'new_dir_id': new_dir_id, 'new_name': json.dumps(new_name), 'id': self._id})

     def get_metadata(self) -> dict[str, Any]:
         """
@@ -105,29 +110,29 @@ class Table(SchemaObject):
         ```
         """
         self._check_is_dropped()
-
-
-
-
-
-
-
-
-
+        with env.Env.get().begin_xact():
+            md = super().get_metadata()
+            md['base'] = self._base._path() if self._base is not None else None
+            md['schema'] = self._schema
+            md['version'] = self._version
+            md['schema_version'] = self._tbl_version.get().schema_version
+            md['comment'] = self._comment
+            md['num_retained_versions'] = self._num_retained_versions
+            md['media_validation'] = self._media_validation.name.lower()
+            return md

     @property
     def _version(self) -> int:
         """Return the version of this table. Used by tests to ascertain version changes."""
-        return self._tbl_version.version
+        return self._tbl_version.get().version

     @property
-    def _tbl_version(self) ->
+    def _tbl_version(self) -> TableVersionHandle:
         """Return TableVersion for just this table."""
         return self._tbl_version_path.tbl_version

     @property
     def _tbl_version_path(self) -> TableVersionPath:
-        """Return TableVersionPath for just this table."""
         self._check_is_dropped()
         return self.__tbl_version_path

@@ -140,7 +145,10 @@ class Table(SchemaObject):

     def __getattr__(self, name: str) -> 'pxt.exprs.ColumnRef':
         """Return a ColumnRef for the given name."""
-
+        col = self._tbl_version_path.get_column(name)
+        if col is None:
+            raise AttributeError(f'Column {name!r} unknown')
+        return ColumnRef(col)

     def __getitem__(self, name: str) -> 'pxt.exprs.ColumnRef':
         """Return a ColumnRef for the given name."""
@@ -158,14 +166,16 @@ class Table(SchemaObject):
             A list of view paths.
         """
         self._check_is_dropped()
-
+        with env.Env.get().begin_xact():
+            return [t._path() for t in self._get_views(recursive=recursive)]

     def _get_views(self, *, recursive: bool = True) -> list['Table']:
-
+        cat = catalog.Catalog.get()
+        view_ids = cat.get_views(self._id)
+        views = [cat.get_tbl(id) for id in view_ids]
         if recursive:
-
-
-            return dependents
+            views.extend([t for view in views for t in view._get_views(recursive=True)])
+        return views

     def _df(self) -> 'pxt.dataframe.DataFrame':
         """Return a DataFrame for this table."""
@@ -255,7 +265,7 @@ class Table(SchemaObject):
         if self._tbl_version_path.base is None:
             return None
         base_id = self._tbl_version_path.base.tbl_version.id
-        return catalog.Catalog.get().
+        return catalog.Catalog.get().get_tbl(base_id)

     @property
     def _bases(self) -> list['Table']:
@@ -271,15 +281,15 @@ class Table(SchemaObject):

     @property
     def _comment(self) -> str:
-        return self._tbl_version.comment
+        return self._tbl_version.get().comment

     @property
     def _num_retained_versions(self):
-        return self._tbl_version.num_retained_versions
+        return self._tbl_version.get().num_retained_versions

     @property
     def _media_validation(self) -> MediaValidation:
-        return self._tbl_version.media_validation
+        return self._tbl_version.get().media_validation

     def __repr__(self) -> str:
         return self._descriptors().to_string()
@@ -307,9 +317,9 @@ class Table(SchemaObject):
     def _title_descriptor(self) -> str:
         title: str
         if self._base is None:
-            title = f'Table\n{self._path!r}'
+            title = f'Table\n{self._path()!r}'
         else:
-            title = f'View\n{self._path!r}'
+            title = f'View\n{self._path()!r}'
         title += f'\n(of {self.__bases_to_desc()})'
         return title

@@ -320,7 +330,7 @@ class Table(SchemaObject):
                 'Type': col.col_type._to_str(as_schema=True),
                 'Computed With': col.value_expr.display_str(inline=False) if col.value_expr is not None else '',
             }
-            for col in self.
+            for col in self._tbl_version_path.columns()
             if columns is None or col.name in columns
         )

@@ -328,15 +338,15 @@ class Table(SchemaObject):
         bases = self._bases
         assert len(bases) >= 1
         if len(bases) <= 2:
-            return ', '.join(repr(b._path) for b in bases)
+            return ', '.join(repr(b._path()) for b in bases)
         else:
-            return f'{bases[0]._path!r}, ..., {bases[-1]._path!r}'
+            return f'{bases[0]._path()!r}, ..., {bases[-1]._path()!r}'

     def _index_descriptor(self, columns: Optional[list[str]] = None) -> pd.DataFrame:
         from pixeltable import index

         pd_rows = []
-        for name, info in self._tbl_version.idxs_by_name.items():
+        for name, info in self._tbl_version.get().idxs_by_name.items():
             if isinstance(info.idx, index.EmbeddingIndex) and (columns is None or info.col.name in columns):
                 display_embed = info.idx.string_embed if info.col.col_type.is_string_type() else info.idx.image_embed
                 if info.idx.string_embed is not None and info.idx.image_embed is not None:
@@ -354,11 +364,16 @@ class Table(SchemaObject):

     def _external_store_descriptor(self) -> pd.DataFrame:
         pd_rows = []
-        for name, store in self._tbl_version.external_stores.items():
+        for name, store in self._tbl_version.get().external_stores.items():
             row = {'External Store': name, 'Type': type(store).__name__}
             pd_rows.append(row)
         return pd.DataFrame(pd_rows)

+    def ensure_md_loaded(self) -> None:
+        """Ensure that table metadata is loaded."""
+        for col in self._tbl_version.get().cols_by_id.values():
+            _ = col.value_expr
+
     def describe(self) -> None:
         """
         Print the table schema.
@@ -373,15 +388,12 @@ class Table(SchemaObject):

     def _drop(self) -> None:
         cat = catalog.Catalog.get()
-        # verify all dependents are deleted by now
-        for dep in cat.tbl_dependents[self._id]:
-            assert dep._is_dropped
         self._check_is_dropped()
-        self._tbl_version.drop()
+        self._tbl_version.get().drop()
         self._is_dropped = True
         # update catalog
         cat = catalog.Catalog.get()
-
+        cat.remove_tbl(self._id)

     # TODO Factor this out into a separate module.
     # The return type is unresolvable, but torch can't be imported since it's an optional dependency.
@@ -406,7 +418,7 @@ class Table(SchemaObject):
         return any(
             col in store.get_local_columns()
             for view in [self] + self._get_views(recursive=True)
-            for store in view._tbl_version.external_stores.values()
+            for store in view._tbl_version.get().external_stores.values()
         )

     def _ignore_or_drop_existing_columns(self, new_col_names: list[str], if_exists: IfExistsParam) -> list[str]:
@@ -424,12 +436,12 @@ class Table(SchemaObject):
             elif if_exists == IfExistsParam.IGNORE:
                 cols_to_ignore.append(new_col_name)
             elif if_exists == IfExistsParam.REPLACE or if_exists == IfExistsParam.REPLACE_FORCE:
-                if new_col_name not in self._tbl_version.cols_by_name:
+                if new_col_name not in self._tbl_version.get().cols_by_name:
                     # for views, it is possible that the existing column
                     # is a base table column; in that case, we should not
                     # drop/replace that column. Continue to raise error.
                     raise excs.Error(f'Column {new_col_name!r} is a base table column. Cannot replace it.')
-                col = self._tbl_version.cols_by_name[new_col_name]
+                col = self._tbl_version.get().cols_by_name[new_col_name]
                 # cannot drop a column with dependents; so reject
                 # replace directive if column has dependents.
                 if self._column_has_dependents(col):
@@ -437,7 +449,7 @@ class Table(SchemaObject):
                         f'Column {new_col_name!r} already exists and has dependents. Cannot {if_exists.name.lower()} it.'
                     )
                 self.drop_column(new_col_name)
-                assert new_col_name not in self._tbl_version.cols_by_name
+                assert new_col_name not in self._tbl_version.get().cols_by_name
         return cols_to_ignore

     def add_columns(
@@ -487,23 +499,25 @@ class Table(SchemaObject):
             col_name: {'type': ts.ColumnType.normalize_type(spec, nullable_default=True, allow_builtin_types=False)}
             for col_name, spec in schema.items()
         }
-
-
-
-
-
-
-
-
-
-
-
-
-
-            self.
-
-
-
+
+        with Env.get().begin_xact():
+            # handle existing columns based on if_exists parameter
+            cols_to_ignore = self._ignore_or_drop_existing_columns(
+                list(col_schema.keys()), IfExistsParam.validated(if_exists, 'if_exists')
+            )
+            # if all columns to be added already exist and user asked to ignore
+            # existing columns, there's nothing to do.
+            for cname in cols_to_ignore:
+                assert cname in col_schema
+                del col_schema[cname]
+            if len(col_schema) == 0:
+                return UpdateStatus()
+            new_cols = self._create_columns(col_schema)
+            for new_col in new_cols:
+                self._verify_column(new_col)
+            status = self._tbl_version.get().add_columns(new_cols, print_stats=False, on_error='abort')
+            FileCache.get().emit_eviction_warnings()
+            return status

     def add_column(
         self,
@@ -540,7 +554,7 @@ class Table(SchemaObject):
         """
         self._check_is_dropped()
         # verify kwargs
-        if self._tbl_version.is_snapshot:
+        if self._tbl_version.get().is_snapshot:
             raise excs.Error('Cannot add column to a snapshot.')
         # verify kwargs and construct column schema dict
         if len(kwargs) != 1:
@@ -617,21 +631,22 @@ class Table(SchemaObject):
         if stored is not None:
             col_schema['stored'] = stored

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        with Env.get().begin_xact():
+            # handle existing columns based on if_exists parameter
+            cols_to_ignore = self._ignore_or_drop_existing_columns(
+                [col_name], IfExistsParam.validated(if_exists, 'if_exists')
+            )
+            # if the column to add already exists and user asked to ignore
+            # exiting column, there's nothing to do.
+            if len(cols_to_ignore) != 0:
+                assert cols_to_ignore[0] == col_name
+                return UpdateStatus()
+
+            new_col = self._create_columns({col_name: col_schema})[0]
+            self._verify_column(new_col)
+            status = self._tbl_version.get().add_columns([new_col], print_stats=print_stats, on_error=on_error)
+            FileCache.get().emit_eviction_warnings()
+            return status

     @classmethod
     def _validate_column_spec(cls, name: str, spec: dict[str, Any]) -> None:
@@ -791,7 +806,7 @@ class Table(SchemaObject):
                 raise excs.Error(f'Column {column!r} unknown')
                 assert _if_not_exists == IfNotExistsParam.IGNORE
                 return
-            col = self._tbl_version.cols_by_name[column]
+            col = self._tbl_version.get().cols_by_name[column]
         else:
             exists = self._tbl_version_path.has_column(column.col, include_bases=False)
             if not exists:
@@ -808,25 +823,26 @@ class Table(SchemaObject):
                 f'{", ".join(c.name for c in dependent_user_cols)}'
             )

-
-
-
-
-
-
-
-
-        if len(dependent_stores) > 0:
-            dependent_store_names = [
-                store.name if view._id == self._id else f'{store.name} (in view `{view._name}`)'
-                for view, store in dependent_stores
+        with Env.get().begin_xact():
+            # See if this column has a dependent store. We need to look through all stores in all
+            # (transitive) views of this table.
+            dependent_stores = [
+                (view, store)
+                for view in [self] + self._get_views(recursive=True)
+                for store in view._tbl_version.get().external_stores.values()
+                if col in store.get_local_columns()
             ]
-
-
-
-
+            if len(dependent_stores) > 0:
+                dependent_store_names = [
+                    store.name if view._id == self._id else f'{store.name} (in view `{view._name}`)'
+                    for view, store in dependent_stores
+                ]
+                raise excs.Error(
+                    f'Cannot drop column `{col.name}` because the following external stores depend on it:\n'
+                    f'{", ".join(dependent_store_names)}'
+                )

-
+            self._tbl_version.get().drop_column(col)

     def rename_column(self, old_name: str, new_name: str) -> None:
         """Rename a column.
@@ -844,7 +860,8 @@ class Table(SchemaObject):
             >>> tbl = pxt.get_table('my_table')
             ... tbl.rename_column('col1', 'col2')
         """
-
+        with Env.get().begin_xact():
+            self._tbl_version.get().rename_column(old_name, new_name)

     def _list_index_info_for_test(self) -> list[dict[str, Any]]:
         """
@@ -856,7 +873,7 @@ class Table(SchemaObject):
         """
         assert not self._is_dropped
         index_info = []
-        for idx_name, idx in self._tbl_version.idxs_by_name.items():
+        for idx_name, idx in self._tbl_version.get().idxs_by_name.items():
             index_info.append({'_id': idx.id, '_name': idx_name, '_column': idx.col.name})
         return index_info

@@ -960,26 +977,31 @@ class Table(SchemaObject):
         self.__check_column_ref_exists(column, include_bases=True)
         col = column.col

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        with Env.get().begin_xact():
+            if idx_name is not None and idx_name in self._tbl_version.get().idxs_by_name:
+                _if_exists = IfExistsParam.validated(if_exists, 'if_exists')
+                # An index with the same name already exists.
+                # Handle it according to if_exists.
+                if _if_exists == IfExistsParam.ERROR:
+                    raise excs.Error(f'Duplicate index name: {idx_name}')
+                if not isinstance(self._tbl_version.get().idxs_by_name[idx_name].idx, index.EmbeddingIndex):
+                    raise excs.Error(
+                        f'Index `{idx_name}` is not an embedding index. Cannot {_if_exists.name.lower()} it.'
+                    )
+                if _if_exists == IfExistsParam.IGNORE:
+                    return
+                assert _if_exists == IfExistsParam.REPLACE or _if_exists == IfExistsParam.REPLACE_FORCE
+                self.drop_index(idx_name=idx_name)
+                assert idx_name not in self._tbl_version.get().idxs_by_name
+            from pixeltable.index import EmbeddingIndex
+
+            # create the EmbeddingIndex instance to verify args
+            idx = EmbeddingIndex(
+                col, metric=metric, embed=embedding, string_embed=string_embed, image_embed=image_embed
+            )
+            status = self._tbl_version.get().add_index(col, idx_name=idx_name, idx=idx)
+            # TODO: how to deal with exceptions here? drop the index and raise?
+            FileCache.get().emit_eviction_warnings()

     def drop_embedding_index(
         self,
@@ -1043,7 +1065,9 @@ class Table(SchemaObject):
         self.__check_column_ref_exists(column, include_bases=True)
         col = column.col
         assert col is not None
-
+
+        with Env.get().begin_xact():
+            self._drop_index(col=col, idx_name=idx_name, _idx_class=index.EmbeddingIndex, if_not_exists=if_not_exists)

     def drop_index(
         self,
@@ -1107,7 +1131,9 @@ class Table(SchemaObject):
         self.__check_column_ref_exists(column, include_bases=True)
         col = column.col
         assert col is not None
-
+
+        with Env.get().begin_xact():
+            self._drop_index(col=col, idx_name=idx_name, if_not_exists=if_not_exists)

     def _drop_index(
         self,
@@ -1123,18 +1149,18 @@ class Table(SchemaObject):

         if idx_name is not None:
             _if_not_exists = IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
-            if idx_name not in self._tbl_version.idxs_by_name:
+            if idx_name not in self._tbl_version.get().idxs_by_name:
                 if _if_not_exists == IfNotExistsParam.ERROR:
                     raise excs.Error(f'Index {idx_name!r} does not exist')
                 assert _if_not_exists == IfNotExistsParam.IGNORE
                 return
-            idx_id = self._tbl_version.idxs_by_name[idx_name].id
+            idx_id = self._tbl_version.get().idxs_by_name[idx_name].id
         else:
             if col.tbl.id != self._tbl_version.id:
                 raise excs.Error(
-                    f'Column {col.name!r}: cannot drop index from column that belongs to base ({col.tbl.name}!r)'
+                    f'Column {col.name!r}: cannot drop index from column that belongs to base ({col.tbl.get().name}!r)'
                 )
-            idx_info = [info for info in self._tbl_version.idxs_by_name.values() if info.col.id == col.id]
+            idx_info = [info for info in self._tbl_version.get().idxs_by_name.values() if info.col.id == col.id]
             if _idx_class is not None:
                 idx_info = [info for info in idx_info if isinstance(info.idx, _idx_class)]
             if len(idx_info) == 0:
@@ -1146,7 +1172,7 @@ class Table(SchemaObject):
             if len(idx_info) > 1:
                 raise excs.Error(f"Column {col.name!r} has multiple indices; specify 'idx_name' instead")
             idx_id = idx_info[0].id
-        self._tbl_version.drop_index(idx_id)
+        self._tbl_version.get().drop_index(idx_id)

     @overload
     def insert(
@@ -1260,9 +1286,10 @@ class Table(SchemaObject):

             >>> tbl.update({'int_col': tbl.int_col + 1}, where=tbl.int_col == 0)
         """
-
-
-
+        with Env.get().begin_xact():
+            status = self._tbl_version.get().update(value_spec, where, cascade)
+            FileCache.get().emit_eviction_warnings()
+            return status

     def batch_update(
         self,
@@ -1300,7 +1327,7 @@ class Table(SchemaObject):
         rows = list(rows)

         row_updates: list[dict[Column, exprs.Expr]] = []
-        pk_col_names = set(c.name for c in self._tbl_version.primary_key_columns())
+        pk_col_names = set(c.name for c in self._tbl_version.get().primary_key_columns())

         # pseudo-column _rowid: contains the rowid of the row to update and can be used instead of the primary key
         has_rowid = _ROWID_COLUMN_NAME in rows[0]
@@ -1309,7 +1336,9 @@ class Table(SchemaObject):
             raise excs.Error('Table must have primary key for batch update')

         for row_spec in rows:
-            col_vals = self._tbl_version._validate_update_spec(
+            col_vals = self._tbl_version.get()._validate_update_spec(
+                row_spec, allow_pk=not has_rowid, allow_exprs=False
+            )
             if has_rowid:
                 # we expect the _rowid column to be present for each row
                 assert _ROWID_COLUMN_NAME in row_spec
@@ -1320,15 +1349,17 @@ class Table(SchemaObject):
                 missing_cols = pk_col_names - set(col.name for col in col_vals.keys())
                 raise excs.Error(f'Primary key columns ({", ".join(missing_cols)}) missing in {row_spec}')
             row_updates.append(col_vals)
-
-
-
-
-
-
-
-
-
+
+        with Env.get().begin_xact():
+            status = self._tbl_version.get().batch_update(
+                row_updates,
+                rowids,
+                error_if_not_exists=if_not_exists == 'error',
+                insert_if_not_exists=if_not_exists == 'insert',
+                cascade=cascade,
+            )
+            FileCache.get().emit_eviction_warnings()
+            return status

     def delete(self, where: Optional['pxt.exprs.Expr'] = None) -> UpdateStatus:
         """Delete rows in this table.
@@ -1355,23 +1386,25 @@ class Table(SchemaObject):
         """
         if self._tbl_version_path.is_snapshot():
             raise excs.Error('Cannot revert a snapshot')
-
+        with Env.get().begin_xact():
+            self._tbl_version.get().revert()

     @property
     def external_stores(self) -> list[str]:
-        return list(self._tbl_version.external_stores.keys())
+        return list(self._tbl_version.get().external_stores.keys())

     def _link_external_store(self, store: 'pxt.io.ExternalStore') -> None:
         """
         Links the specified `ExternalStore` to this table.
         """
-        if self._tbl_version.is_snapshot:
+        if self._tbl_version.get().is_snapshot:
             raise excs.Error(f'Table `{self._name}` is a snapshot, so it cannot be linked to an external store.')
         if store.name in self.external_stores:
             raise excs.Error(f'Table `{self._name}` already has an external store with that name: {store.name}')
         _logger.info(f'Linking external store `{store.name}` to table `{self._name}`')
-
-
+        with Env.get().begin_xact():
+            self._tbl_version.get().link_external_store(store)
+            env.Env.get().console_logger.info(f'Linked external store `{store.name}` to table `{self._name}`.')

     def unlink_external_stores(
         self,
@@ -1405,9 +1438,10 @@ class Table(SchemaObject):
             if store not in all_stores:
                 raise excs.Error(f'Table `{self._name}` has no external store with that name: {store}')

-
-
-
+        with Env.get().begin_xact():
+            for store in stores:
+                self._tbl_version.get().unlink_external_store(store, delete_external_data=delete_external_data)
+                env.Env.get().console_logger.info(f'Unlinked external store from table `{self._name}`: {store}')

     def sync(
         self, stores: Optional[str | list[str]] = None, *, export_data: bool = True, import_data: bool = True
@@ -1434,10 +1468,11 @@ class Table(SchemaObject):
                 raise excs.Error(f'Table `{self._name}` has no external store with that name: {store}')

         sync_status = pxt.io.SyncStatus.empty()
-
-
-
-
+        with Env.get().begin_xact():
+            for store in stores:
+                store_obj = self._tbl_version.get().external_stores[store]
+                store_sync_status = store_obj.sync(self, export_data=export_data, import_data=import_data)
+                sync_status = sync_status.combine(store_sync_status)

         return sync_status

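The two edits that repeat throughout this file are mechanical: direct attribute access such as self._tbl_version.<attr> becomes self._tbl_version.get().<attr> (the property now returns a TableVersionHandle rather than a TableVersion), and user-facing operations are wrapped in with Env.get().begin_xact():. The sketch below is not pixeltable source; it is a minimal, hypothetical illustration of that handle-plus-transaction-scope pattern in isolation, using stand-in names (VersionedState, Handle, Catalog).

# Illustrative sketch only -- not pixeltable code. It mirrors the two patterns visible in the
# diff above: resolving a handle via .get() instead of holding the versioned object directly,
# and grouping mutations under a begin_xact() scope. All names here are hypothetical stand-ins.
from __future__ import annotations

import contextlib
from dataclasses import dataclass


@dataclass
class VersionedState:
    """Stand-in for a versioned metadata object (akin to a TableVersion)."""
    version: int = 0
    comment: str = ''


class Catalog:
    """Stand-in catalog that owns the authoritative state and the transaction scope."""

    def __init__(self) -> None:
        self._states: dict[str, VersionedState] = {}

    def current(self, key: str) -> VersionedState:
        return self._states.setdefault(key, VersionedState())

    @contextlib.contextmanager
    def begin_xact(self):
        # a real implementation would open a database transaction here; this sketch only
        # delimits the scope, like the `with Env.get().begin_xact():` blocks in the diff
        yield self


class Handle:
    """Stand-in for a TableVersionHandle: re-resolves the current state on every access."""

    def __init__(self, catalog: Catalog, key: str) -> None:
        self._catalog = catalog
        self._key = key

    def get(self) -> VersionedState:
        # never cache the resolved object, so callers cannot hold a stale snapshot
        return self._catalog.current(self._key)


# usage: resolve the handle inside a transaction scope, then mutate the current state
cat = Catalog()
handle = Handle(cat, 'my_table')
with cat.begin_xact():
    tv = handle.get()
    tv.version += 1
    tv.comment = 'updated inside a transaction scope'
print(handle.get().version)  # -> 1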