pixeltable 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic.
- pixeltable/__init__.py +5 -3
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +1 -0
- pixeltable/catalog/catalog.py +335 -128
- pixeltable/catalog/column.py +21 -5
- pixeltable/catalog/dir.py +19 -6
- pixeltable/catalog/insertable_table.py +34 -37
- pixeltable/catalog/named_function.py +0 -4
- pixeltable/catalog/schema_object.py +28 -42
- pixeltable/catalog/table.py +195 -158
- pixeltable/catalog/table_version.py +187 -232
- pixeltable/catalog/table_version_handle.py +50 -0
- pixeltable/catalog/table_version_path.py +49 -33
- pixeltable/catalog/view.py +56 -96
- pixeltable/config.py +103 -0
- pixeltable/dataframe.py +90 -90
- pixeltable/env.py +98 -168
- pixeltable/exec/aggregation_node.py +5 -4
- pixeltable/exec/cache_prefetch_node.py +1 -1
- pixeltable/exec/component_iteration_node.py +13 -9
- pixeltable/exec/data_row_batch.py +3 -3
- pixeltable/exec/exec_context.py +0 -4
- pixeltable/exec/exec_node.py +3 -2
- pixeltable/exec/expr_eval/schedulers.py +2 -1
- pixeltable/exec/in_memory_data_node.py +9 -4
- pixeltable/exec/row_update_node.py +1 -2
- pixeltable/exec/sql_node.py +20 -16
- pixeltable/exprs/column_ref.py +9 -9
- pixeltable/exprs/comparison.py +1 -1
- pixeltable/exprs/data_row.py +4 -4
- pixeltable/exprs/expr.py +20 -5
- pixeltable/exprs/function_call.py +98 -58
- pixeltable/exprs/json_mapper.py +25 -8
- pixeltable/exprs/json_path.py +6 -5
- pixeltable/exprs/object_ref.py +16 -5
- pixeltable/exprs/row_builder.py +15 -15
- pixeltable/exprs/rowid_ref.py +21 -7
- pixeltable/func/__init__.py +1 -1
- pixeltable/func/function.py +38 -6
- pixeltable/func/query_template_function.py +3 -6
- pixeltable/func/tools.py +26 -26
- pixeltable/func/udf.py +1 -1
- pixeltable/functions/__init__.py +2 -0
- pixeltable/functions/anthropic.py +9 -3
- pixeltable/functions/fireworks.py +7 -4
- pixeltable/functions/globals.py +4 -5
- pixeltable/functions/huggingface.py +1 -5
- pixeltable/functions/image.py +17 -7
- pixeltable/functions/llama_cpp.py +1 -1
- pixeltable/functions/mistralai.py +1 -1
- pixeltable/functions/ollama.py +4 -4
- pixeltable/functions/openai.py +26 -23
- pixeltable/functions/string.py +23 -30
- pixeltable/functions/timestamp.py +11 -6
- pixeltable/functions/together.py +14 -12
- pixeltable/functions/util.py +1 -1
- pixeltable/functions/video.py +5 -4
- pixeltable/functions/vision.py +6 -9
- pixeltable/functions/whisper.py +3 -3
- pixeltable/globals.py +246 -260
- pixeltable/index/__init__.py +2 -0
- pixeltable/index/base.py +1 -1
- pixeltable/index/btree.py +3 -1
- pixeltable/index/embedding_index.py +11 -5
- pixeltable/io/external_store.py +11 -12
- pixeltable/io/label_studio.py +4 -3
- pixeltable/io/parquet.py +57 -56
- pixeltable/iterators/__init__.py +4 -2
- pixeltable/iterators/audio.py +11 -11
- pixeltable/iterators/document.py +10 -10
- pixeltable/iterators/string.py +1 -2
- pixeltable/iterators/video.py +14 -15
- pixeltable/metadata/__init__.py +9 -5
- pixeltable/metadata/converters/convert_10.py +0 -1
- pixeltable/metadata/converters/convert_15.py +0 -2
- pixeltable/metadata/converters/convert_23.py +0 -2
- pixeltable/metadata/converters/convert_24.py +3 -3
- pixeltable/metadata/converters/convert_25.py +1 -1
- pixeltable/metadata/converters/convert_27.py +0 -2
- pixeltable/metadata/converters/convert_28.py +0 -2
- pixeltable/metadata/converters/convert_29.py +7 -8
- pixeltable/metadata/converters/util.py +7 -7
- pixeltable/metadata/schema.py +27 -19
- pixeltable/plan.py +68 -40
- pixeltable/share/packager.py +12 -9
- pixeltable/store.py +37 -38
- pixeltable/type_system.py +41 -28
- pixeltable/utils/filecache.py +2 -1
- {pixeltable-0.3.5.dist-info → pixeltable-0.3.7.dist-info}/METADATA +1 -1
- pixeltable-0.3.7.dist-info/RECORD +174 -0
- pixeltable-0.3.5.dist-info/RECORD +0 -172
- {pixeltable-0.3.5.dist-info → pixeltable-0.3.7.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.5.dist-info → pixeltable-0.3.7.dist-info}/WHEEL +0 -0
- {pixeltable-0.3.5.dist-info → pixeltable-0.3.7.dist-info}/entry_points.txt +0 -0
pixeltable/catalog/table.py
CHANGED
@@ -5,9 +5,9 @@ import builtins
 import json
 import logging
 from pathlib import Path
+from typing import TYPE_CHECKING, Any, Iterable, Literal, Optional, Union, overload
 
 from typing import _GenericAlias  # type: ignore[attr-defined]  # isort: skip
-from typing import TYPE_CHECKING, Any, Callable, Iterable, Literal, Optional, Sequence, Union, overload
 from uuid import UUID
 
 import pandas as pd
@@ -21,6 +21,7 @@ import pixeltable.exprs as exprs
 import pixeltable.index as index
 import pixeltable.metadata.schema as schema
 import pixeltable.type_system as ts
+from pixeltable.env import Env
 
 from ..exprs import ColumnRef
 from ..utils.description_helper import DescriptionHelper
@@ -37,6 +38,7 @@ from .globals import (
 )
 from .schema_object import SchemaObject
 from .table_version import TableVersion
+from .table_version_handle import TableVersionHandle
 from .table_version_path import TableVersionPath
 
 if TYPE_CHECKING:
@@ -56,29 +58,32 @@ class Table(SchemaObject):
     # Every user-invoked operation that runs an ExecNode tree (directly or indirectly) needs to call
     # FileCache.emit_eviction_warnings() at the end of the operation.
 
+    _is_dropped: bool
+    __tbl_version_path: TableVersionPath
+
     def __init__(self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath):
         super().__init__(id, name, dir_id)
         self._is_dropped = False
         self.__tbl_version_path = tbl_version_path
 
-    @property
-    def _has_dependents(self) -> bool:
-        """Returns True if this table has any dependent views, or snapshots."""
-        return len(self._get_views(recursive=False)) > 0
+    # @property
+    # def _has_dependents(self) -> bool:
+    #     """Returns True if this table has any dependent views, or snapshots."""
+    #     return len(self._get_views(recursive=False)) > 0
 
     def _move(self, new_name: str, new_dir_id: UUID) -> None:
         self._check_is_dropped()
         super()._move(new_name, new_dir_id)
- … (7 removed lines not shown in the source view)
-        )
+        conn = env.Env.get().conn
+        stmt = sql.text(
+            (
+                f'UPDATE {schema.Table.__table__} '
+                f'SET {schema.Table.dir_id.name} = :new_dir_id, '
+                f" {schema.Table.md.name}['name'] = :new_name "
+                f'WHERE {schema.Table.id.name} = :id'
             )
- … (1 removed line not shown in the source view)
+        )
+        conn.execute(stmt, {'new_dir_id': new_dir_id, 'new_name': json.dumps(new_name), 'id': self._id})
 
     def get_metadata(self) -> dict[str, Any]:
         """
@@ -105,29 +110,29 @@ class Table(SchemaObject):
         ```
         """
         self._check_is_dropped()
- … (9 removed lines not shown in the source view)
+        with env.Env.get().begin_xact():
+            md = super().get_metadata()
+            md['base'] = self._base._path() if self._base is not None else None
+            md['schema'] = self._schema
+            md['version'] = self._version
+            md['schema_version'] = self._tbl_version.get().schema_version
+            md['comment'] = self._comment
+            md['num_retained_versions'] = self._num_retained_versions
+            md['media_validation'] = self._media_validation.name.lower()
+            return md
 
     @property
     def _version(self) -> int:
         """Return the version of this table. Used by tests to ascertain version changes."""
-        return self._tbl_version.version
+        return self._tbl_version.get().version
 
     @property
-    def _tbl_version(self) ->
+    def _tbl_version(self) -> TableVersionHandle:
         """Return TableVersion for just this table."""
         return self._tbl_version_path.tbl_version
 
     @property
     def _tbl_version_path(self) -> TableVersionPath:
-        """Return TableVersionPath for just this table."""
         self._check_is_dropped()
         return self.__tbl_version_path
 
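The recurring change across this file is the new TableVersionHandle indirection: Table._tbl_version now returns a handle rather than a TableVersion, and call sites dereference it with .get() (for example self._tbl_version.get().version above). The sketch below illustrates the pattern only; the Sketch-suffixed class names and the dict-based registry are assumptions, not pixeltable's implementation.

# Minimal sketch of the handle indirection (assumed details; not pixeltable's real classes).
from dataclasses import dataclass, field
from uuid import UUID, uuid4


@dataclass
class TableVersionSketch:
    """Stand-in for pixeltable.catalog.TableVersion."""
    id: UUID = field(default_factory=uuid4)
    version: int = 0


class TableVersionHandleSketch:
    """Stand-in for pixeltable.catalog.TableVersionHandle: a late-binding reference."""

    def __init__(self, registry: dict, tbl_id: UUID) -> None:
        self._registry = registry  # in pixeltable, the Catalog plays this role
        self._tbl_id = tbl_id

    def get(self) -> TableVersionSketch:
        # resolve the current TableVersion instance at access time
        return self._registry[self._tbl_id]


# call sites now read handle.get().version rather than handle.version, matching the diff
registry: dict = {}
tv = TableVersionSketch(version=3)
registry[tv.id] = tv
handle = TableVersionHandleSketch(registry, tv.id)
assert handle.get().version == 3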
@@ -140,7 +145,10 @@ class Table(SchemaObject):
 
     def __getattr__(self, name: str) -> 'pxt.exprs.ColumnRef':
         """Return a ColumnRef for the given name."""
- … (1 removed line not shown in the source view)
+        col = self._tbl_version_path.get_column(name)
+        if col is None:
+            raise AttributeError(f'Column {name!r} unknown')
+        return ColumnRef(col)
 
     def __getitem__(self, name: str) -> 'pxt.exprs.ColumnRef':
         """Return a ColumnRef for the given name."""
@@ -158,14 +166,16 @@ class Table(SchemaObject):
             A list of view paths.
         """
         self._check_is_dropped()
- … (1 removed line not shown in the source view)
+        with env.Env.get().begin_xact():
+            return [t._path() for t in self._get_views(recursive=recursive)]
 
     def _get_views(self, *, recursive: bool = True) -> list['Table']:
- … (1 removed line not shown in the source view)
+        cat = catalog.Catalog.get()
+        view_ids = cat.get_views(self._id)
+        views = [cat.get_tbl(id) for id in view_ids]
         if recursive:
- … (2 removed lines not shown in the source view)
-        return dependents
+            views.extend([t for view in views for t in view._get_views(recursive=True)])
+        return views
 
     def _df(self) -> 'pxt.dataframe.DataFrame':
         """Return a DataFrame for this table."""
@@ -255,7 +265,7 @@ class Table(SchemaObject):
         if self._tbl_version_path.base is None:
             return None
         base_id = self._tbl_version_path.base.tbl_version.id
-        return catalog.Catalog.get().
+        return catalog.Catalog.get().get_tbl(base_id)
 
     @property
     def _bases(self) -> list['Table']:
@@ -271,15 +281,15 @@ class Table(SchemaObject):
 
     @property
     def _comment(self) -> str:
-        return self._tbl_version.comment
+        return self._tbl_version.get().comment
 
     @property
     def _num_retained_versions(self):
-        return self._tbl_version.num_retained_versions
+        return self._tbl_version.get().num_retained_versions
 
     @property
     def _media_validation(self) -> MediaValidation:
-        return self._tbl_version.media_validation
+        return self._tbl_version.get().media_validation
 
     def __repr__(self) -> str:
         return self._descriptors().to_string()
@@ -307,9 +317,9 @@ class Table(SchemaObject):
     def _title_descriptor(self) -> str:
         title: str
         if self._base is None:
-            title = f'Table\n{self._path!r}'
+            title = f'Table\n{self._path()!r}'
         else:
-            title = f'View\n{self._path!r}'
+            title = f'View\n{self._path()!r}'
             title += f'\n(of {self.__bases_to_desc()})'
         return title
 
@@ -320,7 +330,7 @@ class Table(SchemaObject):
                 'Type': col.col_type._to_str(as_schema=True),
                 'Computed With': col.value_expr.display_str(inline=False) if col.value_expr is not None else '',
             }
-            for col in self.
+            for col in self._tbl_version_path.columns()
            if columns is None or col.name in columns
        )
 
@@ -328,15 +338,15 @@ class Table(SchemaObject):
         bases = self._bases
         assert len(bases) >= 1
         if len(bases) <= 2:
-            return ', '.join(repr(b._path) for b in bases)
+            return ', '.join(repr(b._path()) for b in bases)
         else:
-            return f'{bases[0]._path!r}, ..., {bases[-1]._path!r}'
+            return f'{bases[0]._path()!r}, ..., {bases[-1]._path()!r}'
 
     def _index_descriptor(self, columns: Optional[list[str]] = None) -> pd.DataFrame:
         from pixeltable import index
 
         pd_rows = []
-        for name, info in self._tbl_version.idxs_by_name.items():
+        for name, info in self._tbl_version.get().idxs_by_name.items():
             if isinstance(info.idx, index.EmbeddingIndex) and (columns is None or info.col.name in columns):
                 display_embed = info.idx.string_embed if info.col.col_type.is_string_type() else info.idx.image_embed
                 if info.idx.string_embed is not None and info.idx.image_embed is not None:
@@ -354,11 +364,16 @@ class Table(SchemaObject):
 
     def _external_store_descriptor(self) -> pd.DataFrame:
         pd_rows = []
-        for name, store in self._tbl_version.external_stores.items():
+        for name, store in self._tbl_version.get().external_stores.items():
             row = {'External Store': name, 'Type': type(store).__name__}
             pd_rows.append(row)
         return pd.DataFrame(pd_rows)
 
+    def ensure_md_loaded(self) -> None:
+        """Ensure that table metadata is loaded."""
+        for col in self._tbl_version.get().cols_by_id.values():
+            _ = col.value_expr
+
     def describe(self) -> None:
         """
         Print the table schema.
@@ -373,15 +388,12 @@ class Table(SchemaObject):
 
     def _drop(self) -> None:
         cat = catalog.Catalog.get()
-        # verify all dependents are deleted by now
-        for dep in cat.tbl_dependents[self._id]:
-            assert dep._is_dropped
         self._check_is_dropped()
-        self._tbl_version.drop()
+        self._tbl_version.get().drop()
         self._is_dropped = True
         # update catalog
         cat = catalog.Catalog.get()
- … (1 removed line not shown in the source view)
+        cat.remove_tbl(self._id)
 
     # TODO Factor this out into a separate module.
     # The return type is unresolvable, but torch can't be imported since it's an optional dependency.
@@ -406,7 +418,7 @@ class Table(SchemaObject):
         return any(
             col in store.get_local_columns()
             for view in [self] + self._get_views(recursive=True)
-            for store in view._tbl_version.external_stores.values()
+            for store in view._tbl_version.get().external_stores.values()
         )
 
     def _ignore_or_drop_existing_columns(self, new_col_names: list[str], if_exists: IfExistsParam) -> list[str]:
@@ -424,12 +436,12 @@ class Table(SchemaObject):
             elif if_exists == IfExistsParam.IGNORE:
                 cols_to_ignore.append(new_col_name)
             elif if_exists == IfExistsParam.REPLACE or if_exists == IfExistsParam.REPLACE_FORCE:
-                if new_col_name not in self._tbl_version.cols_by_name:
+                if new_col_name not in self._tbl_version.get().cols_by_name:
                     # for views, it is possible that the existing column
                     # is a base table column; in that case, we should not
                     # drop/replace that column. Continue to raise error.
                     raise excs.Error(f'Column {new_col_name!r} is a base table column. Cannot replace it.')
-                col = self._tbl_version.cols_by_name[new_col_name]
+                col = self._tbl_version.get().cols_by_name[new_col_name]
                 # cannot drop a column with dependents; so reject
                 # replace directive if column has dependents.
                 if self._column_has_dependents(col):
@@ -437,7 +449,7 @@ class Table(SchemaObject):
                         f'Column {new_col_name!r} already exists and has dependents. Cannot {if_exists.name.lower()} it.'
                     )
                     self.drop_column(new_col_name)
-                    assert new_col_name not in self._tbl_version.cols_by_name
+                    assert new_col_name not in self._tbl_version.get().cols_by_name
         return cols_to_ignore
 
     def add_columns(
@@ -487,23 +499,25 @@ class Table(SchemaObject):
             col_name: {'type': ts.ColumnType.normalize_type(spec, nullable_default=True, allow_builtin_types=False)}
             for col_name, spec in schema.items()
         }
- … (13 removed lines not shown in the source view)
-        self.
- … (3 removed lines not shown in the source view)
+
+        with Env.get().begin_xact():
+            # handle existing columns based on if_exists parameter
+            cols_to_ignore = self._ignore_or_drop_existing_columns(
+                list(col_schema.keys()), IfExistsParam.validated(if_exists, 'if_exists')
+            )
+            # if all columns to be added already exist and user asked to ignore
+            # existing columns, there's nothing to do.
+            for cname in cols_to_ignore:
+                assert cname in col_schema
+                del col_schema[cname]
+            if len(col_schema) == 0:
+                return UpdateStatus()
+            new_cols = self._create_columns(col_schema)
+            for new_col in new_cols:
+                self._verify_column(new_col)
+            status = self._tbl_version.get().add_columns(new_cols, print_stats=False, on_error='abort')
+            FileCache.get().emit_eviction_warnings()
+            return status
 
     def add_column(
         self,
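The other recurring change, visible in the add_columns hunk above and in most hunks that follow, is that mutating operations now run inside "with Env.get().begin_xact():" and call FileCache.get().emit_eviction_warnings() before returning. The public call signature is unchanged; a hedged usage sketch follows (the table name and column names are invented):

# Hedged usage sketch of the API exercised above; 'films' and the column names are
# invented. pxt.get_table, Table.add_columns, and the pxt.String / pxt.Float type
# markers are part of pixeltable's public API.
import pixeltable as pxt

tbl = pxt.get_table('films')
# the whole add_columns() call now executes inside a single catalog transaction
status = tbl.add_columns({'summary': pxt.String, 'rating': pxt.Float})
print(status)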
@@ -540,7 +554,7 @@ class Table(SchemaObject):
         """
         self._check_is_dropped()
         # verify kwargs
-        if self._tbl_version.is_snapshot:
+        if self._tbl_version.get().is_snapshot:
             raise excs.Error('Cannot add column to a snapshot.')
         # verify kwargs and construct column schema dict
         if len(kwargs) != 1:
@@ -617,21 +631,22 @@ class Table(SchemaObject):
         if stored is not None:
             col_schema['stored'] = stored
 
- … (15 removed lines not shown in the source view)
+        with Env.get().begin_xact():
+            # handle existing columns based on if_exists parameter
+            cols_to_ignore = self._ignore_or_drop_existing_columns(
+                [col_name], IfExistsParam.validated(if_exists, 'if_exists')
+            )
+            # if the column to add already exists and user asked to ignore
+            # exiting column, there's nothing to do.
+            if len(cols_to_ignore) != 0:
+                assert cols_to_ignore[0] == col_name
+                return UpdateStatus()
+
+            new_col = self._create_columns({col_name: col_schema})[0]
+            self._verify_column(new_col)
+            status = self._tbl_version.get().add_columns([new_col], print_stats=print_stats, on_error=on_error)
+            FileCache.get().emit_eviction_warnings()
+            return status
 
     @classmethod
     def _validate_column_spec(cls, name: str, spec: dict[str, Any]) -> None:
@@ -682,6 +697,7 @@ class Table(SchemaObject):
         elif isinstance(spec, exprs.Expr):
             # create copy so we can modify it
             value_expr = spec.copy()
+            value_expr.bind_rel_paths()
         elif isinstance(spec, dict):
             cls._validate_column_spec(name, spec)
             if 'type' in spec:
@@ -692,6 +708,7 @@ class Table(SchemaObject):
             if value_expr is not None and isinstance(value_expr, exprs.Expr):
                 # create copy so we can modify it
                 value_expr = value_expr.copy()
+                value_expr.bind_rel_paths()
             stored = spec.get('stored', True)
             primary_key = spec.get('primary_key')
             media_validation_str = spec.get('media_validation')
@@ -789,7 +806,7 @@ class Table(SchemaObject):
                     raise excs.Error(f'Column {column!r} unknown')
                 assert _if_not_exists == IfNotExistsParam.IGNORE
                 return
-            col = self._tbl_version.cols_by_name[column]
+            col = self._tbl_version.get().cols_by_name[column]
         else:
             exists = self._tbl_version_path.has_column(column.col, include_bases=False)
             if not exists:
@@ -806,25 +823,26 @@ class Table(SchemaObject):
                 f'{", ".join(c.name for c in dependent_user_cols)}'
             )
 
- … (8 removed lines not shown in the source view)
-        if len(dependent_stores) > 0:
-            dependent_store_names = [
-                store.name if view._id == self._id else f'{store.name} (in view `{view._name}`)'
-                for view, store in dependent_stores
+        with Env.get().begin_xact():
+            # See if this column has a dependent store. We need to look through all stores in all
+            # (transitive) views of this table.
+            dependent_stores = [
+                (view, store)
+                for view in [self] + self._get_views(recursive=True)
+                for store in view._tbl_version.get().external_stores.values()
+                if col in store.get_local_columns()
            ]
- … (4 removed lines not shown in the source view)
+            if len(dependent_stores) > 0:
+                dependent_store_names = [
+                    store.name if view._id == self._id else f'{store.name} (in view `{view._name}`)'
+                    for view, store in dependent_stores
+                ]
+                raise excs.Error(
+                    f'Cannot drop column `{col.name}` because the following external stores depend on it:\n'
+                    f'{", ".join(dependent_store_names)}'
+                )
 
- … (1 removed line not shown in the source view)
+            self._tbl_version.get().drop_column(col)
 
     def rename_column(self, old_name: str, new_name: str) -> None:
         """Rename a column.
@@ -842,7 +860,8 @@ class Table(SchemaObject):
             >>> tbl = pxt.get_table('my_table')
             ... tbl.rename_column('col1', 'col2')
         """
- … (1 removed line not shown in the source view)
+        with Env.get().begin_xact():
+            self._tbl_version.get().rename_column(old_name, new_name)
 
     def _list_index_info_for_test(self) -> list[dict[str, Any]]:
         """
@@ -854,7 +873,7 @@ class Table(SchemaObject):
         """
         assert not self._is_dropped
         index_info = []
-        for idx_name, idx in self._tbl_version.idxs_by_name.items():
+        for idx_name, idx in self._tbl_version.get().idxs_by_name.items():
             index_info.append({'_id': idx.id, '_name': idx_name, '_column': idx.col.name})
         return index_info
 
@@ -958,26 +977,31 @@ class Table(SchemaObject):
         self.__check_column_ref_exists(column, include_bases=True)
         col = column.col
 
- … (20 removed lines not shown in the source view)
+        with Env.get().begin_xact():
+            if idx_name is not None and idx_name in self._tbl_version.get().idxs_by_name:
+                _if_exists = IfExistsParam.validated(if_exists, 'if_exists')
+                # An index with the same name already exists.
+                # Handle it according to if_exists.
+                if _if_exists == IfExistsParam.ERROR:
+                    raise excs.Error(f'Duplicate index name: {idx_name}')
+                if not isinstance(self._tbl_version.get().idxs_by_name[idx_name].idx, index.EmbeddingIndex):
+                    raise excs.Error(
+                        f'Index `{idx_name}` is not an embedding index. Cannot {_if_exists.name.lower()} it.'
+                    )
+                if _if_exists == IfExistsParam.IGNORE:
+                    return
+                assert _if_exists == IfExistsParam.REPLACE or _if_exists == IfExistsParam.REPLACE_FORCE
+                self.drop_index(idx_name=idx_name)
+                assert idx_name not in self._tbl_version.get().idxs_by_name
+            from pixeltable.index import EmbeddingIndex
+
+            # create the EmbeddingIndex instance to verify args
+            idx = EmbeddingIndex(
+                col, metric=metric, embed=embedding, string_embed=string_embed, image_embed=image_embed
+            )
+            status = self._tbl_version.get().add_index(col, idx_name=idx_name, idx=idx)
+            # TODO: how to deal with exceptions here? drop the index and raise?
+            FileCache.get().emit_eviction_warnings()
 
     def drop_embedding_index(
         self,
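The add_embedding_index hunk above also shows the keyword arguments the method handles (idx_name, if_exists, metric, embedding, string_embed, image_embed). A hedged usage sketch follows; the table name, column name, and CLIP model id are invented, and clip.using(...) from pixeltable.functions.huggingface is assumed to be available in this release:

# Hedged sketch only: 'photos', 'img' and the model id are invented; the clip UDF and
# its .using(...) parameterization are assumptions about this pixeltable release.
import pixeltable as pxt
from pixeltable.functions.huggingface import clip

tbl = pxt.get_table('photos')
tbl.add_embedding_index(
    'img',
    idx_name='img_idx',
    embedding=clip.using(model_id='openai/clip-vit-base-patch32'),
    if_exists='replace',  # dispatched through IfExistsParam, as in the hunk above
)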
@@ -1041,7 +1065,9 @@ class Table(SchemaObject):
         self.__check_column_ref_exists(column, include_bases=True)
         col = column.col
         assert col is not None
- … (1 removed line not shown in the source view)
+
+        with Env.get().begin_xact():
+            self._drop_index(col=col, idx_name=idx_name, _idx_class=index.EmbeddingIndex, if_not_exists=if_not_exists)
 
     def drop_index(
         self,
@@ -1105,7 +1131,9 @@ class Table(SchemaObject):
         self.__check_column_ref_exists(column, include_bases=True)
         col = column.col
         assert col is not None
- … (1 removed line not shown in the source view)
+
+        with Env.get().begin_xact():
+            self._drop_index(col=col, idx_name=idx_name, if_not_exists=if_not_exists)
 
     def _drop_index(
         self,
@@ -1121,18 +1149,18 @@ class Table(SchemaObject):
 
         if idx_name is not None:
             _if_not_exists = IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
-            if idx_name not in self._tbl_version.idxs_by_name:
+            if idx_name not in self._tbl_version.get().idxs_by_name:
                 if _if_not_exists == IfNotExistsParam.ERROR:
                     raise excs.Error(f'Index {idx_name!r} does not exist')
                 assert _if_not_exists == IfNotExistsParam.IGNORE
                 return
-            idx_id = self._tbl_version.idxs_by_name[idx_name].id
+            idx_id = self._tbl_version.get().idxs_by_name[idx_name].id
         else:
             if col.tbl.id != self._tbl_version.id:
                 raise excs.Error(
-                    f'Column {col.name!r}: cannot drop index from column that belongs to base ({col.tbl.name}!r)'
+                    f'Column {col.name!r}: cannot drop index from column that belongs to base ({col.tbl.get().name}!r)'
                 )
-            idx_info = [info for info in self._tbl_version.idxs_by_name.values() if info.col.id == col.id]
+            idx_info = [info for info in self._tbl_version.get().idxs_by_name.values() if info.col.id == col.id]
             if _idx_class is not None:
                 idx_info = [info for info in idx_info if isinstance(info.idx, _idx_class)]
             if len(idx_info) == 0:
@@ -1144,7 +1172,7 @@ class Table(SchemaObject):
         if len(idx_info) > 1:
             raise excs.Error(f"Column {col.name!r} has multiple indices; specify 'idx_name' instead")
         idx_id = idx_info[0].id
-        self._tbl_version.drop_index(idx_id)
+        self._tbl_version.get().drop_index(idx_id)
 
     @overload
     def insert(
@@ -1258,9 +1286,10 @@ class Table(SchemaObject):
 
             >>> tbl.update({'int_col': tbl.int_col + 1}, where=tbl.int_col == 0)
         """
- … (3 removed lines not shown in the source view)
+        with Env.get().begin_xact():
+            status = self._tbl_version.get().update(value_spec, where, cascade)
+            FileCache.get().emit_eviction_warnings()
+            return status
 
     def batch_update(
         self,
@@ -1298,7 +1327,7 @@ class Table(SchemaObject):
         rows = list(rows)
 
         row_updates: list[dict[Column, exprs.Expr]] = []
-        pk_col_names = set(c.name for c in self._tbl_version.primary_key_columns())
+        pk_col_names = set(c.name for c in self._tbl_version.get().primary_key_columns())
 
         # pseudo-column _rowid: contains the rowid of the row to update and can be used instead of the primary key
         has_rowid = _ROWID_COLUMN_NAME in rows[0]
@@ -1307,7 +1336,9 @@ class Table(SchemaObject):
             raise excs.Error('Table must have primary key for batch update')
 
         for row_spec in rows:
-            col_vals = self._tbl_version._validate_update_spec(
+            col_vals = self._tbl_version.get()._validate_update_spec(
+                row_spec, allow_pk=not has_rowid, allow_exprs=False
+            )
             if has_rowid:
                 # we expect the _rowid column to be present for each row
                 assert _ROWID_COLUMN_NAME in row_spec
@@ -1318,15 +1349,17 @@ class Table(SchemaObject):
                 missing_cols = pk_col_names - set(col.name for col in col_vals.keys())
                 raise excs.Error(f'Primary key columns ({", ".join(missing_cols)}) missing in {row_spec}')
             row_updates.append(col_vals)
- … (9 removed lines not shown in the source view)
+
+        with Env.get().begin_xact():
+            status = self._tbl_version.get().batch_update(
+                row_updates,
+                rowids,
+                error_if_not_exists=if_not_exists == 'error',
+                insert_if_not_exists=if_not_exists == 'insert',
+                cascade=cascade,
+            )
+            FileCache.get().emit_eviction_warnings()
+            return status
 
     def delete(self, where: Optional['pxt.exprs.Expr'] = None) -> UpdateStatus:
         """Delete rows in this table.
@@ -1353,23 +1386,25 @@ class Table(SchemaObject):
         """
         if self._tbl_version_path.is_snapshot():
             raise excs.Error('Cannot revert a snapshot')
- … (1 removed line not shown in the source view)
+        with Env.get().begin_xact():
+            self._tbl_version.get().revert()
 
     @property
     def external_stores(self) -> list[str]:
-        return list(self._tbl_version.external_stores.keys())
+        return list(self._tbl_version.get().external_stores.keys())
 
     def _link_external_store(self, store: 'pxt.io.ExternalStore') -> None:
         """
        Links the specified `ExternalStore` to this table.
         """
-        if self._tbl_version.is_snapshot:
+        if self._tbl_version.get().is_snapshot:
             raise excs.Error(f'Table `{self._name}` is a snapshot, so it cannot be linked to an external store.')
         if store.name in self.external_stores:
             raise excs.Error(f'Table `{self._name}` already has an external store with that name: {store.name}')
         _logger.info(f'Linking external store `{store.name}` to table `{self._name}`')
- … (2 removed lines not shown in the source view)
+        with Env.get().begin_xact():
+            self._tbl_version.get().link_external_store(store)
+            env.Env.get().console_logger.info(f'Linked external store `{store.name}` to table `{self._name}`.')
 
     def unlink_external_stores(
         self,
@@ -1403,9 +1438,10 @@ class Table(SchemaObject):
             if store not in all_stores:
                 raise excs.Error(f'Table `{self._name}` has no external store with that name: {store}')
 
- … (3 removed lines not shown in the source view)
+        with Env.get().begin_xact():
+            for store in stores:
+                self._tbl_version.get().unlink_external_store(store, delete_external_data=delete_external_data)
+                env.Env.get().console_logger.info(f'Unlinked external store from table `{self._name}`: {store}')
 
     def sync(
         self, stores: Optional[str | list[str]] = None, *, export_data: bool = True, import_data: bool = True
@@ -1432,10 +1468,11 @@ class Table(SchemaObject):
                 raise excs.Error(f'Table `{self._name}` has no external store with that name: {store}')
 
         sync_status = pxt.io.SyncStatus.empty()
- … (4 removed lines not shown in the source view)
+        with Env.get().begin_xact():
+            for store in stores:
+                store_obj = self._tbl_version.get().external_stores[store]
+                store_sync_status = store_obj.sync(self, export_data=export_data, import_data=import_data)
+                sync_status = sync_status.combine(store_sync_status)
 
         return sync_status
 