pixeltable-0.3.11-py3-none-any.whl → pixeltable-0.3.13-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +2 -27
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/catalog.py +309 -59
- pixeltable/catalog/globals.py +5 -5
- pixeltable/catalog/insertable_table.py +13 -1
- pixeltable/catalog/path.py +13 -6
- pixeltable/catalog/table.py +28 -41
- pixeltable/catalog/table_version.py +100 -72
- pixeltable/catalog/view.py +35 -9
- pixeltable/dataframe.py +2 -2
- pixeltable/exceptions.py +20 -2
- pixeltable/exec/expr_eval/evaluators.py +0 -4
- pixeltable/exec/expr_eval/expr_eval_node.py +0 -1
- pixeltable/exec/sql_node.py +3 -3
- pixeltable/exprs/json_path.py +1 -5
- pixeltable/func/__init__.py +1 -1
- pixeltable/func/aggregate_function.py +1 -1
- pixeltable/func/callable_function.py +1 -1
- pixeltable/func/expr_template_function.py +2 -2
- pixeltable/func/function.py +3 -4
- pixeltable/func/query_template_function.py +87 -4
- pixeltable/func/tools.py +1 -1
- pixeltable/func/udf.py +1 -1
- pixeltable/functions/__init__.py +1 -0
- pixeltable/functions/anthropic.py +1 -1
- pixeltable/functions/bedrock.py +130 -0
- pixeltable/functions/huggingface.py +7 -6
- pixeltable/functions/image.py +15 -16
- pixeltable/functions/mistralai.py +3 -2
- pixeltable/functions/openai.py +9 -8
- pixeltable/functions/together.py +4 -3
- pixeltable/globals.py +7 -2
- pixeltable/io/datarows.py +4 -3
- pixeltable/io/label_studio.py +17 -17
- pixeltable/io/pandas.py +13 -12
- pixeltable/io/table_data_conduit.py +8 -2
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_31.py +11 -0
- pixeltable/metadata/converters/convert_32.py +15 -0
- pixeltable/metadata/converters/convert_33.py +17 -0
- pixeltable/metadata/notes.py +3 -0
- pixeltable/metadata/schema.py +26 -1
- pixeltable/plan.py +2 -3
- pixeltable/share/packager.py +9 -25
- pixeltable/share/publish.py +20 -9
- pixeltable/store.py +7 -4
- pixeltable/utils/exception_handler.py +59 -0
- {pixeltable-0.3.11.dist-info → pixeltable-0.3.13.dist-info}/METADATA +1 -1
- {pixeltable-0.3.11.dist-info → pixeltable-0.3.13.dist-info}/RECORD +53 -48
- {pixeltable-0.3.11.dist-info → pixeltable-0.3.13.dist-info}/WHEEL +1 -1
- {pixeltable-0.3.11.dist-info → pixeltable-0.3.13.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.11.dist-info → pixeltable-0.3.13.dist-info}/entry_points.txt +0 -0
pixeltable/catalog/path.py
CHANGED
@@ -11,8 +11,8 @@ _logger = logging.getLogger('pixeltable')
 
 
 class Path:
-    def __init__(self, path: str, empty_is_valid: bool = False):
-        if not is_valid_path(path, empty_is_valid):
+    def __init__(self, path: str, empty_is_valid: bool = False, allow_system_paths: bool = False):
+        if not is_valid_path(path, empty_is_valid, allow_system_paths):
            raise excs.Error(f"Invalid path format: '{path}'")
        self.components = path.split('.')
 
@@ -29,21 +29,25 @@ class Path:
    def is_root(self) -> bool:
        return not self.components[0]
 
+    @property
+    def is_system_path(self) -> bool:
+        return self.components[0].startswith('_')
+
    @property
    def parent(self) -> Path:
        if len(self.components) == 1:
            if self.is_root:
                return self
            else:
-                return Path('', empty_is_valid=True)
+                return Path('', empty_is_valid=True, allow_system_paths=True)
        else:
-            return Path('.'.join(self.components[:-1]))
+            return Path('.'.join(self.components[:-1]), allow_system_paths=True)
 
    def append(self, name: str) -> Path:
        if self.is_root:
-            return Path(name)
+            return Path(name, allow_system_paths=True)
        else:
-            return Path(f'{self}.{name}')
+            return Path(f'{self}.{name}', allow_system_paths=True)
 
    def is_ancestor(self, other: Path, is_parent: bool = False) -> bool:
        """
@@ -67,6 +71,9 @@ class Path:
        for i in range(0, len(self.components)):
            yield Path('.'.join(self.components[0:i]), empty_is_valid=True)
 
+    def __repr__(self) -> str:
+        return repr(str(self))
+
    def __str__(self) -> str:
        return '.'.join(self.components)
 
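Taken together, these changes reserve dot-separated paths whose first component starts with an underscore for internal ("system") objects. The sketch below illustrates the intended behavior; it assumes that is_valid_path() (defined in catalog/globals.py, whose change is not shown here) rejects system paths unless allow_system_paths=True is passed through.

# Hedged sketch of the new Path behavior; assumes is_valid_path() rejects
# leading-underscore components unless allow_system_paths=True.
from pixeltable.catalog.path import Path
import pixeltable.exceptions as excs

p = Path('_system.replicas.tbl1', allow_system_paths=True)
print(p.is_system_path)   # True: the first component starts with '_'
print(repr(p.parent))     # '_system.replicas' (parent propagates allow_system_paths=True)

try:
    Path('_system.replicas.tbl1')   # default: system paths are treated as invalid
except excs.Error as err:
    print(err)                      # Invalid path format: '_system.replicas.tbl1'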
pixeltable/catalog/table.py
CHANGED
@@ -95,6 +95,7 @@ class Table(SchemaObject):
                'col1': StringType(),
                'col2': IntType(),
            },
+            'is_replica': False,
            'version': 22,
            'schema_version': 1,
            'comment': '',
@@ -108,8 +109,9 @@ class Table(SchemaObject):
        self._check_is_dropped()
        with env.Env.get().begin_xact():
            md = super().get_metadata()
-            md['base'] = self._base._path() if self._base is not None else None
+            md['base'] = self._base_table._path() if self._base_table is not None else None
            md['schema'] = self._schema
+            md['is_replica'] = self._tbl_version.get().is_replica
            md['version'] = self._version
            md['schema_version'] = self._tbl_version.get().schema_version
            md['comment'] = self._comment
@@ -139,14 +141,14 @@ class Table(SchemaObject):
        if self._is_dropped:
            raise excs.Error(f'{self._display_name()} {self._name} has been dropped')
 
-    def __getattr__(self, name: str) -> '
+    def __getattr__(self, name: str) -> 'exprs.ColumnRef':
        """Return a ColumnRef for the given name."""
        col = self._tbl_version_path.get_column(name)
        if col is None:
            raise AttributeError(f'Column {name!r} unknown')
        return ColumnRef(col)
 
-    def __getitem__(self, name: str) -> '
+    def __getitem__(self, name: str) -> 'exprs.ColumnRef':
        """Return a ColumnRef for the given name."""
        return getattr(self, name)
 
@@ -253,28 +255,27 @@ class Table(SchemaObject):
        return {c.name: c.col_type for c in self._tbl_version_path.columns()}
 
    @property
-    def _base(self) -> Optional['Table']:
-        """
-        The base
-
-        """
-        if self._tbl_version_path.base is None:
-            return None
-        base_id = self._tbl_version_path.base.tbl_version.id
-        return catalog.Catalog.get().get_table_by_id(base_id)
+    @abc.abstractmethod
+    def _base_table(self) -> Optional['Table']:
+        """The base's Table instance"""
+        ...
 
    @property
-    def _bases(self) -> list['Table']:
-        """
-        The ancestor list of bases of this table, starting with its immediate base.
-        """
+    def _base_tables(self) -> list['Table']:
+        """The ancestor list of bases of this table, starting with its immediate base."""
        bases = []
-        base = self._base
+        base = self._base_table
        while base is not None:
            bases.append(base)
-            base = base._base
+            base = base._base_table
        return bases
 
+    @property
+    @abc.abstractmethod
+    def _effective_base_versions(self) -> list[Optional[int]]:
+        """The effective versions of the ancestor bases, starting with its immediate base."""
+        ...
+
    @property
    def _comment(self) -> str:
        return self._tbl_version.get().comment
@@ -298,7 +299,7 @@ class Table(SchemaObject):
        Constructs a list of descriptors for this table that can be pretty-printed.
        """
        helper = DescriptionHelper()
-        helper.append(self.
+        helper.append(self._table_descriptor())
        helper.append(self._col_descriptor())
        idxs = self._index_descriptor()
        if not idxs.empty:
@@ -310,14 +311,8 @@ class Table(SchemaObject):
        helper.append(f'COMMENT: {self._comment}')
        return helper
 
-
-
-        if self._base is None:
-            title = f'Table\n{self._path()!r}'
-        else:
-            title = f'View\n{self._path()!r}'
-            title += f'\n(of {self.__bases_to_desc()})'
-        return title
+    @abc.abstractmethod
+    def _table_descriptor(self) -> str: ...
 
    def _col_descriptor(self, columns: Optional[list[str]] = None) -> pd.DataFrame:
        return pd.DataFrame(
@@ -330,14 +325,6 @@ class Table(SchemaObject):
            if columns is None or col.name in columns
        )
 
-    def __bases_to_desc(self) -> str:
-        bases = self._bases
-        assert len(bases) >= 1
-        if len(bases) <= 2:
-            return ', '.join(repr(b._path()) for b in bases)
-        else:
-            return f'{bases[0]._path()!r}, ..., {bases[-1]._path()!r}'
-
    def _index_descriptor(self, columns: Optional[list[str]] = None) -> pd.DataFrame:
        from pixeltable import index
 
@@ -371,9 +358,9 @@ class Table(SchemaObject):
        """
        self._check_is_dropped()
        if getattr(builtins, '__IPYTHON__', False):
-            from IPython.display import display
+            from IPython.display import Markdown, display
 
-            display(self._repr_html_())
+            display(Markdown(self._repr_html_()))
        else:
            print(repr(self))
 
@@ -689,7 +676,7 @@ class Table(SchemaObject):
        for name, spec in schema.items():
            col_type: Optional[ts.ColumnType] = None
            value_expr: Optional[exprs.Expr] = None
-            primary_key: Optional[bool] = None
+            primary_key: bool = False
            media_validation: Optional[catalog.MediaValidation] = None
            stored = True
 
@@ -711,7 +698,7 @@ class Table(SchemaObject):
                value_expr = value_expr.copy()
                value_expr.bind_rel_paths()
                stored = spec.get('stored', True)
-                primary_key = spec.get('primary_key')
+                primary_key = spec.get('primary_key', False)
                media_validation_str = spec.get('media_validation')
                media_validation = (
                    catalog.MediaValidation[media_validation_str.upper()] if media_validation_str is not None else None
@@ -1282,7 +1269,7 @@ class Table(SchemaObject):
        raise NotImplementedError
 
    def update(
-        self, value_spec: dict[str, Any], where: Optional['
+        self, value_spec: dict[str, Any], where: Optional['exprs.Expr'] = None, cascade: bool = True
    ) -> UpdateStatus:
        """Update rows in this table.
 
@@ -1383,7 +1370,7 @@ class Table(SchemaObject):
        FileCache.get().emit_eviction_warnings()
        return status
 
-    def delete(self, where: Optional['
+    def delete(self, where: Optional['exprs.Expr'] = None) -> UpdateStatus:
        """Delete rows in this table.
 
        Args:
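With the changes above, a table's metadata reports whether it is a replica and resolves its immediate base through the new abstract _base_table property. A rough usage sketch, assuming the public get_metadata() accessor whose docstring is extended above ('my_table' is a hypothetical table name):

import pixeltable as pxt

t = pxt.get_table('my_table')   # hypothetical existing table
md = t.get_metadata()
print(md['is_replica'])         # False for a locally created table
print(md['base'])               # path of the immediate base table, or None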
pixeltable/catalog/table_version.py
CHANGED
@@ -5,7 +5,7 @@ import importlib
 import logging
 import time
 import uuid
-from typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal, Optional
+from typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal, Optional, Tuple
 from uuid import UUID
 
 import jsonschema.exceptions
@@ -18,6 +18,7 @@ from pixeltable import exprs, index
 from pixeltable.env import Env
 from pixeltable.iterators import ComponentIterator
 from pixeltable.metadata import schema
+from pixeltable.utils.exception_handler import run_cleanup_on_exception
 from pixeltable.utils.filecache import FileCache
 from pixeltable.utils.media_store import MediaStore
 
@@ -55,6 +56,7 @@ class TableVersion:
    name: str
    user: Optional[str]
    effective_version: Optional[int]
+    is_replica: bool
    version: int
    comment: str
    media_validation: MediaValidation
@@ -111,6 +113,7 @@ class TableVersion:
        self.user = tbl_md.user
        self.effective_version = effective_version
        self.version = tbl_md.current_version if effective_version is None else effective_version
+        self.is_replica = tbl_md.is_replica
        self.comment = schema_version_md.comment
        self.num_retained_versions = schema_version_md.num_retained_versions
        self.schema_version = schema_version_md.schema_version
@@ -232,6 +235,7 @@ class TableVersion:
            tbl_id=str(tbl_id),
            name=name,
            user=user,
+            is_replica=False,
            current_version=0,
            current_schema_version=0,
            next_col_id=len(cols),
@@ -310,24 +314,16 @@
        session.add(schema_version_record)
        return tbl_record.id, tbl_version
 
-    @classmethod
-    def delete_md(cls, tbl_id: UUID) -> None:
-        conn = Env.get().conn
-        conn.execute(sql.delete(schema.TableSchemaVersion.__table__).where(schema.TableSchemaVersion.tbl_id == tbl_id))
-        conn.execute(sql.delete(schema.TableVersion.__table__).where(schema.TableVersion.tbl_id == tbl_id))
-        conn.execute(sql.delete(schema.Table.__table__).where(schema.Table.id == tbl_id))
-
    def drop(self) -> None:
+        from .catalog import Catalog
+
+        cat = Catalog.get()
        # delete this table and all associated data
        MediaStore.delete(self.id)
        FileCache.get().clear(tbl_id=self.id)
-
+        cat.delete_tbl_md(self.id)
        self.store_tbl.drop()
-
        # de-register table version from catalog
-        from .catalog import Catalog
-
-        cat = Catalog.get()
        cat.remove_tbl_version(self)
 
    def _init_schema(self, tbl_md: schema.TableMd, schema_version_md: schema.TableSchemaVersionMd) -> None:
@@ -381,7 +377,7 @@ class TableVersion:
 
        # make sure to traverse columns ordered by position = order in which cols were created;
        # this guarantees that references always point backwards
-        if col_md.value_expr is not None:
+        if not self.is_snapshot and col_md.value_expr is not None:
            self._record_refd_columns(col)
 
    def _init_idxs(self, tbl_md: schema.TableMd) -> None:
@@ -437,29 +433,15 @@
        specified preceding schema version
        """
        assert update_tbl_version or preceding_schema_version is None
+        from pixeltable.catalog import Catalog
 
-
-
-
-
-            .where(schema.Table.id == self.id)
+        tbl_md = self._create_tbl_md()
+        version_md = self._create_version_md(timestamp) if update_tbl_version else None
+        schema_version_md = (
+            self._create_schema_version_md(preceding_schema_version) if preceding_schema_version is not None else None
        )
 
-
-        version_md = self._create_version_md(timestamp)
-        conn.execute(
-            sql.insert(schema.TableVersion.__table__).values(
-                tbl_id=self.id, version=self.version, md=dataclasses.asdict(version_md)
-            )
-        )
-
-        if preceding_schema_version is not None:
-            schema_version_md = self._create_schema_version_md(preceding_schema_version)
-            conn.execute(
-                sql.insert(schema.TableSchemaVersion.__table__).values(
-                    tbl_id=self.id, schema_version=self.schema_version, md=dataclasses.asdict(schema_version_md)
-                )
-            )
+        Catalog.get().store_tbl_md(self.id, tbl_md, version_md, schema_version_md)
 
    def ensure_md_loaded(self) -> None:
        """Ensure that table metadata is loaded."""
@@ -480,33 +462,36 @@
        _logger.info(f'Added index {idx_name} on column {col.name} to table {self.name}')
        return status
 
-    def _add_default_index(self, col: Column) -> Optional[UpdateStatus]:
-        """Add a B-tree index on this column if it has a compatible type"""
+    def _is_btree_indexable(self, col: Column) -> bool:
        if not col.stored:
            # if the column is intentionally not stored, we want to avoid the overhead of an index
-            return
+            return False
        # Skip index for stored media columns produced by an iterator
        if col.col_type.is_media_type() and self.is_iterator_column(col):
-            return
+            return False
        if not col.col_type.is_scalar_type() and not (col.col_type.is_media_type() and not col.is_computed):
            # wrong type for a B-tree
-            return
-        if col.col_type.is_bool_type():
+            return False
+        if col.col_type.is_bool_type():  # noqa : SIM103 Supress `Return the negated condition directly` check
            # B-trees on bools aren't useful
+            return False
+        return True
+
+    def _add_default_index(self, col: Column) -> Optional[UpdateStatus]:
+        """Add a B-tree index on this column if it has a compatible type"""
+        if not self._is_btree_indexable(col):
            return None
        status = self._add_index(col, idx_name=None, idx=index.BtreeIndex(col))
        return status
 
-    def _add_index(self, col: Column, idx_name: Optional[str], idx: index.IndexBase) -> UpdateStatus:
+    def _create_index_columns(self, idx: index.IndexBase) -> Tuple[Column, Column]:
+        """Create value and undo columns for the given index.
+        Args:
+            idx: index for which columns will be created.
+        Returns:
+            A tuple containing the value column and the undo column.
+        """
        assert not self.is_snapshot
-        idx_id = self.next_idx_id
-        self.next_idx_id += 1
-        if idx_name is None:
-            idx_name = f'idx{idx_id}'
-        else:
-            assert is_valid_identifier(idx_name)
-            assert idx_name not in [i.name for i in self.idx_md.values()]
-
        # add the index value and undo columns (which need to be nullable)
        val_col = Column(
            col_id=self.next_col_id,
@@ -535,7 +520,19 @@
        undo_col.tbl = self.create_handle()
        undo_col.col_type = undo_col.col_type.copy(nullable=True)
        self.next_col_id += 1
+        return val_col, undo_col
 
+    def _create_index(
+        self, col: Column, val_col: Column, undo_col: Column, idx_name: Optional[str], idx: index.IndexBase
+    ) -> None:
+        """Create the given index along with index md"""
+        idx_id = self.next_idx_id
+        self.next_idx_id += 1
+        if idx_name is None:
+            idx_name = f'idx{idx_id}'
+        else:
+            assert is_valid_identifier(idx_name)
+            assert idx_name not in [i.name for i in self.idx_md.values()]
        # create and register the index metadata
        idx_cls = type(idx)
        idx_md = schema.IndexMd(
@@ -553,14 +550,27 @@
        idx_info = self.IndexInfo(id=idx_id, name=idx_name, idx=idx, col=col, val_col=val_col, undo_col=undo_col)
        self.idx_md[idx_id] = idx_md
        self.idxs_by_name[idx_name] = idx_info
+        try:
+            idx.create_index(self._store_idx_name(idx_id), val_col)
+        finally:
 
+            def cleanup_index() -> None:
+                """Delete the newly added in-memory index structure"""
+                del self.idxs_by_name[idx_name]
+                del self.idx_md[idx_id]
+                self.next_idx_id = idx_id
+
+            # Run cleanup only if there has been an exception; otherwise, skip cleanup.
+            run_cleanup_on_exception(cleanup_index)
+
+    def _add_index(self, col: Column, idx_name: Optional[str], idx: index.IndexBase) -> UpdateStatus:
+        val_col, undo_vol = self._create_index_columns(idx)
        # add the columns and update the metadata
        # TODO support on_error='abort' for indices; it's tricky because of the way metadata changes are entangled
        # with the database operations
-        status = self._add_columns([val_col, undo_col], print_stats=False, on_error='ignore')
+        status = self._add_columns([val_col, undo_vol], print_stats=False, on_error='ignore')
        # now create the index structure
-        idx.create_index(self._store_idx_name(idx_id), val_col)
-
+        self._create_index(col, val_col, undo_vol, idx_name, idx)
        return status
 
    def drop_index(self, idx_id: int) -> None:
@@ -601,9 +611,21 @@
        self.version += 1
        preceding_schema_version = self.schema_version
        self.schema_version = self.version
-
+        index_cols: dict[Column, tuple[index.BtreeIndex, Column, Column]] = {}
+        all_cols: list[Column] = []
        for col in cols:
-
+            all_cols.append(col)
+            if self._is_btree_indexable(col):
+                idx = index.BtreeIndex(col)
+                val_col, undo_col = self._create_index_columns(idx)
+                index_cols[col] = (idx, val_col, undo_col)
+                all_cols.append(val_col)
+                all_cols.append(undo_col)
+        # Add all columns
+        status = self._add_columns(all_cols, print_stats=print_stats, on_error=on_error)
+        # Create indices and their mds
+        for col, (idx, val_col, undo_col) in index_cols.items():
+            self._create_index(col, val_col, undo_col, idx_name=None, idx=idx)
        self._update_md(time.time(), preceding_schema_version=preceding_schema_version)
        _logger.info(f'Added columns {[col.name for col in cols]} to table {self.name}, new version: {self.version}')
 
@@ -619,9 +641,9 @@
        self, cols: Iterable[Column], print_stats: bool, on_error: Literal['abort', 'ignore']
    ) -> UpdateStatus:
        """Add and populate columns within the current transaction"""
-
+        cols_to_add = list(cols)
        row_count = self.store_tbl.count()
-        for col in cols:
+        for col in cols_to_add:
            if not col.col_type.nullable and not col.is_computed and row_count > 0:
                raise excs.Error(
                    f'Cannot add non-nullable column {col.name!r} to table {self.name!r} with existing rows'
@@ -629,7 +651,8 @@
 
        num_excs = 0
        cols_with_excs: list[Column] = []
-        for col in cols:
+        for col in cols_to_add:
+            excs_per_col = 0
            col.schema_version_add = self.schema_version
            # add the column to the lookup structures now, rather than after the store changes executed successfully,
            # because it might be referenced by the next column's value_expr
@@ -652,29 +675,32 @@
 
            plan, value_expr_slot_idx = Planner.create_add_column_plan(self.path, col)
            plan.ctx.num_rows = row_count
-
            try:
                plan.open()
                try:
-
+                    excs_per_col = self.store_tbl.load_column(col, plan, value_expr_slot_idx, on_error)
                except sql.exc.DBAPIError as exc:
                    # Wrap the DBAPIError in an excs.Error to unify processing in the subsequent except block
                    raise excs.Error(f'SQL error during execution of computed column `{col.name}`:\n{exc}') from exc
-                if
+                if excs_per_col > 0:
                    cols_with_excs.append(col)
-
-                self.cols.pop()
-                for c in cols:
-                    # remove columns that we already added
-                    if c.id not in self.cols_by_id:
-                        continue
-                    if c.name is not None:
-                        del self.cols_by_name[c.name]
-                    del self.cols_by_id[c.id]
-                # we need to re-initialize the sqlalchemy schema
-                self.store_tbl.create_sa_tbl()
-                raise exc
+                num_excs += excs_per_col
            finally:
+                # Ensure cleanup occurs if an exception or keyboard interruption happens during `load_column()`.
+                def cleanup_on_error() -> None:
+                    """Delete columns that are added as part of current add_columns operation and re-initialize
+                    the sqlalchemy schema"""
+                    self.cols = [col for col in self.cols if col not in cols_to_add]
+                    for col in cols_to_add:
+                        # remove columns that we already added
+                        if col.id in self.cols_by_id:
+                            del self.cols_by_id[col.id]
+                        if col.name is not None and col.name in self.cols_by_name:
+                            del self.cols_by_name[col.name]
+                    self.store_tbl.create_sa_tbl()
+
+                # Run cleanup only if there has been an exception; otherwise, skip cleanup.
+                run_cleanup_on_exception(cleanup_on_error)
                plan.close()
 
            if print_stats:
@@ -1298,6 +1324,7 @@ class TableVersion:
        column_md: dict[int, schema.ColumnMd] = {}
        for col in cols:
            value_expr_dict = col.value_expr.as_dict() if col.value_expr is not None else None
+            assert col.is_pk is not None
            column_md[col.id] = schema.ColumnMd(
                id=col.id,
                col_type=col.col_type.as_dict(),
@@ -1320,6 +1347,7 @@
            tbl_id=str(self.id),
            name=self.name,
            user=self.user,
+            is_replica=self.is_replica,
            current_version=self.version,
            current_schema_version=self.schema_version,
            next_col_id=self.next_col_id,
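Several of the hunks above route error recovery through the new run_cleanup_on_exception() helper (added in pixeltable/utils/exception_handler.py, which is not shown here): cleanup callbacks registered in a finally block run only when an exception, including a KeyboardInterrupt, is unwinding. The stand-in below sketches that pattern in plain Python; the helper's real implementation may differ.

# Stand-in sketch of the cleanup-on-exception pattern used above; the real helper
# lives in pixeltable/utils/exception_handler.py and may be implemented differently.
import sys
from typing import Callable

def run_cleanup_on_exception(cleanup: Callable[[], None]) -> None:
    # Run the callback only if an exception is currently propagating.
    if sys.exc_info()[0] is not None:
        cleanup()

def add_column_demo(registry: dict[str, int], name: str) -> None:
    registry[name] = len(registry)          # register eagerly, like cols_by_name/cols_by_id
    try:
        raise RuntimeError('simulated failure while loading column data')
    finally:
        def cleanup() -> None:
            registry.pop(name, None)        # roll back the in-memory registration
        run_cleanup_on_exception(cleanup)   # no-op on the success path

registry: dict[str, int] = {}
try:
    add_column_demo(registry, 'c1')
except RuntimeError:
    pass
print(registry)   # {} -- cleanup ran because the exception was still unwinding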
pixeltable/catalog/view.py
CHANGED
@@ -8,7 +8,7 @@ from uuid import UUID
 import pixeltable.exceptions as excs
 import pixeltable.metadata.schema as md_schema
 import pixeltable.type_system as ts
-from pixeltable import exprs, func
+from pixeltable import catalog, exprs, func
 from pixeltable.env import Env
 from pixeltable.iterators import ComponentIterator
 
@@ -20,7 +20,7 @@ from .table_version_handle import TableVersionHandle
 from .table_version_path import TableVersionPath
 
 if TYPE_CHECKING:
-
+    from pixeltable.globals import TableDataSource
 
 _logger = logging.getLogger('pixeltable')
 
@@ -65,7 +65,7 @@ class View(Table):
        base: TableVersionPath,
        select_list: Optional[list[tuple[exprs.Expr, Optional[str]]]],
        additional_columns: dict[str, Any],
-        predicate: Optional['
+        predicate: Optional['exprs.Expr'],
        is_snapshot: bool,
        num_retained_versions: int,
        comment: str,
@@ -242,7 +242,7 @@
            # there is not TableVersion to drop
            self._check_is_dropped()
            self.is_dropped = True
-
+            catalog.Catalog.get().delete_tbl_md(self._id)
        else:
            super()._drop()
 
@@ -252,11 +252,6 @@
        md['is_snapshot'] = self._tbl_version_path.is_snapshot()
        return md
 
-    if TYPE_CHECKING:
-        import datasets  # type: ignore[import-untyped]
-
-        from pixeltable.globals import RowData, TableDataSource
-
    def insert(
        self,
        source: Optional[TableDataSource] = None,
@@ -272,3 +267,34 @@
 
    def delete(self, where: Optional[exprs.Expr] = None) -> UpdateStatus:
        raise excs.Error(f'{self._display_name()} {self._name!r}: cannot delete from view')
+
+    @property
+    def _base_table(self) -> Optional['Table']:
+        # if this is a pure snapshot, our tbl_version_path only reflects the base (there is no TableVersion instance
+        # for the snapshot itself)
+        base_id = self._tbl_version.id if self._snapshot_only else self._tbl_version_path.base.tbl_version.id
+        return catalog.Catalog.get().get_table_by_id(base_id)
+
+    @property
+    def _effective_base_versions(self) -> list[Optional[int]]:
+        effective_versions = [tv.effective_version for tv in self._tbl_version_path.get_tbl_versions()]
+        if self._snapshot_only:
+            return effective_versions
+        else:
+            return effective_versions[1:]
+
+    def _table_descriptor(self) -> str:
+        display_name = 'Snapshot' if self._snapshot_only else 'View'
+        result = [f'{display_name} {self._path()!r}']
+        bases_descrs: list[str] = []
+        for base, effective_version in zip(self._base_tables, self._effective_base_versions):
+            if effective_version is None:
+                bases_descrs.append(f'{base._path()!r}')
+            else:
+                base_descr = f'{base._path()}:{effective_version}'
+                bases_descrs.append(f'{base_descr!r}')
+        result.append(f' (of {", ".join(bases_descrs)})')
+
+        if self._tbl_version.get().predicate is not None:
+            result.append(f'\nWhere: {self._tbl_version.get().predicate!s}')
+        return ''.join(result)
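The new _table_descriptor assembles the header that describe() shows for views and snapshots, rendering each versioned base as path:version. A self-contained mirror of that string-building logic, using plain values in place of catalog objects (illustration only):

# Plain-value mirror of View._table_descriptor from the diff above.
from typing import Optional

def table_descriptor(
    path: str, bases: list[str], effective_versions: list[Optional[int]],
    snapshot_only: bool, predicate: Optional[str] = None,
) -> str:
    display_name = 'Snapshot' if snapshot_only else 'View'
    result = [f'{display_name} {path!r}']
    bases_descrs = [
        repr(base) if version is None else repr(f'{base}:{version}')
        for base, version in zip(bases, effective_versions)
    ]
    result.append(f' (of {", ".join(bases_descrs)})')
    if predicate is not None:
        result.append(f'\nWhere: {predicate}')
    return ''.join(result)

print(table_descriptor('dir1.snap', ['dir1.view', 'dir1.tbl'], [4, 12], snapshot_only=True))
# Snapshot 'dir1.snap' (of 'dir1.view:4', 'dir1.tbl:12')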
pixeltable/dataframe.py
CHANGED
@@ -513,9 +513,9 @@ class DataFrame:
        (select list, where clause, ...) vertically.
        """
        if getattr(builtins, '__IPYTHON__', False):
-            from IPython.display import display
+            from IPython.display import Markdown, display
 
-            display(self._repr_html_())
+            display(Markdown(self._repr_html_()))
        else:
            print(repr(self))
 
pixeltable/exceptions.py
CHANGED
@@ -1,4 +1,3 @@
-from dataclasses import dataclass
 from types import TracebackType
 from typing import TYPE_CHECKING, Any
 
@@ -10,7 +9,6 @@ class Error(Exception):
    pass
 
 
-@dataclass
 class ExprEvalError(Exception):
    expr: 'exprs.Expr'
    expr_msg: str
@@ -19,6 +17,26 @@
    input_vals: list[Any]
    row_num: int
 
+    def __init__(
+        self,
+        expr: 'exprs.Expr',
+        expr_msg: str,
+        exc: Exception,
+        exc_tb: TracebackType,
+        input_vals: list[Any],
+        row_num: int,
+    ) -> None:
+        exct = type(exc)
+        super().__init__(
+            f'Expression evaluation failed with an error of type `{exct.__module__}.{exct.__qualname__}`:\n{expr}'
+        )
+        self.expr = expr
+        self.expr_msg = expr_msg
+        self.exc = exc
+        self.exc_tb = exc_tb
+        self.input_vals = input_vals
+        self.row_num = row_num
+
 
 class PixeltableWarning(Warning):
    pass