pixeltable 0.4.0rc3__py3-none-any.whl → 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +1 -1
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +11 -2
- pixeltable/catalog/catalog.py +407 -119
- pixeltable/catalog/column.py +38 -26
- pixeltable/catalog/globals.py +130 -15
- pixeltable/catalog/insertable_table.py +10 -9
- pixeltable/catalog/schema_object.py +6 -0
- pixeltable/catalog/table.py +245 -119
- pixeltable/catalog/table_version.py +142 -116
- pixeltable/catalog/table_version_handle.py +30 -2
- pixeltable/catalog/table_version_path.py +28 -4
- pixeltable/catalog/view.py +14 -20
- pixeltable/config.py +4 -0
- pixeltable/dataframe.py +10 -9
- pixeltable/env.py +5 -11
- pixeltable/exceptions.py +6 -0
- pixeltable/exec/exec_node.py +2 -0
- pixeltable/exec/expr_eval/expr_eval_node.py +4 -4
- pixeltable/exec/sql_node.py +47 -30
- pixeltable/exprs/column_property_ref.py +2 -10
- pixeltable/exprs/column_ref.py +24 -21
- pixeltable/exprs/data_row.py +9 -0
- pixeltable/exprs/expr.py +4 -4
- pixeltable/exprs/row_builder.py +44 -13
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +4 -2
- pixeltable/func/tools.py +12 -2
- pixeltable/func/udf.py +2 -2
- pixeltable/functions/__init__.py +1 -0
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +8 -6
- pixeltable/functions/mistralai.py +2 -13
- pixeltable/functions/openai.py +1 -6
- pixeltable/functions/replicate.py +2 -2
- pixeltable/functions/util.py +6 -1
- pixeltable/globals.py +0 -2
- pixeltable/io/external_store.py +81 -54
- pixeltable/io/globals.py +1 -1
- pixeltable/io/label_studio.py +49 -45
- pixeltable/io/table_data_conduit.py +1 -1
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/notes.py +2 -0
- pixeltable/metadata/schema.py +5 -0
- pixeltable/metadata/utils.py +78 -0
- pixeltable/plan.py +59 -139
- pixeltable/share/packager.py +2 -2
- pixeltable/store.py +114 -103
- pixeltable/type_system.py +30 -0
- {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/METADATA +1 -1
- {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/RECORD +57 -53
- pixeltable/utils/sample.py +0 -25
- {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/LICENSE +0 -0
- {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/entry_points.txt +0 -0
pixeltable/catalog/table.py
CHANGED
|
@@ -9,6 +9,7 @@ from pathlib import Path
|
|
|
9
9
|
from typing import TYPE_CHECKING, Any, Iterable, Literal, Optional, Union, overload
|
|
10
10
|
|
|
11
11
|
from typing import _GenericAlias # type: ignore[attr-defined] # isort: skip
|
|
12
|
+
import datetime
|
|
12
13
|
from uuid import UUID
|
|
13
14
|
|
|
14
15
|
import pandas as pd
|
|
@@ -17,6 +18,7 @@ import sqlalchemy as sql
|
|
|
17
18
|
import pixeltable as pxt
|
|
18
19
|
from pixeltable import catalog, env, exceptions as excs, exprs, index, type_system as ts
|
|
19
20
|
from pixeltable.metadata import schema
|
|
21
|
+
from pixeltable.metadata.utils import MetadataUtils
|
|
20
22
|
|
|
21
23
|
from ..exprs import ColumnRef
|
|
22
24
|
from ..utils.description_helper import DescriptionHelper
|
|
@@ -48,21 +50,23 @@ class Table(SchemaObject):
|
|
|
48
50
|
"""
|
|
49
51
|
A handle to a table, view, or snapshot. This class is the primary interface through which table operations
|
|
50
52
|
(queries, insertions, updates, etc.) are performed in Pixeltable.
|
|
53
|
+
|
|
54
|
+
Every user-invoked operation that runs an ExecNode tree (directly or indirectly) needs to call
|
|
55
|
+
FileCache.emit_eviction_warnings() at the end of the operation.
|
|
51
56
|
"""
|
|
52
57
|
|
|
53
|
-
#
|
|
54
|
-
|
|
58
|
+
# the chain of TableVersions needed to run queries and supply metadata (eg, schema)
|
|
59
|
+
_tbl_version_path: TableVersionPath
|
|
55
60
|
|
|
56
|
-
|
|
57
|
-
|
|
61
|
+
# the physical TableVersion backing this Table; None for pure snapshots
|
|
62
|
+
_tbl_version: Optional[TableVersionHandle]
|
|
58
63
|
|
|
59
64
|
def __init__(self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath):
|
|
60
65
|
super().__init__(id, name, dir_id)
|
|
61
|
-
self.
|
|
62
|
-
self.
|
|
66
|
+
self._tbl_version_path = tbl_version_path
|
|
67
|
+
self._tbl_version = None
|
|
63
68
|
|
|
64
69
|
def _move(self, new_name: str, new_dir_id: UUID) -> None:
|
|
65
|
-
self._check_is_dropped()
|
|
66
70
|
super()._move(new_name, new_dir_id)
|
|
67
71
|
conn = env.Env.get().conn
|
|
68
72
|
stmt = sql.text(
|
|
@@ -75,6 +79,7 @@ class Table(SchemaObject):
|
|
|
75
79
|
)
|
|
76
80
|
conn.execute(stmt, {'new_dir_id': new_dir_id, 'new_name': json.dumps(new_name), 'id': self._id})
|
|
77
81
|
|
|
82
|
+
# this is duplicated from SchemaObject so that our API docs show the docstring for Table
|
|
78
83
|
def get_metadata(self) -> dict[str, Any]:
|
|
79
84
|
"""
|
|
80
85
|
Retrieves metadata associated with this table.
|
|
@@ -100,42 +105,27 @@ class Table(SchemaObject):
|
|
|
100
105
|
}
|
|
101
106
|
```
|
|
102
107
|
"""
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
def _version(self) -> int:
|
|
108
|
+
return super().get_metadata()
|
|
109
|
+
|
|
110
|
+
def _get_metadata(self) -> dict[str, Any]:
|
|
111
|
+
md = super()._get_metadata()
|
|
112
|
+
base = self._get_base_table()
|
|
113
|
+
md['base'] = base._path() if base is not None else None
|
|
114
|
+
md['schema'] = self._get_schema()
|
|
115
|
+
md['is_replica'] = self._tbl_version_path.is_replica()
|
|
116
|
+
md['version'] = self._get_version()
|
|
117
|
+
md['schema_version'] = self._tbl_version_path.schema_version()
|
|
118
|
+
md['comment'] = self._get_comment()
|
|
119
|
+
md['num_retained_versions'] = self._get_num_retained_versions()
|
|
120
|
+
md['media_validation'] = self._get_media_validation().name.lower()
|
|
121
|
+
return md
|
|
122
|
+
|
|
123
|
+
def _get_version(self) -> int:
|
|
120
124
|
"""Return the version of this table. Used by tests to ascertain version changes."""
|
|
121
|
-
return self.
|
|
122
|
-
|
|
123
|
-
@property
|
|
124
|
-
def _tbl_version(self) -> TableVersionHandle:
|
|
125
|
-
"""Return TableVersion for just this table."""
|
|
126
|
-
return self._tbl_version_path.tbl_version
|
|
127
|
-
|
|
128
|
-
@property
|
|
129
|
-
def _tbl_version_path(self) -> TableVersionPath:
|
|
130
|
-
self._check_is_dropped()
|
|
131
|
-
return self.__tbl_version_path
|
|
125
|
+
return self._tbl_version_path.version()
|
|
132
126
|
|
|
133
127
|
def __hash__(self) -> int:
|
|
134
|
-
return hash(self.
|
|
135
|
-
|
|
136
|
-
def _check_is_dropped(self) -> None:
|
|
137
|
-
if self._is_dropped:
|
|
138
|
-
raise excs.Error(f'{self._display_name()} {self._name} has been dropped')
|
|
128
|
+
return hash(self._tbl_version_path.tbl_id)
|
|
139
129
|
|
|
140
130
|
def __getattr__(self, name: str) -> 'exprs.ColumnRef':
|
|
141
131
|
"""Return a ColumnRef for the given name."""
|
|
@@ -162,15 +152,18 @@ class Table(SchemaObject):
|
|
|
162
152
|
from pixeltable.catalog import Catalog
|
|
163
153
|
|
|
164
154
|
with Catalog.get().begin_xact(for_write=False):
|
|
165
|
-
self._check_is_dropped()
|
|
166
155
|
return [t._path() for t in self._get_views(recursive=recursive)]
|
|
167
156
|
|
|
168
|
-
def _get_views(self, *, recursive: bool = True) -> list['Table']:
|
|
157
|
+
def _get_views(self, *, recursive: bool = True, include_snapshots: bool = True) -> list['Table']:
|
|
169
158
|
cat = catalog.Catalog.get()
|
|
170
159
|
view_ids = cat.get_view_ids(self._id)
|
|
171
160
|
views = [cat.get_table_by_id(id) for id in view_ids]
|
|
161
|
+
if not include_snapshots:
|
|
162
|
+
views = [t for t in views if not t._tbl_version_path.is_snapshot()]
|
|
172
163
|
if recursive:
|
|
173
|
-
views.extend(
|
|
164
|
+
views.extend(
|
|
165
|
+
t for view in views for t in view._get_views(recursive=True, include_snapshots=include_snapshots)
|
|
166
|
+
)
|
|
174
167
|
return views
|
|
175
168
|
|
|
176
169
|
def _df(self) -> 'pxt.dataframe.DataFrame':
|
|
@@ -276,35 +269,32 @@ class Table(SchemaObject):
|
|
|
276
269
|
"""Return the number of rows in this table."""
|
|
277
270
|
return self._df().count()
|
|
278
271
|
|
|
279
|
-
@property
|
|
280
272
|
def columns(self) -> list[str]:
|
|
281
273
|
"""Return the names of the columns in this table."""
|
|
282
274
|
cols = self._tbl_version_path.columns()
|
|
283
275
|
return [c.name for c in cols]
|
|
284
276
|
|
|
285
|
-
|
|
286
|
-
def _schema(self) -> dict[str, ts.ColumnType]:
|
|
277
|
+
def _get_schema(self) -> dict[str, ts.ColumnType]:
|
|
287
278
|
"""Return the schema (column names and column types) of this table."""
|
|
288
279
|
return {c.name: c.col_type for c in self._tbl_version_path.columns()}
|
|
289
280
|
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
281
|
+
def get_base_table(self) -> Optional['Table']:
|
|
282
|
+
from pixeltable.catalog import Catalog
|
|
283
|
+
|
|
284
|
+
with Catalog.get().begin_xact(for_write=False):
|
|
285
|
+
return self._get_base_table()
|
|
294
286
|
|
|
295
|
-
@property
|
|
296
287
|
@abc.abstractmethod
|
|
297
|
-
def
|
|
298
|
-
"""The base's Table instance"""
|
|
288
|
+
def _get_base_table(self) -> Optional['Table']:
|
|
289
|
+
"""The base's Table instance. Requires a transaction context"""
|
|
299
290
|
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
base = self._base_table
|
|
291
|
+
def _get_base_tables(self) -> list['Table']:
|
|
292
|
+
"""The ancestor list of bases of this table, starting with its immediate base. Requires a transaction context"""
|
|
293
|
+
bases: list[Table] = []
|
|
294
|
+
base = self._get_base_table()
|
|
305
295
|
while base is not None:
|
|
306
296
|
bases.append(base)
|
|
307
|
-
base = base.
|
|
297
|
+
base = base._get_base_table()
|
|
308
298
|
return bases
|
|
309
299
|
|
|
310
300
|
@property
|
|
@@ -312,17 +302,14 @@ class Table(SchemaObject):
|
|
|
312
302
|
def _effective_base_versions(self) -> list[Optional[int]]:
|
|
313
303
|
"""The effective versions of the ancestor bases, starting with its immediate base."""
|
|
314
304
|
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
return self._tbl_version.get().comment
|
|
305
|
+
def _get_comment(self) -> str:
|
|
306
|
+
return self._tbl_version_path.comment()
|
|
318
307
|
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
return self._tbl_version.get().num_retained_versions
|
|
308
|
+
def _get_num_retained_versions(self) -> int:
|
|
309
|
+
return self._tbl_version_path.num_retained_versions()
|
|
322
310
|
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
return self._tbl_version.get().media_validation
|
|
311
|
+
def _get_media_validation(self) -> MediaValidation:
|
|
312
|
+
return self._tbl_version_path.media_validation()
|
|
326
313
|
|
|
327
314
|
def __repr__(self) -> str:
|
|
328
315
|
return self._descriptors().to_string()
|
|
@@ -346,8 +333,8 @@ class Table(SchemaObject):
|
|
|
346
333
|
stores = self._external_store_descriptor()
|
|
347
334
|
if not stores.empty:
|
|
348
335
|
helper.append(stores)
|
|
349
|
-
if self.
|
|
350
|
-
helper.append(f'COMMENT: {self.
|
|
336
|
+
if self._get_comment():
|
|
337
|
+
helper.append(f'COMMENT: {self._get_comment()}')
|
|
351
338
|
return helper
|
|
352
339
|
|
|
353
340
|
def _col_descriptor(self, columns: Optional[list[str]] = None) -> pd.DataFrame:
|
|
@@ -364,6 +351,8 @@ class Table(SchemaObject):
|
|
|
364
351
|
def _index_descriptor(self, columns: Optional[list[str]] = None) -> pd.DataFrame:
|
|
365
352
|
from pixeltable import index
|
|
366
353
|
|
|
354
|
+
if self._tbl_version is None:
|
|
355
|
+
return pd.DataFrame([])
|
|
367
356
|
pd_rows = []
|
|
368
357
|
for name, info in self._tbl_version.get().idxs_by_name.items():
|
|
369
358
|
if isinstance(info.idx, index.EmbeddingIndex) and (columns is None or info.col.name in columns):
|
|
@@ -383,7 +372,7 @@ class Table(SchemaObject):
|
|
|
383
372
|
|
|
384
373
|
def _external_store_descriptor(self) -> pd.DataFrame:
|
|
385
374
|
pd_rows = []
|
|
386
|
-
for name, store in self.
|
|
375
|
+
for name, store in self._tbl_version_path.tbl_version.get().external_stores.items():
|
|
387
376
|
row = {'External Store': name, 'Type': type(store).__name__}
|
|
388
377
|
pd_rows.append(row)
|
|
389
378
|
return pd.DataFrame(pd_rows)
|
|
@@ -392,7 +381,6 @@ class Table(SchemaObject):
|
|
|
392
381
|
"""
|
|
393
382
|
Print the table schema.
|
|
394
383
|
"""
|
|
395
|
-
self._check_is_dropped()
|
|
396
384
|
if getattr(builtins, '__IPYTHON__', False):
|
|
397
385
|
from IPython.display import Markdown, display
|
|
398
386
|
|
|
@@ -400,11 +388,6 @@ class Table(SchemaObject):
|
|
|
400
388
|
else:
|
|
401
389
|
print(repr(self))
|
|
402
390
|
|
|
403
|
-
def _drop(self) -> None:
|
|
404
|
-
self._check_is_dropped()
|
|
405
|
-
self._tbl_version.get().drop()
|
|
406
|
-
self._is_dropped = True
|
|
407
|
-
|
|
408
391
|
# TODO Factor this out into a separate module.
|
|
409
392
|
# The return type is unresolvable, but torch can't be imported since it's an optional dependency.
|
|
410
393
|
def to_pytorch_dataset(self, image_format: str = 'pt') -> 'torch.utils.data.IterableDataset':
|
|
@@ -422,9 +405,11 @@ class Table(SchemaObject):
|
|
|
422
405
|
def _column_has_dependents(self, col: Column) -> bool:
|
|
423
406
|
"""Returns True if the column has dependents, False otherwise."""
|
|
424
407
|
assert col is not None
|
|
425
|
-
assert col.name in self.
|
|
426
|
-
|
|
408
|
+
assert col.name in self._get_schema()
|
|
409
|
+
cat = catalog.Catalog.get()
|
|
410
|
+
if any(c.name is not None for c in cat.get_column_dependents(col.tbl.id, col.id)):
|
|
427
411
|
return True
|
|
412
|
+
assert self._tbl_version is not None
|
|
428
413
|
return any(
|
|
429
414
|
col in store.get_local_columns()
|
|
430
415
|
for view in (self, *self._get_views(recursive=True))
|
|
@@ -436,8 +421,8 @@ class Table(SchemaObject):
|
|
|
436
421
|
|
|
437
422
|
If `if_exists='ignore'`, returns a list of existing columns, if any, in `new_col_names`.
|
|
438
423
|
"""
|
|
439
|
-
assert not
|
|
440
|
-
existing_col_names = set(self.
|
|
424
|
+
assert self._tbl_version is not None
|
|
425
|
+
existing_col_names = set(self._get_schema().keys())
|
|
441
426
|
cols_to_ignore = []
|
|
442
427
|
for new_col_name in new_col_names:
|
|
443
428
|
if new_col_name in existing_col_names:
|
|
@@ -507,9 +492,9 @@ class Table(SchemaObject):
|
|
|
507
492
|
"""
|
|
508
493
|
from pixeltable.catalog import Catalog
|
|
509
494
|
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
if self.
|
|
495
|
+
# lock_mutable_tree=True: we might end up having to drop existing columns, which requires locking the tree
|
|
496
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
497
|
+
if self._tbl_version_path.is_snapshot():
|
|
513
498
|
raise excs.Error('Cannot add column to a snapshot.')
|
|
514
499
|
col_schema = {
|
|
515
500
|
col_name: {'type': ts.ColumnType.normalize_type(spec, nullable_default=True, allow_builtin_types=False)}
|
|
@@ -530,6 +515,7 @@ class Table(SchemaObject):
|
|
|
530
515
|
new_cols = self._create_columns(col_schema)
|
|
531
516
|
for new_col in new_cols:
|
|
532
517
|
self._verify_column(new_col)
|
|
518
|
+
assert self._tbl_version is not None
|
|
533
519
|
status = self._tbl_version.get().add_columns(new_cols, print_stats=False, on_error='abort')
|
|
534
520
|
FileCache.get().emit_eviction_warnings()
|
|
535
521
|
return status
|
|
@@ -570,10 +556,9 @@ class Table(SchemaObject):
|
|
|
570
556
|
"""
|
|
571
557
|
from pixeltable.catalog import Catalog
|
|
572
558
|
|
|
573
|
-
with Catalog.get().begin_xact(
|
|
574
|
-
self._check_is_dropped()
|
|
559
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
575
560
|
# verify kwargs
|
|
576
|
-
if self.
|
|
561
|
+
if self._tbl_version_path.is_snapshot():
|
|
577
562
|
raise excs.Error('Cannot add column to a snapshot.')
|
|
578
563
|
# verify kwargs and construct column schema dict
|
|
579
564
|
if len(kwargs) != 1:
|
|
@@ -637,9 +622,8 @@ class Table(SchemaObject):
|
|
|
637
622
|
"""
|
|
638
623
|
from pixeltable.catalog import Catalog
|
|
639
624
|
|
|
640
|
-
with Catalog.get().begin_xact(
|
|
641
|
-
self.
|
|
642
|
-
if self.get_metadata()['is_snapshot']:
|
|
625
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
626
|
+
if self._tbl_version_path.is_snapshot():
|
|
643
627
|
raise excs.Error('Cannot add column to a snapshot.')
|
|
644
628
|
if len(kwargs) != 1:
|
|
645
629
|
raise excs.Error(
|
|
@@ -676,6 +660,7 @@ class Table(SchemaObject):
|
|
|
676
660
|
|
|
677
661
|
new_col = self._create_columns({col_name: col_schema})[0]
|
|
678
662
|
self._verify_column(new_col)
|
|
663
|
+
assert self._tbl_version is not None
|
|
679
664
|
status = self._tbl_version.get().add_columns([new_col], print_stats=print_stats, on_error=on_error)
|
|
680
665
|
FileCache.get().emit_eviction_warnings()
|
|
681
666
|
return status
|
|
@@ -822,8 +807,9 @@ class Table(SchemaObject):
|
|
|
822
807
|
"""
|
|
823
808
|
from pixeltable.catalog import Catalog
|
|
824
809
|
|
|
825
|
-
|
|
826
|
-
|
|
810
|
+
cat = Catalog.get()
|
|
811
|
+
# lock_mutable_tree=True: we need to be able to see whether any transitive view has column dependents
|
|
812
|
+
with cat.begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
827
813
|
if self._tbl_version_path.is_snapshot():
|
|
828
814
|
raise excs.Error('Cannot drop column from a snapshot.')
|
|
829
815
|
col: Column = None
|
|
@@ -846,20 +832,22 @@ class Table(SchemaObject):
|
|
|
846
832
|
return
|
|
847
833
|
col = column.col
|
|
848
834
|
|
|
849
|
-
dependent_user_cols = [c for c in col.
|
|
835
|
+
dependent_user_cols = [c for c in cat.get_column_dependents(col.tbl.id, col.id) if c.name is not None]
|
|
850
836
|
if len(dependent_user_cols) > 0:
|
|
851
837
|
raise excs.Error(
|
|
852
838
|
f'Cannot drop column `{col.name}` because the following columns depend on it:\n'
|
|
853
839
|
f'{", ".join(c.name for c in dependent_user_cols)}'
|
|
854
840
|
)
|
|
855
841
|
|
|
842
|
+
_ = self._get_views(recursive=True, include_snapshots=False)
|
|
856
843
|
# See if this column has a dependent store. We need to look through all stores in all
|
|
857
844
|
# (transitive) views of this table.
|
|
845
|
+
col_handle = col.handle
|
|
858
846
|
dependent_stores = [
|
|
859
847
|
(view, store)
|
|
860
|
-
for view in (self, *self._get_views(recursive=True))
|
|
848
|
+
for view in (self, *self._get_views(recursive=True, include_snapshots=False))
|
|
861
849
|
for store in view._tbl_version.get().external_stores.values()
|
|
862
|
-
if
|
|
850
|
+
if col_handle in store.get_local_columns()
|
|
863
851
|
]
|
|
864
852
|
if len(dependent_stores) > 0:
|
|
865
853
|
dependent_store_names = [
|
|
@@ -891,7 +879,7 @@ class Table(SchemaObject):
|
|
|
891
879
|
"""
|
|
892
880
|
from pixeltable.catalog import Catalog
|
|
893
881
|
|
|
894
|
-
with Catalog.get().begin_xact(
|
|
882
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=False):
|
|
895
883
|
self._tbl_version.get().rename_column(old_name, new_name)
|
|
896
884
|
|
|
897
885
|
def _list_index_info_for_test(self) -> list[dict[str, Any]]:
|
|
@@ -902,7 +890,6 @@ class Table(SchemaObject):
|
|
|
902
890
|
A list of index information, each containing the index's
|
|
903
891
|
id, name, and the name of the column it indexes.
|
|
904
892
|
"""
|
|
905
|
-
assert not self._is_dropped
|
|
906
893
|
index_info = []
|
|
907
894
|
for idx_name, idx in self._tbl_version.get().idxs_by_name.items():
|
|
908
895
|
index_info.append({'_id': idx.id, '_name': idx_name, '_column': idx.col.name})
|
|
@@ -1001,7 +988,7 @@ class Table(SchemaObject):
|
|
|
1001
988
|
"""
|
|
1002
989
|
from pixeltable.catalog import Catalog
|
|
1003
990
|
|
|
1004
|
-
with Catalog.get().begin_xact(
|
|
991
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
1005
992
|
if self._tbl_version_path.is_snapshot():
|
|
1006
993
|
raise excs.Error('Cannot add an index to a snapshot')
|
|
1007
994
|
col = self._resolve_column_parameter(column)
|
|
@@ -1090,7 +1077,7 @@ class Table(SchemaObject):
|
|
|
1090
1077
|
if (column is None) == (idx_name is None):
|
|
1091
1078
|
raise excs.Error("Exactly one of 'column' or 'idx_name' must be provided")
|
|
1092
1079
|
|
|
1093
|
-
with Catalog.get().begin_xact(
|
|
1080
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
1094
1081
|
col: Column = None
|
|
1095
1082
|
if idx_name is None:
|
|
1096
1083
|
col = self._resolve_column_parameter(column)
|
|
@@ -1169,7 +1156,7 @@ class Table(SchemaObject):
|
|
|
1169
1156
|
if (column is None) == (idx_name is None):
|
|
1170
1157
|
raise excs.Error("Exactly one of 'column' or 'idx_name' must be provided")
|
|
1171
1158
|
|
|
1172
|
-
with Catalog.get().begin_xact(
|
|
1159
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=False):
|
|
1173
1160
|
col: Column = None
|
|
1174
1161
|
if idx_name is None:
|
|
1175
1162
|
col = self._resolve_column_parameter(column)
|
|
@@ -1185,6 +1172,8 @@ class Table(SchemaObject):
|
|
|
1185
1172
|
_idx_class: Optional[type[index.IndexBase]] = None,
|
|
1186
1173
|
if_not_exists: Literal['error', 'ignore'] = 'error',
|
|
1187
1174
|
) -> None:
|
|
1175
|
+
from pixeltable.catalog import Catalog
|
|
1176
|
+
|
|
1188
1177
|
if self._tbl_version_path.is_snapshot():
|
|
1189
1178
|
raise excs.Error('Cannot drop an index from a snapshot')
|
|
1190
1179
|
assert (col is None) != (idx_name is None)
|
|
@@ -1216,7 +1205,10 @@ class Table(SchemaObject):
|
|
|
1216
1205
|
idx_info = idx_info_list[0]
|
|
1217
1206
|
|
|
1218
1207
|
# Find out if anything depends on this index
|
|
1219
|
-
|
|
1208
|
+
val_col = idx_info.val_col
|
|
1209
|
+
dependent_user_cols = [
|
|
1210
|
+
c for c in Catalog.get().get_column_dependents(val_col.tbl.id, val_col.id) if c.name is not None
|
|
1211
|
+
]
|
|
1220
1212
|
if len(dependent_user_cols) > 0:
|
|
1221
1213
|
raise excs.Error(
|
|
1222
1214
|
f'Cannot drop index because the following columns depend on it:\n'
|
|
@@ -1332,6 +1324,9 @@ class Table(SchemaObject):
|
|
|
1332
1324
|
where: a predicate to filter rows to update.
|
|
1333
1325
|
cascade: if True, also update all computed columns that transitively depend on the updated columns.
|
|
1334
1326
|
|
|
1327
|
+
Returns:
|
|
1328
|
+
An [`UpdateStatus`][pixeltable.UpdateStatus] object containing information about the update.
|
|
1329
|
+
|
|
1335
1330
|
Examples:
|
|
1336
1331
|
Set column `int_col` to 1 for all rows:
|
|
1337
1332
|
|
|
@@ -1351,7 +1346,9 @@ class Table(SchemaObject):
|
|
|
1351
1346
|
"""
|
|
1352
1347
|
from pixeltable.catalog import Catalog
|
|
1353
1348
|
|
|
1354
|
-
with Catalog.get().begin_xact(
|
|
1349
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
1350
|
+
if self._tbl_version_path.is_snapshot():
|
|
1351
|
+
raise excs.Error('Cannot update a snapshot')
|
|
1355
1352
|
status = self._tbl_version.get().update(value_spec, where, cascade)
|
|
1356
1353
|
FileCache.get().emit_eviction_warnings()
|
|
1357
1354
|
return status
|
|
@@ -1389,7 +1386,7 @@ class Table(SchemaObject):
|
|
|
1389
1386
|
"""
|
|
1390
1387
|
from pixeltable.catalog import Catalog
|
|
1391
1388
|
|
|
1392
|
-
with Catalog.get().begin_xact(
|
|
1389
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
1393
1390
|
if self._tbl_version_path.is_snapshot():
|
|
1394
1391
|
raise excs.Error('Cannot update a snapshot')
|
|
1395
1392
|
rows = list(rows)
|
|
@@ -1428,6 +1425,69 @@ class Table(SchemaObject):
|
|
|
1428
1425
|
FileCache.get().emit_eviction_warnings()
|
|
1429
1426
|
return status
|
|
1430
1427
|
|
|
1428
|
+
def recompute_columns(
|
|
1429
|
+
self, *columns: Union[str, ColumnRef], errors_only: bool = False, cascade: bool = True
|
|
1430
|
+
) -> UpdateStatus:
|
|
1431
|
+
"""Recompute the values in one or more computed columns of this table.
|
|
1432
|
+
|
|
1433
|
+
Args:
|
|
1434
|
+
columns: The names or references of the computed columns to recompute.
|
|
1435
|
+
errors_only: If True, only run the recomputation for rows that have errors in the column (ie, the column's
|
|
1436
|
+
`errortype` property is non-None). Only allowed for recomputing a single column.
|
|
1437
|
+
cascade: if True, also update all computed columns that transitively depend on the recomputed columns.
|
|
1438
|
+
|
|
1439
|
+
Examples:
|
|
1440
|
+
Recompute computed columns `c1` and `c2` for all rows in this table, and everything that transitively
|
|
1441
|
+
depends on them:
|
|
1442
|
+
|
|
1443
|
+
>>> tbl.recompute_columns('c1', 'c2')
|
|
1444
|
+
|
|
1445
|
+
Recompute computed column `c1` for all rows in this table, but don't recompute other columns that depend on
|
|
1446
|
+
it:
|
|
1447
|
+
|
|
1448
|
+
>>> tbl.recompute_columns(tbl.c1, cascade=False)
|
|
1449
|
+
|
|
1450
|
+
Recompute column `c1` and its dependents, but only for rows that have errors in it:
|
|
1451
|
+
|
|
1452
|
+
>>> tbl.recompute_columns('c1', errors_only=True)
|
|
1453
|
+
"""
|
|
1454
|
+
from pixeltable.catalog import Catalog
|
|
1455
|
+
|
|
1456
|
+
cat = Catalog.get()
|
|
1457
|
+
# lock_mutable_tree=True: we need to be able to see whether any transitive view has column dependents
|
|
1458
|
+
with cat.begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
1459
|
+
if self._tbl_version_path.is_snapshot():
|
|
1460
|
+
raise excs.Error('Cannot recompute columns of a snapshot.')
|
|
1461
|
+
if len(columns) == 0:
|
|
1462
|
+
raise excs.Error('At least one column must be specified to recompute')
|
|
1463
|
+
if errors_only and len(columns) > 1:
|
|
1464
|
+
raise excs.Error('Cannot use errors_only=True with multiple columns')
|
|
1465
|
+
|
|
1466
|
+
col_names: list[str] = []
|
|
1467
|
+
for column in columns:
|
|
1468
|
+
col_name: str
|
|
1469
|
+
col: Column
|
|
1470
|
+
if isinstance(column, str):
|
|
1471
|
+
col = self._tbl_version_path.get_column(column, include_bases=True)
|
|
1472
|
+
if col is None:
|
|
1473
|
+
raise excs.Error(f'Unknown column: {column!r}')
|
|
1474
|
+
col_name = column
|
|
1475
|
+
else:
|
|
1476
|
+
assert isinstance(column, ColumnRef)
|
|
1477
|
+
col = column.col
|
|
1478
|
+
if not self._tbl_version_path.has_column(col, include_bases=True):
|
|
1479
|
+
raise excs.Error(f'Unknown column: {col.name!r}')
|
|
1480
|
+
col_name = col.name
|
|
1481
|
+
if not col.is_computed:
|
|
1482
|
+
raise excs.Error(f'Column {col_name!r} is not a computed column')
|
|
1483
|
+
if col.tbl.id != self._tbl_version_path.tbl_id:
|
|
1484
|
+
raise excs.Error(f'Cannot recompute column of a base: {col_name!r}')
|
|
1485
|
+
col_names.append(col_name)
|
|
1486
|
+
|
|
1487
|
+
status = self._tbl_version.get().recompute_columns(col_names, errors_only=errors_only, cascade=cascade)
|
|
1488
|
+
FileCache.get().emit_eviction_warnings()
|
|
1489
|
+
return status
|
|
1490
|
+
|
|
1431
1491
|
def delete(self, where: Optional['exprs.Expr'] = None) -> UpdateStatus:
|
|
1432
1492
|
"""Delete rows in this table.
|
|
1433
1493
|
|
|
@@ -1453,14 +1513,13 @@ class Table(SchemaObject):
|
|
|
1453
1513
|
"""
|
|
1454
1514
|
from pixeltable.catalog import Catalog
|
|
1455
1515
|
|
|
1456
|
-
with Catalog.get().begin_xact(
|
|
1516
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
1457
1517
|
if self._tbl_version_path.is_snapshot():
|
|
1458
1518
|
raise excs.Error('Cannot revert a snapshot')
|
|
1459
1519
|
self._tbl_version.get().revert()
|
|
1460
1520
|
# remove cached md in order to force a reload on the next operation
|
|
1461
|
-
self.
|
|
1521
|
+
self._tbl_version_path.clear_cached_md()
|
|
1462
1522
|
|
|
1463
|
-
@property
|
|
1464
1523
|
def external_stores(self) -> list[str]:
|
|
1465
1524
|
return list(self._tbl_version.get().external_stores.keys())
|
|
1466
1525
|
|
|
@@ -1470,10 +1529,10 @@ class Table(SchemaObject):
|
|
|
1470
1529
|
"""
|
|
1471
1530
|
from pixeltable.catalog import Catalog
|
|
1472
1531
|
|
|
1473
|
-
with Catalog.get().begin_xact(
|
|
1474
|
-
if self.
|
|
1532
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=False):
|
|
1533
|
+
if self._tbl_version_path.is_snapshot():
|
|
1475
1534
|
raise excs.Error(f'Table `{self._name}` is a snapshot, so it cannot be linked to an external store.')
|
|
1476
|
-
if store.name in self.external_stores:
|
|
1535
|
+
if store.name in self.external_stores():
|
|
1477
1536
|
raise excs.Error(f'Table `{self._name}` already has an external store with that name: {store.name}')
|
|
1478
1537
|
_logger.info(f'Linking external store `{store.name}` to table `{self._name}`')
|
|
1479
1538
|
|
|
@@ -1501,9 +1560,10 @@ class Table(SchemaObject):
|
|
|
1501
1560
|
"""
|
|
1502
1561
|
from pixeltable.catalog import Catalog
|
|
1503
1562
|
|
|
1504
|
-
|
|
1505
|
-
|
|
1506
|
-
|
|
1563
|
+
if self._tbl_version_path.is_snapshot():
|
|
1564
|
+
return
|
|
1565
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=False):
|
|
1566
|
+
all_stores = self.external_stores()
|
|
1507
1567
|
|
|
1508
1568
|
if stores is None:
|
|
1509
1569
|
stores = all_stores
|
|
@@ -1540,9 +1600,13 @@ class Table(SchemaObject):
|
|
|
1540
1600
|
"""
|
|
1541
1601
|
from pixeltable.catalog import Catalog
|
|
1542
1602
|
|
|
1543
|
-
|
|
1544
|
-
|
|
1545
|
-
|
|
1603
|
+
if self._tbl_version_path.is_snapshot():
|
|
1604
|
+
return pxt.io.SyncStatus()
|
|
1605
|
+
# we lock the entire tree starting at the root base table in order to ensure that all synced columns can
|
|
1606
|
+
# have their updates propagated down the tree
|
|
1607
|
+
base_tv = self._tbl_version_path.get_tbl_versions()[-1]
|
|
1608
|
+
with Catalog.get().begin_xact(tbl=TableVersionPath(base_tv), for_write=True, lock_mutable_tree=True):
|
|
1609
|
+
all_stores = self.external_stores()
|
|
1546
1610
|
|
|
1547
1611
|
if stores is None:
|
|
1548
1612
|
stores = all_stores
|
|
@@ -1553,16 +1617,78 @@ class Table(SchemaObject):
|
|
|
1553
1617
|
if store not in all_stores:
|
|
1554
1618
|
raise excs.Error(f'Table `{self._name}` has no external store with that name: {store}')
|
|
1555
1619
|
|
|
1556
|
-
sync_status = pxt.io.SyncStatus
|
|
1620
|
+
sync_status = pxt.io.SyncStatus()
|
|
1557
1621
|
for store in stores:
|
|
1558
1622
|
store_obj = self._tbl_version.get().external_stores[store]
|
|
1559
1623
|
store_sync_status = store_obj.sync(self, export_data=export_data, import_data=import_data)
|
|
1560
|
-
sync_status
|
|
1624
|
+
sync_status += store_sync_status
|
|
1561
1625
|
|
|
1562
1626
|
return sync_status
|
|
1563
1627
|
|
|
1564
1628
|
def __dir__(self) -> list[str]:
|
|
1565
|
-
return list(super().__dir__()) + list(self.
|
|
1629
|
+
return list(super().__dir__()) + list(self._get_schema().keys())
|
|
1566
1630
|
|
|
1567
1631
|
def _ipython_key_completions_(self) -> list[str]:
|
|
1568
|
-
return list(self.
|
|
1632
|
+
return list(self._get_schema().keys())
|
|
1633
|
+
|
|
1634
|
+
def history(self, n: Optional[int] = None) -> pixeltable.dataframe.DataFrameResultSet:
|
|
1635
|
+
"""Returns rows of information about the versions of this table, most recent first.
|
|
1636
|
+
|
|
1637
|
+
Args:
|
|
1638
|
+
n: a limit to the number of versions listed
|
|
1639
|
+
|
|
1640
|
+
Examples:
|
|
1641
|
+
Report history:
|
|
1642
|
+
|
|
1643
|
+
>>> tbl.history()
|
|
1644
|
+
|
|
1645
|
+
Report only the most recent 5 changes to the table:
|
|
1646
|
+
|
|
1647
|
+
>>> tbl.history(n=5)
|
|
1648
|
+
|
|
1649
|
+
Returns:
|
|
1650
|
+
A result set with one row per version (columns `version`, `created_at`, `change`, `schema_change`), ordered from most recent to oldest version.
|
|
1651
|
+
"""
|
|
1652
|
+
from pixeltable.catalog import Catalog
|
|
1653
|
+
|
|
1654
|
+
if n is None:
|
|
1655
|
+
n = 1000_000_000
|
|
1656
|
+
if not isinstance(n, int) or n < 1:
|
|
1657
|
+
raise excs.Error(f'Invalid value for n: {n}')
|
|
1658
|
+
|
|
1659
|
+
# Retrieve the table history components from the catalog
|
|
1660
|
+
tbl_id = self._id
|
|
1661
|
+
# Collect an extra version, if available, to allow for computation of the first version's schema change
|
|
1662
|
+
vers_list = Catalog.get().collect_tbl_history(tbl_id, n + 1)
|
|
1663
|
+
|
|
1664
|
+
# Construct the metadata change description dictionary
|
|
1665
|
+
md_list = [(vers_md.version_md.version, vers_md.schema_version_md.columns) for vers_md in vers_list]
|
|
1666
|
+
md_dict = MetadataUtils._create_md_change_dict(md_list)
|
|
1667
|
+
|
|
1668
|
+
# Construct report lines
|
|
1669
|
+
if len(vers_list) > n:
|
|
1670
|
+
assert len(vers_list) == n + 1
|
|
1671
|
+
over_count = 1
|
|
1672
|
+
else:
|
|
1673
|
+
over_count = 0
|
|
1674
|
+
|
|
1675
|
+
report_lines: list[list[Any]] = []
|
|
1676
|
+
for vers_md in vers_list[0 : len(vers_list) - over_count]:
|
|
1677
|
+
version = vers_md.version_md.version
|
|
1678
|
+
schema_change = md_dict.get(version, '')
|
|
1679
|
+
change_type = 'schema' if schema_change != '' else 'data'
|
|
1680
|
+
report_line = [
|
|
1681
|
+
version,
|
|
1682
|
+
datetime.datetime.fromtimestamp(vers_md.version_md.created_at),
|
|
1683
|
+
change_type,
|
|
1684
|
+
schema_change,
|
|
1685
|
+
]
|
|
1686
|
+
report_lines.append(report_line)
|
|
1687
|
+
|
|
1688
|
+
report_schema = {
|
|
1689
|
+
'version': ts.IntType(),
|
|
1690
|
+
'created_at': ts.TimestampType(),
|
|
1691
|
+
'change': ts.StringType(),
|
|
1692
|
+
'schema_change': ts.StringType(),
|
|
1693
|
+
}
|
|
1694
|
+
return pxt.dataframe.DataFrameResultSet(report_lines, report_schema)
|