pixeltable 0.3.15__py3-none-any.whl → 0.4.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/catalog.py +296 -105
- pixeltable/catalog/column.py +10 -8
- pixeltable/catalog/dir.py +1 -2
- pixeltable/catalog/insertable_table.py +25 -20
- pixeltable/catalog/schema_object.py +3 -6
- pixeltable/catalog/table.py +261 -189
- pixeltable/catalog/table_version.py +333 -202
- pixeltable/catalog/table_version_handle.py +15 -2
- pixeltable/catalog/table_version_path.py +60 -14
- pixeltable/catalog/view.py +38 -6
- pixeltable/dataframe.py +196 -18
- pixeltable/env.py +4 -4
- pixeltable/exec/__init__.py +1 -1
- pixeltable/exec/expr_eval/evaluators.py +4 -1
- pixeltable/exec/in_memory_data_node.py +1 -1
- pixeltable/exec/sql_node.py +171 -22
- pixeltable/exprs/column_property_ref.py +15 -6
- pixeltable/exprs/column_ref.py +32 -11
- pixeltable/exprs/comparison.py +1 -1
- pixeltable/exprs/data_row.py +5 -3
- pixeltable/exprs/expr.py +7 -0
- pixeltable/exprs/literal.py +2 -0
- pixeltable/exprs/row_builder.py +4 -6
- pixeltable/exprs/rowid_ref.py +8 -0
- pixeltable/exprs/similarity_expr.py +1 -0
- pixeltable/func/query_template_function.py +1 -1
- pixeltable/func/tools.py +1 -1
- pixeltable/functions/gemini.py +0 -1
- pixeltable/functions/string.py +212 -58
- pixeltable/globals.py +12 -4
- pixeltable/index/base.py +5 -0
- pixeltable/index/btree.py +5 -0
- pixeltable/index/embedding_index.py +5 -0
- pixeltable/io/external_store.py +8 -29
- pixeltable/io/label_studio.py +1 -1
- pixeltable/io/parquet.py +2 -2
- pixeltable/io/table_data_conduit.py +0 -31
- pixeltable/metadata/__init__.py +11 -2
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_30.py +6 -11
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/util.py +3 -9
- pixeltable/metadata/notes.py +2 -0
- pixeltable/metadata/schema.py +8 -1
- pixeltable/plan.py +221 -14
- pixeltable/share/packager.py +137 -13
- pixeltable/share/publish.py +2 -2
- pixeltable/store.py +19 -13
- pixeltable/utils/dbms.py +1 -1
- pixeltable/utils/formatter.py +64 -42
- pixeltable/utils/sample.py +25 -0
- {pixeltable-0.3.15.dist-info → pixeltable-0.4.0rc2.dist-info}/METADATA +2 -1
- {pixeltable-0.3.15.dist-info → pixeltable-0.4.0rc2.dist-info}/RECORD +58 -55
- {pixeltable-0.3.15.dist-info → pixeltable-0.4.0rc2.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.15.dist-info → pixeltable-0.4.0rc2.dist-info}/WHEEL +0 -0
- {pixeltable-0.3.15.dist-info → pixeltable-0.4.0rc2.dist-info}/entry_points.txt +0 -0
pixeltable/catalog/table.py
CHANGED
|
@@ -4,11 +4,11 @@ import abc
|
|
|
4
4
|
import builtins
|
|
5
5
|
import json
|
|
6
6
|
import logging
|
|
7
|
+
from keyword import iskeyword as is_python_keyword
|
|
7
8
|
from pathlib import Path
|
|
8
9
|
from typing import TYPE_CHECKING, Any, Iterable, Literal, Optional, Union, overload
|
|
9
10
|
|
|
10
11
|
from typing import _GenericAlias # type: ignore[attr-defined] # isort: skip
|
|
11
|
-
from keyword import iskeyword as is_python_keyword
|
|
12
12
|
from uuid import UUID
|
|
13
13
|
|
|
14
14
|
import pandas as pd
|
|
@@ -16,7 +16,6 @@ import sqlalchemy as sql
|
|
|
16
16
|
|
|
17
17
|
import pixeltable as pxt
|
|
18
18
|
from pixeltable import catalog, env, exceptions as excs, exprs, index, type_system as ts
|
|
19
|
-
from pixeltable.env import Env
|
|
20
19
|
from pixeltable.metadata import schema
|
|
21
20
|
|
|
22
21
|
from ..exprs import ColumnRef
|
|
@@ -62,11 +61,6 @@ class Table(SchemaObject):
|
|
|
62
61
|
self._is_dropped = False
|
|
63
62
|
self.__tbl_version_path = tbl_version_path
|
|
64
63
|
|
|
65
|
-
# @property
|
|
66
|
-
# def _has_dependents(self) -> bool:
|
|
67
|
-
# """Returns True if this table has any dependent views, or snapshots."""
|
|
68
|
-
# return len(self._get_views(recursive=False)) > 0
|
|
69
|
-
|
|
70
64
|
def _move(self, new_name: str, new_dir_id: UUID) -> None:
|
|
71
65
|
self._check_is_dropped()
|
|
72
66
|
super()._move(new_name, new_dir_id)
|
|
@@ -106,10 +100,12 @@ class Table(SchemaObject):
|
|
|
106
100
|
}
|
|
107
101
|
```
|
|
108
102
|
"""
|
|
109
|
-
|
|
110
|
-
|
|
103
|
+
from pixeltable.catalog import Catalog
|
|
104
|
+
|
|
105
|
+
with Catalog.get().begin_xact(for_write=False):
|
|
106
|
+
self._check_is_dropped()
|
|
111
107
|
md = super().get_metadata()
|
|
112
|
-
md['base'] = self._base_table._path if self._base_table is not None else None
|
|
108
|
+
md['base'] = self._base_table._path() if self._base_table is not None else None
|
|
113
109
|
md['schema'] = self._schema
|
|
114
110
|
md['is_replica'] = self._tbl_version.get().is_replica
|
|
115
111
|
md['version'] = self._version
|
|
@@ -163,9 +159,11 @@ class Table(SchemaObject):
|
|
|
163
159
|
Returns:
|
|
164
160
|
A list of view paths.
|
|
165
161
|
"""
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
162
|
+
from pixeltable.catalog import Catalog
|
|
163
|
+
|
|
164
|
+
with Catalog.get().begin_xact(for_write=False):
|
|
165
|
+
self._check_is_dropped()
|
|
166
|
+
return [t._path() for t in self._get_views(recursive=recursive)]
|
|
169
167
|
|
|
170
168
|
def _get_views(self, *, recursive: bool = True) -> list['Table']:
|
|
171
169
|
cat = catalog.Catalog.get()
|
|
@@ -187,14 +185,20 @@ class Table(SchemaObject):
|
|
|
187
185
|
|
|
188
186
|
See [`DataFrame.select`][pixeltable.DataFrame.select] for more details.
|
|
189
187
|
"""
|
|
190
|
-
|
|
188
|
+
from pixeltable.catalog import Catalog
|
|
189
|
+
|
|
190
|
+
with Catalog.get().begin_xact(for_write=False):
|
|
191
|
+
return self._df().select(*items, **named_items)
|
|
191
192
|
|
|
192
193
|
def where(self, pred: 'exprs.Expr') -> 'pxt.DataFrame':
|
|
193
194
|
"""Filter rows from this table based on the expression.
|
|
194
195
|
|
|
195
196
|
See [`DataFrame.where`][pixeltable.DataFrame.where] for more details.
|
|
196
197
|
"""
|
|
197
|
-
|
|
198
|
+
from pixeltable.catalog import Catalog
|
|
199
|
+
|
|
200
|
+
with Catalog.get().begin_xact(for_write=False):
|
|
201
|
+
return self._df().where(pred)
|
|
198
202
|
|
|
199
203
|
def join(
|
|
200
204
|
self,
|
|
@@ -204,21 +208,30 @@ class Table(SchemaObject):
|
|
|
204
208
|
how: 'pixeltable.plan.JoinType.LiteralType' = 'inner',
|
|
205
209
|
) -> 'pxt.DataFrame':
|
|
206
210
|
"""Join this table with another table."""
|
|
207
|
-
|
|
211
|
+
from pixeltable.catalog import Catalog
|
|
212
|
+
|
|
213
|
+
with Catalog.get().begin_xact(for_write=False):
|
|
214
|
+
return self._df().join(other, on=on, how=how)
|
|
208
215
|
|
|
209
216
|
def order_by(self, *items: 'exprs.Expr', asc: bool = True) -> 'pxt.DataFrame':
|
|
210
217
|
"""Order the rows of this table based on the expression.
|
|
211
218
|
|
|
212
219
|
See [`DataFrame.order_by`][pixeltable.DataFrame.order_by] for more details.
|
|
213
220
|
"""
|
|
214
|
-
|
|
221
|
+
from pixeltable.catalog import Catalog
|
|
222
|
+
|
|
223
|
+
with Catalog.get().begin_xact(for_write=False):
|
|
224
|
+
return self._df().order_by(*items, asc=asc)
|
|
215
225
|
|
|
216
226
|
def group_by(self, *items: 'exprs.Expr') -> 'pxt.DataFrame':
|
|
217
227
|
"""Group the rows of this table based on the expression.
|
|
218
228
|
|
|
219
229
|
See [`DataFrame.group_by`][pixeltable.DataFrame.group_by] for more details.
|
|
220
230
|
"""
|
|
221
|
-
|
|
231
|
+
from pixeltable.catalog import Catalog
|
|
232
|
+
|
|
233
|
+
with Catalog.get().begin_xact(for_write=False):
|
|
234
|
+
return self._df().group_by(*items)
|
|
222
235
|
|
|
223
236
|
def distinct(self) -> 'pxt.DataFrame':
|
|
224
237
|
"""Remove duplicate rows from table."""
|
|
@@ -227,6 +240,22 @@ class Table(SchemaObject):
|
|
|
227
240
|
def limit(self, n: int) -> 'pxt.DataFrame':
|
|
228
241
|
return self._df().limit(n)
|
|
229
242
|
|
|
243
|
+
def sample(
|
|
244
|
+
self,
|
|
245
|
+
n: Optional[int] = None,
|
|
246
|
+
n_per_stratum: Optional[int] = None,
|
|
247
|
+
fraction: Optional[float] = None,
|
|
248
|
+
seed: Optional[int] = None,
|
|
249
|
+
stratify_by: Any = None,
|
|
250
|
+
) -> pxt.DataFrame:
|
|
251
|
+
"""Choose a shuffled sample of rows
|
|
252
|
+
|
|
253
|
+
See [`DataFrame.sample`][pixeltable.DataFrame.sample] for more details.
|
|
254
|
+
"""
|
|
255
|
+
return self._df().sample(
|
|
256
|
+
n=n, n_per_stratum=n_per_stratum, fraction=fraction, seed=seed, stratify_by=stratify_by
|
|
257
|
+
)
|
|
258
|
+
|
|
230
259
|
def collect(self) -> 'pxt.dataframe.DataFrameResultSet':
|
|
231
260
|
"""Return rows from this table."""
|
|
232
261
|
return self._df().collect()
|
|
@@ -305,18 +334,21 @@ class Table(SchemaObject):
|
|
|
305
334
|
"""
|
|
306
335
|
Constructs a list of descriptors for this table that can be pretty-printed.
|
|
307
336
|
"""
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
helper.append(
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
337
|
+
from pixeltable.catalog import Catalog
|
|
338
|
+
|
|
339
|
+
with Catalog.get().begin_xact(for_write=False):
|
|
340
|
+
helper = DescriptionHelper()
|
|
341
|
+
helper.append(self._table_descriptor())
|
|
342
|
+
helper.append(self._col_descriptor())
|
|
343
|
+
idxs = self._index_descriptor()
|
|
344
|
+
if not idxs.empty:
|
|
345
|
+
helper.append(idxs)
|
|
346
|
+
stores = self._external_store_descriptor()
|
|
347
|
+
if not stores.empty:
|
|
348
|
+
helper.append(stores)
|
|
349
|
+
if self._comment:
|
|
350
|
+
helper.append(f'COMMENT: {self._comment}')
|
|
351
|
+
return helper
|
|
320
352
|
|
|
321
353
|
def _col_descriptor(self, columns: Optional[list[str]] = None) -> pd.DataFrame:
|
|
322
354
|
return pd.DataFrame(
|
|
@@ -473,15 +505,17 @@ class Table(SchemaObject):
|
|
|
473
505
|
... }
|
|
474
506
|
... tbl.add_columns(schema)
|
|
475
507
|
"""
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
508
|
+
from pixeltable.catalog import Catalog
|
|
509
|
+
|
|
510
|
+
with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
|
|
511
|
+
self._check_is_dropped()
|
|
512
|
+
if self.get_metadata()['is_snapshot']:
|
|
513
|
+
raise excs.Error('Cannot add column to a snapshot.')
|
|
514
|
+
col_schema = {
|
|
515
|
+
col_name: {'type': ts.ColumnType.normalize_type(spec, nullable_default=True, allow_builtin_types=False)}
|
|
516
|
+
for col_name, spec in schema.items()
|
|
517
|
+
}
|
|
518
|
+
|
|
485
519
|
# handle existing columns based on if_exists parameter
|
|
486
520
|
cols_to_ignore = self._ignore_or_drop_existing_columns(
|
|
487
521
|
list(col_schema.keys()), IfExistsParam.validated(if_exists, 'if_exists')
|
|
@@ -534,22 +568,25 @@ class Table(SchemaObject):
|
|
|
534
568
|
|
|
535
569
|
>>> tbl.add_columns({'new_col': pxt.Int})
|
|
536
570
|
"""
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
)
|
|
552
|
-
|
|
571
|
+
from pixeltable.catalog import Catalog
|
|
572
|
+
|
|
573
|
+
with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
|
|
574
|
+
self._check_is_dropped()
|
|
575
|
+
# verify kwargs
|
|
576
|
+
if self._tbl_version.get().is_snapshot:
|
|
577
|
+
raise excs.Error('Cannot add column to a snapshot.')
|
|
578
|
+
# verify kwargs and construct column schema dict
|
|
579
|
+
if len(kwargs) != 1:
|
|
580
|
+
raise excs.Error(
|
|
581
|
+
f'add_column() requires exactly one keyword argument of the form "col_name=col_type"; '
|
|
582
|
+
f'got {len(kwargs)} instead ({", ".join(kwargs.keys())})'
|
|
583
|
+
)
|
|
584
|
+
col_type = next(iter(kwargs.values()))
|
|
585
|
+
if not isinstance(col_type, (ts.ColumnType, type, _GenericAlias)):
|
|
586
|
+
raise excs.Error(
|
|
587
|
+
'The argument to add_column() must be a type; did you intend to use add_computed_column() instead?'
|
|
588
|
+
)
|
|
589
|
+
return self.add_columns(kwargs, if_exists=if_exists)
|
|
553
590
|
|
|
554
591
|
def add_computed_column(
|
|
555
592
|
self,
|
|
@@ -598,33 +635,35 @@ class Table(SchemaObject):
|
|
|
598
635
|
|
|
599
636
|
>>> tbl.add_computed_column(rotated=tbl.frame.rotate(90), stored=False)
|
|
600
637
|
"""
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
638
|
+
from pixeltable.catalog import Catalog
|
|
639
|
+
|
|
640
|
+
with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
|
|
641
|
+
self._check_is_dropped()
|
|
642
|
+
if self.get_metadata()['is_snapshot']:
|
|
643
|
+
raise excs.Error('Cannot add column to a snapshot.')
|
|
644
|
+
if len(kwargs) != 1:
|
|
645
|
+
raise excs.Error(
|
|
646
|
+
f'add_computed_column() requires exactly one keyword argument of the form '
|
|
647
|
+
'"column-name=type|value-expression"; '
|
|
648
|
+
f'got {len(kwargs)} arguments instead ({", ".join(list(kwargs.keys()))})'
|
|
649
|
+
)
|
|
650
|
+
col_name, spec = next(iter(kwargs.items()))
|
|
651
|
+
if not is_valid_identifier(col_name):
|
|
652
|
+
raise excs.Error(f'Invalid column name: {col_name!r}')
|
|
653
|
+
|
|
654
|
+
col_schema: dict[str, Any] = {'value': spec}
|
|
655
|
+
if stored is not None:
|
|
656
|
+
col_schema['stored'] = stored
|
|
657
|
+
|
|
658
|
+
# Raise an error if the column expression refers to a column error property
|
|
659
|
+
if isinstance(spec, exprs.Expr):
|
|
660
|
+
for e in spec.subexprs(expr_class=exprs.ColumnPropertyRef, traverse_matches=False):
|
|
661
|
+
if e.is_error_prop():
|
|
662
|
+
raise excs.Error(
|
|
663
|
+
'Use of a reference to an error property of another column is not allowed in a computed '
|
|
664
|
+
f'column. The specified computation for this column contains this reference: `{e!r}`'
|
|
665
|
+
)
|
|
626
666
|
|
|
627
|
-
with Env.get().begin_xact():
|
|
628
667
|
# handle existing columns based on if_exists parameter
|
|
629
668
|
cols_to_ignore = self._ignore_or_drop_existing_columns(
|
|
630
669
|
[col_name], IfExistsParam.validated(if_exists, 'if_exists')
|
|
@@ -781,36 +820,39 @@ class Table(SchemaObject):
|
|
|
781
820
|
>>> tbl = pxt.get_table('my_table')
|
|
782
821
|
... tbl.drop_col(tbl.col, if_not_exists='ignore')
|
|
783
822
|
"""
|
|
784
|
-
|
|
785
|
-
if self._tbl_version_path.is_snapshot():
|
|
786
|
-
raise excs.Error('Cannot drop column from a snapshot.')
|
|
787
|
-
col: Column = None
|
|
788
|
-
if_not_exists_ = IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
|
|
789
|
-
if isinstance(column, str):
|
|
790
|
-
col = self._tbl_version_path.get_column(column, include_bases=False)
|
|
791
|
-
if col is None:
|
|
792
|
-
if if_not_exists_ == IfNotExistsParam.ERROR:
|
|
793
|
-
raise excs.Error(f'Column {column!r} unknown')
|
|
794
|
-
assert if_not_exists_ == IfNotExistsParam.IGNORE
|
|
795
|
-
return
|
|
796
|
-
col = self._tbl_version.get().cols_by_name[column]
|
|
797
|
-
else:
|
|
798
|
-
exists = self._tbl_version_path.has_column(column.col, include_bases=False)
|
|
799
|
-
if not exists:
|
|
800
|
-
if if_not_exists_ == IfNotExistsParam.ERROR:
|
|
801
|
-
raise excs.Error(f'Unknown column: {column.col.qualified_name}')
|
|
802
|
-
assert if_not_exists_ == IfNotExistsParam.IGNORE
|
|
803
|
-
return
|
|
804
|
-
col = column.col
|
|
823
|
+
from pixeltable.catalog import Catalog
|
|
805
824
|
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
)
|
|
825
|
+
with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
|
|
826
|
+
self._check_is_dropped()
|
|
827
|
+
if self._tbl_version_path.is_snapshot():
|
|
828
|
+
raise excs.Error('Cannot drop column from a snapshot.')
|
|
829
|
+
col: Column = None
|
|
830
|
+
if_not_exists_ = IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
|
|
831
|
+
|
|
832
|
+
if isinstance(column, str):
|
|
833
|
+
col = self._tbl_version_path.get_column(column, include_bases=False)
|
|
834
|
+
if col is None:
|
|
835
|
+
if if_not_exists_ == IfNotExistsParam.ERROR:
|
|
836
|
+
raise excs.Error(f'Column {column!r} unknown')
|
|
837
|
+
assert if_not_exists_ == IfNotExistsParam.IGNORE
|
|
838
|
+
return
|
|
839
|
+
col = self._tbl_version.get().cols_by_name[column]
|
|
840
|
+
else:
|
|
841
|
+
exists = self._tbl_version_path.has_column(column.col, include_bases=False)
|
|
842
|
+
if not exists:
|
|
843
|
+
if if_not_exists_ == IfNotExistsParam.ERROR:
|
|
844
|
+
raise excs.Error(f'Unknown column: {column.col.qualified_name}')
|
|
845
|
+
assert if_not_exists_ == IfNotExistsParam.IGNORE
|
|
846
|
+
return
|
|
847
|
+
col = column.col
|
|
848
|
+
|
|
849
|
+
dependent_user_cols = [c for c in col.dependent_cols if c.name is not None]
|
|
850
|
+
if len(dependent_user_cols) > 0:
|
|
851
|
+
raise excs.Error(
|
|
852
|
+
f'Cannot drop column `{col.name}` because the following columns depend on it:\n'
|
|
853
|
+
f'{", ".join(c.name for c in dependent_user_cols)}'
|
|
854
|
+
)
|
|
812
855
|
|
|
813
|
-
with Env.get().begin_xact():
|
|
814
856
|
# See if this column has a dependent store. We need to look through all stores in all
|
|
815
857
|
# (transitive) views of this table.
|
|
816
858
|
dependent_stores = [
|
|
@@ -847,7 +889,9 @@ class Table(SchemaObject):
|
|
|
847
889
|
>>> tbl = pxt.get_table('my_table')
|
|
848
890
|
... tbl.rename_column('col1', 'col2')
|
|
849
891
|
"""
|
|
850
|
-
|
|
892
|
+
from pixeltable.catalog import Catalog
|
|
893
|
+
|
|
894
|
+
with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
|
|
851
895
|
self._tbl_version.get().rename_column(old_name, new_name)
|
|
852
896
|
|
|
853
897
|
def _list_index_info_for_test(self) -> list[dict[str, Any]]:
|
|
@@ -955,11 +999,13 @@ class Table(SchemaObject):
|
|
|
955
999
|
... image_embed=image_embedding_fn
|
|
956
1000
|
... )
|
|
957
1001
|
"""
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
1002
|
+
from pixeltable.catalog import Catalog
|
|
1003
|
+
|
|
1004
|
+
with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
|
|
1005
|
+
if self._tbl_version_path.is_snapshot():
|
|
1006
|
+
raise excs.Error('Cannot add an index to a snapshot')
|
|
1007
|
+
col = self._resolve_column_parameter(column)
|
|
961
1008
|
|
|
962
|
-
with Env.get().begin_xact():
|
|
963
1009
|
if idx_name is not None and idx_name in self._tbl_version.get().idxs_by_name:
|
|
964
1010
|
if_exists_ = IfExistsParam.validated(if_exists, 'if_exists')
|
|
965
1011
|
# An index with the same name already exists.
|
|
@@ -1039,15 +1085,17 @@ class Table(SchemaObject):
|
|
|
1039
1085
|
>>> tbl = pxt.get_table('my_table')
|
|
1040
1086
|
... tbl.drop_embedding_index(idx_name='idx1', if_not_exists='ignore')
|
|
1041
1087
|
"""
|
|
1088
|
+
from pixeltable.catalog import Catalog
|
|
1089
|
+
|
|
1042
1090
|
if (column is None) == (idx_name is None):
|
|
1043
1091
|
raise excs.Error("Exactly one of 'column' or 'idx_name' must be provided")
|
|
1044
1092
|
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1093
|
+
with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
|
|
1094
|
+
col: Column = None
|
|
1095
|
+
if idx_name is None:
|
|
1096
|
+
col = self._resolve_column_parameter(column)
|
|
1097
|
+
assert col is not None
|
|
1049
1098
|
|
|
1050
|
-
with Env.get().begin_xact():
|
|
1051
1099
|
self._drop_index(col=col, idx_name=idx_name, _idx_class=index.EmbeddingIndex, if_not_exists=if_not_exists)
|
|
1052
1100
|
|
|
1053
1101
|
def _resolve_column_parameter(self, column: Union[str, ColumnRef]) -> Column:
|
|
@@ -1116,15 +1164,17 @@ class Table(SchemaObject):
|
|
|
1116
1164
|
... tbl.drop_index(idx_name='idx1', if_not_exists='ignore')
|
|
1117
1165
|
|
|
1118
1166
|
"""
|
|
1167
|
+
from pixeltable.catalog import Catalog
|
|
1168
|
+
|
|
1119
1169
|
if (column is None) == (idx_name is None):
|
|
1120
1170
|
raise excs.Error("Exactly one of 'column' or 'idx_name' must be provided")
|
|
1121
1171
|
|
|
1122
|
-
|
|
1123
|
-
|
|
1124
|
-
|
|
1125
|
-
|
|
1172
|
+
with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
|
|
1173
|
+
col: Column = None
|
|
1174
|
+
if idx_name is None:
|
|
1175
|
+
col = self._resolve_column_parameter(column)
|
|
1176
|
+
assert col is not None
|
|
1126
1177
|
|
|
1127
|
-
with Env.get().begin_xact():
|
|
1128
1178
|
self._drop_index(col=col, idx_name=idx_name, if_not_exists=if_not_exists)
|
|
1129
1179
|
|
|
1130
1180
|
def _drop_index(
|
|
@@ -1150,7 +1200,7 @@ class Table(SchemaObject):
|
|
|
1150
1200
|
else:
|
|
1151
1201
|
if col.tbl.id != self._tbl_version.id:
|
|
1152
1202
|
raise excs.Error(
|
|
1153
|
-
f'Column {col.name!r}: cannot drop index from column that belongs to base ({col.tbl.
|
|
1203
|
+
f'Column {col.name!r}: cannot drop index from column that belongs to base ({col.tbl.name!r})'
|
|
1154
1204
|
)
|
|
1155
1205
|
idx_info_list = [info for info in self._tbl_version.get().idxs_by_name.values() if info.col.id == col.id]
|
|
1156
1206
|
if _idx_class is not None:
|
|
@@ -1299,7 +1349,9 @@ class Table(SchemaObject):
|
|
|
1299
1349
|
|
|
1300
1350
|
>>> tbl.update({'int_col': tbl.int_col + 1}, where=tbl.int_col == 0)
|
|
1301
1351
|
"""
|
|
1302
|
-
|
|
1352
|
+
from pixeltable.catalog import Catalog
|
|
1353
|
+
|
|
1354
|
+
with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
|
|
1303
1355
|
status = self._tbl_version.get().update(value_spec, where, cascade)
|
|
1304
1356
|
FileCache.get().emit_eviction_warnings()
|
|
1305
1357
|
return status
|
|
@@ -1335,35 +1387,37 @@ class Table(SchemaObject):
|
|
|
1335
1387
|
[{'id': 1, 'name': 'Alice', 'age': 30}, {'id': 3, 'name': 'Bob', 'age': 40}],
|
|
1336
1388
|
if_not_exists='insert')
|
|
1337
1389
|
"""
|
|
1338
|
-
|
|
1339
|
-
raise excs.Error('Cannot update a snapshot')
|
|
1340
|
-
rows = list(rows)
|
|
1390
|
+
from pixeltable.catalog import Catalog
|
|
1341
1391
|
|
|
1342
|
-
|
|
1343
|
-
|
|
1392
|
+
with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
|
|
1393
|
+
if self._tbl_version_path.is_snapshot():
|
|
1394
|
+
raise excs.Error('Cannot update a snapshot')
|
|
1395
|
+
rows = list(rows)
|
|
1344
1396
|
|
|
1345
|
-
|
|
1346
|
-
|
|
1347
|
-
rowids: list[tuple[int, ...]] = []
|
|
1348
|
-
if len(pk_col_names) == 0 and not has_rowid:
|
|
1349
|
-
raise excs.Error('Table must have primary key for batch update')
|
|
1397
|
+
row_updates: list[dict[Column, exprs.Expr]] = []
|
|
1398
|
+
pk_col_names = {c.name for c in self._tbl_version.get().primary_key_columns()}
|
|
1350
1399
|
|
|
1351
|
-
|
|
1352
|
-
|
|
1353
|
-
|
|
1354
|
-
)
|
|
1355
|
-
|
|
1356
|
-
|
|
1357
|
-
|
|
1358
|
-
|
|
1359
|
-
|
|
1360
|
-
|
|
1361
|
-
if
|
|
1362
|
-
|
|
1363
|
-
|
|
1364
|
-
|
|
1400
|
+
# pseudo-column _rowid: contains the rowid of the row to update and can be used instead of the primary key
|
|
1401
|
+
has_rowid = _ROWID_COLUMN_NAME in rows[0]
|
|
1402
|
+
rowids: list[tuple[int, ...]] = []
|
|
1403
|
+
if len(pk_col_names) == 0 and not has_rowid:
|
|
1404
|
+
raise excs.Error('Table must have primary key for batch update')
|
|
1405
|
+
|
|
1406
|
+
for row_spec in rows:
|
|
1407
|
+
col_vals = self._tbl_version.get()._validate_update_spec(
|
|
1408
|
+
row_spec, allow_pk=not has_rowid, allow_exprs=False, allow_media=False
|
|
1409
|
+
)
|
|
1410
|
+
if has_rowid:
|
|
1411
|
+
# we expect the _rowid column to be present for each row
|
|
1412
|
+
assert _ROWID_COLUMN_NAME in row_spec
|
|
1413
|
+
rowids.append(row_spec[_ROWID_COLUMN_NAME])
|
|
1414
|
+
else:
|
|
1415
|
+
col_names = {col.name for col in col_vals}
|
|
1416
|
+
if any(pk_col_name not in col_names for pk_col_name in pk_col_names):
|
|
1417
|
+
missing_cols = pk_col_names - {col.name for col in col_vals}
|
|
1418
|
+
raise excs.Error(f'Primary key columns ({", ".join(missing_cols)}) missing in {row_spec}')
|
|
1419
|
+
row_updates.append(col_vals)
|
|
1365
1420
|
|
|
1366
|
-
with Env.get().begin_xact():
|
|
1367
1421
|
status = self._tbl_version.get().batch_update(
|
|
1368
1422
|
row_updates,
|
|
1369
1423
|
rowids,
|
|
@@ -1397,10 +1451,14 @@ class Table(SchemaObject):
|
|
|
1397
1451
|
.. warning::
|
|
1398
1452
|
This operation is irreversible.
|
|
1399
1453
|
"""
|
|
1400
|
-
|
|
1401
|
-
|
|
1402
|
-
with
|
|
1454
|
+
from pixeltable.catalog import Catalog
|
|
1455
|
+
|
|
1456
|
+
with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
|
|
1457
|
+
if self._tbl_version_path.is_snapshot():
|
|
1458
|
+
raise excs.Error('Cannot revert a snapshot')
|
|
1403
1459
|
self._tbl_version.get().revert()
|
|
1460
|
+
# remove cached md in order to force a reload on the next operation
|
|
1461
|
+
self.__tbl_version_path.clear_cached_md()
|
|
1404
1462
|
|
|
1405
1463
|
@property
|
|
1406
1464
|
def external_stores(self) -> list[str]:
|
|
@@ -1410,12 +1468,16 @@ class Table(SchemaObject):
|
|
|
1410
1468
|
"""
|
|
1411
1469
|
Links the specified `ExternalStore` to this table.
|
|
1412
1470
|
"""
|
|
1413
|
-
|
|
1414
|
-
|
|
1415
|
-
|
|
1416
|
-
|
|
1417
|
-
|
|
1418
|
-
|
|
1471
|
+
from pixeltable.catalog import Catalog
|
|
1472
|
+
|
|
1473
|
+
with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
|
|
1474
|
+
if self._tbl_version.get().is_snapshot:
|
|
1475
|
+
raise excs.Error(f'Table `{self._name}` is a snapshot, so it cannot be linked to an external store.')
|
|
1476
|
+
if store.name in self.external_stores:
|
|
1477
|
+
raise excs.Error(f'Table `{self._name}` already has an external store with that name: {store.name}')
|
|
1478
|
+
_logger.info(f'Linking external store `{store.name}` to table `{self._name}`')
|
|
1479
|
+
|
|
1480
|
+
store.link(self._tbl_version.get()) # might call tbl_version.add_columns()
|
|
1419
1481
|
self._tbl_version.get().link_external_store(store)
|
|
1420
1482
|
env.Env.get().console_logger.info(f'Linked external store `{store.name}` to table `{self._name}`.')
|
|
1421
1483
|
|
|
@@ -1437,24 +1499,32 @@ class Table(SchemaObject):
|
|
|
1437
1499
|
delete_external_data (bool): If `True`, then the external data store will also be deleted. WARNING: This
|
|
1438
1500
|
is a destructive operation that will delete data outside Pixeltable, and cannot be undone.
|
|
1439
1501
|
"""
|
|
1440
|
-
|
|
1441
|
-
|
|
1442
|
-
|
|
1443
|
-
|
|
1444
|
-
|
|
1445
|
-
|
|
1446
|
-
stores
|
|
1447
|
-
|
|
1448
|
-
|
|
1449
|
-
|
|
1450
|
-
|
|
1451
|
-
|
|
1452
|
-
|
|
1453
|
-
|
|
1454
|
-
|
|
1455
|
-
|
|
1456
|
-
|
|
1457
|
-
|
|
1502
|
+
from pixeltable.catalog import Catalog
|
|
1503
|
+
|
|
1504
|
+
with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
|
|
1505
|
+
self._check_is_dropped()
|
|
1506
|
+
all_stores = self.external_stores
|
|
1507
|
+
|
|
1508
|
+
if stores is None:
|
|
1509
|
+
stores = all_stores
|
|
1510
|
+
elif isinstance(stores, str):
|
|
1511
|
+
stores = [stores]
|
|
1512
|
+
|
|
1513
|
+
# Validation
|
|
1514
|
+
if not ignore_errors:
|
|
1515
|
+
for store_name in stores:
|
|
1516
|
+
if store_name not in all_stores:
|
|
1517
|
+
raise excs.Error(f'Table `{self._name}` has no external store with that name: {store_name}')
|
|
1518
|
+
|
|
1519
|
+
for store_name in stores:
|
|
1520
|
+
store = self._tbl_version.get().external_stores[store_name]
|
|
1521
|
+
# get hold of the store's debug string before deleting it
|
|
1522
|
+
store_str = str(store)
|
|
1523
|
+
store.unlink(self._tbl_version.get()) # might call tbl_version.drop_columns()
|
|
1524
|
+
self._tbl_version.get().unlink_external_store(store)
|
|
1525
|
+
if delete_external_data and isinstance(store, pxt.io.external_store.Project):
|
|
1526
|
+
store.delete()
|
|
1527
|
+
env.Env.get().console_logger.info(f'Unlinked external store from table `{self._name}`: {store_str}')
|
|
1458
1528
|
|
|
1459
1529
|
def sync(
|
|
1460
1530
|
self, stores: Optional[str | list[str]] = None, *, export_data: bool = True, import_data: bool = True
|
|
@@ -1468,20 +1538,22 @@ class Table(SchemaObject):
|
|
|
1468
1538
|
export_data: If `True`, data from this table will be exported to the external stores during synchronization.
|
|
1469
1539
|
import_data: If `True`, data from the external stores will be imported to this table during synchronization.
|
|
1470
1540
|
"""
|
|
1471
|
-
|
|
1472
|
-
|
|
1541
|
+
from pixeltable.catalog import Catalog
|
|
1542
|
+
|
|
1543
|
+
with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
|
|
1544
|
+
self._check_is_dropped()
|
|
1545
|
+
all_stores = self.external_stores
|
|
1473
1546
|
|
|
1474
|
-
|
|
1475
|
-
|
|
1476
|
-
|
|
1477
|
-
|
|
1547
|
+
if stores is None:
|
|
1548
|
+
stores = all_stores
|
|
1549
|
+
elif isinstance(stores, str):
|
|
1550
|
+
stores = [stores]
|
|
1478
1551
|
|
|
1479
|
-
|
|
1480
|
-
|
|
1481
|
-
|
|
1552
|
+
for store in stores:
|
|
1553
|
+
if store not in all_stores:
|
|
1554
|
+
raise excs.Error(f'Table `{self._name}` has no external store with that name: {store}')
|
|
1482
1555
|
|
|
1483
|
-
|
|
1484
|
-
with Env.get().begin_xact():
|
|
1556
|
+
sync_status = pxt.io.SyncStatus.empty()
|
|
1485
1557
|
for store in stores:
|
|
1486
1558
|
store_obj = self._tbl_version.get().external_stores[store]
|
|
1487
1559
|
store_sync_status = store_obj.sync(self, export_data=export_data, import_data=import_data)
|