pixeltable 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +64 -11
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +1 -1
- pixeltable/catalog/catalog.py +50 -27
- pixeltable/catalog/column.py +27 -11
- pixeltable/catalog/dir.py +6 -4
- pixeltable/catalog/globals.py +8 -1
- pixeltable/catalog/insertable_table.py +25 -15
- pixeltable/catalog/named_function.py +10 -6
- pixeltable/catalog/path.py +3 -2
- pixeltable/catalog/path_dict.py +8 -6
- pixeltable/catalog/schema_object.py +2 -1
- pixeltable/catalog/table.py +123 -103
- pixeltable/catalog/table_version.py +292 -143
- pixeltable/catalog/table_version_path.py +8 -5
- pixeltable/catalog/view.py +68 -27
- pixeltable/dataframe.py +102 -72
- pixeltable/env.py +39 -23
- pixeltable/exec/__init__.py +2 -2
- pixeltable/exec/aggregation_node.py +10 -4
- pixeltable/exec/cache_prefetch_node.py +5 -3
- pixeltable/exec/component_iteration_node.py +9 -8
- pixeltable/exec/data_row_batch.py +21 -10
- pixeltable/exec/exec_context.py +10 -3
- pixeltable/exec/exec_node.py +23 -12
- pixeltable/exec/expr_eval/evaluators.py +18 -17
- pixeltable/exec/expr_eval/expr_eval_node.py +29 -16
- pixeltable/exec/expr_eval/globals.py +33 -11
- pixeltable/exec/expr_eval/row_buffer.py +5 -6
- pixeltable/exec/expr_eval/schedulers.py +170 -42
- pixeltable/exec/in_memory_data_node.py +8 -7
- pixeltable/exec/row_update_node.py +15 -5
- pixeltable/exec/sql_node.py +56 -27
- pixeltable/exprs/__init__.py +2 -2
- pixeltable/exprs/arithmetic_expr.py +57 -26
- pixeltable/exprs/array_slice.py +1 -1
- pixeltable/exprs/column_property_ref.py +2 -1
- pixeltable/exprs/column_ref.py +20 -15
- pixeltable/exprs/comparison.py +6 -2
- pixeltable/exprs/compound_predicate.py +1 -3
- pixeltable/exprs/data_row.py +2 -2
- pixeltable/exprs/expr.py +101 -72
- pixeltable/exprs/expr_dict.py +2 -1
- pixeltable/exprs/expr_set.py +3 -1
- pixeltable/exprs/function_call.py +39 -41
- pixeltable/exprs/globals.py +1 -0
- pixeltable/exprs/in_predicate.py +2 -2
- pixeltable/exprs/inline_expr.py +20 -17
- pixeltable/exprs/json_mapper.py +4 -2
- pixeltable/exprs/json_path.py +12 -18
- pixeltable/exprs/literal.py +5 -9
- pixeltable/exprs/method_ref.py +1 -0
- pixeltable/exprs/object_ref.py +1 -1
- pixeltable/exprs/row_builder.py +31 -16
- pixeltable/exprs/rowid_ref.py +14 -5
- pixeltable/exprs/similarity_expr.py +11 -6
- pixeltable/exprs/sql_element_cache.py +1 -1
- pixeltable/exprs/type_cast.py +24 -9
- pixeltable/ext/__init__.py +1 -0
- pixeltable/ext/functions/__init__.py +1 -0
- pixeltable/ext/functions/whisperx.py +2 -2
- pixeltable/ext/functions/yolox.py +11 -11
- pixeltable/func/aggregate_function.py +17 -13
- pixeltable/func/callable_function.py +6 -6
- pixeltable/func/expr_template_function.py +15 -14
- pixeltable/func/function.py +16 -16
- pixeltable/func/function_registry.py +11 -8
- pixeltable/func/globals.py +4 -2
- pixeltable/func/query_template_function.py +12 -13
- pixeltable/func/signature.py +18 -9
- pixeltable/func/tools.py +10 -17
- pixeltable/func/udf.py +106 -11
- pixeltable/functions/__init__.py +21 -2
- pixeltable/functions/anthropic.py +21 -15
- pixeltable/functions/fireworks.py +63 -5
- pixeltable/functions/gemini.py +13 -3
- pixeltable/functions/globals.py +18 -6
- pixeltable/functions/huggingface.py +20 -38
- pixeltable/functions/image.py +7 -3
- pixeltable/functions/json.py +1 -0
- pixeltable/functions/llama_cpp.py +1 -4
- pixeltable/functions/mistralai.py +31 -20
- pixeltable/functions/ollama.py +4 -18
- pixeltable/functions/openai.py +214 -109
- pixeltable/functions/replicate.py +11 -10
- pixeltable/functions/string.py +70 -7
- pixeltable/functions/timestamp.py +21 -8
- pixeltable/functions/together.py +66 -52
- pixeltable/functions/video.py +1 -0
- pixeltable/functions/vision.py +14 -11
- pixeltable/functions/whisper.py +2 -1
- pixeltable/globals.py +61 -28
- pixeltable/index/__init__.py +1 -1
- pixeltable/index/btree.py +5 -3
- pixeltable/index/embedding_index.py +15 -14
- pixeltable/io/__init__.py +1 -1
- pixeltable/io/external_store.py +30 -25
- pixeltable/io/fiftyone.py +6 -14
- pixeltable/io/globals.py +33 -27
- pixeltable/io/hf_datasets.py +3 -2
- pixeltable/io/label_studio.py +80 -71
- pixeltable/io/pandas.py +33 -9
- pixeltable/io/parquet.py +10 -13
- pixeltable/iterators/__init__.py +1 -0
- pixeltable/iterators/audio.py +205 -0
- pixeltable/iterators/document.py +19 -8
- pixeltable/iterators/image.py +6 -24
- pixeltable/iterators/string.py +3 -6
- pixeltable/iterators/video.py +1 -7
- pixeltable/metadata/__init__.py +9 -2
- pixeltable/metadata/converters/convert_10.py +2 -2
- pixeltable/metadata/converters/convert_15.py +1 -5
- pixeltable/metadata/converters/convert_16.py +2 -4
- pixeltable/metadata/converters/convert_17.py +2 -4
- pixeltable/metadata/converters/convert_18.py +2 -4
- pixeltable/metadata/converters/convert_19.py +2 -5
- pixeltable/metadata/converters/convert_20.py +1 -4
- pixeltable/metadata/converters/convert_21.py +4 -6
- pixeltable/metadata/converters/convert_22.py +1 -0
- pixeltable/metadata/converters/convert_23.py +5 -5
- pixeltable/metadata/converters/convert_24.py +12 -13
- pixeltable/metadata/converters/convert_26.py +23 -0
- pixeltable/metadata/converters/util.py +3 -4
- pixeltable/metadata/notes.py +1 -0
- pixeltable/metadata/schema.py +13 -2
- pixeltable/plan.py +173 -98
- pixeltable/store.py +42 -26
- pixeltable/type_system.py +130 -85
- pixeltable/utils/arrow.py +1 -7
- pixeltable/utils/coco.py +16 -17
- pixeltable/utils/code.py +1 -1
- pixeltable/utils/console_output.py +44 -0
- pixeltable/utils/description_helper.py +7 -7
- pixeltable/utils/documents.py +3 -1
- pixeltable/utils/filecache.py +13 -8
- pixeltable/utils/http_server.py +9 -8
- pixeltable/utils/media_store.py +2 -1
- pixeltable/utils/pytorch.py +11 -14
- pixeltable/utils/s3.py +1 -0
- pixeltable/utils/sql.py +1 -0
- pixeltable/utils/transactional_directory.py +2 -2
- {pixeltable-0.3.1.dist-info → pixeltable-0.3.3.dist-info}/METADATA +7 -8
- pixeltable-0.3.3.dist-info/RECORD +163 -0
- pixeltable-0.3.1.dist-info/RECORD +0 -160
- {pixeltable-0.3.1.dist-info → pixeltable-0.3.3.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.1.dist-info → pixeltable-0.3.3.dist-info}/WHEEL +0 -0
- {pixeltable-0.3.1.dist-info → pixeltable-0.3.3.dist-info}/entry_points.txt +0 -0
pixeltable/catalog/table.py
CHANGED
|
@@ -5,7 +5,8 @@ import builtins
|
|
|
5
5
|
import json
|
|
6
6
|
import logging
|
|
7
7
|
from pathlib import Path
|
|
8
|
-
|
|
8
|
+
|
|
9
|
+
from typing import _GenericAlias # type: ignore[attr-defined] # isort: skip
|
|
9
10
|
from typing import TYPE_CHECKING, Any, Callable, Iterable, Literal, Optional, Sequence, Union, overload
|
|
10
11
|
from uuid import UUID
|
|
11
12
|
|
|
@@ -25,8 +26,15 @@ from ..exprs import ColumnRef
|
|
|
25
26
|
from ..utils.description_helper import DescriptionHelper
|
|
26
27
|
from ..utils.filecache import FileCache
|
|
27
28
|
from .column import Column
|
|
28
|
-
from .globals import (
|
|
29
|
-
|
|
29
|
+
from .globals import (
|
|
30
|
+
_ROWID_COLUMN_NAME,
|
|
31
|
+
IfExistsParam,
|
|
32
|
+
IfNotExistsParam,
|
|
33
|
+
MediaValidation,
|
|
34
|
+
UpdateStatus,
|
|
35
|
+
is_system_column_name,
|
|
36
|
+
is_valid_identifier,
|
|
37
|
+
)
|
|
30
38
|
from .schema_object import SchemaObject
|
|
31
39
|
from .table_version import TableVersion
|
|
32
40
|
from .table_version_path import TableVersionPath
|
|
@@ -38,11 +46,13 @@ if TYPE_CHECKING:
|
|
|
38
46
|
|
|
39
47
|
_logger = logging.getLogger('pixeltable')
|
|
40
48
|
|
|
49
|
+
|
|
41
50
|
class Table(SchemaObject):
|
|
42
51
|
"""
|
|
43
52
|
A handle to a table, view, or snapshot. This class is the primary interface through which table operations
|
|
44
53
|
(queries, insertions, updates, etc.) are performed in Pixeltable.
|
|
45
54
|
"""
|
|
55
|
+
|
|
46
56
|
# Every user-invoked operation that runs an ExecNode tree (directly or indirectly) needs to call
|
|
47
57
|
# FileCache.emit_eviction_warnings() at the end of the operation.
|
|
48
58
|
|
|
@@ -60,11 +70,14 @@ class Table(SchemaObject):
|
|
|
60
70
|
self._check_is_dropped()
|
|
61
71
|
super()._move(new_name, new_dir_id)
|
|
62
72
|
with env.Env.get().engine.begin() as conn:
|
|
63
|
-
stmt = sql.text(
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
73
|
+
stmt = sql.text(
|
|
74
|
+
(
|
|
75
|
+
f'UPDATE {schema.Table.__table__} '
|
|
76
|
+
f'SET {schema.Table.dir_id.name} = :new_dir_id, '
|
|
77
|
+
f" {schema.Table.md.name}['name'] = :new_name "
|
|
78
|
+
f'WHERE {schema.Table.id.name} = :id'
|
|
79
|
+
)
|
|
80
|
+
)
|
|
68
81
|
conn.execute(stmt, {'new_dir_id': new_dir_id, 'new_name': json.dumps(new_name), 'id': self._id})
|
|
69
82
|
|
|
70
83
|
def get_metadata(self) -> dict[str, Any]:
|
|
@@ -155,14 +168,14 @@ class Table(SchemaObject):
|
|
|
155
168
|
return dependents
|
|
156
169
|
|
|
157
170
|
def _df(self) -> 'pxt.dataframe.DataFrame':
|
|
158
|
-
"""Return a DataFrame for this table.
|
|
159
|
-
"""
|
|
171
|
+
"""Return a DataFrame for this table."""
|
|
160
172
|
# local import: avoid circular imports
|
|
161
173
|
from pixeltable.plan import FromClause
|
|
174
|
+
|
|
162
175
|
return pxt.DataFrame(FromClause(tbls=[self._tbl_version_path]))
|
|
163
176
|
|
|
164
177
|
def select(self, *items: Any, **named_items: Any) -> 'pxt.DataFrame':
|
|
165
|
-
"""
|
|
178
|
+
"""Select columns or expressions from this table.
|
|
166
179
|
|
|
167
180
|
See [`DataFrame.select`][pixeltable.DataFrame.select] for more details.
|
|
168
181
|
"""
|
|
@@ -176,8 +189,11 @@ class Table(SchemaObject):
|
|
|
176
189
|
return self._df().where(pred)
|
|
177
190
|
|
|
178
191
|
def join(
|
|
179
|
-
|
|
180
|
-
|
|
192
|
+
self,
|
|
193
|
+
other: 'Table',
|
|
194
|
+
*,
|
|
195
|
+
on: Optional['exprs.Expr'] = None,
|
|
196
|
+
how: 'pixeltable.plan.JoinType.LiteralType' = 'inner',
|
|
181
197
|
) -> 'pxt.DataFrame':
|
|
182
198
|
"""Join this table with another table."""
|
|
183
199
|
return self._df().join(other, on=on, how=how)
|
|
@@ -203,22 +219,15 @@ class Table(SchemaObject):
|
|
|
203
219
|
"""Return rows from this table."""
|
|
204
220
|
return self._df().collect()
|
|
205
221
|
|
|
206
|
-
def show(
|
|
207
|
-
|
|
208
|
-
) -> 'pxt.dataframe.DataFrameResultSet':
|
|
209
|
-
"""Return rows from this table.
|
|
210
|
-
"""
|
|
222
|
+
def show(self, *args, **kwargs) -> 'pxt.dataframe.DataFrameResultSet':
|
|
223
|
+
"""Return rows from this table."""
|
|
211
224
|
return self._df().show(*args, **kwargs)
|
|
212
225
|
|
|
213
|
-
def head(
|
|
214
|
-
self, *args, **kwargs
|
|
215
|
-
) -> 'pxt.dataframe.DataFrameResultSet':
|
|
226
|
+
def head(self, *args, **kwargs) -> 'pxt.dataframe.DataFrameResultSet':
|
|
216
227
|
"""Return the first n rows inserted into this table."""
|
|
217
228
|
return self._df().head(*args, **kwargs)
|
|
218
229
|
|
|
219
|
-
def tail(
|
|
220
|
-
self, *args, **kwargs
|
|
221
|
-
) -> 'pxt.dataframe.DataFrameResultSet':
|
|
230
|
+
def tail(self, *args, **kwargs) -> 'pxt.dataframe.DataFrameResultSet':
|
|
222
231
|
"""Return the last n rows inserted into this table."""
|
|
223
232
|
return self._df().tail(*args, **kwargs)
|
|
224
233
|
|
|
@@ -228,7 +237,7 @@ class Table(SchemaObject):
|
|
|
228
237
|
|
|
229
238
|
@property
|
|
230
239
|
def columns(self) -> list[str]:
|
|
231
|
-
"""Return the names of the columns in this table.
|
|
240
|
+
"""Return the names of the columns in this table."""
|
|
232
241
|
cols = self._tbl_version_path.columns()
|
|
233
242
|
return [c.name for c in cols]
|
|
234
243
|
|
|
@@ -309,7 +318,7 @@ class Table(SchemaObject):
|
|
|
309
318
|
{
|
|
310
319
|
'Column Name': col.name,
|
|
311
320
|
'Type': col.col_type._to_str(as_schema=True),
|
|
312
|
-
'Computed With': col.value_expr.display_str(inline=False) if col.value_expr is not None else ''
|
|
321
|
+
'Computed With': col.value_expr.display_str(inline=False) if col.value_expr is not None else '',
|
|
313
322
|
}
|
|
314
323
|
for col in self.__tbl_version_path.columns()
|
|
315
324
|
if columns is None or col.name in columns
|
|
@@ -346,10 +355,7 @@ class Table(SchemaObject):
|
|
|
346
355
|
def _external_store_descriptor(self) -> pd.DataFrame:
|
|
347
356
|
pd_rows = []
|
|
348
357
|
for name, store in self._tbl_version.external_stores.items():
|
|
349
|
-
row = {
|
|
350
|
-
'External Store': name,
|
|
351
|
-
'Type': type(store).__name__,
|
|
352
|
-
}
|
|
358
|
+
row = {'External Store': name, 'Type': type(store).__name__}
|
|
353
359
|
pd_rows.append(row)
|
|
354
360
|
return pd.DataFrame(pd_rows)
|
|
355
361
|
|
|
@@ -360,6 +366,7 @@ class Table(SchemaObject):
|
|
|
360
366
|
self._check_is_dropped()
|
|
361
367
|
if getattr(builtins, '__IPYTHON__', False):
|
|
362
368
|
from IPython.display import display
|
|
369
|
+
|
|
363
370
|
display(self._repr_html_())
|
|
364
371
|
else:
|
|
365
372
|
print(repr(self))
|
|
@@ -378,15 +385,15 @@ class Table(SchemaObject):
|
|
|
378
385
|
|
|
379
386
|
# TODO Factor this out into a separate module.
|
|
380
387
|
# The return type is unresolvable, but torch can't be imported since it's an optional dependency.
|
|
381
|
-
def to_pytorch_dataset(self, image_format
|
|
388
|
+
def to_pytorch_dataset(self, image_format: str = 'pt') -> 'torch.utils.data.IterableDataset':
|
|
382
389
|
"""Return a PyTorch Dataset for this table.
|
|
383
|
-
|
|
390
|
+
See DataFrame.to_pytorch_dataset()
|
|
384
391
|
"""
|
|
385
392
|
return self._df().to_pytorch_dataset(image_format=image_format)
|
|
386
393
|
|
|
387
394
|
def to_coco_dataset(self) -> Path:
|
|
388
395
|
"""Return the path to a COCO json file for this table.
|
|
389
|
-
|
|
396
|
+
See DataFrame.to_coco_dataset()
|
|
390
397
|
"""
|
|
391
398
|
return self._df().to_coco_dataset()
|
|
392
399
|
|
|
@@ -399,10 +406,11 @@ class Table(SchemaObject):
|
|
|
399
406
|
return any(
|
|
400
407
|
col in store.get_local_columns()
|
|
401
408
|
for view in [self] + self._get_views(recursive=True)
|
|
402
|
-
for store in view._tbl_version.external_stores.values()
|
|
409
|
+
for store in view._tbl_version.external_stores.values()
|
|
410
|
+
)
|
|
403
411
|
|
|
404
412
|
def _ignore_or_drop_existing_columns(self, new_col_names: list[str], if_exists: IfExistsParam) -> list[str]:
|
|
405
|
-
"""
|
|
413
|
+
"""Check and handle existing columns in the new column specification based on the if_exists parameter.
|
|
406
414
|
|
|
407
415
|
If `if_exists='ignore'`, returns a list of existing columns, if any, in `new_col_names`.
|
|
408
416
|
"""
|
|
@@ -420,9 +428,7 @@ class Table(SchemaObject):
|
|
|
420
428
|
# for views, it is possible that the existing column
|
|
421
429
|
# is a base table column; in that case, we should not
|
|
422
430
|
# drop/replace that column. Continue to raise error.
|
|
423
|
-
raise excs.Error(
|
|
424
|
-
f'Column {new_col_name!r} is a base table column. Cannot replace it.'
|
|
425
|
-
)
|
|
431
|
+
raise excs.Error(f'Column {new_col_name!r} is a base table column. Cannot replace it.')
|
|
426
432
|
col = self._tbl_version.cols_by_name[new_col_name]
|
|
427
433
|
# cannot drop a column with dependents; so reject
|
|
428
434
|
# replace directive if column has dependents.
|
|
@@ -437,7 +443,7 @@ class Table(SchemaObject):
|
|
|
437
443
|
def add_columns(
|
|
438
444
|
self,
|
|
439
445
|
schema: dict[str, Union[ts.ColumnType, builtins.type, _GenericAlias]],
|
|
440
|
-
if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error'
|
|
446
|
+
if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
|
|
441
447
|
) -> UpdateStatus:
|
|
442
448
|
"""
|
|
443
449
|
Adds multiple columns to the table. The columns must be concrete (non-computed) columns; to add computed columns,
|
|
@@ -482,7 +488,9 @@ class Table(SchemaObject):
|
|
|
482
488
|
for col_name, spec in schema.items()
|
|
483
489
|
}
|
|
484
490
|
# handle existing columns based on if_exists parameter
|
|
485
|
-
cols_to_ignore = self._ignore_or_drop_existing_columns(
|
|
491
|
+
cols_to_ignore = self._ignore_or_drop_existing_columns(
|
|
492
|
+
list(col_schema.keys()), IfExistsParam.validated(if_exists, 'if_exists')
|
|
493
|
+
)
|
|
486
494
|
# if all columns to be added already exist and user asked to ignore
|
|
487
495
|
# existing columns, there's nothing to do.
|
|
488
496
|
for cname in cols_to_ignore:
|
|
@@ -501,7 +509,7 @@ class Table(SchemaObject):
|
|
|
501
509
|
self,
|
|
502
510
|
*,
|
|
503
511
|
if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
|
|
504
|
-
**kwargs: Union[ts.ColumnType, builtins.type, _GenericAlias, exprs.Expr]
|
|
512
|
+
**kwargs: Union[ts.ColumnType, builtins.type, _GenericAlias, exprs.Expr],
|
|
505
513
|
) -> UpdateStatus:
|
|
506
514
|
"""
|
|
507
515
|
Adds an ordinary (non-computed) column to the table.
|
|
@@ -528,7 +536,7 @@ class Table(SchemaObject):
|
|
|
528
536
|
|
|
529
537
|
Alternatively, this can also be expressed as:
|
|
530
538
|
|
|
531
|
-
>>> tbl
|
|
539
|
+
>>> tbl.add_columns({'new_col': pxt.Int})
|
|
532
540
|
"""
|
|
533
541
|
self._check_is_dropped()
|
|
534
542
|
# verify kwargs
|
|
@@ -547,7 +555,6 @@ class Table(SchemaObject):
|
|
|
547
555
|
)
|
|
548
556
|
return self.add_columns(kwargs, if_exists=if_exists)
|
|
549
557
|
|
|
550
|
-
|
|
551
558
|
def add_computed_column(
|
|
552
559
|
self,
|
|
553
560
|
*,
|
|
@@ -555,7 +562,7 @@ class Table(SchemaObject):
|
|
|
555
562
|
print_stats: bool = False,
|
|
556
563
|
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
557
564
|
if_exists: Literal['error', 'ignore', 'replace'] = 'error',
|
|
558
|
-
**kwargs: exprs.Expr
|
|
565
|
+
**kwargs: exprs.Expr,
|
|
559
566
|
) -> UpdateStatus:
|
|
560
567
|
"""
|
|
561
568
|
Adds a computed column to the table.
|
|
@@ -611,7 +618,9 @@ class Table(SchemaObject):
|
|
|
611
618
|
col_schema['stored'] = stored
|
|
612
619
|
|
|
613
620
|
# handle existing columns based on if_exists parameter
|
|
614
|
-
cols_to_ignore = self._ignore_or_drop_existing_columns(
|
|
621
|
+
cols_to_ignore = self._ignore_or_drop_existing_columns(
|
|
622
|
+
[col_name], IfExistsParam.validated(if_exists, 'if_exists')
|
|
623
|
+
)
|
|
615
624
|
# if the column to add already exists and user asked to ignore
|
|
616
625
|
# exiting column, there's nothing to do.
|
|
617
626
|
if len(cols_to_ignore) != 0:
|
|
@@ -677,7 +686,8 @@ class Table(SchemaObject):
|
|
|
677
686
|
cls._validate_column_spec(name, spec)
|
|
678
687
|
if 'type' in spec:
|
|
679
688
|
col_type = ts.ColumnType.normalize_type(
|
|
680
|
-
spec['type'], nullable_default=True, allow_builtin_types=False
|
|
689
|
+
spec['type'], nullable_default=True, allow_builtin_types=False
|
|
690
|
+
)
|
|
681
691
|
value_expr = spec.get('value')
|
|
682
692
|
if value_expr is not None and isinstance(value_expr, exprs.Expr):
|
|
683
693
|
# create copy so we can modify it
|
|
@@ -686,15 +696,19 @@ class Table(SchemaObject):
|
|
|
686
696
|
primary_key = spec.get('primary_key')
|
|
687
697
|
media_validation_str = spec.get('media_validation')
|
|
688
698
|
media_validation = (
|
|
689
|
-
catalog.MediaValidation[media_validation_str.upper()] if media_validation_str is not None
|
|
690
|
-
else None
|
|
699
|
+
catalog.MediaValidation[media_validation_str.upper()] if media_validation_str is not None else None
|
|
691
700
|
)
|
|
692
701
|
else:
|
|
693
702
|
raise excs.Error(f'Invalid value for column {name!r}')
|
|
694
703
|
|
|
695
704
|
column = Column(
|
|
696
|
-
name,
|
|
697
|
-
|
|
705
|
+
name,
|
|
706
|
+
col_type=col_type,
|
|
707
|
+
computed_with=value_expr,
|
|
708
|
+
stored=stored,
|
|
709
|
+
is_pk=primary_key,
|
|
710
|
+
media_validation=media_validation,
|
|
711
|
+
)
|
|
698
712
|
columns.append(column)
|
|
699
713
|
return columns
|
|
700
714
|
|
|
@@ -704,13 +718,16 @@ class Table(SchemaObject):
|
|
|
704
718
|
if is_system_column_name(col.name):
|
|
705
719
|
raise excs.Error(f'{col.name!r} is a reserved name in Pixeltable; please choose a different column name.')
|
|
706
720
|
if not is_valid_identifier(col.name):
|
|
707
|
-
raise excs.Error(f
|
|
708
|
-
if col.stored is False and not
|
|
709
|
-
raise excs.Error(f'Column {col.name!r}: stored={col.stored} only applies to computed
|
|
721
|
+
raise excs.Error(f'Invalid column name: {col.name!r}')
|
|
722
|
+
if col.stored is False and not col.is_computed:
|
|
723
|
+
raise excs.Error(f'Column {col.name!r}: stored={col.stored} only applies to computed columns')
|
|
710
724
|
if col.stored is False and col.has_window_fn_call():
|
|
711
|
-
raise excs.Error(
|
|
712
|
-
|
|
713
|
-
|
|
725
|
+
raise excs.Error(
|
|
726
|
+
(
|
|
727
|
+
f'Column {col.name!r}: stored={col.stored} is not valid for image columns computed with a streaming '
|
|
728
|
+
f'function'
|
|
729
|
+
)
|
|
730
|
+
)
|
|
714
731
|
|
|
715
732
|
@classmethod
|
|
716
733
|
def _verify_schema(cls, schema: list[Column]) -> None:
|
|
@@ -838,19 +855,19 @@ class Table(SchemaObject):
|
|
|
838
855
|
assert not self._is_dropped
|
|
839
856
|
index_info = []
|
|
840
857
|
for idx_name, idx in self._tbl_version.idxs_by_name.items():
|
|
841
|
-
index_info.append({
|
|
842
|
-
'_id': idx.id,
|
|
843
|
-
'_name': idx_name,
|
|
844
|
-
'_column': idx.col.name
|
|
845
|
-
})
|
|
858
|
+
index_info.append({'_id': idx.id, '_name': idx_name, '_column': idx.col.name})
|
|
846
859
|
return index_info
|
|
847
860
|
|
|
848
861
|
def add_embedding_index(
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
862
|
+
self,
|
|
863
|
+
column: Union[str, ColumnRef],
|
|
864
|
+
*,
|
|
865
|
+
idx_name: Optional[str] = None,
|
|
866
|
+
embedding: Optional[pxt.Function] = None,
|
|
867
|
+
string_embed: Optional[pxt.Function] = None,
|
|
868
|
+
image_embed: Optional[pxt.Function] = None,
|
|
869
|
+
metric: str = 'cosine',
|
|
870
|
+
if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
|
|
854
871
|
) -> None:
|
|
855
872
|
"""
|
|
856
873
|
Add an embedding index to the table. Once the index is created, it will be automatically kept up-to-date as new
|
|
@@ -963,10 +980,11 @@ class Table(SchemaObject):
|
|
|
963
980
|
FileCache.get().emit_eviction_warnings()
|
|
964
981
|
|
|
965
982
|
def drop_embedding_index(
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
|
|
983
|
+
self,
|
|
984
|
+
*,
|
|
985
|
+
column: Union[str, ColumnRef, None] = None,
|
|
986
|
+
idx_name: Optional[str] = None,
|
|
987
|
+
if_not_exists: Literal['error', 'ignore'] = 'error',
|
|
970
988
|
) -> None:
|
|
971
989
|
"""
|
|
972
990
|
Drop an embedding index from the table. Either a column name or an index name (but not both) must be
|
|
@@ -1026,10 +1044,11 @@ class Table(SchemaObject):
|
|
|
1026
1044
|
self._drop_index(col=col, idx_name=idx_name, _idx_class=index.EmbeddingIndex, if_not_exists=if_not_exists)
|
|
1027
1045
|
|
|
1028
1046
|
def drop_index(
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1047
|
+
self,
|
|
1048
|
+
*,
|
|
1049
|
+
column: Union[str, ColumnRef, None] = None,
|
|
1050
|
+
idx_name: Optional[str] = None,
|
|
1051
|
+
if_not_exists: Literal['error', 'ignore'] = 'error',
|
|
1033
1052
|
) -> None:
|
|
1034
1053
|
"""
|
|
1035
1054
|
Drop an index from the table. Either a column name or an index name (but not both) must be
|
|
@@ -1089,10 +1108,12 @@ class Table(SchemaObject):
|
|
|
1089
1108
|
self._drop_index(col=col, idx_name=idx_name, if_not_exists=if_not_exists)
|
|
1090
1109
|
|
|
1091
1110
|
def _drop_index(
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
|
|
1111
|
+
self,
|
|
1112
|
+
*,
|
|
1113
|
+
col: Optional[Column] = None,
|
|
1114
|
+
idx_name: Optional[str] = None,
|
|
1115
|
+
_idx_class: Optional[type[index.IndexBase]] = None,
|
|
1116
|
+
if_not_exists: Literal['error', 'ignore'] = 'error',
|
|
1096
1117
|
) -> None:
|
|
1097
1118
|
if self._tbl_version_path.is_snapshot():
|
|
1098
1119
|
raise excs.Error('Cannot drop an index from a snapshot')
|
|
@@ -1109,7 +1130,8 @@ class Table(SchemaObject):
|
|
|
1109
1130
|
else:
|
|
1110
1131
|
if col.tbl.id != self._tbl_version.id:
|
|
1111
1132
|
raise excs.Error(
|
|
1112
|
-
f'Column {col.name!r}: cannot drop index from column that belongs to base ({col.tbl.name}!r)'
|
|
1133
|
+
f'Column {col.name!r}: cannot drop index from column that belongs to base ({col.tbl.name}!r)'
|
|
1134
|
+
)
|
|
1113
1135
|
idx_info = [info for info in self._tbl_version.idxs_by_name.values() if info.col.id == col.id]
|
|
1114
1136
|
if _idx_class is not None:
|
|
1115
1137
|
idx_info = [info for info in idx_info if isinstance(info.idx, _idx_class)]
|
|
@@ -1131,16 +1153,12 @@ class Table(SchemaObject):
|
|
|
1131
1153
|
/,
|
|
1132
1154
|
*,
|
|
1133
1155
|
print_stats: bool = False,
|
|
1134
|
-
on_error: Literal['abort', 'ignore'] = 'abort'
|
|
1156
|
+
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
1135
1157
|
) -> UpdateStatus: ...
|
|
1136
1158
|
|
|
1137
1159
|
@overload
|
|
1138
1160
|
def insert(
|
|
1139
|
-
self,
|
|
1140
|
-
*,
|
|
1141
|
-
print_stats: bool = False,
|
|
1142
|
-
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
1143
|
-
**kwargs: Any
|
|
1161
|
+
self, *, print_stats: bool = False, on_error: Literal['abort', 'ignore'] = 'abort', **kwargs: Any
|
|
1144
1162
|
) -> UpdateStatus: ...
|
|
1145
1163
|
|
|
1146
1164
|
@abc.abstractmethod # type: ignore[misc]
|
|
@@ -1151,7 +1169,7 @@ class Table(SchemaObject):
|
|
|
1151
1169
|
*,
|
|
1152
1170
|
print_stats: bool = False,
|
|
1153
1171
|
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
1154
|
-
**kwargs: Any
|
|
1172
|
+
**kwargs: Any,
|
|
1155
1173
|
) -> UpdateStatus:
|
|
1156
1174
|
"""Inserts rows into this table. There are two mutually exclusive call patterns:
|
|
1157
1175
|
|
|
@@ -1214,7 +1232,7 @@ class Table(SchemaObject):
|
|
|
1214
1232
|
raise NotImplementedError
|
|
1215
1233
|
|
|
1216
1234
|
def update(
|
|
1217
|
-
|
|
1235
|
+
self, value_spec: dict[str, Any], where: Optional['pxt.exprs.Expr'] = None, cascade: bool = True
|
|
1218
1236
|
) -> UpdateStatus:
|
|
1219
1237
|
"""Update rows in this table.
|
|
1220
1238
|
|
|
@@ -1245,8 +1263,10 @@ class Table(SchemaObject):
|
|
|
1245
1263
|
return status
|
|
1246
1264
|
|
|
1247
1265
|
def batch_update(
|
|
1248
|
-
|
|
1249
|
-
|
|
1266
|
+
self,
|
|
1267
|
+
rows: Iterable[dict[str, Any]],
|
|
1268
|
+
cascade: bool = True,
|
|
1269
|
+
if_not_exists: Literal['error', 'ignore', 'insert'] = 'error',
|
|
1250
1270
|
) -> UpdateStatus:
|
|
1251
1271
|
"""Update rows in this table.
|
|
1252
1272
|
|
|
@@ -1299,8 +1319,12 @@ class Table(SchemaObject):
|
|
|
1299
1319
|
raise excs.Error(f'Primary key columns ({", ".join(missing_cols)}) missing in {row_spec}')
|
|
1300
1320
|
row_updates.append(col_vals)
|
|
1301
1321
|
status = self._tbl_version.batch_update(
|
|
1302
|
-
row_updates,
|
|
1303
|
-
|
|
1322
|
+
row_updates,
|
|
1323
|
+
rowids,
|
|
1324
|
+
error_if_not_exists=if_not_exists == 'error',
|
|
1325
|
+
insert_if_not_exists=if_not_exists == 'insert',
|
|
1326
|
+
cascade=cascade,
|
|
1327
|
+
)
|
|
1304
1328
|
FileCache.get().emit_eviction_warnings()
|
|
1305
1329
|
return status
|
|
1306
1330
|
|
|
@@ -1345,14 +1369,14 @@ class Table(SchemaObject):
|
|
|
1345
1369
|
raise excs.Error(f'Table `{self._name}` already has an external store with that name: {store.name}')
|
|
1346
1370
|
_logger.info(f'Linking external store `{store.name}` to table `{self._name}`')
|
|
1347
1371
|
self._tbl_version.link_external_store(store)
|
|
1348
|
-
|
|
1372
|
+
env.Env.get().console_logger.info(f'Linked external store `{store.name}` to table `{self._name}`.')
|
|
1349
1373
|
|
|
1350
1374
|
def unlink_external_stores(
|
|
1351
|
-
|
|
1352
|
-
|
|
1353
|
-
|
|
1354
|
-
|
|
1355
|
-
|
|
1375
|
+
self,
|
|
1376
|
+
stores: Optional[str | list[str]] = None,
|
|
1377
|
+
*,
|
|
1378
|
+
delete_external_data: bool = False,
|
|
1379
|
+
ignore_errors: bool = False,
|
|
1356
1380
|
) -> None:
|
|
1357
1381
|
"""
|
|
1358
1382
|
Unlinks this table's external stores.
|
|
@@ -1381,14 +1405,10 @@ class Table(SchemaObject):
|
|
|
1381
1405
|
|
|
1382
1406
|
for store in stores:
|
|
1383
1407
|
self._tbl_version.unlink_external_store(store, delete_external_data=delete_external_data)
|
|
1384
|
-
|
|
1408
|
+
env.Env.get().console_logger.info(f'Unlinked external store from table `{self._name}`: {store}')
|
|
1385
1409
|
|
|
1386
1410
|
def sync(
|
|
1387
|
-
|
|
1388
|
-
stores: Optional[str | list[str]] = None,
|
|
1389
|
-
*,
|
|
1390
|
-
export_data: bool = True,
|
|
1391
|
-
import_data: bool = True
|
|
1411
|
+
self, stores: Optional[str | list[str]] = None, *, export_data: bool = True, import_data: bool = True
|
|
1392
1412
|
) -> 'pxt.io.SyncStatus':
|
|
1393
1413
|
"""
|
|
1394
1414
|
Synchronizes this table with its linked external stores.
|