pixeltable 0.3.9__py3-none-any.whl → 0.3.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +2 -3
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +2 -1
- pixeltable/catalog/catalog.py +63 -36
- pixeltable/catalog/column.py +11 -4
- pixeltable/catalog/dir.py +5 -5
- pixeltable/catalog/globals.py +28 -14
- pixeltable/catalog/insertable_table.py +81 -43
- pixeltable/catalog/path.py +2 -2
- pixeltable/catalog/table.py +140 -109
- pixeltable/catalog/table_version.py +60 -43
- pixeltable/catalog/table_version_handle.py +3 -0
- pixeltable/catalog/table_version_path.py +1 -1
- pixeltable/catalog/view.py +17 -9
- pixeltable/dataframe.py +5 -3
- pixeltable/env.py +109 -43
- pixeltable/exec/__init__.py +2 -0
- pixeltable/exec/aggregation_node.py +6 -8
- pixeltable/exec/cache_prefetch_node.py +4 -7
- pixeltable/exec/component_iteration_node.py +1 -3
- pixeltable/exec/data_row_batch.py +1 -2
- pixeltable/exec/exec_context.py +1 -1
- pixeltable/exec/exec_node.py +2 -3
- pixeltable/exec/expr_eval/__init__.py +2 -0
- pixeltable/exec/expr_eval/evaluators.py +137 -20
- pixeltable/exec/expr_eval/expr_eval_node.py +43 -64
- pixeltable/exec/expr_eval/globals.py +68 -7
- pixeltable/exec/expr_eval/schedulers.py +25 -23
- pixeltable/exec/in_memory_data_node.py +8 -6
- pixeltable/exec/row_update_node.py +3 -4
- pixeltable/exec/sql_node.py +16 -17
- pixeltable/exprs/__init__.py +3 -2
- pixeltable/exprs/arithmetic_expr.py +2 -0
- pixeltable/exprs/column_property_ref.py +1 -1
- pixeltable/exprs/column_ref.py +39 -3
- pixeltable/exprs/compound_predicate.py +1 -1
- pixeltable/exprs/data_row.py +17 -1
- pixeltable/exprs/expr.py +51 -21
- pixeltable/exprs/function_call.py +34 -2
- pixeltable/exprs/globals.py +12 -0
- pixeltable/exprs/json_mapper.py +95 -48
- pixeltable/exprs/json_path.py +3 -10
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +33 -6
- pixeltable/exprs/similarity_expr.py +6 -21
- pixeltable/exprs/sql_element_cache.py +1 -1
- pixeltable/exprs/string_op.py +107 -0
- pixeltable/ext/__init__.py +1 -1
- pixeltable/ext/functions/__init__.py +1 -1
- pixeltable/ext/functions/whisperx.py +1 -1
- pixeltable/ext/functions/yolox.py +22 -65
- pixeltable/func/aggregate_function.py +1 -1
- pixeltable/func/callable_function.py +2 -5
- pixeltable/func/expr_template_function.py +22 -2
- pixeltable/func/function.py +4 -5
- pixeltable/func/function_registry.py +1 -1
- pixeltable/func/signature.py +1 -1
- pixeltable/func/tools.py +2 -2
- pixeltable/func/udf.py +2 -2
- pixeltable/functions/__init__.py +2 -2
- pixeltable/functions/anthropic.py +2 -2
- pixeltable/functions/audio.py +1 -1
- pixeltable/functions/deepseek.py +1 -1
- pixeltable/functions/fireworks.py +1 -1
- pixeltable/functions/globals.py +22 -11
- pixeltable/functions/huggingface.py +1 -1
- pixeltable/functions/image.py +1 -1
- pixeltable/functions/json.py +1 -1
- pixeltable/functions/llama_cpp.py +1 -1
- pixeltable/functions/math.py +1 -1
- pixeltable/functions/mistralai.py +1 -1
- pixeltable/functions/ollama.py +1 -1
- pixeltable/functions/openai.py +2 -2
- pixeltable/functions/replicate.py +1 -1
- pixeltable/functions/string.py +1 -1
- pixeltable/functions/timestamp.py +1 -1
- pixeltable/functions/together.py +1 -1
- pixeltable/functions/util.py +1 -1
- pixeltable/functions/video.py +2 -2
- pixeltable/functions/vision.py +2 -2
- pixeltable/globals.py +85 -33
- pixeltable/index/embedding_index.py +12 -1
- pixeltable/io/__init__.py +8 -5
- pixeltable/io/datarows.py +138 -0
- pixeltable/io/external_store.py +8 -5
- pixeltable/io/fiftyone.py +6 -7
- pixeltable/io/globals.py +7 -160
- pixeltable/io/hf_datasets.py +21 -98
- pixeltable/io/label_studio.py +21 -20
- pixeltable/io/pandas.py +35 -48
- pixeltable/io/parquet.py +17 -42
- pixeltable/io/table_data_conduit.py +569 -0
- pixeltable/io/utils.py +6 -21
- pixeltable/iterators/__init__.py +1 -1
- pixeltable/metadata/__init__.py +6 -4
- pixeltable/metadata/converters/convert_24.py +3 -3
- pixeltable/metadata/converters/convert_25.py +1 -1
- pixeltable/metadata/converters/convert_29.py +1 -1
- pixeltable/metadata/converters/convert_30.py +50 -0
- pixeltable/metadata/converters/util.py +26 -1
- pixeltable/metadata/notes.py +1 -0
- pixeltable/metadata/schema.py +3 -0
- pixeltable/store.py +2 -2
- pixeltable/type_system.py +19 -7
- pixeltable/utils/arrow.py +32 -7
- pixeltable/utils/console_output.py +3 -2
- pixeltable/utils/coroutine.py +3 -3
- pixeltable/utils/dbms.py +66 -0
- pixeltable/utils/documents.py +61 -67
- pixeltable/utils/filecache.py +1 -1
- pixeltable/utils/http_server.py +3 -2
- pixeltable/utils/pytorch.py +1 -1
- pixeltable/utils/sql.py +1 -1
- pixeltable-0.3.11.dist-info/METADATA +436 -0
- pixeltable-0.3.11.dist-info/RECORD +179 -0
- {pixeltable-0.3.9.dist-info → pixeltable-0.3.11.dist-info}/WHEEL +1 -1
- pixeltable/catalog/path_dict.py +0 -169
- pixeltable-0.3.9.dist-info/METADATA +0 -382
- pixeltable-0.3.9.dist-info/RECORD +0 -175
- {pixeltable-0.3.9.dist-info → pixeltable-0.3.11.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.9.dist-info → pixeltable-0.3.11.dist-info}/entry_points.txt +0 -0
pixeltable/catalog/path.py
CHANGED
|
@@ -27,7 +27,7 @@ class Path:
|
|
|
27
27
|
|
|
28
28
|
@property
|
|
29
29
|
def is_root(self) -> bool:
|
|
30
|
-
return self.components[0]
|
|
30
|
+
return not self.components[0]
|
|
31
31
|
|
|
32
32
|
@property
|
|
33
33
|
def parent(self) -> Path:
|
|
@@ -43,7 +43,7 @@ class Path:
|
|
|
43
43
|
if self.is_root:
|
|
44
44
|
return Path(name)
|
|
45
45
|
else:
|
|
46
|
-
return Path(f'{
|
|
46
|
+
return Path(f'{self!s}.{name}')
|
|
47
47
|
|
|
48
48
|
def is_ancestor(self, other: Path, is_parent: bool = False) -> bool:
|
|
49
49
|
"""
|
pixeltable/catalog/table.py
CHANGED
|
@@ -8,20 +8,16 @@ from pathlib import Path
|
|
|
8
8
|
from typing import TYPE_CHECKING, Any, Iterable, Literal, Optional, Union, overload
|
|
9
9
|
|
|
10
10
|
from typing import _GenericAlias # type: ignore[attr-defined] # isort: skip
|
|
11
|
+
from keyword import iskeyword as is_python_keyword
|
|
11
12
|
from uuid import UUID
|
|
12
13
|
|
|
13
14
|
import pandas as pd
|
|
14
15
|
import sqlalchemy as sql
|
|
15
16
|
|
|
16
17
|
import pixeltable as pxt
|
|
17
|
-
import
|
|
18
|
-
import pixeltable.env as env
|
|
19
|
-
import pixeltable.exceptions as excs
|
|
20
|
-
import pixeltable.exprs as exprs
|
|
21
|
-
import pixeltable.index as index
|
|
22
|
-
import pixeltable.metadata.schema as schema
|
|
23
|
-
import pixeltable.type_system as ts
|
|
18
|
+
from pixeltable import catalog, env, exceptions as excs, exprs, index, type_system as ts
|
|
24
19
|
from pixeltable.env import Env
|
|
20
|
+
from pixeltable.metadata import schema
|
|
25
21
|
|
|
26
22
|
from ..exprs import ColumnRef
|
|
27
23
|
from ..utils.description_helper import DescriptionHelper
|
|
@@ -37,7 +33,6 @@ from .globals import (
|
|
|
37
33
|
is_valid_identifier,
|
|
38
34
|
)
|
|
39
35
|
from .schema_object import SchemaObject
|
|
40
|
-
from .table_version import TableVersion
|
|
41
36
|
from .table_version_handle import TableVersionHandle
|
|
42
37
|
from .table_version_path import TableVersionPath
|
|
43
38
|
|
|
@@ -45,6 +40,7 @@ if TYPE_CHECKING:
|
|
|
45
40
|
import torch.utils.data
|
|
46
41
|
|
|
47
42
|
import pixeltable.plan
|
|
43
|
+
from pixeltable.globals import TableDataSource
|
|
48
44
|
|
|
49
45
|
_logger = logging.getLogger('pixeltable')
|
|
50
46
|
|
|
@@ -79,7 +75,7 @@ class Table(SchemaObject):
|
|
|
79
75
|
(
|
|
80
76
|
f'UPDATE {schema.Table.__table__} '
|
|
81
77
|
f'SET {schema.Table.dir_id.name} = :new_dir_id, '
|
|
82
|
-
f" {schema.Table.md.name}
|
|
78
|
+
f" {schema.Table.md.name} = jsonb_set({schema.Table.md.name}, '{{name}}', (:new_name)::jsonb) "
|
|
83
79
|
f'WHERE {schema.Table.id.name} = :id'
|
|
84
80
|
)
|
|
85
81
|
)
|
|
@@ -229,15 +225,15 @@ class Table(SchemaObject):
|
|
|
229
225
|
"""Return rows from this table."""
|
|
230
226
|
return self._df().collect()
|
|
231
227
|
|
|
232
|
-
def show(self, *args, **kwargs) -> 'pxt.dataframe.DataFrameResultSet':
|
|
228
|
+
def show(self, *args: Any, **kwargs: Any) -> 'pxt.dataframe.DataFrameResultSet':
|
|
233
229
|
"""Return rows from this table."""
|
|
234
230
|
return self._df().show(*args, **kwargs)
|
|
235
231
|
|
|
236
|
-
def head(self, *args, **kwargs) -> 'pxt.dataframe.DataFrameResultSet':
|
|
232
|
+
def head(self, *args: Any, **kwargs: Any) -> 'pxt.dataframe.DataFrameResultSet':
|
|
237
233
|
"""Return the first n rows inserted into this table."""
|
|
238
234
|
return self._df().head(*args, **kwargs)
|
|
239
235
|
|
|
240
|
-
def tail(self, *args, **kwargs) -> 'pxt.dataframe.DataFrameResultSet':
|
|
236
|
+
def tail(self, *args: Any, **kwargs: Any) -> 'pxt.dataframe.DataFrameResultSet':
|
|
241
237
|
"""Return the last n rows inserted into this table."""
|
|
242
238
|
return self._df().tail(*args, **kwargs)
|
|
243
239
|
|
|
@@ -284,7 +280,7 @@ class Table(SchemaObject):
|
|
|
284
280
|
return self._tbl_version.get().comment
|
|
285
281
|
|
|
286
282
|
@property
|
|
287
|
-
def _num_retained_versions(self):
|
|
283
|
+
def _num_retained_versions(self) -> int:
|
|
288
284
|
return self._tbl_version.get().num_retained_versions
|
|
289
285
|
|
|
290
286
|
@property
|
|
@@ -403,12 +399,12 @@ class Table(SchemaObject):
|
|
|
403
399
|
def _column_has_dependents(self, col: Column) -> bool:
|
|
404
400
|
"""Returns True if the column has dependents, False otherwise."""
|
|
405
401
|
assert col is not None
|
|
406
|
-
assert col.name in self._schema
|
|
402
|
+
assert col.name in self._schema
|
|
407
403
|
if any(c.name is not None for c in col.dependent_cols):
|
|
408
404
|
return True
|
|
409
405
|
return any(
|
|
410
406
|
col in store.get_local_columns()
|
|
411
|
-
for view in
|
|
407
|
+
for view in (self, *self._get_views(recursive=True))
|
|
412
408
|
for store in view._tbl_version.get().external_stores.values()
|
|
413
409
|
)
|
|
414
410
|
|
|
@@ -426,7 +422,7 @@ class Table(SchemaObject):
|
|
|
426
422
|
raise excs.Error(f'Duplicate column name: {new_col_name!r}')
|
|
427
423
|
elif if_exists == IfExistsParam.IGNORE:
|
|
428
424
|
cols_to_ignore.append(new_col_name)
|
|
429
|
-
elif if_exists
|
|
425
|
+
elif if_exists in (IfExistsParam.REPLACE, IfExistsParam.REPLACE_FORCE):
|
|
430
426
|
if new_col_name not in self._tbl_version.get().cols_by_name:
|
|
431
427
|
# for views, it is possible that the existing column
|
|
432
428
|
# is a base table column; in that case, we should not
|
|
@@ -437,7 +433,8 @@ class Table(SchemaObject):
|
|
|
437
433
|
# replace directive if column has dependents.
|
|
438
434
|
if self._column_has_dependents(col):
|
|
439
435
|
raise excs.Error(
|
|
440
|
-
f'Column {new_col_name!r} already exists and has dependents.
|
|
436
|
+
f'Column {new_col_name!r} already exists and has dependents. '
|
|
437
|
+
f'Cannot {if_exists.name.lower()} it.'
|
|
441
438
|
)
|
|
442
439
|
self.drop_column(new_col_name)
|
|
443
440
|
assert new_col_name not in self._tbl_version.get().cols_by_name
|
|
@@ -449,8 +446,8 @@ class Table(SchemaObject):
|
|
|
449
446
|
if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
|
|
450
447
|
) -> UpdateStatus:
|
|
451
448
|
"""
|
|
452
|
-
Adds multiple columns to the table. The columns must be concrete (non-computed) columns; to add computed
|
|
453
|
-
use [`add_computed_column()`][pixeltable.catalog.Table.add_computed_column] instead.
|
|
449
|
+
Adds multiple columns to the table. The columns must be concrete (non-computed) columns; to add computed
|
|
450
|
+
columns, use [`add_computed_column()`][pixeltable.catalog.Table.add_computed_column] instead.
|
|
454
451
|
|
|
455
452
|
The format of the `schema` argument is identical to the format of the schema in a call to
|
|
456
453
|
[`create_table()`][pixeltable.globals.create_table].
|
|
@@ -461,10 +458,12 @@ class Table(SchemaObject):
|
|
|
461
458
|
|
|
462
459
|
- `'error'`: an exception will be raised.
|
|
463
460
|
- `'ignore'`: do nothing and return.
|
|
464
|
-
- `'replace' or 'replace_force'`: drop the existing column and add the new column, if it has no
|
|
461
|
+
- `'replace' or 'replace_force'`: drop the existing column and add the new column, if it has no
|
|
462
|
+
dependents.
|
|
465
463
|
|
|
466
464
|
Note that the `if_exists` parameter is applied to all columns in the schema.
|
|
467
|
-
To apply different behaviors to different columns, please use
|
|
465
|
+
To apply different behaviors to different columns, please use
|
|
466
|
+
[`add_column()`][pixeltable.Table.add_column] for each column.
|
|
468
467
|
|
|
469
468
|
Returns:
|
|
470
469
|
Information about the execution status of the operation.
|
|
@@ -525,7 +524,8 @@ class Table(SchemaObject):
|
|
|
525
524
|
|
|
526
525
|
- `'error'`: an exception will be raised.
|
|
527
526
|
- `'ignore'`: do nothing and return.
|
|
528
|
-
- `'replace' or 'replace_force'`: drop the existing column and add the new column, if it has
|
|
527
|
+
- `'replace' or 'replace_force'`: drop the existing column and add the new column, if it has
|
|
528
|
+
no dependents.
|
|
529
529
|
|
|
530
530
|
Returns:
|
|
531
531
|
Information about the execution status of the operation.
|
|
@@ -556,7 +556,7 @@ class Table(SchemaObject):
|
|
|
556
556
|
col_type = next(iter(kwargs.values()))
|
|
557
557
|
if not isinstance(col_type, (ts.ColumnType, type, _GenericAlias)):
|
|
558
558
|
raise excs.Error(
|
|
559
|
-
|
|
559
|
+
'The argument to add_column() must be a type; did you intend to use add_computed_column() instead?'
|
|
560
560
|
)
|
|
561
561
|
return self.add_columns(kwargs, if_exists=if_exists)
|
|
562
562
|
|
|
@@ -587,7 +587,8 @@ class Table(SchemaObject):
|
|
|
587
587
|
|
|
588
588
|
- `'error'`: an exception will be raised.
|
|
589
589
|
- `'ignore'`: do nothing and return.
|
|
590
|
-
- `'replace' or 'replace_force'`: drop the existing column and add the new column, iff it has
|
|
590
|
+
- `'replace' or 'replace_force'`: drop the existing column and add the new column, iff it has
|
|
591
|
+
no dependents.
|
|
591
592
|
|
|
592
593
|
Returns:
|
|
593
594
|
Information about the execution status of the operation.
|
|
@@ -611,7 +612,8 @@ class Table(SchemaObject):
|
|
|
611
612
|
raise excs.Error('Cannot add column to a snapshot.')
|
|
612
613
|
if len(kwargs) != 1:
|
|
613
614
|
raise excs.Error(
|
|
614
|
-
f'add_computed_column() requires exactly one keyword argument of the form
|
|
615
|
+
f'add_computed_column() requires exactly one keyword argument of the form '
|
|
616
|
+
'"column-name=type|value-expression"; '
|
|
615
617
|
f'got {len(kwargs)} arguments instead ({", ".join(list(kwargs.keys()))})'
|
|
616
618
|
)
|
|
617
619
|
col_name, spec = next(iter(kwargs.items()))
|
|
@@ -622,6 +624,15 @@ class Table(SchemaObject):
|
|
|
622
624
|
if stored is not None:
|
|
623
625
|
col_schema['stored'] = stored
|
|
624
626
|
|
|
627
|
+
# Raise an error if the column expression refers to a column error property
|
|
628
|
+
if isinstance(spec, exprs.Expr):
|
|
629
|
+
for e in spec.subexprs(expr_class=exprs.ColumnPropertyRef, traverse_matches=False):
|
|
630
|
+
if e.is_error_prop():
|
|
631
|
+
raise excs.Error(
|
|
632
|
+
'Use of a reference to an error property of another column is not allowed in a computed '
|
|
633
|
+
f'column. The specified computation for this column contains this reference: `{e!r}`'
|
|
634
|
+
)
|
|
635
|
+
|
|
625
636
|
with Env.get().begin_xact():
|
|
626
637
|
# handle existing columns based on if_exists parameter
|
|
627
638
|
cols_to_ignore = self._ignore_or_drop_existing_columns(
|
|
@@ -648,16 +659,15 @@ class Table(SchemaObject):
|
|
|
648
659
|
"""
|
|
649
660
|
assert isinstance(spec, dict)
|
|
650
661
|
valid_keys = {'type', 'value', 'stored', 'media_validation'}
|
|
651
|
-
for k in spec
|
|
662
|
+
for k in spec:
|
|
652
663
|
if k not in valid_keys:
|
|
653
664
|
raise excs.Error(f'Column {name}: invalid key {k!r}')
|
|
654
665
|
|
|
655
666
|
if 'type' not in spec and 'value' not in spec:
|
|
656
667
|
raise excs.Error(f"Column {name}: 'type' or 'value' must be specified")
|
|
657
668
|
|
|
658
|
-
if 'type' in spec:
|
|
659
|
-
|
|
660
|
-
raise excs.Error(f'Column {name}: "type" must be a type or ColumnType, got {spec["type"]}')
|
|
669
|
+
if 'type' in spec and not isinstance(spec['type'], (ts.ColumnType, type, _GenericAlias)):
|
|
670
|
+
raise excs.Error(f'Column {name}: "type" must be a type or ColumnType, got {spec["type"]}')
|
|
661
671
|
|
|
662
672
|
if 'value' in spec:
|
|
663
673
|
value_expr = exprs.Expr.from_object(spec['value'])
|
|
@@ -720,20 +730,25 @@ class Table(SchemaObject):
|
|
|
720
730
|
columns.append(column)
|
|
721
731
|
return columns
|
|
722
732
|
|
|
733
|
+
@classmethod
|
|
734
|
+
def validate_column_name(cls, name: str) -> None:
|
|
735
|
+
"""Check that a name is usable as a pixeltalbe column name"""
|
|
736
|
+
if is_system_column_name(name) or is_python_keyword(name):
|
|
737
|
+
raise excs.Error(f'{name!r} is a reserved name in Pixeltable; please choose a different column name.')
|
|
738
|
+
if not is_valid_identifier(name):
|
|
739
|
+
raise excs.Error(f'Invalid column name: {name!r}')
|
|
740
|
+
|
|
723
741
|
@classmethod
|
|
724
742
|
def _verify_column(cls, col: Column) -> None:
|
|
725
743
|
"""Check integrity of user-supplied Column and supply defaults"""
|
|
726
|
-
|
|
727
|
-
raise excs.Error(f'{col.name!r} is a reserved name in Pixeltable; please choose a different column name.')
|
|
728
|
-
if not is_valid_identifier(col.name):
|
|
729
|
-
raise excs.Error(f'Invalid column name: {col.name!r}')
|
|
744
|
+
cls.validate_column_name(col.name)
|
|
730
745
|
if col.stored is False and not col.is_computed:
|
|
731
746
|
raise excs.Error(f'Column {col.name!r}: stored={col.stored} only applies to computed columns')
|
|
732
747
|
if col.stored is False and col.has_window_fn_call():
|
|
733
748
|
raise excs.Error(
|
|
734
749
|
(
|
|
735
|
-
f'Column {col.name!r}: stored={col.stored} is not valid for image columns computed with a
|
|
736
|
-
f'function'
|
|
750
|
+
f'Column {col.name!r}: stored={col.stored} is not valid for image columns computed with a '
|
|
751
|
+
f'streaming function'
|
|
737
752
|
)
|
|
738
753
|
)
|
|
739
754
|
|
|
@@ -745,16 +760,6 @@ class Table(SchemaObject):
|
|
|
745
760
|
cls._verify_column(col)
|
|
746
761
|
column_names.add(col.name)
|
|
747
762
|
|
|
748
|
-
def __check_column_name_exists(self, column_name: str, include_bases: bool = False) -> None:
|
|
749
|
-
col = self._tbl_version_path.get_column(column_name, include_bases)
|
|
750
|
-
if col is None:
|
|
751
|
-
raise excs.Error(f'Column {column_name!r} unknown')
|
|
752
|
-
|
|
753
|
-
def __check_column_ref_exists(self, col_ref: ColumnRef, include_bases: bool = False) -> None:
|
|
754
|
-
exists = self._tbl_version_path.has_column(col_ref.col, include_bases)
|
|
755
|
-
if not exists:
|
|
756
|
-
raise excs.Error(f'Unknown column: {col_ref.col.qualified_name}')
|
|
757
|
-
|
|
758
763
|
def drop_column(self, column: Union[str, ColumnRef], if_not_exists: Literal['error', 'ignore'] = 'error') -> None:
|
|
759
764
|
"""Drop a column from the table.
|
|
760
765
|
|
|
@@ -789,21 +794,21 @@ class Table(SchemaObject):
|
|
|
789
794
|
if self._tbl_version_path.is_snapshot():
|
|
790
795
|
raise excs.Error('Cannot drop column from a snapshot.')
|
|
791
796
|
col: Column = None
|
|
792
|
-
|
|
797
|
+
if_not_exists_ = IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
|
|
793
798
|
if isinstance(column, str):
|
|
794
799
|
col = self._tbl_version_path.get_column(column, include_bases=False)
|
|
795
800
|
if col is None:
|
|
796
|
-
if
|
|
801
|
+
if if_not_exists_ == IfNotExistsParam.ERROR:
|
|
797
802
|
raise excs.Error(f'Column {column!r} unknown')
|
|
798
|
-
assert
|
|
803
|
+
assert if_not_exists_ == IfNotExistsParam.IGNORE
|
|
799
804
|
return
|
|
800
805
|
col = self._tbl_version.get().cols_by_name[column]
|
|
801
806
|
else:
|
|
802
807
|
exists = self._tbl_version_path.has_column(column.col, include_bases=False)
|
|
803
808
|
if not exists:
|
|
804
|
-
if
|
|
809
|
+
if if_not_exists_ == IfNotExistsParam.ERROR:
|
|
805
810
|
raise excs.Error(f'Unknown column: {column.col.qualified_name}')
|
|
806
|
-
assert
|
|
811
|
+
assert if_not_exists_ == IfNotExistsParam.IGNORE
|
|
807
812
|
return
|
|
808
813
|
col = column.col
|
|
809
814
|
|
|
@@ -819,7 +824,7 @@ class Table(SchemaObject):
|
|
|
819
824
|
# (transitive) views of this table.
|
|
820
825
|
dependent_stores = [
|
|
821
826
|
(view, store)
|
|
822
|
-
for view in
|
|
827
|
+
for view in (self, *self._get_views(recursive=True))
|
|
823
828
|
for store in view._tbl_version.get().external_stores.values()
|
|
824
829
|
if col in store.get_local_columns()
|
|
825
830
|
]
|
|
@@ -907,7 +912,7 @@ class Table(SchemaObject):
|
|
|
907
912
|
Args:
|
|
908
913
|
column: The name of, or reference to, the column to be indexed; must be a `String` or `Image` column.
|
|
909
914
|
idx_name: An optional name for the index. If not specified, a name such as `'idx0'` will be generated
|
|
910
|
-
automatically. If specified, the name must be unique for this table.
|
|
915
|
+
automatically. If specified, the name must be unique for this table and a valid pixeltable column name.
|
|
911
916
|
embedding: The UDF to use for the embedding. Must be a UDF that accepts a single argument of type `String`
|
|
912
917
|
or `Image` (as appropriate for the column being indexed) and returns a fixed-size 1-dimensional
|
|
913
918
|
array of floats.
|
|
@@ -926,7 +931,8 @@ class Table(SchemaObject):
|
|
|
926
931
|
- `'replace'` or `'replace_force'`: replace the existing index with the new one.
|
|
927
932
|
|
|
928
933
|
Raises:
|
|
929
|
-
Error: If an index with the specified name already exists for the table and `if_exists='error'`, or if
|
|
934
|
+
Error: If an index with the specified name already exists for the table and `if_exists='error'`, or if
|
|
935
|
+
the specified column does not exist.
|
|
930
936
|
|
|
931
937
|
Examples:
|
|
932
938
|
Add an index to the `img` column of the table `my_table`:
|
|
@@ -960,37 +966,35 @@ class Table(SchemaObject):
|
|
|
960
966
|
"""
|
|
961
967
|
if self._tbl_version_path.is_snapshot():
|
|
962
968
|
raise excs.Error('Cannot add an index to a snapshot')
|
|
963
|
-
col
|
|
964
|
-
if isinstance(column, str):
|
|
965
|
-
self.__check_column_name_exists(column, include_bases=True)
|
|
966
|
-
col = self._tbl_version_path.get_column(column, include_bases=True)
|
|
967
|
-
else:
|
|
968
|
-
self.__check_column_ref_exists(column, include_bases=True)
|
|
969
|
-
col = column.col
|
|
969
|
+
col = self._resolve_column_parameter(column)
|
|
970
970
|
|
|
971
971
|
with Env.get().begin_xact():
|
|
972
972
|
if idx_name is not None and idx_name in self._tbl_version.get().idxs_by_name:
|
|
973
|
-
|
|
973
|
+
if_exists_ = IfExistsParam.validated(if_exists, 'if_exists')
|
|
974
974
|
# An index with the same name already exists.
|
|
975
975
|
# Handle it according to if_exists.
|
|
976
|
-
if
|
|
976
|
+
if if_exists_ == IfExistsParam.ERROR:
|
|
977
977
|
raise excs.Error(f'Duplicate index name: {idx_name}')
|
|
978
978
|
if not isinstance(self._tbl_version.get().idxs_by_name[idx_name].idx, index.EmbeddingIndex):
|
|
979
979
|
raise excs.Error(
|
|
980
|
-
f'Index `{idx_name}` is not an embedding index. Cannot {
|
|
980
|
+
f'Index `{idx_name}` is not an embedding index. Cannot {if_exists_.name.lower()} it.'
|
|
981
981
|
)
|
|
982
|
-
if
|
|
982
|
+
if if_exists_ == IfExistsParam.IGNORE:
|
|
983
983
|
return
|
|
984
|
-
assert
|
|
984
|
+
assert if_exists_ in (IfExistsParam.REPLACE, IfExistsParam.REPLACE_FORCE)
|
|
985
985
|
self.drop_index(idx_name=idx_name)
|
|
986
986
|
assert idx_name not in self._tbl_version.get().idxs_by_name
|
|
987
987
|
from pixeltable.index import EmbeddingIndex
|
|
988
988
|
|
|
989
|
+
# idx_name must be a valid pixeltable column name
|
|
990
|
+
if idx_name is not None:
|
|
991
|
+
Table.validate_column_name(idx_name)
|
|
992
|
+
|
|
989
993
|
# create the EmbeddingIndex instance to verify args
|
|
990
994
|
idx = EmbeddingIndex(
|
|
991
995
|
col, metric=metric, embed=embedding, string_embed=string_embed, image_embed=image_embed
|
|
992
996
|
)
|
|
993
|
-
|
|
997
|
+
_ = self._tbl_version.get().add_index(col, idx_name=idx_name, idx=idx)
|
|
994
998
|
# TODO: how to deal with exceptions here? drop the index and raise?
|
|
995
999
|
FileCache.get().emit_eviction_warnings()
|
|
996
1000
|
|
|
@@ -1049,17 +1053,28 @@ class Table(SchemaObject):
|
|
|
1049
1053
|
|
|
1050
1054
|
col: Column = None
|
|
1051
1055
|
if idx_name is None:
|
|
1052
|
-
|
|
1053
|
-
self.__check_column_name_exists(column, include_bases=True)
|
|
1054
|
-
col = self._tbl_version_path.get_column(column, include_bases=True)
|
|
1055
|
-
else:
|
|
1056
|
-
self.__check_column_ref_exists(column, include_bases=True)
|
|
1057
|
-
col = column.col
|
|
1056
|
+
col = self._resolve_column_parameter(column)
|
|
1058
1057
|
assert col is not None
|
|
1059
1058
|
|
|
1060
1059
|
with Env.get().begin_xact():
|
|
1061
1060
|
self._drop_index(col=col, idx_name=idx_name, _idx_class=index.EmbeddingIndex, if_not_exists=if_not_exists)
|
|
1062
1061
|
|
|
1062
|
+
def _resolve_column_parameter(self, column: Union[str, ColumnRef]) -> Column:
|
|
1063
|
+
"""Resolve a column parameter to a Column object"""
|
|
1064
|
+
col: Column = None
|
|
1065
|
+
if isinstance(column, str):
|
|
1066
|
+
col = self._tbl_version_path.get_column(column, include_bases=True)
|
|
1067
|
+
if col is None:
|
|
1068
|
+
raise excs.Error(f'Column {column!r} unknown')
|
|
1069
|
+
elif isinstance(column, ColumnRef):
|
|
1070
|
+
exists = self._tbl_version_path.has_column(column.col, include_bases=True)
|
|
1071
|
+
if not exists:
|
|
1072
|
+
raise excs.Error(f'Unknown column: {column.col.qualified_name}')
|
|
1073
|
+
col = column.col
|
|
1074
|
+
else:
|
|
1075
|
+
raise excs.Error(f'Invalid column parameter type: {type(column)}')
|
|
1076
|
+
return col
|
|
1077
|
+
|
|
1063
1078
|
def drop_index(
|
|
1064
1079
|
self,
|
|
1065
1080
|
*,
|
|
@@ -1115,12 +1130,7 @@ class Table(SchemaObject):
|
|
|
1115
1130
|
|
|
1116
1131
|
col: Column = None
|
|
1117
1132
|
if idx_name is None:
|
|
1118
|
-
|
|
1119
|
-
self.__check_column_name_exists(column, include_bases=True)
|
|
1120
|
-
col = self._tbl_version_path.get_column(column, include_bases=True)
|
|
1121
|
-
else:
|
|
1122
|
-
self.__check_column_ref_exists(column, include_bases=True)
|
|
1123
|
-
col = column.col
|
|
1133
|
+
col = self._resolve_column_parameter(column)
|
|
1124
1134
|
assert col is not None
|
|
1125
1135
|
|
|
1126
1136
|
with Env.get().begin_xact():
|
|
@@ -1139,55 +1149,68 @@ class Table(SchemaObject):
|
|
|
1139
1149
|
assert (col is None) != (idx_name is None)
|
|
1140
1150
|
|
|
1141
1151
|
if idx_name is not None:
|
|
1142
|
-
|
|
1152
|
+
if_not_exists_ = IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
|
|
1143
1153
|
if idx_name not in self._tbl_version.get().idxs_by_name:
|
|
1144
|
-
if
|
|
1154
|
+
if if_not_exists_ == IfNotExistsParam.ERROR:
|
|
1145
1155
|
raise excs.Error(f'Index {idx_name!r} does not exist')
|
|
1146
|
-
assert
|
|
1156
|
+
assert if_not_exists_ == IfNotExistsParam.IGNORE
|
|
1147
1157
|
return
|
|
1148
|
-
|
|
1158
|
+
idx_info = self._tbl_version.get().idxs_by_name[idx_name]
|
|
1149
1159
|
else:
|
|
1150
1160
|
if col.tbl.id != self._tbl_version.id:
|
|
1151
1161
|
raise excs.Error(
|
|
1152
1162
|
f'Column {col.name!r}: cannot drop index from column that belongs to base ({col.tbl.get().name}!r)'
|
|
1153
1163
|
)
|
|
1154
|
-
|
|
1164
|
+
idx_info_list = [info for info in self._tbl_version.get().idxs_by_name.values() if info.col.id == col.id]
|
|
1155
1165
|
if _idx_class is not None:
|
|
1156
|
-
|
|
1157
|
-
if len(
|
|
1158
|
-
|
|
1159
|
-
if
|
|
1166
|
+
idx_info_list = [info for info in idx_info_list if isinstance(info.idx, _idx_class)]
|
|
1167
|
+
if len(idx_info_list) == 0:
|
|
1168
|
+
if_not_exists_ = IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
|
|
1169
|
+
if if_not_exists_ == IfNotExistsParam.ERROR:
|
|
1160
1170
|
raise excs.Error(f'Column {col.name!r} does not have an index')
|
|
1161
|
-
assert
|
|
1171
|
+
assert if_not_exists_ == IfNotExistsParam.IGNORE
|
|
1162
1172
|
return
|
|
1163
|
-
if len(
|
|
1173
|
+
if len(idx_info_list) > 1:
|
|
1164
1174
|
raise excs.Error(f"Column {col.name!r} has multiple indices; specify 'idx_name' instead")
|
|
1165
|
-
|
|
1166
|
-
|
|
1175
|
+
idx_info = idx_info_list[0]
|
|
1176
|
+
|
|
1177
|
+
# Find out if anything depends on this index
|
|
1178
|
+
dependent_user_cols = [c for c in idx_info.val_col.dependent_cols if c.name is not None]
|
|
1179
|
+
if len(dependent_user_cols) > 0:
|
|
1180
|
+
raise excs.Error(
|
|
1181
|
+
f'Cannot drop index because the following columns depend on it:\n'
|
|
1182
|
+
f'{", ".join(c.name for c in dependent_user_cols)}'
|
|
1183
|
+
)
|
|
1184
|
+
self._tbl_version.get().drop_index(idx_info.id)
|
|
1167
1185
|
|
|
1168
1186
|
@overload
|
|
1169
1187
|
def insert(
|
|
1170
1188
|
self,
|
|
1171
|
-
|
|
1189
|
+
source: TableDataSource,
|
|
1172
1190
|
/,
|
|
1173
1191
|
*,
|
|
1174
|
-
|
|
1192
|
+
source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
|
|
1193
|
+
schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
|
|
1175
1194
|
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
1195
|
+
print_stats: bool = False,
|
|
1196
|
+
**kwargs: Any,
|
|
1176
1197
|
) -> UpdateStatus: ...
|
|
1177
1198
|
|
|
1178
1199
|
@overload
|
|
1179
1200
|
def insert(
|
|
1180
|
-
self, *,
|
|
1201
|
+
self, /, *, on_error: Literal['abort', 'ignore'] = 'abort', print_stats: bool = False, **kwargs: Any
|
|
1181
1202
|
) -> UpdateStatus: ...
|
|
1182
1203
|
|
|
1183
|
-
@abc.abstractmethod
|
|
1204
|
+
@abc.abstractmethod
|
|
1184
1205
|
def insert(
|
|
1185
1206
|
self,
|
|
1186
|
-
|
|
1207
|
+
source: Optional[TableDataSource] = None,
|
|
1187
1208
|
/,
|
|
1188
1209
|
*,
|
|
1189
|
-
|
|
1210
|
+
source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
|
|
1211
|
+
schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
|
|
1190
1212
|
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
1213
|
+
print_stats: bool = False,
|
|
1191
1214
|
**kwargs: Any,
|
|
1192
1215
|
) -> UpdateStatus:
|
|
1193
1216
|
"""Inserts rows into this table. There are two mutually exclusive call patterns:
|
|
@@ -1196,11 +1219,12 @@ class Table(SchemaObject):
|
|
|
1196
1219
|
|
|
1197
1220
|
```python
|
|
1198
1221
|
insert(
|
|
1199
|
-
|
|
1222
|
+
source: TableSourceDataType,
|
|
1200
1223
|
/,
|
|
1201
1224
|
*,
|
|
1225
|
+
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
1202
1226
|
print_stats: bool = False,
|
|
1203
|
-
|
|
1227
|
+
**kwargs: Any,
|
|
1204
1228
|
)```
|
|
1205
1229
|
|
|
1206
1230
|
To insert just a single row, you can use the more concise syntax:
|
|
@@ -1208,23 +1232,25 @@ class Table(SchemaObject):
|
|
|
1208
1232
|
```python
|
|
1209
1233
|
insert(
|
|
1210
1234
|
*,
|
|
1211
|
-
print_stats: bool = False,
|
|
1212
1235
|
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
1236
|
+
print_stats: bool = False,
|
|
1213
1237
|
**kwargs: Any
|
|
1214
1238
|
)```
|
|
1215
1239
|
|
|
1216
1240
|
Args:
|
|
1217
|
-
|
|
1218
|
-
names to values.
|
|
1241
|
+
source: A data source from which data can be imported.
|
|
1219
1242
|
kwargs: (if inserting a single row) Keyword-argument pairs representing column names and values.
|
|
1220
|
-
|
|
1243
|
+
(if inserting multiple rows) Additional keyword arguments are passed to the data source.
|
|
1244
|
+
source_format: A hint about the format of the source data
|
|
1245
|
+
schema_overrides: If specified, then columns in `schema_overrides` will be given the specified types
|
|
1221
1246
|
on_error: Determines the behavior if an error occurs while evaluating a computed column or detecting an
|
|
1222
1247
|
invalid media file (such as a corrupt image) for one of the inserted rows.
|
|
1223
1248
|
|
|
1224
1249
|
- If `on_error='abort'`, then an exception will be raised and the rows will not be inserted.
|
|
1225
1250
|
- If `on_error='ignore'`, then execution will continue and the rows will be inserted. Any cells
|
|
1226
|
-
|
|
1227
|
-
|
|
1251
|
+
with errors will have a `None` value for that cell, with information about the error stored in the
|
|
1252
|
+
corresponding `tbl.col_name.errortype` and `tbl.col_name.errormsg` fields.
|
|
1253
|
+
print_stats: If `True`, print statistics about the cost of computed columns.
|
|
1228
1254
|
|
|
1229
1255
|
Returns:
|
|
1230
1256
|
An [`UpdateStatus`][pixeltable.UpdateStatus] object containing information about the update.
|
|
@@ -1236,6 +1262,7 @@ class Table(SchemaObject):
|
|
|
1236
1262
|
- The table has been dropped.
|
|
1237
1263
|
- One of the rows being inserted does not conform to the table schema.
|
|
1238
1264
|
- An error occurs during processing of computed columns, and `on_error='ignore'`.
|
|
1265
|
+
- An error occurs while importing data from a source, and `on_error='abort'`.
|
|
1239
1266
|
|
|
1240
1267
|
Examples:
|
|
1241
1268
|
Insert two rows into the table `my_table` with three int columns ``a``, ``b``, and ``c``.
|
|
@@ -1247,6 +1274,10 @@ class Table(SchemaObject):
|
|
|
1247
1274
|
Insert a single row using the alternative syntax:
|
|
1248
1275
|
|
|
1249
1276
|
>>> tbl.insert(a=3, b=3, c=3)
|
|
1277
|
+
|
|
1278
|
+
Insert rows from a CSV file:
|
|
1279
|
+
|
|
1280
|
+
>>> tbl.insert(source='path/to/file.csv')
|
|
1250
1281
|
"""
|
|
1251
1282
|
raise NotImplementedError
|
|
1252
1283
|
|
|
@@ -1318,7 +1349,7 @@ class Table(SchemaObject):
|
|
|
1318
1349
|
rows = list(rows)
|
|
1319
1350
|
|
|
1320
1351
|
row_updates: list[dict[Column, exprs.Expr]] = []
|
|
1321
|
-
pk_col_names =
|
|
1352
|
+
pk_col_names = {c.name for c in self._tbl_version.get().primary_key_columns()}
|
|
1322
1353
|
|
|
1323
1354
|
# pseudo-column _rowid: contains the rowid of the row to update and can be used instead of the primary key
|
|
1324
1355
|
has_rowid = _ROWID_COLUMN_NAME in rows[0]
|
|
@@ -1328,16 +1359,16 @@ class Table(SchemaObject):
|
|
|
1328
1359
|
|
|
1329
1360
|
for row_spec in rows:
|
|
1330
1361
|
col_vals = self._tbl_version.get()._validate_update_spec(
|
|
1331
|
-
row_spec, allow_pk=not has_rowid, allow_exprs=False
|
|
1362
|
+
row_spec, allow_pk=not has_rowid, allow_exprs=False, allow_media=False
|
|
1332
1363
|
)
|
|
1333
1364
|
if has_rowid:
|
|
1334
1365
|
# we expect the _rowid column to be present for each row
|
|
1335
1366
|
assert _ROWID_COLUMN_NAME in row_spec
|
|
1336
1367
|
rowids.append(row_spec[_ROWID_COLUMN_NAME])
|
|
1337
1368
|
else:
|
|
1338
|
-
col_names =
|
|
1369
|
+
col_names = {col.name for col in col_vals}
|
|
1339
1370
|
if any(pk_col_name not in col_names for pk_col_name in pk_col_names):
|
|
1340
|
-
missing_cols = pk_col_names -
|
|
1371
|
+
missing_cols = pk_col_names - {col.name for col in col_vals}
|
|
1341
1372
|
raise excs.Error(f'Primary key columns ({", ".join(missing_cols)}) missing in {row_spec}')
|
|
1342
1373
|
row_updates.append(col_vals)
|
|
1343
1374
|
|