pixeltable-0.3.10-py3-none-any.whl → pixeltable-0.3.11-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +1 -1
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +2 -1
- pixeltable/catalog/catalog.py +63 -36
- pixeltable/catalog/column.py +6 -4
- pixeltable/catalog/dir.py +5 -5
- pixeltable/catalog/globals.py +12 -14
- pixeltable/catalog/insertable_table.py +4 -7
- pixeltable/catalog/path.py +2 -2
- pixeltable/catalog/table.py +64 -56
- pixeltable/catalog/table_version.py +42 -40
- pixeltable/catalog/table_version_handle.py +3 -0
- pixeltable/catalog/table_version_path.py +1 -1
- pixeltable/catalog/view.py +8 -7
- pixeltable/dataframe.py +5 -3
- pixeltable/env.py +108 -42
- pixeltable/exec/__init__.py +2 -0
- pixeltable/exec/aggregation_node.py +6 -8
- pixeltable/exec/cache_prefetch_node.py +4 -7
- pixeltable/exec/component_iteration_node.py +1 -3
- pixeltable/exec/data_row_batch.py +1 -2
- pixeltable/exec/exec_context.py +1 -1
- pixeltable/exec/exec_node.py +1 -2
- pixeltable/exec/expr_eval/__init__.py +2 -0
- pixeltable/exec/expr_eval/evaluators.py +137 -20
- pixeltable/exec/expr_eval/expr_eval_node.py +43 -64
- pixeltable/exec/expr_eval/globals.py +68 -7
- pixeltable/exec/expr_eval/schedulers.py +25 -23
- pixeltable/exec/in_memory_data_node.py +8 -6
- pixeltable/exec/row_update_node.py +3 -4
- pixeltable/exec/sql_node.py +16 -17
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/column_property_ref.py +1 -1
- pixeltable/exprs/column_ref.py +3 -3
- pixeltable/exprs/compound_predicate.py +1 -1
- pixeltable/exprs/data_row.py +17 -1
- pixeltable/exprs/expr.py +12 -12
- pixeltable/exprs/function_call.py +34 -2
- pixeltable/exprs/json_mapper.py +95 -48
- pixeltable/exprs/json_path.py +3 -4
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +33 -6
- pixeltable/exprs/similarity_expr.py +1 -1
- pixeltable/exprs/sql_element_cache.py +1 -1
- pixeltable/exprs/string_op.py +2 -2
- pixeltable/ext/__init__.py +1 -1
- pixeltable/ext/functions/__init__.py +1 -1
- pixeltable/ext/functions/whisperx.py +1 -1
- pixeltable/ext/functions/yolox.py +1 -1
- pixeltable/func/aggregate_function.py +1 -1
- pixeltable/func/callable_function.py +2 -5
- pixeltable/func/expr_template_function.py +22 -2
- pixeltable/func/function.py +4 -5
- pixeltable/func/function_registry.py +1 -1
- pixeltable/func/signature.py +1 -1
- pixeltable/func/udf.py +2 -2
- pixeltable/functions/__init__.py +1 -1
- pixeltable/functions/anthropic.py +2 -2
- pixeltable/functions/audio.py +1 -1
- pixeltable/functions/deepseek.py +1 -1
- pixeltable/functions/fireworks.py +1 -1
- pixeltable/functions/globals.py +6 -6
- pixeltable/functions/huggingface.py +1 -1
- pixeltable/functions/image.py +1 -1
- pixeltable/functions/json.py +1 -1
- pixeltable/functions/llama_cpp.py +1 -1
- pixeltable/functions/math.py +1 -1
- pixeltable/functions/mistralai.py +1 -1
- pixeltable/functions/ollama.py +1 -1
- pixeltable/functions/openai.py +2 -2
- pixeltable/functions/replicate.py +1 -1
- pixeltable/functions/string.py +1 -1
- pixeltable/functions/timestamp.py +1 -1
- pixeltable/functions/together.py +1 -1
- pixeltable/functions/util.py +1 -1
- pixeltable/functions/video.py +2 -2
- pixeltable/functions/vision.py +2 -2
- pixeltable/index/embedding_index.py +12 -1
- pixeltable/io/__init__.py +5 -3
- pixeltable/io/fiftyone.py +6 -7
- pixeltable/io/label_studio.py +21 -20
- pixeltable/io/pandas.py +6 -5
- pixeltable/iterators/__init__.py +1 -1
- pixeltable/metadata/__init__.py +5 -3
- pixeltable/metadata/converters/convert_24.py +3 -3
- pixeltable/metadata/converters/convert_25.py +1 -1
- pixeltable/metadata/converters/convert_29.py +1 -1
- pixeltable/store.py +2 -2
- pixeltable/type_system.py +19 -7
- pixeltable/utils/console_output.py +3 -2
- pixeltable/utils/coroutine.py +3 -3
- pixeltable/utils/dbms.py +66 -0
- pixeltable/utils/documents.py +61 -67
- pixeltable/utils/filecache.py +1 -1
- pixeltable/utils/http_server.py +3 -2
- pixeltable/utils/pytorch.py +1 -1
- pixeltable/utils/sql.py +1 -1
- pixeltable-0.3.11.dist-info/METADATA +436 -0
- pixeltable-0.3.11.dist-info/RECORD +179 -0
- pixeltable/catalog/path_dict.py +0 -169
- pixeltable-0.3.10.dist-info/METADATA +0 -382
- pixeltable-0.3.10.dist-info/RECORD +0 -179
- {pixeltable-0.3.10.dist-info → pixeltable-0.3.11.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.10.dist-info → pixeltable-0.3.11.dist-info}/WHEEL +0 -0
- {pixeltable-0.3.10.dist-info → pixeltable-0.3.11.dist-info}/entry_points.txt +0 -0
pixeltable/catalog/table.py
CHANGED
|
@@ -15,14 +15,9 @@ import pandas as pd
|
|
|
15
15
|
import sqlalchemy as sql
|
|
16
16
|
|
|
17
17
|
import pixeltable as pxt
|
|
18
|
-
import
|
|
19
|
-
import pixeltable.env as env
|
|
20
|
-
import pixeltable.exceptions as excs
|
|
21
|
-
import pixeltable.exprs as exprs
|
|
22
|
-
import pixeltable.index as index
|
|
23
|
-
import pixeltable.metadata.schema as schema
|
|
24
|
-
import pixeltable.type_system as ts
|
|
18
|
+
from pixeltable import catalog, env, exceptions as excs, exprs, index, type_system as ts
|
|
25
19
|
from pixeltable.env import Env
|
|
20
|
+
from pixeltable.metadata import schema
|
|
26
21
|
|
|
27
22
|
from ..exprs import ColumnRef
|
|
28
23
|
from ..utils.description_helper import DescriptionHelper
|
|
@@ -38,16 +33,14 @@ from .globals import (
|
|
|
38
33
|
is_valid_identifier,
|
|
39
34
|
)
|
|
40
35
|
from .schema_object import SchemaObject
|
|
41
|
-
from .table_version import TableVersion
|
|
42
36
|
from .table_version_handle import TableVersionHandle
|
|
43
37
|
from .table_version_path import TableVersionPath
|
|
44
38
|
|
|
45
39
|
if TYPE_CHECKING:
|
|
46
|
-
import datasets # type: ignore[import-untyped]
|
|
47
40
|
import torch.utils.data
|
|
48
41
|
|
|
49
42
|
import pixeltable.plan
|
|
50
|
-
from pixeltable.globals import
|
|
43
|
+
from pixeltable.globals import TableDataSource
|
|
51
44
|
|
|
52
45
|
_logger = logging.getLogger('pixeltable')
|
|
53
46
|
|
|
@@ -82,7 +75,7 @@ class Table(SchemaObject):
|
|
|
82
75
|
(
|
|
83
76
|
f'UPDATE {schema.Table.__table__} '
|
|
84
77
|
f'SET {schema.Table.dir_id.name} = :new_dir_id, '
|
|
85
|
-
f" {schema.Table.md.name}
|
|
78
|
+
f" {schema.Table.md.name} = jsonb_set({schema.Table.md.name}, '{{name}}', (:new_name)::jsonb) "
|
|
86
79
|
f'WHERE {schema.Table.id.name} = :id'
|
|
87
80
|
)
|
|
88
81
|
)
|
|
@@ -232,15 +225,15 @@ class Table(SchemaObject):
|
|
|
232
225
|
"""Return rows from this table."""
|
|
233
226
|
return self._df().collect()
|
|
234
227
|
|
|
235
|
-
def show(self, *args, **kwargs) -> 'pxt.dataframe.DataFrameResultSet':
|
|
228
|
+
def show(self, *args: Any, **kwargs: Any) -> 'pxt.dataframe.DataFrameResultSet':
|
|
236
229
|
"""Return rows from this table."""
|
|
237
230
|
return self._df().show(*args, **kwargs)
|
|
238
231
|
|
|
239
|
-
def head(self, *args, **kwargs) -> 'pxt.dataframe.DataFrameResultSet':
|
|
232
|
+
def head(self, *args: Any, **kwargs: Any) -> 'pxt.dataframe.DataFrameResultSet':
|
|
240
233
|
"""Return the first n rows inserted into this table."""
|
|
241
234
|
return self._df().head(*args, **kwargs)
|
|
242
235
|
|
|
243
|
-
def tail(self, *args, **kwargs) -> 'pxt.dataframe.DataFrameResultSet':
|
|
236
|
+
def tail(self, *args: Any, **kwargs: Any) -> 'pxt.dataframe.DataFrameResultSet':
|
|
244
237
|
"""Return the last n rows inserted into this table."""
|
|
245
238
|
return self._df().tail(*args, **kwargs)
|
|
246
239
|
|
|
@@ -287,7 +280,7 @@ class Table(SchemaObject):
|
|
|
287
280
|
return self._tbl_version.get().comment
|
|
288
281
|
|
|
289
282
|
@property
|
|
290
|
-
def _num_retained_versions(self):
|
|
283
|
+
def _num_retained_versions(self) -> int:
|
|
291
284
|
return self._tbl_version.get().num_retained_versions
|
|
292
285
|
|
|
293
286
|
@property
|
|
@@ -406,12 +399,12 @@ class Table(SchemaObject):
|
|
|
406
399
|
def _column_has_dependents(self, col: Column) -> bool:
|
|
407
400
|
"""Returns True if the column has dependents, False otherwise."""
|
|
408
401
|
assert col is not None
|
|
409
|
-
assert col.name in self._schema
|
|
402
|
+
assert col.name in self._schema
|
|
410
403
|
if any(c.name is not None for c in col.dependent_cols):
|
|
411
404
|
return True
|
|
412
405
|
return any(
|
|
413
406
|
col in store.get_local_columns()
|
|
414
|
-
for view in
|
|
407
|
+
for view in (self, *self._get_views(recursive=True))
|
|
415
408
|
for store in view._tbl_version.get().external_stores.values()
|
|
416
409
|
)
|
|
417
410
|
|
|
@@ -429,7 +422,7 @@ class Table(SchemaObject):
|
|
|
429
422
|
raise excs.Error(f'Duplicate column name: {new_col_name!r}')
|
|
430
423
|
elif if_exists == IfExistsParam.IGNORE:
|
|
431
424
|
cols_to_ignore.append(new_col_name)
|
|
432
|
-
elif if_exists
|
|
425
|
+
elif if_exists in (IfExistsParam.REPLACE, IfExistsParam.REPLACE_FORCE):
|
|
433
426
|
if new_col_name not in self._tbl_version.get().cols_by_name:
|
|
434
427
|
# for views, it is possible that the existing column
|
|
435
428
|
# is a base table column; in that case, we should not
|
|
@@ -440,7 +433,8 @@ class Table(SchemaObject):
|
|
|
440
433
|
# replace directive if column has dependents.
|
|
441
434
|
if self._column_has_dependents(col):
|
|
442
435
|
raise excs.Error(
|
|
443
|
-
f'Column {new_col_name!r} already exists and has dependents.
|
|
436
|
+
f'Column {new_col_name!r} already exists and has dependents. '
|
|
437
|
+
f'Cannot {if_exists.name.lower()} it.'
|
|
444
438
|
)
|
|
445
439
|
self.drop_column(new_col_name)
|
|
446
440
|
assert new_col_name not in self._tbl_version.get().cols_by_name
|
|
@@ -452,8 +446,8 @@ class Table(SchemaObject):
|
|
|
452
446
|
if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
|
|
453
447
|
) -> UpdateStatus:
|
|
454
448
|
"""
|
|
455
|
-
Adds multiple columns to the table. The columns must be concrete (non-computed) columns; to add computed
|
|
456
|
-
use [`add_computed_column()`][pixeltable.catalog.Table.add_computed_column] instead.
|
|
449
|
+
Adds multiple columns to the table. The columns must be concrete (non-computed) columns; to add computed
|
|
450
|
+
columns, use [`add_computed_column()`][pixeltable.catalog.Table.add_computed_column] instead.
|
|
457
451
|
|
|
458
452
|
The format of the `schema` argument is identical to the format of the schema in a call to
|
|
459
453
|
[`create_table()`][pixeltable.globals.create_table].
|
|
@@ -464,10 +458,12 @@ class Table(SchemaObject):
|
|
|
464
458
|
|
|
465
459
|
- `'error'`: an exception will be raised.
|
|
466
460
|
- `'ignore'`: do nothing and return.
|
|
467
|
-
- `'replace' or 'replace_force'`: drop the existing column and add the new column, if it has no
|
|
461
|
+
- `'replace' or 'replace_force'`: drop the existing column and add the new column, if it has no
|
|
462
|
+
dependents.
|
|
468
463
|
|
|
469
464
|
Note that the `if_exists` parameter is applied to all columns in the schema.
|
|
470
|
-
To apply different behaviors to different columns, please use
|
|
465
|
+
To apply different behaviors to different columns, please use
|
|
466
|
+
[`add_column()`][pixeltable.Table.add_column] for each column.
|
|
471
467
|
|
|
472
468
|
Returns:
|
|
473
469
|
Information about the execution status of the operation.
|
|
@@ -528,7 +524,8 @@ class Table(SchemaObject):
|
|
|
528
524
|
|
|
529
525
|
- `'error'`: an exception will be raised.
|
|
530
526
|
- `'ignore'`: do nothing and return.
|
|
531
|
-
- `'replace' or 'replace_force'`: drop the existing column and add the new column, if it has
|
|
527
|
+
- `'replace' or 'replace_force'`: drop the existing column and add the new column, if it has
|
|
528
|
+
no dependents.
|
|
532
529
|
|
|
533
530
|
Returns:
|
|
534
531
|
Information about the execution status of the operation.
|
|
@@ -559,7 +556,7 @@ class Table(SchemaObject):
|
|
|
559
556
|
col_type = next(iter(kwargs.values()))
|
|
560
557
|
if not isinstance(col_type, (ts.ColumnType, type, _GenericAlias)):
|
|
561
558
|
raise excs.Error(
|
|
562
|
-
|
|
559
|
+
'The argument to add_column() must be a type; did you intend to use add_computed_column() instead?'
|
|
563
560
|
)
|
|
564
561
|
return self.add_columns(kwargs, if_exists=if_exists)
|
|
565
562
|
|
|
@@ -590,7 +587,8 @@ class Table(SchemaObject):
|
|
|
590
587
|
|
|
591
588
|
- `'error'`: an exception will be raised.
|
|
592
589
|
- `'ignore'`: do nothing and return.
|
|
593
|
-
- `'replace' or 'replace_force'`: drop the existing column and add the new column, iff it has
|
|
590
|
+
- `'replace' or 'replace_force'`: drop the existing column and add the new column, iff it has
|
|
591
|
+
no dependents.
|
|
594
592
|
|
|
595
593
|
Returns:
|
|
596
594
|
Information about the execution status of the operation.
|
|
@@ -614,7 +612,8 @@ class Table(SchemaObject):
|
|
|
614
612
|
raise excs.Error('Cannot add column to a snapshot.')
|
|
615
613
|
if len(kwargs) != 1:
|
|
616
614
|
raise excs.Error(
|
|
617
|
-
f'add_computed_column() requires exactly one keyword argument of the form
|
|
615
|
+
f'add_computed_column() requires exactly one keyword argument of the form '
|
|
616
|
+
'"column-name=type|value-expression"; '
|
|
618
617
|
f'got {len(kwargs)} arguments instead ({", ".join(list(kwargs.keys()))})'
|
|
619
618
|
)
|
|
620
619
|
col_name, spec = next(iter(kwargs.items()))
|
|
@@ -625,6 +624,15 @@ class Table(SchemaObject):
|
|
|
625
624
|
if stored is not None:
|
|
626
625
|
col_schema['stored'] = stored
|
|
627
626
|
|
|
627
|
+
# Raise an error if the column expression refers to a column error property
|
|
628
|
+
if isinstance(spec, exprs.Expr):
|
|
629
|
+
for e in spec.subexprs(expr_class=exprs.ColumnPropertyRef, traverse_matches=False):
|
|
630
|
+
if e.is_error_prop():
|
|
631
|
+
raise excs.Error(
|
|
632
|
+
'Use of a reference to an error property of another column is not allowed in a computed '
|
|
633
|
+
f'column. The specified computation for this column contains this reference: `{e!r}`'
|
|
634
|
+
)
|
|
635
|
+
|
|
628
636
|
with Env.get().begin_xact():
|
|
629
637
|
# handle existing columns based on if_exists parameter
|
|
630
638
|
cols_to_ignore = self._ignore_or_drop_existing_columns(
|
|
@@ -651,16 +659,15 @@ class Table(SchemaObject):
|
|
|
651
659
|
"""
|
|
652
660
|
assert isinstance(spec, dict)
|
|
653
661
|
valid_keys = {'type', 'value', 'stored', 'media_validation'}
|
|
654
|
-
for k in spec
|
|
662
|
+
for k in spec:
|
|
655
663
|
if k not in valid_keys:
|
|
656
664
|
raise excs.Error(f'Column {name}: invalid key {k!r}')
|
|
657
665
|
|
|
658
666
|
if 'type' not in spec and 'value' not in spec:
|
|
659
667
|
raise excs.Error(f"Column {name}: 'type' or 'value' must be specified")
|
|
660
668
|
|
|
661
|
-
if 'type' in spec:
|
|
662
|
-
|
|
663
|
-
raise excs.Error(f'Column {name}: "type" must be a type or ColumnType, got {spec["type"]}')
|
|
669
|
+
if 'type' in spec and not isinstance(spec['type'], (ts.ColumnType, type, _GenericAlias)):
|
|
670
|
+
raise excs.Error(f'Column {name}: "type" must be a type or ColumnType, got {spec["type"]}')
|
|
664
671
|
|
|
665
672
|
if 'value' in spec:
|
|
666
673
|
value_expr = exprs.Expr.from_object(spec['value'])
|
|
@@ -740,8 +747,8 @@ class Table(SchemaObject):
|
|
|
740
747
|
if col.stored is False and col.has_window_fn_call():
|
|
741
748
|
raise excs.Error(
|
|
742
749
|
(
|
|
743
|
-
f'Column {col.name!r}: stored={col.stored} is not valid for image columns computed with a
|
|
744
|
-
f'function'
|
|
750
|
+
f'Column {col.name!r}: stored={col.stored} is not valid for image columns computed with a '
|
|
751
|
+
f'streaming function'
|
|
745
752
|
)
|
|
746
753
|
)
|
|
747
754
|
|
|
@@ -787,21 +794,21 @@ class Table(SchemaObject):
|
|
|
787
794
|
if self._tbl_version_path.is_snapshot():
|
|
788
795
|
raise excs.Error('Cannot drop column from a snapshot.')
|
|
789
796
|
col: Column = None
|
|
790
|
-
|
|
797
|
+
if_not_exists_ = IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
|
|
791
798
|
if isinstance(column, str):
|
|
792
799
|
col = self._tbl_version_path.get_column(column, include_bases=False)
|
|
793
800
|
if col is None:
|
|
794
|
-
if
|
|
801
|
+
if if_not_exists_ == IfNotExistsParam.ERROR:
|
|
795
802
|
raise excs.Error(f'Column {column!r} unknown')
|
|
796
|
-
assert
|
|
803
|
+
assert if_not_exists_ == IfNotExistsParam.IGNORE
|
|
797
804
|
return
|
|
798
805
|
col = self._tbl_version.get().cols_by_name[column]
|
|
799
806
|
else:
|
|
800
807
|
exists = self._tbl_version_path.has_column(column.col, include_bases=False)
|
|
801
808
|
if not exists:
|
|
802
|
-
if
|
|
809
|
+
if if_not_exists_ == IfNotExistsParam.ERROR:
|
|
803
810
|
raise excs.Error(f'Unknown column: {column.col.qualified_name}')
|
|
804
|
-
assert
|
|
811
|
+
assert if_not_exists_ == IfNotExistsParam.IGNORE
|
|
805
812
|
return
|
|
806
813
|
col = column.col
|
|
807
814
|
|
|
@@ -817,7 +824,7 @@ class Table(SchemaObject):
|
|
|
817
824
|
# (transitive) views of this table.
|
|
818
825
|
dependent_stores = [
|
|
819
826
|
(view, store)
|
|
820
|
-
for view in
|
|
827
|
+
for view in (self, *self._get_views(recursive=True))
|
|
821
828
|
for store in view._tbl_version.get().external_stores.values()
|
|
822
829
|
if col in store.get_local_columns()
|
|
823
830
|
]
|
|
@@ -924,7 +931,8 @@ class Table(SchemaObject):
|
|
|
924
931
|
- `'replace'` or `'replace_force'`: replace the existing index with the new one.
|
|
925
932
|
|
|
926
933
|
Raises:
|
|
927
|
-
Error: If an index with the specified name already exists for the table and `if_exists='error'`, or if
|
|
934
|
+
Error: If an index with the specified name already exists for the table and `if_exists='error'`, or if
|
|
935
|
+
the specified column does not exist.
|
|
928
936
|
|
|
929
937
|
Examples:
|
|
930
938
|
Add an index to the `img` column of the table `my_table`:
|
|
@@ -962,18 +970,18 @@ class Table(SchemaObject):
|
|
|
962
970
|
|
|
963
971
|
with Env.get().begin_xact():
|
|
964
972
|
if idx_name is not None and idx_name in self._tbl_version.get().idxs_by_name:
|
|
965
|
-
|
|
973
|
+
if_exists_ = IfExistsParam.validated(if_exists, 'if_exists')
|
|
966
974
|
# An index with the same name already exists.
|
|
967
975
|
# Handle it according to if_exists.
|
|
968
|
-
if
|
|
976
|
+
if if_exists_ == IfExistsParam.ERROR:
|
|
969
977
|
raise excs.Error(f'Duplicate index name: {idx_name}')
|
|
970
978
|
if not isinstance(self._tbl_version.get().idxs_by_name[idx_name].idx, index.EmbeddingIndex):
|
|
971
979
|
raise excs.Error(
|
|
972
|
-
f'Index `{idx_name}` is not an embedding index. Cannot {
|
|
980
|
+
f'Index `{idx_name}` is not an embedding index. Cannot {if_exists_.name.lower()} it.'
|
|
973
981
|
)
|
|
974
|
-
if
|
|
982
|
+
if if_exists_ == IfExistsParam.IGNORE:
|
|
975
983
|
return
|
|
976
|
-
assert
|
|
984
|
+
assert if_exists_ in (IfExistsParam.REPLACE, IfExistsParam.REPLACE_FORCE)
|
|
977
985
|
self.drop_index(idx_name=idx_name)
|
|
978
986
|
assert idx_name not in self._tbl_version.get().idxs_by_name
|
|
979
987
|
from pixeltable.index import EmbeddingIndex
|
|
@@ -986,7 +994,7 @@ class Table(SchemaObject):
|
|
|
986
994
|
idx = EmbeddingIndex(
|
|
987
995
|
col, metric=metric, embed=embedding, string_embed=string_embed, image_embed=image_embed
|
|
988
996
|
)
|
|
989
|
-
|
|
997
|
+
_ = self._tbl_version.get().add_index(col, idx_name=idx_name, idx=idx)
|
|
990
998
|
# TODO: how to deal with exceptions here? drop the index and raise?
|
|
991
999
|
FileCache.get().emit_eviction_warnings()
|
|
992
1000
|
|
|
@@ -1141,11 +1149,11 @@ class Table(SchemaObject):
|
|
|
1141
1149
|
assert (col is None) != (idx_name is None)
|
|
1142
1150
|
|
|
1143
1151
|
if idx_name is not None:
|
|
1144
|
-
|
|
1152
|
+
if_not_exists_ = IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
|
|
1145
1153
|
if idx_name not in self._tbl_version.get().idxs_by_name:
|
|
1146
|
-
if
|
|
1154
|
+
if if_not_exists_ == IfNotExistsParam.ERROR:
|
|
1147
1155
|
raise excs.Error(f'Index {idx_name!r} does not exist')
|
|
1148
|
-
assert
|
|
1156
|
+
assert if_not_exists_ == IfNotExistsParam.IGNORE
|
|
1149
1157
|
return
|
|
1150
1158
|
idx_info = self._tbl_version.get().idxs_by_name[idx_name]
|
|
1151
1159
|
else:
|
|
@@ -1157,10 +1165,10 @@ class Table(SchemaObject):
|
|
|
1157
1165
|
if _idx_class is not None:
|
|
1158
1166
|
idx_info_list = [info for info in idx_info_list if isinstance(info.idx, _idx_class)]
|
|
1159
1167
|
if len(idx_info_list) == 0:
|
|
1160
|
-
|
|
1161
|
-
if
|
|
1168
|
+
if_not_exists_ = IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
|
|
1169
|
+
if if_not_exists_ == IfNotExistsParam.ERROR:
|
|
1162
1170
|
raise excs.Error(f'Column {col.name!r} does not have an index')
|
|
1163
|
-
assert
|
|
1171
|
+
assert if_not_exists_ == IfNotExistsParam.IGNORE
|
|
1164
1172
|
return
|
|
1165
1173
|
if len(idx_info_list) > 1:
|
|
1166
1174
|
raise excs.Error(f"Column {col.name!r} has multiple indices; specify 'idx_name' instead")
|
|
@@ -1341,7 +1349,7 @@ class Table(SchemaObject):
|
|
|
1341
1349
|
rows = list(rows)
|
|
1342
1350
|
|
|
1343
1351
|
row_updates: list[dict[Column, exprs.Expr]] = []
|
|
1344
|
-
pk_col_names =
|
|
1352
|
+
pk_col_names = {c.name for c in self._tbl_version.get().primary_key_columns()}
|
|
1345
1353
|
|
|
1346
1354
|
# pseudo-column _rowid: contains the rowid of the row to update and can be used instead of the primary key
|
|
1347
1355
|
has_rowid = _ROWID_COLUMN_NAME in rows[0]
|
|
@@ -1351,16 +1359,16 @@ class Table(SchemaObject):
|
|
|
1351
1359
|
|
|
1352
1360
|
for row_spec in rows:
|
|
1353
1361
|
col_vals = self._tbl_version.get()._validate_update_spec(
|
|
1354
|
-
row_spec, allow_pk=not has_rowid, allow_exprs=False
|
|
1362
|
+
row_spec, allow_pk=not has_rowid, allow_exprs=False, allow_media=False
|
|
1355
1363
|
)
|
|
1356
1364
|
if has_rowid:
|
|
1357
1365
|
# we expect the _rowid column to be present for each row
|
|
1358
1366
|
assert _ROWID_COLUMN_NAME in row_spec
|
|
1359
1367
|
rowids.append(row_spec[_ROWID_COLUMN_NAME])
|
|
1360
1368
|
else:
|
|
1361
|
-
col_names =
|
|
1369
|
+
col_names = {col.name for col in col_vals}
|
|
1362
1370
|
if any(pk_col_name not in col_names for pk_col_name in pk_col_names):
|
|
1363
|
-
missing_cols = pk_col_names -
|
|
1371
|
+
missing_cols = pk_col_names - {col.name for col in col_vals}
|
|
1364
1372
|
raise excs.Error(f'Primary key columns ({", ".join(missing_cols)}) missing in {row_spec}')
|
|
1365
1373
|
row_updates.append(col_vals)
|
|
1366
1374
|
|
|
pixeltable/catalog/table_version.py
CHANGED
|
@@ -13,9 +13,8 @@ import sqlalchemy as sql
|
|
|
13
13
|
|
|
14
14
|
import pixeltable as pxt
|
|
15
15
|
import pixeltable.exceptions as excs
|
|
16
|
-
import pixeltable.exprs as exprs
|
|
17
|
-
import pixeltable.index as index
|
|
18
16
|
import pixeltable.type_system as ts
|
|
17
|
+
from pixeltable import exprs, index
|
|
19
18
|
from pixeltable.env import Env
|
|
20
19
|
from pixeltable.iterators import ComponentIterator
|
|
21
20
|
from pixeltable.metadata import schema
|
|
@@ -54,6 +53,7 @@ class TableVersion:
|
|
|
54
53
|
|
|
55
54
|
id: UUID
|
|
56
55
|
name: str
|
|
56
|
+
user: Optional[str]
|
|
57
57
|
effective_version: Optional[int]
|
|
58
58
|
version: int
|
|
59
59
|
comment: str
|
|
@@ -108,6 +108,7 @@ class TableVersion:
|
|
|
108
108
|
):
|
|
109
109
|
self.id = id
|
|
110
110
|
self.name = tbl_md.name
|
|
111
|
+
self.user = tbl_md.user
|
|
111
112
|
self.effective_version = effective_version
|
|
112
113
|
self.version = tbl_md.current_version if effective_version is None else effective_version
|
|
113
114
|
self.comment = schema_version_md.comment
|
|
@@ -211,6 +212,7 @@ class TableVersion:
|
|
|
211
212
|
view_md: Optional[schema.ViewMd] = None,
|
|
212
213
|
) -> tuple[UUID, Optional[TableVersion]]:
|
|
213
214
|
session = Env.get().session
|
|
215
|
+
user = Env.get().user
|
|
214
216
|
|
|
215
217
|
# assign ids
|
|
216
218
|
cols_by_name: dict[str, Column] = {}
|
|
@@ -229,7 +231,7 @@ class TableVersion:
|
|
|
229
231
|
table_md = schema.TableMd(
|
|
230
232
|
tbl_id=str(tbl_id),
|
|
231
233
|
name=name,
|
|
232
|
-
user=
|
|
234
|
+
user=user,
|
|
233
235
|
current_version=0,
|
|
234
236
|
current_schema_version=0,
|
|
235
237
|
next_col_id=len(cols),
|
|
@@ -341,8 +343,11 @@ class TableVersion:
|
|
|
341
343
|
self.cols = []
|
|
342
344
|
self.cols_by_name = {}
|
|
343
345
|
self.cols_by_id = {}
|
|
344
|
-
|
|
345
|
-
|
|
346
|
+
# Sort columns in column_md by the position specified in col_md.id to guarantee that all references
|
|
347
|
+
# point backward.
|
|
348
|
+
sorted_column_md = sorted(tbl_md.column_md.values(), key=lambda item: item.id)
|
|
349
|
+
for col_md in sorted_column_md:
|
|
350
|
+
schema_col_md = schema_version_md.columns.get(col_md.id)
|
|
346
351
|
col_name = schema_col_md.name if schema_col_md is not None else None
|
|
347
352
|
media_val = (
|
|
348
353
|
MediaValidation[schema_col_md.media_validation.upper()]
|
|
@@ -385,10 +390,8 @@ class TableVersion:
|
|
|
385
390
|
import pixeltable.index as index_module
|
|
386
391
|
|
|
387
392
|
for md in tbl_md.index_md.values():
|
|
388
|
-
if (
|
|
389
|
-
md.
|
|
390
|
-
or md.schema_version_drop is not None
|
|
391
|
-
and md.schema_version_drop <= self.schema_version
|
|
393
|
+
if md.schema_version_add > self.schema_version or (
|
|
394
|
+
md.schema_version_drop is not None and md.schema_version_drop <= self.schema_version
|
|
392
395
|
):
|
|
393
396
|
# index not visible in this schema version
|
|
394
397
|
continue
|
|
@@ -619,11 +622,10 @@ class TableVersion:
|
|
|
619
622
|
cols = list(cols)
|
|
620
623
|
row_count = self.store_tbl.count()
|
|
621
624
|
for col in cols:
|
|
622
|
-
if not col.col_type.nullable and not col.is_computed:
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
)
|
|
625
|
+
if not col.col_type.nullable and not col.is_computed and row_count > 0:
|
|
626
|
+
raise excs.Error(
|
|
627
|
+
f'Cannot add non-nullable column {col.name!r} to table {self.name!r} with existing rows'
|
|
628
|
+
)
|
|
627
629
|
|
|
628
630
|
num_excs = 0
|
|
629
631
|
cols_with_excs: list[Column] = []
|
|
@@ -662,13 +664,13 @@ class TableVersion:
|
|
|
662
664
|
cols_with_excs.append(col)
|
|
663
665
|
except excs.Error as exc:
|
|
664
666
|
self.cols.pop()
|
|
665
|
-
for
|
|
667
|
+
for c in cols:
|
|
666
668
|
# remove columns that we already added
|
|
667
|
-
if
|
|
669
|
+
if c.id not in self.cols_by_id:
|
|
668
670
|
continue
|
|
669
|
-
if
|
|
670
|
-
del self.cols_by_name[
|
|
671
|
-
del self.cols_by_id[
|
|
671
|
+
if c.name is not None:
|
|
672
|
+
del self.cols_by_name[c.name]
|
|
673
|
+
del self.cols_by_id[c.id]
|
|
672
674
|
# we need to re-initialize the sqlalchemy schema
|
|
673
675
|
self.store_tbl.create_sa_tbl()
|
|
674
676
|
raise exc
|
|
@@ -756,19 +758,20 @@ class TableVersion:
|
|
|
756
758
|
self._update_md(time.time(), preceding_schema_version=preceding_schema_version)
|
|
757
759
|
_logger.info(f'Renamed column {old_name} to {new_name} in table {self.name}, new version: {self.version}')
|
|
758
760
|
|
|
759
|
-
def set_comment(self, new_comment: Optional[str]):
|
|
761
|
+
def set_comment(self, new_comment: Optional[str]) -> None:
|
|
760
762
|
_logger.info(f'[{self.name}] Updating comment: {new_comment}')
|
|
761
763
|
self.comment = new_comment
|
|
762
764
|
self._create_schema_version()
|
|
763
765
|
|
|
764
|
-
def set_num_retained_versions(self, new_num_retained_versions: int):
|
|
766
|
+
def set_num_retained_versions(self, new_num_retained_versions: int) -> None:
|
|
765
767
|
_logger.info(
|
|
766
|
-
f'[{self.name}] Updating num_retained_versions: {new_num_retained_versions}
|
|
768
|
+
f'[{self.name}] Updating num_retained_versions: {new_num_retained_versions} '
|
|
769
|
+
f'(was {self.num_retained_versions})'
|
|
767
770
|
)
|
|
768
771
|
self.num_retained_versions = new_num_retained_versions
|
|
769
772
|
self._create_schema_version()
|
|
770
773
|
|
|
771
|
-
def _create_schema_version(self):
|
|
774
|
+
def _create_schema_version(self) -> None:
|
|
772
775
|
# we're creating a new schema version
|
|
773
776
|
self.version += 1
|
|
774
777
|
preceding_schema_version = self.schema_version
|
|
@@ -858,7 +861,7 @@ class TableVersion:
|
|
|
858
861
|
|
|
859
862
|
from pixeltable.plan import Planner
|
|
860
863
|
|
|
861
|
-
update_spec = self._validate_update_spec(value_spec, allow_pk=False, allow_exprs=True)
|
|
864
|
+
update_spec = self._validate_update_spec(value_spec, allow_pk=False, allow_exprs=True, allow_media=True)
|
|
862
865
|
if where is not None:
|
|
863
866
|
if not isinstance(where, exprs.Expr):
|
|
864
867
|
raise excs.Error(f"'where' argument must be a predicate, got {type(where)}")
|
|
@@ -897,7 +900,6 @@ class TableVersion:
|
|
|
897
900
|
"""
|
|
898
901
|
# if we do lookups of rowids, we must have one for each row in the batch
|
|
899
902
|
assert len(rowids) == 0 or len(rowids) == len(batch)
|
|
900
|
-
cols_with_excs: set[str] = set()
|
|
901
903
|
|
|
902
904
|
from pixeltable.plan import Planner
|
|
903
905
|
|
|
@@ -919,7 +921,7 @@ class TableVersion:
|
|
|
919
921
|
return result
|
|
920
922
|
|
|
921
923
|
def _validate_update_spec(
|
|
922
|
-
self, value_spec: dict[str, Any], allow_pk: bool, allow_exprs: bool
|
|
924
|
+
self, value_spec: dict[str, Any], allow_pk: bool, allow_exprs: bool, allow_media: bool
|
|
923
925
|
) -> dict[Column, exprs.Expr]:
|
|
924
926
|
update_targets: dict[Column, exprs.Expr] = {}
|
|
925
927
|
for col_name, val in value_spec.items():
|
|
@@ -939,27 +941,31 @@ class TableVersion:
|
|
|
939
941
|
raise excs.Error(f'Column {col_name} is computed and cannot be updated')
|
|
940
942
|
if col.is_pk and not allow_pk:
|
|
941
943
|
raise excs.Error(f'Column {col_name} is a primary key column and cannot be updated')
|
|
944
|
+
if col.col_type.is_media_type() and not allow_media:
|
|
945
|
+
raise excs.Error(f'Column {col_name} is a media column and cannot be updated')
|
|
942
946
|
|
|
943
947
|
# make sure that the value is compatible with the column type
|
|
944
948
|
value_expr: exprs.Expr
|
|
945
949
|
try:
|
|
946
950
|
# check if this is a literal
|
|
947
951
|
value_expr = exprs.Literal(val, col_type=col.col_type)
|
|
948
|
-
except (TypeError, jsonschema.exceptions.ValidationError):
|
|
952
|
+
except (TypeError, jsonschema.exceptions.ValidationError) as exc:
|
|
949
953
|
if not allow_exprs:
|
|
950
954
|
raise excs.Error(
|
|
951
955
|
f'Column {col_name}: value {val!r} is not a valid literal for this column '
|
|
952
956
|
f'(expected {col.col_type})'
|
|
953
|
-
)
|
|
957
|
+
) from exc
|
|
954
958
|
# it's not a literal, let's try to create an expr from it
|
|
955
959
|
value_expr = exprs.Expr.from_object(val)
|
|
956
960
|
if value_expr is None:
|
|
957
|
-
raise excs.Error(
|
|
961
|
+
raise excs.Error(
|
|
962
|
+
f'Column {col_name}: value {val!r} is not a recognized literal or expression'
|
|
963
|
+
) from exc
|
|
958
964
|
if not col.col_type.is_supertype_of(value_expr.col_type, ignore_nullable=True):
|
|
959
965
|
raise excs.Error(
|
|
960
966
|
f'Type of value {val!r} ({value_expr.col_type}) is not compatible with the type of column '
|
|
961
967
|
f'{col_name} ({col.col_type})'
|
|
962
|
-
)
|
|
968
|
+
) from exc
|
|
963
969
|
update_targets[col] = value_expr
|
|
964
970
|
|
|
965
971
|
return update_targets
|
|
@@ -988,7 +994,7 @@ class TableVersion:
|
|
|
988
994
|
self._update_md(timestamp)
|
|
989
995
|
|
|
990
996
|
if cascade:
|
|
991
|
-
base_versions = [None if plan is None else self.version
|
|
997
|
+
base_versions = [None if plan is None else self.version, *base_versions] # don't update in place
|
|
992
998
|
# propagate to views
|
|
993
999
|
for view in self.mutable_views:
|
|
994
1000
|
recomputed_cols = [col for col in recomputed_view_cols if col.tbl == view]
|
|
@@ -1048,11 +1054,9 @@ class TableVersion:
|
|
|
1048
1054
|
# we're creating a new version
|
|
1049
1055
|
self.version += 1
|
|
1050
1056
|
self._update_md(timestamp)
|
|
1051
|
-
else:
|
|
1052
|
-
pass
|
|
1053
1057
|
for view in self.mutable_views:
|
|
1054
1058
|
num_rows += view.get().propagate_delete(
|
|
1055
|
-
where=None, base_versions=[self.version
|
|
1059
|
+
where=None, base_versions=[self.version, *base_versions], timestamp=timestamp
|
|
1056
1060
|
)
|
|
1057
1061
|
return num_rows
|
|
1058
1062
|
|
|
@@ -1232,9 +1236,7 @@ class TableVersion:
|
|
|
1232
1236
|
|
|
1233
1237
|
def is_system_column(self, col: Column) -> bool:
|
|
1234
1238
|
"""Return True if column was created by Pixeltable"""
|
|
1235
|
-
|
|
1236
|
-
return True
|
|
1237
|
-
return False
|
|
1239
|
+
return col.name == _POS_COLUMN_NAME and self.is_component_view
|
|
1238
1240
|
|
|
1239
1241
|
def user_columns(self) -> list[Column]:
|
|
1240
1242
|
"""Return all non-system columns"""
|
|
@@ -1262,7 +1264,7 @@ class TableVersion:
|
|
|
1262
1264
|
|
|
1263
1265
|
def _record_refd_columns(self, col: Column) -> None:
|
|
1264
1266
|
"""Update Column.dependent_cols for all cols referenced in col.value_expr."""
|
|
1265
|
-
|
|
1267
|
+
from pixeltable import exprs
|
|
1266
1268
|
|
|
1267
1269
|
if col.value_expr_dict is not None:
|
|
1268
1270
|
# if we have a value_expr_dict, use that instead of instantiating the value_expr
|
|
@@ -1317,7 +1319,7 @@ class TableVersion:
|
|
|
1317
1319
|
return schema.TableMd(
|
|
1318
1320
|
tbl_id=str(self.id),
|
|
1319
1321
|
name=self.name,
|
|
1320
|
-
user=
|
|
1322
|
+
user=self.user,
|
|
1321
1323
|
current_version=self.version,
|
|
1322
1324
|
current_schema_version=self.schema_version,
|
|
1323
1325
|
next_col_id=self.next_col_id,
|
|
@@ -1364,7 +1366,7 @@ class TableVersion:
|
|
|
1364
1366
|
|
|
1365
1367
|
@classmethod
|
|
1366
1368
|
def from_dict(cls, d: dict) -> TableVersion:
|
|
1367
|
-
|
|
1369
|
+
from pixeltable import catalog
|
|
1368
1370
|
|
|
1369
1371
|
id = UUID(d['id'])
|
|
1370
1372
|
effective_version = d['effective_version']
|
|
@@ -31,6 +31,9 @@ class TableVersionHandle:
|
|
|
31
31
|
return False
|
|
32
32
|
return self.id == other.id and self.effective_version == other.effective_version
|
|
33
33
|
|
|
34
|
+
def __hash__(self) -> int:
|
|
35
|
+
return hash((self.id, self.effective_version))
|
|
36
|
+
|
|
34
37
|
@classmethod
|
|
35
38
|
def create(cls, tbl_version: TableVersion) -> TableVersionHandle:
|
|
36
39
|
return cls(tbl_version.id, tbl_version.effective_version, tbl_version)
|
|
@@ -82,7 +82,7 @@ class TableVersionPath:
|
|
|
82
82
|
"""Return all tbl versions"""
|
|
83
83
|
if self.base is None:
|
|
84
84
|
return [self.tbl_version]
|
|
85
|
-
return [self.tbl_version
|
|
85
|
+
return [self.tbl_version, *self.base.get_tbl_versions()]
|
|
86
86
|
|
|
87
87
|
def get_bases(self) -> list[TableVersionHandle]:
|
|
88
88
|
"""Return all tbl versions"""
|
pixeltable/catalog/view.py
CHANGED
|
@@ -2,13 +2,13 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import inspect
|
|
4
4
|
import logging
|
|
5
|
-
from typing import TYPE_CHECKING, Any,
|
|
5
|
+
from typing import TYPE_CHECKING, Any, List, Literal, Optional
|
|
6
6
|
from uuid import UUID
|
|
7
7
|
|
|
8
8
|
import pixeltable.exceptions as excs
|
|
9
9
|
import pixeltable.metadata.schema as md_schema
|
|
10
10
|
import pixeltable.type_system as ts
|
|
11
|
-
from pixeltable import
|
|
11
|
+
from pixeltable import exprs, func
|
|
12
12
|
from pixeltable.env import Env
|
|
13
13
|
from pixeltable.iterators import ComponentIterator
|
|
14
14
|
|
|
@@ -98,7 +98,8 @@ class View(Table):
|
|
|
98
98
|
# make sure that the value can be computed in the context of the base
|
|
99
99
|
if col.value_expr is not None and not col.value_expr.is_bound_by([base]):
|
|
100
100
|
raise excs.Error(
|
|
101
|
-
f'Column {col.name}: value expression cannot be computed in the context of the
|
|
101
|
+
f'Column {col.name}: value expression cannot be computed in the context of the '
|
|
102
|
+
f'base {base.tbl_name()}'
|
|
102
103
|
)
|
|
103
104
|
|
|
104
105
|
if iterator_cls is not None:
|
|
@@ -111,8 +112,8 @@ class View(Table):
|
|
|
111
112
|
bound_args: dict[str, Any]
|
|
112
113
|
try:
|
|
113
114
|
bound_args = py_signature.bind(None, **iterator_args).arguments # None: arg for self
|
|
114
|
-
except TypeError as
|
|
115
|
-
raise excs.Error(f'Invalid iterator arguments: {
|
|
115
|
+
except TypeError as exc:
|
|
116
|
+
raise excs.Error(f'Invalid iterator arguments: {exc}') from exc
|
|
116
117
|
# we ignore 'self'
|
|
117
118
|
first_param_name = next(iter(py_signature.parameters)) # can't guarantee it's actually 'self'
|
|
118
119
|
del bound_args[first_param_name]
|
|
@@ -203,8 +204,8 @@ class View(Table):
|
|
|
203
204
|
|
|
204
205
|
from pixeltable.plan import Planner
|
|
205
206
|
|
|
206
|
-
plan,
|
|
207
|
-
num_rows, num_excs,
|
|
207
|
+
plan, _ = Planner.create_view_load_plan(view._tbl_version_path)
|
|
208
|
+
num_rows, num_excs, _ = tbl_version.store_tbl.insert_rows(plan, v_min=tbl_version.version)
|
|
208
209
|
Env.get().console_logger.info(f'Created view `{name}` with {num_rows} rows, {num_excs} exceptions.')
|
|
209
210
|
|
|
210
211
|
session.commit()
|