pixeltable 0.3.9__py3-none-any.whl → 0.3.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +2 -3
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +2 -1
- pixeltable/catalog/catalog.py +63 -36
- pixeltable/catalog/column.py +11 -4
- pixeltable/catalog/dir.py +5 -5
- pixeltable/catalog/globals.py +28 -14
- pixeltable/catalog/insertable_table.py +81 -43
- pixeltable/catalog/path.py +2 -2
- pixeltable/catalog/table.py +140 -109
- pixeltable/catalog/table_version.py +60 -43
- pixeltable/catalog/table_version_handle.py +3 -0
- pixeltable/catalog/table_version_path.py +1 -1
- pixeltable/catalog/view.py +17 -9
- pixeltable/dataframe.py +5 -3
- pixeltable/env.py +109 -43
- pixeltable/exec/__init__.py +2 -0
- pixeltable/exec/aggregation_node.py +6 -8
- pixeltable/exec/cache_prefetch_node.py +4 -7
- pixeltable/exec/component_iteration_node.py +1 -3
- pixeltable/exec/data_row_batch.py +1 -2
- pixeltable/exec/exec_context.py +1 -1
- pixeltable/exec/exec_node.py +2 -3
- pixeltable/exec/expr_eval/__init__.py +2 -0
- pixeltable/exec/expr_eval/evaluators.py +137 -20
- pixeltable/exec/expr_eval/expr_eval_node.py +43 -64
- pixeltable/exec/expr_eval/globals.py +68 -7
- pixeltable/exec/expr_eval/schedulers.py +25 -23
- pixeltable/exec/in_memory_data_node.py +8 -6
- pixeltable/exec/row_update_node.py +3 -4
- pixeltable/exec/sql_node.py +16 -17
- pixeltable/exprs/__init__.py +3 -2
- pixeltable/exprs/arithmetic_expr.py +2 -0
- pixeltable/exprs/column_property_ref.py +1 -1
- pixeltable/exprs/column_ref.py +39 -3
- pixeltable/exprs/compound_predicate.py +1 -1
- pixeltable/exprs/data_row.py +17 -1
- pixeltable/exprs/expr.py +51 -21
- pixeltable/exprs/function_call.py +34 -2
- pixeltable/exprs/globals.py +12 -0
- pixeltable/exprs/json_mapper.py +95 -48
- pixeltable/exprs/json_path.py +3 -10
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +33 -6
- pixeltable/exprs/similarity_expr.py +6 -21
- pixeltable/exprs/sql_element_cache.py +1 -1
- pixeltable/exprs/string_op.py +107 -0
- pixeltable/ext/__init__.py +1 -1
- pixeltable/ext/functions/__init__.py +1 -1
- pixeltable/ext/functions/whisperx.py +1 -1
- pixeltable/ext/functions/yolox.py +22 -65
- pixeltable/func/aggregate_function.py +1 -1
- pixeltable/func/callable_function.py +2 -5
- pixeltable/func/expr_template_function.py +22 -2
- pixeltable/func/function.py +4 -5
- pixeltable/func/function_registry.py +1 -1
- pixeltable/func/signature.py +1 -1
- pixeltable/func/tools.py +2 -2
- pixeltable/func/udf.py +2 -2
- pixeltable/functions/__init__.py +2 -2
- pixeltable/functions/anthropic.py +2 -2
- pixeltable/functions/audio.py +1 -1
- pixeltable/functions/deepseek.py +1 -1
- pixeltable/functions/fireworks.py +1 -1
- pixeltable/functions/globals.py +22 -11
- pixeltable/functions/huggingface.py +1 -1
- pixeltable/functions/image.py +1 -1
- pixeltable/functions/json.py +1 -1
- pixeltable/functions/llama_cpp.py +1 -1
- pixeltable/functions/math.py +1 -1
- pixeltable/functions/mistralai.py +1 -1
- pixeltable/functions/ollama.py +1 -1
- pixeltable/functions/openai.py +2 -2
- pixeltable/functions/replicate.py +1 -1
- pixeltable/functions/string.py +1 -1
- pixeltable/functions/timestamp.py +1 -1
- pixeltable/functions/together.py +1 -1
- pixeltable/functions/util.py +1 -1
- pixeltable/functions/video.py +2 -2
- pixeltable/functions/vision.py +2 -2
- pixeltable/globals.py +85 -33
- pixeltable/index/embedding_index.py +12 -1
- pixeltable/io/__init__.py +8 -5
- pixeltable/io/datarows.py +138 -0
- pixeltable/io/external_store.py +8 -5
- pixeltable/io/fiftyone.py +6 -7
- pixeltable/io/globals.py +7 -160
- pixeltable/io/hf_datasets.py +21 -98
- pixeltable/io/label_studio.py +21 -20
- pixeltable/io/pandas.py +35 -48
- pixeltable/io/parquet.py +17 -42
- pixeltable/io/table_data_conduit.py +569 -0
- pixeltable/io/utils.py +6 -21
- pixeltable/iterators/__init__.py +1 -1
- pixeltable/metadata/__init__.py +6 -4
- pixeltable/metadata/converters/convert_24.py +3 -3
- pixeltable/metadata/converters/convert_25.py +1 -1
- pixeltable/metadata/converters/convert_29.py +1 -1
- pixeltable/metadata/converters/convert_30.py +50 -0
- pixeltable/metadata/converters/util.py +26 -1
- pixeltable/metadata/notes.py +1 -0
- pixeltable/metadata/schema.py +3 -0
- pixeltable/store.py +2 -2
- pixeltable/type_system.py +19 -7
- pixeltable/utils/arrow.py +32 -7
- pixeltable/utils/console_output.py +3 -2
- pixeltable/utils/coroutine.py +3 -3
- pixeltable/utils/dbms.py +66 -0
- pixeltable/utils/documents.py +61 -67
- pixeltable/utils/filecache.py +1 -1
- pixeltable/utils/http_server.py +3 -2
- pixeltable/utils/pytorch.py +1 -1
- pixeltable/utils/sql.py +1 -1
- pixeltable-0.3.11.dist-info/METADATA +436 -0
- pixeltable-0.3.11.dist-info/RECORD +179 -0
- {pixeltable-0.3.9.dist-info → pixeltable-0.3.11.dist-info}/WHEEL +1 -1
- pixeltable/catalog/path_dict.py +0 -169
- pixeltable-0.3.9.dist-info/METADATA +0 -382
- pixeltable-0.3.9.dist-info/RECORD +0 -175
- {pixeltable-0.3.9.dist-info → pixeltable-0.3.11.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.9.dist-info → pixeltable-0.3.11.dist-info}/entry_points.txt +0 -0
|
@@ -13,9 +13,8 @@ import sqlalchemy as sql
|
|
|
13
13
|
|
|
14
14
|
import pixeltable as pxt
|
|
15
15
|
import pixeltable.exceptions as excs
|
|
16
|
-
import pixeltable.exprs as exprs
|
|
17
|
-
import pixeltable.index as index
|
|
18
16
|
import pixeltable.type_system as ts
|
|
17
|
+
from pixeltable import exprs, index
|
|
19
18
|
from pixeltable.env import Env
|
|
20
19
|
from pixeltable.iterators import ComponentIterator
|
|
21
20
|
from pixeltable.metadata import schema
|
|
@@ -54,6 +53,7 @@ class TableVersion:
|
|
|
54
53
|
|
|
55
54
|
id: UUID
|
|
56
55
|
name: str
|
|
56
|
+
user: Optional[str]
|
|
57
57
|
effective_version: Optional[int]
|
|
58
58
|
version: int
|
|
59
59
|
comment: str
|
|
@@ -108,6 +108,7 @@ class TableVersion:
|
|
|
108
108
|
):
|
|
109
109
|
self.id = id
|
|
110
110
|
self.name = tbl_md.name
|
|
111
|
+
self.user = tbl_md.user
|
|
111
112
|
self.effective_version = effective_version
|
|
112
113
|
self.version = tbl_md.current_version if effective_version is None else effective_version
|
|
113
114
|
self.comment = schema_version_md.comment
|
|
@@ -211,6 +212,7 @@ class TableVersion:
|
|
|
211
212
|
view_md: Optional[schema.ViewMd] = None,
|
|
212
213
|
) -> tuple[UUID, Optional[TableVersion]]:
|
|
213
214
|
session = Env.get().session
|
|
215
|
+
user = Env.get().user
|
|
214
216
|
|
|
215
217
|
# assign ids
|
|
216
218
|
cols_by_name: dict[str, Column] = {}
|
|
@@ -225,9 +227,11 @@ class TableVersion:
|
|
|
225
227
|
# create schema.Table
|
|
226
228
|
# Column.dependent_cols for existing cols is wrong at this point, but init() will set it correctly
|
|
227
229
|
column_md = cls._create_column_md(cols)
|
|
230
|
+
tbl_id = uuid.uuid4()
|
|
228
231
|
table_md = schema.TableMd(
|
|
232
|
+
tbl_id=str(tbl_id),
|
|
229
233
|
name=name,
|
|
230
|
-
user=
|
|
234
|
+
user=user,
|
|
231
235
|
current_version=0,
|
|
232
236
|
current_schema_version=0,
|
|
233
237
|
next_col_id=len(cols),
|
|
@@ -241,11 +245,12 @@ class TableVersion:
|
|
|
241
245
|
)
|
|
242
246
|
# create a schema.Table here, we need it to call our c'tor;
|
|
243
247
|
# don't add it to the session yet, we might add index metadata
|
|
244
|
-
tbl_id = uuid.uuid4()
|
|
245
248
|
tbl_record = schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(table_md))
|
|
246
249
|
|
|
247
250
|
# create schema.TableVersion
|
|
248
|
-
table_version_md = schema.TableVersionMd(
|
|
251
|
+
table_version_md = schema.TableVersionMd(
|
|
252
|
+
tbl_id=str(tbl_record.id), created_at=timestamp, version=0, schema_version=0, additional_md={}
|
|
253
|
+
)
|
|
249
254
|
tbl_version_record = schema.TableVersion(
|
|
250
255
|
tbl_id=tbl_record.id, version=0, md=dataclasses.asdict(table_version_md)
|
|
251
256
|
)
|
|
@@ -261,6 +266,7 @@ class TableVersion:
|
|
|
261
266
|
schema_col_md[col.id] = md
|
|
262
267
|
|
|
263
268
|
schema_version_md = schema.TableSchemaVersionMd(
|
|
269
|
+
tbl_id=str(tbl_record.id),
|
|
264
270
|
schema_version=0,
|
|
265
271
|
preceding_schema_version=None,
|
|
266
272
|
columns=schema_col_md,
|
|
@@ -337,8 +343,11 @@ class TableVersion:
|
|
|
337
343
|
self.cols = []
|
|
338
344
|
self.cols_by_name = {}
|
|
339
345
|
self.cols_by_id = {}
|
|
340
|
-
|
|
341
|
-
|
|
346
|
+
# Sort columns in column_md by the position specified in col_md.id to guarantee that all references
|
|
347
|
+
# point backward.
|
|
348
|
+
sorted_column_md = sorted(tbl_md.column_md.values(), key=lambda item: item.id)
|
|
349
|
+
for col_md in sorted_column_md:
|
|
350
|
+
schema_col_md = schema_version_md.columns.get(col_md.id)
|
|
342
351
|
col_name = schema_col_md.name if schema_col_md is not None else None
|
|
343
352
|
media_val = (
|
|
344
353
|
MediaValidation[schema_col_md.media_validation.upper()]
|
|
@@ -381,10 +390,8 @@ class TableVersion:
|
|
|
381
390
|
import pixeltable.index as index_module
|
|
382
391
|
|
|
383
392
|
for md in tbl_md.index_md.values():
|
|
384
|
-
if (
|
|
385
|
-
md.
|
|
386
|
-
or md.schema_version_drop is not None
|
|
387
|
-
and md.schema_version_drop <= self.schema_version
|
|
393
|
+
if md.schema_version_add > self.schema_version or (
|
|
394
|
+
md.schema_version_drop is not None and md.schema_version_drop <= self.schema_version
|
|
388
395
|
):
|
|
389
396
|
# index not visible in this schema version
|
|
390
397
|
continue
|
|
@@ -615,11 +622,10 @@ class TableVersion:
|
|
|
615
622
|
cols = list(cols)
|
|
616
623
|
row_count = self.store_tbl.count()
|
|
617
624
|
for col in cols:
|
|
618
|
-
if not col.col_type.nullable and not col.is_computed:
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
)
|
|
625
|
+
if not col.col_type.nullable and not col.is_computed and row_count > 0:
|
|
626
|
+
raise excs.Error(
|
|
627
|
+
f'Cannot add non-nullable column {col.name!r} to table {self.name!r} with existing rows'
|
|
628
|
+
)
|
|
623
629
|
|
|
624
630
|
num_excs = 0
|
|
625
631
|
cols_with_excs: list[Column] = []
|
|
@@ -658,13 +664,13 @@ class TableVersion:
|
|
|
658
664
|
cols_with_excs.append(col)
|
|
659
665
|
except excs.Error as exc:
|
|
660
666
|
self.cols.pop()
|
|
661
|
-
for
|
|
667
|
+
for c in cols:
|
|
662
668
|
# remove columns that we already added
|
|
663
|
-
if
|
|
669
|
+
if c.id not in self.cols_by_id:
|
|
664
670
|
continue
|
|
665
|
-
if
|
|
666
|
-
del self.cols_by_name[
|
|
667
|
-
del self.cols_by_id[
|
|
671
|
+
if c.name is not None:
|
|
672
|
+
del self.cols_by_name[c.name]
|
|
673
|
+
del self.cols_by_id[c.id]
|
|
668
674
|
# we need to re-initialize the sqlalchemy schema
|
|
669
675
|
self.store_tbl.create_sa_tbl()
|
|
670
676
|
raise exc
|
|
@@ -752,19 +758,20 @@ class TableVersion:
|
|
|
752
758
|
self._update_md(time.time(), preceding_schema_version=preceding_schema_version)
|
|
753
759
|
_logger.info(f'Renamed column {old_name} to {new_name} in table {self.name}, new version: {self.version}')
|
|
754
760
|
|
|
755
|
-
def set_comment(self, new_comment: Optional[str]):
|
|
761
|
+
def set_comment(self, new_comment: Optional[str]) -> None:
|
|
756
762
|
_logger.info(f'[{self.name}] Updating comment: {new_comment}')
|
|
757
763
|
self.comment = new_comment
|
|
758
764
|
self._create_schema_version()
|
|
759
765
|
|
|
760
|
-
def set_num_retained_versions(self, new_num_retained_versions: int):
|
|
766
|
+
def set_num_retained_versions(self, new_num_retained_versions: int) -> None:
|
|
761
767
|
_logger.info(
|
|
762
|
-
f'[{self.name}] Updating num_retained_versions: {new_num_retained_versions}
|
|
768
|
+
f'[{self.name}] Updating num_retained_versions: {new_num_retained_versions} '
|
|
769
|
+
f'(was {self.num_retained_versions})'
|
|
763
770
|
)
|
|
764
771
|
self.num_retained_versions = new_num_retained_versions
|
|
765
772
|
self._create_schema_version()
|
|
766
773
|
|
|
767
|
-
def _create_schema_version(self):
|
|
774
|
+
def _create_schema_version(self) -> None:
|
|
768
775
|
# we're creating a new schema version
|
|
769
776
|
self.version += 1
|
|
770
777
|
preceding_schema_version = self.schema_version
|
|
@@ -854,7 +861,7 @@ class TableVersion:
|
|
|
854
861
|
|
|
855
862
|
from pixeltable.plan import Planner
|
|
856
863
|
|
|
857
|
-
update_spec = self._validate_update_spec(value_spec, allow_pk=False, allow_exprs=True)
|
|
864
|
+
update_spec = self._validate_update_spec(value_spec, allow_pk=False, allow_exprs=True, allow_media=True)
|
|
858
865
|
if where is not None:
|
|
859
866
|
if not isinstance(where, exprs.Expr):
|
|
860
867
|
raise excs.Error(f"'where' argument must be a predicate, got {type(where)}")
|
|
@@ -893,7 +900,6 @@ class TableVersion:
|
|
|
893
900
|
"""
|
|
894
901
|
# if we do lookups of rowids, we must have one for each row in the batch
|
|
895
902
|
assert len(rowids) == 0 or len(rowids) == len(batch)
|
|
896
|
-
cols_with_excs: set[str] = set()
|
|
897
903
|
|
|
898
904
|
from pixeltable.plan import Planner
|
|
899
905
|
|
|
@@ -915,7 +921,7 @@ class TableVersion:
|
|
|
915
921
|
return result
|
|
916
922
|
|
|
917
923
|
def _validate_update_spec(
|
|
918
|
-
self, value_spec: dict[str, Any], allow_pk: bool, allow_exprs: bool
|
|
924
|
+
self, value_spec: dict[str, Any], allow_pk: bool, allow_exprs: bool, allow_media: bool
|
|
919
925
|
) -> dict[Column, exprs.Expr]:
|
|
920
926
|
update_targets: dict[Column, exprs.Expr] = {}
|
|
921
927
|
for col_name, val in value_spec.items():
|
|
@@ -935,27 +941,31 @@ class TableVersion:
|
|
|
935
941
|
raise excs.Error(f'Column {col_name} is computed and cannot be updated')
|
|
936
942
|
if col.is_pk and not allow_pk:
|
|
937
943
|
raise excs.Error(f'Column {col_name} is a primary key column and cannot be updated')
|
|
944
|
+
if col.col_type.is_media_type() and not allow_media:
|
|
945
|
+
raise excs.Error(f'Column {col_name} is a media column and cannot be updated')
|
|
938
946
|
|
|
939
947
|
# make sure that the value is compatible with the column type
|
|
940
948
|
value_expr: exprs.Expr
|
|
941
949
|
try:
|
|
942
950
|
# check if this is a literal
|
|
943
951
|
value_expr = exprs.Literal(val, col_type=col.col_type)
|
|
944
|
-
except (TypeError, jsonschema.exceptions.ValidationError):
|
|
952
|
+
except (TypeError, jsonschema.exceptions.ValidationError) as exc:
|
|
945
953
|
if not allow_exprs:
|
|
946
954
|
raise excs.Error(
|
|
947
955
|
f'Column {col_name}: value {val!r} is not a valid literal for this column '
|
|
948
956
|
f'(expected {col.col_type})'
|
|
949
|
-
)
|
|
957
|
+
) from exc
|
|
950
958
|
# it's not a literal, let's try to create an expr from it
|
|
951
959
|
value_expr = exprs.Expr.from_object(val)
|
|
952
960
|
if value_expr is None:
|
|
953
|
-
raise excs.Error(
|
|
961
|
+
raise excs.Error(
|
|
962
|
+
f'Column {col_name}: value {val!r} is not a recognized literal or expression'
|
|
963
|
+
) from exc
|
|
954
964
|
if not col.col_type.is_supertype_of(value_expr.col_type, ignore_nullable=True):
|
|
955
965
|
raise excs.Error(
|
|
956
966
|
f'Type of value {val!r} ({value_expr.col_type}) is not compatible with the type of column '
|
|
957
967
|
f'{col_name} ({col.col_type})'
|
|
958
|
-
)
|
|
968
|
+
) from exc
|
|
959
969
|
update_targets[col] = value_expr
|
|
960
970
|
|
|
961
971
|
return update_targets
|
|
@@ -984,7 +994,7 @@ class TableVersion:
|
|
|
984
994
|
self._update_md(timestamp)
|
|
985
995
|
|
|
986
996
|
if cascade:
|
|
987
|
-
base_versions = [None if plan is None else self.version
|
|
997
|
+
base_versions = [None if plan is None else self.version, *base_versions] # don't update in place
|
|
988
998
|
# propagate to views
|
|
989
999
|
for view in self.mutable_views:
|
|
990
1000
|
recomputed_cols = [col for col in recomputed_view_cols if col.tbl == view]
|
|
@@ -1044,11 +1054,9 @@ class TableVersion:
|
|
|
1044
1054
|
# we're creating a new version
|
|
1045
1055
|
self.version += 1
|
|
1046
1056
|
self._update_md(timestamp)
|
|
1047
|
-
else:
|
|
1048
|
-
pass
|
|
1049
1057
|
for view in self.mutable_views:
|
|
1050
1058
|
num_rows += view.get().propagate_delete(
|
|
1051
|
-
where=None, base_versions=[self.version
|
|
1059
|
+
where=None, base_versions=[self.version, *base_versions], timestamp=timestamp
|
|
1052
1060
|
)
|
|
1053
1061
|
return num_rows
|
|
1054
1062
|
|
|
@@ -1228,9 +1236,7 @@ class TableVersion:
|
|
|
1228
1236
|
|
|
1229
1237
|
def is_system_column(self, col: Column) -> bool:
|
|
1230
1238
|
"""Return True if column was created by Pixeltable"""
|
|
1231
|
-
|
|
1232
|
-
return True
|
|
1233
|
-
return False
|
|
1239
|
+
return col.name == _POS_COLUMN_NAME and self.is_component_view
|
|
1234
1240
|
|
|
1235
1241
|
def user_columns(self) -> list[Column]:
|
|
1236
1242
|
"""Return all non-system columns"""
|
|
@@ -1240,6 +1246,11 @@ class TableVersion:
|
|
|
1240
1246
|
"""Return all non-system columns"""
|
|
1241
1247
|
return [c for c in self.cols if c.is_pk]
|
|
1242
1248
|
|
|
1249
|
+
@property
|
|
1250
|
+
def primary_key(self) -> list[str]:
|
|
1251
|
+
"""Return the names of the primary key columns"""
|
|
1252
|
+
return [c.name for c in self.cols if c.is_pk]
|
|
1253
|
+
|
|
1243
1254
|
def get_required_col_names(self) -> list[str]:
|
|
1244
1255
|
"""Return the names of all columns for which values must be specified in insert()"""
|
|
1245
1256
|
assert not self.is_view
|
|
@@ -1253,7 +1264,7 @@ class TableVersion:
|
|
|
1253
1264
|
|
|
1254
1265
|
def _record_refd_columns(self, col: Column) -> None:
|
|
1255
1266
|
"""Update Column.dependent_cols for all cols referenced in col.value_expr."""
|
|
1256
|
-
|
|
1267
|
+
from pixeltable import exprs
|
|
1257
1268
|
|
|
1258
1269
|
if col.value_expr_dict is not None:
|
|
1259
1270
|
# if we have a value_expr_dict, use that instead of instantiating the value_expr
|
|
@@ -1306,8 +1317,9 @@ class TableVersion:
|
|
|
1306
1317
|
|
|
1307
1318
|
def _create_tbl_md(self) -> schema.TableMd:
|
|
1308
1319
|
return schema.TableMd(
|
|
1320
|
+
tbl_id=str(self.id),
|
|
1309
1321
|
name=self.name,
|
|
1310
|
-
user=
|
|
1322
|
+
user=self.user,
|
|
1311
1323
|
current_version=self.version,
|
|
1312
1324
|
current_schema_version=self.schema_version,
|
|
1313
1325
|
next_col_id=self.next_col_id,
|
|
@@ -1322,7 +1334,11 @@ class TableVersion:
|
|
|
1322
1334
|
|
|
1323
1335
|
def _create_version_md(self, timestamp: float) -> schema.TableVersionMd:
|
|
1324
1336
|
return schema.TableVersionMd(
|
|
1325
|
-
|
|
1337
|
+
tbl_id=str(self.id),
|
|
1338
|
+
created_at=timestamp,
|
|
1339
|
+
version=self.version,
|
|
1340
|
+
schema_version=self.schema_version,
|
|
1341
|
+
additional_md={},
|
|
1326
1342
|
)
|
|
1327
1343
|
|
|
1328
1344
|
def _create_schema_version_md(self, preceding_schema_version: int) -> schema.TableSchemaVersionMd:
|
|
@@ -1335,6 +1351,7 @@ class TableVersion:
|
|
|
1335
1351
|
)
|
|
1336
1352
|
# preceding_schema_version to be set by the caller
|
|
1337
1353
|
return schema.TableSchemaVersionMd(
|
|
1354
|
+
tbl_id=str(self.id),
|
|
1338
1355
|
schema_version=self.schema_version,
|
|
1339
1356
|
preceding_schema_version=preceding_schema_version,
|
|
1340
1357
|
columns=column_md,
|
|
@@ -1349,7 +1366,7 @@ class TableVersion:
|
|
|
1349
1366
|
|
|
1350
1367
|
@classmethod
|
|
1351
1368
|
def from_dict(cls, d: dict) -> TableVersion:
|
|
1352
|
-
|
|
1369
|
+
from pixeltable import catalog
|
|
1353
1370
|
|
|
1354
1371
|
id = UUID(d['id'])
|
|
1355
1372
|
effective_version = d['effective_version']
|
|
@@ -31,6 +31,9 @@ class TableVersionHandle:
|
|
|
31
31
|
return False
|
|
32
32
|
return self.id == other.id and self.effective_version == other.effective_version
|
|
33
33
|
|
|
34
|
+
def __hash__(self) -> int:
|
|
35
|
+
return hash((self.id, self.effective_version))
|
|
36
|
+
|
|
34
37
|
@classmethod
|
|
35
38
|
def create(cls, tbl_version: TableVersion) -> TableVersionHandle:
|
|
36
39
|
return cls(tbl_version.id, tbl_version.effective_version, tbl_version)
|
|
@@ -82,7 +82,7 @@ class TableVersionPath:
|
|
|
82
82
|
"""Return all tbl versions"""
|
|
83
83
|
if self.base is None:
|
|
84
84
|
return [self.tbl_version]
|
|
85
|
-
return [self.tbl_version
|
|
85
|
+
return [self.tbl_version, *self.base.get_tbl_versions()]
|
|
86
86
|
|
|
87
87
|
def get_bases(self) -> list[TableVersionHandle]:
|
|
88
88
|
"""Return all tbl versions"""
|
pixeltable/catalog/view.py
CHANGED
|
@@ -2,13 +2,13 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import inspect
|
|
4
4
|
import logging
|
|
5
|
-
from typing import TYPE_CHECKING, Any,
|
|
5
|
+
from typing import TYPE_CHECKING, Any, List, Literal, Optional
|
|
6
6
|
from uuid import UUID
|
|
7
7
|
|
|
8
8
|
import pixeltable.exceptions as excs
|
|
9
9
|
import pixeltable.metadata.schema as md_schema
|
|
10
10
|
import pixeltable.type_system as ts
|
|
11
|
-
from pixeltable import
|
|
11
|
+
from pixeltable import exprs, func
|
|
12
12
|
from pixeltable.env import Env
|
|
13
13
|
from pixeltable.iterators import ComponentIterator
|
|
14
14
|
|
|
@@ -98,7 +98,8 @@ class View(Table):
|
|
|
98
98
|
# make sure that the value can be computed in the context of the base
|
|
99
99
|
if col.value_expr is not None and not col.value_expr.is_bound_by([base]):
|
|
100
100
|
raise excs.Error(
|
|
101
|
-
f'Column {col.name}: value expression cannot be computed in the context of the
|
|
101
|
+
f'Column {col.name}: value expression cannot be computed in the context of the '
|
|
102
|
+
f'base {base.tbl_name()}'
|
|
102
103
|
)
|
|
103
104
|
|
|
104
105
|
if iterator_cls is not None:
|
|
@@ -111,8 +112,8 @@ class View(Table):
|
|
|
111
112
|
bound_args: dict[str, Any]
|
|
112
113
|
try:
|
|
113
114
|
bound_args = py_signature.bind(None, **iterator_args).arguments # None: arg for self
|
|
114
|
-
except TypeError as
|
|
115
|
-
raise excs.Error(f'Invalid iterator arguments: {
|
|
115
|
+
except TypeError as exc:
|
|
116
|
+
raise excs.Error(f'Invalid iterator arguments: {exc}') from exc
|
|
116
117
|
# we ignore 'self'
|
|
117
118
|
first_param_name = next(iter(py_signature.parameters)) # can't guarantee it's actually 'self'
|
|
118
119
|
del bound_args[first_param_name]
|
|
@@ -203,8 +204,8 @@ class View(Table):
|
|
|
203
204
|
|
|
204
205
|
from pixeltable.plan import Planner
|
|
205
206
|
|
|
206
|
-
plan,
|
|
207
|
-
num_rows, num_excs,
|
|
207
|
+
plan, _ = Planner.create_view_load_plan(view._tbl_version_path)
|
|
208
|
+
num_rows, num_excs, _ = tbl_version.store_tbl.insert_rows(plan, v_min=tbl_version.version)
|
|
208
209
|
Env.get().console_logger.info(f'Created view `{name}` with {num_rows} rows, {num_excs} exceptions.')
|
|
209
210
|
|
|
210
211
|
session.commit()
|
|
@@ -251,13 +252,20 @@ class View(Table):
|
|
|
251
252
|
md['is_snapshot'] = self._tbl_version_path.is_snapshot()
|
|
252
253
|
return md
|
|
253
254
|
|
|
255
|
+
if TYPE_CHECKING:
|
|
256
|
+
import datasets # type: ignore[import-untyped]
|
|
257
|
+
|
|
258
|
+
from pixeltable.globals import RowData, TableDataSource
|
|
259
|
+
|
|
254
260
|
def insert(
|
|
255
261
|
self,
|
|
256
|
-
|
|
262
|
+
source: Optional[TableDataSource] = None,
|
|
257
263
|
/,
|
|
258
264
|
*,
|
|
259
|
-
|
|
265
|
+
source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
|
|
266
|
+
schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
|
|
260
267
|
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
268
|
+
print_stats: bool = False,
|
|
261
269
|
**kwargs: Any,
|
|
262
270
|
) -> UpdateStatus:
|
|
263
271
|
raise excs.Error(f'{self._display_name()} {self._name!r}: cannot insert into view')
|
pixeltable/dataframe.py
CHANGED
|
@@ -88,12 +88,12 @@ class DataFrameResultSet:
|
|
|
88
88
|
def __iter__(self) -> Iterator[dict[str, Any]]:
|
|
89
89
|
return (self._row_to_dict(i) for i in range(len(self)))
|
|
90
90
|
|
|
91
|
-
def __eq__(self, other):
|
|
91
|
+
def __eq__(self, other: object) -> bool:
|
|
92
92
|
if not isinstance(other, DataFrameResultSet):
|
|
93
93
|
return False
|
|
94
94
|
return self.to_pandas().equals(other.to_pandas())
|
|
95
95
|
|
|
96
|
-
def __hash__(self):
|
|
96
|
+
def __hash__(self) -> int:
|
|
97
97
|
return hash(self.to_pandas())
|
|
98
98
|
|
|
99
99
|
|
|
@@ -571,7 +571,7 @@ class DataFrame:
|
|
|
571
571
|
expr = exprs.Expr.from_object(raw_expr)
|
|
572
572
|
if expr is None:
|
|
573
573
|
raise excs.Error(f'Invalid expression: {raw_expr}')
|
|
574
|
-
if expr.col_type.is_invalid_type():
|
|
574
|
+
if expr.col_type.is_invalid_type() and not (isinstance(expr, exprs.Literal) and expr.val is None):
|
|
575
575
|
raise excs.Error(f'Invalid type: {raw_expr}')
|
|
576
576
|
if not expr.is_bound_by(self._from_clause.tbls):
|
|
577
577
|
raise excs.Error(
|
|
@@ -624,6 +624,8 @@ class DataFrame:
|
|
|
624
624
|
|
|
625
625
|
>>> df = person.where(t.age > 30)
|
|
626
626
|
"""
|
|
627
|
+
if self.where_clause is not None:
|
|
628
|
+
raise excs.Error('Where clause already specified')
|
|
627
629
|
if not isinstance(pred, exprs.Expr):
|
|
628
630
|
raise excs.Error(f'Where() requires a Pixeltable expression, but instead got {type(pred)}')
|
|
629
631
|
if not pred.col_type.is_bool_type():
|