pixeltable 0.4.0rc3__py3-none-any.whl → 0.4.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +23 -5
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +5 -3
- pixeltable/catalog/catalog.py +1318 -404
- pixeltable/catalog/column.py +186 -115
- pixeltable/catalog/dir.py +1 -2
- pixeltable/catalog/globals.py +11 -43
- pixeltable/catalog/insertable_table.py +167 -79
- pixeltable/catalog/path.py +61 -23
- pixeltable/catalog/schema_object.py +9 -10
- pixeltable/catalog/table.py +626 -308
- pixeltable/catalog/table_metadata.py +101 -0
- pixeltable/catalog/table_version.py +713 -569
- pixeltable/catalog/table_version_handle.py +37 -6
- pixeltable/catalog/table_version_path.py +42 -29
- pixeltable/catalog/tbl_ops.py +50 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +108 -94
- pixeltable/config.py +128 -22
- pixeltable/dataframe.py +188 -100
- pixeltable/env.py +407 -136
- pixeltable/exceptions.py +6 -0
- pixeltable/exec/__init__.py +3 -0
- pixeltable/exec/aggregation_node.py +7 -8
- pixeltable/exec/cache_prefetch_node.py +83 -110
- pixeltable/exec/cell_materialization_node.py +231 -0
- pixeltable/exec/cell_reconstruction_node.py +135 -0
- pixeltable/exec/component_iteration_node.py +4 -3
- pixeltable/exec/data_row_batch.py +8 -65
- pixeltable/exec/exec_context.py +16 -4
- pixeltable/exec/exec_node.py +13 -36
- pixeltable/exec/expr_eval/evaluators.py +7 -6
- pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
- pixeltable/exec/expr_eval/globals.py +8 -5
- pixeltable/exec/expr_eval/row_buffer.py +1 -2
- pixeltable/exec/expr_eval/schedulers.py +190 -30
- pixeltable/exec/globals.py +32 -0
- pixeltable/exec/in_memory_data_node.py +18 -18
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +16 -9
- pixeltable/exec/sql_node.py +206 -101
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/arithmetic_expr.py +27 -22
- pixeltable/exprs/array_slice.py +3 -3
- pixeltable/exprs/column_property_ref.py +34 -30
- pixeltable/exprs/column_ref.py +92 -96
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +5 -4
- pixeltable/exprs/data_row.py +152 -55
- pixeltable/exprs/expr.py +62 -43
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +17 -10
- pixeltable/exprs/function_call.py +75 -37
- pixeltable/exprs/globals.py +1 -2
- pixeltable/exprs/in_predicate.py +4 -4
- pixeltable/exprs/inline_expr.py +10 -27
- pixeltable/exprs/is_null.py +1 -3
- pixeltable/exprs/json_mapper.py +8 -8
- pixeltable/exprs/json_path.py +56 -22
- pixeltable/exprs/literal.py +5 -5
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +127 -53
- pixeltable/exprs/rowid_ref.py +8 -12
- pixeltable/exprs/similarity_expr.py +50 -25
- pixeltable/exprs/sql_element_cache.py +4 -4
- pixeltable/exprs/string_op.py +5 -5
- pixeltable/exprs/type_cast.py +3 -5
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/aggregate_function.py +8 -8
- pixeltable/func/callable_function.py +9 -9
- pixeltable/func/expr_template_function.py +10 -10
- pixeltable/func/function.py +18 -20
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/globals.py +2 -3
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +20 -18
- pixeltable/func/signature.py +43 -16
- pixeltable/func/tools.py +23 -13
- pixeltable/func/udf.py +18 -20
- pixeltable/functions/__init__.py +6 -0
- pixeltable/functions/anthropic.py +93 -33
- pixeltable/functions/audio.py +114 -10
- pixeltable/functions/bedrock.py +13 -6
- pixeltable/functions/date.py +1 -1
- pixeltable/functions/deepseek.py +20 -9
- pixeltable/functions/fireworks.py +2 -2
- pixeltable/functions/gemini.py +28 -11
- pixeltable/functions/globals.py +13 -13
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1046 -23
- pixeltable/functions/image.py +9 -18
- pixeltable/functions/llama_cpp.py +23 -8
- pixeltable/functions/math.py +3 -4
- pixeltable/functions/mistralai.py +4 -15
- pixeltable/functions/ollama.py +16 -9
- pixeltable/functions/openai.py +104 -82
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +2 -2
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +21 -28
- pixeltable/functions/timestamp.py +13 -14
- pixeltable/functions/together.py +4 -6
- pixeltable/functions/twelvelabs.py +92 -0
- pixeltable/functions/util.py +6 -1
- pixeltable/functions/video.py +1388 -106
- pixeltable/functions/vision.py +7 -7
- pixeltable/functions/whisper.py +15 -7
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/{ext/functions → functions}/yolox.py +2 -4
- pixeltable/globals.py +332 -105
- pixeltable/index/base.py +13 -22
- pixeltable/index/btree.py +23 -22
- pixeltable/index/embedding_index.py +32 -44
- pixeltable/io/__init__.py +4 -2
- pixeltable/io/datarows.py +7 -6
- pixeltable/io/external_store.py +49 -77
- pixeltable/io/fiftyone.py +11 -11
- pixeltable/io/globals.py +29 -28
- pixeltable/io/hf_datasets.py +17 -9
- pixeltable/io/label_studio.py +70 -66
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +12 -11
- pixeltable/io/parquet.py +13 -93
- pixeltable/io/table_data_conduit.py +71 -47
- pixeltable/io/utils.py +3 -3
- pixeltable/iterators/__init__.py +2 -1
- pixeltable/iterators/audio.py +21 -11
- pixeltable/iterators/document.py +116 -55
- pixeltable/iterators/image.py +5 -2
- pixeltable/iterators/video.py +293 -13
- pixeltable/metadata/__init__.py +4 -2
- pixeltable/metadata/converters/convert_18.py +2 -2
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_20.py +2 -2
- pixeltable/metadata/converters/convert_21.py +2 -2
- pixeltable/metadata/converters/convert_22.py +2 -2
- pixeltable/metadata/converters/convert_24.py +2 -2
- pixeltable/metadata/converters/convert_25.py +2 -2
- pixeltable/metadata/converters/convert_26.py +2 -2
- pixeltable/metadata/converters/convert_29.py +4 -4
- pixeltable/metadata/converters/convert_34.py +2 -2
- pixeltable/metadata/converters/convert_36.py +2 -2
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/util.py +13 -12
- pixeltable/metadata/notes.py +4 -0
- pixeltable/metadata/schema.py +79 -42
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +274 -223
- pixeltable/share/__init__.py +1 -1
- pixeltable/share/packager.py +259 -129
- pixeltable/share/protocol/__init__.py +34 -0
- pixeltable/share/protocol/common.py +170 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +109 -0
- pixeltable/share/publish.py +213 -57
- pixeltable/store.py +238 -175
- pixeltable/type_system.py +104 -63
- pixeltable/utils/__init__.py +2 -3
- pixeltable/utils/arrow.py +108 -13
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +305 -0
- pixeltable/utils/code.py +3 -3
- pixeltable/utils/console_output.py +4 -1
- pixeltable/utils/coroutine.py +6 -23
- pixeltable/utils/dbms.py +31 -5
- pixeltable/utils/description_helper.py +4 -5
- pixeltable/utils/documents.py +5 -6
- pixeltable/utils/exception_handler.py +7 -30
- pixeltable/utils/filecache.py +6 -6
- pixeltable/utils/formatter.py +4 -6
- pixeltable/utils/gcs_store.py +283 -0
- pixeltable/utils/http_server.py +2 -3
- pixeltable/utils/iceberg.py +1 -2
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +88 -0
- pixeltable/utils/local_store.py +316 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +528 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +5 -6
- pixeltable/utils/s3_store.py +392 -0
- pixeltable-0.4.20.dist-info/METADATA +587 -0
- pixeltable-0.4.20.dist-info/RECORD +218 -0
- {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info}/WHEEL +1 -1
- pixeltable-0.4.20.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/ext/__init__.py +0 -17
- pixeltable/ext/functions/__init__.py +0 -11
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/utils/media_store.py +0 -77
- pixeltable/utils/s3.py +0 -17
- pixeltable/utils/sample.py +0 -25
- pixeltable-0.4.0rc3.dist-info/METADATA +0 -435
- pixeltable-0.4.0rc3.dist-info/RECORD +0 -189
- pixeltable-0.4.0rc3.dist-info/entry_points.txt +0 -3
- {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info/licenses}/LICENSE +0 -0
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import Any
|
|
3
|
+
from typing import Any
|
|
4
4
|
|
|
5
5
|
import sqlalchemy as sql
|
|
6
6
|
|
|
7
|
-
from pixeltable import exceptions as excs, type_system as ts
|
|
7
|
+
from pixeltable import env, exceptions as excs, type_system as ts
|
|
8
8
|
|
|
9
9
|
from .data_row import DataRow
|
|
10
10
|
from .expr import Expr
|
|
@@ -58,29 +58,36 @@ class ArithmeticExpr(Expr):
|
|
|
58
58
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
59
59
|
return [*super()._id_attrs(), ('operator', self.operator.value)]
|
|
60
60
|
|
|
61
|
-
def sql_expr(self, sql_elements: SqlElementCache) ->
|
|
61
|
+
def sql_expr(self, sql_elements: SqlElementCache) -> sql.ColumnElement | None:
|
|
62
62
|
assert self.col_type.is_int_type() or self.col_type.is_float_type() or self.col_type.is_json_type()
|
|
63
63
|
left = sql_elements.get(self._op1)
|
|
64
64
|
right = sql_elements.get(self._op2)
|
|
65
65
|
if left is None or right is None:
|
|
66
66
|
return None
|
|
67
|
-
if self.operator
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
67
|
+
if self.operator in (ArithmeticOperator.ADD, ArithmeticOperator.SUB, ArithmeticOperator.MUL):
|
|
68
|
+
if env.Env.get().is_using_cockroachdb and self._op1.col_type != self._op2.col_type:
|
|
69
|
+
if self._op1.col_type != self.col_type:
|
|
70
|
+
left = sql.cast(left, self.col_type.to_sa_type())
|
|
71
|
+
if self._op2.col_type != self.col_type:
|
|
72
|
+
right = sql.cast(right, self.col_type.to_sa_type())
|
|
73
|
+
if self.operator == ArithmeticOperator.ADD:
|
|
74
|
+
return left + right
|
|
75
|
+
if self.operator == ArithmeticOperator.SUB:
|
|
76
|
+
return left - right
|
|
77
|
+
if self.operator == ArithmeticOperator.MUL:
|
|
78
|
+
return left * right
|
|
73
79
|
if self.operator == ArithmeticOperator.DIV:
|
|
74
80
|
assert self.col_type.is_float_type()
|
|
75
|
-
# Avoid
|
|
81
|
+
# Avoid division by zero errors by converting any zero divisor to NULL.
|
|
76
82
|
# TODO: Should we cast the NULLs to NaNs when they are retrieved back into Python?
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
return sql.
|
|
83
|
+
# These casts cause the computation to take place in float units, rather than DECIMAL.
|
|
84
|
+
nullif = sql.cast(sql.func.nullif(right, 0), self.col_type.to_sa_type())
|
|
85
|
+
return sql.cast(left, self.col_type.to_sa_type()) / nullif
|
|
80
86
|
if self.operator == ArithmeticOperator.MOD:
|
|
81
87
|
if self.col_type.is_int_type():
|
|
82
|
-
|
|
83
|
-
|
|
88
|
+
# Avoid division by zero errors by converting any zero divisor to NULL.
|
|
89
|
+
nullif1 = sql.cast(sql.func.nullif(right, 0), self.col_type.to_sa_type())
|
|
90
|
+
return left % nullif1
|
|
84
91
|
if self.col_type.is_float_type():
|
|
85
92
|
# Postgres does not support modulus for floats
|
|
86
93
|
return None
|
|
@@ -90,11 +97,9 @@ class ArithmeticExpr(Expr):
|
|
|
90
97
|
# We need the behavior to be consistent, so that expressions will evaluate the same way
|
|
91
98
|
# whether or not their operands can be translated to SQL. These SQL clauses should
|
|
92
99
|
# mimic the behavior of Python's // operator.
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
if self.col_type.is_float_type():
|
|
97
|
-
return sql.sql.expression.cast(sql.func.floor(left / nullif), self.col_type.to_sa_type())
|
|
100
|
+
# Avoid division by zero errors by converting any zero divisor to NULL.
|
|
101
|
+
nullif = sql.cast(sql.func.nullif(right, 0), self.col_type.to_sa_type())
|
|
102
|
+
return sql.func.floor(sql.cast(left, self.col_type.to_sa_type()) / nullif)
|
|
98
103
|
raise AssertionError()
|
|
99
104
|
|
|
100
105
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
@@ -113,7 +118,7 @@ class ArithmeticExpr(Expr):
|
|
|
113
118
|
|
|
114
119
|
data_row[self.slot_idx] = self.eval_nullable(op1_val, op2_val)
|
|
115
120
|
|
|
116
|
-
def eval_nullable(self, op1_val:
|
|
121
|
+
def eval_nullable(self, op1_val: float | None, op2_val: float | None) -> float | None:
|
|
117
122
|
"""
|
|
118
123
|
Return the result of evaluating the expression on two nullable int/float operands,
|
|
119
124
|
None is interpreted as SQL NULL
|
|
@@ -139,7 +144,7 @@ class ArithmeticExpr(Expr):
|
|
|
139
144
|
elif self.operator == ArithmeticOperator.FLOORDIV:
|
|
140
145
|
return op1_val // op2_val
|
|
141
146
|
|
|
142
|
-
def as_literal(self) ->
|
|
147
|
+
def as_literal(self) -> Literal | None:
|
|
143
148
|
op1_lit = self._op1.as_literal()
|
|
144
149
|
if op1_lit is None:
|
|
145
150
|
return None
|
pixeltable/exprs/array_slice.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import Any
|
|
3
|
+
from typing import Any
|
|
4
4
|
|
|
5
5
|
import sqlalchemy as sql
|
|
6
6
|
|
|
@@ -16,7 +16,7 @@ class ArraySlice(Expr):
|
|
|
16
16
|
Slice operation on an array, eg, t.array_col[:, 1:2].
|
|
17
17
|
"""
|
|
18
18
|
|
|
19
|
-
def __init__(self, arr: Expr, index: tuple[
|
|
19
|
+
def __init__(self, arr: Expr, index: tuple[int | slice, ...]):
|
|
20
20
|
assert arr.col_type.is_array_type()
|
|
21
21
|
# determine result type
|
|
22
22
|
super().__init__(arr.col_type)
|
|
@@ -43,7 +43,7 @@ class ArraySlice(Expr):
|
|
|
43
43
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
44
44
|
return [*super()._id_attrs(), ('index', self.index)]
|
|
45
45
|
|
|
46
|
-
def sql_expr(self, _: SqlElementCache) ->
|
|
46
|
+
def sql_expr(self, _: SqlElementCache) -> sql.ColumnElement | None:
|
|
47
47
|
return None
|
|
48
48
|
|
|
49
49
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import enum
|
|
4
|
-
from typing import Any
|
|
4
|
+
from typing import Any
|
|
5
5
|
|
|
6
6
|
import sqlalchemy as sql
|
|
7
7
|
|
|
@@ -26,6 +26,7 @@ class ColumnPropertyRef(Expr):
|
|
|
26
26
|
ERRORMSG = 1
|
|
27
27
|
FILEURL = 2
|
|
28
28
|
LOCALPATH = 3
|
|
29
|
+
CELLMD = 4 # JSON metadata for the cell, e.g. errortype, errormsg for media columns
|
|
29
30
|
|
|
30
31
|
def __init__(self, col_ref: ColumnRef, prop: Property):
|
|
31
32
|
super().__init__(ts.StringType(nullable=True))
|
|
@@ -33,7 +34,7 @@ class ColumnPropertyRef(Expr):
|
|
|
33
34
|
self.prop = prop
|
|
34
35
|
self.id = self._create_id()
|
|
35
36
|
|
|
36
|
-
def default_column_name(self) ->
|
|
37
|
+
def default_column_name(self) -> str | None:
|
|
37
38
|
return str(self).replace('.', '_')
|
|
38
39
|
|
|
39
40
|
def _equals(self, other: ColumnPropertyRef) -> bool:
|
|
@@ -43,66 +44,69 @@ class ColumnPropertyRef(Expr):
|
|
|
43
44
|
return [*super()._id_attrs(), ('prop', self.prop.value)]
|
|
44
45
|
|
|
45
46
|
@property
|
|
46
|
-
def
|
|
47
|
+
def col_ref(self) -> ColumnRef:
|
|
47
48
|
col_ref = self.components[0]
|
|
48
49
|
assert isinstance(col_ref, ColumnRef)
|
|
49
50
|
return col_ref
|
|
50
51
|
|
|
51
52
|
def __repr__(self) -> str:
|
|
52
|
-
return f'{self.
|
|
53
|
+
return f'{self.col_ref}.{self.prop.name.lower()}'
|
|
53
54
|
|
|
54
|
-
def
|
|
55
|
-
return self.prop in (self.Property.ERRORTYPE, self.Property.ERRORMSG)
|
|
55
|
+
def is_cellmd_prop(self) -> bool:
|
|
56
|
+
return self.prop in (self.Property.ERRORTYPE, self.Property.ERRORMSG, self.Property.CELLMD)
|
|
56
57
|
|
|
57
|
-
def sql_expr(self, sql_elements: SqlElementCache) ->
|
|
58
|
-
if not self.
|
|
58
|
+
def sql_expr(self, sql_elements: SqlElementCache) -> sql.ColumnElement | None:
|
|
59
|
+
if not self.col_ref.col_handle.get().is_stored:
|
|
59
60
|
return None
|
|
60
|
-
|
|
61
|
-
# we need to reestablish that we have the correct Column instance, there could have been a metadata
|
|
62
|
-
# reload since init()
|
|
63
|
-
# TODO: add an explicit prepare phase (ie, Expr.prepare()) that gives every subclass instance a chance to
|
|
64
|
-
# perform runtime checks and update state
|
|
65
|
-
tv = self._col_ref.tbl_version.get()
|
|
66
|
-
assert tv.is_validated
|
|
67
|
-
col = tv.cols_by_id[self._col_ref.col_id]
|
|
68
|
-
# TODO: check for column being dropped
|
|
61
|
+
col = self.col_ref.col_handle.get()
|
|
69
62
|
|
|
70
63
|
# the errortype/-msg properties of a read-validated media column need to be extracted from the DataRow
|
|
71
64
|
if (
|
|
72
65
|
col.col_type.is_media_type()
|
|
73
66
|
and col.media_validation == catalog.MediaValidation.ON_READ
|
|
74
|
-
and self.
|
|
67
|
+
and self.is_cellmd_prop()
|
|
75
68
|
):
|
|
76
69
|
return None
|
|
77
70
|
|
|
78
71
|
if self.prop == self.Property.ERRORTYPE:
|
|
79
|
-
|
|
80
|
-
return col.sa_errortype_col
|
|
72
|
+
return col.sa_cellmd_col.op('->>')('errortype')
|
|
81
73
|
if self.prop == self.Property.ERRORMSG:
|
|
82
|
-
|
|
83
|
-
|
|
74
|
+
return col.sa_cellmd_col.op('->>')('errormsg')
|
|
75
|
+
if self.prop == self.Property.CELLMD:
|
|
76
|
+
assert col.sa_cellmd_col is not None
|
|
77
|
+
return col.sa_cellmd_col
|
|
84
78
|
if self.prop == self.Property.FILEURL:
|
|
85
79
|
# the file url is stored as the column value
|
|
86
|
-
return sql_elements.get(self.
|
|
80
|
+
return sql_elements.get(self.col_ref)
|
|
87
81
|
return None
|
|
88
82
|
|
|
83
|
+
@classmethod
|
|
84
|
+
def create_cellmd_exc(cls, exc: Exception) -> dict[str, str]:
|
|
85
|
+
"""Create a cellmd value from an exception."""
|
|
86
|
+
return {'errortype': type(exc).__name__, 'errormsg': str(exc)}
|
|
87
|
+
|
|
89
88
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
90
89
|
if self.prop == self.Property.FILEURL:
|
|
91
|
-
assert data_row.has_val[self.
|
|
92
|
-
data_row[self.slot_idx] = data_row.file_urls[self.
|
|
90
|
+
assert data_row.has_val[self.col_ref.slot_idx]
|
|
91
|
+
data_row[self.slot_idx] = data_row.file_urls[self.col_ref.slot_idx]
|
|
93
92
|
return
|
|
94
93
|
elif self.prop == self.Property.LOCALPATH:
|
|
95
|
-
assert data_row.has_val[self.
|
|
96
|
-
data_row[self.slot_idx] = data_row.file_paths[self.
|
|
94
|
+
assert data_row.has_val[self.col_ref.slot_idx]
|
|
95
|
+
data_row[self.slot_idx] = data_row.file_paths[self.col_ref.slot_idx]
|
|
97
96
|
return
|
|
98
|
-
elif self.
|
|
99
|
-
exc = data_row.get_exc(self.
|
|
97
|
+
elif self.is_cellmd_prop():
|
|
98
|
+
exc = data_row.get_exc(self.col_ref.slot_idx)
|
|
100
99
|
if exc is None:
|
|
101
100
|
data_row[self.slot_idx] = None
|
|
102
101
|
elif self.prop == self.Property.ERRORTYPE:
|
|
103
102
|
data_row[self.slot_idx] = type(exc).__name__
|
|
104
|
-
|
|
103
|
+
elif self.prop == self.Property.ERRORMSG:
|
|
105
104
|
data_row[self.slot_idx] = str(exc)
|
|
105
|
+
elif self.prop == self.Property.CELLMD:
|
|
106
|
+
data_row[self.slot_idx] = self.create_cellmd_exc(exc)
|
|
107
|
+
else:
|
|
108
|
+
raise AssertionError(f'Unknown property {self.prop}')
|
|
109
|
+
return
|
|
106
110
|
else:
|
|
107
111
|
raise AssertionError()
|
|
108
112
|
|
pixeltable/exprs/column_ref.py
CHANGED
|
@@ -1,20 +1,24 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
import
|
|
4
|
-
from typing import Any, Optional, Sequence
|
|
3
|
+
from typing import TYPE_CHECKING, Any, Sequence, cast
|
|
5
4
|
from uuid import UUID
|
|
6
5
|
|
|
7
6
|
import sqlalchemy as sql
|
|
8
7
|
|
|
9
|
-
import pixeltable as
|
|
10
|
-
|
|
8
|
+
import pixeltable.catalog as catalog
|
|
9
|
+
import pixeltable.exceptions as excs
|
|
10
|
+
import pixeltable.iterators as iters
|
|
11
11
|
|
|
12
12
|
from ..utils.description_helper import DescriptionHelper
|
|
13
|
+
from ..utils.filecache import FileCache
|
|
13
14
|
from .data_row import DataRow
|
|
14
15
|
from .expr import Expr
|
|
15
16
|
from .row_builder import RowBuilder
|
|
16
17
|
from .sql_element_cache import SqlElementCache
|
|
17
18
|
|
|
19
|
+
if TYPE_CHECKING:
|
|
20
|
+
from pixeltable.dataframe import DataFrame, DataFrameResultSet
|
|
21
|
+
|
|
18
22
|
|
|
19
23
|
class ColumnRef(Expr):
|
|
20
24
|
"""A reference to a table column
|
|
@@ -41,42 +45,36 @@ class ColumnRef(Expr):
|
|
|
41
45
|
insert them into the EvalCtxs as needed
|
|
42
46
|
"""
|
|
43
47
|
|
|
44
|
-
col: catalog.Column
|
|
45
|
-
|
|
48
|
+
col: catalog.Column # TODO: merge with col_handle
|
|
49
|
+
col_handle: catalog.ColumnHandle
|
|
50
|
+
reference_tbl: catalog.TableVersionPath | None
|
|
46
51
|
is_unstored_iter_col: bool
|
|
47
|
-
iter_arg_ctx: Optional[RowBuilder.EvalCtx]
|
|
48
|
-
base_rowid_len: int
|
|
49
|
-
base_rowid: Sequence[Optional[Any]]
|
|
50
|
-
iterator: Optional[iters.ComponentIterator]
|
|
51
|
-
pos_idx: Optional[int]
|
|
52
|
-
id: int
|
|
53
52
|
perform_validation: bool # if True, performs media validation
|
|
53
|
+
iter_arg_ctx: RowBuilder.EvalCtx | None
|
|
54
|
+
base_rowid_len: int # number of rowid columns in the base table
|
|
54
55
|
|
|
55
|
-
#
|
|
56
|
-
|
|
57
|
-
|
|
56
|
+
# execution state
|
|
57
|
+
base_rowid: Sequence[Any | None]
|
|
58
|
+
iterator: iters.ComponentIterator | None
|
|
59
|
+
pos_idx: int
|
|
58
60
|
|
|
59
61
|
def __init__(
|
|
60
62
|
self,
|
|
61
63
|
col: catalog.Column,
|
|
62
|
-
reference_tbl:
|
|
63
|
-
perform_validation:
|
|
64
|
+
reference_tbl: catalog.TableVersionPath | None = None,
|
|
65
|
+
perform_validation: bool | None = None,
|
|
64
66
|
):
|
|
65
67
|
super().__init__(col.col_type)
|
|
66
|
-
assert col.tbl is not None
|
|
67
68
|
self.col = col
|
|
68
69
|
self.reference_tbl = reference_tbl
|
|
69
|
-
self.
|
|
70
|
-
self.col_id = col.id
|
|
70
|
+
self.col_handle = col.handle
|
|
71
71
|
|
|
72
|
-
self.is_unstored_iter_col = col.
|
|
72
|
+
self.is_unstored_iter_col = col.is_iterator_col and not col.is_stored
|
|
73
73
|
self.iter_arg_ctx = None
|
|
74
|
-
|
|
75
|
-
self.
|
|
76
|
-
self.base_rowid = [None] * self.base_rowid_len
|
|
74
|
+
self.base_rowid_len = 0
|
|
75
|
+
self.base_rowid = []
|
|
77
76
|
self.iterator = None
|
|
78
|
-
|
|
79
|
-
self.pos_idx = col.tbl.num_rowid_columns() - 1 if self.is_unstored_iter_col else None
|
|
77
|
+
self.pos_idx = 0
|
|
80
78
|
|
|
81
79
|
self.perform_validation = False
|
|
82
80
|
if col.col_type.is_media_type():
|
|
@@ -102,14 +100,14 @@ class ColumnRef(Expr):
|
|
|
102
100
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
103
101
|
return [
|
|
104
102
|
*super()._id_attrs(),
|
|
105
|
-
('tbl_id', self.col.
|
|
103
|
+
('tbl_id', self.col.tbl_handle.id),
|
|
106
104
|
('col_id', self.col.id),
|
|
107
105
|
('perform_validation', self.perform_validation),
|
|
108
106
|
]
|
|
109
107
|
|
|
110
108
|
# override
|
|
111
109
|
def _retarget(self, tbl_versions: dict[UUID, catalog.TableVersion]) -> ColumnRef:
|
|
112
|
-
target = tbl_versions[self.col.
|
|
110
|
+
target = tbl_versions[self.col.tbl_handle.id]
|
|
113
111
|
assert self.col.id in target.cols_by_id
|
|
114
112
|
col = target.cols_by_id[self.col.id]
|
|
115
113
|
return ColumnRef(col, self.reference_tbl)
|
|
@@ -118,12 +116,16 @@ class ColumnRef(Expr):
|
|
|
118
116
|
from .column_property_ref import ColumnPropertyRef
|
|
119
117
|
|
|
120
118
|
# resolve column properties
|
|
119
|
+
if name == ColumnPropertyRef.Property.CELLMD.name.lower():
|
|
120
|
+
# This is not user accessible, but used internally to store cell metadata
|
|
121
|
+
return super().__getattr__(name)
|
|
122
|
+
|
|
121
123
|
if (
|
|
122
124
|
name == ColumnPropertyRef.Property.ERRORTYPE.name.lower()
|
|
123
125
|
or name == ColumnPropertyRef.Property.ERRORMSG.name.lower()
|
|
124
126
|
):
|
|
125
|
-
|
|
126
|
-
if not
|
|
127
|
+
is_valid = (self.col.is_computed or self.col.col_type.is_media_type()) and self.col.is_stored
|
|
128
|
+
if not is_valid:
|
|
127
129
|
raise excs.Error(f'{name} only valid for a stored computed or media column: {self}')
|
|
128
130
|
return ColumnPropertyRef(self, ColumnPropertyRef.Property[name.upper()])
|
|
129
131
|
if (
|
|
@@ -143,76 +145,66 @@ class ColumnRef(Expr):
|
|
|
143
145
|
|
|
144
146
|
return super().__getattr__(name)
|
|
145
147
|
|
|
146
|
-
def
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
raise excs.Error(f'Index {idx_name!r} not found for {method_name!r} on column {self.col.name!r}')
|
|
162
|
-
if len(embedding_idx_info) > 1:
|
|
163
|
-
if idx_name is None:
|
|
164
|
-
raise excs.Error(
|
|
165
|
-
f'Column {self.col.name!r} has multiple indices; use the index name to disambiguate: '
|
|
166
|
-
f'`{method_name}(..., idx=<index_name>)`'
|
|
167
|
-
)
|
|
168
|
-
idx_info = {idx_name: embedding_idx_info[idx_name]}
|
|
169
|
-
else:
|
|
170
|
-
idx_info = embedding_idx_info
|
|
171
|
-
return idx_info
|
|
172
|
-
|
|
173
|
-
def similarity(self, item: Any, *, idx: Optional[str] = None) -> Expr:
|
|
148
|
+
def recompute(self, *, cascade: bool = True, errors_only: bool = False) -> catalog.UpdateStatus:
|
|
149
|
+
cat = catalog.Catalog.get()
|
|
150
|
+
# lock_mutable_tree=True: we need to be able to see whether any transitive view has column dependents
|
|
151
|
+
with cat.begin_xact(tbl=self.reference_tbl, for_write=True, lock_mutable_tree=True):
|
|
152
|
+
tbl_version = self.col_handle.tbl_version.get()
|
|
153
|
+
if tbl_version.id != self.reference_tbl.tbl_id:
|
|
154
|
+
raise excs.Error('Cannot recompute column of a base.')
|
|
155
|
+
if tbl_version.is_snapshot:
|
|
156
|
+
raise excs.Error('Cannot recompute column of a snapshot.')
|
|
157
|
+
col_name = self.col_handle.get().name
|
|
158
|
+
status = tbl_version.recompute_columns([col_name], errors_only=errors_only, cascade=cascade)
|
|
159
|
+
FileCache.get().emit_eviction_warnings()
|
|
160
|
+
return status
|
|
161
|
+
|
|
162
|
+
def similarity(self, item: Any, *, idx: str | None = None) -> Expr:
|
|
174
163
|
from .similarity_expr import SimilarityExpr
|
|
175
164
|
|
|
176
165
|
return SimilarityExpr(self, item, idx_name=idx)
|
|
177
166
|
|
|
178
|
-
def embedding(self, *, idx:
|
|
179
|
-
|
|
180
|
-
assert len(idx_info) == 1
|
|
181
|
-
col = copy.copy(next(iter(idx_info.values())).val_col)
|
|
182
|
-
col.name = f'{self.col.name}_embedding_{idx if idx is not None else ""}'
|
|
183
|
-
# col.create_sa_cols()
|
|
184
|
-
return ColumnRef(col)
|
|
167
|
+
def embedding(self, *, idx: str | None = None) -> ColumnRef:
|
|
168
|
+
from pixeltable.index import EmbeddingIndex
|
|
185
169
|
|
|
186
|
-
|
|
170
|
+
idx_info = self.tbl.get().get_idx(self.col, idx, EmbeddingIndex)
|
|
171
|
+
return ColumnRef(idx_info.val_col)
|
|
172
|
+
|
|
173
|
+
@property
|
|
174
|
+
def tbl(self) -> catalog.TableVersionHandle:
|
|
175
|
+
return self.reference_tbl.tbl_version if self.reference_tbl is not None else self.col.tbl_handle
|
|
176
|
+
|
|
177
|
+
def default_column_name(self) -> str | None:
|
|
187
178
|
return self.col.name if self.col is not None else None
|
|
188
179
|
|
|
189
180
|
def _equals(self, other: ColumnRef) -> bool:
|
|
190
181
|
return self.col == other.col and self.perform_validation == other.perform_validation
|
|
191
182
|
|
|
192
|
-
def _df(self) -> '
|
|
193
|
-
|
|
183
|
+
def _df(self) -> 'DataFrame':
|
|
184
|
+
import pixeltable.plan as plan
|
|
185
|
+
from pixeltable.dataframe import DataFrame
|
|
194
186
|
|
|
195
187
|
if self.reference_tbl is None:
|
|
196
188
|
# No reference table; use the current version of the table to which the column belongs
|
|
197
|
-
tbl = catalog.Catalog.get().get_table_by_id(self.col.
|
|
189
|
+
tbl = catalog.Catalog.get().get_table_by_id(self.col.tbl_handle.id)
|
|
198
190
|
return tbl.select(self)
|
|
199
191
|
else:
|
|
200
192
|
# Explicit reference table; construct a DataFrame directly from it
|
|
201
|
-
return
|
|
193
|
+
return DataFrame(plan.FromClause([self.reference_tbl])).select(self)
|
|
202
194
|
|
|
203
|
-
def show(self, *args: Any, **kwargs: Any) -> '
|
|
195
|
+
def show(self, *args: Any, **kwargs: Any) -> 'DataFrameResultSet':
|
|
204
196
|
return self._df().show(*args, **kwargs)
|
|
205
197
|
|
|
206
|
-
def head(self, *args: Any, **kwargs: Any) -> '
|
|
198
|
+
def head(self, *args: Any, **kwargs: Any) -> 'DataFrameResultSet':
|
|
207
199
|
return self._df().head(*args, **kwargs)
|
|
208
200
|
|
|
209
|
-
def tail(self, *args: Any, **kwargs: Any) -> '
|
|
201
|
+
def tail(self, *args: Any, **kwargs: Any) -> 'DataFrameResultSet':
|
|
210
202
|
return self._df().tail(*args, **kwargs)
|
|
211
203
|
|
|
212
204
|
def count(self) -> int:
|
|
213
205
|
return self._df().count()
|
|
214
206
|
|
|
215
|
-
def distinct(self) -> '
|
|
207
|
+
def distinct(self) -> 'DataFrame':
|
|
216
208
|
"""Return distinct values in this column."""
|
|
217
209
|
return self._df().distinct()
|
|
218
210
|
|
|
@@ -229,7 +221,8 @@ class ColumnRef(Expr):
|
|
|
229
221
|
return self._descriptors().to_html()
|
|
230
222
|
|
|
231
223
|
def _descriptors(self) -> DescriptionHelper:
|
|
232
|
-
|
|
224
|
+
with catalog.Catalog.get().begin_xact():
|
|
225
|
+
tbl = catalog.Catalog.get().get_table_by_id(self.col.tbl_handle.id)
|
|
233
226
|
helper = DescriptionHelper()
|
|
234
227
|
helper.append(f'Column\n{self.col.name!r}\n(of table {tbl._path()!r})')
|
|
235
228
|
helper.append(tbl._col_descriptor([self.col.name]))
|
|
@@ -238,23 +231,21 @@ class ColumnRef(Expr):
|
|
|
238
231
|
helper.append(idxs)
|
|
239
232
|
return helper
|
|
240
233
|
|
|
241
|
-
def
|
|
242
|
-
|
|
234
|
+
def prepare(self) -> None:
|
|
235
|
+
from pixeltable import store
|
|
236
|
+
|
|
237
|
+
if not self.is_unstored_iter_col:
|
|
238
|
+
return
|
|
239
|
+
col = self.col_handle.get()
|
|
240
|
+
self.base_rowid_len = col.get_tbl().base.get().num_rowid_columns()
|
|
241
|
+
self.base_rowid = [None] * self.base_rowid_len
|
|
242
|
+
assert isinstance(col.get_tbl().store_tbl, store.StoreComponentView)
|
|
243
|
+
self.pos_idx = cast(store.StoreComponentView, col.get_tbl().store_tbl).pos_col_idx
|
|
244
|
+
|
|
245
|
+
def sql_expr(self, _: SqlElementCache) -> sql.ColumnElement | None:
|
|
243
246
|
if self.perform_validation:
|
|
244
247
|
return None
|
|
245
|
-
|
|
246
|
-
# reload since init()
|
|
247
|
-
# TODO: add an explicit prepare phase (ie, Expr.prepare()) that gives every subclass instance a chance to
|
|
248
|
-
# perform runtime checks and update state
|
|
249
|
-
tv = self.tbl_version.get()
|
|
250
|
-
assert tv.is_validated
|
|
251
|
-
self.col = tv.cols_by_id[self.col_id]
|
|
252
|
-
assert self.col.tbl is tv
|
|
253
|
-
# TODO: check for column being dropped
|
|
254
|
-
# print(
|
|
255
|
-
# f'ColumnRef.sql_expr: tbl={tv.id}:{tv.effective_version} sa_tbl={id(self.col.tbl.store_tbl.sa_tbl):x} '
|
|
256
|
-
# f'tv={id(tv):x}'
|
|
257
|
-
# )
|
|
248
|
+
self.col = self.col_handle.get()
|
|
258
249
|
return self.col.sa_col
|
|
259
250
|
|
|
260
251
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
@@ -296,29 +287,34 @@ class ColumnRef(Expr):
|
|
|
296
287
|
if self.base_rowid != data_row.pk[: self.base_rowid_len]:
|
|
297
288
|
row_builder.eval(data_row, self.iter_arg_ctx)
|
|
298
289
|
iterator_args = data_row[self.iter_arg_ctx.target_slot_idxs[0]]
|
|
299
|
-
self.iterator = self.col.
|
|
290
|
+
self.iterator = self.col.get_tbl().iterator_cls(**iterator_args)
|
|
300
291
|
self.base_rowid = data_row.pk[: self.base_rowid_len]
|
|
301
292
|
self.iterator.set_pos(data_row.pk[self.pos_idx])
|
|
302
293
|
res = next(self.iterator)
|
|
303
294
|
data_row[self.slot_idx] = res[self.col.name]
|
|
304
295
|
|
|
305
296
|
def _as_dict(self) -> dict:
|
|
306
|
-
|
|
307
|
-
version = tbl.version if tbl.is_snapshot else None
|
|
297
|
+
tbl_handle = self.col.tbl_handle
|
|
308
298
|
# we omit self.components, even if this is a validating ColumnRef, because init() will recreate the
|
|
309
299
|
# non-validating component ColumnRef
|
|
310
300
|
return {
|
|
311
|
-
'tbl_id': str(
|
|
312
|
-
'tbl_version':
|
|
301
|
+
'tbl_id': str(tbl_handle.id),
|
|
302
|
+
'tbl_version': tbl_handle.effective_version,
|
|
313
303
|
'col_id': self.col.id,
|
|
314
304
|
'reference_tbl': self.reference_tbl.as_dict() if self.reference_tbl is not None else None,
|
|
315
305
|
'perform_validation': self.perform_validation,
|
|
316
306
|
}
|
|
317
307
|
|
|
308
|
+
@classmethod
|
|
309
|
+
def get_column_id(cls, d: dict) -> catalog.QColumnId:
|
|
310
|
+
tbl_id, col_id = UUID(d['tbl_id']), d['col_id']
|
|
311
|
+
return catalog.QColumnId(tbl_id, col_id)
|
|
312
|
+
|
|
318
313
|
@classmethod
|
|
319
314
|
def get_column(cls, d: dict) -> catalog.Column:
|
|
320
315
|
tbl_id, version, col_id = UUID(d['tbl_id']), d['tbl_version'], d['col_id']
|
|
321
|
-
|
|
316
|
+
# validate_initialized=False: this gets called as part of TableVersion.init()
|
|
317
|
+
tbl_version = catalog.Catalog.get().get_tbl_version(tbl_id, version, validate_initialized=False)
|
|
322
318
|
# don't use tbl_version.cols_by_id here, this might be a snapshot reference to a column that was then dropped
|
|
323
319
|
col = next(col for col in tbl_version.cols if col.id == col_id)
|
|
324
320
|
return col
|
pixeltable/exprs/comparison.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import Any
|
|
3
|
+
from typing import Any
|
|
4
4
|
|
|
5
5
|
import sqlalchemy as sql
|
|
6
6
|
|
|
@@ -69,8 +69,8 @@ class Comparison(Expr):
|
|
|
69
69
|
def _op2(self) -> Expr:
|
|
70
70
|
return self.components[1]
|
|
71
71
|
|
|
72
|
-
def sql_expr(self, sql_elements: SqlElementCache) ->
|
|
73
|
-
|
|
72
|
+
def sql_expr(self, sql_elements: SqlElementCache) -> sql.ColumnElement | None:
|
|
73
|
+
import pixeltable.index as index
|
|
74
74
|
|
|
75
75
|
if str(self._op1.col_type.to_sa_type()) != str(self._op2.col_type.to_sa_type()):
|
|
76
76
|
# Comparing columns of different SQL types (e.g., string vs. json); this can only be done in Python
|
|
@@ -81,9 +81,9 @@ class Comparison(Expr):
|
|
|
81
81
|
if self.is_search_arg_comparison:
|
|
82
82
|
# reference the index value column if there is an index and this is not a snapshot
|
|
83
83
|
# (indices don't apply to snapshots)
|
|
84
|
-
tbl = self._op1.col.
|
|
84
|
+
tbl = self._op1.col.get_tbl()
|
|
85
85
|
idx_info = [
|
|
86
|
-
info for info in self._op1.col.
|
|
86
|
+
info for info in tbl.idxs_by_col.get(self._op1.col.qid, []) if isinstance(info.idx, index.BtreeIndex)
|
|
87
87
|
]
|
|
88
88
|
if len(idx_info) > 0 and not tbl.is_snapshot:
|
|
89
89
|
# there shouldn't be multiple B-tree indices on a column
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import operator
|
|
4
|
-
from typing import Any, Callable
|
|
4
|
+
from typing import Any, Callable
|
|
5
5
|
|
|
6
6
|
import sqlalchemy as sql
|
|
7
7
|
|
|
@@ -36,7 +36,8 @@ class CompoundPredicate(Expr):
|
|
|
36
36
|
return f' {self.operator} '.join([f'({e})' for e in self.components])
|
|
37
37
|
|
|
38
38
|
@classmethod
|
|
39
|
-
def make_conjunction(cls, operands: list[Expr]) ->
|
|
39
|
+
def make_conjunction(cls, operands: list[Expr | None]) -> Expr | None:
|
|
40
|
+
operands = [e for e in operands if e is not None]
|
|
40
41
|
if len(operands) == 0:
|
|
41
42
|
return None
|
|
42
43
|
if len(operands) == 1:
|
|
@@ -60,14 +61,14 @@ class CompoundPredicate(Expr):
|
|
|
60
61
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
61
62
|
return [*super()._id_attrs(), ('operator', self.operator.value)]
|
|
62
63
|
|
|
63
|
-
def split_conjuncts(self, condition: Callable[[Expr], bool]) -> tuple[list[Expr],
|
|
64
|
+
def split_conjuncts(self, condition: Callable[[Expr], bool]) -> tuple[list[Expr], Expr | None]:
|
|
64
65
|
if self.operator in (LogicalOperator.OR, LogicalOperator.NOT):
|
|
65
66
|
return super().split_conjuncts(condition)
|
|
66
67
|
matches = [op for op in self.components if condition(op)]
|
|
67
68
|
non_matches = [op for op in self.components if not condition(op)]
|
|
68
69
|
return (matches, self.make_conjunction(non_matches))
|
|
69
70
|
|
|
70
|
-
def sql_expr(self, sql_elements: SqlElementCache) ->
|
|
71
|
+
def sql_expr(self, sql_elements: SqlElementCache) -> sql.ColumnElement | None:
|
|
71
72
|
sql_exprs = [sql_elements.get(op) for op in self.components]
|
|
72
73
|
if any(e is None for e in sql_exprs):
|
|
73
74
|
return None
|