pixeltable 0.3.14__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +42 -8
- pixeltable/{dataframe.py → _query.py} +470 -206
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +5 -4
- pixeltable/catalog/catalog.py +1785 -432
- pixeltable/catalog/column.py +190 -113
- pixeltable/catalog/dir.py +2 -4
- pixeltable/catalog/globals.py +19 -46
- pixeltable/catalog/insertable_table.py +191 -98
- pixeltable/catalog/path.py +63 -23
- pixeltable/catalog/schema_object.py +11 -15
- pixeltable/catalog/table.py +843 -436
- pixeltable/catalog/table_metadata.py +103 -0
- pixeltable/catalog/table_version.py +978 -657
- pixeltable/catalog/table_version_handle.py +72 -16
- pixeltable/catalog/table_version_path.py +112 -43
- pixeltable/catalog/tbl_ops.py +53 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +134 -90
- pixeltable/config.py +134 -22
- pixeltable/env.py +471 -157
- pixeltable/exceptions.py +6 -0
- pixeltable/exec/__init__.py +4 -1
- pixeltable/exec/aggregation_node.py +7 -8
- pixeltable/exec/cache_prefetch_node.py +83 -110
- pixeltable/exec/cell_materialization_node.py +268 -0
- pixeltable/exec/cell_reconstruction_node.py +168 -0
- pixeltable/exec/component_iteration_node.py +4 -3
- pixeltable/exec/data_row_batch.py +8 -65
- pixeltable/exec/exec_context.py +16 -4
- pixeltable/exec/exec_node.py +13 -36
- pixeltable/exec/expr_eval/evaluators.py +11 -7
- pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
- pixeltable/exec/expr_eval/globals.py +8 -5
- pixeltable/exec/expr_eval/row_buffer.py +1 -2
- pixeltable/exec/expr_eval/schedulers.py +106 -56
- pixeltable/exec/globals.py +35 -0
- pixeltable/exec/in_memory_data_node.py +19 -19
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +16 -9
- pixeltable/exec/sql_node.py +351 -84
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/arithmetic_expr.py +27 -22
- pixeltable/exprs/array_slice.py +3 -3
- pixeltable/exprs/column_property_ref.py +36 -23
- pixeltable/exprs/column_ref.py +213 -89
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +5 -4
- pixeltable/exprs/data_row.py +164 -54
- pixeltable/exprs/expr.py +70 -44
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +17 -10
- pixeltable/exprs/function_call.py +100 -40
- pixeltable/exprs/globals.py +2 -2
- pixeltable/exprs/in_predicate.py +4 -4
- pixeltable/exprs/inline_expr.py +18 -32
- pixeltable/exprs/is_null.py +7 -3
- pixeltable/exprs/json_mapper.py +8 -8
- pixeltable/exprs/json_path.py +56 -22
- pixeltable/exprs/literal.py +27 -5
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +167 -67
- pixeltable/exprs/rowid_ref.py +25 -10
- pixeltable/exprs/similarity_expr.py +58 -40
- pixeltable/exprs/sql_element_cache.py +4 -4
- pixeltable/exprs/string_op.py +5 -5
- pixeltable/exprs/type_cast.py +3 -5
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/aggregate_function.py +8 -8
- pixeltable/func/callable_function.py +9 -9
- pixeltable/func/expr_template_function.py +17 -11
- pixeltable/func/function.py +18 -20
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/globals.py +2 -3
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +29 -27
- pixeltable/func/signature.py +46 -19
- pixeltable/func/tools.py +31 -13
- pixeltable/func/udf.py +18 -20
- pixeltable/functions/__init__.py +16 -0
- pixeltable/functions/anthropic.py +123 -77
- pixeltable/functions/audio.py +147 -10
- pixeltable/functions/bedrock.py +13 -6
- pixeltable/functions/date.py +7 -4
- pixeltable/functions/deepseek.py +35 -43
- pixeltable/functions/document.py +81 -0
- pixeltable/functions/fal.py +76 -0
- pixeltable/functions/fireworks.py +11 -20
- pixeltable/functions/gemini.py +195 -39
- pixeltable/functions/globals.py +142 -14
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1056 -24
- pixeltable/functions/image.py +115 -57
- pixeltable/functions/json.py +1 -1
- pixeltable/functions/llama_cpp.py +28 -13
- pixeltable/functions/math.py +67 -5
- pixeltable/functions/mistralai.py +18 -55
- pixeltable/functions/net.py +70 -0
- pixeltable/functions/ollama.py +20 -13
- pixeltable/functions/openai.py +240 -226
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +4 -4
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +239 -69
- pixeltable/functions/timestamp.py +16 -16
- pixeltable/functions/together.py +24 -84
- pixeltable/functions/twelvelabs.py +188 -0
- pixeltable/functions/util.py +6 -1
- pixeltable/functions/uuid.py +30 -0
- pixeltable/functions/video.py +1515 -107
- pixeltable/functions/vision.py +8 -8
- pixeltable/functions/voyageai.py +289 -0
- pixeltable/functions/whisper.py +16 -8
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/{ext/functions → functions}/yolox.py +2 -4
- pixeltable/globals.py +362 -115
- pixeltable/index/base.py +17 -21
- pixeltable/index/btree.py +28 -22
- pixeltable/index/embedding_index.py +100 -118
- pixeltable/io/__init__.py +4 -2
- pixeltable/io/datarows.py +8 -7
- pixeltable/io/external_store.py +56 -105
- pixeltable/io/fiftyone.py +13 -13
- pixeltable/io/globals.py +31 -30
- pixeltable/io/hf_datasets.py +61 -16
- pixeltable/io/label_studio.py +74 -70
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +21 -12
- pixeltable/io/parquet.py +25 -105
- pixeltable/io/table_data_conduit.py +250 -123
- pixeltable/io/utils.py +4 -4
- pixeltable/iterators/__init__.py +2 -1
- pixeltable/iterators/audio.py +26 -25
- pixeltable/iterators/base.py +9 -3
- pixeltable/iterators/document.py +112 -78
- pixeltable/iterators/image.py +12 -15
- pixeltable/iterators/string.py +11 -4
- pixeltable/iterators/video.py +523 -120
- pixeltable/metadata/__init__.py +14 -3
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_18.py +2 -2
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_20.py +2 -2
- pixeltable/metadata/converters/convert_21.py +2 -2
- pixeltable/metadata/converters/convert_22.py +2 -2
- pixeltable/metadata/converters/convert_24.py +2 -2
- pixeltable/metadata/converters/convert_25.py +2 -2
- pixeltable/metadata/converters/convert_26.py +2 -2
- pixeltable/metadata/converters/convert_29.py +4 -4
- pixeltable/metadata/converters/convert_30.py +34 -21
- pixeltable/metadata/converters/convert_34.py +2 -2
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/convert_41.py +12 -0
- pixeltable/metadata/converters/convert_42.py +9 -0
- pixeltable/metadata/converters/convert_43.py +44 -0
- pixeltable/metadata/converters/util.py +20 -31
- pixeltable/metadata/notes.py +9 -0
- pixeltable/metadata/schema.py +140 -53
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +382 -115
- pixeltable/share/__init__.py +1 -1
- pixeltable/share/packager.py +547 -83
- pixeltable/share/protocol/__init__.py +33 -0
- pixeltable/share/protocol/common.py +165 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +119 -0
- pixeltable/share/publish.py +257 -59
- pixeltable/store.py +311 -194
- pixeltable/type_system.py +373 -211
- pixeltable/utils/__init__.py +2 -3
- pixeltable/utils/arrow.py +131 -17
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +346 -0
- pixeltable/utils/coco.py +6 -6
- pixeltable/utils/code.py +3 -3
- pixeltable/utils/console_output.py +4 -1
- pixeltable/utils/coroutine.py +6 -23
- pixeltable/utils/dbms.py +32 -6
- pixeltable/utils/description_helper.py +4 -5
- pixeltable/utils/documents.py +7 -18
- pixeltable/utils/exception_handler.py +7 -30
- pixeltable/utils/filecache.py +6 -6
- pixeltable/utils/formatter.py +86 -48
- pixeltable/utils/gcs_store.py +295 -0
- pixeltable/utils/http.py +133 -0
- pixeltable/utils/http_server.py +2 -3
- pixeltable/utils/iceberg.py +1 -2
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +90 -0
- pixeltable/utils/local_store.py +322 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +573 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +5 -6
- pixeltable/utils/s3_store.py +527 -0
- pixeltable/utils/sql.py +26 -0
- pixeltable/utils/system.py +30 -0
- pixeltable-0.5.7.dist-info/METADATA +579 -0
- pixeltable-0.5.7.dist-info/RECORD +227 -0
- {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
- pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/catalog/named_function.py +0 -40
- pixeltable/ext/__init__.py +0 -17
- pixeltable/ext/functions/__init__.py +0 -11
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/utils/media_store.py +0 -77
- pixeltable/utils/s3.py +0 -17
- pixeltable-0.3.14.dist-info/METADATA +0 -434
- pixeltable-0.3.14.dist-info/RECORD +0 -186
- pixeltable-0.3.14.dist-info/entry_points.txt +0 -3
- {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
pixeltable/exprs/literal.py
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import base64
|
|
3
4
|
import datetime
|
|
4
|
-
|
|
5
|
+
import uuid
|
|
6
|
+
from typing import Any
|
|
5
7
|
|
|
6
8
|
import numpy as np
|
|
7
9
|
import sqlalchemy as sql
|
|
@@ -16,7 +18,9 @@ from .sql_element_cache import SqlElementCache
|
|
|
16
18
|
|
|
17
19
|
|
|
18
20
|
class Literal(Expr):
|
|
19
|
-
|
|
21
|
+
val: Any
|
|
22
|
+
|
|
23
|
+
def __init__(self, val: Any, col_type: ts.ColumnType | None = None):
|
|
20
24
|
if col_type is not None:
|
|
21
25
|
val = col_type.create_literal(val)
|
|
22
26
|
else:
|
|
@@ -40,7 +44,7 @@ class Literal(Expr):
|
|
|
40
44
|
self.val = val
|
|
41
45
|
self.id = self._create_id()
|
|
42
46
|
|
|
43
|
-
def default_column_name(self) ->
|
|
47
|
+
def default_column_name(self) -> str | None:
|
|
44
48
|
return 'Literal'
|
|
45
49
|
|
|
46
50
|
def __str__(self) -> str:
|
|
@@ -53,6 +57,9 @@ class Literal(Expr):
|
|
|
53
57
|
if self.col_type.is_date_type():
|
|
54
58
|
assert isinstance(self.val, datetime.date)
|
|
55
59
|
return f"'{self.val.isoformat()}'"
|
|
60
|
+
if self.col_type.is_uuid_type():
|
|
61
|
+
assert isinstance(self.val, uuid.UUID)
|
|
62
|
+
return f"'{self.val}'"
|
|
56
63
|
if self.col_type.is_array_type():
|
|
57
64
|
assert isinstance(self.val, np.ndarray)
|
|
58
65
|
return str(self.val.tolist())
|
|
@@ -67,7 +74,7 @@ class Literal(Expr):
|
|
|
67
74
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
68
75
|
return [*super()._id_attrs(), ('val', self.val)]
|
|
69
76
|
|
|
70
|
-
def sql_expr(self, _: SqlElementCache) ->
|
|
77
|
+
def sql_expr(self, _: SqlElementCache) -> sql.ColumnElement | None:
|
|
71
78
|
# Return a sql object so that constants can participate in SQL expressions
|
|
72
79
|
return sql.sql.expression.literal(self.val, type_=self.col_type.to_sa_type())
|
|
73
80
|
|
|
@@ -89,13 +96,21 @@ class Literal(Expr):
|
|
|
89
96
|
assert isinstance(self.val, datetime.date)
|
|
90
97
|
encoded_val = self.val.isoformat()
|
|
91
98
|
return {'val': encoded_val, 'val_t': self.col_type._type.name, **super()._as_dict()}
|
|
99
|
+
elif self.col_type.is_uuid_type():
|
|
100
|
+
assert isinstance(self.val, uuid.UUID)
|
|
101
|
+
encoded_val = str(self.val)
|
|
102
|
+
return {'val': encoded_val, 'val_t': self.col_type._type.name, **super()._as_dict()}
|
|
103
|
+
elif self.col_type.is_binary_type():
|
|
104
|
+
assert isinstance(self.val, bytes)
|
|
105
|
+
encoded_val = base64.b64encode(self.val).decode('utf-8')
|
|
106
|
+
return {'val': encoded_val, 'val_t': self.col_type._type.name, **super()._as_dict()}
|
|
92
107
|
elif self.col_type.is_array_type():
|
|
93
108
|
assert isinstance(self.val, np.ndarray)
|
|
94
109
|
return {'val': self.val.tolist(), 'val_t': self.col_type._type.name, **super()._as_dict()}
|
|
95
110
|
else:
|
|
96
111
|
return {'val': self.val, **super()._as_dict()}
|
|
97
112
|
|
|
98
|
-
def as_literal(self) ->
|
|
113
|
+
def as_literal(self) -> Literal | None:
|
|
99
114
|
return self
|
|
100
115
|
|
|
101
116
|
@classmethod
|
|
@@ -110,6 +125,13 @@ class Literal(Expr):
|
|
|
110
125
|
dt = datetime.datetime.fromisoformat(d['val'])
|
|
111
126
|
assert dt.tzinfo == datetime.timezone.utc # Must be UTC in the database
|
|
112
127
|
return cls(dt)
|
|
128
|
+
elif val_t == ts.ColumnType.Type.UUID.name:
|
|
129
|
+
uuid_val = uuid.UUID(d['val'])
|
|
130
|
+
return cls(uuid_val)
|
|
131
|
+
elif val_t == ts.ColumnType.Type.BINARY.name:
|
|
132
|
+
assert isinstance(d['val'], str)
|
|
133
|
+
bytes_val = base64.b64decode(d['val'].encode('utf-8'))
|
|
134
|
+
return cls(bytes_val)
|
|
113
135
|
elif val_t == ts.ColumnType.Type.ARRAY.name:
|
|
114
136
|
arrays = np.array(d['val'])
|
|
115
137
|
return cls(arrays)
|
pixeltable/exprs/method_ref.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any
|
|
1
|
+
from typing import Any
|
|
2
2
|
|
|
3
3
|
import sqlalchemy as sql
|
|
4
4
|
|
|
@@ -55,7 +55,7 @@ class MethodRef(Expr):
|
|
|
55
55
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
56
56
|
return [*super()._id_attrs(), ('method_name', self.method_name)]
|
|
57
57
|
|
|
58
|
-
def sql_expr(self, _: SqlElementCache) ->
|
|
58
|
+
def sql_expr(self, _: SqlElementCache) -> sql.ColumnElement | None:
|
|
59
59
|
return None
|
|
60
60
|
|
|
61
61
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
pixeltable/exprs/object_ref.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import Any
|
|
3
|
+
from typing import Any
|
|
4
4
|
|
|
5
5
|
import sqlalchemy as sql
|
|
6
6
|
|
|
@@ -43,7 +43,7 @@ class ObjectRef(Expr):
|
|
|
43
43
|
def _equals(self, other: ObjectRef) -> bool:
|
|
44
44
|
return self.id == other.id
|
|
45
45
|
|
|
46
|
-
def sql_expr(self, _: SqlElementCache) ->
|
|
46
|
+
def sql_expr(self, _: SqlElementCache) -> sql.ColumnElement | None:
|
|
47
47
|
return None
|
|
48
48
|
|
|
49
49
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
pixeltable/exprs/row_builder.py
CHANGED
|
@@ -1,21 +1,25 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import dataclasses
|
|
3
4
|
import sys
|
|
4
5
|
import time
|
|
5
|
-
from
|
|
6
|
-
from typing import Any, Iterable, Optional, Sequence
|
|
6
|
+
from typing import TYPE_CHECKING, Any, Iterable, NamedTuple, Sequence, TypeVar
|
|
7
7
|
from uuid import UUID
|
|
8
8
|
|
|
9
9
|
import numpy as np
|
|
10
|
+
import sqlalchemy as sql
|
|
10
11
|
|
|
11
|
-
from pixeltable import catalog, exceptions as excs, utils
|
|
12
|
+
from pixeltable import catalog, exceptions as excs, exprs, utils
|
|
12
13
|
from pixeltable.env import Env
|
|
13
|
-
from pixeltable.utils.
|
|
14
|
+
from pixeltable.utils.misc import non_none_dict_factory
|
|
14
15
|
|
|
15
16
|
from .data_row import DataRow
|
|
16
17
|
from .expr import Expr, ExprScope
|
|
17
18
|
from .expr_set import ExprSet
|
|
18
19
|
|
|
20
|
+
if TYPE_CHECKING:
|
|
21
|
+
from .column_ref import ColumnRef
|
|
22
|
+
|
|
19
23
|
|
|
20
24
|
class ExecProfile:
|
|
21
25
|
def __init__(self, row_builder: RowBuilder):
|
|
@@ -35,8 +39,7 @@ class ExecProfile:
|
|
|
35
39
|
)
|
|
36
40
|
|
|
37
41
|
|
|
38
|
-
|
|
39
|
-
class ColumnSlotIdx:
|
|
42
|
+
class ColumnSlotIdx(NamedTuple):
|
|
40
43
|
"""Info for how to locate materialized column in DataRow
|
|
41
44
|
TODO: can this be integrated into RowBuilder directly?
|
|
42
45
|
"""
|
|
@@ -50,6 +53,12 @@ class RowBuilder:
|
|
|
50
53
|
|
|
51
54
|
For ColumnRefs to unstored iterator columns:
|
|
52
55
|
- in order for them to be executable, we also record the iterator args and pass them to the ColumnRef
|
|
56
|
+
|
|
57
|
+
Args:
|
|
58
|
+
output_exprs: list of Exprs to be evaluated
|
|
59
|
+
columns: list of columns to be materialized
|
|
60
|
+
input_exprs: list of Exprs that are excluded from evaluation (because they're already materialized)
|
|
61
|
+
TODO: enforce that output_exprs doesn't overlap with input_exprs?
|
|
53
62
|
"""
|
|
54
63
|
|
|
55
64
|
unique_exprs: ExprSet
|
|
@@ -63,9 +72,13 @@ class RowBuilder:
|
|
|
63
72
|
|
|
64
73
|
input_exprs: ExprSet
|
|
65
74
|
|
|
66
|
-
|
|
75
|
+
tbl: catalog.TableVersion | None # reference table of the RowBuilder; used to identify pk columns for writes
|
|
76
|
+
for_view_load: bool # True if this RowBuilder represents a view load
|
|
77
|
+
|
|
78
|
+
table_columns: dict[catalog.Column, int | None] # value: slot idx, if the result of an expr
|
|
67
79
|
default_eval_ctx: EvalCtx
|
|
68
80
|
unstored_iter_args: dict[UUID, Expr]
|
|
81
|
+
unstored_iter_outputs: dict[UUID, list['ColumnRef']]
|
|
69
82
|
|
|
70
83
|
# transitive dependents for the purpose of exception propagation: an exception for slot i is propagated to
|
|
71
84
|
# _exc_dependents[i]
|
|
@@ -84,7 +97,12 @@ class RowBuilder:
|
|
|
84
97
|
# (a subexpr can be shared across multiple output exprs)
|
|
85
98
|
output_expr_ids: list[set[int]]
|
|
86
99
|
|
|
87
|
-
|
|
100
|
+
img_slot_idxs: list[int] # Indices of image slots
|
|
101
|
+
media_slot_idxs: list[int] # Indices of non-image media slots
|
|
102
|
+
array_slot_idxs: list[int] # Indices of array slots
|
|
103
|
+
json_slot_idxs: list[int] # Indices of json slots
|
|
104
|
+
|
|
105
|
+
@dataclasses.dataclass
|
|
88
106
|
class EvalCtx:
|
|
89
107
|
"""Context for evaluating a set of target exprs"""
|
|
90
108
|
|
|
@@ -93,41 +111,45 @@ class RowBuilder:
|
|
|
93
111
|
target_slot_idxs: list[int] # slot idxs of target exprs; might contain duplicates
|
|
94
112
|
target_exprs: list[Expr] # exprs corresponding to target_slot_idxs
|
|
95
113
|
|
|
96
|
-
def __init__(
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
114
|
+
def __init__(
|
|
115
|
+
self,
|
|
116
|
+
output_exprs: Sequence[Expr],
|
|
117
|
+
columns: Sequence[catalog.Column],
|
|
118
|
+
input_exprs: Iterable[Expr],
|
|
119
|
+
tbl: catalog.TableVersion | None = None,
|
|
120
|
+
for_view_load: bool = False,
|
|
121
|
+
):
|
|
122
|
+
from .column_property_ref import ColumnPropertyRef
|
|
123
|
+
from .column_ref import ColumnRef
|
|
124
|
+
|
|
104
125
|
self.unique_exprs: ExprSet[Expr] = ExprSet() # dependencies precede their dependents
|
|
105
126
|
self.next_slot_idx = 0
|
|
106
127
|
|
|
107
|
-
# record input
|
|
128
|
+
# record input exprs; make copies to avoid reusing execution state
|
|
108
129
|
unique_input_exprs = [self._record_unique_expr(e.copy(), recursive=False) for e in input_exprs]
|
|
130
|
+
|
|
109
131
|
self.input_expr_slot_idxs = {e.slot_idx for e in unique_input_exprs}
|
|
110
132
|
|
|
111
133
|
resolve_cols = set(columns)
|
|
112
134
|
self.output_exprs = ExprSet(
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
for e in output_exprs
|
|
116
|
-
]
|
|
135
|
+
self._record_unique_expr(e.copy().resolve_computed_cols(resolve_cols=resolve_cols), recursive=True)
|
|
136
|
+
for e in output_exprs
|
|
117
137
|
)
|
|
118
138
|
|
|
119
139
|
# if init(columns):
|
|
120
|
-
# - we are creating table rows and need to record columns for
|
|
140
|
+
# - we are creating table rows and need to record columns for create_store_table_row()
|
|
121
141
|
# - output_exprs materialize those columns
|
|
122
142
|
# - input_exprs are ColumnRefs of the non-computed columns (ie, what needs to be provided as input)
|
|
123
143
|
# - media validation:
|
|
124
144
|
# * for write-validated columns, we need to create validating ColumnRefs
|
|
125
145
|
# * further references to that column (eg, computed cols) need to resolve to the validating ColumnRef
|
|
126
|
-
from .column_ref import ColumnRef
|
|
127
146
|
|
|
128
|
-
self.
|
|
147
|
+
self.for_view_load = for_view_load
|
|
148
|
+
self.tbl = tbl
|
|
149
|
+
self.table_columns = {}
|
|
129
150
|
self.input_exprs = ExprSet()
|
|
130
151
|
validating_colrefs: dict[Expr, Expr] = {} # key: non-validating colref, value: corresp. validating colref
|
|
152
|
+
|
|
131
153
|
for col in columns:
|
|
132
154
|
expr: Expr
|
|
133
155
|
if col.is_computed:
|
|
@@ -168,24 +190,39 @@ class RowBuilder:
|
|
|
168
190
|
# because that would cause them to be evaluated for every single row
|
|
169
191
|
# - the separate eval ctx allows the ColumnRef to materialize the iterator args only when the underlying
|
|
170
192
|
# iterated object changes
|
|
193
|
+
|
|
171
194
|
col_refs = [e for e in self.unique_exprs if isinstance(e, ColumnRef)]
|
|
172
195
|
|
|
173
196
|
def refs_unstored_iter_col(col_ref: ColumnRef) -> bool:
|
|
174
|
-
tbl = col_ref.col.
|
|
175
|
-
return (
|
|
176
|
-
tbl.get().is_component_view and tbl.get().is_iterator_column(col_ref.col) and not col_ref.col.is_stored
|
|
177
|
-
)
|
|
197
|
+
tbl = col_ref.col.get_tbl()
|
|
198
|
+
return tbl.is_component_view and tbl.is_iterator_column(col_ref.col) and not col_ref.col.is_stored
|
|
178
199
|
|
|
179
200
|
unstored_iter_col_refs = [col_ref for col_ref in col_refs if refs_unstored_iter_col(col_ref)]
|
|
180
|
-
component_views = [col_ref.col.
|
|
181
|
-
unstored_iter_args = {view.id: view.
|
|
201
|
+
component_views = [col_ref.col.get_tbl() for col_ref in unstored_iter_col_refs]
|
|
202
|
+
unstored_iter_args = {view.id: view.iterator_args.copy() for view in component_views}
|
|
203
|
+
|
|
204
|
+
# the *stored* output columns of the unstored iterators
|
|
205
|
+
self.unstored_iter_outputs = {
|
|
206
|
+
view.id: [
|
|
207
|
+
self._record_unique_expr(ColumnRef(col), recursive=True)
|
|
208
|
+
for col in view.iterator_columns()
|
|
209
|
+
if col.is_stored
|
|
210
|
+
]
|
|
211
|
+
for view in component_views
|
|
212
|
+
}
|
|
213
|
+
|
|
182
214
|
self.unstored_iter_args = {
|
|
183
|
-
id: self._record_unique_expr(
|
|
215
|
+
id: self._record_unique_expr(args, recursive=True) for id, args in unstored_iter_args.items()
|
|
184
216
|
}
|
|
185
217
|
|
|
218
|
+
unstored_iter_col_refs = [
|
|
219
|
+
self._record_unique_expr(col_ref, recursive=True) for col_ref in unstored_iter_col_refs
|
|
220
|
+
]
|
|
221
|
+
|
|
186
222
|
for col_ref in unstored_iter_col_refs:
|
|
187
|
-
iter_arg_ctx = self.create_eval_ctx([unstored_iter_args[col_ref.col.
|
|
188
|
-
col_ref.
|
|
223
|
+
iter_arg_ctx = self.create_eval_ctx([self.unstored_iter_args[col_ref.col.get_tbl().id]])
|
|
224
|
+
iter_outputs = self.unstored_iter_outputs[col_ref.col.get_tbl().id]
|
|
225
|
+
col_ref.set_iter_arg_ctx(iter_arg_ctx, iter_outputs)
|
|
189
226
|
|
|
190
227
|
# we guarantee that we can compute the expr DAG in a single front-to-back pass
|
|
191
228
|
for i, expr in enumerate(self.unique_exprs):
|
|
@@ -196,14 +233,13 @@ class RowBuilder:
|
|
|
196
233
|
# self.dependents = np.zeros((self.num_materialized, self.num_materialized), dtype=bool)
|
|
197
234
|
self.dependencies = np.zeros((self.num_materialized, self.num_materialized), dtype=bool)
|
|
198
235
|
exc_dependencies: list[set[int]] = [set() for _ in range(self.num_materialized)]
|
|
199
|
-
from .column_property_ref import ColumnPropertyRef
|
|
200
236
|
|
|
201
237
|
for expr in self.unique_exprs:
|
|
202
238
|
if expr.slot_idx in self.input_expr_slot_idxs:
|
|
203
239
|
# this is input and therefore doesn't depend on other exprs
|
|
204
240
|
continue
|
|
205
241
|
# error properties don't have exceptions themselves
|
|
206
|
-
if isinstance(expr, ColumnPropertyRef) and expr.
|
|
242
|
+
if isinstance(expr, ColumnPropertyRef) and expr.is_cellmd_prop():
|
|
207
243
|
continue
|
|
208
244
|
dependency_idxs = [d.slot_idx for d in expr.dependencies()]
|
|
209
245
|
self.dependencies[expr.slot_idx, dependency_idxs] = True
|
|
@@ -229,13 +265,32 @@ class RowBuilder:
|
|
|
229
265
|
for e in self.output_exprs:
|
|
230
266
|
self._record_output_expr_id(e, e.slot_idx)
|
|
231
267
|
|
|
268
|
+
self.img_slot_idxs = [e.slot_idx for e in self.unique_exprs if e.col_type.is_image_type()]
|
|
269
|
+
self.media_slot_idxs = [
|
|
270
|
+
e.slot_idx for e in self.unique_exprs if e.col_type.is_media_type() and not e.col_type.is_image_type()
|
|
271
|
+
]
|
|
272
|
+
self.array_slot_idxs = [e.slot_idx for e in self.unique_exprs if e.col_type.is_array_type()]
|
|
273
|
+
self.json_slot_idxs = [e.slot_idx for e in self.unique_exprs if e.col_type.is_json_type()]
|
|
274
|
+
|
|
232
275
|
def add_table_column(self, col: catalog.Column, slot_idx: int) -> None:
|
|
233
|
-
"""Record
|
|
234
|
-
self.
|
|
276
|
+
"""Record an output column for which the value is produced via expr evaluation"""
|
|
277
|
+
assert self.tbl is not None
|
|
278
|
+
assert col.is_stored
|
|
279
|
+
self.table_columns[col] = slot_idx
|
|
235
280
|
|
|
236
|
-
def
|
|
237
|
-
"""
|
|
238
|
-
|
|
281
|
+
def add_table_columns(self, cols: list[catalog.Column]) -> None:
|
|
282
|
+
"""Record output columns whose values are materialized into DataRow.cell_vals"""
|
|
283
|
+
for col in cols:
|
|
284
|
+
self.table_columns[col] = None
|
|
285
|
+
|
|
286
|
+
@property
|
|
287
|
+
def media_output_col_info(self) -> list[ColumnSlotIdx]:
|
|
288
|
+
"""Return slot idxs for media output columns whose values are produced by expr evaluation"""
|
|
289
|
+
return [
|
|
290
|
+
ColumnSlotIdx(col, slot_idx)
|
|
291
|
+
for col, slot_idx in self.table_columns.items()
|
|
292
|
+
if col.col_type.is_media_type() and slot_idx is not None
|
|
293
|
+
]
|
|
239
294
|
|
|
240
295
|
@property
|
|
241
296
|
def num_materialized(self) -> int:
|
|
@@ -250,7 +305,9 @@ class RowBuilder:
|
|
|
250
305
|
self.next_slot_idx += 1
|
|
251
306
|
return result
|
|
252
307
|
|
|
253
|
-
|
|
308
|
+
T = TypeVar('T', bound=Expr)
|
|
309
|
+
|
|
310
|
+
def _record_unique_expr(self, expr: T, recursive: bool) -> T:
|
|
254
311
|
"""Records the expr if it's not a duplicate and assigns a slot idx to expr and its components"
|
|
255
312
|
Returns:
|
|
256
313
|
the unique expr
|
|
@@ -279,7 +336,7 @@ class RowBuilder:
|
|
|
279
336
|
self._record_output_expr_id(d, output_expr_id)
|
|
280
337
|
|
|
281
338
|
def _compute_dependencies(
|
|
282
|
-
self, target_slot_idxs: list[int], excluded_slot_idxs: list[int], target_scope:
|
|
339
|
+
self, target_slot_idxs: list[int], excluded_slot_idxs: list[int], target_scope: ExprScope | None = None
|
|
283
340
|
) -> list[int]:
|
|
284
341
|
"""Compute exprs needed to materialize the given target slots, excluding 'excluded_slot_idxs'
|
|
285
342
|
|
|
@@ -333,7 +390,7 @@ class RowBuilder:
|
|
|
333
390
|
self.__set_slot_idxs_aux(c)
|
|
334
391
|
|
|
335
392
|
def get_dependencies(
|
|
336
|
-
self, targets: Iterable[Expr], exclude:
|
|
393
|
+
self, targets: Iterable[Expr], exclude: Iterable[Expr] | None = None, limit_scope: bool = True
|
|
337
394
|
) -> list[Expr]:
|
|
338
395
|
"""
|
|
339
396
|
Return list of dependencies needed to evaluate the given target exprs (expressed as slot idxs).
|
|
@@ -351,7 +408,7 @@ class RowBuilder:
|
|
|
351
408
|
return []
|
|
352
409
|
# make sure we only refer to recorded exprs
|
|
353
410
|
targets = [self.unique_exprs[e] for e in targets]
|
|
354
|
-
target_scope:
|
|
411
|
+
target_scope: ExprScope | None = None
|
|
355
412
|
if limit_scope:
|
|
356
413
|
# make sure all targets are from the same scope
|
|
357
414
|
target_scopes = {e.scope() for e in targets}
|
|
@@ -369,7 +426,7 @@ class RowBuilder:
|
|
|
369
426
|
return [self.unique_exprs[id] for id in result_ids]
|
|
370
427
|
|
|
371
428
|
def create_eval_ctx(
|
|
372
|
-
self, targets: Iterable[Expr], exclude:
|
|
429
|
+
self, targets: Iterable[Expr], exclude: Iterable[Expr] | None = None, limit_scope: bool = True
|
|
373
430
|
) -> EvalCtx:
|
|
374
431
|
"""Return EvalCtx for targets"""
|
|
375
432
|
targets = list(targets)
|
|
@@ -398,9 +455,9 @@ class RowBuilder:
|
|
|
398
455
|
self,
|
|
399
456
|
data_row: DataRow,
|
|
400
457
|
ctx: EvalCtx,
|
|
401
|
-
profile:
|
|
458
|
+
profile: ExecProfile | None = None,
|
|
402
459
|
ignore_errors: bool = False,
|
|
403
|
-
force_eval:
|
|
460
|
+
force_eval: ExprScope | None = None,
|
|
404
461
|
) -> None:
|
|
405
462
|
"""
|
|
406
463
|
Populates the slots in data_row given in ctx.
|
|
@@ -429,33 +486,76 @@ class RowBuilder:
|
|
|
429
486
|
expr, f'expression {expr}', data_row.get_exc(expr.slot_idx), exc_tb, input_vals, 0
|
|
430
487
|
) from exc
|
|
431
488
|
|
|
432
|
-
def
|
|
433
|
-
|
|
489
|
+
def create_store_table_row(
|
|
490
|
+
self, data_row: DataRow, cols_with_excs: set[int] | None, pk: tuple[int, ...]
|
|
491
|
+
) -> tuple[list[Any], int]:
|
|
492
|
+
"""Create a store table row from the slots that have an output column assigned
|
|
434
493
|
|
|
435
|
-
Return tuple[
|
|
494
|
+
Return tuple[list of row values in `self.table_columns` order, # of exceptions]
|
|
436
495
|
This excludes system columns.
|
|
496
|
+
Row values are converted to their store type.
|
|
437
497
|
"""
|
|
498
|
+
from pixeltable.exprs.column_property_ref import ColumnPropertyRef
|
|
499
|
+
|
|
438
500
|
num_excs = 0
|
|
439
|
-
table_row:
|
|
440
|
-
|
|
441
|
-
|
|
501
|
+
table_row: list[Any] = list(pk)
|
|
502
|
+
# Nulls in JSONB columns need to be stored as sql.sql.null(), otherwise it stores a json 'null'
|
|
503
|
+
for col, slot_idx in self.table_columns.items():
|
|
504
|
+
if col.id in data_row.cell_vals:
|
|
505
|
+
table_row.append(data_row.cell_vals[col.id])
|
|
506
|
+
if col.stores_cellmd:
|
|
507
|
+
if data_row.cell_md[col.id] is None:
|
|
508
|
+
table_row.append(sql.sql.null())
|
|
509
|
+
else:
|
|
510
|
+
# we want to minimize the size of the stored dict and use dict_factory to remove Nones
|
|
511
|
+
md = dataclasses.asdict(data_row.cell_md[col.id], dict_factory=non_none_dict_factory)
|
|
512
|
+
assert len(md) > 0
|
|
513
|
+
table_row.append(md)
|
|
514
|
+
if slot_idx is not None and data_row.has_exc(slot_idx):
|
|
515
|
+
num_excs += 1
|
|
516
|
+
if cols_with_excs is not None:
|
|
517
|
+
cols_with_excs.add(col.id)
|
|
518
|
+
continue
|
|
519
|
+
|
|
442
520
|
if data_row.has_exc(slot_idx):
|
|
443
|
-
# exceptions get stored in the errortype/-msg columns
|
|
444
521
|
exc = data_row.get_exc(slot_idx)
|
|
445
522
|
num_excs += 1
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
table_row
|
|
449
|
-
|
|
523
|
+
if cols_with_excs is not None:
|
|
524
|
+
cols_with_excs.add(col.id)
|
|
525
|
+
table_row.append(sql.sql.null() if col.col_type.is_json_type() else None)
|
|
526
|
+
if col.stores_cellmd:
|
|
527
|
+
# exceptions get stored in the errortype/-msg properties of the cellmd column
|
|
528
|
+
table_row.append(ColumnPropertyRef.create_cellmd_exc(exc))
|
|
450
529
|
else:
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
val = data_row.get_stored_val(slot_idx, col.sa_col.type)
|
|
456
|
-
table_row[col.store_name()] = val
|
|
457
|
-
# we unfortunately need to set these, even if there are no errors
|
|
458
|
-
table_row[col.errortype_store_name()] = None
|
|
459
|
-
table_row[col.errormsg_store_name()] = None
|
|
530
|
+
val = data_row.get_stored_val(slot_idx, col.sa_col_type)
|
|
531
|
+
table_row.append(val)
|
|
532
|
+
if col.stores_cellmd:
|
|
533
|
+
table_row.append(sql.sql.null()) # placeholder for cellmd column
|
|
460
534
|
|
|
461
535
|
return table_row, num_excs
|
|
536
|
+
|
|
537
|
+
def store_column_names(self) -> list[str]:
|
|
538
|
+
"""
|
|
539
|
+
Returns the list of store column names corresponding to the table_columns of this RowBuilder.
|
|
540
|
+
The list begins with the store table's primary key column names; each table column then contributes
|
|
541
|
+
its store name, followed by its cellmd store name if the column stores cellmd.
|
|
542
|
+
"""
|
|
543
|
+
assert self.tbl is not None, self.table_columns
|
|
544
|
+
store_col_names: list[str] = [pk_col.name for pk_col in self.tbl.store_tbl.pk_columns()]
|
|
545
|
+
|
|
546
|
+
for col in self.table_columns:
|
|
547
|
+
store_col_names.append(col.store_name())
|
|
548
|
+
if col.stores_cellmd:
|
|
549
|
+
store_col_names.append(col.cellmd_store_name())
|
|
550
|
+
|
|
551
|
+
return store_col_names
|
|
552
|
+
|
|
553
|
+
def make_row(self) -> exprs.DataRow:
|
|
554
|
+
"""Creates a new DataRow with the current row_builder's configuration."""
|
|
555
|
+
return exprs.DataRow(
|
|
556
|
+
size=self.num_materialized,
|
|
557
|
+
img_slot_idxs=self.img_slot_idxs,
|
|
558
|
+
media_slot_idxs=self.media_slot_idxs,
|
|
559
|
+
array_slot_idxs=self.array_slot_idxs,
|
|
560
|
+
json_slot_idxs=self.json_slot_idxs,
|
|
561
|
+
)
|
pixeltable/exprs/rowid_ref.py
CHANGED
|
@@ -1,17 +1,21 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
import logging
|
|
4
|
+
from typing import Any, cast
|
|
4
5
|
from uuid import UUID
|
|
5
6
|
|
|
6
7
|
import sqlalchemy as sql
|
|
7
8
|
|
|
8
9
|
from pixeltable import catalog, type_system as ts
|
|
10
|
+
from pixeltable.catalog.table_version import TableVersionKey
|
|
9
11
|
|
|
10
12
|
from .data_row import DataRow
|
|
11
13
|
from .expr import Expr
|
|
12
14
|
from .row_builder import RowBuilder
|
|
13
15
|
from .sql_element_cache import SqlElementCache
|
|
14
16
|
|
|
17
|
+
_logger = logging.getLogger('pixeltable')
|
|
18
|
+
|
|
15
19
|
|
|
16
20
|
class RowidRef(Expr):
|
|
17
21
|
"""A reference to a part of a table rowid
|
|
@@ -22,18 +26,18 @@ class RowidRef(Expr):
|
|
|
22
26
|
(with and without a TableVersion).
|
|
23
27
|
"""
|
|
24
28
|
|
|
25
|
-
tbl:
|
|
26
|
-
normalized_base:
|
|
29
|
+
tbl: catalog.TableVersionHandle | None
|
|
30
|
+
normalized_base: catalog.TableVersionHandle | None
|
|
27
31
|
tbl_id: UUID
|
|
28
32
|
normalized_base_id: UUID
|
|
29
33
|
rowid_component_idx: int
|
|
30
34
|
|
|
31
35
|
def __init__(
|
|
32
36
|
self,
|
|
33
|
-
tbl:
|
|
37
|
+
tbl: catalog.TableVersionHandle | None,
|
|
34
38
|
idx: int,
|
|
35
|
-
tbl_id:
|
|
36
|
-
normalized_base_id:
|
|
39
|
+
tbl_id: UUID | None = None,
|
|
40
|
+
normalized_base_id: UUID | None = None,
|
|
37
41
|
):
|
|
38
42
|
super().__init__(ts.IntType(nullable=False))
|
|
39
43
|
self.tbl = tbl
|
|
@@ -54,7 +58,7 @@ class RowidRef(Expr):
|
|
|
54
58
|
self.rowid_component_idx = idx
|
|
55
59
|
self.id = self._create_id()
|
|
56
60
|
|
|
57
|
-
def default_column_name(self) ->
|
|
61
|
+
def default_column_name(self) -> str | None:
|
|
58
62
|
return str(self)
|
|
59
63
|
|
|
60
64
|
def _equals(self, other: RowidRef) -> bool:
|
|
@@ -74,7 +78,11 @@ class RowidRef(Expr):
|
|
|
74
78
|
# check if this is the pos column of a component view
|
|
75
79
|
from pixeltable import store
|
|
76
80
|
|
|
77
|
-
tbl =
|
|
81
|
+
tbl = (
|
|
82
|
+
self.tbl.get()
|
|
83
|
+
if self.tbl is not None
|
|
84
|
+
else catalog.Catalog.get().get_tbl_version(TableVersionKey(self.tbl_id, None, None))
|
|
85
|
+
)
|
|
78
86
|
if (
|
|
79
87
|
tbl.is_component_view
|
|
80
88
|
and self.rowid_component_idx == cast(store.StoreComponentView, tbl.store_tbl).pos_col_idx
|
|
@@ -95,8 +103,13 @@ class RowidRef(Expr):
|
|
|
95
103
|
self.tbl = tbl.tbl_version
|
|
96
104
|
self.tbl_id = self.tbl.id
|
|
97
105
|
|
|
98
|
-
def sql_expr(self, _: SqlElementCache) ->
|
|
99
|
-
tbl =
|
|
106
|
+
def sql_expr(self, _: SqlElementCache) -> sql.ColumnElement | None:
|
|
107
|
+
tbl = (
|
|
108
|
+
self.tbl.get()
|
|
109
|
+
if self.tbl is not None
|
|
110
|
+
else catalog.Catalog.get().get_tbl_version(TableVersionKey(self.tbl_id, None, None))
|
|
111
|
+
)
|
|
112
|
+
assert tbl.is_validated
|
|
100
113
|
rowid_cols = tbl.store_tbl.rowid_columns()
|
|
101
114
|
assert self.rowid_component_idx <= len(rowid_cols), (
|
|
102
115
|
f'{self.rowid_component_idx} not consistent with {rowid_cols}'
|
|
@@ -107,6 +120,8 @@ class RowidRef(Expr):
|
|
|
107
120
|
data_row[self.slot_idx] = data_row.pk[self.rowid_component_idx]
|
|
108
121
|
|
|
109
122
|
def _as_dict(self) -> dict:
|
|
123
|
+
# TODO: Serialize the full TableVersionHandle, not just the UUID
|
|
124
|
+
assert self.tbl is None or self.tbl.anchor_tbl_id is None # TODO: support anchor_tbl_id for view-over-replica
|
|
110
125
|
return {
|
|
111
126
|
'tbl_id': str(self.tbl_id),
|
|
112
127
|
'normalized_base_id': str(self.normalized_base_id),
|