pixeltable 0.3.10__py3-none-any.whl → 0.3.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +2 -2
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +2 -1
- pixeltable/catalog/catalog.py +370 -93
- pixeltable/catalog/column.py +6 -4
- pixeltable/catalog/dir.py +5 -5
- pixeltable/catalog/globals.py +14 -16
- pixeltable/catalog/insertable_table.py +6 -8
- pixeltable/catalog/path.py +14 -7
- pixeltable/catalog/table.py +72 -62
- pixeltable/catalog/table_version.py +137 -107
- pixeltable/catalog/table_version_handle.py +3 -0
- pixeltable/catalog/table_version_path.py +1 -1
- pixeltable/catalog/view.py +10 -14
- pixeltable/dataframe.py +5 -3
- pixeltable/env.py +108 -42
- pixeltable/exec/__init__.py +2 -0
- pixeltable/exec/aggregation_node.py +6 -8
- pixeltable/exec/cache_prefetch_node.py +4 -7
- pixeltable/exec/component_iteration_node.py +1 -3
- pixeltable/exec/data_row_batch.py +1 -2
- pixeltable/exec/exec_context.py +1 -1
- pixeltable/exec/exec_node.py +1 -2
- pixeltable/exec/expr_eval/__init__.py +2 -0
- pixeltable/exec/expr_eval/evaluators.py +137 -20
- pixeltable/exec/expr_eval/expr_eval_node.py +43 -64
- pixeltable/exec/expr_eval/globals.py +68 -7
- pixeltable/exec/expr_eval/schedulers.py +25 -23
- pixeltable/exec/in_memory_data_node.py +8 -6
- pixeltable/exec/row_update_node.py +3 -4
- pixeltable/exec/sql_node.py +16 -18
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/column_property_ref.py +1 -1
- pixeltable/exprs/column_ref.py +3 -3
- pixeltable/exprs/compound_predicate.py +1 -1
- pixeltable/exprs/data_row.py +17 -1
- pixeltable/exprs/expr.py +12 -12
- pixeltable/exprs/function_call.py +34 -2
- pixeltable/exprs/json_mapper.py +95 -48
- pixeltable/exprs/json_path.py +4 -9
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +33 -6
- pixeltable/exprs/similarity_expr.py +1 -1
- pixeltable/exprs/sql_element_cache.py +1 -1
- pixeltable/exprs/string_op.py +2 -2
- pixeltable/ext/__init__.py +1 -1
- pixeltable/ext/functions/__init__.py +1 -1
- pixeltable/ext/functions/whisperx.py +1 -1
- pixeltable/ext/functions/yolox.py +1 -1
- pixeltable/func/__init__.py +1 -1
- pixeltable/func/aggregate_function.py +2 -2
- pixeltable/func/callable_function.py +3 -6
- pixeltable/func/expr_template_function.py +24 -4
- pixeltable/func/function.py +7 -9
- pixeltable/func/function_registry.py +1 -1
- pixeltable/func/query_template_function.py +87 -4
- pixeltable/func/signature.py +1 -1
- pixeltable/func/tools.py +1 -1
- pixeltable/func/udf.py +2 -2
- pixeltable/functions/__init__.py +1 -1
- pixeltable/functions/anthropic.py +2 -2
- pixeltable/functions/audio.py +1 -1
- pixeltable/functions/deepseek.py +1 -1
- pixeltable/functions/fireworks.py +1 -1
- pixeltable/functions/globals.py +6 -6
- pixeltable/functions/huggingface.py +1 -1
- pixeltable/functions/image.py +1 -1
- pixeltable/functions/json.py +1 -1
- pixeltable/functions/llama_cpp.py +1 -1
- pixeltable/functions/math.py +1 -1
- pixeltable/functions/mistralai.py +1 -1
- pixeltable/functions/ollama.py +1 -1
- pixeltable/functions/openai.py +2 -2
- pixeltable/functions/replicate.py +1 -1
- pixeltable/functions/string.py +1 -1
- pixeltable/functions/timestamp.py +1 -1
- pixeltable/functions/together.py +1 -1
- pixeltable/functions/util.py +1 -1
- pixeltable/functions/video.py +2 -2
- pixeltable/functions/vision.py +2 -2
- pixeltable/globals.py +7 -2
- pixeltable/index/embedding_index.py +12 -1
- pixeltable/io/__init__.py +5 -3
- pixeltable/io/fiftyone.py +6 -7
- pixeltable/io/label_studio.py +21 -20
- pixeltable/io/pandas.py +6 -5
- pixeltable/iterators/__init__.py +1 -1
- pixeltable/metadata/__init__.py +6 -4
- pixeltable/metadata/converters/convert_24.py +3 -3
- pixeltable/metadata/converters/convert_25.py +1 -1
- pixeltable/metadata/converters/convert_29.py +1 -1
- pixeltable/metadata/converters/convert_31.py +11 -0
- pixeltable/metadata/converters/convert_32.py +15 -0
- pixeltable/metadata/converters/convert_33.py +17 -0
- pixeltable/metadata/notes.py +3 -0
- pixeltable/metadata/schema.py +26 -1
- pixeltable/plan.py +2 -3
- pixeltable/share/packager.py +8 -24
- pixeltable/share/publish.py +20 -9
- pixeltable/store.py +9 -6
- pixeltable/type_system.py +19 -7
- pixeltable/utils/console_output.py +3 -2
- pixeltable/utils/coroutine.py +3 -3
- pixeltable/utils/dbms.py +66 -0
- pixeltable/utils/documents.py +61 -67
- pixeltable/utils/exception_handler.py +59 -0
- pixeltable/utils/filecache.py +1 -1
- pixeltable/utils/http_server.py +3 -2
- pixeltable/utils/pytorch.py +1 -1
- pixeltable/utils/sql.py +1 -1
- pixeltable-0.3.12.dist-info/METADATA +436 -0
- pixeltable-0.3.12.dist-info/RECORD +183 -0
- pixeltable/catalog/path_dict.py +0 -169
- pixeltable-0.3.10.dist-info/METADATA +0 -382
- pixeltable-0.3.10.dist-info/RECORD +0 -179
- {pixeltable-0.3.10.dist-info → pixeltable-0.3.12.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.10.dist-info → pixeltable-0.3.12.dist-info}/WHEEL +0 -0
- {pixeltable-0.3.10.dist-info → pixeltable-0.3.12.dist-info}/entry_points.txt +0 -0
pixeltable/exprs/json_mapper.py
CHANGED
|
@@ -20,16 +20,84 @@ class JsonMapper(Expr):
|
|
|
20
20
|
JsonMapper transforms the list output of a JsonPath by applying a target expr to every element of the list.
|
|
21
21
|
The target expr would typically contain relative JsonPaths, which are bound to an ObjectRef, which in turn
|
|
22
22
|
is populated by JsonMapper.eval(). The JsonMapper effectively creates a new scope for its target expr.
|
|
23
|
+
|
|
24
|
+
JsonMapper is executed in two phases:
|
|
25
|
+
- the first phase is handled by Expr subclass JsonMapperDispatch, which constructs one nested DataRow per source
|
|
26
|
+
list element and evaluates the target expr within that (the nested DataRows are stored as a NestedRowList in the
|
|
27
|
+
slot of JsonMapperDispatch)
|
|
28
|
+
- JsonMapper.eval() collects the slot values of the target expr into its result list
|
|
23
29
|
"""
|
|
24
30
|
|
|
25
31
|
target_expr_scope: ExprScope
|
|
26
32
|
parent_mapper: Optional[JsonMapper]
|
|
27
33
|
target_expr_eval_ctx: Optional[RowBuilder.EvalCtx]
|
|
28
34
|
|
|
29
|
-
def __init__(self, src_expr: Expr, target_expr: Expr):
|
|
35
|
+
def __init__(self, src_expr: Optional[Expr], target_expr: Optional[Expr]):
|
|
30
36
|
# TODO: type spec should be list[target_expr.col_type]
|
|
31
37
|
super().__init__(ts.JsonType())
|
|
32
38
|
|
|
39
|
+
dispatch = JsonMapperDispatch(src_expr, target_expr)
|
|
40
|
+
self.components.append(dispatch)
|
|
41
|
+
self.id = self._create_id()
|
|
42
|
+
|
|
43
|
+
def __repr__(self) -> str:
|
|
44
|
+
return f'map({self._src_expr}, lambda R: {self._target_expr})'
|
|
45
|
+
|
|
46
|
+
@property
|
|
47
|
+
def _src_expr(self) -> Expr:
|
|
48
|
+
return self.components[0].src_expr
|
|
49
|
+
|
|
50
|
+
@property
|
|
51
|
+
def _target_expr(self) -> Expr:
|
|
52
|
+
return self.components[0].target_expr
|
|
53
|
+
|
|
54
|
+
def _equals(self, _: JsonMapper) -> bool:
|
|
55
|
+
return True
|
|
56
|
+
|
|
57
|
+
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
58
|
+
return None
|
|
59
|
+
|
|
60
|
+
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
61
|
+
from ..exec.expr_eval.evaluators import NestedRowList
|
|
62
|
+
|
|
63
|
+
dispatch_slot_idx = self.components[0].slot_idx
|
|
64
|
+
nested_rows = data_row.vals[dispatch_slot_idx]
|
|
65
|
+
if nested_rows is None:
|
|
66
|
+
data_row[self.slot_idx] = None
|
|
67
|
+
return
|
|
68
|
+
assert isinstance(nested_rows, NestedRowList)
|
|
69
|
+
# TODO: get the materialized slot idx, instead of relying on the fact that the target_expr is always at the end
|
|
70
|
+
data_row[self.slot_idx] = [row.vals[-1] for row in nested_rows.rows]
|
|
71
|
+
|
|
72
|
+
def _as_dict(self) -> dict:
|
|
73
|
+
"""
|
|
74
|
+
We only serialize src and target exprs, everything else is re-created at runtime.
|
|
75
|
+
"""
|
|
76
|
+
return {'components': [self._src_expr.as_dict(), self._target_expr.as_dict()]}
|
|
77
|
+
|
|
78
|
+
@classmethod
|
|
79
|
+
def _from_dict(cls, d: dict, components: list[Expr]) -> JsonMapper:
|
|
80
|
+
assert len(components) == 2
|
|
81
|
+
src_expr, target_expr = components[0], components[1]
|
|
82
|
+
return cls(src_expr, target_expr)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class JsonMapperDispatch(Expr):
|
|
86
|
+
"""
|
|
87
|
+
An operational Expr (ie, it doesn't represent any syntactic element) that is used by JsonMapper to materialize
|
|
88
|
+
its input DataRows. It has the same dependencies as the originating JsonMapper.
|
|
89
|
+
|
|
90
|
+
- The execution (= row dispatch) is handled by an expr_eval.Evaluator (JsonMapperDispatcher).
|
|
91
|
+
- It stores a NestedRowList instance in its slot.
|
|
92
|
+
"""
|
|
93
|
+
|
|
94
|
+
target_expr_scope: ExprScope
|
|
95
|
+
parent_mapper: Optional[JsonMapperDispatch]
|
|
96
|
+
target_expr_eval_ctx: Optional[RowBuilder.EvalCtx]
|
|
97
|
+
|
|
98
|
+
def __init__(self, src_expr: Expr, target_expr: Expr):
|
|
99
|
+
super().__init__(ts.InvalidType())
|
|
100
|
+
|
|
33
101
|
# we're creating a new scope, but we don't know yet whether this is nested within another JsonMapper;
|
|
34
102
|
# this gets resolved in bind_rel_paths(); for now we assume we're in the global scope
|
|
35
103
|
self.target_expr_scope = ExprScope(_GLOBAL_SCOPE)
|
|
@@ -40,28 +108,36 @@ class JsonMapper(Expr):
|
|
|
40
108
|
self.parent_mapper = None
|
|
41
109
|
self.target_expr_eval_ctx = None
|
|
42
110
|
|
|
43
|
-
# Intentionally create the id now, before adding the scope anchor; this ensures that
|
|
44
|
-
# be recognized as equal so long as they have the same src_expr and target_expr.
|
|
111
|
+
# Intentionally create the id now, before adding the scope anchor; this ensures that JsonMapperDispatch
|
|
112
|
+
# instances will be recognized as equal so long as they have the same src_expr and target_expr.
|
|
45
113
|
# TODO: Might this cause problems after certain substitutions?
|
|
46
114
|
self.id = self._create_id()
|
|
47
115
|
|
|
48
116
|
scope_anchor = ObjectRef(self.target_expr_scope, self)
|
|
49
117
|
self.components.append(scope_anchor)
|
|
50
118
|
|
|
51
|
-
def _bind_rel_paths(self, mapper: Optional[
|
|
52
|
-
self.
|
|
53
|
-
self.
|
|
119
|
+
def _bind_rel_paths(self, mapper: Optional[JsonMapperDispatch] = None) -> None:
|
|
120
|
+
self.src_expr._bind_rel_paths(mapper)
|
|
121
|
+
self.target_expr._bind_rel_paths(self)
|
|
54
122
|
self.parent_mapper = mapper
|
|
55
123
|
parent_scope = _GLOBAL_SCOPE if mapper is None else mapper.target_expr_scope
|
|
56
124
|
self.target_expr_scope.parent = parent_scope
|
|
57
125
|
|
|
126
|
+
def equals(self, other: Expr) -> bool:
|
|
127
|
+
"""
|
|
128
|
+
We override equals() because we need to avoid comparing our scope anchor.
|
|
129
|
+
"""
|
|
130
|
+
if type(self) is not type(other):
|
|
131
|
+
return False
|
|
132
|
+
return self.src_expr.equals(other.src_expr) and self.target_expr.equals(other.target_expr)
|
|
133
|
+
|
|
58
134
|
def scope(self) -> ExprScope:
|
|
59
135
|
# need to ignore target_expr
|
|
60
|
-
return self.
|
|
136
|
+
return self.src_expr.scope()
|
|
61
137
|
|
|
62
138
|
def dependencies(self) -> list[Expr]:
|
|
63
|
-
result = [self.
|
|
64
|
-
result.extend(self._target_dependencies(self.
|
|
139
|
+
result = [self.src_expr]
|
|
140
|
+
result.extend(self._target_dependencies(self.target_expr))
|
|
65
141
|
return result
|
|
66
142
|
|
|
67
143
|
def _target_dependencies(self, e: Expr) -> list[Expr]:
|
|
@@ -77,23 +153,12 @@ class JsonMapper(Expr):
|
|
|
77
153
|
result.extend(self._target_dependencies(c))
|
|
78
154
|
return result
|
|
79
155
|
|
|
80
|
-
def equals(self, other: Expr) -> bool:
|
|
81
|
-
"""
|
|
82
|
-
We override equals() because we need to avoid comparing our scope anchor.
|
|
83
|
-
"""
|
|
84
|
-
if type(self) is not type(other):
|
|
85
|
-
return False
|
|
86
|
-
return self._src_expr.equals(other._src_expr) and self._target_expr.equals(other._target_expr)
|
|
87
|
-
|
|
88
|
-
def __repr__(self) -> str:
|
|
89
|
-
return f'map({self._src_expr}, lambda R: {self._target_expr})'
|
|
90
|
-
|
|
91
156
|
@property
|
|
92
|
-
def
|
|
157
|
+
def src_expr(self) -> Expr:
|
|
93
158
|
return self.components[0]
|
|
94
159
|
|
|
95
160
|
@property
|
|
96
|
-
def
|
|
161
|
+
def target_expr(self) -> Expr:
|
|
97
162
|
return self.components[1]
|
|
98
163
|
|
|
99
164
|
@property
|
|
@@ -104,37 +169,19 @@ class JsonMapper(Expr):
|
|
|
104
169
|
assert isinstance(result, ObjectRef)
|
|
105
170
|
return result
|
|
106
171
|
|
|
107
|
-
def
|
|
108
|
-
return
|
|
109
|
-
|
|
110
|
-
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
111
|
-
return None
|
|
172
|
+
def __repr__(self) -> str:
|
|
173
|
+
return 'JsonMapperDispatch()'
|
|
112
174
|
|
|
113
175
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
114
|
-
#
|
|
115
|
-
|
|
116
|
-
if not isinstance(src, list):
|
|
117
|
-
# invalid/non-list src path
|
|
118
|
-
data_row[self.slot_idx] = None
|
|
119
|
-
return
|
|
120
|
-
|
|
121
|
-
result = [None] * len(src)
|
|
122
|
-
if self.target_expr_eval_ctx is None:
|
|
123
|
-
self.target_expr_eval_ctx = row_builder.create_eval_ctx([self._target_expr])
|
|
124
|
-
for i, val in enumerate(src):
|
|
125
|
-
data_row[self.scope_anchor.slot_idx] = val
|
|
126
|
-
# stored target_expr
|
|
127
|
-
row_builder.eval(data_row, self.target_expr_eval_ctx, force_eval=self._target_expr.scope())
|
|
128
|
-
result[i] = data_row[self._target_expr.slot_idx]
|
|
129
|
-
data_row[self.slot_idx] = result
|
|
176
|
+
# eval is handled by JsonMapperDispatcher
|
|
177
|
+
raise AssertionError('this should never be called')
|
|
130
178
|
|
|
131
179
|
def _as_dict(self) -> dict:
|
|
132
180
|
"""
|
|
133
|
-
|
|
181
|
+
JsonMapperDispatch instances are only created by the JsonMapper c'tor and never need to be serialized.
|
|
134
182
|
"""
|
|
135
|
-
|
|
183
|
+
raise AssertionError('this should never be called')
|
|
136
184
|
|
|
137
185
|
@classmethod
|
|
138
|
-
def _from_dict(cls, d: dict, components: list[Expr]) ->
|
|
139
|
-
|
|
140
|
-
return cls(components[0], components[1])
|
|
186
|
+
def _from_dict(cls, d: dict, components: list[Expr]) -> JsonMapperDispatch:
|
|
187
|
+
raise AssertionError('this should never be called')
|
pixeltable/exprs/json_path.py
CHANGED
|
@@ -5,13 +5,12 @@ from typing import Any, Optional, Union
|
|
|
5
5
|
import jmespath
|
|
6
6
|
import sqlalchemy as sql
|
|
7
7
|
|
|
8
|
-
import pixeltable as pxt
|
|
9
8
|
from pixeltable import catalog, exceptions as excs, type_system as ts
|
|
10
9
|
|
|
11
10
|
from .data_row import DataRow
|
|
12
11
|
from .expr import Expr
|
|
13
12
|
from .globals import print_slice
|
|
14
|
-
from .json_mapper import
|
|
13
|
+
from .json_mapper import JsonMapperDispatch
|
|
15
14
|
from .object_ref import ObjectRef
|
|
16
15
|
from .row_builder import RowBuilder
|
|
17
16
|
from .sql_element_cache import SqlElementCache
|
|
@@ -19,10 +18,7 @@ from .sql_element_cache import SqlElementCache
|
|
|
19
18
|
|
|
20
19
|
class JsonPath(Expr):
|
|
21
20
|
def __init__(
|
|
22
|
-
self,
|
|
23
|
-
anchor: Optional['pxt.exprs.Expr'],
|
|
24
|
-
path_elements: Optional[list[Union[str, int, slice]]] = None,
|
|
25
|
-
scope_idx: int = 0,
|
|
21
|
+
self, anchor: Optional[Expr], path_elements: Optional[list[Union[str, int, slice]]] = None, scope_idx: int = 0
|
|
26
22
|
) -> None:
|
|
27
23
|
"""
|
|
28
24
|
anchor can be None, in which case this is a relative JsonPath and the anchor is set later via set_anchor().
|
|
@@ -80,11 +76,10 @@ class JsonPath(Expr):
|
|
|
80
76
|
def is_relative_path(self) -> bool:
|
|
81
77
|
return self._anchor is None
|
|
82
78
|
|
|
83
|
-
@property
|
|
84
79
|
def _has_relative_path(self) -> bool:
|
|
85
|
-
return self.is_relative_path() or super()._has_relative_path
|
|
80
|
+
return self.is_relative_path() or super()._has_relative_path()
|
|
86
81
|
|
|
87
|
-
def _bind_rel_paths(self, mapper: Optional['
|
|
82
|
+
def _bind_rel_paths(self, mapper: Optional['JsonMapperDispatch'] = None) -> None:
|
|
88
83
|
if self.is_relative_path():
|
|
89
84
|
# TODO: take scope_idx into account
|
|
90
85
|
self.set_anchor(mapper.scope_anchor)
|
pixeltable/exprs/method_ref.py
CHANGED
|
@@ -23,7 +23,7 @@ class MethodRef(Expr):
|
|
|
23
23
|
# TODO: Should this even be an `Expr`? It can't actually be evaluated directly (it has to be first
|
|
24
24
|
# converted to a `FunctionCall` by binding any remaining parameters).
|
|
25
25
|
|
|
26
|
-
def __init__(self, base_expr: Expr, method_name: str):
|
|
26
|
+
def __init__(self, base_expr: Expr, method_name: str) -> None:
|
|
27
27
|
super().__init__(ts.InvalidType()) # The `MethodRef` is untyped until it is called.
|
|
28
28
|
self.base_expr = base_expr
|
|
29
29
|
self.method_name = method_name
|
|
@@ -43,7 +43,7 @@ class MethodRef(Expr):
|
|
|
43
43
|
assert len(components) == 1
|
|
44
44
|
return cls(components[0], d['method_name'])
|
|
45
45
|
|
|
46
|
-
def __call__(self, *args, **kwargs) -> FunctionCall:
|
|
46
|
+
def __call__(self, *args: Any, **kwargs: Any) -> FunctionCall:
|
|
47
47
|
result = self.fn(*[self.base_expr, *args], **kwargs)
|
|
48
48
|
assert isinstance(result, FunctionCall)
|
|
49
49
|
result.is_method_call = True
|
pixeltable/exprs/object_ref.py
CHANGED
|
@@ -8,7 +8,7 @@ import pixeltable.type_system as ts
|
|
|
8
8
|
|
|
9
9
|
from .data_row import DataRow
|
|
10
10
|
from .expr import Expr, ExprScope
|
|
11
|
-
from .json_mapper import
|
|
11
|
+
from .json_mapper import JsonMapperDispatch
|
|
12
12
|
from .row_builder import RowBuilder
|
|
13
13
|
from .sql_element_cache import SqlElementCache
|
|
14
14
|
|
|
@@ -19,7 +19,7 @@ class ObjectRef(Expr):
|
|
|
19
19
|
The object is generated/materialized elsewhere and establishes a new scope.
|
|
20
20
|
"""
|
|
21
21
|
|
|
22
|
-
def __init__(self, scope: ExprScope, owner:
|
|
22
|
+
def __init__(self, scope: ExprScope, owner: JsonMapperDispatch):
|
|
23
23
|
# TODO: do we need an Unknown type after all?
|
|
24
24
|
super().__init__(ts.JsonType()) # JsonType: this could be anything
|
|
25
25
|
self._scope = scope
|
pixeltable/exprs/row_builder.py
CHANGED
|
@@ -77,6 +77,8 @@ class RowBuilder:
|
|
|
77
77
|
transitive_dependents: np.ndarray # of bool
|
|
78
78
|
# dependencies[i] = direct dependencies of expr with slot idx i; transpose of dependents
|
|
79
79
|
dependencies: np.ndarray # of bool
|
|
80
|
+
# num_dependencies[i] = number of direct dependencies of expr with slot idx i
|
|
81
|
+
num_dependencies: np.ndarray # of int
|
|
80
82
|
|
|
81
83
|
# records the output_expr that a subexpr belongs to
|
|
82
84
|
# (a subexpr can be shared across multiple output exprs)
|
|
@@ -209,6 +211,7 @@ class RowBuilder:
|
|
|
209
211
|
exc_dependencies[expr.slot_idx].add(d.slot_idx)
|
|
210
212
|
exc_dependencies[expr.slot_idx].update(exc_dependencies[d.slot_idx])
|
|
211
213
|
|
|
214
|
+
self.num_dependencies = np.sum(self.dependencies, axis=1)
|
|
212
215
|
self.dependents = self.dependencies.T
|
|
213
216
|
self.transitive_dependents = np.zeros((self.num_materialized, self.num_materialized), dtype=bool)
|
|
214
217
|
for i in reversed(range(self.num_materialized)):
|
|
@@ -275,8 +278,14 @@ class RowBuilder:
|
|
|
275
278
|
for d in e.dependencies():
|
|
276
279
|
self._record_output_expr_id(d, output_expr_id)
|
|
277
280
|
|
|
278
|
-
def _compute_dependencies(
|
|
279
|
-
|
|
281
|
+
def _compute_dependencies(
|
|
282
|
+
self, target_slot_idxs: list[int], excluded_slot_idxs: list[int], target_scope: Optional[ExprScope] = None
|
|
283
|
+
) -> list[int]:
|
|
284
|
+
"""Compute exprs needed to materialize the given target slots, excluding 'excluded_slot_idxs'
|
|
285
|
+
|
|
286
|
+
If target_scope != None, stops transitive dependency resolution when leaving target_scope (ie, includes
|
|
287
|
+
immediate dependents that aren't in target_scope, but doesn't resolve those).
|
|
288
|
+
"""
|
|
280
289
|
dependencies: list[set[int]] = [set() for _ in range(self.num_materialized)] # indexed by slot_idx
|
|
281
290
|
# doing this front-to-back ensures that we capture transitive dependencies
|
|
282
291
|
max_target_slot_idx = max(target_slot_idxs)
|
|
@@ -289,6 +298,9 @@ class RowBuilder:
|
|
|
289
298
|
if expr.slot_idx in self.input_expr_slot_idxs:
|
|
290
299
|
# this is input and therefore doesn't depend on other exprs
|
|
291
300
|
continue
|
|
301
|
+
if target_scope is not None and expr.scope() != target_scope:
|
|
302
|
+
# don't resolve dependencies outside of target_scope
|
|
303
|
+
continue
|
|
292
304
|
for d in expr.dependencies():
|
|
293
305
|
assert d.slot_idx is not None, f'{expr}, {d}'
|
|
294
306
|
if d.slot_idx in excluded_slot_idxs:
|
|
@@ -320,10 +332,15 @@ class RowBuilder:
|
|
|
320
332
|
for c in e.components:
|
|
321
333
|
self.__set_slot_idxs_aux(c)
|
|
322
334
|
|
|
323
|
-
def get_dependencies(
|
|
335
|
+
def get_dependencies(
|
|
336
|
+
self, targets: Iterable[Expr], exclude: Optional[Iterable[Expr]] = None, limit_scope: bool = True
|
|
337
|
+
) -> list[Expr]:
|
|
324
338
|
"""
|
|
325
339
|
Return list of dependencies needed to evaluate the given target exprs (expressed as slot idxs).
|
|
326
340
|
The exprs given in 'exclude' are excluded.
|
|
341
|
+
If limit_scope == True, only returns dependencies in the same scope and immediate (ie, not transitive)
|
|
342
|
+
dependencies from enclosing scopes.
|
|
343
|
+
|
|
327
344
|
Returns:
|
|
328
345
|
list of Exprs from unique_exprs (= with slot_idx set)
|
|
329
346
|
"""
|
|
@@ -334,23 +351,33 @@ class RowBuilder:
|
|
|
334
351
|
return []
|
|
335
352
|
# make sure we only refer to recorded exprs
|
|
336
353
|
targets = [self.unique_exprs[e] for e in targets]
|
|
354
|
+
target_scope: Optional[ExprScope] = None
|
|
355
|
+
if limit_scope:
|
|
356
|
+
# make sure all targets are from the same scope
|
|
357
|
+
target_scopes = {e.scope() for e in targets}
|
|
358
|
+
assert len(target_scopes) == 1
|
|
359
|
+
target_scope = target_scopes.pop()
|
|
337
360
|
exclude = [self.unique_exprs[e] for e in exclude]
|
|
338
361
|
target_slot_idxs = [e.slot_idx for e in targets]
|
|
339
362
|
excluded_slot_idxs = [e.slot_idx for e in exclude]
|
|
340
|
-
all_dependencies = set(
|
|
363
|
+
all_dependencies = set(
|
|
364
|
+
self._compute_dependencies(target_slot_idxs, excluded_slot_idxs, target_scope=target_scope)
|
|
365
|
+
)
|
|
341
366
|
all_dependencies.update(target_slot_idxs)
|
|
342
367
|
result_ids = list(all_dependencies)
|
|
343
368
|
result_ids.sort()
|
|
344
369
|
return [self.unique_exprs[id] for id in result_ids]
|
|
345
370
|
|
|
346
|
-
def create_eval_ctx(
|
|
371
|
+
def create_eval_ctx(
|
|
372
|
+
self, targets: Iterable[Expr], exclude: Optional[Iterable[Expr]] = None, limit_scope: bool = True
|
|
373
|
+
) -> EvalCtx:
|
|
347
374
|
"""Return EvalCtx for targets"""
|
|
348
375
|
targets = list(targets)
|
|
349
376
|
if exclude is None:
|
|
350
377
|
exclude = []
|
|
351
378
|
if len(targets) == 0:
|
|
352
379
|
return self.EvalCtx([], [], [], [])
|
|
353
|
-
dependencies = self.get_dependencies(targets, exclude)
|
|
380
|
+
dependencies = self.get_dependencies(targets, exclude, limit_scope=limit_scope)
|
|
354
381
|
targets = [self.unique_exprs[e] for e in targets]
|
|
355
382
|
target_slot_idxs = [e.slot_idx for e in targets]
|
|
356
383
|
ctx_slot_idxs = [e.slot_idx for e in dependencies]
|
|
@@ -47,7 +47,7 @@ class SimilarityExpr(Expr):
|
|
|
47
47
|
def __repr__(self) -> str:
|
|
48
48
|
return f'{self.components[0]}.similarity({self.components[1]})'
|
|
49
49
|
|
|
50
|
-
def _id_attrs(self):
|
|
50
|
+
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
51
51
|
return [*super()._id_attrs(), ('idx_name', self.idx_info.name)]
|
|
52
52
|
|
|
53
53
|
def default_column_name(self) -> str:
|
|
@@ -17,7 +17,7 @@ class SqlElementCache:
|
|
|
17
17
|
for e, el in elements.items():
|
|
18
18
|
self.cache[e.id] = el
|
|
19
19
|
|
|
20
|
-
def extend(self, elements: ExprDict[sql.ColumnElement]):
|
|
20
|
+
def extend(self, elements: ExprDict[sql.ColumnElement]) -> None:
|
|
21
21
|
for e, el in elements.items():
|
|
22
22
|
self.cache[e.id] = el
|
|
23
23
|
|
pixeltable/exprs/string_op.py
CHANGED
|
@@ -26,7 +26,7 @@ class StringOp(Expr):
|
|
|
26
26
|
self.operator = operator
|
|
27
27
|
self.components = [op1, op2]
|
|
28
28
|
assert op1.col_type.is_string_type()
|
|
29
|
-
if operator in
|
|
29
|
+
if operator in (StringOperator.CONCAT, StringOperator.REPEAT):
|
|
30
30
|
if operator == StringOperator.CONCAT and not op2.col_type.is_string_type():
|
|
31
31
|
raise excs.Error(
|
|
32
32
|
f'{self}: {operator} on strings requires string type, but {op2} has type {op2.col_type}'
|
|
@@ -89,7 +89,7 @@ class StringOp(Expr):
|
|
|
89
89
|
"""
|
|
90
90
|
Return the result of evaluating the expression on two int/float operands
|
|
91
91
|
"""
|
|
92
|
-
assert self.operator in
|
|
92
|
+
assert self.operator in (StringOperator.CONCAT, StringOperator.REPEAT)
|
|
93
93
|
if self.operator == StringOperator.CONCAT:
|
|
94
94
|
assert isinstance(op2_val, str)
|
|
95
95
|
return op1_val + op2_val
|
pixeltable/ext/__init__.py
CHANGED
pixeltable/func/__init__.py
CHANGED
|
@@ -5,7 +5,7 @@ from .callable_function import CallableFunction
|
|
|
5
5
|
from .expr_template_function import ExprTemplateFunction
|
|
6
6
|
from .function import Function, InvalidFunction
|
|
7
7
|
from .function_registry import FunctionRegistry
|
|
8
|
-
from .query_template_function import QueryTemplateFunction, query
|
|
8
|
+
from .query_template_function import QueryTemplateFunction, query, retrieval_udf
|
|
9
9
|
from .signature import Batch, Parameter, Signature
|
|
10
10
|
from .tools import Tool, ToolChoice, Tools
|
|
11
11
|
from .udf import expr_udf, make_function, udf
|
|
@@ -159,7 +159,7 @@ class AggregateFunction(Function):
|
|
|
159
159
|
self.init_param_names.append(init_param_names)
|
|
160
160
|
return self
|
|
161
161
|
|
|
162
|
-
def
|
|
162
|
+
def comment(self) -> Optional[str]:
|
|
163
163
|
return inspect.getdoc(self.agg_classes[0])
|
|
164
164
|
|
|
165
165
|
def help_str(self) -> str:
|
|
@@ -252,7 +252,7 @@ def uda(
|
|
|
252
252
|
) -> Callable[[type[Aggregator]], AggregateFunction]: ...
|
|
253
253
|
|
|
254
254
|
|
|
255
|
-
def uda(*args, **kwargs):
|
|
255
|
+
def uda(*args, **kwargs): # type: ignore[no-untyped-def]
|
|
256
256
|
"""Decorator for user-defined aggregate functions.
|
|
257
257
|
|
|
258
258
|
The decorated class must inherit from Aggregator and implement the following methods:
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
import asyncio
|
|
4
3
|
import inspect
|
|
5
4
|
from typing import TYPE_CHECKING, Any, Callable, Optional, Sequence
|
|
6
5
|
from uuid import UUID
|
|
@@ -61,7 +60,7 @@ class CallableFunction(Function):
|
|
|
61
60
|
def is_async(self) -> bool:
|
|
62
61
|
return inspect.iscoroutinefunction(self.py_fn)
|
|
63
62
|
|
|
64
|
-
def
|
|
63
|
+
def comment(self) -> Optional[str]:
|
|
65
64
|
return inspect.getdoc(self.py_fns[0])
|
|
66
65
|
|
|
67
66
|
@property
|
|
@@ -127,12 +126,10 @@ class CallableFunction(Function):
|
|
|
127
126
|
"""
|
|
128
127
|
assert self.is_batched
|
|
129
128
|
assert not self.is_polymorphic
|
|
129
|
+
assert not self.is_async
|
|
130
130
|
# Unpack the constant parameters
|
|
131
131
|
constant_kwargs, batched_kwargs = self.create_batch_kwargs(kwargs)
|
|
132
|
-
|
|
133
|
-
return asyncio.run(self.py_fn(*args, **constant_kwargs, **batched_kwargs))
|
|
134
|
-
else:
|
|
135
|
-
return self.py_fn(*args, **constant_kwargs, **batched_kwargs)
|
|
132
|
+
return self.py_fn(*args, **constant_kwargs, **batched_kwargs)
|
|
136
133
|
|
|
137
134
|
def create_batch_kwargs(self, kwargs: dict[str, Any]) -> tuple[dict[str, Any], dict[str, list[Any]]]:
|
|
138
135
|
"""Converts kwargs containing lists into constant and batched kwargs in the format expected by a batched udf."""
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from typing import Any, Optional, Sequence
|
|
2
2
|
|
|
3
|
-
from pixeltable import exceptions as excs, exprs
|
|
3
|
+
from pixeltable import exceptions as excs, exprs, type_system as ts
|
|
4
4
|
|
|
5
5
|
from .function import Function
|
|
6
6
|
from .signature import Signature
|
|
@@ -76,12 +76,28 @@ class ExprTemplateFunction(Function):
|
|
|
76
76
|
arg_expr = arg
|
|
77
77
|
arg_exprs[param_expr] = arg_expr
|
|
78
78
|
result = result.substitute(arg_exprs)
|
|
79
|
-
assert not result._contains(exprs.Variable)
|
|
80
79
|
return result
|
|
81
80
|
|
|
82
|
-
def
|
|
81
|
+
def call_return_type(self, bound_args: dict[str, 'exprs.Expr']) -> ts.ColumnType:
|
|
82
|
+
"""
|
|
83
|
+
The call_return_type of an ExprTemplateFunction is derived from the template expression's col_type after
|
|
84
|
+
substitution (unlike for UDFs, whose call_return_type is derived from an explicitly specified
|
|
85
|
+
conditional_return_type).
|
|
86
|
+
"""
|
|
87
|
+
assert not self.is_polymorphic
|
|
88
|
+
template = self.template
|
|
89
|
+
with_defaults = bound_args.copy()
|
|
90
|
+
with_defaults.update(
|
|
91
|
+
{param_name: default for param_name, default in template.defaults.items() if param_name not in bound_args}
|
|
92
|
+
)
|
|
93
|
+
substituted_expr = self.template.expr.copy().substitute(
|
|
94
|
+
{template.param_exprs[name]: expr for name, expr in with_defaults.items()}
|
|
95
|
+
)
|
|
96
|
+
return substituted_expr.col_type
|
|
97
|
+
|
|
98
|
+
def comment(self) -> Optional[str]:
|
|
83
99
|
if isinstance(self.templates[0].expr, exprs.FunctionCall):
|
|
84
|
-
return self.templates[0].expr.fn.
|
|
100
|
+
return self.templates[0].expr.fn.comment()
|
|
85
101
|
return None
|
|
86
102
|
|
|
87
103
|
def exec(self, args: Sequence[Any], kwargs: dict[str, Any]) -> Any:
|
|
@@ -97,6 +113,10 @@ class ExprTemplateFunction(Function):
|
|
|
97
113
|
|
|
98
114
|
@property
|
|
99
115
|
def display_name(self) -> str:
|
|
116
|
+
if not self.self_name and isinstance(self.templates[0].expr, exprs.FunctionCall):
|
|
117
|
+
# In the common case where the templated expression is itself a FunctionCall,
|
|
118
|
+
# fall back on the display name of the underlying FunctionCall
|
|
119
|
+
return self.templates[0].expr.fn.display_name
|
|
100
120
|
return self.self_name
|
|
101
121
|
|
|
102
122
|
@property
|
pixeltable/func/function.py
CHANGED
|
@@ -10,9 +10,7 @@ from typing import TYPE_CHECKING, Any, Callable, Optional, Sequence, cast
|
|
|
10
10
|
import sqlalchemy as sql
|
|
11
11
|
from typing_extensions import Self
|
|
12
12
|
|
|
13
|
-
|
|
14
|
-
import pixeltable.exceptions as excs
|
|
15
|
-
import pixeltable.type_system as ts
|
|
13
|
+
from pixeltable import exceptions as excs, type_system as ts
|
|
16
14
|
|
|
17
15
|
from .globals import resolve_symbol
|
|
18
16
|
from .signature import Signature
|
|
@@ -107,11 +105,11 @@ class Function(ABC):
|
|
|
107
105
|
@abstractmethod
|
|
108
106
|
def is_async(self) -> bool: ...
|
|
109
107
|
|
|
110
|
-
def
|
|
108
|
+
def comment(self) -> Optional[str]:
|
|
111
109
|
return None
|
|
112
110
|
|
|
113
111
|
def help_str(self) -> str:
|
|
114
|
-
docstring = self.
|
|
112
|
+
docstring = self.comment()
|
|
115
113
|
display = self.display_name + str(self.signatures[0])
|
|
116
114
|
if docstring is None:
|
|
117
115
|
return display
|
|
@@ -155,7 +153,7 @@ class Function(ABC):
|
|
|
155
153
|
"""
|
|
156
154
|
raise NotImplementedError()
|
|
157
155
|
|
|
158
|
-
def __call__(self, *args: Any, **kwargs: Any) -> '
|
|
156
|
+
def __call__(self, *args: Any, **kwargs: Any) -> 'exprs.FunctionCall':
|
|
159
157
|
from pixeltable import exprs
|
|
160
158
|
|
|
161
159
|
args = [exprs.Expr.from_object(arg) for arg in args]
|
|
@@ -246,7 +244,7 @@ class Function(ABC):
|
|
|
246
244
|
# `None` when any of its non-nullable inputs are `None`.
|
|
247
245
|
for arg_name, arg in bound_args.items():
|
|
248
246
|
param = self.signature.parameters[arg_name]
|
|
249
|
-
if param.kind in
|
|
247
|
+
if param.kind in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD):
|
|
250
248
|
continue
|
|
251
249
|
if arg.col_type.nullable and not param.col_type.nullable:
|
|
252
250
|
return_type = return_type.copy(nullable=True)
|
|
@@ -385,10 +383,10 @@ class Function(ABC):
|
|
|
385
383
|
else:
|
|
386
384
|
var = exprs.Variable(name, param.col_type)
|
|
387
385
|
bindings[name] = var
|
|
388
|
-
if args_ok and param.kind in
|
|
386
|
+
if args_ok and param.kind in (
|
|
389
387
|
inspect.Parameter.POSITIONAL_ONLY,
|
|
390
388
|
inspect.Parameter.POSITIONAL_OR_KEYWORD,
|
|
391
|
-
|
|
389
|
+
):
|
|
392
390
|
template_args.append(var)
|
|
393
391
|
else:
|
|
394
392
|
template_kwargs[name] = var
|
|
@@ -31,7 +31,7 @@ class FunctionRegistry:
|
|
|
31
31
|
cls._instance = FunctionRegistry()
|
|
32
32
|
return cls._instance
|
|
33
33
|
|
|
34
|
-
def __init__(self):
|
|
34
|
+
def __init__(self) -> None:
|
|
35
35
|
self.stored_fns_by_id: dict[UUID, Function] = {}
|
|
36
36
|
self.module_fns: dict[str, Function] = {} # fqn -> Function
|
|
37
37
|
self.type_methods: dict[ts.ColumnType.Type, dict[str, Function]] = {}
|