pixeltable 0.3.10__py3-none-any.whl → 0.3.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +1 -1
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +2 -1
- pixeltable/catalog/catalog.py +63 -36
- pixeltable/catalog/column.py +6 -4
- pixeltable/catalog/dir.py +5 -5
- pixeltable/catalog/globals.py +12 -14
- pixeltable/catalog/insertable_table.py +4 -7
- pixeltable/catalog/path.py +2 -2
- pixeltable/catalog/table.py +64 -56
- pixeltable/catalog/table_version.py +42 -40
- pixeltable/catalog/table_version_handle.py +3 -0
- pixeltable/catalog/table_version_path.py +1 -1
- pixeltable/catalog/view.py +8 -7
- pixeltable/dataframe.py +5 -3
- pixeltable/env.py +108 -42
- pixeltable/exec/__init__.py +2 -0
- pixeltable/exec/aggregation_node.py +6 -8
- pixeltable/exec/cache_prefetch_node.py +4 -7
- pixeltable/exec/component_iteration_node.py +1 -3
- pixeltable/exec/data_row_batch.py +1 -2
- pixeltable/exec/exec_context.py +1 -1
- pixeltable/exec/exec_node.py +1 -2
- pixeltable/exec/expr_eval/__init__.py +2 -0
- pixeltable/exec/expr_eval/evaluators.py +137 -20
- pixeltable/exec/expr_eval/expr_eval_node.py +43 -64
- pixeltable/exec/expr_eval/globals.py +68 -7
- pixeltable/exec/expr_eval/schedulers.py +25 -23
- pixeltable/exec/in_memory_data_node.py +8 -6
- pixeltable/exec/row_update_node.py +3 -4
- pixeltable/exec/sql_node.py +16 -17
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/column_property_ref.py +1 -1
- pixeltable/exprs/column_ref.py +3 -3
- pixeltable/exprs/compound_predicate.py +1 -1
- pixeltable/exprs/data_row.py +17 -1
- pixeltable/exprs/expr.py +12 -12
- pixeltable/exprs/function_call.py +34 -2
- pixeltable/exprs/json_mapper.py +95 -48
- pixeltable/exprs/json_path.py +3 -4
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +33 -6
- pixeltable/exprs/similarity_expr.py +1 -1
- pixeltable/exprs/sql_element_cache.py +1 -1
- pixeltable/exprs/string_op.py +2 -2
- pixeltable/ext/__init__.py +1 -1
- pixeltable/ext/functions/__init__.py +1 -1
- pixeltable/ext/functions/whisperx.py +1 -1
- pixeltable/ext/functions/yolox.py +1 -1
- pixeltable/func/aggregate_function.py +1 -1
- pixeltable/func/callable_function.py +2 -5
- pixeltable/func/expr_template_function.py +22 -2
- pixeltable/func/function.py +4 -5
- pixeltable/func/function_registry.py +1 -1
- pixeltable/func/signature.py +1 -1
- pixeltable/func/udf.py +2 -2
- pixeltable/functions/__init__.py +1 -1
- pixeltable/functions/anthropic.py +2 -2
- pixeltable/functions/audio.py +1 -1
- pixeltable/functions/deepseek.py +1 -1
- pixeltable/functions/fireworks.py +1 -1
- pixeltable/functions/globals.py +6 -6
- pixeltable/functions/huggingface.py +1 -1
- pixeltable/functions/image.py +1 -1
- pixeltable/functions/json.py +1 -1
- pixeltable/functions/llama_cpp.py +1 -1
- pixeltable/functions/math.py +1 -1
- pixeltable/functions/mistralai.py +1 -1
- pixeltable/functions/ollama.py +1 -1
- pixeltable/functions/openai.py +2 -2
- pixeltable/functions/replicate.py +1 -1
- pixeltable/functions/string.py +1 -1
- pixeltable/functions/timestamp.py +1 -1
- pixeltable/functions/together.py +1 -1
- pixeltable/functions/util.py +1 -1
- pixeltable/functions/video.py +2 -2
- pixeltable/functions/vision.py +2 -2
- pixeltable/index/embedding_index.py +12 -1
- pixeltable/io/__init__.py +5 -3
- pixeltable/io/fiftyone.py +6 -7
- pixeltable/io/label_studio.py +21 -20
- pixeltable/io/pandas.py +6 -5
- pixeltable/iterators/__init__.py +1 -1
- pixeltable/metadata/__init__.py +5 -3
- pixeltable/metadata/converters/convert_24.py +3 -3
- pixeltable/metadata/converters/convert_25.py +1 -1
- pixeltable/metadata/converters/convert_29.py +1 -1
- pixeltable/store.py +2 -2
- pixeltable/type_system.py +19 -7
- pixeltable/utils/console_output.py +3 -2
- pixeltable/utils/coroutine.py +3 -3
- pixeltable/utils/dbms.py +66 -0
- pixeltable/utils/documents.py +61 -67
- pixeltable/utils/filecache.py +1 -1
- pixeltable/utils/http_server.py +3 -2
- pixeltable/utils/pytorch.py +1 -1
- pixeltable/utils/sql.py +1 -1
- pixeltable-0.3.11.dist-info/METADATA +436 -0
- pixeltable-0.3.11.dist-info/RECORD +179 -0
- pixeltable/catalog/path_dict.py +0 -169
- pixeltable-0.3.10.dist-info/METADATA +0 -382
- pixeltable-0.3.10.dist-info/RECORD +0 -179
- {pixeltable-0.3.10.dist-info → pixeltable-0.3.11.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.10.dist-info → pixeltable-0.3.11.dist-info}/WHEEL +0 -0
- {pixeltable-0.3.10.dist-info → pixeltable-0.3.11.dist-info}/entry_points.txt +0 -0
pixeltable/exprs/json_mapper.py
CHANGED
|
@@ -20,16 +20,84 @@ class JsonMapper(Expr):
|
|
|
20
20
|
JsonMapper transforms the list output of a JsonPath by applying a target expr to every element of the list.
|
|
21
21
|
The target expr would typically contain relative JsonPaths, which are bound to an ObjectRef, which in turn
|
|
22
22
|
is populated by JsonMapper.eval(). The JsonMapper effectively creates a new scope for its target expr.
|
|
23
|
+
|
|
24
|
+
JsonMapper is executed in two phases:
|
|
25
|
+
- the first phase is handled by Expr subclass JsonMapperDispatch, which constructs one nested DataRow per source
|
|
26
|
+
list element and evaluates the target expr within that (the nested DataRows are stored as a NestedRowList in the
|
|
27
|
+
slot of JsonMapperDispatch)
|
|
28
|
+
- JsonMapper.eval() collects the slot values of the target expr into its result list
|
|
23
29
|
"""
|
|
24
30
|
|
|
25
31
|
target_expr_scope: ExprScope
|
|
26
32
|
parent_mapper: Optional[JsonMapper]
|
|
27
33
|
target_expr_eval_ctx: Optional[RowBuilder.EvalCtx]
|
|
28
34
|
|
|
29
|
-
def __init__(self, src_expr: Expr, target_expr: Expr):
|
|
35
|
+
def __init__(self, src_expr: Optional[Expr], target_expr: Optional[Expr]):
|
|
30
36
|
# TODO: type spec should be list[target_expr.col_type]
|
|
31
37
|
super().__init__(ts.JsonType())
|
|
32
38
|
|
|
39
|
+
dispatch = JsonMapperDispatch(src_expr, target_expr)
|
|
40
|
+
self.components.append(dispatch)
|
|
41
|
+
self.id = self._create_id()
|
|
42
|
+
|
|
43
|
+
def __repr__(self) -> str:
|
|
44
|
+
return f'map({self._src_expr}, lambda R: {self._target_expr})'
|
|
45
|
+
|
|
46
|
+
@property
|
|
47
|
+
def _src_expr(self) -> Expr:
|
|
48
|
+
return self.components[0].src_expr
|
|
49
|
+
|
|
50
|
+
@property
|
|
51
|
+
def _target_expr(self) -> Expr:
|
|
52
|
+
return self.components[0].target_expr
|
|
53
|
+
|
|
54
|
+
def _equals(self, _: JsonMapper) -> bool:
|
|
55
|
+
return True
|
|
56
|
+
|
|
57
|
+
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
58
|
+
return None
|
|
59
|
+
|
|
60
|
+
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
61
|
+
from ..exec.expr_eval.evaluators import NestedRowList
|
|
62
|
+
|
|
63
|
+
dispatch_slot_idx = self.components[0].slot_idx
|
|
64
|
+
nested_rows = data_row.vals[dispatch_slot_idx]
|
|
65
|
+
if nested_rows is None:
|
|
66
|
+
data_row[self.slot_idx] = None
|
|
67
|
+
return
|
|
68
|
+
assert isinstance(nested_rows, NestedRowList)
|
|
69
|
+
# TODO: get the materialized slot idx, instead of relying on the fact that the target_expr is always at the end
|
|
70
|
+
data_row[self.slot_idx] = [row.vals[-1] for row in nested_rows.rows]
|
|
71
|
+
|
|
72
|
+
def _as_dict(self) -> dict:
|
|
73
|
+
"""
|
|
74
|
+
We only serialize src and target exprs, everything else is re-created at runtime.
|
|
75
|
+
"""
|
|
76
|
+
return {'components': [self._src_expr.as_dict(), self._target_expr.as_dict()]}
|
|
77
|
+
|
|
78
|
+
@classmethod
|
|
79
|
+
def _from_dict(cls, d: dict, components: list[Expr]) -> JsonMapper:
|
|
80
|
+
assert len(components) == 2
|
|
81
|
+
src_expr, target_expr = components[0], components[1]
|
|
82
|
+
return cls(src_expr, target_expr)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class JsonMapperDispatch(Expr):
|
|
86
|
+
"""
|
|
87
|
+
An operational Expr (ie, it doesn't represent any syntactic element) that is used by JsonMapper to materialize
|
|
88
|
+
its input DataRows. It has the same dependencies as the originating JsonMapper.
|
|
89
|
+
|
|
90
|
+
- The execution (= row dispatch) is handled by an expr_eval.Evaluator (JsonMapperDispatcher).
|
|
91
|
+
- It stores a NestedRowList instance in its slot.
|
|
92
|
+
"""
|
|
93
|
+
|
|
94
|
+
target_expr_scope: ExprScope
|
|
95
|
+
parent_mapper: Optional[JsonMapperDispatch]
|
|
96
|
+
target_expr_eval_ctx: Optional[RowBuilder.EvalCtx]
|
|
97
|
+
|
|
98
|
+
def __init__(self, src_expr: Expr, target_expr: Expr):
|
|
99
|
+
super().__init__(ts.InvalidType())
|
|
100
|
+
|
|
33
101
|
# we're creating a new scope, but we don't know yet whether this is nested within another JsonMapper;
|
|
34
102
|
# this gets resolved in bind_rel_paths(); for now we assume we're in the global scope
|
|
35
103
|
self.target_expr_scope = ExprScope(_GLOBAL_SCOPE)
|
|
@@ -40,28 +108,36 @@ class JsonMapper(Expr):
|
|
|
40
108
|
self.parent_mapper = None
|
|
41
109
|
self.target_expr_eval_ctx = None
|
|
42
110
|
|
|
43
|
-
# Intentionally create the id now, before adding the scope anchor; this ensures that
|
|
44
|
-
# be recognized as equal so long as they have the same src_expr and target_expr.
|
|
111
|
+
# Intentionally create the id now, before adding the scope anchor; this ensures that JsonMapperDispatch
|
|
112
|
+
# instances will be recognized as equal so long as they have the same src_expr and target_expr.
|
|
45
113
|
# TODO: Might this cause problems after certain substitutions?
|
|
46
114
|
self.id = self._create_id()
|
|
47
115
|
|
|
48
116
|
scope_anchor = ObjectRef(self.target_expr_scope, self)
|
|
49
117
|
self.components.append(scope_anchor)
|
|
50
118
|
|
|
51
|
-
def _bind_rel_paths(self, mapper: Optional[
|
|
52
|
-
self.
|
|
53
|
-
self.
|
|
119
|
+
def _bind_rel_paths(self, mapper: Optional[JsonMapperDispatch] = None) -> None:
|
|
120
|
+
self.src_expr._bind_rel_paths(mapper)
|
|
121
|
+
self.target_expr._bind_rel_paths(self)
|
|
54
122
|
self.parent_mapper = mapper
|
|
55
123
|
parent_scope = _GLOBAL_SCOPE if mapper is None else mapper.target_expr_scope
|
|
56
124
|
self.target_expr_scope.parent = parent_scope
|
|
57
125
|
|
|
126
|
+
def equals(self, other: Expr) -> bool:
|
|
127
|
+
"""
|
|
128
|
+
We override equals() because we need to avoid comparing our scope anchor.
|
|
129
|
+
"""
|
|
130
|
+
if type(self) is not type(other):
|
|
131
|
+
return False
|
|
132
|
+
return self.src_expr.equals(other.src_expr) and self.target_expr.equals(other.target_expr)
|
|
133
|
+
|
|
58
134
|
def scope(self) -> ExprScope:
|
|
59
135
|
# need to ignore target_expr
|
|
60
|
-
return self.
|
|
136
|
+
return self.src_expr.scope()
|
|
61
137
|
|
|
62
138
|
def dependencies(self) -> list[Expr]:
|
|
63
|
-
result = [self.
|
|
64
|
-
result.extend(self._target_dependencies(self.
|
|
139
|
+
result = [self.src_expr]
|
|
140
|
+
result.extend(self._target_dependencies(self.target_expr))
|
|
65
141
|
return result
|
|
66
142
|
|
|
67
143
|
def _target_dependencies(self, e: Expr) -> list[Expr]:
|
|
@@ -77,23 +153,12 @@ class JsonMapper(Expr):
|
|
|
77
153
|
result.extend(self._target_dependencies(c))
|
|
78
154
|
return result
|
|
79
155
|
|
|
80
|
-
def equals(self, other: Expr) -> bool:
|
|
81
|
-
"""
|
|
82
|
-
We override equals() because we need to avoid comparing our scope anchor.
|
|
83
|
-
"""
|
|
84
|
-
if type(self) is not type(other):
|
|
85
|
-
return False
|
|
86
|
-
return self._src_expr.equals(other._src_expr) and self._target_expr.equals(other._target_expr)
|
|
87
|
-
|
|
88
|
-
def __repr__(self) -> str:
|
|
89
|
-
return f'map({self._src_expr}, lambda R: {self._target_expr})'
|
|
90
|
-
|
|
91
156
|
@property
|
|
92
|
-
def
|
|
157
|
+
def src_expr(self) -> Expr:
|
|
93
158
|
return self.components[0]
|
|
94
159
|
|
|
95
160
|
@property
|
|
96
|
-
def
|
|
161
|
+
def target_expr(self) -> Expr:
|
|
97
162
|
return self.components[1]
|
|
98
163
|
|
|
99
164
|
@property
|
|
@@ -104,37 +169,19 @@ class JsonMapper(Expr):
|
|
|
104
169
|
assert isinstance(result, ObjectRef)
|
|
105
170
|
return result
|
|
106
171
|
|
|
107
|
-
def
|
|
108
|
-
return
|
|
109
|
-
|
|
110
|
-
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
111
|
-
return None
|
|
172
|
+
def __repr__(self) -> str:
|
|
173
|
+
return 'JsonMapperDispatch()'
|
|
112
174
|
|
|
113
175
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
114
|
-
#
|
|
115
|
-
|
|
116
|
-
if not isinstance(src, list):
|
|
117
|
-
# invalid/non-list src path
|
|
118
|
-
data_row[self.slot_idx] = None
|
|
119
|
-
return
|
|
120
|
-
|
|
121
|
-
result = [None] * len(src)
|
|
122
|
-
if self.target_expr_eval_ctx is None:
|
|
123
|
-
self.target_expr_eval_ctx = row_builder.create_eval_ctx([self._target_expr])
|
|
124
|
-
for i, val in enumerate(src):
|
|
125
|
-
data_row[self.scope_anchor.slot_idx] = val
|
|
126
|
-
# stored target_expr
|
|
127
|
-
row_builder.eval(data_row, self.target_expr_eval_ctx, force_eval=self._target_expr.scope())
|
|
128
|
-
result[i] = data_row[self._target_expr.slot_idx]
|
|
129
|
-
data_row[self.slot_idx] = result
|
|
176
|
+
# eval is handled by JsonMapperDispatcher
|
|
177
|
+
raise AssertionError('this should never be called')
|
|
130
178
|
|
|
131
179
|
def _as_dict(self) -> dict:
|
|
132
180
|
"""
|
|
133
|
-
|
|
181
|
+
JsonMapperDispatch instances are only created by the JsonMapper c'tor and never need to be serialized.
|
|
134
182
|
"""
|
|
135
|
-
|
|
183
|
+
raise AssertionError('this should never be called')
|
|
136
184
|
|
|
137
185
|
@classmethod
|
|
138
|
-
def _from_dict(cls, d: dict, components: list[Expr]) ->
|
|
139
|
-
|
|
140
|
-
return cls(components[0], components[1])
|
|
186
|
+
def _from_dict(cls, d: dict, components: list[Expr]) -> JsonMapperDispatch:
|
|
187
|
+
raise AssertionError('this should never be called')
|
pixeltable/exprs/json_path.py
CHANGED
|
@@ -11,7 +11,7 @@ from pixeltable import catalog, exceptions as excs, type_system as ts
|
|
|
11
11
|
from .data_row import DataRow
|
|
12
12
|
from .expr import Expr
|
|
13
13
|
from .globals import print_slice
|
|
14
|
-
from .json_mapper import
|
|
14
|
+
from .json_mapper import JsonMapperDispatch
|
|
15
15
|
from .object_ref import ObjectRef
|
|
16
16
|
from .row_builder import RowBuilder
|
|
17
17
|
from .sql_element_cache import SqlElementCache
|
|
@@ -80,11 +80,10 @@ class JsonPath(Expr):
|
|
|
80
80
|
def is_relative_path(self) -> bool:
|
|
81
81
|
return self._anchor is None
|
|
82
82
|
|
|
83
|
-
@property
|
|
84
83
|
def _has_relative_path(self) -> bool:
|
|
85
|
-
return self.is_relative_path() or super()._has_relative_path
|
|
84
|
+
return self.is_relative_path() or super()._has_relative_path()
|
|
86
85
|
|
|
87
|
-
def _bind_rel_paths(self, mapper: Optional['
|
|
86
|
+
def _bind_rel_paths(self, mapper: Optional['JsonMapperDispatch'] = None) -> None:
|
|
88
87
|
if self.is_relative_path():
|
|
89
88
|
# TODO: take scope_idx into account
|
|
90
89
|
self.set_anchor(mapper.scope_anchor)
|
pixeltable/exprs/method_ref.py
CHANGED
|
@@ -23,7 +23,7 @@ class MethodRef(Expr):
|
|
|
23
23
|
# TODO: Should this even be an `Expr`? It can't actually be evaluated directly (it has to be first
|
|
24
24
|
# converted to a `FunctionCall` by binding any remaining parameters).
|
|
25
25
|
|
|
26
|
-
def __init__(self, base_expr: Expr, method_name: str):
|
|
26
|
+
def __init__(self, base_expr: Expr, method_name: str) -> None:
|
|
27
27
|
super().__init__(ts.InvalidType()) # The `MethodRef` is untyped until it is called.
|
|
28
28
|
self.base_expr = base_expr
|
|
29
29
|
self.method_name = method_name
|
|
@@ -43,7 +43,7 @@ class MethodRef(Expr):
|
|
|
43
43
|
assert len(components) == 1
|
|
44
44
|
return cls(components[0], d['method_name'])
|
|
45
45
|
|
|
46
|
-
def __call__(self, *args, **kwargs) -> FunctionCall:
|
|
46
|
+
def __call__(self, *args: Any, **kwargs: Any) -> FunctionCall:
|
|
47
47
|
result = self.fn(*[self.base_expr, *args], **kwargs)
|
|
48
48
|
assert isinstance(result, FunctionCall)
|
|
49
49
|
result.is_method_call = True
|
pixeltable/exprs/object_ref.py
CHANGED
|
@@ -8,7 +8,7 @@ import pixeltable.type_system as ts
|
|
|
8
8
|
|
|
9
9
|
from .data_row import DataRow
|
|
10
10
|
from .expr import Expr, ExprScope
|
|
11
|
-
from .json_mapper import
|
|
11
|
+
from .json_mapper import JsonMapperDispatch
|
|
12
12
|
from .row_builder import RowBuilder
|
|
13
13
|
from .sql_element_cache import SqlElementCache
|
|
14
14
|
|
|
@@ -19,7 +19,7 @@ class ObjectRef(Expr):
|
|
|
19
19
|
The object is generated/materialized elsewhere and establishes a new scope.
|
|
20
20
|
"""
|
|
21
21
|
|
|
22
|
-
def __init__(self, scope: ExprScope, owner:
|
|
22
|
+
def __init__(self, scope: ExprScope, owner: JsonMapperDispatch):
|
|
23
23
|
# TODO: do we need an Unknown type after all?
|
|
24
24
|
super().__init__(ts.JsonType()) # JsonType: this could be anything
|
|
25
25
|
self._scope = scope
|
pixeltable/exprs/row_builder.py
CHANGED
|
@@ -77,6 +77,8 @@ class RowBuilder:
|
|
|
77
77
|
transitive_dependents: np.ndarray # of bool
|
|
78
78
|
# dependencies[i] = direct dependencies of expr with slot idx i; transpose of dependents
|
|
79
79
|
dependencies: np.ndarray # of bool
|
|
80
|
+
# num_dependencies[i] = number of direct dependencies of expr with slot idx i
|
|
81
|
+
num_dependencies: np.ndarray # of int
|
|
80
82
|
|
|
81
83
|
# records the output_expr that a subexpr belongs to
|
|
82
84
|
# (a subexpr can be shared across multiple output exprs)
|
|
@@ -209,6 +211,7 @@ class RowBuilder:
|
|
|
209
211
|
exc_dependencies[expr.slot_idx].add(d.slot_idx)
|
|
210
212
|
exc_dependencies[expr.slot_idx].update(exc_dependencies[d.slot_idx])
|
|
211
213
|
|
|
214
|
+
self.num_dependencies = np.sum(self.dependencies, axis=1)
|
|
212
215
|
self.dependents = self.dependencies.T
|
|
213
216
|
self.transitive_dependents = np.zeros((self.num_materialized, self.num_materialized), dtype=bool)
|
|
214
217
|
for i in reversed(range(self.num_materialized)):
|
|
@@ -275,8 +278,14 @@ class RowBuilder:
|
|
|
275
278
|
for d in e.dependencies():
|
|
276
279
|
self._record_output_expr_id(d, output_expr_id)
|
|
277
280
|
|
|
278
|
-
def _compute_dependencies(
|
|
279
|
-
|
|
281
|
+
def _compute_dependencies(
|
|
282
|
+
self, target_slot_idxs: list[int], excluded_slot_idxs: list[int], target_scope: Optional[ExprScope] = None
|
|
283
|
+
) -> list[int]:
|
|
284
|
+
"""Compute exprs needed to materialize the given target slots, excluding 'excluded_slot_idxs'
|
|
285
|
+
|
|
286
|
+
If target_scope != None, stops transitive dependency resolution when leaving target_scope (ie, includes
|
|
287
|
+
immediate dependents that aren't in target_scope, but doesn't resolve those).
|
|
288
|
+
"""
|
|
280
289
|
dependencies: list[set[int]] = [set() for _ in range(self.num_materialized)] # indexed by slot_idx
|
|
281
290
|
# doing this front-to-back ensures that we capture transitive dependencies
|
|
282
291
|
max_target_slot_idx = max(target_slot_idxs)
|
|
@@ -289,6 +298,9 @@ class RowBuilder:
|
|
|
289
298
|
if expr.slot_idx in self.input_expr_slot_idxs:
|
|
290
299
|
# this is input and therefore doesn't depend on other exprs
|
|
291
300
|
continue
|
|
301
|
+
if target_scope is not None and expr.scope() != target_scope:
|
|
302
|
+
# don't resolve dependencies outside of target_scope
|
|
303
|
+
continue
|
|
292
304
|
for d in expr.dependencies():
|
|
293
305
|
assert d.slot_idx is not None, f'{expr}, {d}'
|
|
294
306
|
if d.slot_idx in excluded_slot_idxs:
|
|
@@ -320,10 +332,15 @@ class RowBuilder:
|
|
|
320
332
|
for c in e.components:
|
|
321
333
|
self.__set_slot_idxs_aux(c)
|
|
322
334
|
|
|
323
|
-
def get_dependencies(
|
|
335
|
+
def get_dependencies(
|
|
336
|
+
self, targets: Iterable[Expr], exclude: Optional[Iterable[Expr]] = None, limit_scope: bool = True
|
|
337
|
+
) -> list[Expr]:
|
|
324
338
|
"""
|
|
325
339
|
Return list of dependencies needed to evaluate the given target exprs (expressed as slot idxs).
|
|
326
340
|
The exprs given in 'exclude' are excluded.
|
|
341
|
+
If limit_scope == True, only returns dependencies in the same scope and immediate (ie, not transitive)
|
|
342
|
+
dependencies from enclosing scopes.
|
|
343
|
+
|
|
327
344
|
Returns:
|
|
328
345
|
list of Exprs from unique_exprs (= with slot_idx set)
|
|
329
346
|
"""
|
|
@@ -334,23 +351,33 @@ class RowBuilder:
|
|
|
334
351
|
return []
|
|
335
352
|
# make sure we only refer to recorded exprs
|
|
336
353
|
targets = [self.unique_exprs[e] for e in targets]
|
|
354
|
+
target_scope: Optional[ExprScope] = None
|
|
355
|
+
if limit_scope:
|
|
356
|
+
# make sure all targets are from the same scope
|
|
357
|
+
target_scopes = {e.scope() for e in targets}
|
|
358
|
+
assert len(target_scopes) == 1
|
|
359
|
+
target_scope = target_scopes.pop()
|
|
337
360
|
exclude = [self.unique_exprs[e] for e in exclude]
|
|
338
361
|
target_slot_idxs = [e.slot_idx for e in targets]
|
|
339
362
|
excluded_slot_idxs = [e.slot_idx for e in exclude]
|
|
340
|
-
all_dependencies = set(
|
|
363
|
+
all_dependencies = set(
|
|
364
|
+
self._compute_dependencies(target_slot_idxs, excluded_slot_idxs, target_scope=target_scope)
|
|
365
|
+
)
|
|
341
366
|
all_dependencies.update(target_slot_idxs)
|
|
342
367
|
result_ids = list(all_dependencies)
|
|
343
368
|
result_ids.sort()
|
|
344
369
|
return [self.unique_exprs[id] for id in result_ids]
|
|
345
370
|
|
|
346
|
-
def create_eval_ctx(
|
|
371
|
+
def create_eval_ctx(
|
|
372
|
+
self, targets: Iterable[Expr], exclude: Optional[Iterable[Expr]] = None, limit_scope: bool = True
|
|
373
|
+
) -> EvalCtx:
|
|
347
374
|
"""Return EvalCtx for targets"""
|
|
348
375
|
targets = list(targets)
|
|
349
376
|
if exclude is None:
|
|
350
377
|
exclude = []
|
|
351
378
|
if len(targets) == 0:
|
|
352
379
|
return self.EvalCtx([], [], [], [])
|
|
353
|
-
dependencies = self.get_dependencies(targets, exclude)
|
|
380
|
+
dependencies = self.get_dependencies(targets, exclude, limit_scope=limit_scope)
|
|
354
381
|
targets = [self.unique_exprs[e] for e in targets]
|
|
355
382
|
target_slot_idxs = [e.slot_idx for e in targets]
|
|
356
383
|
ctx_slot_idxs = [e.slot_idx for e in dependencies]
|
|
@@ -47,7 +47,7 @@ class SimilarityExpr(Expr):
|
|
|
47
47
|
def __repr__(self) -> str:
|
|
48
48
|
return f'{self.components[0]}.similarity({self.components[1]})'
|
|
49
49
|
|
|
50
|
-
def _id_attrs(self):
|
|
50
|
+
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
51
51
|
return [*super()._id_attrs(), ('idx_name', self.idx_info.name)]
|
|
52
52
|
|
|
53
53
|
def default_column_name(self) -> str:
|
|
@@ -17,7 +17,7 @@ class SqlElementCache:
|
|
|
17
17
|
for e, el in elements.items():
|
|
18
18
|
self.cache[e.id] = el
|
|
19
19
|
|
|
20
|
-
def extend(self, elements: ExprDict[sql.ColumnElement]):
|
|
20
|
+
def extend(self, elements: ExprDict[sql.ColumnElement]) -> None:
|
|
21
21
|
for e, el in elements.items():
|
|
22
22
|
self.cache[e.id] = el
|
|
23
23
|
|
pixeltable/exprs/string_op.py
CHANGED
|
@@ -26,7 +26,7 @@ class StringOp(Expr):
|
|
|
26
26
|
self.operator = operator
|
|
27
27
|
self.components = [op1, op2]
|
|
28
28
|
assert op1.col_type.is_string_type()
|
|
29
|
-
if operator in
|
|
29
|
+
if operator in (StringOperator.CONCAT, StringOperator.REPEAT):
|
|
30
30
|
if operator == StringOperator.CONCAT and not op2.col_type.is_string_type():
|
|
31
31
|
raise excs.Error(
|
|
32
32
|
f'{self}: {operator} on strings requires string type, but {op2} has type {op2.col_type}'
|
|
@@ -89,7 +89,7 @@ class StringOp(Expr):
|
|
|
89
89
|
"""
|
|
90
90
|
Return the result of evaluating the expression on two int/float operands
|
|
91
91
|
"""
|
|
92
|
-
assert self.operator in
|
|
92
|
+
assert self.operator in (StringOperator.CONCAT, StringOperator.REPEAT)
|
|
93
93
|
if self.operator == StringOperator.CONCAT:
|
|
94
94
|
assert isinstance(op2_val, str)
|
|
95
95
|
return op1_val + op2_val
|
pixeltable/ext/__init__.py
CHANGED
|
@@ -252,7 +252,7 @@ def uda(
|
|
|
252
252
|
) -> Callable[[type[Aggregator]], AggregateFunction]: ...
|
|
253
253
|
|
|
254
254
|
|
|
255
|
-
def uda(*args, **kwargs):
|
|
255
|
+
def uda(*args, **kwargs): # type: ignore[no-untyped-def]
|
|
256
256
|
"""Decorator for user-defined aggregate functions.
|
|
257
257
|
|
|
258
258
|
The decorated class must inherit from Aggregator and implement the following methods:
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
import asyncio
|
|
4
3
|
import inspect
|
|
5
4
|
from typing import TYPE_CHECKING, Any, Callable, Optional, Sequence
|
|
6
5
|
from uuid import UUID
|
|
@@ -127,12 +126,10 @@ class CallableFunction(Function):
|
|
|
127
126
|
"""
|
|
128
127
|
assert self.is_batched
|
|
129
128
|
assert not self.is_polymorphic
|
|
129
|
+
assert not self.is_async
|
|
130
130
|
# Unpack the constant parameters
|
|
131
131
|
constant_kwargs, batched_kwargs = self.create_batch_kwargs(kwargs)
|
|
132
|
-
|
|
133
|
-
return asyncio.run(self.py_fn(*args, **constant_kwargs, **batched_kwargs))
|
|
134
|
-
else:
|
|
135
|
-
return self.py_fn(*args, **constant_kwargs, **batched_kwargs)
|
|
132
|
+
return self.py_fn(*args, **constant_kwargs, **batched_kwargs)
|
|
136
133
|
|
|
137
134
|
def create_batch_kwargs(self, kwargs: dict[str, Any]) -> tuple[dict[str, Any], dict[str, list[Any]]]:
|
|
138
135
|
"""Converts kwargs containing lists into constant and batched kwargs in the format expected by a batched udf."""
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from typing import Any, Optional, Sequence
|
|
2
2
|
|
|
3
|
-
from pixeltable import exceptions as excs, exprs
|
|
3
|
+
from pixeltable import exceptions as excs, exprs, type_system as ts
|
|
4
4
|
|
|
5
5
|
from .function import Function
|
|
6
6
|
from .signature import Signature
|
|
@@ -76,9 +76,25 @@ class ExprTemplateFunction(Function):
|
|
|
76
76
|
arg_expr = arg
|
|
77
77
|
arg_exprs[param_expr] = arg_expr
|
|
78
78
|
result = result.substitute(arg_exprs)
|
|
79
|
-
assert not result._contains(exprs.Variable)
|
|
80
79
|
return result
|
|
81
80
|
|
|
81
|
+
def call_return_type(self, bound_args: dict[str, 'exprs.Expr']) -> ts.ColumnType:
|
|
82
|
+
"""
|
|
83
|
+
The call_return_type of an ExprTemplateFunction is derived from the template expression's col_type after
|
|
84
|
+
substitution (unlike for UDFs, whose call_return_type is derived from an explicitly specified
|
|
85
|
+
conditional_return_type).
|
|
86
|
+
"""
|
|
87
|
+
assert not self.is_polymorphic
|
|
88
|
+
template = self.template
|
|
89
|
+
with_defaults = bound_args.copy()
|
|
90
|
+
with_defaults.update(
|
|
91
|
+
{param_name: default for param_name, default in template.defaults.items() if param_name not in bound_args}
|
|
92
|
+
)
|
|
93
|
+
substituted_expr = self.template.expr.copy().substitute(
|
|
94
|
+
{template.param_exprs[name]: expr for name, expr in with_defaults.items()}
|
|
95
|
+
)
|
|
96
|
+
return substituted_expr.col_type
|
|
97
|
+
|
|
82
98
|
def _docstring(self) -> Optional[str]:
|
|
83
99
|
if isinstance(self.templates[0].expr, exprs.FunctionCall):
|
|
84
100
|
return self.templates[0].expr.fn._docstring()
|
|
@@ -97,6 +113,10 @@ class ExprTemplateFunction(Function):
|
|
|
97
113
|
|
|
98
114
|
@property
|
|
99
115
|
def display_name(self) -> str:
|
|
116
|
+
if not self.self_name and isinstance(self.templates[0].expr, exprs.FunctionCall):
|
|
117
|
+
# In the common case where the templated expression is itself a FunctionCall,
|
|
118
|
+
# fall back on the display name of the underlying FunctionCall
|
|
119
|
+
return self.templates[0].expr.fn.display_name
|
|
100
120
|
return self.self_name
|
|
101
121
|
|
|
102
122
|
@property
|
pixeltable/func/function.py
CHANGED
|
@@ -10,7 +10,6 @@ from typing import TYPE_CHECKING, Any, Callable, Optional, Sequence, cast
|
|
|
10
10
|
import sqlalchemy as sql
|
|
11
11
|
from typing_extensions import Self
|
|
12
12
|
|
|
13
|
-
import pixeltable as pxt
|
|
14
13
|
import pixeltable.exceptions as excs
|
|
15
14
|
import pixeltable.type_system as ts
|
|
16
15
|
|
|
@@ -155,7 +154,7 @@ class Function(ABC):
|
|
|
155
154
|
"""
|
|
156
155
|
raise NotImplementedError()
|
|
157
156
|
|
|
158
|
-
def __call__(self, *args: Any, **kwargs: Any) -> '
|
|
157
|
+
def __call__(self, *args: Any, **kwargs: Any) -> 'exprs.FunctionCall':
|
|
159
158
|
from pixeltable import exprs
|
|
160
159
|
|
|
161
160
|
args = [exprs.Expr.from_object(arg) for arg in args]
|
|
@@ -246,7 +245,7 @@ class Function(ABC):
|
|
|
246
245
|
# `None` when any of its non-nullable inputs are `None`.
|
|
247
246
|
for arg_name, arg in bound_args.items():
|
|
248
247
|
param = self.signature.parameters[arg_name]
|
|
249
|
-
if param.kind in
|
|
248
|
+
if param.kind in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD):
|
|
250
249
|
continue
|
|
251
250
|
if arg.col_type.nullable and not param.col_type.nullable:
|
|
252
251
|
return_type = return_type.copy(nullable=True)
|
|
@@ -385,10 +384,10 @@ class Function(ABC):
|
|
|
385
384
|
else:
|
|
386
385
|
var = exprs.Variable(name, param.col_type)
|
|
387
386
|
bindings[name] = var
|
|
388
|
-
if args_ok and param.kind in
|
|
387
|
+
if args_ok and param.kind in (
|
|
389
388
|
inspect.Parameter.POSITIONAL_ONLY,
|
|
390
389
|
inspect.Parameter.POSITIONAL_OR_KEYWORD,
|
|
391
|
-
|
|
390
|
+
):
|
|
392
391
|
template_args.append(var)
|
|
393
392
|
else:
|
|
394
393
|
template_kwargs[name] = var
|
|
@@ -31,7 +31,7 @@ class FunctionRegistry:
|
|
|
31
31
|
cls._instance = FunctionRegistry()
|
|
32
32
|
return cls._instance
|
|
33
33
|
|
|
34
|
-
def __init__(self):
|
|
34
|
+
def __init__(self) -> None:
|
|
35
35
|
self.stored_fns_by_id: dict[UUID, Function] = {}
|
|
36
36
|
self.module_fns: dict[str, Function] = {} # fqn -> Function
|
|
37
37
|
self.type_methods: dict[ts.ColumnType.Type, dict[str, Function]] = {}
|
pixeltable/func/signature.py
CHANGED
|
@@ -253,7 +253,7 @@ class Signature:
|
|
|
253
253
|
continue # skip 'self' or 'cls' parameter
|
|
254
254
|
if param.name in cls.SPECIAL_PARAM_NAMES:
|
|
255
255
|
raise excs.Error(f'{param.name!r} is a reserved parameter name')
|
|
256
|
-
if param.kind in
|
|
256
|
+
if param.kind in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD):
|
|
257
257
|
parameters.append(Parameter(param.name, col_type=None, kind=param.kind))
|
|
258
258
|
continue
|
|
259
259
|
|
pixeltable/func/udf.py
CHANGED
|
@@ -43,7 +43,7 @@ def udf(
|
|
|
43
43
|
) -> ExprTemplateFunction: ...
|
|
44
44
|
|
|
45
45
|
|
|
46
|
-
def udf(*args, **kwargs):
|
|
46
|
+
def udf(*args, **kwargs): # type: ignore[no-untyped-def]
|
|
47
47
|
"""A decorator to create a Function from a function definition.
|
|
48
48
|
|
|
49
49
|
Examples:
|
|
@@ -79,7 +79,7 @@ def udf(*args, **kwargs):
|
|
|
79
79
|
if len(args) > 0:
|
|
80
80
|
raise excs.Error('Unexpected @udf decorator arguments.')
|
|
81
81
|
|
|
82
|
-
def decorator(decorated_fn: Callable):
|
|
82
|
+
def decorator(decorated_fn: Callable) -> CallableFunction:
|
|
83
83
|
return make_function(
|
|
84
84
|
decorated_fn,
|
|
85
85
|
batch_size=batch_size,
|
pixeltable/functions/__init__.py
CHANGED
|
@@ -39,7 +39,7 @@ def _anthropic_client() -> 'anthropic.AsyncAnthropic':
|
|
|
39
39
|
|
|
40
40
|
|
|
41
41
|
class AnthropicRateLimitsInfo(env.RateLimitsInfo):
|
|
42
|
-
def __init__(self):
|
|
42
|
+
def __init__(self) -> None:
|
|
43
43
|
super().__init__(self._get_request_resources)
|
|
44
44
|
|
|
45
45
|
def _get_request_resources(self, messages: dict, max_tokens: int) -> dict[str, int]:
|
|
@@ -236,5 +236,5 @@ def _opt(arg: _T) -> Union[_T, 'anthropic.NotGiven']:
|
|
|
236
236
|
__all__ = local_public_names(__name__)
|
|
237
237
|
|
|
238
238
|
|
|
239
|
-
def __dir__():
|
|
239
|
+
def __dir__() -> list[str]:
|
|
240
240
|
return __all__
|
pixeltable/functions/audio.py
CHANGED
pixeltable/functions/deepseek.py
CHANGED