pixeltable 0.3.9__py3-none-any.whl → 0.3.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +2 -3
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +2 -1
- pixeltable/catalog/catalog.py +63 -36
- pixeltable/catalog/column.py +11 -4
- pixeltable/catalog/dir.py +5 -5
- pixeltable/catalog/globals.py +28 -14
- pixeltable/catalog/insertable_table.py +81 -43
- pixeltable/catalog/path.py +2 -2
- pixeltable/catalog/table.py +140 -109
- pixeltable/catalog/table_version.py +60 -43
- pixeltable/catalog/table_version_handle.py +3 -0
- pixeltable/catalog/table_version_path.py +1 -1
- pixeltable/catalog/view.py +17 -9
- pixeltable/dataframe.py +5 -3
- pixeltable/env.py +109 -43
- pixeltable/exec/__init__.py +2 -0
- pixeltable/exec/aggregation_node.py +6 -8
- pixeltable/exec/cache_prefetch_node.py +4 -7
- pixeltable/exec/component_iteration_node.py +1 -3
- pixeltable/exec/data_row_batch.py +1 -2
- pixeltable/exec/exec_context.py +1 -1
- pixeltable/exec/exec_node.py +2 -3
- pixeltable/exec/expr_eval/__init__.py +2 -0
- pixeltable/exec/expr_eval/evaluators.py +137 -20
- pixeltable/exec/expr_eval/expr_eval_node.py +43 -64
- pixeltable/exec/expr_eval/globals.py +68 -7
- pixeltable/exec/expr_eval/schedulers.py +25 -23
- pixeltable/exec/in_memory_data_node.py +8 -6
- pixeltable/exec/row_update_node.py +3 -4
- pixeltable/exec/sql_node.py +16 -17
- pixeltable/exprs/__init__.py +3 -2
- pixeltable/exprs/arithmetic_expr.py +2 -0
- pixeltable/exprs/column_property_ref.py +1 -1
- pixeltable/exprs/column_ref.py +39 -3
- pixeltable/exprs/compound_predicate.py +1 -1
- pixeltable/exprs/data_row.py +17 -1
- pixeltable/exprs/expr.py +51 -21
- pixeltable/exprs/function_call.py +34 -2
- pixeltable/exprs/globals.py +12 -0
- pixeltable/exprs/json_mapper.py +95 -48
- pixeltable/exprs/json_path.py +3 -10
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +33 -6
- pixeltable/exprs/similarity_expr.py +6 -21
- pixeltable/exprs/sql_element_cache.py +1 -1
- pixeltable/exprs/string_op.py +107 -0
- pixeltable/ext/__init__.py +1 -1
- pixeltable/ext/functions/__init__.py +1 -1
- pixeltable/ext/functions/whisperx.py +1 -1
- pixeltable/ext/functions/yolox.py +22 -65
- pixeltable/func/aggregate_function.py +1 -1
- pixeltable/func/callable_function.py +2 -5
- pixeltable/func/expr_template_function.py +22 -2
- pixeltable/func/function.py +4 -5
- pixeltable/func/function_registry.py +1 -1
- pixeltable/func/signature.py +1 -1
- pixeltable/func/tools.py +2 -2
- pixeltable/func/udf.py +2 -2
- pixeltable/functions/__init__.py +2 -2
- pixeltable/functions/anthropic.py +2 -2
- pixeltable/functions/audio.py +1 -1
- pixeltable/functions/deepseek.py +1 -1
- pixeltable/functions/fireworks.py +1 -1
- pixeltable/functions/globals.py +22 -11
- pixeltable/functions/huggingface.py +1 -1
- pixeltable/functions/image.py +1 -1
- pixeltable/functions/json.py +1 -1
- pixeltable/functions/llama_cpp.py +1 -1
- pixeltable/functions/math.py +1 -1
- pixeltable/functions/mistralai.py +1 -1
- pixeltable/functions/ollama.py +1 -1
- pixeltable/functions/openai.py +2 -2
- pixeltable/functions/replicate.py +1 -1
- pixeltable/functions/string.py +1 -1
- pixeltable/functions/timestamp.py +1 -1
- pixeltable/functions/together.py +1 -1
- pixeltable/functions/util.py +1 -1
- pixeltable/functions/video.py +2 -2
- pixeltable/functions/vision.py +2 -2
- pixeltable/globals.py +85 -33
- pixeltable/index/embedding_index.py +12 -1
- pixeltable/io/__init__.py +8 -5
- pixeltable/io/datarows.py +138 -0
- pixeltable/io/external_store.py +8 -5
- pixeltable/io/fiftyone.py +6 -7
- pixeltable/io/globals.py +7 -160
- pixeltable/io/hf_datasets.py +21 -98
- pixeltable/io/label_studio.py +21 -20
- pixeltable/io/pandas.py +35 -48
- pixeltable/io/parquet.py +17 -42
- pixeltable/io/table_data_conduit.py +569 -0
- pixeltable/io/utils.py +6 -21
- pixeltable/iterators/__init__.py +1 -1
- pixeltable/metadata/__init__.py +6 -4
- pixeltable/metadata/converters/convert_24.py +3 -3
- pixeltable/metadata/converters/convert_25.py +1 -1
- pixeltable/metadata/converters/convert_29.py +1 -1
- pixeltable/metadata/converters/convert_30.py +50 -0
- pixeltable/metadata/converters/util.py +26 -1
- pixeltable/metadata/notes.py +1 -0
- pixeltable/metadata/schema.py +3 -0
- pixeltable/store.py +2 -2
- pixeltable/type_system.py +19 -7
- pixeltable/utils/arrow.py +32 -7
- pixeltable/utils/console_output.py +3 -2
- pixeltable/utils/coroutine.py +3 -3
- pixeltable/utils/dbms.py +66 -0
- pixeltable/utils/documents.py +61 -67
- pixeltable/utils/filecache.py +1 -1
- pixeltable/utils/http_server.py +3 -2
- pixeltable/utils/pytorch.py +1 -1
- pixeltable/utils/sql.py +1 -1
- pixeltable-0.3.11.dist-info/METADATA +436 -0
- pixeltable-0.3.11.dist-info/RECORD +179 -0
- {pixeltable-0.3.9.dist-info → pixeltable-0.3.11.dist-info}/WHEEL +1 -1
- pixeltable/catalog/path_dict.py +0 -169
- pixeltable-0.3.9.dist-info/METADATA +0 -382
- pixeltable-0.3.9.dist-info/RECORD +0 -175
- {pixeltable-0.3.9.dist-info → pixeltable-0.3.11.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.9.dist-info → pixeltable-0.3.11.dist-info}/entry_points.txt +0 -0
|
@@ -205,6 +205,10 @@ class FunctionCall(Expr):
|
|
|
205
205
|
def has_group_by(self) -> bool:
|
|
206
206
|
return self.group_by_stop_idx != 0
|
|
207
207
|
|
|
208
|
+
@property
|
|
209
|
+
def is_async(self) -> bool:
|
|
210
|
+
return self.fn.is_async
|
|
211
|
+
|
|
208
212
|
@property
|
|
209
213
|
def group_by(self) -> list[Expr]:
|
|
210
214
|
return self.components[self.group_by_start_idx : self.group_by_stop_idx]
|
|
@@ -272,6 +276,34 @@ class FunctionCall(Expr):
|
|
|
272
276
|
assert isinstance(self.fn, func.AggregateFunction)
|
|
273
277
|
self.aggregator = self.fn.agg_class(**self.agg_init_args)
|
|
274
278
|
|
|
279
|
+
@property
|
|
280
|
+
def bound_args(self) -> dict[str, Expr]:
|
|
281
|
+
"""
|
|
282
|
+
Reconstructs bound arguments from the components of this FunctionCall.
|
|
283
|
+
"""
|
|
284
|
+
bound_args: dict[str, Expr] = {}
|
|
285
|
+
for name, idx in self.bound_idxs.items():
|
|
286
|
+
if isinstance(idx, int):
|
|
287
|
+
bound_args[name] = self.components[idx]
|
|
288
|
+
elif isinstance(idx, Sequence):
|
|
289
|
+
bound_args[name] = Expr.from_object([self.components[i] for i in idx])
|
|
290
|
+
elif isinstance(idx, dict):
|
|
291
|
+
bound_args[name] = Expr.from_object({k: self.components[i] for k, i in idx.items()})
|
|
292
|
+
else:
|
|
293
|
+
raise AssertionError(f'{name}: {idx} (of type `{type(idx)}`)')
|
|
294
|
+
return bound_args
|
|
295
|
+
|
|
296
|
+
def substitute(self, spec: dict[Expr, Expr]) -> Expr:
|
|
297
|
+
"""
|
|
298
|
+
Substitution of FunctionCall arguments could cause the return value to become more specific, in the case
|
|
299
|
+
where a variable is replaced with a specific value.
|
|
300
|
+
"""
|
|
301
|
+
res = super().substitute(spec)
|
|
302
|
+
assert res is self
|
|
303
|
+
self.return_type = self.fn.call_return_type(self.bound_args)
|
|
304
|
+
self.col_type = self.return_type
|
|
305
|
+
return self
|
|
306
|
+
|
|
275
307
|
def update(self, data_row: DataRow) -> None:
|
|
276
308
|
"""
|
|
277
309
|
Update agg state
|
|
@@ -289,7 +321,7 @@ class FunctionCall(Expr):
|
|
|
289
321
|
if (
|
|
290
322
|
val is None
|
|
291
323
|
and parameters_by_pos[idx].kind
|
|
292
|
-
in
|
|
324
|
+
in (inspect.Parameter.POSITIONAL_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD)
|
|
293
325
|
and not parameters_by_pos[idx].col_type.nullable
|
|
294
326
|
):
|
|
295
327
|
return None
|
|
@@ -302,7 +334,7 @@ class FunctionCall(Expr):
|
|
|
302
334
|
if (
|
|
303
335
|
val is None
|
|
304
336
|
and parameters[param_name].kind
|
|
305
|
-
in
|
|
337
|
+
in (inspect.Parameter.KEYWORD_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD)
|
|
306
338
|
and not parameters[param_name].col_type.nullable
|
|
307
339
|
):
|
|
308
340
|
return None
|
pixeltable/exprs/globals.py
CHANGED
|
@@ -87,3 +87,15 @@ class ArithmeticOperator(enum.Enum):
|
|
|
87
87
|
if self == self.FLOORDIV:
|
|
88
88
|
return '//'
|
|
89
89
|
raise AssertionError()
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class StringOperator(enum.Enum):
|
|
93
|
+
CONCAT = 0
|
|
94
|
+
REPEAT = 1
|
|
95
|
+
|
|
96
|
+
def __str__(self) -> str:
|
|
97
|
+
if self == self.CONCAT:
|
|
98
|
+
return '+'
|
|
99
|
+
if self == self.REPEAT:
|
|
100
|
+
return '*'
|
|
101
|
+
raise AssertionError()
|
pixeltable/exprs/json_mapper.py
CHANGED
|
@@ -20,16 +20,84 @@ class JsonMapper(Expr):
|
|
|
20
20
|
JsonMapper transforms the list output of a JsonPath by applying a target expr to every element of the list.
|
|
21
21
|
The target expr would typically contain relative JsonPaths, which are bound to an ObjectRef, which in turn
|
|
22
22
|
is populated by JsonMapper.eval(). The JsonMapper effectively creates a new scope for its target expr.
|
|
23
|
+
|
|
24
|
+
JsonMapper is executed in two phases:
|
|
25
|
+
- the first phase is handled by Expr subclass JsonMapperDispatch, which constructs one nested DataRow per source
|
|
26
|
+
list element and evaluates the target expr within that (the nested DataRows are stored as a NestedRowList in the
|
|
27
|
+
slot of JsonMapperDispatch)
|
|
28
|
+
- JsonMapper.eval() collects the slot values of the target expr into its result list
|
|
23
29
|
"""
|
|
24
30
|
|
|
25
31
|
target_expr_scope: ExprScope
|
|
26
32
|
parent_mapper: Optional[JsonMapper]
|
|
27
33
|
target_expr_eval_ctx: Optional[RowBuilder.EvalCtx]
|
|
28
34
|
|
|
29
|
-
def __init__(self, src_expr: Expr, target_expr: Expr):
|
|
35
|
+
def __init__(self, src_expr: Optional[Expr], target_expr: Optional[Expr]):
|
|
30
36
|
# TODO: type spec should be list[target_expr.col_type]
|
|
31
37
|
super().__init__(ts.JsonType())
|
|
32
38
|
|
|
39
|
+
dispatch = JsonMapperDispatch(src_expr, target_expr)
|
|
40
|
+
self.components.append(dispatch)
|
|
41
|
+
self.id = self._create_id()
|
|
42
|
+
|
|
43
|
+
def __repr__(self) -> str:
|
|
44
|
+
return f'map({self._src_expr}, lambda R: {self._target_expr})'
|
|
45
|
+
|
|
46
|
+
@property
|
|
47
|
+
def _src_expr(self) -> Expr:
|
|
48
|
+
return self.components[0].src_expr
|
|
49
|
+
|
|
50
|
+
@property
|
|
51
|
+
def _target_expr(self) -> Expr:
|
|
52
|
+
return self.components[0].target_expr
|
|
53
|
+
|
|
54
|
+
def _equals(self, _: JsonMapper) -> bool:
|
|
55
|
+
return True
|
|
56
|
+
|
|
57
|
+
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
58
|
+
return None
|
|
59
|
+
|
|
60
|
+
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
61
|
+
from ..exec.expr_eval.evaluators import NestedRowList
|
|
62
|
+
|
|
63
|
+
dispatch_slot_idx = self.components[0].slot_idx
|
|
64
|
+
nested_rows = data_row.vals[dispatch_slot_idx]
|
|
65
|
+
if nested_rows is None:
|
|
66
|
+
data_row[self.slot_idx] = None
|
|
67
|
+
return
|
|
68
|
+
assert isinstance(nested_rows, NestedRowList)
|
|
69
|
+
# TODO: get the materialized slot idx, instead of relying on the fact that the target_expr is always at the end
|
|
70
|
+
data_row[self.slot_idx] = [row.vals[-1] for row in nested_rows.rows]
|
|
71
|
+
|
|
72
|
+
def _as_dict(self) -> dict:
|
|
73
|
+
"""
|
|
74
|
+
We only serialize src and target exprs, everything else is re-created at runtime.
|
|
75
|
+
"""
|
|
76
|
+
return {'components': [self._src_expr.as_dict(), self._target_expr.as_dict()]}
|
|
77
|
+
|
|
78
|
+
@classmethod
|
|
79
|
+
def _from_dict(cls, d: dict, components: list[Expr]) -> JsonMapper:
|
|
80
|
+
assert len(components) == 2
|
|
81
|
+
src_expr, target_expr = components[0], components[1]
|
|
82
|
+
return cls(src_expr, target_expr)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class JsonMapperDispatch(Expr):
|
|
86
|
+
"""
|
|
87
|
+
An operational Expr (ie, it doesn't represent any syntactic element) that is used by JsonMapper to materialize
|
|
88
|
+
its input DataRows. It has the same dependencies as the originating JsonMapper.
|
|
89
|
+
|
|
90
|
+
- The execution (= row dispatch) is handled by an expr_eval.Evaluator (JsonMapperDispatcher).
|
|
91
|
+
- It stores a NestedRowList instance in its slot.
|
|
92
|
+
"""
|
|
93
|
+
|
|
94
|
+
target_expr_scope: ExprScope
|
|
95
|
+
parent_mapper: Optional[JsonMapperDispatch]
|
|
96
|
+
target_expr_eval_ctx: Optional[RowBuilder.EvalCtx]
|
|
97
|
+
|
|
98
|
+
def __init__(self, src_expr: Expr, target_expr: Expr):
|
|
99
|
+
super().__init__(ts.InvalidType())
|
|
100
|
+
|
|
33
101
|
# we're creating a new scope, but we don't know yet whether this is nested within another JsonMapper;
|
|
34
102
|
# this gets resolved in bind_rel_paths(); for now we assume we're in the global scope
|
|
35
103
|
self.target_expr_scope = ExprScope(_GLOBAL_SCOPE)
|
|
@@ -40,28 +108,36 @@ class JsonMapper(Expr):
|
|
|
40
108
|
self.parent_mapper = None
|
|
41
109
|
self.target_expr_eval_ctx = None
|
|
42
110
|
|
|
43
|
-
# Intentionally create the id now, before adding the scope anchor; this ensures that JsonMapper instances will
|
|
44
|
-
# be recognized as equal so long as they have the same src_expr and target_expr.
|
|
111
|
+
# Intentionally create the id now, before adding the scope anchor; this ensures that JsonMapperDispatch
|
|
112
|
+
# instances will be recognized as equal so long as they have the same src_expr and target_expr.
|
|
45
113
|
# TODO: Might this cause problems after certain substitutions?
|
|
46
114
|
self.id = self._create_id()
|
|
47
115
|
|
|
48
116
|
scope_anchor = ObjectRef(self.target_expr_scope, self)
|
|
49
117
|
self.components.append(scope_anchor)
|
|
50
118
|
|
|
51
|
-
def _bind_rel_paths(self, mapper: Optional[
|
|
52
|
-
self.
|
|
53
|
-
self.
|
|
119
|
+
def _bind_rel_paths(self, mapper: Optional[JsonMapperDispatch] = None) -> None:
|
|
120
|
+
self.src_expr._bind_rel_paths(mapper)
|
|
121
|
+
self.target_expr._bind_rel_paths(self)
|
|
54
122
|
self.parent_mapper = mapper
|
|
55
123
|
parent_scope = _GLOBAL_SCOPE if mapper is None else mapper.target_expr_scope
|
|
56
124
|
self.target_expr_scope.parent = parent_scope
|
|
57
125
|
|
|
126
|
+
def equals(self, other: Expr) -> bool:
|
|
127
|
+
"""
|
|
128
|
+
We override equals() because we need to avoid comparing our scope anchor.
|
|
129
|
+
"""
|
|
130
|
+
if type(self) is not type(other):
|
|
131
|
+
return False
|
|
132
|
+
return self.src_expr.equals(other.src_expr) and self.target_expr.equals(other.target_expr)
|
|
133
|
+
|
|
58
134
|
def scope(self) -> ExprScope:
|
|
59
135
|
# need to ignore target_expr
|
|
60
|
-
return self.
|
|
136
|
+
return self.src_expr.scope()
|
|
61
137
|
|
|
62
138
|
def dependencies(self) -> list[Expr]:
|
|
63
|
-
result = [self.
|
|
64
|
-
result.extend(self._target_dependencies(self.
|
|
139
|
+
result = [self.src_expr]
|
|
140
|
+
result.extend(self._target_dependencies(self.target_expr))
|
|
65
141
|
return result
|
|
66
142
|
|
|
67
143
|
def _target_dependencies(self, e: Expr) -> list[Expr]:
|
|
@@ -77,23 +153,12 @@ class JsonMapper(Expr):
|
|
|
77
153
|
result.extend(self._target_dependencies(c))
|
|
78
154
|
return result
|
|
79
155
|
|
|
80
|
-
def equals(self, other: Expr) -> bool:
|
|
81
|
-
"""
|
|
82
|
-
We override equals() because we need to avoid comparing our scope anchor.
|
|
83
|
-
"""
|
|
84
|
-
if type(self) is not type(other):
|
|
85
|
-
return False
|
|
86
|
-
return self._src_expr.equals(other._src_expr) and self._target_expr.equals(other._target_expr)
|
|
87
|
-
|
|
88
|
-
def __repr__(self) -> str:
|
|
89
|
-
return f'{self._src_expr} >> {self._target_expr}'
|
|
90
|
-
|
|
91
156
|
@property
|
|
92
|
-
def
|
|
157
|
+
def src_expr(self) -> Expr:
|
|
93
158
|
return self.components[0]
|
|
94
159
|
|
|
95
160
|
@property
|
|
96
|
-
def
|
|
161
|
+
def target_expr(self) -> Expr:
|
|
97
162
|
return self.components[1]
|
|
98
163
|
|
|
99
164
|
@property
|
|
@@ -104,37 +169,19 @@ class JsonMapper(Expr):
|
|
|
104
169
|
assert isinstance(result, ObjectRef)
|
|
105
170
|
return result
|
|
106
171
|
|
|
107
|
-
def
|
|
108
|
-
return
|
|
109
|
-
|
|
110
|
-
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
111
|
-
return None
|
|
172
|
+
def __repr__(self) -> str:
|
|
173
|
+
return 'JsonMapperDispatch()'
|
|
112
174
|
|
|
113
175
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
114
|
-
#
|
|
115
|
-
|
|
116
|
-
if not isinstance(src, list):
|
|
117
|
-
# invalid/non-list src path
|
|
118
|
-
data_row[self.slot_idx] = None
|
|
119
|
-
return
|
|
120
|
-
|
|
121
|
-
result = [None] * len(src)
|
|
122
|
-
if self.target_expr_eval_ctx is None:
|
|
123
|
-
self.target_expr_eval_ctx = row_builder.create_eval_ctx([self._target_expr])
|
|
124
|
-
for i, val in enumerate(src):
|
|
125
|
-
data_row[self.scope_anchor.slot_idx] = val
|
|
126
|
-
# stored target_expr
|
|
127
|
-
row_builder.eval(data_row, self.target_expr_eval_ctx, force_eval=self._target_expr.scope())
|
|
128
|
-
result[i] = data_row[self._target_expr.slot_idx]
|
|
129
|
-
data_row[self.slot_idx] = result
|
|
176
|
+
# eval is handled by JsonMapperDispatcher
|
|
177
|
+
raise AssertionError('this should never be called')
|
|
130
178
|
|
|
131
179
|
def _as_dict(self) -> dict:
|
|
132
180
|
"""
|
|
133
|
-
|
|
181
|
+
JsonMapperDispatch instances are only created by the JsonMapper c'tor and never need to be serialized.
|
|
134
182
|
"""
|
|
135
|
-
|
|
183
|
+
raise AssertionError('this should never be called')
|
|
136
184
|
|
|
137
185
|
@classmethod
|
|
138
|
-
def _from_dict(cls, d: dict, components: list[Expr]) ->
|
|
139
|
-
|
|
140
|
-
return cls(components[0], components[1])
|
|
186
|
+
def _from_dict(cls, d: dict, components: list[Expr]) -> JsonMapperDispatch:
|
|
187
|
+
raise AssertionError('this should never be called')
|
pixeltable/exprs/json_path.py
CHANGED
|
@@ -11,7 +11,7 @@ from pixeltable import catalog, exceptions as excs, type_system as ts
|
|
|
11
11
|
from .data_row import DataRow
|
|
12
12
|
from .expr import Expr
|
|
13
13
|
from .globals import print_slice
|
|
14
|
-
from .json_mapper import
|
|
14
|
+
from .json_mapper import JsonMapperDispatch
|
|
15
15
|
from .object_ref import ObjectRef
|
|
16
16
|
from .row_builder import RowBuilder
|
|
17
17
|
from .sql_element_cache import SqlElementCache
|
|
@@ -80,11 +80,10 @@ class JsonPath(Expr):
|
|
|
80
80
|
def is_relative_path(self) -> bool:
|
|
81
81
|
return self._anchor is None
|
|
82
82
|
|
|
83
|
-
@property
|
|
84
83
|
def _has_relative_path(self) -> bool:
|
|
85
|
-
return self.is_relative_path() or super()._has_relative_path
|
|
84
|
+
return self.is_relative_path() or super()._has_relative_path()
|
|
86
85
|
|
|
87
|
-
def _bind_rel_paths(self, mapper: Optional['
|
|
86
|
+
def _bind_rel_paths(self, mapper: Optional['JsonMapperDispatch'] = None) -> None:
|
|
88
87
|
if self.is_relative_path():
|
|
89
88
|
# TODO: take scope_idx into account
|
|
90
89
|
self.set_anchor(mapper.scope_anchor)
|
|
@@ -110,12 +109,6 @@ class JsonPath(Expr):
|
|
|
110
109
|
return JsonPath(self._anchor, [*self.path_elements, index])
|
|
111
110
|
raise excs.Error(f'Invalid json list index: {index}')
|
|
112
111
|
|
|
113
|
-
def __rshift__(self, other: object) -> 'JsonMapper':
|
|
114
|
-
rhs_expr = Expr.from_object(other)
|
|
115
|
-
if rhs_expr is None:
|
|
116
|
-
raise excs.Error(f'>> requires an expression on the right-hand side, found {type(other)}')
|
|
117
|
-
return JsonMapper(self, rhs_expr)
|
|
118
|
-
|
|
119
112
|
def default_column_name(self) -> Optional[str]:
|
|
120
113
|
anchor_name = self._anchor.default_column_name() if self._anchor is not None else ''
|
|
121
114
|
ret_name = f'{anchor_name}.{self._json_path()}'
|
pixeltable/exprs/method_ref.py
CHANGED
|
@@ -23,7 +23,7 @@ class MethodRef(Expr):
|
|
|
23
23
|
# TODO: Should this even be an `Expr`? It can't actually be evaluated directly (it has to be first
|
|
24
24
|
# converted to a `FunctionCall` by binding any remaining parameters).
|
|
25
25
|
|
|
26
|
-
def __init__(self, base_expr: Expr, method_name: str):
|
|
26
|
+
def __init__(self, base_expr: Expr, method_name: str) -> None:
|
|
27
27
|
super().__init__(ts.InvalidType()) # The `MethodRef` is untyped until it is called.
|
|
28
28
|
self.base_expr = base_expr
|
|
29
29
|
self.method_name = method_name
|
|
@@ -43,7 +43,7 @@ class MethodRef(Expr):
|
|
|
43
43
|
assert len(components) == 1
|
|
44
44
|
return cls(components[0], d['method_name'])
|
|
45
45
|
|
|
46
|
-
def __call__(self, *args, **kwargs) -> FunctionCall:
|
|
46
|
+
def __call__(self, *args: Any, **kwargs: Any) -> FunctionCall:
|
|
47
47
|
result = self.fn(*[self.base_expr, *args], **kwargs)
|
|
48
48
|
assert isinstance(result, FunctionCall)
|
|
49
49
|
result.is_method_call = True
|
pixeltable/exprs/object_ref.py
CHANGED
|
@@ -8,7 +8,7 @@ import pixeltable.type_system as ts
|
|
|
8
8
|
|
|
9
9
|
from .data_row import DataRow
|
|
10
10
|
from .expr import Expr, ExprScope
|
|
11
|
-
from .json_mapper import
|
|
11
|
+
from .json_mapper import JsonMapperDispatch
|
|
12
12
|
from .row_builder import RowBuilder
|
|
13
13
|
from .sql_element_cache import SqlElementCache
|
|
14
14
|
|
|
@@ -19,7 +19,7 @@ class ObjectRef(Expr):
|
|
|
19
19
|
The object is generated/materialized elsewhere and establishes a new scope.
|
|
20
20
|
"""
|
|
21
21
|
|
|
22
|
-
def __init__(self, scope: ExprScope, owner:
|
|
22
|
+
def __init__(self, scope: ExprScope, owner: JsonMapperDispatch):
|
|
23
23
|
# TODO: do we need an Unknown type after all?
|
|
24
24
|
super().__init__(ts.JsonType()) # JsonType: this could be anything
|
|
25
25
|
self._scope = scope
|
pixeltable/exprs/row_builder.py
CHANGED
|
@@ -77,6 +77,8 @@ class RowBuilder:
|
|
|
77
77
|
transitive_dependents: np.ndarray # of bool
|
|
78
78
|
# dependencies[i] = direct dependencies of expr with slot idx i; transpose of dependents
|
|
79
79
|
dependencies: np.ndarray # of bool
|
|
80
|
+
# num_dependencies[i] = number of direct dependencies of expr with slot idx i
|
|
81
|
+
num_dependencies: np.ndarray # of int
|
|
80
82
|
|
|
81
83
|
# records the output_expr that a subexpr belongs to
|
|
82
84
|
# (a subexpr can be shared across multiple output exprs)
|
|
@@ -209,6 +211,7 @@ class RowBuilder:
|
|
|
209
211
|
exc_dependencies[expr.slot_idx].add(d.slot_idx)
|
|
210
212
|
exc_dependencies[expr.slot_idx].update(exc_dependencies[d.slot_idx])
|
|
211
213
|
|
|
214
|
+
self.num_dependencies = np.sum(self.dependencies, axis=1)
|
|
212
215
|
self.dependents = self.dependencies.T
|
|
213
216
|
self.transitive_dependents = np.zeros((self.num_materialized, self.num_materialized), dtype=bool)
|
|
214
217
|
for i in reversed(range(self.num_materialized)):
|
|
@@ -275,8 +278,14 @@ class RowBuilder:
|
|
|
275
278
|
for d in e.dependencies():
|
|
276
279
|
self._record_output_expr_id(d, output_expr_id)
|
|
277
280
|
|
|
278
|
-
def _compute_dependencies(
|
|
279
|
-
|
|
281
|
+
def _compute_dependencies(
|
|
282
|
+
self, target_slot_idxs: list[int], excluded_slot_idxs: list[int], target_scope: Optional[ExprScope] = None
|
|
283
|
+
) -> list[int]:
|
|
284
|
+
"""Compute exprs needed to materialize the given target slots, excluding 'excluded_slot_idxs'
|
|
285
|
+
|
|
286
|
+
If target_scope != None, stops transitive dependency resolution when leaving target_scope (ie, includes
|
|
287
|
+
immediate dependencies that aren't in target_scope, but doesn't resolve those).
|
|
288
|
+
"""
|
|
280
289
|
dependencies: list[set[int]] = [set() for _ in range(self.num_materialized)] # indexed by slot_idx
|
|
281
290
|
# doing this front-to-back ensures that we capture transitive dependencies
|
|
282
291
|
max_target_slot_idx = max(target_slot_idxs)
|
|
@@ -289,6 +298,9 @@ class RowBuilder:
|
|
|
289
298
|
if expr.slot_idx in self.input_expr_slot_idxs:
|
|
290
299
|
# this is input and therefore doesn't depend on other exprs
|
|
291
300
|
continue
|
|
301
|
+
if target_scope is not None and expr.scope() != target_scope:
|
|
302
|
+
# don't resolve dependencies outside of target_scope
|
|
303
|
+
continue
|
|
292
304
|
for d in expr.dependencies():
|
|
293
305
|
assert d.slot_idx is not None, f'{expr}, {d}'
|
|
294
306
|
if d.slot_idx in excluded_slot_idxs:
|
|
@@ -320,10 +332,15 @@ class RowBuilder:
|
|
|
320
332
|
for c in e.components:
|
|
321
333
|
self.__set_slot_idxs_aux(c)
|
|
322
334
|
|
|
323
|
-
def get_dependencies(
|
|
335
|
+
def get_dependencies(
|
|
336
|
+
self, targets: Iterable[Expr], exclude: Optional[Iterable[Expr]] = None, limit_scope: bool = True
|
|
337
|
+
) -> list[Expr]:
|
|
324
338
|
"""
|
|
325
339
|
Return list of dependencies needed to evaluate the given target exprs (expressed as slot idxs).
|
|
326
340
|
The exprs given in 'exclude' are excluded.
|
|
341
|
+
If limit_scope == True, only returns dependencies in the same scope and immediate (ie, not transitive)
|
|
342
|
+
dependencies from enclosing scopes.
|
|
343
|
+
|
|
327
344
|
Returns:
|
|
328
345
|
list of Exprs from unique_exprs (= with slot_idx set)
|
|
329
346
|
"""
|
|
@@ -334,23 +351,33 @@ class RowBuilder:
|
|
|
334
351
|
return []
|
|
335
352
|
# make sure we only refer to recorded exprs
|
|
336
353
|
targets = [self.unique_exprs[e] for e in targets]
|
|
354
|
+
target_scope: Optional[ExprScope] = None
|
|
355
|
+
if limit_scope:
|
|
356
|
+
# make sure all targets are from the same scope
|
|
357
|
+
target_scopes = {e.scope() for e in targets}
|
|
358
|
+
assert len(target_scopes) == 1
|
|
359
|
+
target_scope = target_scopes.pop()
|
|
337
360
|
exclude = [self.unique_exprs[e] for e in exclude]
|
|
338
361
|
target_slot_idxs = [e.slot_idx for e in targets]
|
|
339
362
|
excluded_slot_idxs = [e.slot_idx for e in exclude]
|
|
340
|
-
all_dependencies = set(
|
|
363
|
+
all_dependencies = set(
|
|
364
|
+
self._compute_dependencies(target_slot_idxs, excluded_slot_idxs, target_scope=target_scope)
|
|
365
|
+
)
|
|
341
366
|
all_dependencies.update(target_slot_idxs)
|
|
342
367
|
result_ids = list(all_dependencies)
|
|
343
368
|
result_ids.sort()
|
|
344
369
|
return [self.unique_exprs[id] for id in result_ids]
|
|
345
370
|
|
|
346
|
-
def create_eval_ctx(
|
|
371
|
+
def create_eval_ctx(
|
|
372
|
+
self, targets: Iterable[Expr], exclude: Optional[Iterable[Expr]] = None, limit_scope: bool = True
|
|
373
|
+
) -> EvalCtx:
|
|
347
374
|
"""Return EvalCtx for targets"""
|
|
348
375
|
targets = list(targets)
|
|
349
376
|
if exclude is None:
|
|
350
377
|
exclude = []
|
|
351
378
|
if len(targets) == 0:
|
|
352
379
|
return self.EvalCtx([], [], [], [])
|
|
353
|
-
dependencies = self.get_dependencies(targets, exclude)
|
|
380
|
+
dependencies = self.get_dependencies(targets, exclude, limit_scope=limit_scope)
|
|
354
381
|
targets = [self.unique_exprs[e] for e in targets]
|
|
355
382
|
target_slot_idxs = [e.slot_idx for e in targets]
|
|
356
383
|
ctx_slot_idxs = [e.slot_idx for e in dependencies]
|
|
@@ -23,26 +23,12 @@ class SimilarityExpr(Expr):
|
|
|
23
23
|
|
|
24
24
|
self.components = [col_ref, item_expr]
|
|
25
25
|
|
|
26
|
-
# determine index to use
|
|
27
|
-
idx_info = col_ref.col.get_idx_info()
|
|
28
26
|
from pixeltable import index
|
|
29
27
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
raise excs.Error(f'No index found for column {col_ref.col!r}')
|
|
35
|
-
if idx_name is not None and idx_name not in embedding_idx_info:
|
|
36
|
-
raise excs.Error(f'Index {idx_name!r} not found for column {col_ref.col.name!r}')
|
|
37
|
-
if len(embedding_idx_info) > 1:
|
|
38
|
-
if idx_name is None:
|
|
39
|
-
raise excs.Error(
|
|
40
|
-
f'Column {col_ref.col.name!r} has multiple indices; use the index name to disambiguate: '
|
|
41
|
-
f'`{col_ref.col.name}.similarity(..., idx=<name>)`'
|
|
42
|
-
)
|
|
43
|
-
self.idx_info = embedding_idx_info[idx_name]
|
|
44
|
-
else:
|
|
45
|
-
self.idx_info = next(iter(embedding_idx_info.values()))
|
|
28
|
+
# determine index to use
|
|
29
|
+
idx_dict = ColumnRef.find_embedding_index(col_ref.col, idx_name, 'similarity')
|
|
30
|
+
assert len(idx_dict) == 1
|
|
31
|
+
self.idx_info = next(iter(idx_dict.values()))
|
|
46
32
|
idx = self.idx_info.idx
|
|
47
33
|
assert isinstance(idx, index.EmbeddingIndex)
|
|
48
34
|
|
|
@@ -61,7 +47,7 @@ class SimilarityExpr(Expr):
|
|
|
61
47
|
def __repr__(self) -> str:
|
|
62
48
|
return f'{self.components[0]}.similarity({self.components[1]})'
|
|
63
49
|
|
|
64
|
-
def _id_attrs(self):
|
|
50
|
+
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
65
51
|
return [*super()._id_attrs(), ('idx_name', self.idx_info.name)]
|
|
66
52
|
|
|
67
53
|
def default_column_name(self) -> str:
|
|
@@ -86,8 +72,7 @@ class SimilarityExpr(Expr):
|
|
|
86
72
|
return self.idx_info.idx.order_by_clause(self.idx_info.val_col, item, is_asc)
|
|
87
73
|
|
|
88
74
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
89
|
-
|
|
90
|
-
raise AssertionError()
|
|
75
|
+
raise excs.Error('similarity(): cannot be used in a computed column')
|
|
91
76
|
|
|
92
77
|
def _as_dict(self) -> dict:
|
|
93
78
|
return {'idx_name': self.idx_info.name, **super()._as_dict()}
|
|
@@ -17,7 +17,7 @@ class SqlElementCache:
|
|
|
17
17
|
for e, el in elements.items():
|
|
18
18
|
self.cache[e.id] = el
|
|
19
19
|
|
|
20
|
-
def extend(self, elements: ExprDict[sql.ColumnElement]):
|
|
20
|
+
def extend(self, elements: ExprDict[sql.ColumnElement]) -> None:
|
|
21
21
|
for e, el in elements.items():
|
|
22
22
|
self.cache[e.id] = el
|
|
23
23
|
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any, Optional, Union
|
|
4
|
+
|
|
5
|
+
import sqlalchemy as sql
|
|
6
|
+
|
|
7
|
+
import pixeltable.exceptions as excs
|
|
8
|
+
import pixeltable.type_system as ts
|
|
9
|
+
|
|
10
|
+
from .data_row import DataRow
|
|
11
|
+
from .expr import Expr
|
|
12
|
+
from .globals import StringOperator
|
|
13
|
+
from .row_builder import RowBuilder
|
|
14
|
+
from .sql_element_cache import SqlElementCache
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class StringOp(Expr):
|
|
18
|
+
"""
|
|
19
|
+
Allows operations on strings
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
operator: StringOperator
|
|
23
|
+
|
|
24
|
+
def __init__(self, operator: StringOperator, op1: Expr, op2: Expr):
|
|
25
|
+
super().__init__(ts.StringType(nullable=op1.col_type.nullable))
|
|
26
|
+
self.operator = operator
|
|
27
|
+
self.components = [op1, op2]
|
|
28
|
+
assert op1.col_type.is_string_type()
|
|
29
|
+
if operator in (StringOperator.CONCAT, StringOperator.REPEAT):
|
|
30
|
+
if operator == StringOperator.CONCAT and not op2.col_type.is_string_type():
|
|
31
|
+
raise excs.Error(
|
|
32
|
+
f'{self}: {operator} on strings requires string type, but {op2} has type {op2.col_type}'
|
|
33
|
+
)
|
|
34
|
+
if operator == StringOperator.REPEAT and not op2.col_type.is_int_type():
|
|
35
|
+
raise excs.Error(f'{self}: {operator} on strings requires int type, but {op2} has type {op2.col_type}')
|
|
36
|
+
else:
|
|
37
|
+
raise excs.Error(
|
|
38
|
+
f'{self}: invalid operation {operator} on strings; '
|
|
39
|
+
f'only operators {StringOperator.CONCAT} and {StringOperator.REPEAT} are supported'
|
|
40
|
+
)
|
|
41
|
+
self.id = self._create_id()
|
|
42
|
+
|
|
43
|
+
@property
|
|
44
|
+
def _op1(self) -> Expr:
|
|
45
|
+
return self.components[0]
|
|
46
|
+
|
|
47
|
+
@property
|
|
48
|
+
def _op2(self) -> Expr:
|
|
49
|
+
return self.components[1]
|
|
50
|
+
|
|
51
|
+
def __repr__(self) -> str:
|
|
52
|
+
# add parentheses around operands that are themselves StringOp exprs to express precedence
|
|
53
|
+
op1_str = f'({self._op1})' if isinstance(self._op1, StringOp) else str(self._op1)
|
|
54
|
+
op2_str = f'({self._op2})' if isinstance(self._op2, StringOp) else str(self._op2)
|
|
55
|
+
return f'{op1_str} {self.operator} {op2_str}'
|
|
56
|
+
|
|
57
|
+
def _equals(self, other: StringOp) -> bool:
|
|
58
|
+
return self.operator == other.operator
|
|
59
|
+
|
|
60
|
+
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
61
|
+
return [*super()._id_attrs(), ('operator', self.operator.value)]
|
|
62
|
+
|
|
63
|
+
def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
64
|
+
left = sql_elements.get(self._op1)
|
|
65
|
+
right = sql_elements.get(self._op2)
|
|
66
|
+
if left is None or right is None:
|
|
67
|
+
return None
|
|
68
|
+
if self.operator == StringOperator.CONCAT:
|
|
69
|
+
return left.concat(right)
|
|
70
|
+
if self.operator == StringOperator.REPEAT:
|
|
71
|
+
return sql.func.repeat(sql.cast(left, sql.String), sql.cast(right, sql.Integer))
|
|
72
|
+
return None
|
|
73
|
+
|
|
74
|
+
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
75
|
+
op1_val = data_row[self._op1.slot_idx]
|
|
76
|
+
op2_val = data_row[self._op2.slot_idx]
|
|
77
|
+
data_row[self.slot_idx] = self.eval_nullable(op1_val, op2_val)
|
|
78
|
+
|
|
79
|
+
def eval_nullable(self, op1_val: Union[str, None], op2_val: Union[int, str, None]) -> Union[str, None]:
|
|
80
|
+
"""
|
|
81
|
+
Return the result of evaluating the expression on a nullable str operand and a nullable str/int operand,
|
|
82
|
+
None is interpreted as SQL NULL
|
|
83
|
+
"""
|
|
84
|
+
if op1_val is None or op2_val is None:
|
|
85
|
+
return None
|
|
86
|
+
return self.eval_non_null(op1_val, op2_val)
|
|
87
|
+
|
|
88
|
+
def eval_non_null(self, op1_val: str, op2_val: Union[int, str]) -> str:
|
|
89
|
+
"""
|
|
90
|
+
Return the result of evaluating the expression on a non-null str operand and a non-null str/int operand
|
|
91
|
+
"""
|
|
92
|
+
assert self.operator in (StringOperator.CONCAT, StringOperator.REPEAT)
|
|
93
|
+
if self.operator == StringOperator.CONCAT:
|
|
94
|
+
assert isinstance(op2_val, str)
|
|
95
|
+
return op1_val + op2_val
|
|
96
|
+
else:
|
|
97
|
+
assert isinstance(op2_val, int)
|
|
98
|
+
return op1_val * op2_val
|
|
99
|
+
|
|
100
|
+
def _as_dict(self) -> dict:
|
|
101
|
+
return {'operator': self.operator.value, **super()._as_dict()}
|
|
102
|
+
|
|
103
|
+
@classmethod
|
|
104
|
+
def _from_dict(cls, d: dict, components: list[Expr]) -> StringOp:
|
|
105
|
+
assert 'operator' in d
|
|
106
|
+
assert len(components) == 2
|
|
107
|
+
return cls(StringOperator(d['operator']), components[0], components[1])
|
pixeltable/ext/__init__.py
CHANGED