pixeltable 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +34 -6
- pixeltable/catalog/__init__.py +13 -0
- pixeltable/catalog/catalog.py +159 -0
- pixeltable/catalog/column.py +200 -0
- pixeltable/catalog/dir.py +32 -0
- pixeltable/catalog/globals.py +33 -0
- pixeltable/catalog/insertable_table.py +191 -0
- pixeltable/catalog/named_function.py +36 -0
- pixeltable/catalog/path.py +58 -0
- pixeltable/catalog/path_dict.py +139 -0
- pixeltable/catalog/schema_object.py +39 -0
- pixeltable/catalog/table.py +581 -0
- pixeltable/catalog/table_version.py +749 -0
- pixeltable/catalog/table_version_path.py +133 -0
- pixeltable/catalog/view.py +203 -0
- pixeltable/client.py +520 -30
- pixeltable/dataframe.py +540 -349
- pixeltable/env.py +373 -45
- pixeltable/exceptions.py +12 -21
- pixeltable/exec/__init__.py +9 -0
- pixeltable/exec/aggregation_node.py +78 -0
- pixeltable/exec/cache_prefetch_node.py +113 -0
- pixeltable/exec/component_iteration_node.py +79 -0
- pixeltable/exec/data_row_batch.py +95 -0
- pixeltable/exec/exec_context.py +22 -0
- pixeltable/exec/exec_node.py +61 -0
- pixeltable/exec/expr_eval_node.py +217 -0
- pixeltable/exec/in_memory_data_node.py +69 -0
- pixeltable/exec/media_validation_node.py +43 -0
- pixeltable/exec/sql_scan_node.py +225 -0
- pixeltable/exprs/__init__.py +24 -0
- pixeltable/exprs/arithmetic_expr.py +102 -0
- pixeltable/exprs/array_slice.py +71 -0
- pixeltable/exprs/column_property_ref.py +77 -0
- pixeltable/exprs/column_ref.py +105 -0
- pixeltable/exprs/comparison.py +77 -0
- pixeltable/exprs/compound_predicate.py +98 -0
- pixeltable/exprs/data_row.py +187 -0
- pixeltable/exprs/expr.py +586 -0
- pixeltable/exprs/expr_set.py +39 -0
- pixeltable/exprs/function_call.py +380 -0
- pixeltable/exprs/globals.py +69 -0
- pixeltable/exprs/image_member_access.py +115 -0
- pixeltable/exprs/image_similarity_predicate.py +58 -0
- pixeltable/exprs/inline_array.py +107 -0
- pixeltable/exprs/inline_dict.py +101 -0
- pixeltable/exprs/is_null.py +38 -0
- pixeltable/exprs/json_mapper.py +121 -0
- pixeltable/exprs/json_path.py +159 -0
- pixeltable/exprs/literal.py +54 -0
- pixeltable/exprs/object_ref.py +41 -0
- pixeltable/exprs/predicate.py +44 -0
- pixeltable/exprs/row_builder.py +355 -0
- pixeltable/exprs/rowid_ref.py +94 -0
- pixeltable/exprs/type_cast.py +53 -0
- pixeltable/exprs/variable.py +45 -0
- pixeltable/func/__init__.py +9 -0
- pixeltable/func/aggregate_function.py +194 -0
- pixeltable/func/batched_function.py +53 -0
- pixeltable/func/callable_function.py +69 -0
- pixeltable/func/expr_template_function.py +82 -0
- pixeltable/func/function.py +110 -0
- pixeltable/func/function_registry.py +227 -0
- pixeltable/func/globals.py +36 -0
- pixeltable/func/nos_function.py +202 -0
- pixeltable/func/signature.py +166 -0
- pixeltable/func/udf.py +163 -0
- pixeltable/functions/__init__.py +52 -103
- pixeltable/functions/eval.py +216 -0
- pixeltable/functions/fireworks.py +61 -0
- pixeltable/functions/huggingface.py +120 -0
- pixeltable/functions/image.py +16 -0
- pixeltable/functions/openai.py +88 -0
- pixeltable/functions/pil/image.py +148 -7
- pixeltable/functions/string.py +13 -0
- pixeltable/functions/together.py +27 -0
- pixeltable/functions/util.py +41 -0
- pixeltable/functions/video.py +62 -0
- pixeltable/iterators/__init__.py +3 -0
- pixeltable/iterators/base.py +48 -0
- pixeltable/iterators/document.py +311 -0
- pixeltable/iterators/video.py +89 -0
- pixeltable/metadata/__init__.py +54 -0
- pixeltable/metadata/converters/convert_10.py +18 -0
- pixeltable/metadata/schema.py +211 -0
- pixeltable/plan.py +656 -0
- pixeltable/store.py +413 -182
- pixeltable/tests/conftest.py +143 -87
- pixeltable/tests/test_audio.py +65 -0
- pixeltable/tests/test_catalog.py +27 -0
- pixeltable/tests/test_client.py +14 -14
- pixeltable/tests/test_component_view.py +372 -0
- pixeltable/tests/test_dataframe.py +433 -0
- pixeltable/tests/test_dirs.py +78 -62
- pixeltable/tests/test_document.py +117 -0
- pixeltable/tests/test_exprs.py +591 -135
- pixeltable/tests/test_function.py +297 -67
- pixeltable/tests/test_functions.py +283 -1
- pixeltable/tests/test_migration.py +43 -0
- pixeltable/tests/test_nos.py +54 -0
- pixeltable/tests/test_snapshot.py +208 -0
- pixeltable/tests/test_table.py +1085 -262
- pixeltable/tests/test_transactional_directory.py +42 -0
- pixeltable/tests/test_types.py +5 -11
- pixeltable/tests/test_video.py +149 -34
- pixeltable/tests/test_view.py +530 -0
- pixeltable/tests/utils.py +186 -45
- pixeltable/tool/create_test_db_dump.py +149 -0
- pixeltable/type_system.py +490 -126
- pixeltable/utils/__init__.py +17 -46
- pixeltable/utils/clip.py +12 -15
- pixeltable/utils/coco.py +136 -0
- pixeltable/utils/documents.py +39 -0
- pixeltable/utils/filecache.py +195 -0
- pixeltable/utils/help.py +11 -0
- pixeltable/utils/media_store.py +76 -0
- pixeltable/utils/parquet.py +126 -0
- pixeltable/utils/pytorch.py +172 -0
- pixeltable/utils/s3.py +13 -0
- pixeltable/utils/sql.py +17 -0
- pixeltable/utils/transactional_directory.py +35 -0
- pixeltable-0.2.0.dist-info/LICENSE +18 -0
- pixeltable-0.2.0.dist-info/METADATA +117 -0
- pixeltable-0.2.0.dist-info/RECORD +125 -0
- {pixeltable-0.1.1.dist-info → pixeltable-0.2.0.dist-info}/WHEEL +1 -1
- pixeltable/catalog.py +0 -1421
- pixeltable/exprs.py +0 -1745
- pixeltable/function.py +0 -269
- pixeltable/functions/clip.py +0 -10
- pixeltable/functions/pil/__init__.py +0 -23
- pixeltable/functions/tf.py +0 -21
- pixeltable/index.py +0 -57
- pixeltable/tests/test_dict.py +0 -24
- pixeltable/tests/test_tf.py +0 -69
- pixeltable/tf.py +0 -33
- pixeltable/utils/tf.py +0 -33
- pixeltable/utils/video.py +0 -32
- pixeltable-0.1.1.dist-info/METADATA +0 -31
- pixeltable-0.1.1.dist-info/RECORD +0 -36
pixeltable/exprs/expr.py
ADDED
|
@@ -0,0 +1,586 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import abc
|
|
4
|
+
import hashlib
|
|
5
|
+
import importlib
|
|
6
|
+
import inspect
|
|
7
|
+
import json
|
|
8
|
+
import sys
|
|
9
|
+
import typing
|
|
10
|
+
from itertools import islice
|
|
11
|
+
from typing import Union, Optional, List, Callable, Any, Dict, Tuple, Set, Generator, Type
|
|
12
|
+
from uuid import UUID
|
|
13
|
+
|
|
14
|
+
import sqlalchemy as sql
|
|
15
|
+
|
|
16
|
+
import pixeltable
|
|
17
|
+
import pixeltable.catalog as catalog
|
|
18
|
+
import pixeltable.exceptions as excs
|
|
19
|
+
import pixeltable.type_system as ts
|
|
20
|
+
import pixeltable.func as func
|
|
21
|
+
from .data_row import DataRow
|
|
22
|
+
from .globals import ComparisonOperator, LogicalOperator, LiteralPythonTypes, ArithmeticOperator
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class ExprScope:
    """
    Describes the scope in which an Expr has to be evaluated. Scopes are chained via `parent`, which is what
    determines how they nest.
    parent is None: outermost scope
    """
    def __init__(self, parent: Optional['ExprScope']):
        self.parent = parent

    def is_contained_in(self, other: 'ExprScope') -> bool:
        """Returns True if `other` is this scope or one of its (transitive) parents."""
        current = self
        # walk up the parent chain until we either hit `other` or run past the outermost scope
        while current is not None:
            if current == other:
                return True
            current = current.parent
        return False
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# The outermost scope (it has no parent); Expr.scope() uses it as the starting point when looking for the
# innermost scope among an expr's components.
_GLOBAL_SCOPE = ExprScope(None)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class Expr(abc.ABC):
    """
    Abstract base class of all expressions.

    Rules for using state in subclasses:
    - all state except for components and slot_idx is shared between copies of an Expr
    - slot_idx is set during analysis (DataFrame.show())
    - during eval(), components can only be accessed via self.components; any Exprs outside of that won't
      have slot_idx set

    The comparison and arithmetic operators are overloaded to construct new Exprs (Comparison, ArithmeticExpr, ...).
    As a consequence, '==' cannot be used to test two Exprs for equality (use equals() instead), and Exprs cannot
    be used in a boolean context.
    """
    def __init__(self, col_type: ts.ColumnType):
        self.col_type = col_type

        # each instance has an id that is used for equality comparisons
        # - set by the subclass's __init__()
        # - produced by _create_id()
        # - not expected to survive a serialize()/deserialize() roundtrip
        self.id: Optional[int] = None

        # index of the expr's value in the data row:
        # - set for all materialized exprs
        # - -1: not executable
        # - not set for subexprs that don't need to be materialized because the parent can be materialized via SQL
        self.slot_idx = -1
        self.components: List[Expr] = []  # the subexprs that are needed to construct this expr

    def dependencies(self) -> List[Expr]:
        """
        Returns all exprs that need to have been evaluated before eval() can be called on this one.
        """
        return self.components

    def scope(self) -> ExprScope:
        """Returns the scope in which this expr needs to be evaluated."""
        # by default this is the innermost scope of any of our components
        result = _GLOBAL_SCOPE
        for c in self.components:
            c_scope = c.scope()
            if c_scope.is_contained_in(result):
                result = c_scope
        return result

    def bind_rel_paths(self, mapper: Optional['pixeltable.exprs.JsonMapper'] = None) -> None:
        """
        Binds relative JsonPaths to mapper.
        This needs to be done in a separate phase after __init__(), because RelativeJsonPath()(-1) cannot be resolved
        by the immediately containing JsonMapper during initialization.
        """
        for c in self.components:
            c.bind_rel_paths(mapper)

    def default_column_name(self) -> Optional[str]:
        """
        Returns:
            None if this expression lacks a default name,
            or a valid identifier (according to catalog.is_valid_identifer) otherwise.
        """
        return None

    def equals(self, other: Expr) -> bool:
        """
        Subclass-specific comparison. Implemented as a function because __eq__() is needed to construct Comparisons.

        Two exprs are equal if they are of the same class, their components are pairwise equal and the
        subclass-specific _equals() agrees.
        """
        if type(self) is not type(other):
            return False
        if not self.list_equals(self.components, other.components):
            return False
        return self._equals(other)

    def _id_attrs(self) -> List[Tuple[str, Any]]:
        """Returns attribute name/value pairs that are used to construct the instance id.

        Attribute values must be immutable and have str() defined.
        """
        return [('classname', self.__class__.__name__)]

    def _create_id(self) -> int:
        """Computes the instance id as a hash over _id_attrs() and the ids of the components."""
        hasher = hashlib.sha256()
        for attr, value in self._id_attrs():
            hasher.update(attr.encode('utf-8'))
            hasher.update(str(value).encode('utf-8'))
        for expr in self.components:
            hasher.update(str(expr.id).encode('utf-8'))
        # truncate to machine's word size
        return int(hasher.hexdigest(), 16) & sys.maxsize

    def __hash__(self) -> int:
        assert self.id is not None
        return self.id

    @classmethod
    def list_equals(cls, a: List[Expr], b: List[Expr]) -> bool:
        """Returns True if both lists have the same length and their elements are pairwise equal (via equals())."""
        return len(a) == len(b) and all(x.equals(y) for x, y in zip(a, b))

    def copy(self) -> Expr:
        """
        Creates a copy that can be evaluated separately: it doesn't share any eval context (slot_idx)
        but shares everything else (catalog objects, etc.)
        """
        cls = self.__class__
        # bypass __init__(): subclasses have differing constructor signatures
        result = cls.__new__(cls)
        result.__dict__.update(self.__dict__)
        result.slot_idx = -1
        result.components = [c.copy() for c in self.components]
        return result

    @classmethod
    def copy_list(cls, expr_list: List[Expr]) -> List[Expr]:
        """Returns a list containing a copy() of each element of expr_list."""
        return [e.copy() for e in expr_list]

    def __deepcopy__(self, memo=None) -> Expr:
        # we don't need to create an actual deep copy because all state other than execution state is read-only
        if memo is None:
            memo = {}
        result = self.copy()
        memo[id(self)] = result
        return result

    def substitute(self, old: Expr, new: Expr) -> Expr:
        """
        Replace 'old' with 'new' recursively.

        This modifies the components of self in place; the return value is either self or, if self itself
        equals 'old', a copy of 'new'.
        """
        if self.equals(old):
            return new.copy()
        for i, c in enumerate(self.components):
            self.components[i] = c.substitute(old, new)
        return self

    def resolve_computed_cols(self, resolve_cols: Optional[Set[catalog.Column]] = None) -> Expr:
        """
        Recursively replace ColRefs to unstored computed columns with their value exprs.
        Also replaces references to stored computed columns in resolve_cols.

        The replacement is done via substitute(), ie, it operates on self in place.
        """
        from .expr_set import ExprSet
        from .column_ref import ColumnRef
        if resolve_cols is None:
            resolve_cols = set()
        result = self
        # a value expr can itself reference computed columns: keep going until there is nothing left to replace
        while True:
            target_col_refs = ExprSet([
                e for e in result.subexprs()
                if isinstance(e, ColumnRef) and e.col.is_computed and (not e.col.is_stored or e.col in resolve_cols)
            ])
            if len(target_col_refs) == 0:
                return result
            for ref in target_col_refs:
                assert ref.col.value_expr is not None
                result = result.substitute(ref, ref.col.value_expr)

    def is_bound_by(self, tbl: catalog.TableVersionPath) -> bool:
        """Returns True if this expr can be evaluated in the context of tbl."""
        from .column_ref import ColumnRef
        return all(tbl.has_column(col_ref.col) for col_ref in self.subexprs(ColumnRef))

    def retarget(self, tbl: catalog.TableVersionPath) -> Expr:
        """Retarget ColumnRefs in this expr to the specific TableVersions in tbl."""
        tbl_versions = {tbl_version.id: tbl_version for tbl_version in tbl.get_tbl_versions()}
        return self._retarget(tbl_versions)

    def _retarget(self, tbl_versions: Dict[UUID, catalog.TableVersion]) -> Expr:
        """Recursive part of retarget(); replaces components in place and returns a new ColumnRef for ColumnRefs."""
        from .column_ref import ColumnRef
        if isinstance(self, ColumnRef):
            target = tbl_versions[self.col.tbl.id]
            assert self.col.id in target.cols_by_id
            col = target.cols_by_id[self.col.id]
            return ColumnRef(col)
        for i, c in enumerate(self.components):
            self.components[i] = c._retarget(tbl_versions)
        return self

    @classmethod
    def list_substitute(cls, expr_list: List[Expr], old: Expr, new: Expr) -> None:
        """Applies substitute(old, new) to every element of expr_list; expr_list is updated in place."""
        for i, e in enumerate(expr_list):
            expr_list[i] = e.substitute(old, new)

    @abc.abstractmethod
    def __str__(self) -> str:
        pass

    def display_str(self, inline: bool = True) -> str:
        """
        inline: if False, use line breaks where appropriate; otherwise don't use linebreaks
        """
        return str(self)

    @classmethod
    def print_list(cls, expr_list: List[Expr]) -> str:
        """Returns str() of the single element, or a parenthesized, comma-separated list otherwise."""
        if len(expr_list) == 1:
            return str(expr_list[0])
        return f'({", ".join([str(e) for e in expr_list])})'

    def subexprs(
            self, expr_class: Optional[Type[Expr]] = None, filter: Optional[Callable[[Expr], bool]] = None,
            traverse_matches: bool = True
    ) -> Generator[Expr, None, None]:
        """
        Iterate over all subexprs, including self.

        Components are produced before the expr that contains them.

        Args:
            expr_class: if set, only produce instances of that class
            filter: if set, only produce exprs for which filter returns True; cannot be combined with expr_class
            traverse_matches: if False, the components of a matching expr are not traversed
        """
        assert expr_class is None or filter is None  # at most one of them
        if expr_class is not None:
            filter = lambda e: isinstance(e, expr_class)
        is_match = filter is None or filter(self)
        if not is_match or traverse_matches:
            for c in self.components:
                yield from c.subexprs(filter=filter, traverse_matches=traverse_matches)
        if is_match:
            yield self

    def contains(self, cls: Optional[Type[Expr]] = None, filter: Optional[Callable[[Expr], bool]] = None) -> bool:
        """
        Returns True if any subexpr is an instance of cls or, if filter is given instead, satisfies filter.
        """
        assert (cls is not None) != (filter is not None)  # need one of them
        if cls is not None:
            filter = lambda e: isinstance(e, cls)
        # we only care whether there is at least one match; Exprs can't be used as booleans, so don't test the
        # matches themselves
        return any(True for _ in self.subexprs(filter=filter, traverse_matches=False))

    @classmethod
    def list_subexprs(
            cls, expr_list: List[Expr], expr_class: Optional[Type[Expr]] = None,
            filter: Optional[Callable[[Expr], bool]] = None, traverse_matches: bool = True
    ) -> Generator[Expr, None, None]:
        """Produce subexprs for all exprs in list. Can contain duplicates."""
        for e in expr_list:
            yield from e.subexprs(expr_class=expr_class, filter=filter, traverse_matches=traverse_matches)

    def tbl_ids(self) -> Set[UUID]:
        """Returns table ids referenced by this expr."""
        from .column_ref import ColumnRef
        from .rowid_ref import RowidRef
        return {ref.col.tbl.id for ref in self.subexprs(ColumnRef)} | {ref.tbl.id for ref in self.subexprs(RowidRef)}

    @classmethod
    def list_tbl_ids(cls, expr_list: List[Expr]) -> Set[UUID]:
        """Returns the union of tbl_ids() of all exprs in expr_list."""
        ids: Set[UUID] = set()
        for e in expr_list:
            ids.update(e.tbl_ids())
        return ids

    @classmethod
    def from_object(cls, o: object) -> Optional[Expr]:
        """
        Try to turn a literal object into an Expr.

        Returns:
            o itself if it already is an Expr; a Literal if a literal type can be inferred for o;
            an InlineDict/InlineArray for other dicts/lists; None otherwise.
        """
        if isinstance(o, Expr):
            return o
        # try to create a literal
        obj_type = ts.ColumnType.infer_literal_type(o)
        if obj_type is not None:
            from .literal import Literal
            return Literal(o, col_type=obj_type)
        if isinstance(o, dict):
            from .inline_dict import InlineDict
            return InlineDict(o)
        elif isinstance(o, list):
            from .inline_array import InlineArray
            return InlineArray(tuple(o))
        return None

    @abc.abstractmethod
    def _equals(self, other: Expr) -> bool:
        pass

    @abc.abstractmethod
    def sql_expr(self) -> Optional[sql.ClauseElement]:
        """
        If this expr can be materialized directly in SQL:
        - returns a ClauseElement
        - eval() will not be called (exception: Literal)
        Otherwise
        - returns None
        - eval() will be called
        """
        pass

    @abc.abstractmethod
    def eval(self, data_row: DataRow, row_builder: 'pixeltable.exprs.RowBuilder') -> None:
        """
        Compute the expr value for data_row and store the result in data_row[slot_idx].
        Not called if sql_expr() != None (exception: Literal).
        """
        pass

    def release(self) -> None:
        """
        Allow Expr class to tear down execution state. This is called after the last eval() call.
        """
        for c in self.components:
            c.release()

    @classmethod
    def release_list(cls, expr_list: List[Expr]) -> None:
        """Calls release() on every element of expr_list."""
        for e in expr_list:
            e.release()

    def serialize(self) -> str:
        """Returns the json-encoded as_dict() representation of this expr."""
        return json.dumps(self.as_dict())

    def as_dict(self) -> Dict:
        """
        Turn Expr object into a dict that can be passed to json.dumps().
        Subclasses override _as_dict().
        """
        return {
            '_classname': self.__class__.__name__,
            **self._as_dict(),
        }

    @classmethod
    def as_dict_list(cls, expr_list: List[Expr]) -> List[Dict]:
        """Returns as_dict() of every element of expr_list."""
        return [e.as_dict() for e in expr_list]

    def _as_dict(self) -> Dict:
        """Returns the subclass-specific part of as_dict(); the default only records the components, if any."""
        if len(self.components) > 0:
            return {'components': [c.as_dict() for c in self.components]}
        return {}

    @classmethod
    def deserialize(cls, dict_str: str) -> Expr:
        """Inverse of serialize()."""
        return cls.from_dict(json.loads(dict_str))

    @classmethod
    def from_dict(cls, d: Dict) -> Expr:
        """
        Turn dict that was produced by calling Expr.as_dict() into an instance of the correct Expr subclass.
        """
        assert '_classname' in d
        # the subclass is looked up by name in the package that contains this module
        exprs_module = importlib.import_module(cls.__module__.rsplit('.', 1)[0])
        type_class = getattr(exprs_module, d['_classname'])
        components: List[Expr] = []
        if 'components' in d:
            components = [cls.from_dict(component_dict) for component_dict in d['components']]
        return type_class._from_dict(d, components)

    @classmethod
    def from_dict_list(cls, dict_list: List[Dict]) -> List[Expr]:
        """Returns from_dict() of every element of dict_list."""
        return [cls.from_dict(d) for d in dict_list]

    @classmethod
    def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
        # needs to be overridden by subclasses that can be deserialized
        assert False, 'not implemented'

    def astype(self, new_type: ts.ColumnType) -> 'pixeltable.exprs.TypeCast':
        """Returns a TypeCast of this expr to new_type."""
        from pixeltable.exprs import TypeCast
        return TypeCast(self, new_type)

    def apply(self, fn: Callable, *, col_type: Optional[ts.ColumnType] = None) -> 'pixeltable.exprs.FunctionCall':
        """
        Returns a FunctionCall that applies the Python callable `fn` to this expr.

        Args:
            fn: the callable to apply; see _make_applicator_function() for the requirements
            col_type: result type of `fn`; if None, the type is inferred from `fn`
        """
        function = self._make_applicator_function(fn, col_type)
        # Return a `FunctionCall` obtained by passing this `Expr` to the new `function`.
        return function(self)

    def __getitem__(self, index: object) -> Expr:
        if self.col_type.is_json_type():
            from .json_path import JsonPath
            return JsonPath(self).__getitem__(index)
        if self.col_type.is_array_type():
            from .array_slice import ArraySlice
            return ArraySlice(self, index)
        raise excs.Error(f'Type {self.col_type} is not subscriptable')

    def __getattr__(self, name: str) -> Union['pixeltable.exprs.ImageMemberAccess', 'pixeltable.exprs.JsonPath']:
        """
        ex.: <img col>.rotate(60)
        """
        if name == 'col_type':
            # __getattr__() is only invoked when regular attribute lookup fails, ie, this instance doesn't have
            # col_type yet (eg, it was created via __new__() without __init__(), as happens during unpickling).
            # Evaluating self.col_type below would re-enter __getattr__() and recurse until the stack overflows.
            raise AttributeError(name)
        if self.col_type.is_image_type():
            from .image_member_access import ImageMemberAccess
            return ImageMemberAccess(name, self)
        if self.col_type.is_json_type():
            from .json_path import JsonPath
            return JsonPath(self).__getattr__(name)
        raise excs.Error(f'Member access not supported on type {self.col_type}: {name}')

    def __bool__(self) -> bool:
        raise TypeError(
            'Pixeltable expressions cannot be used in conjunction with Python boolean operators (and/or/not)')

    def __lt__(self, other: object) -> 'pixeltable.exprs.Comparison':
        return self._make_comparison(ComparisonOperator.LT, other)

    def __le__(self, other: object) -> 'pixeltable.exprs.Comparison':
        return self._make_comparison(ComparisonOperator.LE, other)

    def __eq__(self, other: object) -> 'pixeltable.exprs.Comparison':
        if other is None:
            # comparison against None is expressed as an IsNull predicate
            from .is_null import IsNull
            return IsNull(self)
        return self._make_comparison(ComparisonOperator.EQ, other)

    def __ne__(self, other: object) -> 'pixeltable.exprs.Comparison':
        if other is None:
            from .compound_predicate import CompoundPredicate
            from .is_null import IsNull
            return CompoundPredicate(LogicalOperator.NOT, [IsNull(self)])
        return self._make_comparison(ComparisonOperator.NE, other)

    def __gt__(self, other: object) -> 'pixeltable.exprs.Comparison':
        return self._make_comparison(ComparisonOperator.GT, other)

    def __ge__(self, other: object) -> 'pixeltable.exprs.Comparison':
        return self._make_comparison(ComparisonOperator.GE, other)

    def _make_comparison(self, op: ComparisonOperator, other: object) -> 'pixeltable.exprs.Comparison':
        """
        other: Union[Expr, LiteralPythonTypes]

        Raises:
            TypeError: if other is neither an Expr nor a literal
        """
        # TODO: check for compatibility
        from .comparison import Comparison
        from .literal import Literal
        if isinstance(other, Expr):
            return Comparison(op, self, other)
        if isinstance(other, typing.get_args(LiteralPythonTypes)):
            return Comparison(op, self, Literal(other))  # type: ignore[arg-type]
        raise TypeError(f'Other must be Expr or literal: {type(other)}')

    def __add__(self, other: object) -> 'pixeltable.exprs.ArithmeticExpr':
        return self._make_arithmetic_expr(ArithmeticOperator.ADD, other)

    def __sub__(self, other: object) -> 'pixeltable.exprs.ArithmeticExpr':
        return self._make_arithmetic_expr(ArithmeticOperator.SUB, other)

    def __mul__(self, other: object) -> 'pixeltable.exprs.ArithmeticExpr':
        return self._make_arithmetic_expr(ArithmeticOperator.MUL, other)

    def __truediv__(self, other: object) -> 'pixeltable.exprs.ArithmeticExpr':
        return self._make_arithmetic_expr(ArithmeticOperator.DIV, other)

    def __mod__(self, other: object) -> 'pixeltable.exprs.ArithmeticExpr':
        return self._make_arithmetic_expr(ArithmeticOperator.MOD, other)

    def _make_arithmetic_expr(self, op: ArithmeticOperator, other: object) -> 'pixeltable.exprs.ArithmeticExpr':
        """
        other: Union[Expr, LiteralPythonTypes]

        Raises:
            TypeError: if other is neither an Expr nor a literal
        """
        # TODO: check for compatibility
        from .arithmetic_expr import ArithmeticExpr
        from .literal import Literal
        if isinstance(other, Expr):
            return ArithmeticExpr(op, self, other)
        if isinstance(other, typing.get_args(LiteralPythonTypes)):
            return ArithmeticExpr(op, self, Literal(other))  # type: ignore[arg-type]
        raise TypeError(f'Other must be Expr or literal: {type(other)}')

    def _make_applicator_function(self, fn: Callable, col_type: Optional[ts.ColumnType]) -> 'pixeltable.func.Function':
        """
        Creates a unary pixeltable `Function` that encapsulates a python `Callable`. The result type of
        the new `Function` is given by `col_type`, and its parameter type will be `self.col_type`.

        Args:
            fn: The `Callable` to encapsulate. Must have at least one parameter, and at most one required
                parameter.
            col_type: The pixeltable result type of the new `Function`. If None, the result type is looked up in
                `_known_applicator_types` or, failing that, inferred from the return type hint of `fn`.

        Raises:
            excs.Error: if the result type cannot be determined or if the signature of `fn` is incompatible
        """
        # not every callable has a __name__ (eg, functools.partial objects)
        fn_name = getattr(fn, '__name__', repr(fn))

        if col_type is not None:
            # col_type is specified explicitly
            fn_type = col_type
        elif fn in _known_applicator_types:
            # For convenience, various built-ins and other Python functions that don't
            # have type hints are hardcoded
            fn_type = _known_applicator_types[fn]
        else:
            try:
                type_hints = typing.get_type_hints(fn)
            except (TypeError, NameError):
                # fn doesn't carry annotations or they cannot be resolved: treat it like a missing type hint
                type_hints = {}
            if 'return' in type_hints:
                # Attempt to infer the column type from the return type of the callable;
                # this will set fn_type to None if it cannot be inferred
                fn_type = ts.ColumnType.from_python_type(type_hints['return'])
            else:
                # No type hint
                fn_type = None

        if fn_type is None:
            raise excs.Error(
                f'Column type of `{fn_name}` cannot be inferred. '
                f'Use `.apply({fn_name}, col_type=...)` to specify.')

        # TODO(aaron-siegel) Currently we assume that `fn` has exactly one required parameter
        # and all optional parameters take their default values. Should we provide a more
        # flexible API? For example, by defining
        #   expr.apply(fn, my_kw=my_arg)
        # to mean: transform each x by calling
        #   fn(x, my_kw=my_arg)
        # In the current implementation, a lambda is needed in order to specify this pattern:
        #   expr.apply(lambda x: fn(x, my_kw=my_arg))

        try:
            # If `fn` is not a builtin, we can do some basic validation to ensure it's
            # compatible with `apply`.
            params: Optional[list] = list(inspect.signature(fn).parameters.values())
        except ValueError:
            # inspect.signature(fn) will raise a `ValueError` if `fn` is a builtin; I don't
            # know of any way to get the signature of a builtin, nor to check for this in
            # advance (without the try/except pattern). For now, builtins will not be
            # validated.
            params = None

        if params is not None:
            # Check that fn has at least one positional parameter
            if len(params) == 0 or params[0].kind in (inspect.Parameter.KEYWORD_ONLY, inspect.Parameter.VAR_KEYWORD):
                raise excs.Error(
                    f'Function `{fn_name}` has no positional parameters.'
                )
            # Check that fn has at most one required parameter, i.e., its second parameter
            # has no default and is not a varargs
            if len(params) >= 2:
                second_param = params[1]
                # identity test: '==' would invoke __eq__() of an arbitrary default value
                if second_param.kind not in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD) and \
                        second_param.default is inspect.Parameter.empty:
                    raise excs.Error(
                        f'Function `{fn_name}` has multiple required parameters.'
                    )

        # Since `fn` might have optional parameters, we wrap it in a lambda to get a unary
        # equivalent, so that its signature is understood by `make_function`. This also
        # ensures that `decorated_fn` is never a builtin.
        # We also set the display_name explicitly, so that the `FunctionCall` gets the
        # name of `decorated_fn`, not the lambda.
        return func.make_function(
            decorated_fn=lambda x: fn(x), return_type=fn_type, param_types=[self.col_type], function_name=fn_name)
|
|
573
|
+
|
|
574
|
+
|
|
575
|
+
# Result types (as pixeltable types) of various stdlib functions that are commonly used in computed columns.
# stdlib does not have type hints, so this dictionary is consulted by Expr._make_applicator_function() to
# determine the result type when Expr.apply() is called without an explicit col_type.
# This is purely for convenience and does not impact the supported functionality
# (it's always possible to specify a result type explicitly for a function
# that does not have type hints and is not present in this dict).
_known_applicator_types: dict[Callable, ts.ColumnType] = {
    str: ts.StringType(),
    json.dumps: ts.StringType(),
    json.loads: ts.JsonType(),
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import Optional, Dict, Iterable, Iterator
|
|
3
|
+
|
|
4
|
+
from .expr import Expr
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class ExprSet:
    """
    A set of Exprs, keyed by Expr.id, that preserves insertion order and also supports indexed lookup
    (by integer index and by Expr).
    """
    def __init__(self, elements: Optional[Iterable['Expr']] = None):
        self.exprs: Dict[int, Expr] = {}  # Expr.id -> Expr
        if elements is None:
            return
        for element in elements:
            self.append(element)

    def append(self, expr: 'Expr') -> None:
        """Adds expr, unless an expr with the same id is already present (the existing one is kept)."""
        self.exprs.setdefault(expr.id, expr)

    def extend(self, elements: Iterable['Expr']) -> None:
        """Appends every element of elements."""
        for element in elements:
            self.append(element)

    def __contains__(self, item: 'Expr') -> bool:
        return item.id in self.exprs

    def __len__(self) -> int:
        return len(self.exprs)

    def __iter__(self) -> Iterator['Expr']:
        return iter(self.exprs.values())

    def __getitem__(self, index: object) -> Optional['Expr']:
        """
        Looks up an element either by integer index or by Expr.

        An integer selects the element at that position in insertion order (raises IndexError if out of range);
        an Expr selects the element with the same id (returns None if there is none).
        """
        if isinstance(index, int):
            # NOTE(review): this is a positional lookup; it only matches slot_idx if the exprs were added in
            # slot_idx order -- confirm against callers
            by_position = list(self.exprs.values())
            return by_position[index]
        assert isinstance(index, Expr)
        return self.exprs.get(index.id)
|