pixeltable 0.3.2__py3-none-any.whl → 0.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +64 -11
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +1 -1
- pixeltable/catalog/catalog.py +50 -27
- pixeltable/catalog/column.py +27 -11
- pixeltable/catalog/dir.py +6 -4
- pixeltable/catalog/globals.py +8 -1
- pixeltable/catalog/insertable_table.py +22 -12
- pixeltable/catalog/named_function.py +10 -6
- pixeltable/catalog/path.py +3 -2
- pixeltable/catalog/path_dict.py +8 -6
- pixeltable/catalog/schema_object.py +2 -1
- pixeltable/catalog/table.py +121 -101
- pixeltable/catalog/table_version.py +291 -142
- pixeltable/catalog/table_version_path.py +8 -5
- pixeltable/catalog/view.py +67 -26
- pixeltable/dataframe.py +102 -72
- pixeltable/env.py +20 -21
- pixeltable/exec/__init__.py +2 -2
- pixeltable/exec/aggregation_node.py +10 -4
- pixeltable/exec/cache_prefetch_node.py +5 -3
- pixeltable/exec/component_iteration_node.py +9 -8
- pixeltable/exec/data_row_batch.py +21 -10
- pixeltable/exec/exec_context.py +10 -3
- pixeltable/exec/exec_node.py +23 -12
- pixeltable/exec/expr_eval/evaluators.py +13 -7
- pixeltable/exec/expr_eval/expr_eval_node.py +24 -15
- pixeltable/exec/expr_eval/globals.py +30 -7
- pixeltable/exec/expr_eval/row_buffer.py +5 -6
- pixeltable/exec/expr_eval/schedulers.py +151 -31
- pixeltable/exec/in_memory_data_node.py +8 -7
- pixeltable/exec/row_update_node.py +15 -5
- pixeltable/exec/sql_node.py +56 -27
- pixeltable/exprs/__init__.py +2 -2
- pixeltable/exprs/arithmetic_expr.py +57 -26
- pixeltable/exprs/array_slice.py +1 -1
- pixeltable/exprs/column_property_ref.py +2 -1
- pixeltable/exprs/column_ref.py +20 -15
- pixeltable/exprs/comparison.py +6 -2
- pixeltable/exprs/compound_predicate.py +1 -3
- pixeltable/exprs/data_row.py +2 -2
- pixeltable/exprs/expr.py +101 -72
- pixeltable/exprs/expr_dict.py +2 -1
- pixeltable/exprs/expr_set.py +3 -1
- pixeltable/exprs/function_call.py +39 -41
- pixeltable/exprs/globals.py +1 -0
- pixeltable/exprs/in_predicate.py +2 -2
- pixeltable/exprs/inline_expr.py +20 -17
- pixeltable/exprs/json_mapper.py +4 -2
- pixeltable/exprs/json_path.py +12 -18
- pixeltable/exprs/literal.py +5 -9
- pixeltable/exprs/method_ref.py +1 -0
- pixeltable/exprs/object_ref.py +1 -1
- pixeltable/exprs/row_builder.py +32 -17
- pixeltable/exprs/rowid_ref.py +14 -5
- pixeltable/exprs/similarity_expr.py +11 -6
- pixeltable/exprs/sql_element_cache.py +1 -1
- pixeltable/exprs/type_cast.py +24 -9
- pixeltable/ext/__init__.py +1 -0
- pixeltable/ext/functions/__init__.py +1 -0
- pixeltable/ext/functions/whisperx.py +2 -2
- pixeltable/ext/functions/yolox.py +11 -11
- pixeltable/func/aggregate_function.py +17 -13
- pixeltable/func/callable_function.py +6 -6
- pixeltable/func/expr_template_function.py +15 -14
- pixeltable/func/function.py +16 -16
- pixeltable/func/function_registry.py +11 -8
- pixeltable/func/globals.py +4 -2
- pixeltable/func/query_template_function.py +12 -13
- pixeltable/func/signature.py +18 -9
- pixeltable/func/tools.py +10 -17
- pixeltable/func/udf.py +106 -11
- pixeltable/functions/__init__.py +21 -2
- pixeltable/functions/anthropic.py +16 -12
- pixeltable/functions/fireworks.py +63 -5
- pixeltable/functions/gemini.py +13 -3
- pixeltable/functions/globals.py +18 -6
- pixeltable/functions/huggingface.py +20 -38
- pixeltable/functions/image.py +7 -3
- pixeltable/functions/json.py +1 -0
- pixeltable/functions/llama_cpp.py +1 -4
- pixeltable/functions/mistralai.py +31 -20
- pixeltable/functions/ollama.py +4 -18
- pixeltable/functions/openai.py +201 -108
- pixeltable/functions/replicate.py +11 -10
- pixeltable/functions/string.py +70 -7
- pixeltable/functions/timestamp.py +21 -8
- pixeltable/functions/together.py +66 -52
- pixeltable/functions/video.py +1 -0
- pixeltable/functions/vision.py +14 -11
- pixeltable/functions/whisper.py +2 -1
- pixeltable/globals.py +60 -26
- pixeltable/index/__init__.py +1 -1
- pixeltable/index/btree.py +5 -3
- pixeltable/index/embedding_index.py +15 -14
- pixeltable/io/__init__.py +1 -1
- pixeltable/io/external_store.py +30 -25
- pixeltable/io/fiftyone.py +6 -14
- pixeltable/io/globals.py +33 -27
- pixeltable/io/hf_datasets.py +2 -1
- pixeltable/io/label_studio.py +77 -68
- pixeltable/io/pandas.py +33 -9
- pixeltable/io/parquet.py +9 -12
- pixeltable/iterators/__init__.py +1 -0
- pixeltable/iterators/audio.py +205 -0
- pixeltable/iterators/document.py +19 -8
- pixeltable/iterators/image.py +6 -24
- pixeltable/iterators/string.py +3 -6
- pixeltable/iterators/video.py +1 -7
- pixeltable/metadata/__init__.py +7 -1
- pixeltable/metadata/converters/convert_10.py +2 -2
- pixeltable/metadata/converters/convert_15.py +1 -5
- pixeltable/metadata/converters/convert_16.py +2 -4
- pixeltable/metadata/converters/convert_17.py +2 -4
- pixeltable/metadata/converters/convert_18.py +2 -4
- pixeltable/metadata/converters/convert_19.py +2 -5
- pixeltable/metadata/converters/convert_20.py +1 -4
- pixeltable/metadata/converters/convert_21.py +4 -6
- pixeltable/metadata/converters/convert_22.py +1 -0
- pixeltable/metadata/converters/convert_23.py +5 -5
- pixeltable/metadata/converters/convert_24.py +12 -13
- pixeltable/metadata/converters/convert_26.py +23 -0
- pixeltable/metadata/converters/util.py +3 -4
- pixeltable/metadata/notes.py +1 -0
- pixeltable/metadata/schema.py +13 -2
- pixeltable/plan.py +173 -98
- pixeltable/store.py +42 -26
- pixeltable/type_system.py +62 -54
- pixeltable/utils/arrow.py +1 -2
- pixeltable/utils/coco.py +16 -17
- pixeltable/utils/code.py +1 -1
- pixeltable/utils/console_output.py +6 -3
- pixeltable/utils/description_helper.py +7 -7
- pixeltable/utils/documents.py +3 -1
- pixeltable/utils/filecache.py +12 -7
- pixeltable/utils/http_server.py +9 -8
- pixeltable/utils/media_store.py +2 -1
- pixeltable/utils/pytorch.py +11 -14
- pixeltable/utils/s3.py +1 -0
- pixeltable/utils/sql.py +1 -0
- pixeltable/utils/transactional_directory.py +2 -2
- {pixeltable-0.3.2.dist-info → pixeltable-0.3.3.dist-info}/METADATA +6 -8
- pixeltable-0.3.3.dist-info/RECORD +163 -0
- pixeltable-0.3.2.dist-info/RECORD +0 -161
- {pixeltable-0.3.2.dist-info → pixeltable-0.3.3.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.2.dist-info → pixeltable-0.3.3.dist-info}/WHEEL +0 -0
- {pixeltable-0.3.2.dist-info → pixeltable-0.3.3.dist-info}/entry_points.txt +0 -0
pixeltable/exprs/expr.py
CHANGED
|
@@ -7,11 +7,11 @@ import inspect
|
|
|
7
7
|
import json
|
|
8
8
|
import sys
|
|
9
9
|
import typing
|
|
10
|
-
from typing import TYPE_CHECKING, Any, Callable, Iterator, Optional, TypeVar, Union, overload
|
|
10
|
+
from typing import TYPE_CHECKING, Any, Callable, Iterable, Iterator, Optional, TypeVar, Union, overload
|
|
11
11
|
from uuid import UUID
|
|
12
12
|
|
|
13
13
|
import sqlalchemy as sql
|
|
14
|
-
from typing_extensions import
|
|
14
|
+
from typing_extensions import Self, _AnnotatedAlias
|
|
15
15
|
|
|
16
16
|
import pixeltable.catalog as catalog
|
|
17
17
|
import pixeltable.exceptions as excs
|
|
@@ -24,11 +24,13 @@ from .globals import ArithmeticOperator, ComparisonOperator, LiteralPythonTypes,
|
|
|
24
24
|
if TYPE_CHECKING:
|
|
25
25
|
from pixeltable import exprs
|
|
26
26
|
|
|
27
|
+
|
|
27
28
|
class ExprScope:
|
|
28
29
|
"""
|
|
29
30
|
Representation of the scope in which an Expr needs to be evaluated. Used to determine nesting of scopes.
|
|
30
31
|
parent is None: outermost scope
|
|
31
32
|
"""
|
|
33
|
+
|
|
32
34
|
def __init__(self, parent: Optional[ExprScope]):
|
|
33
35
|
self.parent = parent
|
|
34
36
|
|
|
@@ -137,6 +139,9 @@ class Expr(abc.ABC):
|
|
|
137
139
|
for attr, value in self._id_attrs():
|
|
138
140
|
hasher.update(attr.encode('utf-8'))
|
|
139
141
|
hasher.update(str(value).encode('utf-8'))
|
|
142
|
+
# Include the col_type of the expression to avoid expressions with identical str() representations
|
|
143
|
+
# but different types being considered the same expression, e.g. str(int(4)) == "4"
|
|
144
|
+
hasher.update(repr(self.col_type).encode('utf-8'))
|
|
140
145
|
for expr in self.components:
|
|
141
146
|
hasher.update(str(expr.id).encode('utf-8'))
|
|
142
147
|
# truncate to machine's word size
|
|
@@ -183,13 +188,19 @@ class Expr(abc.ABC):
|
|
|
183
188
|
|
|
184
189
|
def substitute(self, spec: dict[Expr, Expr]) -> Expr:
|
|
185
190
|
"""
|
|
186
|
-
Replace 'old' with 'new' recursively
|
|
191
|
+
Replace 'old' with 'new' recursively, and return a new version of the expression
|
|
192
|
+
This method must be used in the form: expr = expr.substitute(spec)
|
|
187
193
|
"""
|
|
194
|
+
from .literal import Literal
|
|
195
|
+
|
|
196
|
+
if isinstance(self, Literal):
|
|
197
|
+
return self
|
|
188
198
|
for old, new in spec.items():
|
|
189
199
|
if self.equals(old):
|
|
190
200
|
return new.copy()
|
|
191
201
|
for i in range(len(self.components)):
|
|
192
202
|
self.components[i] = self.components[i].substitute(spec)
|
|
203
|
+
self = self.maybe_literal()
|
|
193
204
|
self.id = self._create_id()
|
|
194
205
|
return self
|
|
195
206
|
|
|
@@ -205,14 +216,18 @@ class Expr(abc.ABC):
|
|
|
205
216
|
"""
|
|
206
217
|
from .column_ref import ColumnRef
|
|
207
218
|
from .expr_set import ExprSet
|
|
219
|
+
|
|
208
220
|
if resolve_cols is None:
|
|
209
221
|
resolve_cols = set()
|
|
210
222
|
result = self
|
|
211
223
|
while True:
|
|
212
|
-
target_col_refs = ExprSet(
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
224
|
+
target_col_refs = ExprSet(
|
|
225
|
+
[
|
|
226
|
+
e
|
|
227
|
+
for e in result.subexprs()
|
|
228
|
+
if isinstance(e, ColumnRef) and e.col.is_computed and (not e.col.is_stored or e.col in resolve_cols)
|
|
229
|
+
]
|
|
230
|
+
)
|
|
216
231
|
if len(target_col_refs) == 0:
|
|
217
232
|
return result
|
|
218
233
|
result = result.substitute({ref: ref.col.value_expr for ref in target_col_refs})
|
|
@@ -220,6 +235,7 @@ class Expr(abc.ABC):
|
|
|
220
235
|
def is_bound_by(self, tbls: list[catalog.TableVersionPath]) -> bool:
|
|
221
236
|
"""Returns True if this expr can be evaluated in the context of tbls."""
|
|
222
237
|
from .column_ref import ColumnRef
|
|
238
|
+
|
|
223
239
|
col_refs = self.subexprs(ColumnRef)
|
|
224
240
|
for col_ref in col_refs:
|
|
225
241
|
if not any(tbl.has_column(col_ref.col) for tbl in tbls):
|
|
@@ -232,7 +248,7 @@ class Expr(abc.ABC):
|
|
|
232
248
|
return self._retarget(tbl_versions)
|
|
233
249
|
|
|
234
250
|
def _retarget(self, tbl_versions: dict[UUID, catalog.TableVersion]) -> Self:
|
|
235
|
-
for i in range
|
|
251
|
+
for i in range(len(self.components)):
|
|
236
252
|
self.components[i] = self.components[i]._retarget(tbl_versions)
|
|
237
253
|
return self
|
|
238
254
|
|
|
@@ -264,13 +280,14 @@ class Expr(abc.ABC):
|
|
|
264
280
|
|
|
265
281
|
@overload
|
|
266
282
|
def subexprs(
|
|
267
|
-
self, expr_class: type[T], filter: Optional[Callable[[Expr], bool]] = None,
|
|
268
|
-
traverse_matches: bool = True
|
|
283
|
+
self, expr_class: type[T], filter: Optional[Callable[[Expr], bool]] = None, traverse_matches: bool = True
|
|
269
284
|
) -> Iterator[T]: ...
|
|
270
285
|
|
|
271
286
|
def subexprs(
|
|
272
|
-
self,
|
|
273
|
-
|
|
287
|
+
self,
|
|
288
|
+
expr_class: Optional[type[T]] = None,
|
|
289
|
+
filter: Optional[Callable[[Expr], bool]] = None,
|
|
290
|
+
traverse_matches: bool = True,
|
|
274
291
|
) -> Iterator[T]:
|
|
275
292
|
"""
|
|
276
293
|
Iterate over all subexprs, including self.
|
|
@@ -288,20 +305,30 @@ class Expr(abc.ABC):
|
|
|
288
305
|
@overload
|
|
289
306
|
@classmethod
|
|
290
307
|
def list_subexprs(
|
|
291
|
-
cls,
|
|
308
|
+
cls,
|
|
309
|
+
expr_list: Iterable[Expr],
|
|
310
|
+
*,
|
|
311
|
+
filter: Optional[Callable[[Expr], bool]] = None,
|
|
312
|
+
traverse_matches: bool = True,
|
|
292
313
|
) -> Iterator[Expr]: ...
|
|
293
314
|
|
|
294
315
|
@overload
|
|
295
316
|
@classmethod
|
|
296
317
|
def list_subexprs(
|
|
297
|
-
cls,
|
|
298
|
-
|
|
318
|
+
cls,
|
|
319
|
+
expr_list: Iterable[Expr],
|
|
320
|
+
expr_class: type[T],
|
|
321
|
+
filter: Optional[Callable[[Expr], bool]] = None,
|
|
322
|
+
traverse_matches: bool = True,
|
|
299
323
|
) -> Iterator[T]: ...
|
|
300
324
|
|
|
301
325
|
@classmethod
|
|
302
326
|
def list_subexprs(
|
|
303
|
-
cls,
|
|
304
|
-
|
|
327
|
+
cls,
|
|
328
|
+
expr_list: Iterable[Expr],
|
|
329
|
+
expr_class: Optional[type[T]] = None,
|
|
330
|
+
filter: Optional[Callable[[Expr], bool]] = None,
|
|
331
|
+
traverse_matches: bool = True,
|
|
305
332
|
) -> Iterator[T]:
|
|
306
333
|
"""Produce subexprs for all exprs in list. Can contain duplicates."""
|
|
307
334
|
for e in expr_list:
|
|
@@ -322,6 +349,7 @@ class Expr(abc.ABC):
|
|
|
322
349
|
"""Returns table ids referenced by this expr."""
|
|
323
350
|
from .column_ref import ColumnRef
|
|
324
351
|
from .rowid_ref import RowidRef
|
|
352
|
+
|
|
325
353
|
return {ref.col.tbl.id for ref in self.subexprs(ColumnRef)} | {ref.tbl.id for ref in self.subexprs(RowidRef)}
|
|
326
354
|
|
|
327
355
|
@classmethod
|
|
@@ -334,6 +362,7 @@ class Expr(abc.ABC):
|
|
|
334
362
|
result: list[catalog.Column] = []
|
|
335
363
|
assert '_classname' in expr_dict
|
|
336
364
|
from .column_ref import ColumnRef
|
|
365
|
+
|
|
337
366
|
if expr_dict['_classname'] == 'ColumnRef':
|
|
338
367
|
result.append(ColumnRef.get_column(expr_dict))
|
|
339
368
|
if 'components' in expr_dict:
|
|
@@ -341,66 +370,57 @@ class Expr(abc.ABC):
|
|
|
341
370
|
result.extend(cls.get_refd_columns(component_dict))
|
|
342
371
|
return result
|
|
343
372
|
|
|
344
|
-
def
|
|
345
|
-
"""Returns True if this expr is a constant."""
|
|
346
|
-
return all(comp.is_constant() for comp in self.components)
|
|
347
|
-
|
|
348
|
-
def _as_constant(self) -> Any:
|
|
349
|
-
return None
|
|
350
|
-
|
|
351
|
-
def as_constant(self) -> Any:
|
|
373
|
+
def as_literal(self) -> Optional[Expr]:
|
|
352
374
|
"""
|
|
353
|
-
|
|
375
|
+
Return a Literal expression if this expression can be evaluated to a constant value, otherwise return None.
|
|
354
376
|
"""
|
|
355
|
-
if self.is_constant():
|
|
356
|
-
return self._as_constant()
|
|
357
377
|
return None
|
|
358
378
|
|
|
359
379
|
@classmethod
|
|
360
380
|
def from_array(cls, elements: Iterable) -> Optional[Expr]:
|
|
361
381
|
from .inline_expr import InlineArray
|
|
382
|
+
|
|
362
383
|
inline_array = InlineArray(elements)
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
384
|
+
return inline_array.maybe_literal()
|
|
385
|
+
|
|
386
|
+
def maybe_literal(self: Expr) -> Expr:
|
|
387
|
+
"""
|
|
388
|
+
Return a Literal if this expression can be evaluated to a constant value, otherwise return the expression.
|
|
389
|
+
"""
|
|
390
|
+
lit_expr = self.as_literal()
|
|
391
|
+
if lit_expr is not None:
|
|
392
|
+
return lit_expr
|
|
367
393
|
else:
|
|
368
|
-
return
|
|
394
|
+
return self
|
|
369
395
|
|
|
370
396
|
@classmethod
|
|
371
397
|
def from_object(cls, o: object) -> Optional[Expr]:
|
|
372
398
|
"""
|
|
373
399
|
Try to turn a literal object into an Expr.
|
|
374
400
|
"""
|
|
401
|
+
from .inline_expr import InlineDict, InlineList
|
|
402
|
+
from .literal import Literal
|
|
403
|
+
|
|
375
404
|
# Try to create a literal. We need to check for InlineList/InlineDict
|
|
376
405
|
# first, to prevent them from inappropriately being interpreted as JsonType
|
|
377
406
|
# literals.
|
|
407
|
+
if isinstance(o, Literal):
|
|
408
|
+
return o
|
|
409
|
+
|
|
378
410
|
if isinstance(o, (list, tuple, dict, Expr)):
|
|
379
|
-
expr:
|
|
411
|
+
expr: Expr
|
|
380
412
|
if isinstance(o, (list, tuple)):
|
|
381
|
-
from .inline_expr import InlineList
|
|
382
413
|
expr = InlineList(o)
|
|
383
414
|
elif isinstance(o, dict):
|
|
384
|
-
from .inline_expr import InlineDict
|
|
385
415
|
expr = InlineDict(o)
|
|
386
|
-
|
|
416
|
+
else:
|
|
387
417
|
expr = o
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
return expr
|
|
391
|
-
# Check if the expression is constant
|
|
392
|
-
if expr is not None:
|
|
393
|
-
expr_value = expr.as_constant()
|
|
394
|
-
if expr_value is not None:
|
|
395
|
-
from .literal import Literal
|
|
396
|
-
return Literal(expr_value)
|
|
397
|
-
else:
|
|
398
|
-
return expr
|
|
418
|
+
|
|
419
|
+
return expr.maybe_literal()
|
|
399
420
|
else:
|
|
400
421
|
# convert scalar to a literal
|
|
401
422
|
obj_type = ts.ColumnType.infer_literal_type(o)
|
|
402
423
|
if obj_type is not None:
|
|
403
|
-
from .literal import Literal
|
|
404
424
|
return Literal(o, col_type=obj_type)
|
|
405
425
|
return None
|
|
406
426
|
|
|
@@ -444,10 +464,7 @@ class Expr(abc.ABC):
|
|
|
444
464
|
Turn Expr object into a dict that can be passed to json.dumps().
|
|
445
465
|
Subclasses override _as_dict().
|
|
446
466
|
"""
|
|
447
|
-
return {
|
|
448
|
-
'_classname': self.__class__.__name__,
|
|
449
|
-
**self._as_dict(),
|
|
450
|
-
}
|
|
467
|
+
return {'_classname': self.__class__.__name__, **self._as_dict()}
|
|
451
468
|
|
|
452
469
|
@classmethod
|
|
453
470
|
def as_dict_list(self, expr_list: list[Expr]) -> list[dict]:
|
|
@@ -485,6 +502,7 @@ class Expr(abc.ABC):
|
|
|
485
502
|
|
|
486
503
|
def isin(self, value_set: Any) -> 'exprs.InPredicate':
|
|
487
504
|
from .in_predicate import InPredicate
|
|
505
|
+
|
|
488
506
|
if isinstance(value_set, Expr):
|
|
489
507
|
return InPredicate(self, value_set_expr=value_set)
|
|
490
508
|
else:
|
|
@@ -492,6 +510,7 @@ class Expr(abc.ABC):
|
|
|
492
510
|
|
|
493
511
|
def astype(self, new_type: Union[ts.ColumnType, type, _AnnotatedAlias]) -> 'exprs.TypeCast':
|
|
494
512
|
from pixeltable.exprs import TypeCast
|
|
513
|
+
|
|
495
514
|
# Interpret the type argument the same way we would if given in a schema
|
|
496
515
|
col_type = ts.ColumnType.normalize_type(new_type, nullable_default=True, allow_builtin_types=False)
|
|
497
516
|
if not self.col_type.nullable:
|
|
@@ -500,7 +519,9 @@ class Expr(abc.ABC):
|
|
|
500
519
|
col_type = col_type.copy(nullable=False)
|
|
501
520
|
return TypeCast(self, col_type)
|
|
502
521
|
|
|
503
|
-
def apply(
|
|
522
|
+
def apply(
|
|
523
|
+
self, fn: Callable, *, col_type: Union[ts.ColumnType, type, _AnnotatedAlias, None] = None
|
|
524
|
+
) -> 'exprs.FunctionCall':
|
|
504
525
|
if col_type is not None:
|
|
505
526
|
col_type = ts.ColumnType.normalize_type(col_type)
|
|
506
527
|
function = self._make_applicator_function(fn, col_type)
|
|
@@ -509,10 +530,7 @@ class Expr(abc.ABC):
|
|
|
509
530
|
|
|
510
531
|
def __dir__(self) -> list[str]:
|
|
511
532
|
attrs = ['isin', 'astype', 'apply']
|
|
512
|
-
attrs += [
|
|
513
|
-
f.name
|
|
514
|
-
for f in func.FunctionRegistry.get().get_type_methods(self.col_type.type_enum)
|
|
515
|
-
]
|
|
533
|
+
attrs += [f.name for f in func.FunctionRegistry.get().get_type_methods(self.col_type.type_enum)]
|
|
516
534
|
return attrs
|
|
517
535
|
|
|
518
536
|
def __call__(self, *args: Any, **kwargs: Any) -> Any:
|
|
@@ -521,9 +539,11 @@ class Expr(abc.ABC):
|
|
|
521
539
|
def __getitem__(self, index: object) -> Expr:
|
|
522
540
|
if self.col_type.is_json_type():
|
|
523
541
|
from .json_path import JsonPath
|
|
542
|
+
|
|
524
543
|
return JsonPath(self)[index]
|
|
525
544
|
if self.col_type.is_array_type():
|
|
526
545
|
from .array_slice import ArraySlice
|
|
546
|
+
|
|
527
547
|
if not isinstance(index, tuple):
|
|
528
548
|
index = (index,)
|
|
529
549
|
if any(not isinstance(i, (int, slice)) for i in index):
|
|
@@ -537,6 +557,7 @@ class Expr(abc.ABC):
|
|
|
537
557
|
"""
|
|
538
558
|
from .json_path import JsonPath
|
|
539
559
|
from .method_ref import MethodRef
|
|
560
|
+
|
|
540
561
|
if self.col_type.is_json_type():
|
|
541
562
|
return JsonPath(self).__getattr__(name)
|
|
542
563
|
else:
|
|
@@ -555,7 +576,8 @@ class Expr(abc.ABC):
|
|
|
555
576
|
|
|
556
577
|
def __bool__(self) -> bool:
|
|
557
578
|
raise TypeError(
|
|
558
|
-
'Pixeltable expressions cannot be used in conjunction with Python boolean operators (and/or/not)'
|
|
579
|
+
'Pixeltable expressions cannot be used in conjunction with Python boolean operators (and/or/not)'
|
|
580
|
+
)
|
|
559
581
|
|
|
560
582
|
def __lt__(self, other: object) -> 'exprs.Comparison':
|
|
561
583
|
return self._make_comparison(ComparisonOperator.LT, other)
|
|
@@ -566,6 +588,7 @@ class Expr(abc.ABC):
|
|
|
566
588
|
def __eq__(self, other: object) -> 'exprs.Expr': # type: ignore[override]
|
|
567
589
|
if other is None:
|
|
568
590
|
from .is_null import IsNull
|
|
591
|
+
|
|
569
592
|
return IsNull(self)
|
|
570
593
|
return self._make_comparison(ComparisonOperator.EQ, other)
|
|
571
594
|
|
|
@@ -573,6 +596,7 @@ class Expr(abc.ABC):
|
|
|
573
596
|
if other is None:
|
|
574
597
|
from .compound_predicate import CompoundPredicate
|
|
575
598
|
from .is_null import IsNull
|
|
599
|
+
|
|
576
600
|
return CompoundPredicate(LogicalOperator.NOT, [IsNull(self)])
|
|
577
601
|
return self._make_comparison(ComparisonOperator.NE, other)
|
|
578
602
|
|
|
@@ -589,6 +613,7 @@ class Expr(abc.ABC):
|
|
|
589
613
|
# TODO: check for compatibility
|
|
590
614
|
from .comparison import Comparison
|
|
591
615
|
from .literal import Literal
|
|
616
|
+
|
|
592
617
|
if isinstance(other, Expr):
|
|
593
618
|
return Comparison(op, self, other)
|
|
594
619
|
if isinstance(other, typing.get_args(LiteralPythonTypes)):
|
|
@@ -641,6 +666,7 @@ class Expr(abc.ABC):
|
|
|
641
666
|
# TODO: check for compatibility
|
|
642
667
|
from .arithmetic_expr import ArithmeticExpr
|
|
643
668
|
from .literal import Literal
|
|
669
|
+
|
|
644
670
|
if isinstance(other, Expr):
|
|
645
671
|
return ArithmeticExpr(op, self, other)
|
|
646
672
|
if isinstance(other, typing.get_args(LiteralPythonTypes)):
|
|
@@ -655,6 +681,7 @@ class Expr(abc.ABC):
|
|
|
655
681
|
# TODO: check for compatibility
|
|
656
682
|
from .arithmetic_expr import ArithmeticExpr
|
|
657
683
|
from .literal import Literal
|
|
684
|
+
|
|
658
685
|
assert not isinstance(other, Expr) # Else the left-handed form would have evaluated first
|
|
659
686
|
if isinstance(other, typing.get_args(LiteralPythonTypes)):
|
|
660
687
|
return ArithmeticExpr(op, Literal(other), self)
|
|
@@ -666,6 +693,7 @@ class Expr(abc.ABC):
|
|
|
666
693
|
if not other.col_type.is_bool_type():
|
|
667
694
|
raise TypeError(f'Other needs to be an expression that returns a boolean: {other.col_type}')
|
|
668
695
|
from .compound_predicate import CompoundPredicate
|
|
696
|
+
|
|
669
697
|
return CompoundPredicate(LogicalOperator.AND, [self, other])
|
|
670
698
|
|
|
671
699
|
def __or__(self, other: object) -> Expr:
|
|
@@ -674,14 +702,15 @@ class Expr(abc.ABC):
|
|
|
674
702
|
if not other.col_type.is_bool_type():
|
|
675
703
|
raise TypeError(f'Other needs to be an expression that returns a boolean: {other.col_type}')
|
|
676
704
|
from .compound_predicate import CompoundPredicate
|
|
705
|
+
|
|
677
706
|
return CompoundPredicate(LogicalOperator.OR, [self, other])
|
|
678
707
|
|
|
679
708
|
def __invert__(self) -> Expr:
|
|
680
709
|
from .compound_predicate import CompoundPredicate
|
|
710
|
+
|
|
681
711
|
return CompoundPredicate(LogicalOperator.NOT, [self])
|
|
682
712
|
|
|
683
|
-
def split_conjuncts(
|
|
684
|
-
self, condition: Callable[[Expr], bool]) -> tuple[list[Expr], Optional[Expr]]:
|
|
713
|
+
def split_conjuncts(self, condition: Callable[[Expr], bool]) -> tuple[list[Expr], Optional[Expr]]:
|
|
685
714
|
"""
|
|
686
715
|
Returns clauses of a conjunction that meet condition in the first element.
|
|
687
716
|
The second element contains remaining clauses, rolled into a conjunction.
|
|
@@ -721,7 +750,8 @@ class Expr(abc.ABC):
|
|
|
721
750
|
if fn_type is None:
|
|
722
751
|
raise excs.Error(
|
|
723
752
|
f'Column type of `{fn.__name__}` cannot be inferred. '
|
|
724
|
-
f'Use `.apply({fn.__name__}, col_type=...)` to specify.'
|
|
753
|
+
f'Use `.apply({fn.__name__}, col_type=...)` to specify.'
|
|
754
|
+
)
|
|
725
755
|
|
|
726
756
|
# TODO(aaron-siegel) Currently we assume that `fn` has exactly one required parameter
|
|
727
757
|
# and all optional parameters take their default values. Should we provide a more
|
|
@@ -741,17 +771,15 @@ class Expr(abc.ABC):
|
|
|
741
771
|
second_param = next(params_iter) if len(params) >= 2 else None
|
|
742
772
|
# Check that fn has at least one positional parameter
|
|
743
773
|
if len(params) == 0 or first_param.kind in (inspect.Parameter.KEYWORD_ONLY, inspect.Parameter.VAR_KEYWORD):
|
|
744
|
-
raise excs.Error(
|
|
745
|
-
f'Function `{fn.__name__}` has no positional parameters.'
|
|
746
|
-
)
|
|
774
|
+
raise excs.Error(f'Function `{fn.__name__}` has no positional parameters.')
|
|
747
775
|
# Check that fn has at most one required parameter, i.e., its second parameter
|
|
748
776
|
# has no default and is not a varargs
|
|
749
|
-
if
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
)
|
|
777
|
+
if (
|
|
778
|
+
len(params) >= 2
|
|
779
|
+
and second_param.kind not in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD)
|
|
780
|
+
and second_param.default == inspect.Parameter.empty
|
|
781
|
+
):
|
|
782
|
+
raise excs.Error(f'Function `{fn.__name__}` has multiple required parameters.')
|
|
755
783
|
except ValueError:
|
|
756
784
|
# inspect.signature(fn) will raise a `ValueError` if `fn` is a builtin; I don't
|
|
757
785
|
# know of any way to get the signature of a builtin, nor to check for this in
|
|
@@ -765,7 +793,8 @@ class Expr(abc.ABC):
|
|
|
765
793
|
# We also set the display_name explicitly, so that the `FunctionCall` gets the
|
|
766
794
|
# name of `decorated_fn`, not the lambda.
|
|
767
795
|
return func.make_function(
|
|
768
|
-
decorated_fn=lambda x: fn(x), return_type=fn_type, param_types=[self.col_type], function_name=fn.__name__
|
|
796
|
+
decorated_fn=lambda x: fn(x), return_type=fn_type, param_types=[self.col_type], function_name=fn.__name__
|
|
797
|
+
)
|
|
769
798
|
|
|
770
799
|
|
|
771
800
|
# A dictionary of result types of various stdlib functions that are
|
pixeltable/exprs/expr_dict.py
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
|
-
from typing import Generic,
|
|
1
|
+
from typing import Generic, Iterable, Iterator, Optional, TypeVar
|
|
2
2
|
|
|
3
3
|
T = TypeVar('T')
|
|
4
4
|
|
|
5
5
|
from .expr import Expr
|
|
6
6
|
|
|
7
|
+
|
|
7
8
|
class ExprDict(Generic[T]):
|
|
8
9
|
"""
|
|
9
10
|
A dictionary that maps Expr instances to values of type T.
|
pixeltable/exprs/expr_set.py
CHANGED
|
@@ -1,15 +1,17 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import
|
|
3
|
+
from typing import Generic, Iterable, Iterator, Optional, TypeVar
|
|
4
4
|
|
|
5
5
|
from .expr import Expr
|
|
6
6
|
|
|
7
7
|
T = TypeVar('T', bound='Expr')
|
|
8
8
|
|
|
9
|
+
|
|
9
10
|
class ExprSet(Generic[T]):
|
|
10
11
|
"""
|
|
11
12
|
A set that also supports indexed lookup (by slot_idx and Expr.id). Exprs are uniquely identified by Expr.id.
|
|
12
13
|
"""
|
|
14
|
+
|
|
13
15
|
exprs: dict[int, T] # key: Expr.id
|
|
14
16
|
exprs_by_idx: dict[int, T] # key: slot_idx
|
|
15
17
|
|
|
@@ -22,7 +22,6 @@ from .sql_element_cache import SqlElementCache
|
|
|
22
22
|
|
|
23
23
|
|
|
24
24
|
class FunctionCall(Expr):
|
|
25
|
-
|
|
26
25
|
fn: func.Function
|
|
27
26
|
is_method_call: bool
|
|
28
27
|
agg_init_args: dict[str, Any]
|
|
@@ -58,7 +57,7 @@ class FunctionCall(Expr):
|
|
|
58
57
|
return_type: ts.ColumnType,
|
|
59
58
|
order_by_clause: Optional[list[Any]] = None,
|
|
60
59
|
group_by_clause: Optional[list[Any]] = None,
|
|
61
|
-
is_method_call: bool = False
|
|
60
|
+
is_method_call: bool = False,
|
|
62
61
|
):
|
|
63
62
|
if order_by_clause is None:
|
|
64
63
|
order_by_clause = []
|
|
@@ -69,7 +68,7 @@ class FunctionCall(Expr):
|
|
|
69
68
|
|
|
70
69
|
self.fn = fn
|
|
71
70
|
self.is_method_call = is_method_call
|
|
72
|
-
#self.normalize_args(fn.name, signature, bound_args)
|
|
71
|
+
# self.normalize_args(fn.name, signature, bound_args)
|
|
73
72
|
self.resource_pool = fn.call_resource_pool(bound_args)
|
|
74
73
|
signature = fn.signature
|
|
75
74
|
|
|
@@ -79,8 +78,10 @@ class FunctionCall(Expr):
|
|
|
79
78
|
for arg_name, arg in bound_args.items():
|
|
80
79
|
param = signature.parameters[arg_name]
|
|
81
80
|
if (
|
|
82
|
-
param.col_type is not None
|
|
83
|
-
and
|
|
81
|
+
param.col_type is not None
|
|
82
|
+
and not param.col_type.nullable
|
|
83
|
+
and isinstance(arg, Expr)
|
|
84
|
+
and arg.col_type.nullable
|
|
84
85
|
):
|
|
85
86
|
return_type = return_type.copy(nullable=True)
|
|
86
87
|
break
|
|
@@ -96,7 +97,9 @@ class FunctionCall(Expr):
|
|
|
96
97
|
self.agg_init_args = {
|
|
97
98
|
arg_name: arg for arg_name, arg in bound_args.items() if arg_name in fn.init_param_names[0]
|
|
98
99
|
}
|
|
99
|
-
bound_args = {
|
|
100
|
+
bound_args = {
|
|
101
|
+
arg_name: arg for arg_name, arg in bound_args.items() if arg_name not in fn.init_param_names[0]
|
|
102
|
+
}
|
|
100
103
|
|
|
101
104
|
# construct components, args, kwargs
|
|
102
105
|
self.args = []
|
|
@@ -171,7 +174,8 @@ class FunctionCall(Expr):
|
|
|
171
174
|
# (that's done in SQL)
|
|
172
175
|
if len(order_by_clause) > 0 and not isinstance(order_by_clause[0], Expr):
|
|
173
176
|
raise excs.Error(
|
|
174
|
-
f'order_by argument needs to be a Pixeltable expression, but instead is a {type(order_by_clause[0])}'
|
|
177
|
+
f'order_by argument needs to be a Pixeltable expression, but instead is a {type(order_by_clause[0])}'
|
|
178
|
+
)
|
|
175
179
|
# don't add components after this, everything after order_by_start_idx is part of the order_by clause
|
|
176
180
|
self.order_by_start_idx = len(self.components)
|
|
177
181
|
self.components.extend(order_by_clause)
|
|
@@ -257,7 +261,8 @@ class FunctionCall(Expr):
|
|
|
257
261
|
):
|
|
258
262
|
raise excs.Error(
|
|
259
263
|
f'Parameter {param_name} (in function {fn_name}): argument type {arg.col_type} does not match parameter type '
|
|
260
|
-
f'{param.col_type}'
|
|
264
|
+
f'{param.col_type}'
|
|
265
|
+
)
|
|
261
266
|
|
|
262
267
|
def _equals(self, other: FunctionCall) -> bool:
|
|
263
268
|
if self.fn != other.fn:
|
|
@@ -282,7 +287,7 @@ class FunctionCall(Expr):
|
|
|
282
287
|
('kwargs', self.kwargs),
|
|
283
288
|
('group_by_start_idx', self.group_by_start_idx),
|
|
284
289
|
('group_by_stop_idx', self.group_by_stop_idx),
|
|
285
|
-
('order_by_start_idx', self.order_by_start_idx)
|
|
290
|
+
('order_by_start_idx', self.order_by_start_idx),
|
|
286
291
|
]
|
|
287
292
|
|
|
288
293
|
def __repr__(self) -> str:
|
|
@@ -298,13 +303,14 @@ class FunctionCall(Expr):
|
|
|
298
303
|
def _print_args(self, start_idx: int = 0, inline: bool = True) -> str:
|
|
299
304
|
def print_arg(arg: Any) -> str:
|
|
300
305
|
return repr(arg) if isinstance(arg, str) else str(arg)
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
306
|
+
|
|
307
|
+
arg_strs = [print_arg(arg) if idx is None else str(self.components[idx]) for idx, arg in self.args[start_idx:]]
|
|
308
|
+
arg_strs.extend(
|
|
309
|
+
[
|
|
310
|
+
f'{param_name}={print_arg(arg) if idx is None else str(self.components[idx])}'
|
|
311
|
+
for param_name, (idx, arg) in self.kwargs.items()
|
|
312
|
+
]
|
|
313
|
+
)
|
|
308
314
|
if len(self.order_by) > 0:
|
|
309
315
|
assert isinstance(self.fn, func.AggregateFunction)
|
|
310
316
|
if self.fn.requires_order_by:
|
|
@@ -322,18 +328,22 @@ class FunctionCall(Expr):
|
|
|
322
328
|
|
|
323
329
|
@property
|
|
324
330
|
def group_by(self) -> list[Expr]:
|
|
325
|
-
return self.components[self.group_by_start_idx:self.group_by_stop_idx]
|
|
331
|
+
return self.components[self.group_by_start_idx : self.group_by_stop_idx]
|
|
326
332
|
|
|
327
333
|
@property
|
|
328
334
|
def order_by(self) -> list[Expr]:
|
|
329
|
-
return self.components[self.order_by_start_idx:]
|
|
335
|
+
return self.components[self.order_by_start_idx :]
|
|
330
336
|
|
|
331
337
|
@property
|
|
332
338
|
def is_window_fn_call(self) -> bool:
|
|
333
|
-
return
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
339
|
+
return (
|
|
340
|
+
isinstance(self.fn, func.AggregateFunction)
|
|
341
|
+
and self.fn.allows_window
|
|
342
|
+
and (
|
|
343
|
+
not self.fn.allows_std_agg
|
|
344
|
+
or self.has_group_by()
|
|
345
|
+
or (len(self.order_by) > 0 and not self.fn.requires_order_by)
|
|
346
|
+
)
|
|
337
347
|
)
|
|
338
348
|
|
|
339
349
|
def get_window_sort_exprs(self) -> tuple[list[Expr], list[Expr]]:
|
|
@@ -435,7 +445,7 @@ class FunctionCall(Expr):
|
|
|
435
445
|
Returns a list of dicts mapping each param name to its value when this FunctionCall is evaluated against
|
|
436
446
|
data_rows
|
|
437
447
|
"""
|
|
438
|
-
assert all(name in self._param_values for name in param_names)
|
|
448
|
+
assert all(name in self._param_values for name in param_names), f'{param_names}, {self._param_values.keys()}'
|
|
439
449
|
result: list[dict[str, Any]] = []
|
|
440
450
|
for row in data_rows:
|
|
441
451
|
d: dict[str, Any] = {}
|
|
@@ -512,16 +522,12 @@ class FunctionCall(Expr):
|
|
|
512
522
|
fn = func.Function.from_dict(d['fn'])
|
|
513
523
|
assert not fn.is_polymorphic
|
|
514
524
|
return_type = ts.ColumnType.from_dict(d['return_type']) if 'return_type' in d else None
|
|
515
|
-
group_by_exprs = components[d['group_by_start_idx']:d['group_by_stop_idx']]
|
|
516
|
-
order_by_exprs = components[d['order_by_start_idx']:]
|
|
525
|
+
group_by_exprs = components[d['group_by_start_idx'] : d['group_by_stop_idx']]
|
|
526
|
+
order_by_exprs = components[d['order_by_start_idx'] :]
|
|
517
527
|
|
|
518
|
-
args = [
|
|
519
|
-
expr if idx is None else components[idx]
|
|
520
|
-
for idx, expr in d['args']
|
|
521
|
-
]
|
|
528
|
+
args = [expr if idx is None else components[idx] for idx, expr in d['args']]
|
|
522
529
|
kwargs = {
|
|
523
|
-
param_name: (expr if idx is None else components[idx])
|
|
524
|
-
for param_name, (idx, expr) in d['kwargs'].items()
|
|
530
|
+
param_name: (expr if idx is None else components[idx]) for param_name, (idx, expr) in d['kwargs'].items()
|
|
525
531
|
}
|
|
526
532
|
|
|
527
533
|
# `Function.from_dict()` does signature matching, so it is safe to assume that `args` and `kwargs` are
|
|
@@ -538,9 +544,7 @@ class FunctionCall(Expr):
|
|
|
538
544
|
# for now, as a hack, we do the unpacking here for the specific case of an InlineList of Literals (the only
|
|
539
545
|
# case where this is necessary to support existing conditional_return_type implementations). Once the general
|
|
540
546
|
# pattern is implemented, we can remove this hack.
|
|
541
|
-
unpacked_bound_args = {
|
|
542
|
-
param_name: cls.__unpack_bound_arg(arg) for param_name, arg in bound_args.items()
|
|
543
|
-
}
|
|
547
|
+
unpacked_bound_args = {param_name: cls.__unpack_bound_arg(arg) for param_name, arg in bound_args.items()}
|
|
544
548
|
|
|
545
549
|
# Evaluate the call_return_type as defined in the current codebase.
|
|
546
550
|
call_return_type = fn.call_return_type([], unpacked_bound_args)
|
|
@@ -567,13 +571,7 @@ class FunctionCall(Expr):
|
|
|
567
571
|
f'Return type as currently defined: `{call_return_type}`'
|
|
568
572
|
)
|
|
569
573
|
|
|
570
|
-
fn_call = cls(
|
|
571
|
-
fn,
|
|
572
|
-
bound_args,
|
|
573
|
-
return_type,
|
|
574
|
-
group_by_clause=group_by_exprs,
|
|
575
|
-
order_by_clause=order_by_exprs
|
|
576
|
-
)
|
|
574
|
+
fn_call = cls(fn, bound_args, return_type, group_by_clause=group_by_exprs, order_by_clause=order_by_exprs)
|
|
577
575
|
return fn_call
|
|
578
576
|
|
|
579
577
|
@classmethod
|
pixeltable/exprs/globals.py
CHANGED
|
@@ -7,6 +7,7 @@ from typing import Union
|
|
|
7
7
|
# Python types corresponding to our literal types
|
|
8
8
|
LiteralPythonTypes = Union[str, int, float, bool, datetime.datetime]
|
|
9
9
|
|
|
10
|
+
|
|
10
11
|
def print_slice(s: slice) -> str:
|
|
11
12
|
start_str = f'{str(s.start) if s.start is not None else ""}'
|
|
12
13
|
stop_str = f'{str(s.stop) if s.stop is not None else ""}'
|