pixeltable 0.2.20__py3-none-any.whl → 0.2.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +7 -19
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +7 -7
- pixeltable/catalog/column.py +37 -11
- pixeltable/catalog/globals.py +21 -0
- pixeltable/catalog/insertable_table.py +6 -4
- pixeltable/catalog/table.py +227 -148
- pixeltable/catalog/table_version.py +66 -28
- pixeltable/catalog/table_version_path.py +0 -8
- pixeltable/catalog/view.py +18 -19
- pixeltable/dataframe.py +16 -32
- pixeltable/env.py +6 -1
- pixeltable/exec/__init__.py +1 -2
- pixeltable/exec/aggregation_node.py +27 -17
- pixeltable/exec/cache_prefetch_node.py +1 -1
- pixeltable/exec/data_row_batch.py +9 -26
- pixeltable/exec/exec_node.py +36 -7
- pixeltable/exec/expr_eval_node.py +19 -11
- pixeltable/exec/in_memory_data_node.py +14 -11
- pixeltable/exec/sql_node.py +266 -138
- pixeltable/exprs/__init__.py +1 -0
- pixeltable/exprs/arithmetic_expr.py +3 -1
- pixeltable/exprs/array_slice.py +7 -7
- pixeltable/exprs/column_property_ref.py +37 -10
- pixeltable/exprs/column_ref.py +93 -14
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +8 -7
- pixeltable/exprs/data_row.py +56 -36
- pixeltable/exprs/expr.py +65 -63
- pixeltable/exprs/expr_dict.py +55 -0
- pixeltable/exprs/expr_set.py +26 -15
- pixeltable/exprs/function_call.py +53 -24
- pixeltable/exprs/globals.py +4 -1
- pixeltable/exprs/in_predicate.py +8 -7
- pixeltable/exprs/inline_expr.py +4 -4
- pixeltable/exprs/is_null.py +4 -4
- pixeltable/exprs/json_mapper.py +11 -12
- pixeltable/exprs/json_path.py +5 -10
- pixeltable/exprs/literal.py +5 -5
- pixeltable/exprs/method_ref.py +5 -4
- pixeltable/exprs/object_ref.py +2 -1
- pixeltable/exprs/row_builder.py +88 -36
- pixeltable/exprs/rowid_ref.py +14 -13
- pixeltable/exprs/similarity_expr.py +12 -7
- pixeltable/exprs/sql_element_cache.py +12 -6
- pixeltable/exprs/type_cast.py +8 -6
- pixeltable/exprs/variable.py +5 -4
- pixeltable/ext/functions/whisperx.py +7 -2
- pixeltable/func/aggregate_function.py +1 -1
- pixeltable/func/callable_function.py +2 -2
- pixeltable/func/function.py +11 -10
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/query_template_function.py +11 -12
- pixeltable/func/signature.py +17 -15
- pixeltable/func/udf.py +0 -4
- pixeltable/functions/__init__.py +2 -2
- pixeltable/functions/audio.py +4 -6
- pixeltable/functions/globals.py +84 -42
- pixeltable/functions/huggingface.py +31 -34
- pixeltable/functions/image.py +59 -45
- pixeltable/functions/json.py +0 -1
- pixeltable/functions/llama_cpp.py +106 -0
- pixeltable/functions/mistralai.py +2 -2
- pixeltable/functions/ollama.py +147 -0
- pixeltable/functions/openai.py +22 -25
- pixeltable/functions/replicate.py +72 -0
- pixeltable/functions/string.py +59 -50
- pixeltable/functions/timestamp.py +20 -20
- pixeltable/functions/together.py +2 -2
- pixeltable/functions/video.py +11 -20
- pixeltable/functions/whisper.py +2 -20
- pixeltable/globals.py +65 -74
- pixeltable/index/base.py +2 -2
- pixeltable/index/btree.py +20 -7
- pixeltable/index/embedding_index.py +12 -14
- pixeltable/io/__init__.py +1 -2
- pixeltable/io/external_store.py +11 -5
- pixeltable/io/fiftyone.py +178 -0
- pixeltable/io/globals.py +98 -2
- pixeltable/io/hf_datasets.py +1 -1
- pixeltable/io/label_studio.py +6 -6
- pixeltable/io/parquet.py +14 -13
- pixeltable/iterators/base.py +3 -2
- pixeltable/iterators/document.py +10 -8
- pixeltable/iterators/video.py +126 -60
- pixeltable/metadata/__init__.py +4 -3
- pixeltable/metadata/converters/convert_14.py +4 -2
- pixeltable/metadata/converters/convert_15.py +1 -1
- pixeltable/metadata/converters/convert_19.py +1 -0
- pixeltable/metadata/converters/convert_20.py +1 -1
- pixeltable/metadata/converters/convert_21.py +34 -0
- pixeltable/metadata/converters/util.py +54 -12
- pixeltable/metadata/notes.py +1 -0
- pixeltable/metadata/schema.py +40 -21
- pixeltable/plan.py +149 -165
- pixeltable/py.typed +0 -0
- pixeltable/store.py +57 -37
- pixeltable/tool/create_test_db_dump.py +6 -6
- pixeltable/tool/create_test_video.py +1 -1
- pixeltable/tool/doc_plugins/griffe.py +3 -34
- pixeltable/tool/embed_udf.py +1 -1
- pixeltable/tool/mypy_plugin.py +55 -0
- pixeltable/type_system.py +260 -61
- pixeltable/utils/arrow.py +10 -9
- pixeltable/utils/coco.py +4 -4
- pixeltable/utils/documents.py +16 -2
- pixeltable/utils/filecache.py +9 -9
- pixeltable/utils/formatter.py +10 -11
- pixeltable/utils/http_server.py +2 -5
- pixeltable/utils/media_store.py +6 -6
- pixeltable/utils/pytorch.py +10 -11
- pixeltable/utils/sql.py +2 -1
- {pixeltable-0.2.20.dist-info → pixeltable-0.2.22.dist-info}/METADATA +50 -13
- pixeltable-0.2.22.dist-info/RECORD +153 -0
- pixeltable/exec/media_validation_node.py +0 -43
- pixeltable/utils/help.py +0 -11
- pixeltable-0.2.20.dist-info/RECORD +0 -147
- {pixeltable-0.2.20.dist-info → pixeltable-0.2.22.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.20.dist-info → pixeltable-0.2.22.dist-info}/WHEEL +0 -0
- {pixeltable-0.2.20.dist-info → pixeltable-0.2.22.dist-info}/entry_points.txt +0 -0
pixeltable/exprs/expr.py
CHANGED
|
@@ -7,13 +7,12 @@ import inspect
|
|
|
7
7
|
import json
|
|
8
8
|
import sys
|
|
9
9
|
import typing
|
|
10
|
-
from typing import TYPE_CHECKING, Any, Callable, Iterator, Optional, TypeVar, Union, overload
|
|
10
|
+
from typing import TYPE_CHECKING, Any, Callable, Iterator, Optional, TypeVar, Union, overload, Iterable
|
|
11
11
|
from uuid import UUID
|
|
12
12
|
|
|
13
13
|
import sqlalchemy as sql
|
|
14
|
-
from typing_extensions import Self
|
|
14
|
+
from typing_extensions import _AnnotatedAlias, Self
|
|
15
15
|
|
|
16
|
-
import pixeltable
|
|
17
16
|
import pixeltable.catalog as catalog
|
|
18
17
|
import pixeltable.exceptions as excs
|
|
19
18
|
import pixeltable.func as func
|
|
@@ -91,7 +90,7 @@ class Expr(abc.ABC):
|
|
|
91
90
|
result = c_scope
|
|
92
91
|
return result
|
|
93
92
|
|
|
94
|
-
def bind_rel_paths(self, mapper: Optional['
|
|
93
|
+
def bind_rel_paths(self, mapper: Optional['exprs.JsonMapper'] = None) -> None:
|
|
95
94
|
"""
|
|
96
95
|
Binds relative JsonPaths to mapper.
|
|
97
96
|
This needs to be done in a separate phase after __init__(), because RelativeJsonPath()(-1) cannot be resolved
|
|
@@ -121,7 +120,7 @@ class Expr(abc.ABC):
|
|
|
121
120
|
return False
|
|
122
121
|
return self._equals(other)
|
|
123
122
|
|
|
124
|
-
def _equals(self, other:
|
|
123
|
+
def _equals(self, other: Self) -> bool:
|
|
125
124
|
# we already compared the type and components in equals(); subclasses that require additional comparisons
|
|
126
125
|
# override this
|
|
127
126
|
return True
|
|
@@ -232,12 +231,6 @@ class Expr(abc.ABC):
|
|
|
232
231
|
return self._retarget(tbl_versions)
|
|
233
232
|
|
|
234
233
|
def _retarget(self, tbl_versions: dict[UUID, catalog.TableVersion]) -> Self:
|
|
235
|
-
from .column_ref import ColumnRef
|
|
236
|
-
if isinstance(self, ColumnRef):
|
|
237
|
-
target = tbl_versions[self.col.tbl.id]
|
|
238
|
-
assert self.col.id in target.cols_by_id
|
|
239
|
-
col = target.cols_by_id[self.col.id]
|
|
240
|
-
return ColumnRef(col)
|
|
241
234
|
for i in range (len(self.components)):
|
|
242
235
|
self.components[i] = self.components[i]._retarget(tbl_versions)
|
|
243
236
|
return self
|
|
@@ -281,29 +274,32 @@ class Expr(abc.ABC):
|
|
|
281
274
|
"""
|
|
282
275
|
Iterate over all subexprs, including self.
|
|
283
276
|
"""
|
|
284
|
-
is_match =
|
|
285
|
-
|
|
286
|
-
|
|
277
|
+
is_match = isinstance(self, expr_class) if expr_class is not None else True
|
|
278
|
+
# apply filter after checking for expr_class
|
|
279
|
+
if filter is not None and is_match:
|
|
280
|
+
is_match = filter(self)
|
|
287
281
|
if not is_match or traverse_matches:
|
|
288
282
|
for c in self.components:
|
|
289
283
|
yield from c.subexprs(expr_class=expr_class, filter=filter, traverse_matches=traverse_matches)
|
|
290
284
|
if is_match:
|
|
291
|
-
yield self
|
|
285
|
+
yield self # type: ignore[misc]
|
|
292
286
|
|
|
293
287
|
@overload
|
|
288
|
+
@classmethod
|
|
294
289
|
def list_subexprs(
|
|
295
|
-
expr_list:
|
|
290
|
+
cls, expr_list: Iterable[Expr], *, filter: Optional[Callable[[Expr], bool]] = None, traverse_matches: bool = True
|
|
296
291
|
) -> Iterator[Expr]: ...
|
|
297
292
|
|
|
298
293
|
@overload
|
|
294
|
+
@classmethod
|
|
299
295
|
def list_subexprs(
|
|
300
|
-
expr_list:
|
|
296
|
+
cls, expr_list: Iterable[Expr], expr_class: type[T], filter: Optional[Callable[[Expr], bool]] = None,
|
|
301
297
|
traverse_matches: bool = True
|
|
302
298
|
) -> Iterator[T]: ...
|
|
303
299
|
|
|
304
300
|
@classmethod
|
|
305
301
|
def list_subexprs(
|
|
306
|
-
cls, expr_list:
|
|
302
|
+
cls, expr_list: Iterable[Expr], expr_class: Optional[type[T]] = None,
|
|
307
303
|
filter: Optional[Callable[[Expr], bool]] = None, traverse_matches: bool = True
|
|
308
304
|
) -> Iterator[T]:
|
|
309
305
|
"""Produce subexprs for all exprs in list. Can contain duplicates."""
|
|
@@ -312,13 +308,11 @@ class Expr(abc.ABC):
|
|
|
312
308
|
|
|
313
309
|
def _contains(self, cls: Optional[type[Expr]] = None, filter: Optional[Callable[[Expr], bool]] = None) -> bool:
|
|
314
310
|
"""
|
|
315
|
-
Returns True if any subexpr is an instance of cls.
|
|
311
|
+
Returns True if any subexpr is an instance of cls and/or matches filter.
|
|
316
312
|
"""
|
|
317
|
-
assert
|
|
318
|
-
if cls is not None:
|
|
319
|
-
filter = lambda e: isinstance(e, cls)
|
|
313
|
+
assert cls is not None or filter is not None
|
|
320
314
|
try:
|
|
321
|
-
_ = next(self.subexprs(filter=filter, traverse_matches=False))
|
|
315
|
+
_ = next(self.subexprs(expr_class=cls, filter=filter, traverse_matches=False))
|
|
322
316
|
return True
|
|
323
317
|
except StopIteration:
|
|
324
318
|
return False
|
|
@@ -330,11 +324,8 @@ class Expr(abc.ABC):
|
|
|
330
324
|
return {ref.col.tbl.id for ref in self.subexprs(ColumnRef)} | {ref.tbl.id for ref in self.subexprs(RowidRef)}
|
|
331
325
|
|
|
332
326
|
@classmethod
|
|
333
|
-
def
|
|
334
|
-
|
|
335
|
-
for e in expr_list:
|
|
336
|
-
ids.update(e.tbl_ids())
|
|
337
|
-
return ids
|
|
327
|
+
def all_tbl_ids(cls, exprs_: Iterable[Expr]) -> set[UUID]:
|
|
328
|
+
return set(tbl_id for e in exprs_ for tbl_id in e.tbl_ids())
|
|
338
329
|
|
|
339
330
|
@classmethod
|
|
340
331
|
def get_refd_columns(cls, expr_dict: dict[str, Any]) -> list[catalog.Column]:
|
|
@@ -384,7 +375,7 @@ class Expr(abc.ABC):
|
|
|
384
375
|
pass
|
|
385
376
|
|
|
386
377
|
@abc.abstractmethod
|
|
387
|
-
def eval(self, data_row: DataRow, row_builder: '
|
|
378
|
+
def eval(self, data_row: DataRow, row_builder: 'exprs.RowBuilder') -> None:
|
|
388
379
|
"""
|
|
389
380
|
Compute the expr value for data_row and store the result in data_row[slot_idx].
|
|
390
381
|
Not called if sql_expr() != None (exception: Literal).
|
|
@@ -450,18 +441,20 @@ class Expr(abc.ABC):
|
|
|
450
441
|
def _from_dict(cls, d: dict, components: list[Expr]) -> Self:
|
|
451
442
|
assert False, 'not implemented'
|
|
452
443
|
|
|
453
|
-
def isin(self, value_set: Any) -> '
|
|
444
|
+
def isin(self, value_set: Any) -> 'exprs.InPredicate':
|
|
454
445
|
from .in_predicate import InPredicate
|
|
455
446
|
if isinstance(value_set, Expr):
|
|
456
447
|
return InPredicate(self, value_set_expr=value_set)
|
|
457
448
|
else:
|
|
458
449
|
return InPredicate(self, value_set_literal=value_set)
|
|
459
450
|
|
|
460
|
-
def astype(self, new_type: ts.ColumnType) -> '
|
|
451
|
+
def astype(self, new_type: Union[ts.ColumnType, type, _AnnotatedAlias]) -> 'exprs.TypeCast':
|
|
461
452
|
from pixeltable.exprs import TypeCast
|
|
462
|
-
return TypeCast(self, new_type)
|
|
453
|
+
return TypeCast(self, ts.ColumnType.normalize_type(new_type))
|
|
463
454
|
|
|
464
|
-
def apply(self, fn: Callable, *, col_type:
|
|
455
|
+
def apply(self, fn: Callable, *, col_type: Union[ts.ColumnType, type, _AnnotatedAlias, None] = None) -> 'exprs.FunctionCall':
|
|
456
|
+
if col_type is not None:
|
|
457
|
+
col_type = ts.ColumnType.normalize_type(col_type)
|
|
465
458
|
function = self._make_applicator_function(fn, col_type)
|
|
466
459
|
# Return a `FunctionCall` obtained by passing this `Expr` to the new `function`.
|
|
467
460
|
return function(self)
|
|
@@ -474,23 +467,32 @@ class Expr(abc.ABC):
|
|
|
474
467
|
]
|
|
475
468
|
return attrs
|
|
476
469
|
|
|
470
|
+
def __call__(self, *args: Any, **kwargs: Any) -> Any:
|
|
471
|
+
raise NotImplementedError(f'Expression of type `{type(self)}` is not callable')
|
|
472
|
+
|
|
477
473
|
def __getitem__(self, index: object) -> Expr:
|
|
478
474
|
if self.col_type.is_json_type():
|
|
479
475
|
from .json_path import JsonPath
|
|
480
|
-
return JsonPath(self)
|
|
476
|
+
return JsonPath(self)[index]
|
|
481
477
|
if self.col_type.is_array_type():
|
|
482
478
|
from .array_slice import ArraySlice
|
|
479
|
+
if not isinstance(index, tuple):
|
|
480
|
+
index = (index,)
|
|
481
|
+
if any(not isinstance(i, (int, slice)) for i in index):
|
|
482
|
+
raise AttributeError(f'Invalid array indices: {index}')
|
|
483
483
|
return ArraySlice(self, index)
|
|
484
484
|
raise AttributeError(f'Type {self.col_type} is not subscriptable')
|
|
485
485
|
|
|
486
|
-
def __getattr__(self, name: str) ->
|
|
486
|
+
def __getattr__(self, name: str) -> 'exprs.Expr':
|
|
487
487
|
"""
|
|
488
488
|
ex.: <img col>.rotate(60)
|
|
489
489
|
"""
|
|
490
|
+
from .json_path import JsonPath
|
|
491
|
+
from .method_ref import MethodRef
|
|
490
492
|
if self.col_type.is_json_type():
|
|
491
|
-
return
|
|
493
|
+
return JsonPath(self).__getattr__(name)
|
|
492
494
|
else:
|
|
493
|
-
method_ref =
|
|
495
|
+
method_ref = MethodRef(self, name)
|
|
494
496
|
if method_ref.fn.is_property:
|
|
495
497
|
# Marked as a property, so autoinvoke the method to obtain a `FunctionCall`
|
|
496
498
|
assert method_ref.fn.arity == 1
|
|
@@ -503,32 +505,32 @@ class Expr(abc.ABC):
|
|
|
503
505
|
raise TypeError(
|
|
504
506
|
'Pixeltable expressions cannot be used in conjunction with Python boolean operators (and/or/not)')
|
|
505
507
|
|
|
506
|
-
def __lt__(self, other: object) -> '
|
|
508
|
+
def __lt__(self, other: object) -> 'exprs.Comparison':
|
|
507
509
|
return self._make_comparison(ComparisonOperator.LT, other)
|
|
508
510
|
|
|
509
|
-
def __le__(self, other: object) -> '
|
|
511
|
+
def __le__(self, other: object) -> 'exprs.Comparison':
|
|
510
512
|
return self._make_comparison(ComparisonOperator.LE, other)
|
|
511
513
|
|
|
512
|
-
def __eq__(self, other: object) -> '
|
|
514
|
+
def __eq__(self, other: object) -> 'exprs.Expr': # type: ignore[override]
|
|
513
515
|
if other is None:
|
|
514
516
|
from .is_null import IsNull
|
|
515
517
|
return IsNull(self)
|
|
516
518
|
return self._make_comparison(ComparisonOperator.EQ, other)
|
|
517
519
|
|
|
518
|
-
def __ne__(self, other: object) -> '
|
|
520
|
+
def __ne__(self, other: object) -> 'exprs.Expr': # type: ignore[override]
|
|
519
521
|
if other is None:
|
|
520
522
|
from .compound_predicate import CompoundPredicate
|
|
521
523
|
from .is_null import IsNull
|
|
522
524
|
return CompoundPredicate(LogicalOperator.NOT, [IsNull(self)])
|
|
523
525
|
return self._make_comparison(ComparisonOperator.NE, other)
|
|
524
526
|
|
|
525
|
-
def __gt__(self, other: object) -> '
|
|
527
|
+
def __gt__(self, other: object) -> 'exprs.Comparison':
|
|
526
528
|
return self._make_comparison(ComparisonOperator.GT, other)
|
|
527
529
|
|
|
528
|
-
def __ge__(self, other: object) -> '
|
|
530
|
+
def __ge__(self, other: object) -> 'exprs.Comparison':
|
|
529
531
|
return self._make_comparison(ComparisonOperator.GE, other)
|
|
530
532
|
|
|
531
|
-
def _make_comparison(self, op: ComparisonOperator, other: object) -> '
|
|
533
|
+
def _make_comparison(self, op: ComparisonOperator, other: object) -> 'exprs.Comparison':
|
|
532
534
|
"""
|
|
533
535
|
other: Union[Expr, LiteralPythonTypes]
|
|
534
536
|
"""
|
|
@@ -538,49 +540,49 @@ class Expr(abc.ABC):
|
|
|
538
540
|
if isinstance(other, Expr):
|
|
539
541
|
return Comparison(op, self, other)
|
|
540
542
|
if isinstance(other, typing.get_args(LiteralPythonTypes)):
|
|
541
|
-
return Comparison(op, self, Literal(other))
|
|
543
|
+
return Comparison(op, self, Literal(other))
|
|
542
544
|
raise TypeError(f'Other must be Expr or literal: {type(other)}')
|
|
543
545
|
|
|
544
|
-
def __neg__(self) -> '
|
|
546
|
+
def __neg__(self) -> 'exprs.ArithmeticExpr':
|
|
545
547
|
return self._make_arithmetic_expr(ArithmeticOperator.MUL, -1)
|
|
546
548
|
|
|
547
|
-
def __add__(self, other: object) -> '
|
|
549
|
+
def __add__(self, other: object) -> 'exprs.ArithmeticExpr':
|
|
548
550
|
return self._make_arithmetic_expr(ArithmeticOperator.ADD, other)
|
|
549
551
|
|
|
550
|
-
def __sub__(self, other: object) -> '
|
|
552
|
+
def __sub__(self, other: object) -> 'exprs.ArithmeticExpr':
|
|
551
553
|
return self._make_arithmetic_expr(ArithmeticOperator.SUB, other)
|
|
552
554
|
|
|
553
|
-
def __mul__(self, other: object) -> '
|
|
555
|
+
def __mul__(self, other: object) -> 'exprs.ArithmeticExpr':
|
|
554
556
|
return self._make_arithmetic_expr(ArithmeticOperator.MUL, other)
|
|
555
557
|
|
|
556
|
-
def __truediv__(self, other: object) -> '
|
|
558
|
+
def __truediv__(self, other: object) -> 'exprs.ArithmeticExpr':
|
|
557
559
|
return self._make_arithmetic_expr(ArithmeticOperator.DIV, other)
|
|
558
560
|
|
|
559
|
-
def __mod__(self, other: object) -> '
|
|
561
|
+
def __mod__(self, other: object) -> 'exprs.ArithmeticExpr':
|
|
560
562
|
return self._make_arithmetic_expr(ArithmeticOperator.MOD, other)
|
|
561
563
|
|
|
562
|
-
def __floordiv__(self, other: object) -> '
|
|
564
|
+
def __floordiv__(self, other: object) -> 'exprs.ArithmeticExpr':
|
|
563
565
|
return self._make_arithmetic_expr(ArithmeticOperator.FLOORDIV, other)
|
|
564
566
|
|
|
565
|
-
def __radd__(self, other: object) -> '
|
|
567
|
+
def __radd__(self, other: object) -> 'exprs.ArithmeticExpr':
|
|
566
568
|
return self._rmake_arithmetic_expr(ArithmeticOperator.ADD, other)
|
|
567
569
|
|
|
568
|
-
def __rsub__(self, other: object) -> '
|
|
570
|
+
def __rsub__(self, other: object) -> 'exprs.ArithmeticExpr':
|
|
569
571
|
return self._rmake_arithmetic_expr(ArithmeticOperator.SUB, other)
|
|
570
572
|
|
|
571
|
-
def __rmul__(self, other: object) -> '
|
|
573
|
+
def __rmul__(self, other: object) -> 'exprs.ArithmeticExpr':
|
|
572
574
|
return self._rmake_arithmetic_expr(ArithmeticOperator.MUL, other)
|
|
573
575
|
|
|
574
|
-
def __rtruediv__(self, other: object) -> '
|
|
576
|
+
def __rtruediv__(self, other: object) -> 'exprs.ArithmeticExpr':
|
|
575
577
|
return self._rmake_arithmetic_expr(ArithmeticOperator.DIV, other)
|
|
576
578
|
|
|
577
|
-
def __rmod__(self, other: object) -> '
|
|
579
|
+
def __rmod__(self, other: object) -> 'exprs.ArithmeticExpr':
|
|
578
580
|
return self._rmake_arithmetic_expr(ArithmeticOperator.MOD, other)
|
|
579
581
|
|
|
580
|
-
def __rfloordiv__(self, other: object) -> '
|
|
582
|
+
def __rfloordiv__(self, other: object) -> 'exprs.ArithmeticExpr':
|
|
581
583
|
return self._rmake_arithmetic_expr(ArithmeticOperator.FLOORDIV, other)
|
|
582
584
|
|
|
583
|
-
def _make_arithmetic_expr(self, op: ArithmeticOperator, other: object) -> '
|
|
585
|
+
def _make_arithmetic_expr(self, op: ArithmeticOperator, other: object) -> 'exprs.ArithmeticExpr':
|
|
584
586
|
"""
|
|
585
587
|
other: Union[Expr, LiteralPythonTypes]
|
|
586
588
|
"""
|
|
@@ -590,10 +592,10 @@ class Expr(abc.ABC):
|
|
|
590
592
|
if isinstance(other, Expr):
|
|
591
593
|
return ArithmeticExpr(op, self, other)
|
|
592
594
|
if isinstance(other, typing.get_args(LiteralPythonTypes)):
|
|
593
|
-
return ArithmeticExpr(op, self, Literal(other))
|
|
595
|
+
return ArithmeticExpr(op, self, Literal(other))
|
|
594
596
|
raise TypeError(f'Other must be Expr or literal: {type(other)}')
|
|
595
597
|
|
|
596
|
-
def _rmake_arithmetic_expr(self, op: ArithmeticOperator, other: object) -> '
|
|
598
|
+
def _rmake_arithmetic_expr(self, op: ArithmeticOperator, other: object) -> 'exprs.ArithmeticExpr':
|
|
597
599
|
"""
|
|
598
600
|
Right-handed version of _make_arithmetic_expr. other must be a literal; if it were an Expr,
|
|
599
601
|
the operation would have already been evaluated in its left-handed form.
|
|
@@ -603,7 +605,7 @@ class Expr(abc.ABC):
|
|
|
603
605
|
from .literal import Literal
|
|
604
606
|
assert not isinstance(other, Expr) # Else the left-handed form would have evaluated first
|
|
605
607
|
if isinstance(other, typing.get_args(LiteralPythonTypes)):
|
|
606
|
-
return ArithmeticExpr(op, Literal(other), self)
|
|
608
|
+
return ArithmeticExpr(op, Literal(other), self)
|
|
607
609
|
raise TypeError(f'Other must be Expr or literal: {type(other)}')
|
|
608
610
|
|
|
609
611
|
def __and__(self, other: object) -> Expr:
|
|
@@ -638,7 +640,7 @@ class Expr(abc.ABC):
|
|
|
638
640
|
else:
|
|
639
641
|
return [], self
|
|
640
642
|
|
|
641
|
-
def _make_applicator_function(self, fn: Callable, col_type: Optional[ts.ColumnType]) -> '
|
|
643
|
+
def _make_applicator_function(self, fn: Callable, col_type: Optional[ts.ColumnType]) -> 'func.Function':
|
|
642
644
|
"""
|
|
643
645
|
Creates a unary pixeltable `Function` that encapsulates a python `Callable`. The result type of
|
|
644
646
|
the new `Function` is given by `col_type`, and its parameter type will be `self.col_type`.
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
from typing import Generic, TypeVar, Optional, Iterator, Iterable
|
|
2
|
+
|
|
3
|
+
T = TypeVar('T')
|
|
4
|
+
|
|
5
|
+
from .expr import Expr
|
|
6
|
+
|
|
7
|
+
class ExprDict(Generic[T]):
|
|
8
|
+
"""
|
|
9
|
+
A dictionary that maps Expr instances to values of type T.
|
|
10
|
+
|
|
11
|
+
We cannot use dict[Expr, T] because Expr.__eq__() serves a different purpose than the default __eq__.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
_data: dict[int, tuple[Expr, T]]
|
|
15
|
+
|
|
16
|
+
def __init__(self, iterable: Optional[Iterable[tuple[Expr, T]]] = None):
|
|
17
|
+
self._data = {}
|
|
18
|
+
|
|
19
|
+
if iterable is not None:
|
|
20
|
+
for key, value in iterable:
|
|
21
|
+
self[key] = value
|
|
22
|
+
|
|
23
|
+
def __setitem__(self, key: Expr, value: T) -> None:
|
|
24
|
+
self._data[key.id] = (key, value)
|
|
25
|
+
|
|
26
|
+
def __getitem__(self, key: Expr) -> T:
|
|
27
|
+
return self._data[key.id][1]
|
|
28
|
+
|
|
29
|
+
def __delitem__(self, key: Expr) -> None:
|
|
30
|
+
del self._data[key.id]
|
|
31
|
+
|
|
32
|
+
def __len__(self) -> int:
|
|
33
|
+
return len(self._data)
|
|
34
|
+
|
|
35
|
+
def __iter__(self) -> Iterator[Expr]:
|
|
36
|
+
return (expr for expr, _ in self._data.values())
|
|
37
|
+
|
|
38
|
+
def __contains__(self, key: Expr) -> bool:
|
|
39
|
+
return key.id in self._data
|
|
40
|
+
|
|
41
|
+
def get(self, key: Expr, default: Optional[T] = None) -> Optional[T]:
|
|
42
|
+
item = self._data.get(key.id)
|
|
43
|
+
return item[1] if item is not None else default
|
|
44
|
+
|
|
45
|
+
def clear(self) -> None:
|
|
46
|
+
self._data.clear()
|
|
47
|
+
|
|
48
|
+
def keys(self) -> Iterator[Expr]:
|
|
49
|
+
return self.__iter__()
|
|
50
|
+
|
|
51
|
+
def values(self) -> Iterator[T]:
|
|
52
|
+
return (value for _, value in self._data.values())
|
|
53
|
+
|
|
54
|
+
def items(self) -> Iterator[tuple[Expr, T]]:
|
|
55
|
+
return ((expr, value) for expr, value in self._data.values())
|
pixeltable/exprs/expr_set.py
CHANGED
|
@@ -1,25 +1,26 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import Optional, Iterable, Iterator
|
|
3
|
+
from typing import Optional, Iterable, Iterator, TypeVar, Generic
|
|
4
4
|
|
|
5
5
|
from .expr import Expr
|
|
6
6
|
|
|
7
|
+
T = TypeVar('T', bound='Expr')
|
|
7
8
|
|
|
8
|
-
class ExprSet:
|
|
9
|
+
class ExprSet(Generic[T]):
|
|
9
10
|
"""
|
|
10
11
|
A set that also supports indexed lookup (by slot_idx and Expr.id). Exprs are uniquely identified by Expr.id.
|
|
11
12
|
"""
|
|
12
|
-
exprs: dict[int,
|
|
13
|
-
exprs_by_idx: dict[int,
|
|
13
|
+
exprs: dict[int, T] # key: Expr.id
|
|
14
|
+
exprs_by_idx: dict[int, T] # key: slot_idx
|
|
14
15
|
|
|
15
|
-
def __init__(self, elements: Optional[Iterable[
|
|
16
|
+
def __init__(self, elements: Optional[Iterable[T]] = None):
|
|
16
17
|
self.exprs = {}
|
|
17
18
|
self.exprs_by_idx = {}
|
|
18
19
|
if elements is not None:
|
|
19
20
|
for e in elements:
|
|
20
21
|
self.add(e)
|
|
21
22
|
|
|
22
|
-
def add(self, expr:
|
|
23
|
+
def add(self, expr: T) -> None:
|
|
23
24
|
if expr.id in self.exprs:
|
|
24
25
|
return
|
|
25
26
|
self.exprs[expr.id] = expr
|
|
@@ -27,24 +28,22 @@ class ExprSet:
|
|
|
27
28
|
return
|
|
28
29
|
self.exprs_by_idx[expr.slot_idx] = expr
|
|
29
30
|
|
|
30
|
-
def update(self, *others: Iterable[
|
|
31
|
+
def update(self, *others: Iterable[T]) -> None:
|
|
31
32
|
for other in others:
|
|
32
33
|
for e in other:
|
|
33
34
|
self.add(e)
|
|
34
35
|
|
|
35
|
-
def __contains__(self, item:
|
|
36
|
+
def __contains__(self, item: T) -> bool:
|
|
36
37
|
return item.id in self.exprs
|
|
37
38
|
|
|
38
39
|
def __len__(self) -> int:
|
|
39
40
|
return len(self.exprs)
|
|
40
41
|
|
|
41
|
-
def __iter__(self) -> Iterator[
|
|
42
|
+
def __iter__(self) -> Iterator[T]:
|
|
42
43
|
return iter(self.exprs.values())
|
|
43
44
|
|
|
44
|
-
def __getitem__(self, index: object) -> Optional[
|
|
45
|
+
def __getitem__(self, index: object) -> Optional[T]:
|
|
45
46
|
"""Indexed lookup by slot_idx or Expr.id."""
|
|
46
|
-
if not isinstance(index, int) and not isinstance(index, Expr):
|
|
47
|
-
pass
|
|
48
47
|
assert isinstance(index, int) or isinstance(index, Expr)
|
|
49
48
|
if isinstance(index, int):
|
|
50
49
|
# return expr with matching slot_idx
|
|
@@ -52,11 +51,23 @@ class ExprSet:
|
|
|
52
51
|
else:
|
|
53
52
|
return self.exprs.get(index.id)
|
|
54
53
|
|
|
55
|
-
def issuperset(self, other: ExprSet) -> bool:
|
|
54
|
+
def issuperset(self, other: ExprSet[T]) -> bool:
|
|
56
55
|
return self.exprs.keys() >= other.exprs.keys()
|
|
57
56
|
|
|
58
|
-
def __ge__(self, other: ExprSet) -> bool:
|
|
57
|
+
def __ge__(self, other: ExprSet[T]) -> bool:
|
|
59
58
|
return self.issuperset(other)
|
|
60
59
|
|
|
61
|
-
def __le__(self, other: ExprSet) -> bool:
|
|
60
|
+
def __le__(self, other: ExprSet[T]) -> bool:
|
|
62
61
|
return other.issuperset(self)
|
|
62
|
+
|
|
63
|
+
def difference(self, *others: Iterable[T]) -> ExprSet[T]:
|
|
64
|
+
id_diff = set(self.exprs.keys()).difference(e.id for other_set in others for e in other_set)
|
|
65
|
+
return ExprSet(self.exprs[id] for id in id_diff)
|
|
66
|
+
|
|
67
|
+
def __sub__(self, other: ExprSet[T]) -> ExprSet[T]:
|
|
68
|
+
return self.difference(other)
|
|
69
|
+
|
|
70
|
+
def __add__(self, other: ExprSet) -> ExprSet:
|
|
71
|
+
exprs = self.exprs.copy()
|
|
72
|
+
exprs.update(other.exprs)
|
|
73
|
+
return ExprSet(exprs.values())
|
|
@@ -50,14 +50,29 @@ class FunctionCall(Expr):
|
|
|
50
50
|
if group_by_clause is None:
|
|
51
51
|
group_by_clause = []
|
|
52
52
|
signature = fn.signature
|
|
53
|
-
|
|
53
|
+
return_type = fn.call_return_type(bound_args)
|
|
54
54
|
self.fn = fn
|
|
55
55
|
self.is_method_call = is_method_call
|
|
56
56
|
self.normalize_args(fn.name, signature, bound_args)
|
|
57
57
|
|
|
58
|
+
# If `return_type` is non-nullable, but the function call has a nullable input to any of its non-nullable
|
|
59
|
+
# parameters, then we need to make it nullable. This is because Pixeltable defaults a function output to
|
|
60
|
+
# `None` when any of its non-nullable inputs are `None`.
|
|
61
|
+
for arg_name, arg in bound_args.items():
|
|
62
|
+
param = signature.parameters[arg_name]
|
|
63
|
+
if (
|
|
64
|
+
param.col_type is not None and not param.col_type.nullable
|
|
65
|
+
and isinstance(arg, Expr) and arg.col_type.nullable
|
|
66
|
+
):
|
|
67
|
+
return_type = return_type.copy(nullable=True)
|
|
68
|
+
break
|
|
69
|
+
|
|
70
|
+
super().__init__(return_type)
|
|
71
|
+
|
|
58
72
|
self.agg_init_args = {}
|
|
59
73
|
if self.is_agg_fn_call:
|
|
60
74
|
# we separate out the init args for the aggregator
|
|
75
|
+
assert isinstance(fn, func.AggregateFunction)
|
|
61
76
|
self.agg_init_args = {
|
|
62
77
|
arg_name: arg for arg_name, arg in bound_args.items() if arg_name in fn.init_param_names
|
|
63
78
|
}
|
|
@@ -71,17 +86,17 @@ class FunctionCall(Expr):
|
|
|
71
86
|
self.arg_types = []
|
|
72
87
|
self.kwarg_types = {}
|
|
73
88
|
# the prefix of parameters that are bound can be passed by position
|
|
74
|
-
for
|
|
75
|
-
if
|
|
89
|
+
for py_param in fn.signature.py_signature.parameters.values():
|
|
90
|
+
if py_param.name not in bound_args or py_param.kind == inspect.Parameter.KEYWORD_ONLY:
|
|
76
91
|
break
|
|
77
|
-
arg = bound_args[
|
|
92
|
+
arg = bound_args[py_param.name]
|
|
78
93
|
if isinstance(arg, Expr):
|
|
79
94
|
self.args.append((len(self.components), None))
|
|
80
95
|
self.components.append(arg.copy())
|
|
81
96
|
else:
|
|
82
97
|
self.args.append((None, arg))
|
|
83
|
-
if
|
|
84
|
-
self.arg_types.append(signature.parameters[
|
|
98
|
+
if py_param.kind != inspect.Parameter.VAR_POSITIONAL and py_param.kind != inspect.Parameter.VAR_KEYWORD:
|
|
99
|
+
self.arg_types.append(signature.parameters[py_param.name].col_type)
|
|
85
100
|
|
|
86
101
|
# the remaining args are passed as keywords
|
|
87
102
|
kw_param_names = set(bound_args.keys()) - set(list(fn.signature.py_signature.parameters.keys())[:len(self.args)])
|
|
@@ -138,13 +153,11 @@ class FunctionCall(Expr):
|
|
|
138
153
|
return [RowidRef(target, i) for i in range(target.num_rowid_columns())]
|
|
139
154
|
|
|
140
155
|
def default_column_name(self) -> Optional[str]:
|
|
141
|
-
|
|
142
|
-
return self.fn.name
|
|
143
|
-
return super().default_column_name()
|
|
156
|
+
return self.fn.name
|
|
144
157
|
|
|
145
158
|
@classmethod
|
|
146
159
|
def normalize_args(cls, fn_name: str, signature: func.Signature, bound_args: dict[str, Any]) -> None:
|
|
147
|
-
"""Converts
|
|
160
|
+
"""Converts args to Exprs where appropriate and checks that they are compatible with signature.
|
|
148
161
|
|
|
149
162
|
Updates bound_args in place, where necessary.
|
|
150
163
|
"""
|
|
@@ -263,6 +276,7 @@ class FunctionCall(Expr):
|
|
|
263
276
|
for param_name, (idx, arg) in self.kwargs.items()
|
|
264
277
|
])
|
|
265
278
|
if len(self.order_by) > 0:
|
|
279
|
+
assert isinstance(self.fn, func.AggregateFunction)
|
|
266
280
|
if self.fn.requires_order_by:
|
|
267
281
|
arg_strs.insert(0, Expr.print_list(self.order_by))
|
|
268
282
|
else:
|
|
@@ -273,7 +287,7 @@ class FunctionCall(Expr):
|
|
|
273
287
|
separator = ', ' if inline else ',\n '
|
|
274
288
|
return separator.join(arg_strs)
|
|
275
289
|
|
|
276
|
-
def has_group_by(self) ->
|
|
290
|
+
def has_group_by(self) -> bool:
|
|
277
291
|
return self.group_by_stop_idx != 0
|
|
278
292
|
|
|
279
293
|
@property
|
|
@@ -286,14 +300,19 @@ class FunctionCall(Expr):
|
|
|
286
300
|
|
|
287
301
|
@property
|
|
288
302
|
def is_window_fn_call(self) -> bool:
|
|
289
|
-
return isinstance(self.fn, func.AggregateFunction) and self.fn.allows_window and
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
303
|
+
return isinstance(self.fn, func.AggregateFunction) and self.fn.allows_window and (
|
|
304
|
+
not self.fn.allows_std_agg
|
|
305
|
+
or self.has_group_by()
|
|
306
|
+
or (len(self.order_by) > 0 and not self.fn.requires_order_by)
|
|
307
|
+
)
|
|
293
308
|
|
|
294
309
|
def get_window_sort_exprs(self) -> tuple[list[Expr], list[Expr]]:
|
|
295
310
|
return self.group_by, self.order_by
|
|
296
311
|
|
|
312
|
+
def get_window_ordering(self) -> list[tuple[Expr, bool]]:
|
|
313
|
+
# ordering is implicitly ascending
|
|
314
|
+
return [(e, None) for e in self.group_by] + [(e, True) for e in self.order_by]
|
|
315
|
+
|
|
297
316
|
@property
|
|
298
317
|
def is_agg_fn_call(self) -> bool:
|
|
299
318
|
return isinstance(self.fn, func.AggregateFunction)
|
|
@@ -303,6 +322,10 @@ class FunctionCall(Expr):
|
|
|
303
322
|
return self.order_by
|
|
304
323
|
|
|
305
324
|
def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
325
|
+
# we currently can't translate aggregate functions with grouping and/or ordering to SQL
|
|
326
|
+
if self.has_group_by() or len(self.order_by) > 0:
|
|
327
|
+
return None
|
|
328
|
+
|
|
306
329
|
# try to construct args and kwargs to call self.fn._to_sql()
|
|
307
330
|
kwargs: dict[str, sql.ColumnElement] = {}
|
|
308
331
|
for param_name, (component_idx, arg) in self.kwargs.items():
|
|
@@ -374,6 +397,18 @@ class FunctionCall(Expr):
|
|
|
374
397
|
return args, kwargs
|
|
375
398
|
|
|
376
399
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
400
|
+
if isinstance(self.fn, func.ExprTemplateFunction):
|
|
401
|
+
# we need to evaluate the template
|
|
402
|
+
# TODO: can we get rid of this extra copy?
|
|
403
|
+
fn_expr = self.components[self.fn_expr_idx]
|
|
404
|
+
data_row[self.slot_idx] = data_row[fn_expr.slot_idx]
|
|
405
|
+
return
|
|
406
|
+
elif self.is_agg_fn_call and not self.is_window_fn_call:
|
|
407
|
+
if self.aggregator is None:
|
|
408
|
+
pass
|
|
409
|
+
data_row[self.slot_idx] = self.aggregator.value()
|
|
410
|
+
return
|
|
411
|
+
|
|
377
412
|
args, kwargs = self._make_args(data_row)
|
|
378
413
|
signature = self.fn.signature
|
|
379
414
|
if signature.parameters is not None:
|
|
@@ -389,15 +424,11 @@ class FunctionCall(Expr):
|
|
|
389
424
|
data_row[self.slot_idx] = None
|
|
390
425
|
return
|
|
391
426
|
|
|
392
|
-
if isinstance(self.fn, func.ExprTemplateFunction):
|
|
393
|
-
# we need to evaluate the template
|
|
394
|
-
# TODO: can we get rid of this extra copy?
|
|
395
|
-
fn_expr = self.components[self.fn_expr_idx]
|
|
396
|
-
data_row[self.slot_idx] = data_row[fn_expr.slot_idx]
|
|
397
|
-
elif isinstance(self.fn, func.CallableFunction) and not self.fn.is_batched:
|
|
427
|
+
if isinstance(self.fn, func.CallableFunction) and not self.fn.is_batched:
|
|
398
428
|
# optimization: avoid additional level of indirection we'd get from calling Function.exec()
|
|
399
429
|
data_row[self.slot_idx] = self.fn.py_fn(*args, **kwargs)
|
|
400
430
|
elif self.is_window_fn_call:
|
|
431
|
+
assert isinstance(self.fn, func.AggregateFunction)
|
|
401
432
|
if self.has_group_by():
|
|
402
433
|
if self.current_partition_vals is None:
|
|
403
434
|
self.current_partition_vals = [None] * len(self.group_by)
|
|
@@ -410,8 +441,6 @@ class FunctionCall(Expr):
|
|
|
410
441
|
self.aggregator = self.fn.agg_cls(**self.agg_init_args)
|
|
411
442
|
self.aggregator.update(*args)
|
|
412
443
|
data_row[self.slot_idx] = self.aggregator.value()
|
|
413
|
-
elif self.is_agg_fn_call:
|
|
414
|
-
data_row[self.slot_idx] = self.aggregator.value()
|
|
415
444
|
else:
|
|
416
445
|
data_row[self.slot_idx] = self.fn.exec(*args, **kwargs)
|
|
417
446
|
|
|
@@ -425,7 +454,7 @@ class FunctionCall(Expr):
|
|
|
425
454
|
return result
|
|
426
455
|
|
|
427
456
|
@classmethod
|
|
428
|
-
def _from_dict(cls, d: dict, components: list[Expr]) ->
|
|
457
|
+
def _from_dict(cls, d: dict, components: list[Expr]) -> FunctionCall:
|
|
429
458
|
assert 'fn' in d
|
|
430
459
|
assert 'args' in d
|
|
431
460
|
assert 'kwargs' in d
|
pixeltable/exprs/globals.py
CHANGED
|
@@ -5,7 +5,7 @@ import enum
|
|
|
5
5
|
from typing import Union
|
|
6
6
|
|
|
7
7
|
# Python types corresponding to our literal types
|
|
8
|
-
LiteralPythonTypes = Union[str, int, float, bool, datetime.datetime
|
|
8
|
+
LiteralPythonTypes = Union[str, int, float, bool, datetime.datetime]
|
|
9
9
|
|
|
10
10
|
def print_slice(s: slice) -> str:
|
|
11
11
|
start_str = f'{str(s.start) if s.start is not None else ""}'
|
|
@@ -35,6 +35,7 @@ class ComparisonOperator(enum.Enum):
|
|
|
35
35
|
return '>'
|
|
36
36
|
if self == self.GE:
|
|
37
37
|
return '>='
|
|
38
|
+
assert False
|
|
38
39
|
|
|
39
40
|
def reverse(self) -> ComparisonOperator:
|
|
40
41
|
if self == self.LT:
|
|
@@ -60,6 +61,7 @@ class LogicalOperator(enum.Enum):
|
|
|
60
61
|
return '|'
|
|
61
62
|
if self == self.NOT:
|
|
62
63
|
return '~'
|
|
64
|
+
assert False
|
|
63
65
|
|
|
64
66
|
|
|
65
67
|
class ArithmeticOperator(enum.Enum):
|
|
@@ -83,3 +85,4 @@ class ArithmeticOperator(enum.Enum):
|
|
|
83
85
|
return '%'
|
|
84
86
|
if self == self.FLOORDIV:
|
|
85
87
|
return '//'
|
|
88
|
+
assert False
|