pixeltable 0.2.20__py3-none-any.whl → 0.2.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic (further details were linked from the original registry page).

Files changed (120):
  1. pixeltable/__init__.py +7 -19
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +7 -7
  4. pixeltable/catalog/column.py +37 -11
  5. pixeltable/catalog/globals.py +21 -0
  6. pixeltable/catalog/insertable_table.py +6 -4
  7. pixeltable/catalog/table.py +227 -148
  8. pixeltable/catalog/table_version.py +66 -28
  9. pixeltable/catalog/table_version_path.py +0 -8
  10. pixeltable/catalog/view.py +18 -19
  11. pixeltable/dataframe.py +16 -32
  12. pixeltable/env.py +6 -1
  13. pixeltable/exec/__init__.py +1 -2
  14. pixeltable/exec/aggregation_node.py +27 -17
  15. pixeltable/exec/cache_prefetch_node.py +1 -1
  16. pixeltable/exec/data_row_batch.py +9 -26
  17. pixeltable/exec/exec_node.py +36 -7
  18. pixeltable/exec/expr_eval_node.py +19 -11
  19. pixeltable/exec/in_memory_data_node.py +14 -11
  20. pixeltable/exec/sql_node.py +266 -138
  21. pixeltable/exprs/__init__.py +1 -0
  22. pixeltable/exprs/arithmetic_expr.py +3 -1
  23. pixeltable/exprs/array_slice.py +7 -7
  24. pixeltable/exprs/column_property_ref.py +37 -10
  25. pixeltable/exprs/column_ref.py +93 -14
  26. pixeltable/exprs/comparison.py +5 -5
  27. pixeltable/exprs/compound_predicate.py +8 -7
  28. pixeltable/exprs/data_row.py +56 -36
  29. pixeltable/exprs/expr.py +65 -63
  30. pixeltable/exprs/expr_dict.py +55 -0
  31. pixeltable/exprs/expr_set.py +26 -15
  32. pixeltable/exprs/function_call.py +53 -24
  33. pixeltable/exprs/globals.py +4 -1
  34. pixeltable/exprs/in_predicate.py +8 -7
  35. pixeltable/exprs/inline_expr.py +4 -4
  36. pixeltable/exprs/is_null.py +4 -4
  37. pixeltable/exprs/json_mapper.py +11 -12
  38. pixeltable/exprs/json_path.py +5 -10
  39. pixeltable/exprs/literal.py +5 -5
  40. pixeltable/exprs/method_ref.py +5 -4
  41. pixeltable/exprs/object_ref.py +2 -1
  42. pixeltable/exprs/row_builder.py +88 -36
  43. pixeltable/exprs/rowid_ref.py +14 -13
  44. pixeltable/exprs/similarity_expr.py +12 -7
  45. pixeltable/exprs/sql_element_cache.py +12 -6
  46. pixeltable/exprs/type_cast.py +8 -6
  47. pixeltable/exprs/variable.py +5 -4
  48. pixeltable/ext/functions/whisperx.py +7 -2
  49. pixeltable/func/aggregate_function.py +1 -1
  50. pixeltable/func/callable_function.py +2 -2
  51. pixeltable/func/function.py +11 -10
  52. pixeltable/func/function_registry.py +6 -7
  53. pixeltable/func/query_template_function.py +11 -12
  54. pixeltable/func/signature.py +17 -15
  55. pixeltable/func/udf.py +0 -4
  56. pixeltable/functions/__init__.py +2 -2
  57. pixeltable/functions/audio.py +4 -6
  58. pixeltable/functions/globals.py +84 -42
  59. pixeltable/functions/huggingface.py +31 -34
  60. pixeltable/functions/image.py +59 -45
  61. pixeltable/functions/json.py +0 -1
  62. pixeltable/functions/llama_cpp.py +106 -0
  63. pixeltable/functions/mistralai.py +2 -2
  64. pixeltable/functions/ollama.py +147 -0
  65. pixeltable/functions/openai.py +22 -25
  66. pixeltable/functions/replicate.py +72 -0
  67. pixeltable/functions/string.py +59 -50
  68. pixeltable/functions/timestamp.py +20 -20
  69. pixeltable/functions/together.py +2 -2
  70. pixeltable/functions/video.py +11 -20
  71. pixeltable/functions/whisper.py +2 -20
  72. pixeltable/globals.py +65 -74
  73. pixeltable/index/base.py +2 -2
  74. pixeltable/index/btree.py +20 -7
  75. pixeltable/index/embedding_index.py +12 -14
  76. pixeltable/io/__init__.py +1 -2
  77. pixeltable/io/external_store.py +11 -5
  78. pixeltable/io/fiftyone.py +178 -0
  79. pixeltable/io/globals.py +98 -2
  80. pixeltable/io/hf_datasets.py +1 -1
  81. pixeltable/io/label_studio.py +6 -6
  82. pixeltable/io/parquet.py +14 -13
  83. pixeltable/iterators/base.py +3 -2
  84. pixeltable/iterators/document.py +10 -8
  85. pixeltable/iterators/video.py +126 -60
  86. pixeltable/metadata/__init__.py +4 -3
  87. pixeltable/metadata/converters/convert_14.py +4 -2
  88. pixeltable/metadata/converters/convert_15.py +1 -1
  89. pixeltable/metadata/converters/convert_19.py +1 -0
  90. pixeltable/metadata/converters/convert_20.py +1 -1
  91. pixeltable/metadata/converters/convert_21.py +34 -0
  92. pixeltable/metadata/converters/util.py +54 -12
  93. pixeltable/metadata/notes.py +1 -0
  94. pixeltable/metadata/schema.py +40 -21
  95. pixeltable/plan.py +149 -165
  96. pixeltable/py.typed +0 -0
  97. pixeltable/store.py +57 -37
  98. pixeltable/tool/create_test_db_dump.py +6 -6
  99. pixeltable/tool/create_test_video.py +1 -1
  100. pixeltable/tool/doc_plugins/griffe.py +3 -34
  101. pixeltable/tool/embed_udf.py +1 -1
  102. pixeltable/tool/mypy_plugin.py +55 -0
  103. pixeltable/type_system.py +260 -61
  104. pixeltable/utils/arrow.py +10 -9
  105. pixeltable/utils/coco.py +4 -4
  106. pixeltable/utils/documents.py +16 -2
  107. pixeltable/utils/filecache.py +9 -9
  108. pixeltable/utils/formatter.py +10 -11
  109. pixeltable/utils/http_server.py +2 -5
  110. pixeltable/utils/media_store.py +6 -6
  111. pixeltable/utils/pytorch.py +10 -11
  112. pixeltable/utils/sql.py +2 -1
  113. {pixeltable-0.2.20.dist-info → pixeltable-0.2.22.dist-info}/METADATA +50 -13
  114. pixeltable-0.2.22.dist-info/RECORD +153 -0
  115. pixeltable/exec/media_validation_node.py +0 -43
  116. pixeltable/utils/help.py +0 -11
  117. pixeltable-0.2.20.dist-info/RECORD +0 -147
  118. {pixeltable-0.2.20.dist-info → pixeltable-0.2.22.dist-info}/LICENSE +0 -0
  119. {pixeltable-0.2.20.dist-info → pixeltable-0.2.22.dist-info}/WHEEL +0 -0
  120. {pixeltable-0.2.20.dist-info → pixeltable-0.2.22.dist-info}/entry_points.txt +0 -0
pixeltable/exprs/expr.py CHANGED
@@ -7,13 +7,12 @@ import inspect
7
7
  import json
8
8
  import sys
9
9
  import typing
10
- from typing import TYPE_CHECKING, Any, Callable, Iterator, Optional, TypeVar, Union, overload
10
+ from typing import TYPE_CHECKING, Any, Callable, Iterator, Optional, TypeVar, Union, overload, Iterable
11
11
  from uuid import UUID
12
12
 
13
13
  import sqlalchemy as sql
14
- from typing_extensions import Self
14
+ from typing_extensions import _AnnotatedAlias, Self
15
15
 
16
- import pixeltable
17
16
  import pixeltable.catalog as catalog
18
17
  import pixeltable.exceptions as excs
19
18
  import pixeltable.func as func
@@ -91,7 +90,7 @@ class Expr(abc.ABC):
91
90
  result = c_scope
92
91
  return result
93
92
 
94
- def bind_rel_paths(self, mapper: Optional['pixeltable.exprs.JsonMapper'] = None) -> None:
93
+ def bind_rel_paths(self, mapper: Optional['exprs.JsonMapper'] = None) -> None:
95
94
  """
96
95
  Binds relative JsonPaths to mapper.
97
96
  This needs to be done in a separate phase after __init__(), because RelativeJsonPath()(-1) cannot be resolved
@@ -121,7 +120,7 @@ class Expr(abc.ABC):
121
120
  return False
122
121
  return self._equals(other)
123
122
 
124
- def _equals(self, other: Expr) -> bool:
123
+ def _equals(self, other: Self) -> bool:
125
124
  # we already compared the type and components in equals(); subclasses that require additional comparisons
126
125
  # override this
127
126
  return True
@@ -232,12 +231,6 @@ class Expr(abc.ABC):
232
231
  return self._retarget(tbl_versions)
233
232
 
234
233
  def _retarget(self, tbl_versions: dict[UUID, catalog.TableVersion]) -> Self:
235
- from .column_ref import ColumnRef
236
- if isinstance(self, ColumnRef):
237
- target = tbl_versions[self.col.tbl.id]
238
- assert self.col.id in target.cols_by_id
239
- col = target.cols_by_id[self.col.id]
240
- return ColumnRef(col)
241
234
  for i in range (len(self.components)):
242
235
  self.components[i] = self.components[i]._retarget(tbl_versions)
243
236
  return self
@@ -281,29 +274,32 @@ class Expr(abc.ABC):
281
274
  """
282
275
  Iterate over all subexprs, including self.
283
276
  """
284
- is_match = filter is None or filter(self)
285
- if expr_class is not None:
286
- is_match = is_match and isinstance(self, expr_class)
277
+ is_match = isinstance(self, expr_class) if expr_class is not None else True
278
+ # apply filter after checking for expr_class
279
+ if filter is not None and is_match:
280
+ is_match = filter(self)
287
281
  if not is_match or traverse_matches:
288
282
  for c in self.components:
289
283
  yield from c.subexprs(expr_class=expr_class, filter=filter, traverse_matches=traverse_matches)
290
284
  if is_match:
291
- yield self
285
+ yield self # type: ignore[misc]
292
286
 
293
287
  @overload
288
+ @classmethod
294
289
  def list_subexprs(
295
- expr_list: list[Expr], *, filter: Optional[Callable[[Expr], bool]] = None, traverse_matches: bool = True
290
+ cls, expr_list: Iterable[Expr], *, filter: Optional[Callable[[Expr], bool]] = None, traverse_matches: bool = True
296
291
  ) -> Iterator[Expr]: ...
297
292
 
298
293
  @overload
294
+ @classmethod
299
295
  def list_subexprs(
300
- expr_list: list[Expr], expr_class: type[T], filter: Optional[Callable[[Expr], bool]] = None,
296
+ cls, expr_list: Iterable[Expr], expr_class: type[T], filter: Optional[Callable[[Expr], bool]] = None,
301
297
  traverse_matches: bool = True
302
298
  ) -> Iterator[T]: ...
303
299
 
304
300
  @classmethod
305
301
  def list_subexprs(
306
- cls, expr_list: list[Expr], expr_class: Optional[type[T]] = None,
302
+ cls, expr_list: Iterable[Expr], expr_class: Optional[type[T]] = None,
307
303
  filter: Optional[Callable[[Expr], bool]] = None, traverse_matches: bool = True
308
304
  ) -> Iterator[T]:
309
305
  """Produce subexprs for all exprs in list. Can contain duplicates."""
@@ -312,13 +308,11 @@ class Expr(abc.ABC):
312
308
 
313
309
  def _contains(self, cls: Optional[type[Expr]] = None, filter: Optional[Callable[[Expr], bool]] = None) -> bool:
314
310
  """
315
- Returns True if any subexpr is an instance of cls.
311
+ Returns True if any subexpr is an instance of cls and/or matches filter.
316
312
  """
317
- assert (cls is not None) != (filter is not None) # need one of them
318
- if cls is not None:
319
- filter = lambda e: isinstance(e, cls)
313
+ assert cls is not None or filter is not None
320
314
  try:
321
- _ = next(self.subexprs(filter=filter, traverse_matches=False))
315
+ _ = next(self.subexprs(expr_class=cls, filter=filter, traverse_matches=False))
322
316
  return True
323
317
  except StopIteration:
324
318
  return False
@@ -330,11 +324,8 @@ class Expr(abc.ABC):
330
324
  return {ref.col.tbl.id for ref in self.subexprs(ColumnRef)} | {ref.tbl.id for ref in self.subexprs(RowidRef)}
331
325
 
332
326
  @classmethod
333
- def list_tbl_ids(cls, expr_list: list[Expr]) -> set[UUID]:
334
- ids: set[UUID] = set()
335
- for e in expr_list:
336
- ids.update(e.tbl_ids())
337
- return ids
327
+ def all_tbl_ids(cls, exprs_: Iterable[Expr]) -> set[UUID]:
328
+ return set(tbl_id for e in exprs_ for tbl_id in e.tbl_ids())
338
329
 
339
330
  @classmethod
340
331
  def get_refd_columns(cls, expr_dict: dict[str, Any]) -> list[catalog.Column]:
@@ -384,7 +375,7 @@ class Expr(abc.ABC):
384
375
  pass
385
376
 
386
377
  @abc.abstractmethod
387
- def eval(self, data_row: DataRow, row_builder: 'pixeltable.exprs.RowBuilder') -> None:
378
+ def eval(self, data_row: DataRow, row_builder: 'exprs.RowBuilder') -> None:
388
379
  """
389
380
  Compute the expr value for data_row and store the result in data_row[slot_idx].
390
381
  Not called if sql_expr() != None (exception: Literal).
@@ -450,18 +441,20 @@ class Expr(abc.ABC):
450
441
  def _from_dict(cls, d: dict, components: list[Expr]) -> Self:
451
442
  assert False, 'not implemented'
452
443
 
453
- def isin(self, value_set: Any) -> 'pixeltable.exprs.InPredicate':
444
+ def isin(self, value_set: Any) -> 'exprs.InPredicate':
454
445
  from .in_predicate import InPredicate
455
446
  if isinstance(value_set, Expr):
456
447
  return InPredicate(self, value_set_expr=value_set)
457
448
  else:
458
449
  return InPredicate(self, value_set_literal=value_set)
459
450
 
460
- def astype(self, new_type: ts.ColumnType) -> 'pixeltable.exprs.TypeCast':
451
+ def astype(self, new_type: Union[ts.ColumnType, type, _AnnotatedAlias]) -> 'exprs.TypeCast':
461
452
  from pixeltable.exprs import TypeCast
462
- return TypeCast(self, new_type)
453
+ return TypeCast(self, ts.ColumnType.normalize_type(new_type))
463
454
 
464
- def apply(self, fn: Callable, *, col_type: Optional[ts.ColumnType] = None) -> 'pixeltable.exprs.FunctionCall':
455
+ def apply(self, fn: Callable, *, col_type: Union[ts.ColumnType, type, _AnnotatedAlias, None] = None) -> 'exprs.FunctionCall':
456
+ if col_type is not None:
457
+ col_type = ts.ColumnType.normalize_type(col_type)
465
458
  function = self._make_applicator_function(fn, col_type)
466
459
  # Return a `FunctionCall` obtained by passing this `Expr` to the new `function`.
467
460
  return function(self)
@@ -474,23 +467,32 @@ class Expr(abc.ABC):
474
467
  ]
475
468
  return attrs
476
469
 
470
+ def __call__(self, *args: Any, **kwargs: Any) -> Any:
471
+ raise NotImplementedError(f'Expression of type `{type(self)}` is not callable')
472
+
477
473
  def __getitem__(self, index: object) -> Expr:
478
474
  if self.col_type.is_json_type():
479
475
  from .json_path import JsonPath
480
- return JsonPath(self).__getitem__(index)
476
+ return JsonPath(self)[index]
481
477
  if self.col_type.is_array_type():
482
478
  from .array_slice import ArraySlice
479
+ if not isinstance(index, tuple):
480
+ index = (index,)
481
+ if any(not isinstance(i, (int, slice)) for i in index):
482
+ raise AttributeError(f'Invalid array indices: {index}')
483
483
  return ArraySlice(self, index)
484
484
  raise AttributeError(f'Type {self.col_type} is not subscriptable')
485
485
 
486
- def __getattr__(self, name: str) -> Union['pixeltable.exprs.MethodRef', 'pixeltable.exprs.FunctionCall', 'pixeltable.exprs.JsonPath']:
486
+ def __getattr__(self, name: str) -> 'exprs.Expr':
487
487
  """
488
488
  ex.: <img col>.rotate(60)
489
489
  """
490
+ from .json_path import JsonPath
491
+ from .method_ref import MethodRef
490
492
  if self.col_type.is_json_type():
491
- return pixeltable.exprs.JsonPath(self).__getattr__(name)
493
+ return JsonPath(self).__getattr__(name)
492
494
  else:
493
- method_ref = pixeltable.exprs.MethodRef(self, name)
495
+ method_ref = MethodRef(self, name)
494
496
  if method_ref.fn.is_property:
495
497
  # Marked as a property, so autoinvoke the method to obtain a `FunctionCall`
496
498
  assert method_ref.fn.arity == 1
@@ -503,32 +505,32 @@ class Expr(abc.ABC):
503
505
  raise TypeError(
504
506
  'Pixeltable expressions cannot be used in conjunction with Python boolean operators (and/or/not)')
505
507
 
506
- def __lt__(self, other: object) -> 'pixeltable.exprs.Comparison':
508
+ def __lt__(self, other: object) -> 'exprs.Comparison':
507
509
  return self._make_comparison(ComparisonOperator.LT, other)
508
510
 
509
- def __le__(self, other: object) -> 'pixeltable.exprs.Comparison':
511
+ def __le__(self, other: object) -> 'exprs.Comparison':
510
512
  return self._make_comparison(ComparisonOperator.LE, other)
511
513
 
512
- def __eq__(self, other: object) -> 'pixeltable.exprs.Comparison':
514
+ def __eq__(self, other: object) -> 'exprs.Expr': # type: ignore[override]
513
515
  if other is None:
514
516
  from .is_null import IsNull
515
517
  return IsNull(self)
516
518
  return self._make_comparison(ComparisonOperator.EQ, other)
517
519
 
518
- def __ne__(self, other: object) -> 'pixeltable.exprs.Comparison':
520
+ def __ne__(self, other: object) -> 'exprs.Expr': # type: ignore[override]
519
521
  if other is None:
520
522
  from .compound_predicate import CompoundPredicate
521
523
  from .is_null import IsNull
522
524
  return CompoundPredicate(LogicalOperator.NOT, [IsNull(self)])
523
525
  return self._make_comparison(ComparisonOperator.NE, other)
524
526
 
525
- def __gt__(self, other: object) -> 'pixeltable.exprs.Comparison':
527
+ def __gt__(self, other: object) -> 'exprs.Comparison':
526
528
  return self._make_comparison(ComparisonOperator.GT, other)
527
529
 
528
- def __ge__(self, other: object) -> 'pixeltable.exprs.Comparison':
530
+ def __ge__(self, other: object) -> 'exprs.Comparison':
529
531
  return self._make_comparison(ComparisonOperator.GE, other)
530
532
 
531
- def _make_comparison(self, op: ComparisonOperator, other: object) -> 'pixeltable.exprs.Comparison':
533
+ def _make_comparison(self, op: ComparisonOperator, other: object) -> 'exprs.Comparison':
532
534
  """
533
535
  other: Union[Expr, LiteralPythonTypes]
534
536
  """
@@ -538,49 +540,49 @@ class Expr(abc.ABC):
538
540
  if isinstance(other, Expr):
539
541
  return Comparison(op, self, other)
540
542
  if isinstance(other, typing.get_args(LiteralPythonTypes)):
541
- return Comparison(op, self, Literal(other)) # type: ignore[arg-type]
543
+ return Comparison(op, self, Literal(other))
542
544
  raise TypeError(f'Other must be Expr or literal: {type(other)}')
543
545
 
544
- def __neg__(self) -> 'pixeltable.exprs.ArithmeticExpr':
546
+ def __neg__(self) -> 'exprs.ArithmeticExpr':
545
547
  return self._make_arithmetic_expr(ArithmeticOperator.MUL, -1)
546
548
 
547
- def __add__(self, other: object) -> 'pixeltable.exprs.ArithmeticExpr':
549
+ def __add__(self, other: object) -> 'exprs.ArithmeticExpr':
548
550
  return self._make_arithmetic_expr(ArithmeticOperator.ADD, other)
549
551
 
550
- def __sub__(self, other: object) -> 'pixeltable.exprs.ArithmeticExpr':
552
+ def __sub__(self, other: object) -> 'exprs.ArithmeticExpr':
551
553
  return self._make_arithmetic_expr(ArithmeticOperator.SUB, other)
552
554
 
553
- def __mul__(self, other: object) -> 'pixeltable.exprs.ArithmeticExpr':
555
+ def __mul__(self, other: object) -> 'exprs.ArithmeticExpr':
554
556
  return self._make_arithmetic_expr(ArithmeticOperator.MUL, other)
555
557
 
556
- def __truediv__(self, other: object) -> 'pixeltable.exprs.ArithmeticExpr':
558
+ def __truediv__(self, other: object) -> 'exprs.ArithmeticExpr':
557
559
  return self._make_arithmetic_expr(ArithmeticOperator.DIV, other)
558
560
 
559
- def __mod__(self, other: object) -> 'pixeltable.exprs.ArithmeticExpr':
561
+ def __mod__(self, other: object) -> 'exprs.ArithmeticExpr':
560
562
  return self._make_arithmetic_expr(ArithmeticOperator.MOD, other)
561
563
 
562
- def __floordiv__(self, other: object) -> 'pixeltable.exprs.ArithmeticExpr':
564
+ def __floordiv__(self, other: object) -> 'exprs.ArithmeticExpr':
563
565
  return self._make_arithmetic_expr(ArithmeticOperator.FLOORDIV, other)
564
566
 
565
- def __radd__(self, other: object) -> 'pixeltable.exprs.ArithmeticExpr':
567
+ def __radd__(self, other: object) -> 'exprs.ArithmeticExpr':
566
568
  return self._rmake_arithmetic_expr(ArithmeticOperator.ADD, other)
567
569
 
568
- def __rsub__(self, other: object) -> 'pixeltable.exprs.ArithmeticExpr':
570
+ def __rsub__(self, other: object) -> 'exprs.ArithmeticExpr':
569
571
  return self._rmake_arithmetic_expr(ArithmeticOperator.SUB, other)
570
572
 
571
- def __rmul__(self, other: object) -> 'pixeltable.exprs.ArithmeticExpr':
573
+ def __rmul__(self, other: object) -> 'exprs.ArithmeticExpr':
572
574
  return self._rmake_arithmetic_expr(ArithmeticOperator.MUL, other)
573
575
 
574
- def __rtruediv__(self, other: object) -> 'pixeltable.exprs.ArithmeticExpr':
576
+ def __rtruediv__(self, other: object) -> 'exprs.ArithmeticExpr':
575
577
  return self._rmake_arithmetic_expr(ArithmeticOperator.DIV, other)
576
578
 
577
- def __rmod__(self, other: object) -> 'pixeltable.exprs.ArithmeticExpr':
579
+ def __rmod__(self, other: object) -> 'exprs.ArithmeticExpr':
578
580
  return self._rmake_arithmetic_expr(ArithmeticOperator.MOD, other)
579
581
 
580
- def __rfloordiv__(self, other: object) -> 'pixeltable.exprs.ArithmeticExpr':
582
+ def __rfloordiv__(self, other: object) -> 'exprs.ArithmeticExpr':
581
583
  return self._rmake_arithmetic_expr(ArithmeticOperator.FLOORDIV, other)
582
584
 
583
- def _make_arithmetic_expr(self, op: ArithmeticOperator, other: object) -> 'pixeltable.exprs.ArithmeticExpr':
585
+ def _make_arithmetic_expr(self, op: ArithmeticOperator, other: object) -> 'exprs.ArithmeticExpr':
584
586
  """
585
587
  other: Union[Expr, LiteralPythonTypes]
586
588
  """
@@ -590,10 +592,10 @@ class Expr(abc.ABC):
590
592
  if isinstance(other, Expr):
591
593
  return ArithmeticExpr(op, self, other)
592
594
  if isinstance(other, typing.get_args(LiteralPythonTypes)):
593
- return ArithmeticExpr(op, self, Literal(other)) # type: ignore[arg-type]
595
+ return ArithmeticExpr(op, self, Literal(other))
594
596
  raise TypeError(f'Other must be Expr or literal: {type(other)}')
595
597
 
596
- def _rmake_arithmetic_expr(self, op: ArithmeticOperator, other: object) -> 'pixeltable.exprs.ArithmeticExpr':
598
+ def _rmake_arithmetic_expr(self, op: ArithmeticOperator, other: object) -> 'exprs.ArithmeticExpr':
597
599
  """
598
600
  Right-handed version of _make_arithmetic_expr. other must be a literal; if it were an Expr,
599
601
  the operation would have already been evaluated in its left-handed form.
@@ -603,7 +605,7 @@ class Expr(abc.ABC):
603
605
  from .literal import Literal
604
606
  assert not isinstance(other, Expr) # Else the left-handed form would have evaluated first
605
607
  if isinstance(other, typing.get_args(LiteralPythonTypes)):
606
- return ArithmeticExpr(op, Literal(other), self) # type: ignore[arg-type]
608
+ return ArithmeticExpr(op, Literal(other), self)
607
609
  raise TypeError(f'Other must be Expr or literal: {type(other)}')
608
610
 
609
611
  def __and__(self, other: object) -> Expr:
@@ -638,7 +640,7 @@ class Expr(abc.ABC):
638
640
  else:
639
641
  return [], self
640
642
 
641
- def _make_applicator_function(self, fn: Callable, col_type: Optional[ts.ColumnType]) -> 'pixeltable.func.Function':
643
+ def _make_applicator_function(self, fn: Callable, col_type: Optional[ts.ColumnType]) -> 'func.Function':
642
644
  """
643
645
  Creates a unary pixeltable `Function` that encapsulates a python `Callable`. The result type of
644
646
  the new `Function` is given by `col_type`, and its parameter type will be `self.col_type`.
pixeltable/exprs/expr_dict.py ADDED
@@ -0,0 +1,55 @@
1
+ from typing import Generic, TypeVar, Optional, Iterator, Iterable
2
+
3
+ T = TypeVar('T')
4
+
5
+ from .expr import Expr
6
+
7
+ class ExprDict(Generic[T]):
8
+ """
9
+ A dictionary that maps Expr instances to values of type T.
10
+
11
+ We cannot use dict[Expr, T] because Expr.__eq__() serves a different purpose than the default __eq__.
12
+ """
13
+
14
+ _data: dict[int, tuple[Expr, T]]
15
+
16
+ def __init__(self, iterable: Optional[Iterable[tuple[Expr, T]]] = None):
17
+ self._data = {}
18
+
19
+ if iterable is not None:
20
+ for key, value in iterable:
21
+ self[key] = value
22
+
23
+ def __setitem__(self, key: Expr, value: T) -> None:
24
+ self._data[key.id] = (key, value)
25
+
26
+ def __getitem__(self, key: Expr) -> T:
27
+ return self._data[key.id][1]
28
+
29
+ def __delitem__(self, key: Expr) -> None:
30
+ del self._data[key.id]
31
+
32
+ def __len__(self) -> int:
33
+ return len(self._data)
34
+
35
+ def __iter__(self) -> Iterator[Expr]:
36
+ return (expr for expr, _ in self._data.values())
37
+
38
+ def __contains__(self, key: Expr) -> bool:
39
+ return key.id in self._data
40
+
41
+ def get(self, key: Expr, default: Optional[T] = None) -> Optional[T]:
42
+ item = self._data.get(key.id)
43
+ return item[1] if item is not None else default
44
+
45
+ def clear(self) -> None:
46
+ self._data.clear()
47
+
48
+ def keys(self) -> Iterator[Expr]:
49
+ return self.__iter__()
50
+
51
+ def values(self) -> Iterator[T]:
52
+ return (value for _, value in self._data.values())
53
+
54
+ def items(self) -> Iterator[tuple[Expr, T]]:
55
+ return ((expr, value) for expr, value in self._data.values())
pixeltable/exprs/expr_set.py CHANGED
@@ -1,25 +1,26 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Optional, Iterable, Iterator
3
+ from typing import Optional, Iterable, Iterator, TypeVar, Generic
4
4
 
5
5
  from .expr import Expr
6
6
 
7
+ T = TypeVar('T', bound='Expr')
7
8
 
8
- class ExprSet:
9
+ class ExprSet(Generic[T]):
9
10
  """
10
11
  A set that also supports indexed lookup (by slot_idx and Expr.id). Exprs are uniquely identified by Expr.id.
11
12
  """
12
- exprs: dict[int, Expr] # key: Expr.id
13
- exprs_by_idx: dict[int, Expr] # key: slot_idx
13
+ exprs: dict[int, T] # key: Expr.id
14
+ exprs_by_idx: dict[int, T] # key: slot_idx
14
15
 
15
- def __init__(self, elements: Optional[Iterable[Expr]] = None):
16
+ def __init__(self, elements: Optional[Iterable[T]] = None):
16
17
  self.exprs = {}
17
18
  self.exprs_by_idx = {}
18
19
  if elements is not None:
19
20
  for e in elements:
20
21
  self.add(e)
21
22
 
22
- def add(self, expr: Expr) -> None:
23
+ def add(self, expr: T) -> None:
23
24
  if expr.id in self.exprs:
24
25
  return
25
26
  self.exprs[expr.id] = expr
@@ -27,24 +28,22 @@ class ExprSet:
27
28
  return
28
29
  self.exprs_by_idx[expr.slot_idx] = expr
29
30
 
30
- def update(self, *others: Iterable[Expr]) -> None:
31
+ def update(self, *others: Iterable[T]) -> None:
31
32
  for other in others:
32
33
  for e in other:
33
34
  self.add(e)
34
35
 
35
- def __contains__(self, item: Expr) -> bool:
36
+ def __contains__(self, item: T) -> bool:
36
37
  return item.id in self.exprs
37
38
 
38
39
  def __len__(self) -> int:
39
40
  return len(self.exprs)
40
41
 
41
- def __iter__(self) -> Iterator[Expr]:
42
+ def __iter__(self) -> Iterator[T]:
42
43
  return iter(self.exprs.values())
43
44
 
44
- def __getitem__(self, index: object) -> Optional[Expr]:
45
+ def __getitem__(self, index: object) -> Optional[T]:
45
46
  """Indexed lookup by slot_idx or Expr.id."""
46
- if not isinstance(index, int) and not isinstance(index, Expr):
47
- pass
48
47
  assert isinstance(index, int) or isinstance(index, Expr)
49
48
  if isinstance(index, int):
50
49
  # return expr with matching slot_idx
@@ -52,11 +51,23 @@ class ExprSet:
52
51
  else:
53
52
  return self.exprs.get(index.id)
54
53
 
55
- def issuperset(self, other: ExprSet) -> bool:
54
+ def issuperset(self, other: ExprSet[T]) -> bool:
56
55
  return self.exprs.keys() >= other.exprs.keys()
57
56
 
58
- def __ge__(self, other: ExprSet) -> bool:
57
+ def __ge__(self, other: ExprSet[T]) -> bool:
59
58
  return self.issuperset(other)
60
59
 
61
- def __le__(self, other: ExprSet) -> bool:
60
+ def __le__(self, other: ExprSet[T]) -> bool:
62
61
  return other.issuperset(self)
62
+
63
+ def difference(self, *others: Iterable[T]) -> ExprSet[T]:
64
+ id_diff = set(self.exprs.keys()).difference(e.id for other_set in others for e in other_set)
65
+ return ExprSet(self.exprs[id] for id in id_diff)
66
+
67
+ def __sub__(self, other: ExprSet[T]) -> ExprSet[T]:
68
+ return self.difference(other)
69
+
70
+ def __add__(self, other: ExprSet) -> ExprSet:
71
+ exprs = self.exprs.copy()
72
+ exprs.update(other.exprs)
73
+ return ExprSet(exprs.values())
pixeltable/exprs/function_call.py CHANGED
@@ -50,14 +50,29 @@ class FunctionCall(Expr):
50
50
  if group_by_clause is None:
51
51
  group_by_clause = []
52
52
  signature = fn.signature
53
- super().__init__(fn.call_return_type(bound_args))
53
+ return_type = fn.call_return_type(bound_args)
54
54
  self.fn = fn
55
55
  self.is_method_call = is_method_call
56
56
  self.normalize_args(fn.name, signature, bound_args)
57
57
 
58
+ # If `return_type` is non-nullable, but the function call has a nullable input to any of its non-nullable
59
+ # parameters, then we need to make it nullable. This is because Pixeltable defaults a function output to
60
+ # `None` when any of its non-nullable inputs are `None`.
61
+ for arg_name, arg in bound_args.items():
62
+ param = signature.parameters[arg_name]
63
+ if (
64
+ param.col_type is not None and not param.col_type.nullable
65
+ and isinstance(arg, Expr) and arg.col_type.nullable
66
+ ):
67
+ return_type = return_type.copy(nullable=True)
68
+ break
69
+
70
+ super().__init__(return_type)
71
+
58
72
  self.agg_init_args = {}
59
73
  if self.is_agg_fn_call:
60
74
  # we separate out the init args for the aggregator
75
+ assert isinstance(fn, func.AggregateFunction)
61
76
  self.agg_init_args = {
62
77
  arg_name: arg for arg_name, arg in bound_args.items() if arg_name in fn.init_param_names
63
78
  }
@@ -71,17 +86,17 @@ class FunctionCall(Expr):
71
86
  self.arg_types = []
72
87
  self.kwarg_types = {}
73
88
  # the prefix of parameters that are bound can be passed by position
74
- for param in fn.signature.py_signature.parameters.values():
75
- if param.name not in bound_args or param.kind == inspect.Parameter.KEYWORD_ONLY:
89
+ for py_param in fn.signature.py_signature.parameters.values():
90
+ if py_param.name not in bound_args or py_param.kind == inspect.Parameter.KEYWORD_ONLY:
76
91
  break
77
- arg = bound_args[param.name]
92
+ arg = bound_args[py_param.name]
78
93
  if isinstance(arg, Expr):
79
94
  self.args.append((len(self.components), None))
80
95
  self.components.append(arg.copy())
81
96
  else:
82
97
  self.args.append((None, arg))
83
- if param.kind != inspect.Parameter.VAR_POSITIONAL and param.kind != inspect.Parameter.VAR_KEYWORD:
84
- self.arg_types.append(signature.parameters[param.name].col_type)
98
+ if py_param.kind != inspect.Parameter.VAR_POSITIONAL and py_param.kind != inspect.Parameter.VAR_KEYWORD:
99
+ self.arg_types.append(signature.parameters[py_param.name].col_type)
85
100
 
86
101
  # the remaining args are passed as keywords
87
102
  kw_param_names = set(bound_args.keys()) - set(list(fn.signature.py_signature.parameters.keys())[:len(self.args)])
@@ -138,13 +153,11 @@ class FunctionCall(Expr):
138
153
  return [RowidRef(target, i) for i in range(target.num_rowid_columns())]
139
154
 
140
155
  def default_column_name(self) -> Optional[str]:
141
- if self.fn.is_property:
142
- return self.fn.name
143
- return super().default_column_name()
156
+ return self.fn.name
144
157
 
145
158
  @classmethod
146
159
  def normalize_args(cls, fn_name: str, signature: func.Signature, bound_args: dict[str, Any]) -> None:
147
- """Converts all args to Exprs and checks that they are compatible with signature.
160
+ """Converts args to Exprs where appropriate and checks that they are compatible with signature.
148
161
 
149
162
  Updates bound_args in place, where necessary.
150
163
  """
@@ -263,6 +276,7 @@ class FunctionCall(Expr):
263
276
  for param_name, (idx, arg) in self.kwargs.items()
264
277
  ])
265
278
  if len(self.order_by) > 0:
279
+ assert isinstance(self.fn, func.AggregateFunction)
266
280
  if self.fn.requires_order_by:
267
281
  arg_strs.insert(0, Expr.print_list(self.order_by))
268
282
  else:
@@ -273,7 +287,7 @@ class FunctionCall(Expr):
273
287
  separator = ', ' if inline else ',\n '
274
288
  return separator.join(arg_strs)
275
289
 
276
- def has_group_by(self) -> list[Expr]:
290
+ def has_group_by(self) -> bool:
277
291
  return self.group_by_stop_idx != 0
278
292
 
279
293
  @property
@@ -286,14 +300,19 @@ class FunctionCall(Expr):
286
300
 
287
301
  @property
288
302
  def is_window_fn_call(self) -> bool:
289
- return isinstance(self.fn, func.AggregateFunction) and self.fn.allows_window and \
290
- (not self.fn.allows_std_agg \
291
- or self.has_group_by() \
292
- or (len(self.order_by) > 0 and not self.fn.requires_order_by))
303
+ return isinstance(self.fn, func.AggregateFunction) and self.fn.allows_window and (
304
+ not self.fn.allows_std_agg
305
+ or self.has_group_by()
306
+ or (len(self.order_by) > 0 and not self.fn.requires_order_by)
307
+ )
293
308
 
294
309
  def get_window_sort_exprs(self) -> tuple[list[Expr], list[Expr]]:
295
310
  return self.group_by, self.order_by
296
311
 
312
+ def get_window_ordering(self) -> list[tuple[Expr, bool]]:
313
+ # ordering is implicitly ascending
314
+ return [(e, None) for e in self.group_by] + [(e, True) for e in self.order_by]
315
+
297
316
  @property
298
317
  def is_agg_fn_call(self) -> bool:
299
318
  return isinstance(self.fn, func.AggregateFunction)
@@ -303,6 +322,10 @@ class FunctionCall(Expr):
303
322
  return self.order_by
304
323
 
305
324
  def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
325
+ # we currently can't translate aggregate functions with grouping and/or ordering to SQL
326
+ if self.has_group_by() or len(self.order_by) > 0:
327
+ return None
328
+
306
329
  # try to construct args and kwargs to call self.fn._to_sql()
307
330
  kwargs: dict[str, sql.ColumnElement] = {}
308
331
  for param_name, (component_idx, arg) in self.kwargs.items():
@@ -374,6 +397,18 @@ class FunctionCall(Expr):
374
397
  return args, kwargs
375
398
 
376
399
  def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
400
+ if isinstance(self.fn, func.ExprTemplateFunction):
401
+ # we need to evaluate the template
402
+ # TODO: can we get rid of this extra copy?
403
+ fn_expr = self.components[self.fn_expr_idx]
404
+ data_row[self.slot_idx] = data_row[fn_expr.slot_idx]
405
+ return
406
+ elif self.is_agg_fn_call and not self.is_window_fn_call:
407
+ if self.aggregator is None:
408
+ pass
409
+ data_row[self.slot_idx] = self.aggregator.value()
410
+ return
411
+
377
412
  args, kwargs = self._make_args(data_row)
378
413
  signature = self.fn.signature
379
414
  if signature.parameters is not None:
@@ -389,15 +424,11 @@ class FunctionCall(Expr):
389
424
  data_row[self.slot_idx] = None
390
425
  return
391
426
 
392
- if isinstance(self.fn, func.ExprTemplateFunction):
393
- # we need to evaluate the template
394
- # TODO: can we get rid of this extra copy?
395
- fn_expr = self.components[self.fn_expr_idx]
396
- data_row[self.slot_idx] = data_row[fn_expr.slot_idx]
397
- elif isinstance(self.fn, func.CallableFunction) and not self.fn.is_batched:
427
+ if isinstance(self.fn, func.CallableFunction) and not self.fn.is_batched:
398
428
  # optimization: avoid additional level of indirection we'd get from calling Function.exec()
399
429
  data_row[self.slot_idx] = self.fn.py_fn(*args, **kwargs)
400
430
  elif self.is_window_fn_call:
431
+ assert isinstance(self.fn, func.AggregateFunction)
401
432
  if self.has_group_by():
402
433
  if self.current_partition_vals is None:
403
434
  self.current_partition_vals = [None] * len(self.group_by)
@@ -410,8 +441,6 @@ class FunctionCall(Expr):
410
441
  self.aggregator = self.fn.agg_cls(**self.agg_init_args)
411
442
  self.aggregator.update(*args)
412
443
  data_row[self.slot_idx] = self.aggregator.value()
413
- elif self.is_agg_fn_call:
414
- data_row[self.slot_idx] = self.aggregator.value()
415
444
  else:
416
445
  data_row[self.slot_idx] = self.fn.exec(*args, **kwargs)
417
446
 
@@ -425,7 +454,7 @@ class FunctionCall(Expr):
425
454
  return result
426
455
 
427
456
  @classmethod
428
- def _from_dict(cls, d: dict, components: list[Expr]) -> Expr:
457
+ def _from_dict(cls, d: dict, components: list[Expr]) -> FunctionCall:
429
458
  assert 'fn' in d
430
459
  assert 'args' in d
431
460
  assert 'kwargs' in d
@@ -5,7 +5,7 @@ import enum
5
5
  from typing import Union
6
6
 
7
7
  # Python types corresponding to our literal types
8
- LiteralPythonTypes = Union[str, int, float, bool, datetime.datetime, datetime.date]
8
+ LiteralPythonTypes = Union[str, int, float, bool, datetime.datetime]
9
9
 
10
10
  def print_slice(s: slice) -> str:
11
11
  start_str = f'{str(s.start) if s.start is not None else ""}'
@@ -35,6 +35,7 @@ class ComparisonOperator(enum.Enum):
35
35
  return '>'
36
36
  if self == self.GE:
37
37
  return '>='
38
+ assert False
38
39
 
39
40
  def reverse(self) -> ComparisonOperator:
40
41
  if self == self.LT:
@@ -60,6 +61,7 @@ class LogicalOperator(enum.Enum):
60
61
  return '|'
61
62
  if self == self.NOT:
62
63
  return '~'
64
+ assert False
63
65
 
64
66
 
65
67
  class ArithmeticOperator(enum.Enum):
@@ -83,3 +85,4 @@ class ArithmeticOperator(enum.Enum):
83
85
  return '%'
84
86
  if self == self.FLOORDIV:
85
87
  return '//'
88
+ assert False