pixeltable-0.3.1-py3-none-any.whl → pixeltable-0.3.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (147)
  1. pixeltable/__init__.py +64 -11
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +1 -1
  4. pixeltable/catalog/catalog.py +50 -27
  5. pixeltable/catalog/column.py +27 -11
  6. pixeltable/catalog/dir.py +6 -4
  7. pixeltable/catalog/globals.py +8 -1
  8. pixeltable/catalog/insertable_table.py +25 -15
  9. pixeltable/catalog/named_function.py +10 -6
  10. pixeltable/catalog/path.py +3 -2
  11. pixeltable/catalog/path_dict.py +8 -6
  12. pixeltable/catalog/schema_object.py +2 -1
  13. pixeltable/catalog/table.py +123 -103
  14. pixeltable/catalog/table_version.py +292 -143
  15. pixeltable/catalog/table_version_path.py +8 -5
  16. pixeltable/catalog/view.py +68 -27
  17. pixeltable/dataframe.py +102 -72
  18. pixeltable/env.py +39 -23
  19. pixeltable/exec/__init__.py +2 -2
  20. pixeltable/exec/aggregation_node.py +10 -4
  21. pixeltable/exec/cache_prefetch_node.py +5 -3
  22. pixeltable/exec/component_iteration_node.py +9 -8
  23. pixeltable/exec/data_row_batch.py +21 -10
  24. pixeltable/exec/exec_context.py +10 -3
  25. pixeltable/exec/exec_node.py +23 -12
  26. pixeltable/exec/expr_eval/evaluators.py +18 -17
  27. pixeltable/exec/expr_eval/expr_eval_node.py +29 -16
  28. pixeltable/exec/expr_eval/globals.py +33 -11
  29. pixeltable/exec/expr_eval/row_buffer.py +5 -6
  30. pixeltable/exec/expr_eval/schedulers.py +170 -42
  31. pixeltable/exec/in_memory_data_node.py +8 -7
  32. pixeltable/exec/row_update_node.py +15 -5
  33. pixeltable/exec/sql_node.py +56 -27
  34. pixeltable/exprs/__init__.py +2 -2
  35. pixeltable/exprs/arithmetic_expr.py +57 -26
  36. pixeltable/exprs/array_slice.py +1 -1
  37. pixeltable/exprs/column_property_ref.py +2 -1
  38. pixeltable/exprs/column_ref.py +20 -15
  39. pixeltable/exprs/comparison.py +6 -2
  40. pixeltable/exprs/compound_predicate.py +1 -3
  41. pixeltable/exprs/data_row.py +2 -2
  42. pixeltable/exprs/expr.py +101 -72
  43. pixeltable/exprs/expr_dict.py +2 -1
  44. pixeltable/exprs/expr_set.py +3 -1
  45. pixeltable/exprs/function_call.py +39 -41
  46. pixeltable/exprs/globals.py +1 -0
  47. pixeltable/exprs/in_predicate.py +2 -2
  48. pixeltable/exprs/inline_expr.py +20 -17
  49. pixeltable/exprs/json_mapper.py +4 -2
  50. pixeltable/exprs/json_path.py +12 -18
  51. pixeltable/exprs/literal.py +5 -9
  52. pixeltable/exprs/method_ref.py +1 -0
  53. pixeltable/exprs/object_ref.py +1 -1
  54. pixeltable/exprs/row_builder.py +31 -16
  55. pixeltable/exprs/rowid_ref.py +14 -5
  56. pixeltable/exprs/similarity_expr.py +11 -6
  57. pixeltable/exprs/sql_element_cache.py +1 -1
  58. pixeltable/exprs/type_cast.py +24 -9
  59. pixeltable/ext/__init__.py +1 -0
  60. pixeltable/ext/functions/__init__.py +1 -0
  61. pixeltable/ext/functions/whisperx.py +2 -2
  62. pixeltable/ext/functions/yolox.py +11 -11
  63. pixeltable/func/aggregate_function.py +17 -13
  64. pixeltable/func/callable_function.py +6 -6
  65. pixeltable/func/expr_template_function.py +15 -14
  66. pixeltable/func/function.py +16 -16
  67. pixeltable/func/function_registry.py +11 -8
  68. pixeltable/func/globals.py +4 -2
  69. pixeltable/func/query_template_function.py +12 -13
  70. pixeltable/func/signature.py +18 -9
  71. pixeltable/func/tools.py +10 -17
  72. pixeltable/func/udf.py +106 -11
  73. pixeltable/functions/__init__.py +21 -2
  74. pixeltable/functions/anthropic.py +21 -15
  75. pixeltable/functions/fireworks.py +63 -5
  76. pixeltable/functions/gemini.py +13 -3
  77. pixeltable/functions/globals.py +18 -6
  78. pixeltable/functions/huggingface.py +20 -38
  79. pixeltable/functions/image.py +7 -3
  80. pixeltable/functions/json.py +1 -0
  81. pixeltable/functions/llama_cpp.py +1 -4
  82. pixeltable/functions/mistralai.py +31 -20
  83. pixeltable/functions/ollama.py +4 -18
  84. pixeltable/functions/openai.py +214 -109
  85. pixeltable/functions/replicate.py +11 -10
  86. pixeltable/functions/string.py +70 -7
  87. pixeltable/functions/timestamp.py +21 -8
  88. pixeltable/functions/together.py +66 -52
  89. pixeltable/functions/video.py +1 -0
  90. pixeltable/functions/vision.py +14 -11
  91. pixeltable/functions/whisper.py +2 -1
  92. pixeltable/globals.py +61 -28
  93. pixeltable/index/__init__.py +1 -1
  94. pixeltable/index/btree.py +5 -3
  95. pixeltable/index/embedding_index.py +15 -14
  96. pixeltable/io/__init__.py +1 -1
  97. pixeltable/io/external_store.py +30 -25
  98. pixeltable/io/fiftyone.py +6 -14
  99. pixeltable/io/globals.py +33 -27
  100. pixeltable/io/hf_datasets.py +3 -2
  101. pixeltable/io/label_studio.py +80 -71
  102. pixeltable/io/pandas.py +33 -9
  103. pixeltable/io/parquet.py +10 -13
  104. pixeltable/iterators/__init__.py +1 -0
  105. pixeltable/iterators/audio.py +205 -0
  106. pixeltable/iterators/document.py +19 -8
  107. pixeltable/iterators/image.py +6 -24
  108. pixeltable/iterators/string.py +3 -6
  109. pixeltable/iterators/video.py +1 -7
  110. pixeltable/metadata/__init__.py +9 -2
  111. pixeltable/metadata/converters/convert_10.py +2 -2
  112. pixeltable/metadata/converters/convert_15.py +1 -5
  113. pixeltable/metadata/converters/convert_16.py +2 -4
  114. pixeltable/metadata/converters/convert_17.py +2 -4
  115. pixeltable/metadata/converters/convert_18.py +2 -4
  116. pixeltable/metadata/converters/convert_19.py +2 -5
  117. pixeltable/metadata/converters/convert_20.py +1 -4
  118. pixeltable/metadata/converters/convert_21.py +4 -6
  119. pixeltable/metadata/converters/convert_22.py +1 -0
  120. pixeltable/metadata/converters/convert_23.py +5 -5
  121. pixeltable/metadata/converters/convert_24.py +12 -13
  122. pixeltable/metadata/converters/convert_26.py +23 -0
  123. pixeltable/metadata/converters/util.py +3 -4
  124. pixeltable/metadata/notes.py +1 -0
  125. pixeltable/metadata/schema.py +13 -2
  126. pixeltable/plan.py +173 -98
  127. pixeltable/store.py +42 -26
  128. pixeltable/type_system.py +130 -85
  129. pixeltable/utils/arrow.py +1 -7
  130. pixeltable/utils/coco.py +16 -17
  131. pixeltable/utils/code.py +1 -1
  132. pixeltable/utils/console_output.py +44 -0
  133. pixeltable/utils/description_helper.py +7 -7
  134. pixeltable/utils/documents.py +3 -1
  135. pixeltable/utils/filecache.py +13 -8
  136. pixeltable/utils/http_server.py +9 -8
  137. pixeltable/utils/media_store.py +2 -1
  138. pixeltable/utils/pytorch.py +11 -14
  139. pixeltable/utils/s3.py +1 -0
  140. pixeltable/utils/sql.py +1 -0
  141. pixeltable/utils/transactional_directory.py +2 -2
  142. {pixeltable-0.3.1.dist-info → pixeltable-0.3.3.dist-info}/METADATA +7 -8
  143. pixeltable-0.3.3.dist-info/RECORD +163 -0
  144. pixeltable-0.3.1.dist-info/RECORD +0 -160
  145. {pixeltable-0.3.1.dist-info → pixeltable-0.3.3.dist-info}/LICENSE +0 -0
  146. {pixeltable-0.3.1.dist-info → pixeltable-0.3.3.dist-info}/WHEEL +0 -0
  147. {pixeltable-0.3.1.dist-info → pixeltable-0.3.3.dist-info}/entry_points.txt +0 -0
pixeltable/exprs/expr.py CHANGED
@@ -7,11 +7,11 @@ import inspect
7
7
  import json
8
8
  import sys
9
9
  import typing
10
- from typing import TYPE_CHECKING, Any, Callable, Iterator, Optional, TypeVar, Union, overload, Iterable
10
+ from typing import TYPE_CHECKING, Any, Callable, Iterable, Iterator, Optional, TypeVar, Union, overload
11
11
  from uuid import UUID
12
12
 
13
13
  import sqlalchemy as sql
14
- from typing_extensions import _AnnotatedAlias, Self
14
+ from typing_extensions import Self, _AnnotatedAlias
15
15
 
16
16
  import pixeltable.catalog as catalog
17
17
  import pixeltable.exceptions as excs
@@ -24,11 +24,13 @@ from .globals import ArithmeticOperator, ComparisonOperator, LiteralPythonTypes,
24
24
  if TYPE_CHECKING:
25
25
  from pixeltable import exprs
26
26
 
27
+
27
28
  class ExprScope:
28
29
  """
29
30
  Representation of the scope in which an Expr needs to be evaluated. Used to determine nesting of scopes.
30
31
  parent is None: outermost scope
31
32
  """
33
+
32
34
  def __init__(self, parent: Optional[ExprScope]):
33
35
  self.parent = parent
34
36
 
@@ -137,6 +139,9 @@ class Expr(abc.ABC):
137
139
  for attr, value in self._id_attrs():
138
140
  hasher.update(attr.encode('utf-8'))
139
141
  hasher.update(str(value).encode('utf-8'))
142
+ # Include the col_type of the expression to avoid expressions with identical str() representations
143
+ # but different types being considered the same expression, e.g. str(int(4)) == "4"
144
+ hasher.update(repr(self.col_type).encode('utf-8'))
140
145
  for expr in self.components:
141
146
  hasher.update(str(expr.id).encode('utf-8'))
142
147
  # truncate to machine's word size
@@ -183,13 +188,19 @@ class Expr(abc.ABC):
183
188
 
184
189
  def substitute(self, spec: dict[Expr, Expr]) -> Expr:
185
190
  """
186
- Replace 'old' with 'new' recursively.
191
+ Replace 'old' with 'new' recursively, and return a new version of the expression
192
+ This method must be used in the form: expr = expr.substitute(spec)
187
193
  """
194
+ from .literal import Literal
195
+
196
+ if isinstance(self, Literal):
197
+ return self
188
198
  for old, new in spec.items():
189
199
  if self.equals(old):
190
200
  return new.copy()
191
201
  for i in range(len(self.components)):
192
202
  self.components[i] = self.components[i].substitute(spec)
203
+ self = self.maybe_literal()
193
204
  self.id = self._create_id()
194
205
  return self
195
206
 
@@ -205,14 +216,18 @@ class Expr(abc.ABC):
205
216
  """
206
217
  from .column_ref import ColumnRef
207
218
  from .expr_set import ExprSet
219
+
208
220
  if resolve_cols is None:
209
221
  resolve_cols = set()
210
222
  result = self
211
223
  while True:
212
- target_col_refs = ExprSet([
213
- e for e in result.subexprs()
214
- if isinstance(e, ColumnRef) and e.col.is_computed and (not e.col.is_stored or e.col in resolve_cols)
215
- ])
224
+ target_col_refs = ExprSet(
225
+ [
226
+ e
227
+ for e in result.subexprs()
228
+ if isinstance(e, ColumnRef) and e.col.is_computed and (not e.col.is_stored or e.col in resolve_cols)
229
+ ]
230
+ )
216
231
  if len(target_col_refs) == 0:
217
232
  return result
218
233
  result = result.substitute({ref: ref.col.value_expr for ref in target_col_refs})
@@ -220,6 +235,7 @@ class Expr(abc.ABC):
220
235
  def is_bound_by(self, tbls: list[catalog.TableVersionPath]) -> bool:
221
236
  """Returns True if this expr can be evaluated in the context of tbls."""
222
237
  from .column_ref import ColumnRef
238
+
223
239
  col_refs = self.subexprs(ColumnRef)
224
240
  for col_ref in col_refs:
225
241
  if not any(tbl.has_column(col_ref.col) for tbl in tbls):
@@ -232,7 +248,7 @@ class Expr(abc.ABC):
232
248
  return self._retarget(tbl_versions)
233
249
 
234
250
  def _retarget(self, tbl_versions: dict[UUID, catalog.TableVersion]) -> Self:
235
- for i in range (len(self.components)):
251
+ for i in range(len(self.components)):
236
252
  self.components[i] = self.components[i]._retarget(tbl_versions)
237
253
  return self
238
254
 
@@ -264,13 +280,14 @@ class Expr(abc.ABC):
264
280
 
265
281
  @overload
266
282
  def subexprs(
267
- self, expr_class: type[T], filter: Optional[Callable[[Expr], bool]] = None,
268
- traverse_matches: bool = True
283
+ self, expr_class: type[T], filter: Optional[Callable[[Expr], bool]] = None, traverse_matches: bool = True
269
284
  ) -> Iterator[T]: ...
270
285
 
271
286
  def subexprs(
272
- self, expr_class: Optional[type[T]] = None, filter: Optional[Callable[[Expr], bool]] = None,
273
- traverse_matches: bool = True
287
+ self,
288
+ expr_class: Optional[type[T]] = None,
289
+ filter: Optional[Callable[[Expr], bool]] = None,
290
+ traverse_matches: bool = True,
274
291
  ) -> Iterator[T]:
275
292
  """
276
293
  Iterate over all subexprs, including self.
@@ -288,20 +305,30 @@ class Expr(abc.ABC):
288
305
  @overload
289
306
  @classmethod
290
307
  def list_subexprs(
291
- cls, expr_list: Iterable[Expr], *, filter: Optional[Callable[[Expr], bool]] = None, traverse_matches: bool = True
308
+ cls,
309
+ expr_list: Iterable[Expr],
310
+ *,
311
+ filter: Optional[Callable[[Expr], bool]] = None,
312
+ traverse_matches: bool = True,
292
313
  ) -> Iterator[Expr]: ...
293
314
 
294
315
  @overload
295
316
  @classmethod
296
317
  def list_subexprs(
297
- cls, expr_list: Iterable[Expr], expr_class: type[T], filter: Optional[Callable[[Expr], bool]] = None,
298
- traverse_matches: bool = True
318
+ cls,
319
+ expr_list: Iterable[Expr],
320
+ expr_class: type[T],
321
+ filter: Optional[Callable[[Expr], bool]] = None,
322
+ traverse_matches: bool = True,
299
323
  ) -> Iterator[T]: ...
300
324
 
301
325
  @classmethod
302
326
  def list_subexprs(
303
- cls, expr_list: Iterable[Expr], expr_class: Optional[type[T]] = None,
304
- filter: Optional[Callable[[Expr], bool]] = None, traverse_matches: bool = True
327
+ cls,
328
+ expr_list: Iterable[Expr],
329
+ expr_class: Optional[type[T]] = None,
330
+ filter: Optional[Callable[[Expr], bool]] = None,
331
+ traverse_matches: bool = True,
305
332
  ) -> Iterator[T]:
306
333
  """Produce subexprs for all exprs in list. Can contain duplicates."""
307
334
  for e in expr_list:
@@ -322,6 +349,7 @@ class Expr(abc.ABC):
322
349
  """Returns table ids referenced by this expr."""
323
350
  from .column_ref import ColumnRef
324
351
  from .rowid_ref import RowidRef
352
+
325
353
  return {ref.col.tbl.id for ref in self.subexprs(ColumnRef)} | {ref.tbl.id for ref in self.subexprs(RowidRef)}
326
354
 
327
355
  @classmethod
@@ -334,6 +362,7 @@ class Expr(abc.ABC):
334
362
  result: list[catalog.Column] = []
335
363
  assert '_classname' in expr_dict
336
364
  from .column_ref import ColumnRef
365
+
337
366
  if expr_dict['_classname'] == 'ColumnRef':
338
367
  result.append(ColumnRef.get_column(expr_dict))
339
368
  if 'components' in expr_dict:
@@ -341,66 +370,57 @@ class Expr(abc.ABC):
341
370
  result.extend(cls.get_refd_columns(component_dict))
342
371
  return result
343
372
 
344
- def is_constant(self) -> bool:
345
- """Returns True if this expr is a constant."""
346
- return all(comp.is_constant() for comp in self.components)
347
-
348
- def _as_constant(self) -> Any:
349
- return None
350
-
351
- def as_constant(self) -> Any:
373
+ def as_literal(self) -> Optional[Expr]:
352
374
  """
353
- If expression is a constant then return the associated value which will be converted to a Literal.
375
+ Return a Literal expression if this expression can be evaluated to a constant value, otherwise return None.
354
376
  """
355
- if self.is_constant():
356
- return self._as_constant()
357
377
  return None
358
378
 
359
379
  @classmethod
360
380
  def from_array(cls, elements: Iterable) -> Optional[Expr]:
361
381
  from .inline_expr import InlineArray
382
+
362
383
  inline_array = InlineArray(elements)
363
- constant_array = inline_array.as_constant()
364
- if constant_array is not None:
365
- from .literal import Literal
366
- return Literal(constant_array, inline_array.col_type)
384
+ return inline_array.maybe_literal()
385
+
386
+ def maybe_literal(self: Expr) -> Expr:
387
+ """
388
+ Return a Literal if this expression can be evaluated to a constant value, otherwise return the expression.
389
+ """
390
+ lit_expr = self.as_literal()
391
+ if lit_expr is not None:
392
+ return lit_expr
367
393
  else:
368
- return inline_array
394
+ return self
369
395
 
370
396
  @classmethod
371
397
  def from_object(cls, o: object) -> Optional[Expr]:
372
398
  """
373
399
  Try to turn a literal object into an Expr.
374
400
  """
401
+ from .inline_expr import InlineDict, InlineList
402
+ from .literal import Literal
403
+
375
404
  # Try to create a literal. We need to check for InlineList/InlineDict
376
405
  # first, to prevent them from inappropriately being interpreted as JsonType
377
406
  # literals.
407
+ if isinstance(o, Literal):
408
+ return o
409
+
378
410
  if isinstance(o, (list, tuple, dict, Expr)):
379
- expr: Optional[Expr] = None
411
+ expr: Expr
380
412
  if isinstance(o, (list, tuple)):
381
- from .inline_expr import InlineList
382
413
  expr = InlineList(o)
383
414
  elif isinstance(o, dict):
384
- from .inline_expr import InlineDict
385
415
  expr = InlineDict(o)
386
- elif isinstance(o, Expr):
416
+ else:
387
417
  expr = o
388
- from .literal import Literal
389
- if isinstance(expr, Literal):
390
- return expr
391
- # Check if the expression is constant
392
- if expr is not None:
393
- expr_value = expr.as_constant()
394
- if expr_value is not None:
395
- from .literal import Literal
396
- return Literal(expr_value)
397
- else:
398
- return expr
418
+
419
+ return expr.maybe_literal()
399
420
  else:
400
421
  # convert scalar to a literal
401
422
  obj_type = ts.ColumnType.infer_literal_type(o)
402
423
  if obj_type is not None:
403
- from .literal import Literal
404
424
  return Literal(o, col_type=obj_type)
405
425
  return None
406
426
 
@@ -444,10 +464,7 @@ class Expr(abc.ABC):
444
464
  Turn Expr object into a dict that can be passed to json.dumps().
445
465
  Subclasses override _as_dict().
446
466
  """
447
- return {
448
- '_classname': self.__class__.__name__,
449
- **self._as_dict(),
450
- }
467
+ return {'_classname': self.__class__.__name__, **self._as_dict()}
451
468
 
452
469
  @classmethod
453
470
  def as_dict_list(self, expr_list: list[Expr]) -> list[dict]:
@@ -485,6 +502,7 @@ class Expr(abc.ABC):
485
502
 
486
503
  def isin(self, value_set: Any) -> 'exprs.InPredicate':
487
504
  from .in_predicate import InPredicate
505
+
488
506
  if isinstance(value_set, Expr):
489
507
  return InPredicate(self, value_set_expr=value_set)
490
508
  else:
@@ -492,6 +510,7 @@ class Expr(abc.ABC):
492
510
 
493
511
  def astype(self, new_type: Union[ts.ColumnType, type, _AnnotatedAlias]) -> 'exprs.TypeCast':
494
512
  from pixeltable.exprs import TypeCast
513
+
495
514
  # Interpret the type argument the same way we would if given in a schema
496
515
  col_type = ts.ColumnType.normalize_type(new_type, nullable_default=True, allow_builtin_types=False)
497
516
  if not self.col_type.nullable:
@@ -500,7 +519,9 @@ class Expr(abc.ABC):
500
519
  col_type = col_type.copy(nullable=False)
501
520
  return TypeCast(self, col_type)
502
521
 
503
- def apply(self, fn: Callable, *, col_type: Union[ts.ColumnType, type, _AnnotatedAlias, None] = None) -> 'exprs.FunctionCall':
522
+ def apply(
523
+ self, fn: Callable, *, col_type: Union[ts.ColumnType, type, _AnnotatedAlias, None] = None
524
+ ) -> 'exprs.FunctionCall':
504
525
  if col_type is not None:
505
526
  col_type = ts.ColumnType.normalize_type(col_type)
506
527
  function = self._make_applicator_function(fn, col_type)
@@ -509,10 +530,7 @@ class Expr(abc.ABC):
509
530
 
510
531
  def __dir__(self) -> list[str]:
511
532
  attrs = ['isin', 'astype', 'apply']
512
- attrs += [
513
- f.name
514
- for f in func.FunctionRegistry.get().get_type_methods(self.col_type.type_enum)
515
- ]
533
+ attrs += [f.name for f in func.FunctionRegistry.get().get_type_methods(self.col_type.type_enum)]
516
534
  return attrs
517
535
 
518
536
  def __call__(self, *args: Any, **kwargs: Any) -> Any:
@@ -521,9 +539,11 @@ class Expr(abc.ABC):
521
539
  def __getitem__(self, index: object) -> Expr:
522
540
  if self.col_type.is_json_type():
523
541
  from .json_path import JsonPath
542
+
524
543
  return JsonPath(self)[index]
525
544
  if self.col_type.is_array_type():
526
545
  from .array_slice import ArraySlice
546
+
527
547
  if not isinstance(index, tuple):
528
548
  index = (index,)
529
549
  if any(not isinstance(i, (int, slice)) for i in index):
@@ -537,6 +557,7 @@ class Expr(abc.ABC):
537
557
  """
538
558
  from .json_path import JsonPath
539
559
  from .method_ref import MethodRef
560
+
540
561
  if self.col_type.is_json_type():
541
562
  return JsonPath(self).__getattr__(name)
542
563
  else:
@@ -555,7 +576,8 @@ class Expr(abc.ABC):
555
576
 
556
577
  def __bool__(self) -> bool:
557
578
  raise TypeError(
558
- 'Pixeltable expressions cannot be used in conjunction with Python boolean operators (and/or/not)')
579
+ 'Pixeltable expressions cannot be used in conjunction with Python boolean operators (and/or/not)'
580
+ )
559
581
 
560
582
  def __lt__(self, other: object) -> 'exprs.Comparison':
561
583
  return self._make_comparison(ComparisonOperator.LT, other)
@@ -566,6 +588,7 @@ class Expr(abc.ABC):
566
588
  def __eq__(self, other: object) -> 'exprs.Expr': # type: ignore[override]
567
589
  if other is None:
568
590
  from .is_null import IsNull
591
+
569
592
  return IsNull(self)
570
593
  return self._make_comparison(ComparisonOperator.EQ, other)
571
594
 
@@ -573,6 +596,7 @@ class Expr(abc.ABC):
573
596
  if other is None:
574
597
  from .compound_predicate import CompoundPredicate
575
598
  from .is_null import IsNull
599
+
576
600
  return CompoundPredicate(LogicalOperator.NOT, [IsNull(self)])
577
601
  return self._make_comparison(ComparisonOperator.NE, other)
578
602
 
@@ -589,6 +613,7 @@ class Expr(abc.ABC):
589
613
  # TODO: check for compatibility
590
614
  from .comparison import Comparison
591
615
  from .literal import Literal
616
+
592
617
  if isinstance(other, Expr):
593
618
  return Comparison(op, self, other)
594
619
  if isinstance(other, typing.get_args(LiteralPythonTypes)):
@@ -641,6 +666,7 @@ class Expr(abc.ABC):
641
666
  # TODO: check for compatibility
642
667
  from .arithmetic_expr import ArithmeticExpr
643
668
  from .literal import Literal
669
+
644
670
  if isinstance(other, Expr):
645
671
  return ArithmeticExpr(op, self, other)
646
672
  if isinstance(other, typing.get_args(LiteralPythonTypes)):
@@ -655,6 +681,7 @@ class Expr(abc.ABC):
655
681
  # TODO: check for compatibility
656
682
  from .arithmetic_expr import ArithmeticExpr
657
683
  from .literal import Literal
684
+
658
685
  assert not isinstance(other, Expr) # Else the left-handed form would have evaluated first
659
686
  if isinstance(other, typing.get_args(LiteralPythonTypes)):
660
687
  return ArithmeticExpr(op, Literal(other), self)
@@ -666,6 +693,7 @@ class Expr(abc.ABC):
666
693
  if not other.col_type.is_bool_type():
667
694
  raise TypeError(f'Other needs to be an expression that returns a boolean: {other.col_type}')
668
695
  from .compound_predicate import CompoundPredicate
696
+
669
697
  return CompoundPredicate(LogicalOperator.AND, [self, other])
670
698
 
671
699
  def __or__(self, other: object) -> Expr:
@@ -674,14 +702,15 @@ class Expr(abc.ABC):
674
702
  if not other.col_type.is_bool_type():
675
703
  raise TypeError(f'Other needs to be an expression that returns a boolean: {other.col_type}')
676
704
  from .compound_predicate import CompoundPredicate
705
+
677
706
  return CompoundPredicate(LogicalOperator.OR, [self, other])
678
707
 
679
708
  def __invert__(self) -> Expr:
680
709
  from .compound_predicate import CompoundPredicate
710
+
681
711
  return CompoundPredicate(LogicalOperator.NOT, [self])
682
712
 
683
- def split_conjuncts(
684
- self, condition: Callable[[Expr], bool]) -> tuple[list[Expr], Optional[Expr]]:
713
+ def split_conjuncts(self, condition: Callable[[Expr], bool]) -> tuple[list[Expr], Optional[Expr]]:
685
714
  """
686
715
  Returns clauses of a conjunction that meet condition in the first element.
687
716
  The second element contains remaining clauses, rolled into a conjunction.
@@ -721,7 +750,8 @@ class Expr(abc.ABC):
721
750
  if fn_type is None:
722
751
  raise excs.Error(
723
752
  f'Column type of `{fn.__name__}` cannot be inferred. '
724
- f'Use `.apply({fn.__name__}, col_type=...)` to specify.')
753
+ f'Use `.apply({fn.__name__}, col_type=...)` to specify.'
754
+ )
725
755
 
726
756
  # TODO(aaron-siegel) Currently we assume that `fn` has exactly one required parameter
727
757
  # and all optional parameters take their default values. Should we provide a more
@@ -741,17 +771,15 @@ class Expr(abc.ABC):
741
771
  second_param = next(params_iter) if len(params) >= 2 else None
742
772
  # Check that fn has at least one positional parameter
743
773
  if len(params) == 0 or first_param.kind in (inspect.Parameter.KEYWORD_ONLY, inspect.Parameter.VAR_KEYWORD):
744
- raise excs.Error(
745
- f'Function `{fn.__name__}` has no positional parameters.'
746
- )
774
+ raise excs.Error(f'Function `{fn.__name__}` has no positional parameters.')
747
775
  # Check that fn has at most one required parameter, i.e., its second parameter
748
776
  # has no default and is not a varargs
749
- if len(params) >= 2 and \
750
- second_param.kind not in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD) and \
751
- second_param.default == inspect.Parameter.empty:
752
- raise excs.Error(
753
- f'Function `{fn.__name__}` has multiple required parameters.'
754
- )
777
+ if (
778
+ len(params) >= 2
779
+ and second_param.kind not in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD)
780
+ and second_param.default == inspect.Parameter.empty
781
+ ):
782
+ raise excs.Error(f'Function `{fn.__name__}` has multiple required parameters.')
755
783
  except ValueError:
756
784
  # inspect.signature(fn) will raise a `ValueError` if `fn` is a builtin; I don't
757
785
  # know of any way to get the signature of a builtin, nor to check for this in
@@ -765,7 +793,8 @@ class Expr(abc.ABC):
765
793
  # We also set the display_name explicitly, so that the `FunctionCall` gets the
766
794
  # name of `decorated_fn`, not the lambda.
767
795
  return func.make_function(
768
- decorated_fn=lambda x: fn(x), return_type=fn_type, param_types=[self.col_type], function_name=fn.__name__)
796
+ decorated_fn=lambda x: fn(x), return_type=fn_type, param_types=[self.col_type], function_name=fn.__name__
797
+ )
769
798
 
770
799
 
771
800
  # A dictionary of result types of various stdlib functions that are
pixeltable/exprs/expr_dict.py CHANGED
@@ -1,9 +1,10 @@
1
- from typing import Generic, TypeVar, Optional, Iterator, Iterable
1
+ from typing import Generic, Iterable, Iterator, Optional, TypeVar
2
2
 
3
3
  T = TypeVar('T')
4
4
 
5
5
  from .expr import Expr
6
6
 
7
+
7
8
  class ExprDict(Generic[T]):
8
9
  """
9
10
  A dictionary that maps Expr instances to values of type T.
pixeltable/exprs/expr_set.py CHANGED
@@ -1,15 +1,17 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Optional, Iterable, Iterator, TypeVar, Generic
3
+ from typing import Generic, Iterable, Iterator, Optional, TypeVar
4
4
 
5
5
  from .expr import Expr
6
6
 
7
7
  T = TypeVar('T', bound='Expr')
8
8
 
9
+
9
10
  class ExprSet(Generic[T]):
10
11
  """
11
12
  A set that also supports indexed lookup (by slot_idx and Expr.id). Exprs are uniquely identified by Expr.id.
12
13
  """
14
+
13
15
  exprs: dict[int, T] # key: Expr.id
14
16
  exprs_by_idx: dict[int, T] # key: slot_idx
15
17
 
pixeltable/exprs/function_call.py CHANGED
@@ -22,7 +22,6 @@ from .sql_element_cache import SqlElementCache
22
22
 
23
23
 
24
24
  class FunctionCall(Expr):
25
-
26
25
  fn: func.Function
27
26
  is_method_call: bool
28
27
  agg_init_args: dict[str, Any]
@@ -58,7 +57,7 @@ class FunctionCall(Expr):
58
57
  return_type: ts.ColumnType,
59
58
  order_by_clause: Optional[list[Any]] = None,
60
59
  group_by_clause: Optional[list[Any]] = None,
61
- is_method_call: bool = False
60
+ is_method_call: bool = False,
62
61
  ):
63
62
  if order_by_clause is None:
64
63
  order_by_clause = []
@@ -69,7 +68,7 @@ class FunctionCall(Expr):
69
68
 
70
69
  self.fn = fn
71
70
  self.is_method_call = is_method_call
72
- #self.normalize_args(fn.name, signature, bound_args)
71
+ # self.normalize_args(fn.name, signature, bound_args)
73
72
  self.resource_pool = fn.call_resource_pool(bound_args)
74
73
  signature = fn.signature
75
74
 
@@ -79,8 +78,10 @@ class FunctionCall(Expr):
79
78
  for arg_name, arg in bound_args.items():
80
79
  param = signature.parameters[arg_name]
81
80
  if (
82
- param.col_type is not None and not param.col_type.nullable
83
- and isinstance(arg, Expr) and arg.col_type.nullable
81
+ param.col_type is not None
82
+ and not param.col_type.nullable
83
+ and isinstance(arg, Expr)
84
+ and arg.col_type.nullable
84
85
  ):
85
86
  return_type = return_type.copy(nullable=True)
86
87
  break
@@ -96,7 +97,9 @@ class FunctionCall(Expr):
96
97
  self.agg_init_args = {
97
98
  arg_name: arg for arg_name, arg in bound_args.items() if arg_name in fn.init_param_names[0]
98
99
  }
99
- bound_args = {arg_name: arg for arg_name, arg in bound_args.items() if arg_name not in fn.init_param_names[0]}
100
+ bound_args = {
101
+ arg_name: arg for arg_name, arg in bound_args.items() if arg_name not in fn.init_param_names[0]
102
+ }
100
103
 
101
104
  # construct components, args, kwargs
102
105
  self.args = []
@@ -171,7 +174,8 @@ class FunctionCall(Expr):
171
174
  # (that's done in SQL)
172
175
  if len(order_by_clause) > 0 and not isinstance(order_by_clause[0], Expr):
173
176
  raise excs.Error(
174
- f'order_by argument needs to be a Pixeltable expression, but instead is a {type(order_by_clause[0])}')
177
+ f'order_by argument needs to be a Pixeltable expression, but instead is a {type(order_by_clause[0])}'
178
+ )
175
179
  # don't add components after this, everything after order_by_start_idx is part of the order_by clause
176
180
  self.order_by_start_idx = len(self.components)
177
181
  self.components.extend(order_by_clause)
@@ -257,7 +261,8 @@ class FunctionCall(Expr):
257
261
  ):
258
262
  raise excs.Error(
259
263
  f'Parameter {param_name} (in function {fn_name}): argument type {arg.col_type} does not match parameter type '
260
- f'{param.col_type}')
264
+ f'{param.col_type}'
265
+ )
261
266
 
262
267
  def _equals(self, other: FunctionCall) -> bool:
263
268
  if self.fn != other.fn:
@@ -282,7 +287,7 @@ class FunctionCall(Expr):
282
287
  ('kwargs', self.kwargs),
283
288
  ('group_by_start_idx', self.group_by_start_idx),
284
289
  ('group_by_stop_idx', self.group_by_stop_idx),
285
- ('order_by_start_idx', self.order_by_start_idx)
290
+ ('order_by_start_idx', self.order_by_start_idx),
286
291
  ]
287
292
 
288
293
  def __repr__(self) -> str:
@@ -298,13 +303,14 @@ class FunctionCall(Expr):
298
303
  def _print_args(self, start_idx: int = 0, inline: bool = True) -> str:
299
304
  def print_arg(arg: Any) -> str:
300
305
  return repr(arg) if isinstance(arg, str) else str(arg)
301
- arg_strs = [
302
- print_arg(arg) if idx is None else str(self.components[idx]) for idx, arg in self.args[start_idx:]
303
- ]
304
- arg_strs.extend([
305
- f'{param_name}={print_arg(arg) if idx is None else str(self.components[idx])}'
306
- for param_name, (idx, arg) in self.kwargs.items()
307
- ])
306
+
307
+ arg_strs = [print_arg(arg) if idx is None else str(self.components[idx]) for idx, arg in self.args[start_idx:]]
308
+ arg_strs.extend(
309
+ [
310
+ f'{param_name}={print_arg(arg) if idx is None else str(self.components[idx])}'
311
+ for param_name, (idx, arg) in self.kwargs.items()
312
+ ]
313
+ )
308
314
  if len(self.order_by) > 0:
309
315
  assert isinstance(self.fn, func.AggregateFunction)
310
316
  if self.fn.requires_order_by:
@@ -322,18 +328,22 @@ class FunctionCall(Expr):
322
328
 
323
329
  @property
324
330
  def group_by(self) -> list[Expr]:
325
- return self.components[self.group_by_start_idx:self.group_by_stop_idx]
331
+ return self.components[self.group_by_start_idx : self.group_by_stop_idx]
326
332
 
327
333
  @property
328
334
  def order_by(self) -> list[Expr]:
329
- return self.components[self.order_by_start_idx:]
335
+ return self.components[self.order_by_start_idx :]
330
336
 
331
337
  @property
332
338
  def is_window_fn_call(self) -> bool:
333
- return isinstance(self.fn, func.AggregateFunction) and self.fn.allows_window and (
334
- not self.fn.allows_std_agg
335
- or self.has_group_by()
336
- or (len(self.order_by) > 0 and not self.fn.requires_order_by)
339
+ return (
340
+ isinstance(self.fn, func.AggregateFunction)
341
+ and self.fn.allows_window
342
+ and (
343
+ not self.fn.allows_std_agg
344
+ or self.has_group_by()
345
+ or (len(self.order_by) > 0 and not self.fn.requires_order_by)
346
+ )
337
347
  )
338
348
 
339
349
  def get_window_sort_exprs(self) -> tuple[list[Expr], list[Expr]]:
@@ -435,7 +445,7 @@ class FunctionCall(Expr):
435
445
  Returns a list of dicts mapping each param name to its value when this FunctionCall is evaluated against
436
446
  data_rows
437
447
  """
438
- assert all(name in self._param_values for name in param_names)
448
+ assert all(name in self._param_values for name in param_names), f'{param_names}, {self._param_values.keys()}'
439
449
  result: list[dict[str, Any]] = []
440
450
  for row in data_rows:
441
451
  d: dict[str, Any] = {}
@@ -512,16 +522,12 @@ class FunctionCall(Expr):
512
522
  fn = func.Function.from_dict(d['fn'])
513
523
  assert not fn.is_polymorphic
514
524
  return_type = ts.ColumnType.from_dict(d['return_type']) if 'return_type' in d else None
515
- group_by_exprs = components[d['group_by_start_idx']:d['group_by_stop_idx']]
516
- order_by_exprs = components[d['order_by_start_idx']:]
525
+ group_by_exprs = components[d['group_by_start_idx'] : d['group_by_stop_idx']]
526
+ order_by_exprs = components[d['order_by_start_idx'] :]
517
527
 
518
- args = [
519
- expr if idx is None else components[idx]
520
- for idx, expr in d['args']
521
- ]
528
+ args = [expr if idx is None else components[idx] for idx, expr in d['args']]
522
529
  kwargs = {
523
- param_name: (expr if idx is None else components[idx])
524
- for param_name, (idx, expr) in d['kwargs'].items()
530
+ param_name: (expr if idx is None else components[idx]) for param_name, (idx, expr) in d['kwargs'].items()
525
531
  }
526
532
 
527
533
  # `Function.from_dict()` does signature matching, so it is safe to assume that `args` and `kwargs` are
@@ -538,9 +544,7 @@ class FunctionCall(Expr):
538
544
  # for now, as a hack, we do the unpacking here for the specific case of an InlineList of Literals (the only
539
545
  # case where this is necessary to support existing conditional_return_type implementations). Once the general
540
546
  # pattern is implemented, we can remove this hack.
541
- unpacked_bound_args = {
542
- param_name: cls.__unpack_bound_arg(arg) for param_name, arg in bound_args.items()
543
- }
547
+ unpacked_bound_args = {param_name: cls.__unpack_bound_arg(arg) for param_name, arg in bound_args.items()}
544
548
 
545
549
  # Evaluate the call_return_type as defined in the current codebase.
546
550
  call_return_type = fn.call_return_type([], unpacked_bound_args)
@@ -567,13 +571,7 @@ class FunctionCall(Expr):
567
571
  f'Return type as currently defined: `{call_return_type}`'
568
572
  )
569
573
 
570
- fn_call = cls(
571
- fn,
572
- bound_args,
573
- return_type,
574
- group_by_clause=group_by_exprs,
575
- order_by_clause=order_by_exprs
576
- )
574
+ fn_call = cls(fn, bound_args, return_type, group_by_clause=group_by_exprs, order_by_clause=order_by_exprs)
577
575
  return fn_call
578
576
 
579
577
  @classmethod
pixeltable/exprs/globals.py CHANGED
@@ -7,6 +7,7 @@ from typing import Union
7
7
  # Python types corresponding to our literal types
8
8
  LiteralPythonTypes = Union[str, int, float, bool, datetime.datetime]
9
9
 
10
+
10
11
  def print_slice(s: slice) -> str:
11
12
  start_str = f'{str(s.start) if s.start is not None else ""}'
12
13
  stop_str = f'{str(s.stop) if s.stop is not None else ""}'