pixeltable 0.3.10__py3-none-any.whl → 0.3.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (119)
  1. pixeltable/__init__.py +2 -2
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +2 -1
  4. pixeltable/catalog/catalog.py +370 -93
  5. pixeltable/catalog/column.py +6 -4
  6. pixeltable/catalog/dir.py +5 -5
  7. pixeltable/catalog/globals.py +14 -16
  8. pixeltable/catalog/insertable_table.py +6 -8
  9. pixeltable/catalog/path.py +14 -7
  10. pixeltable/catalog/table.py +72 -62
  11. pixeltable/catalog/table_version.py +137 -107
  12. pixeltable/catalog/table_version_handle.py +3 -0
  13. pixeltable/catalog/table_version_path.py +1 -1
  14. pixeltable/catalog/view.py +10 -14
  15. pixeltable/dataframe.py +5 -3
  16. pixeltable/env.py +108 -42
  17. pixeltable/exec/__init__.py +2 -0
  18. pixeltable/exec/aggregation_node.py +6 -8
  19. pixeltable/exec/cache_prefetch_node.py +4 -7
  20. pixeltable/exec/component_iteration_node.py +1 -3
  21. pixeltable/exec/data_row_batch.py +1 -2
  22. pixeltable/exec/exec_context.py +1 -1
  23. pixeltable/exec/exec_node.py +1 -2
  24. pixeltable/exec/expr_eval/__init__.py +2 -0
  25. pixeltable/exec/expr_eval/evaluators.py +137 -20
  26. pixeltable/exec/expr_eval/expr_eval_node.py +43 -64
  27. pixeltable/exec/expr_eval/globals.py +68 -7
  28. pixeltable/exec/expr_eval/schedulers.py +25 -23
  29. pixeltable/exec/in_memory_data_node.py +8 -6
  30. pixeltable/exec/row_update_node.py +3 -4
  31. pixeltable/exec/sql_node.py +16 -18
  32. pixeltable/exprs/__init__.py +1 -1
  33. pixeltable/exprs/column_property_ref.py +1 -1
  34. pixeltable/exprs/column_ref.py +3 -3
  35. pixeltable/exprs/compound_predicate.py +1 -1
  36. pixeltable/exprs/data_row.py +17 -1
  37. pixeltable/exprs/expr.py +12 -12
  38. pixeltable/exprs/function_call.py +34 -2
  39. pixeltable/exprs/json_mapper.py +95 -48
  40. pixeltable/exprs/json_path.py +4 -9
  41. pixeltable/exprs/method_ref.py +2 -2
  42. pixeltable/exprs/object_ref.py +2 -2
  43. pixeltable/exprs/row_builder.py +33 -6
  44. pixeltable/exprs/similarity_expr.py +1 -1
  45. pixeltable/exprs/sql_element_cache.py +1 -1
  46. pixeltable/exprs/string_op.py +2 -2
  47. pixeltable/ext/__init__.py +1 -1
  48. pixeltable/ext/functions/__init__.py +1 -1
  49. pixeltable/ext/functions/whisperx.py +1 -1
  50. pixeltable/ext/functions/yolox.py +1 -1
  51. pixeltable/func/__init__.py +1 -1
  52. pixeltable/func/aggregate_function.py +2 -2
  53. pixeltable/func/callable_function.py +3 -6
  54. pixeltable/func/expr_template_function.py +24 -4
  55. pixeltable/func/function.py +7 -9
  56. pixeltable/func/function_registry.py +1 -1
  57. pixeltable/func/query_template_function.py +87 -4
  58. pixeltable/func/signature.py +1 -1
  59. pixeltable/func/tools.py +1 -1
  60. pixeltable/func/udf.py +2 -2
  61. pixeltable/functions/__init__.py +1 -1
  62. pixeltable/functions/anthropic.py +2 -2
  63. pixeltable/functions/audio.py +1 -1
  64. pixeltable/functions/deepseek.py +1 -1
  65. pixeltable/functions/fireworks.py +1 -1
  66. pixeltable/functions/globals.py +6 -6
  67. pixeltable/functions/huggingface.py +1 -1
  68. pixeltable/functions/image.py +1 -1
  69. pixeltable/functions/json.py +1 -1
  70. pixeltable/functions/llama_cpp.py +1 -1
  71. pixeltable/functions/math.py +1 -1
  72. pixeltable/functions/mistralai.py +1 -1
  73. pixeltable/functions/ollama.py +1 -1
  74. pixeltable/functions/openai.py +2 -2
  75. pixeltable/functions/replicate.py +1 -1
  76. pixeltable/functions/string.py +1 -1
  77. pixeltable/functions/timestamp.py +1 -1
  78. pixeltable/functions/together.py +1 -1
  79. pixeltable/functions/util.py +1 -1
  80. pixeltable/functions/video.py +2 -2
  81. pixeltable/functions/vision.py +2 -2
  82. pixeltable/globals.py +7 -2
  83. pixeltable/index/embedding_index.py +12 -1
  84. pixeltable/io/__init__.py +5 -3
  85. pixeltable/io/fiftyone.py +6 -7
  86. pixeltable/io/label_studio.py +21 -20
  87. pixeltable/io/pandas.py +6 -5
  88. pixeltable/iterators/__init__.py +1 -1
  89. pixeltable/metadata/__init__.py +6 -4
  90. pixeltable/metadata/converters/convert_24.py +3 -3
  91. pixeltable/metadata/converters/convert_25.py +1 -1
  92. pixeltable/metadata/converters/convert_29.py +1 -1
  93. pixeltable/metadata/converters/convert_31.py +11 -0
  94. pixeltable/metadata/converters/convert_32.py +15 -0
  95. pixeltable/metadata/converters/convert_33.py +17 -0
  96. pixeltable/metadata/notes.py +3 -0
  97. pixeltable/metadata/schema.py +26 -1
  98. pixeltable/plan.py +2 -3
  99. pixeltable/share/packager.py +8 -24
  100. pixeltable/share/publish.py +20 -9
  101. pixeltable/store.py +9 -6
  102. pixeltable/type_system.py +19 -7
  103. pixeltable/utils/console_output.py +3 -2
  104. pixeltable/utils/coroutine.py +3 -3
  105. pixeltable/utils/dbms.py +66 -0
  106. pixeltable/utils/documents.py +61 -67
  107. pixeltable/utils/exception_handler.py +59 -0
  108. pixeltable/utils/filecache.py +1 -1
  109. pixeltable/utils/http_server.py +3 -2
  110. pixeltable/utils/pytorch.py +1 -1
  111. pixeltable/utils/sql.py +1 -1
  112. pixeltable-0.3.12.dist-info/METADATA +436 -0
  113. pixeltable-0.3.12.dist-info/RECORD +183 -0
  114. pixeltable/catalog/path_dict.py +0 -169
  115. pixeltable-0.3.10.dist-info/METADATA +0 -382
  116. pixeltable-0.3.10.dist-info/RECORD +0 -179
  117. {pixeltable-0.3.10.dist-info → pixeltable-0.3.12.dist-info}/LICENSE +0 -0
  118. {pixeltable-0.3.10.dist-info → pixeltable-0.3.12.dist-info}/WHEEL +0 -0
  119. {pixeltable-0.3.10.dist-info → pixeltable-0.3.12.dist-info}/entry_points.txt +0 -0
@@ -20,16 +20,84 @@ class JsonMapper(Expr):
20
20
  JsonMapper transforms the list output of a JsonPath by applying a target expr to every element of the list.
21
21
  The target expr would typically contain relative JsonPaths, which are bound to an ObjectRef, which in turn
22
22
  is populated by JsonMapper.eval(). The JsonMapper effectively creates a new scope for its target expr.
23
+
24
+ JsonMapper is executed in two phases:
25
+ - the first phase is handled by Expr subclass JsonMapperDispatch, which constructs one nested DataRow per source
26
+ list element and evaluates the target expr within that (the nested DataRows are stored as a NestedRowList in the
27
+ slot of JsonMapperDispatch)
28
+ - JsonMapper.eval() collects the slot values of the target expr into its result list
23
29
  """
24
30
 
25
31
  target_expr_scope: ExprScope
26
32
  parent_mapper: Optional[JsonMapper]
27
33
  target_expr_eval_ctx: Optional[RowBuilder.EvalCtx]
28
34
 
29
- def __init__(self, src_expr: Expr, target_expr: Expr):
35
+ def __init__(self, src_expr: Optional[Expr], target_expr: Optional[Expr]):
30
36
  # TODO: type spec should be list[target_expr.col_type]
31
37
  super().__init__(ts.JsonType())
32
38
 
39
+ dispatch = JsonMapperDispatch(src_expr, target_expr)
40
+ self.components.append(dispatch)
41
+ self.id = self._create_id()
42
+
43
+ def __repr__(self) -> str:
44
+ return f'map({self._src_expr}, lambda R: {self._target_expr})'
45
+
46
+ @property
47
+ def _src_expr(self) -> Expr:
48
+ return self.components[0].src_expr
49
+
50
+ @property
51
+ def _target_expr(self) -> Expr:
52
+ return self.components[0].target_expr
53
+
54
+ def _equals(self, _: JsonMapper) -> bool:
55
+ return True
56
+
57
+ def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
58
+ return None
59
+
60
+ def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
61
+ from ..exec.expr_eval.evaluators import NestedRowList
62
+
63
+ dispatch_slot_idx = self.components[0].slot_idx
64
+ nested_rows = data_row.vals[dispatch_slot_idx]
65
+ if nested_rows is None:
66
+ data_row[self.slot_idx] = None
67
+ return
68
+ assert isinstance(nested_rows, NestedRowList)
69
+ # TODO: get the materialized slot idx, instead of relying on the fact that the target_expr is always at the end
70
+ data_row[self.slot_idx] = [row.vals[-1] for row in nested_rows.rows]
71
+
72
+ def _as_dict(self) -> dict:
73
+ """
74
+ We only serialize src and target exprs, everything else is re-created at runtime.
75
+ """
76
+ return {'components': [self._src_expr.as_dict(), self._target_expr.as_dict()]}
77
+
78
+ @classmethod
79
+ def _from_dict(cls, d: dict, components: list[Expr]) -> JsonMapper:
80
+ assert len(components) == 2
81
+ src_expr, target_expr = components[0], components[1]
82
+ return cls(src_expr, target_expr)
83
+
84
+
85
+ class JsonMapperDispatch(Expr):
86
+ """
87
+ An operational Expr (ie, it doesn't represent any syntactic element) that is used by JsonMapper to materialize
88
+ its input DataRows. It has the same dependencies as the originating JsonMapper.
89
+
90
+ - The execution (= row dispatch) is handled by an expr_eval.Evaluator (JsonMapperDispatcher).
91
+ - It stores a NestedRowList instance in its slot.
92
+ """
93
+
94
+ target_expr_scope: ExprScope
95
+ parent_mapper: Optional[JsonMapperDispatch]
96
+ target_expr_eval_ctx: Optional[RowBuilder.EvalCtx]
97
+
98
+ def __init__(self, src_expr: Expr, target_expr: Expr):
99
+ super().__init__(ts.InvalidType())
100
+
33
101
  # we're creating a new scope, but we don't know yet whether this is nested within another JsonMapper;
34
102
  # this gets resolved in bind_rel_paths(); for now we assume we're in the global scope
35
103
  self.target_expr_scope = ExprScope(_GLOBAL_SCOPE)
@@ -40,28 +108,36 @@ class JsonMapper(Expr):
40
108
  self.parent_mapper = None
41
109
  self.target_expr_eval_ctx = None
42
110
 
43
- # Intentionally create the id now, before adding the scope anchor; this ensures that JsonMappers will
44
- # be recognized as equal so long as they have the same src_expr and target_expr.
111
+ # Intentionally create the id now, before adding the scope anchor; this ensures that JsonMapperDispatch
112
+ # instances will be recognized as equal so long as they have the same src_expr and target_expr.
45
113
  # TODO: Might this cause problems after certain substitutions?
46
114
  self.id = self._create_id()
47
115
 
48
116
  scope_anchor = ObjectRef(self.target_expr_scope, self)
49
117
  self.components.append(scope_anchor)
50
118
 
51
- def _bind_rel_paths(self, mapper: Optional[JsonMapper] = None) -> None:
52
- self._src_expr._bind_rel_paths(mapper)
53
- self._target_expr._bind_rel_paths(self)
119
+ def _bind_rel_paths(self, mapper: Optional[JsonMapperDispatch] = None) -> None:
120
+ self.src_expr._bind_rel_paths(mapper)
121
+ self.target_expr._bind_rel_paths(self)
54
122
  self.parent_mapper = mapper
55
123
  parent_scope = _GLOBAL_SCOPE if mapper is None else mapper.target_expr_scope
56
124
  self.target_expr_scope.parent = parent_scope
57
125
 
126
+ def equals(self, other: Expr) -> bool:
127
+ """
128
+ We override equals() because we need to avoid comparing our scope anchor.
129
+ """
130
+ if type(self) is not type(other):
131
+ return False
132
+ return self.src_expr.equals(other.src_expr) and self.target_expr.equals(other.target_expr)
133
+
58
134
  def scope(self) -> ExprScope:
59
135
  # need to ignore target_expr
60
- return self._src_expr.scope()
136
+ return self.src_expr.scope()
61
137
 
62
138
  def dependencies(self) -> list[Expr]:
63
- result = [self._src_expr]
64
- result.extend(self._target_dependencies(self._target_expr))
139
+ result = [self.src_expr]
140
+ result.extend(self._target_dependencies(self.target_expr))
65
141
  return result
66
142
 
67
143
  def _target_dependencies(self, e: Expr) -> list[Expr]:
@@ -77,23 +153,12 @@ class JsonMapper(Expr):
77
153
  result.extend(self._target_dependencies(c))
78
154
  return result
79
155
 
80
- def equals(self, other: Expr) -> bool:
81
- """
82
- We override equals() because we need to avoid comparing our scope anchor.
83
- """
84
- if type(self) is not type(other):
85
- return False
86
- return self._src_expr.equals(other._src_expr) and self._target_expr.equals(other._target_expr)
87
-
88
- def __repr__(self) -> str:
89
- return f'map({self._src_expr}, lambda R: {self._target_expr})'
90
-
91
156
  @property
92
- def _src_expr(self) -> Expr:
157
+ def src_expr(self) -> Expr:
93
158
  return self.components[0]
94
159
 
95
160
  @property
96
- def _target_expr(self) -> Expr:
161
+ def target_expr(self) -> Expr:
97
162
  return self.components[1]
98
163
 
99
164
  @property
@@ -104,37 +169,19 @@ class JsonMapper(Expr):
104
169
  assert isinstance(result, ObjectRef)
105
170
  return result
106
171
 
107
- def _equals(self, _: JsonMapper) -> bool:
108
- return True
109
-
110
- def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
111
- return None
172
+ def __repr__(self) -> str:
173
+ return 'JsonMapperDispatch()'
112
174
 
113
175
  def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
114
- # this will be called, but the value has already been materialized elsewhere
115
- src = data_row[self._src_expr.slot_idx]
116
- if not isinstance(src, list):
117
- # invalid/non-list src path
118
- data_row[self.slot_idx] = None
119
- return
120
-
121
- result = [None] * len(src)
122
- if self.target_expr_eval_ctx is None:
123
- self.target_expr_eval_ctx = row_builder.create_eval_ctx([self._target_expr])
124
- for i, val in enumerate(src):
125
- data_row[self.scope_anchor.slot_idx] = val
126
- # stored target_expr
127
- row_builder.eval(data_row, self.target_expr_eval_ctx, force_eval=self._target_expr.scope())
128
- result[i] = data_row[self._target_expr.slot_idx]
129
- data_row[self.slot_idx] = result
176
+ # eval is handled by JsonMapperDispatcher
177
+ raise AssertionError('this should never be called')
130
178
 
131
179
  def _as_dict(self) -> dict:
132
180
  """
133
- We need to avoid serializing component[2], which is an ObjectRef.
181
+ JsonMapperDispatch instances are only created by the JsonMapper c'tor and never need to be serialized.
134
182
  """
135
- return {'components': [c.as_dict() for c in self.components[0:2]]}
183
+ raise AssertionError('this should never be called')
136
184
 
137
185
  @classmethod
138
- def _from_dict(cls, d: dict, components: list[Expr]) -> JsonMapper:
139
- assert len(components) == 2
140
- return cls(components[0], components[1])
186
+ def _from_dict(cls, d: dict, components: list[Expr]) -> JsonMapperDispatch:
187
+ raise AssertionError('this should never be called')
@@ -5,13 +5,12 @@ from typing import Any, Optional, Union
5
5
  import jmespath
6
6
  import sqlalchemy as sql
7
7
 
8
- import pixeltable as pxt
9
8
  from pixeltable import catalog, exceptions as excs, type_system as ts
10
9
 
11
10
  from .data_row import DataRow
12
11
  from .expr import Expr
13
12
  from .globals import print_slice
14
- from .json_mapper import JsonMapper
13
+ from .json_mapper import JsonMapperDispatch
15
14
  from .object_ref import ObjectRef
16
15
  from .row_builder import RowBuilder
17
16
  from .sql_element_cache import SqlElementCache
@@ -19,10 +18,7 @@ from .sql_element_cache import SqlElementCache
19
18
 
20
19
  class JsonPath(Expr):
21
20
  def __init__(
22
- self,
23
- anchor: Optional['pxt.exprs.Expr'],
24
- path_elements: Optional[list[Union[str, int, slice]]] = None,
25
- scope_idx: int = 0,
21
+ self, anchor: Optional[Expr], path_elements: Optional[list[Union[str, int, slice]]] = None, scope_idx: int = 0
26
22
  ) -> None:
27
23
  """
28
24
  anchor can be None, in which case this is a relative JsonPath and the anchor is set later via set_anchor().
@@ -80,11 +76,10 @@ class JsonPath(Expr):
80
76
  def is_relative_path(self) -> bool:
81
77
  return self._anchor is None
82
78
 
83
- @property
84
79
  def _has_relative_path(self) -> bool:
85
- return self.is_relative_path() or super()._has_relative_path
80
+ return self.is_relative_path() or super()._has_relative_path()
86
81
 
87
- def _bind_rel_paths(self, mapper: Optional['JsonMapper'] = None) -> None:
82
+ def _bind_rel_paths(self, mapper: Optional['JsonMapperDispatch'] = None) -> None:
88
83
  if self.is_relative_path():
89
84
  # TODO: take scope_idx into account
90
85
  self.set_anchor(mapper.scope_anchor)
@@ -23,7 +23,7 @@ class MethodRef(Expr):
23
23
  # TODO: Should this even be an `Expr`? It can't actually be evaluated directly (it has to be first
24
24
  # converted to a `FunctionCall` by binding any remaining parameters).
25
25
 
26
- def __init__(self, base_expr: Expr, method_name: str):
26
+ def __init__(self, base_expr: Expr, method_name: str) -> None:
27
27
  super().__init__(ts.InvalidType()) # The `MethodRef` is untyped until it is called.
28
28
  self.base_expr = base_expr
29
29
  self.method_name = method_name
@@ -43,7 +43,7 @@ class MethodRef(Expr):
43
43
  assert len(components) == 1
44
44
  return cls(components[0], d['method_name'])
45
45
 
46
- def __call__(self, *args, **kwargs) -> FunctionCall:
46
+ def __call__(self, *args: Any, **kwargs: Any) -> FunctionCall:
47
47
  result = self.fn(*[self.base_expr, *args], **kwargs)
48
48
  assert isinstance(result, FunctionCall)
49
49
  result.is_method_call = True
@@ -8,7 +8,7 @@ import pixeltable.type_system as ts
8
8
 
9
9
  from .data_row import DataRow
10
10
  from .expr import Expr, ExprScope
11
- from .json_mapper import JsonMapper
11
+ from .json_mapper import JsonMapperDispatch
12
12
  from .row_builder import RowBuilder
13
13
  from .sql_element_cache import SqlElementCache
14
14
 
@@ -19,7 +19,7 @@ class ObjectRef(Expr):
19
19
  The object is generated/materialized elsewhere and establishes a new scope.
20
20
  """
21
21
 
22
- def __init__(self, scope: ExprScope, owner: JsonMapper):
22
+ def __init__(self, scope: ExprScope, owner: JsonMapperDispatch):
23
23
  # TODO: do we need an Unknown type after all?
24
24
  super().__init__(ts.JsonType()) # JsonType: this could be anything
25
25
  self._scope = scope
@@ -77,6 +77,8 @@ class RowBuilder:
77
77
  transitive_dependents: np.ndarray # of bool
78
78
  # dependencies[i] = direct dependencies of expr with slot idx i; transpose of dependents
79
79
  dependencies: np.ndarray # of bool
80
+ # num_dependencies[i] = number of direct dependencies of expr with slot idx i
81
+ num_dependencies: np.ndarray # of int
80
82
 
81
83
  # records the output_expr that a subexpr belongs to
82
84
  # (a subexpr can be shared across multiple output exprs)
@@ -209,6 +211,7 @@ class RowBuilder:
209
211
  exc_dependencies[expr.slot_idx].add(d.slot_idx)
210
212
  exc_dependencies[expr.slot_idx].update(exc_dependencies[d.slot_idx])
211
213
 
214
+ self.num_dependencies = np.sum(self.dependencies, axis=1)
212
215
  self.dependents = self.dependencies.T
213
216
  self.transitive_dependents = np.zeros((self.num_materialized, self.num_materialized), dtype=bool)
214
217
  for i in reversed(range(self.num_materialized)):
@@ -275,8 +278,14 @@ class RowBuilder:
275
278
  for d in e.dependencies():
276
279
  self._record_output_expr_id(d, output_expr_id)
277
280
 
278
- def _compute_dependencies(self, target_slot_idxs: list[int], excluded_slot_idxs: list[int]) -> list[int]:
279
- """Compute exprs needed to materialize the given target slots, excluding 'excluded_slot_idxs'"""
281
+ def _compute_dependencies(
282
+ self, target_slot_idxs: list[int], excluded_slot_idxs: list[int], target_scope: Optional[ExprScope] = None
283
+ ) -> list[int]:
284
+ """Compute exprs needed to materialize the given target slots, excluding 'excluded_slot_idxs'
285
+
286
+ If target_scope != None, stops transitive dependency resolution when leaving target_scope (ie, includes
287
+ immediate dependents that aren't in target_scope, but doesn't resolve those).
288
+ """
280
289
  dependencies: list[set[int]] = [set() for _ in range(self.num_materialized)] # indexed by slot_idx
281
290
  # doing this front-to-back ensures that we capture transitive dependencies
282
291
  max_target_slot_idx = max(target_slot_idxs)
@@ -289,6 +298,9 @@ class RowBuilder:
289
298
  if expr.slot_idx in self.input_expr_slot_idxs:
290
299
  # this is input and therefore doesn't depend on other exprs
291
300
  continue
301
+ if target_scope is not None and expr.scope() != target_scope:
302
+ # don't resolve dependencies outside of target_scope
303
+ continue
292
304
  for d in expr.dependencies():
293
305
  assert d.slot_idx is not None, f'{expr}, {d}'
294
306
  if d.slot_idx in excluded_slot_idxs:
@@ -320,10 +332,15 @@ class RowBuilder:
320
332
  for c in e.components:
321
333
  self.__set_slot_idxs_aux(c)
322
334
 
323
- def get_dependencies(self, targets: Iterable[Expr], exclude: Optional[Iterable[Expr]] = None) -> list[Expr]:
335
+ def get_dependencies(
336
+ self, targets: Iterable[Expr], exclude: Optional[Iterable[Expr]] = None, limit_scope: bool = True
337
+ ) -> list[Expr]:
324
338
  """
325
339
  Return list of dependencies needed to evaluate the given target exprs (expressed as slot idxs).
326
340
  The exprs given in 'exclude' are excluded.
341
+ If limit_scope == True, only returns dependencies in the same scope and immediate (ie, not transitive)
342
+ dependencies from enclosing scopes.
343
+
327
344
  Returns:
328
345
  list of Exprs from unique_exprs (= with slot_idx set)
329
346
  """
@@ -334,23 +351,33 @@ class RowBuilder:
334
351
  return []
335
352
  # make sure we only refer to recorded exprs
336
353
  targets = [self.unique_exprs[e] for e in targets]
354
+ target_scope: Optional[ExprScope] = None
355
+ if limit_scope:
356
+ # make sure all targets are from the same scope
357
+ target_scopes = {e.scope() for e in targets}
358
+ assert len(target_scopes) == 1
359
+ target_scope = target_scopes.pop()
337
360
  exclude = [self.unique_exprs[e] for e in exclude]
338
361
  target_slot_idxs = [e.slot_idx for e in targets]
339
362
  excluded_slot_idxs = [e.slot_idx for e in exclude]
340
- all_dependencies = set(self._compute_dependencies(target_slot_idxs, excluded_slot_idxs))
363
+ all_dependencies = set(
364
+ self._compute_dependencies(target_slot_idxs, excluded_slot_idxs, target_scope=target_scope)
365
+ )
341
366
  all_dependencies.update(target_slot_idxs)
342
367
  result_ids = list(all_dependencies)
343
368
  result_ids.sort()
344
369
  return [self.unique_exprs[id] for id in result_ids]
345
370
 
346
- def create_eval_ctx(self, targets: Iterable[Expr], exclude: Optional[Iterable[Expr]] = None) -> EvalCtx:
371
+ def create_eval_ctx(
372
+ self, targets: Iterable[Expr], exclude: Optional[Iterable[Expr]] = None, limit_scope: bool = True
373
+ ) -> EvalCtx:
347
374
  """Return EvalCtx for targets"""
348
375
  targets = list(targets)
349
376
  if exclude is None:
350
377
  exclude = []
351
378
  if len(targets) == 0:
352
379
  return self.EvalCtx([], [], [], [])
353
- dependencies = self.get_dependencies(targets, exclude)
380
+ dependencies = self.get_dependencies(targets, exclude, limit_scope=limit_scope)
354
381
  targets = [self.unique_exprs[e] for e in targets]
355
382
  target_slot_idxs = [e.slot_idx for e in targets]
356
383
  ctx_slot_idxs = [e.slot_idx for e in dependencies]
@@ -47,7 +47,7 @@ class SimilarityExpr(Expr):
47
47
  def __repr__(self) -> str:
48
48
  return f'{self.components[0]}.similarity({self.components[1]})'
49
49
 
50
- def _id_attrs(self):
50
+ def _id_attrs(self) -> list[tuple[str, Any]]:
51
51
  return [*super()._id_attrs(), ('idx_name', self.idx_info.name)]
52
52
 
53
53
  def default_column_name(self) -> str:
@@ -17,7 +17,7 @@ class SqlElementCache:
17
17
  for e, el in elements.items():
18
18
  self.cache[e.id] = el
19
19
 
20
- def extend(self, elements: ExprDict[sql.ColumnElement]):
20
+ def extend(self, elements: ExprDict[sql.ColumnElement]) -> None:
21
21
  for e, el in elements.items():
22
22
  self.cache[e.id] = el
23
23
 
@@ -26,7 +26,7 @@ class StringOp(Expr):
26
26
  self.operator = operator
27
27
  self.components = [op1, op2]
28
28
  assert op1.col_type.is_string_type()
29
- if operator in {StringOperator.CONCAT, StringOperator.REPEAT}:
29
+ if operator in (StringOperator.CONCAT, StringOperator.REPEAT):
30
30
  if operator == StringOperator.CONCAT and not op2.col_type.is_string_type():
31
31
  raise excs.Error(
32
32
  f'{self}: {operator} on strings requires string type, but {op2} has type {op2.col_type}'
@@ -89,7 +89,7 @@ class StringOp(Expr):
89
89
  """
90
90
  Return the result of evaluating the expression on two int/float operands
91
91
  """
92
- assert self.operator in {StringOperator.CONCAT, StringOperator.REPEAT}
92
+ assert self.operator in (StringOperator.CONCAT, StringOperator.REPEAT)
93
93
  if self.operator == StringOperator.CONCAT:
94
94
  assert isinstance(op2_val, str)
95
95
  return op1_val + op2_val
@@ -13,5 +13,5 @@ from . import functions
13
13
  __all__ = local_public_names(__name__)
14
14
 
15
15
 
16
- def __dir__():
16
+ def __dir__() -> list[str]:
17
17
  return __all__
@@ -7,5 +7,5 @@ from . import whisperx, yolox
7
7
  __all__ = local_public_names(__name__)
8
8
 
9
9
 
10
- def __dir__():
10
+ def __dir__() -> list[str]:
11
11
  return __all__
@@ -73,5 +73,5 @@ _model_cache: dict[tuple[str, str, str], 'FasterWhisperPipeline'] = {}
73
73
  __all__ = local_public_names(__name__)
74
74
 
75
75
 
76
- def __dir__():
76
+ def __dir__() -> list[str]:
77
77
  return __all__
@@ -110,5 +110,5 @@ _processor_cache: dict[str, 'YoloxProcessor'] = {}
110
110
  __all__ = local_public_names(__name__)
111
111
 
112
112
 
113
- def __dir__():
113
+ def __dir__() -> list[str]:
114
114
  return __all__
@@ -5,7 +5,7 @@ from .callable_function import CallableFunction
5
5
  from .expr_template_function import ExprTemplateFunction
6
6
  from .function import Function, InvalidFunction
7
7
  from .function_registry import FunctionRegistry
8
- from .query_template_function import QueryTemplateFunction, query
8
+ from .query_template_function import QueryTemplateFunction, query, retrieval_udf
9
9
  from .signature import Batch, Parameter, Signature
10
10
  from .tools import Tool, ToolChoice, Tools
11
11
  from .udf import expr_udf, make_function, udf
@@ -159,7 +159,7 @@ class AggregateFunction(Function):
159
159
  self.init_param_names.append(init_param_names)
160
160
  return self
161
161
 
162
- def _docstring(self) -> Optional[str]:
162
+ def comment(self) -> Optional[str]:
163
163
  return inspect.getdoc(self.agg_classes[0])
164
164
 
165
165
  def help_str(self) -> str:
@@ -252,7 +252,7 @@ def uda(
252
252
  ) -> Callable[[type[Aggregator]], AggregateFunction]: ...
253
253
 
254
254
 
255
- def uda(*args, **kwargs):
255
+ def uda(*args, **kwargs): # type: ignore[no-untyped-def]
256
256
  """Decorator for user-defined aggregate functions.
257
257
 
258
258
  The decorated class must inherit from Aggregator and implement the following methods:
@@ -1,6 +1,5 @@
1
1
  from __future__ import annotations
2
2
 
3
- import asyncio
4
3
  import inspect
5
4
  from typing import TYPE_CHECKING, Any, Callable, Optional, Sequence
6
5
  from uuid import UUID
@@ -61,7 +60,7 @@ class CallableFunction(Function):
61
60
  def is_async(self) -> bool:
62
61
  return inspect.iscoroutinefunction(self.py_fn)
63
62
 
64
- def _docstring(self) -> Optional[str]:
63
+ def comment(self) -> Optional[str]:
65
64
  return inspect.getdoc(self.py_fns[0])
66
65
 
67
66
  @property
@@ -127,12 +126,10 @@ class CallableFunction(Function):
127
126
  """
128
127
  assert self.is_batched
129
128
  assert not self.is_polymorphic
129
+ assert not self.is_async
130
130
  # Unpack the constant parameters
131
131
  constant_kwargs, batched_kwargs = self.create_batch_kwargs(kwargs)
132
- if inspect.iscoroutinefunction(self.py_fn):
133
- return asyncio.run(self.py_fn(*args, **constant_kwargs, **batched_kwargs))
134
- else:
135
- return self.py_fn(*args, **constant_kwargs, **batched_kwargs)
132
+ return self.py_fn(*args, **constant_kwargs, **batched_kwargs)
136
133
 
137
134
  def create_batch_kwargs(self, kwargs: dict[str, Any]) -> tuple[dict[str, Any], dict[str, list[Any]]]:
138
135
  """Converts kwargs containing lists into constant and batched kwargs in the format expected by a batched udf."""
@@ -1,6 +1,6 @@
1
1
  from typing import Any, Optional, Sequence
2
2
 
3
- from pixeltable import exceptions as excs, exprs
3
+ from pixeltable import exceptions as excs, exprs, type_system as ts
4
4
 
5
5
  from .function import Function
6
6
  from .signature import Signature
@@ -76,12 +76,28 @@ class ExprTemplateFunction(Function):
76
76
  arg_expr = arg
77
77
  arg_exprs[param_expr] = arg_expr
78
78
  result = result.substitute(arg_exprs)
79
- assert not result._contains(exprs.Variable)
80
79
  return result
81
80
 
82
- def _docstring(self) -> Optional[str]:
81
+ def call_return_type(self, bound_args: dict[str, 'exprs.Expr']) -> ts.ColumnType:
82
+ """
83
+ The call_return_type of an ExprTemplateFunction is derived from the template expression's col_type after
84
+ substitution (unlike for UDFs, whose call_return_type is derived from an explicitly specified
85
+ conditional_return_type).
86
+ """
87
+ assert not self.is_polymorphic
88
+ template = self.template
89
+ with_defaults = bound_args.copy()
90
+ with_defaults.update(
91
+ {param_name: default for param_name, default in template.defaults.items() if param_name not in bound_args}
92
+ )
93
+ substituted_expr = self.template.expr.copy().substitute(
94
+ {template.param_exprs[name]: expr for name, expr in with_defaults.items()}
95
+ )
96
+ return substituted_expr.col_type
97
+
98
+ def comment(self) -> Optional[str]:
83
99
  if isinstance(self.templates[0].expr, exprs.FunctionCall):
84
- return self.templates[0].expr.fn._docstring()
100
+ return self.templates[0].expr.fn.comment()
85
101
  return None
86
102
 
87
103
  def exec(self, args: Sequence[Any], kwargs: dict[str, Any]) -> Any:
@@ -97,6 +113,10 @@ class ExprTemplateFunction(Function):
97
113
 
98
114
  @property
99
115
  def display_name(self) -> str:
116
+ if not self.self_name and isinstance(self.templates[0].expr, exprs.FunctionCall):
117
+ # In the common case where the templated expression is itself a FunctionCall,
118
+ # fall back on the display name of the underlying FunctionCall
119
+ return self.templates[0].expr.fn.display_name
100
120
  return self.self_name
101
121
 
102
122
  @property
@@ -10,9 +10,7 @@ from typing import TYPE_CHECKING, Any, Callable, Optional, Sequence, cast
10
10
  import sqlalchemy as sql
11
11
  from typing_extensions import Self
12
12
 
13
- import pixeltable as pxt
14
- import pixeltable.exceptions as excs
15
- import pixeltable.type_system as ts
13
+ from pixeltable import exceptions as excs, type_system as ts
16
14
 
17
15
  from .globals import resolve_symbol
18
16
  from .signature import Signature
@@ -107,11 +105,11 @@ class Function(ABC):
107
105
  @abstractmethod
108
106
  def is_async(self) -> bool: ...
109
107
 
110
- def _docstring(self) -> Optional[str]:
108
+ def comment(self) -> Optional[str]:
111
109
  return None
112
110
 
113
111
  def help_str(self) -> str:
114
- docstring = self._docstring()
112
+ docstring = self.comment()
115
113
  display = self.display_name + str(self.signatures[0])
116
114
  if docstring is None:
117
115
  return display
@@ -155,7 +153,7 @@ class Function(ABC):
155
153
  """
156
154
  raise NotImplementedError()
157
155
 
158
- def __call__(self, *args: Any, **kwargs: Any) -> 'pxt.exprs.FunctionCall':
156
+ def __call__(self, *args: Any, **kwargs: Any) -> 'exprs.FunctionCall':
159
157
  from pixeltable import exprs
160
158
 
161
159
  args = [exprs.Expr.from_object(arg) for arg in args]
@@ -246,7 +244,7 @@ class Function(ABC):
246
244
  # `None` when any of its non-nullable inputs are `None`.
247
245
  for arg_name, arg in bound_args.items():
248
246
  param = self.signature.parameters[arg_name]
249
- if param.kind in {inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD}:
247
+ if param.kind in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD):
250
248
  continue
251
249
  if arg.col_type.nullable and not param.col_type.nullable:
252
250
  return_type = return_type.copy(nullable=True)
@@ -385,10 +383,10 @@ class Function(ABC):
385
383
  else:
386
384
  var = exprs.Variable(name, param.col_type)
387
385
  bindings[name] = var
388
- if args_ok and param.kind in {
386
+ if args_ok and param.kind in (
389
387
  inspect.Parameter.POSITIONAL_ONLY,
390
388
  inspect.Parameter.POSITIONAL_OR_KEYWORD,
391
- }:
389
+ ):
392
390
  template_args.append(var)
393
391
  else:
394
392
  template_kwargs[name] = var
@@ -31,7 +31,7 @@ class FunctionRegistry:
31
31
  cls._instance = FunctionRegistry()
32
32
  return cls._instance
33
33
 
34
- def __init__(self):
34
+ def __init__(self) -> None:
35
35
  self.stored_fns_by_id: dict[UUID, Function] = {}
36
36
  self.module_fns: dict[str, Function] = {} # fqn -> Function
37
37
  self.type_methods: dict[ts.ColumnType.Type, dict[str, Function]] = {}