pixeltable 0.3.10__py3-none-any.whl → 0.3.11__py3-none-any.whl

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (106)
  1. pixeltable/__init__.py +1 -1
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +2 -1
  4. pixeltable/catalog/catalog.py +63 -36
  5. pixeltable/catalog/column.py +6 -4
  6. pixeltable/catalog/dir.py +5 -5
  7. pixeltable/catalog/globals.py +12 -14
  8. pixeltable/catalog/insertable_table.py +4 -7
  9. pixeltable/catalog/path.py +2 -2
  10. pixeltable/catalog/table.py +64 -56
  11. pixeltable/catalog/table_version.py +42 -40
  12. pixeltable/catalog/table_version_handle.py +3 -0
  13. pixeltable/catalog/table_version_path.py +1 -1
  14. pixeltable/catalog/view.py +8 -7
  15. pixeltable/dataframe.py +5 -3
  16. pixeltable/env.py +108 -42
  17. pixeltable/exec/__init__.py +2 -0
  18. pixeltable/exec/aggregation_node.py +6 -8
  19. pixeltable/exec/cache_prefetch_node.py +4 -7
  20. pixeltable/exec/component_iteration_node.py +1 -3
  21. pixeltable/exec/data_row_batch.py +1 -2
  22. pixeltable/exec/exec_context.py +1 -1
  23. pixeltable/exec/exec_node.py +1 -2
  24. pixeltable/exec/expr_eval/__init__.py +2 -0
  25. pixeltable/exec/expr_eval/evaluators.py +137 -20
  26. pixeltable/exec/expr_eval/expr_eval_node.py +43 -64
  27. pixeltable/exec/expr_eval/globals.py +68 -7
  28. pixeltable/exec/expr_eval/schedulers.py +25 -23
  29. pixeltable/exec/in_memory_data_node.py +8 -6
  30. pixeltable/exec/row_update_node.py +3 -4
  31. pixeltable/exec/sql_node.py +16 -17
  32. pixeltable/exprs/__init__.py +1 -1
  33. pixeltable/exprs/column_property_ref.py +1 -1
  34. pixeltable/exprs/column_ref.py +3 -3
  35. pixeltable/exprs/compound_predicate.py +1 -1
  36. pixeltable/exprs/data_row.py +17 -1
  37. pixeltable/exprs/expr.py +12 -12
  38. pixeltable/exprs/function_call.py +34 -2
  39. pixeltable/exprs/json_mapper.py +95 -48
  40. pixeltable/exprs/json_path.py +3 -4
  41. pixeltable/exprs/method_ref.py +2 -2
  42. pixeltable/exprs/object_ref.py +2 -2
  43. pixeltable/exprs/row_builder.py +33 -6
  44. pixeltable/exprs/similarity_expr.py +1 -1
  45. pixeltable/exprs/sql_element_cache.py +1 -1
  46. pixeltable/exprs/string_op.py +2 -2
  47. pixeltable/ext/__init__.py +1 -1
  48. pixeltable/ext/functions/__init__.py +1 -1
  49. pixeltable/ext/functions/whisperx.py +1 -1
  50. pixeltable/ext/functions/yolox.py +1 -1
  51. pixeltable/func/aggregate_function.py +1 -1
  52. pixeltable/func/callable_function.py +2 -5
  53. pixeltable/func/expr_template_function.py +22 -2
  54. pixeltable/func/function.py +4 -5
  55. pixeltable/func/function_registry.py +1 -1
  56. pixeltable/func/signature.py +1 -1
  57. pixeltable/func/udf.py +2 -2
  58. pixeltable/functions/__init__.py +1 -1
  59. pixeltable/functions/anthropic.py +2 -2
  60. pixeltable/functions/audio.py +1 -1
  61. pixeltable/functions/deepseek.py +1 -1
  62. pixeltable/functions/fireworks.py +1 -1
  63. pixeltable/functions/globals.py +6 -6
  64. pixeltable/functions/huggingface.py +1 -1
  65. pixeltable/functions/image.py +1 -1
  66. pixeltable/functions/json.py +1 -1
  67. pixeltable/functions/llama_cpp.py +1 -1
  68. pixeltable/functions/math.py +1 -1
  69. pixeltable/functions/mistralai.py +1 -1
  70. pixeltable/functions/ollama.py +1 -1
  71. pixeltable/functions/openai.py +2 -2
  72. pixeltable/functions/replicate.py +1 -1
  73. pixeltable/functions/string.py +1 -1
  74. pixeltable/functions/timestamp.py +1 -1
  75. pixeltable/functions/together.py +1 -1
  76. pixeltable/functions/util.py +1 -1
  77. pixeltable/functions/video.py +2 -2
  78. pixeltable/functions/vision.py +2 -2
  79. pixeltable/index/embedding_index.py +12 -1
  80. pixeltable/io/__init__.py +5 -3
  81. pixeltable/io/fiftyone.py +6 -7
  82. pixeltable/io/label_studio.py +21 -20
  83. pixeltable/io/pandas.py +6 -5
  84. pixeltable/iterators/__init__.py +1 -1
  85. pixeltable/metadata/__init__.py +5 -3
  86. pixeltable/metadata/converters/convert_24.py +3 -3
  87. pixeltable/metadata/converters/convert_25.py +1 -1
  88. pixeltable/metadata/converters/convert_29.py +1 -1
  89. pixeltable/store.py +2 -2
  90. pixeltable/type_system.py +19 -7
  91. pixeltable/utils/console_output.py +3 -2
  92. pixeltable/utils/coroutine.py +3 -3
  93. pixeltable/utils/dbms.py +66 -0
  94. pixeltable/utils/documents.py +61 -67
  95. pixeltable/utils/filecache.py +1 -1
  96. pixeltable/utils/http_server.py +3 -2
  97. pixeltable/utils/pytorch.py +1 -1
  98. pixeltable/utils/sql.py +1 -1
  99. pixeltable-0.3.11.dist-info/METADATA +436 -0
  100. pixeltable-0.3.11.dist-info/RECORD +179 -0
  101. pixeltable/catalog/path_dict.py +0 -169
  102. pixeltable-0.3.10.dist-info/METADATA +0 -382
  103. pixeltable-0.3.10.dist-info/RECORD +0 -179
  104. {pixeltable-0.3.10.dist-info → pixeltable-0.3.11.dist-info}/LICENSE +0 -0
  105. {pixeltable-0.3.10.dist-info → pixeltable-0.3.11.dist-info}/WHEEL +0 -0
  106. {pixeltable-0.3.10.dist-info → pixeltable-0.3.11.dist-info}/entry_points.txt +0 -0
@@ -20,16 +20,84 @@ class JsonMapper(Expr):
20
20
  JsonMapper transforms the list output of a JsonPath by applying a target expr to every element of the list.
21
21
  The target expr would typically contain relative JsonPaths, which are bound to an ObjectRef, which in turn
22
22
  is populated by JsonMapper.eval(). The JsonMapper effectively creates a new scope for its target expr.
23
+
24
+ JsonMapper is executed in two phases:
25
+ - the first phase is handled by Expr subclass JsonMapperDispatch, which constructs one nested DataRow per source
26
+ list element and evaluates the target expr within that (the nested DataRows are stored as a NestedRowList in the
27
+ slot of JsonMapperDispatch)
28
+ - JsonMapper.eval() collects the slot values of the target expr into its result list
23
29
  """
24
30
 
25
31
  target_expr_scope: ExprScope
26
32
  parent_mapper: Optional[JsonMapper]
27
33
  target_expr_eval_ctx: Optional[RowBuilder.EvalCtx]
28
34
 
29
- def __init__(self, src_expr: Expr, target_expr: Expr):
35
+ def __init__(self, src_expr: Optional[Expr], target_expr: Optional[Expr]):
30
36
  # TODO: type spec should be list[target_expr.col_type]
31
37
  super().__init__(ts.JsonType())
32
38
 
39
+ dispatch = JsonMapperDispatch(src_expr, target_expr)
40
+ self.components.append(dispatch)
41
+ self.id = self._create_id()
42
+
43
+ def __repr__(self) -> str:
44
+ return f'map({self._src_expr}, lambda R: {self._target_expr})'
45
+
46
+ @property
47
+ def _src_expr(self) -> Expr:
48
+ return self.components[0].src_expr
49
+
50
+ @property
51
+ def _target_expr(self) -> Expr:
52
+ return self.components[0].target_expr
53
+
54
+ def _equals(self, _: JsonMapper) -> bool:
55
+ return True
56
+
57
+ def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
58
+ return None
59
+
60
+ def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
61
+ from ..exec.expr_eval.evaluators import NestedRowList
62
+
63
+ dispatch_slot_idx = self.components[0].slot_idx
64
+ nested_rows = data_row.vals[dispatch_slot_idx]
65
+ if nested_rows is None:
66
+ data_row[self.slot_idx] = None
67
+ return
68
+ assert isinstance(nested_rows, NestedRowList)
69
+ # TODO: get the materialized slot idx, instead of relying on the fact that the target_expr is always at the end
70
+ data_row[self.slot_idx] = [row.vals[-1] for row in nested_rows.rows]
71
+
72
+ def _as_dict(self) -> dict:
73
+ """
74
+ We only serialize src and target exprs, everything else is re-created at runtime.
75
+ """
76
+ return {'components': [self._src_expr.as_dict(), self._target_expr.as_dict()]}
77
+
78
+ @classmethod
79
+ def _from_dict(cls, d: dict, components: list[Expr]) -> JsonMapper:
80
+ assert len(components) == 2
81
+ src_expr, target_expr = components[0], components[1]
82
+ return cls(src_expr, target_expr)
83
+
84
+
85
+ class JsonMapperDispatch(Expr):
86
+ """
87
+ An operational Expr (ie, it doesn't represent any syntactic element) that is used by JsonMapper to materialize
88
+ its input DataRows. It has the same dependencies as the originating JsonMapper.
89
+
90
+ - The execution (= row dispatch) is handled by an expr_eval.Evaluator (JsonMapperDispatcher).
91
+ - It stores a NestedRowList instance in its slot.
92
+ """
93
+
94
+ target_expr_scope: ExprScope
95
+ parent_mapper: Optional[JsonMapperDispatch]
96
+ target_expr_eval_ctx: Optional[RowBuilder.EvalCtx]
97
+
98
+ def __init__(self, src_expr: Expr, target_expr: Expr):
99
+ super().__init__(ts.InvalidType())
100
+
33
101
  # we're creating a new scope, but we don't know yet whether this is nested within another JsonMapper;
34
102
  # this gets resolved in bind_rel_paths(); for now we assume we're in the global scope
35
103
  self.target_expr_scope = ExprScope(_GLOBAL_SCOPE)
@@ -40,28 +108,36 @@ class JsonMapper(Expr):
40
108
  self.parent_mapper = None
41
109
  self.target_expr_eval_ctx = None
42
110
 
43
- # Intentionally create the id now, before adding the scope anchor; this ensures that JsonMappers will
44
- # be recognized as equal so long as they have the same src_expr and target_expr.
111
+ # Intentionally create the id now, before adding the scope anchor; this ensures that JsonMapperDispatch
112
+ # instances will be recognized as equal so long as they have the same src_expr and target_expr.
45
113
  # TODO: Might this cause problems after certain substitutions?
46
114
  self.id = self._create_id()
47
115
 
48
116
  scope_anchor = ObjectRef(self.target_expr_scope, self)
49
117
  self.components.append(scope_anchor)
50
118
 
51
- def _bind_rel_paths(self, mapper: Optional[JsonMapper] = None) -> None:
52
- self._src_expr._bind_rel_paths(mapper)
53
- self._target_expr._bind_rel_paths(self)
119
+ def _bind_rel_paths(self, mapper: Optional[JsonMapperDispatch] = None) -> None:
120
+ self.src_expr._bind_rel_paths(mapper)
121
+ self.target_expr._bind_rel_paths(self)
54
122
  self.parent_mapper = mapper
55
123
  parent_scope = _GLOBAL_SCOPE if mapper is None else mapper.target_expr_scope
56
124
  self.target_expr_scope.parent = parent_scope
57
125
 
126
+ def equals(self, other: Expr) -> bool:
127
+ """
128
+ We override equals() because we need to avoid comparing our scope anchor.
129
+ """
130
+ if type(self) is not type(other):
131
+ return False
132
+ return self.src_expr.equals(other.src_expr) and self.target_expr.equals(other.target_expr)
133
+
58
134
  def scope(self) -> ExprScope:
59
135
  # need to ignore target_expr
60
- return self._src_expr.scope()
136
+ return self.src_expr.scope()
61
137
 
62
138
  def dependencies(self) -> list[Expr]:
63
- result = [self._src_expr]
64
- result.extend(self._target_dependencies(self._target_expr))
139
+ result = [self.src_expr]
140
+ result.extend(self._target_dependencies(self.target_expr))
65
141
  return result
66
142
 
67
143
  def _target_dependencies(self, e: Expr) -> list[Expr]:
@@ -77,23 +153,12 @@ class JsonMapper(Expr):
77
153
  result.extend(self._target_dependencies(c))
78
154
  return result
79
155
 
80
- def equals(self, other: Expr) -> bool:
81
- """
82
- We override equals() because we need to avoid comparing our scope anchor.
83
- """
84
- if type(self) is not type(other):
85
- return False
86
- return self._src_expr.equals(other._src_expr) and self._target_expr.equals(other._target_expr)
87
-
88
- def __repr__(self) -> str:
89
- return f'map({self._src_expr}, lambda R: {self._target_expr})'
90
-
91
156
  @property
92
- def _src_expr(self) -> Expr:
157
+ def src_expr(self) -> Expr:
93
158
  return self.components[0]
94
159
 
95
160
  @property
96
- def _target_expr(self) -> Expr:
161
+ def target_expr(self) -> Expr:
97
162
  return self.components[1]
98
163
 
99
164
  @property
@@ -104,37 +169,19 @@ class JsonMapper(Expr):
104
169
  assert isinstance(result, ObjectRef)
105
170
  return result
106
171
 
107
- def _equals(self, _: JsonMapper) -> bool:
108
- return True
109
-
110
- def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
111
- return None
172
+ def __repr__(self) -> str:
173
+ return 'JsonMapperDispatch()'
112
174
 
113
175
  def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
114
- # this will be called, but the value has already been materialized elsewhere
115
- src = data_row[self._src_expr.slot_idx]
116
- if not isinstance(src, list):
117
- # invalid/non-list src path
118
- data_row[self.slot_idx] = None
119
- return
120
-
121
- result = [None] * len(src)
122
- if self.target_expr_eval_ctx is None:
123
- self.target_expr_eval_ctx = row_builder.create_eval_ctx([self._target_expr])
124
- for i, val in enumerate(src):
125
- data_row[self.scope_anchor.slot_idx] = val
126
- # stored target_expr
127
- row_builder.eval(data_row, self.target_expr_eval_ctx, force_eval=self._target_expr.scope())
128
- result[i] = data_row[self._target_expr.slot_idx]
129
- data_row[self.slot_idx] = result
176
+ # eval is handled by JsonMapperDispatcher
177
+ raise AssertionError('this should never be called')
130
178
 
131
179
  def _as_dict(self) -> dict:
132
180
  """
133
- We need to avoid serializing component[2], which is an ObjectRef.
181
+ JsonMapperDispatch instances are only created by the JsonMapper c'tor and never need to be serialized.
134
182
  """
135
- return {'components': [c.as_dict() for c in self.components[0:2]]}
183
+ raise AssertionError('this should never be called')
136
184
 
137
185
  @classmethod
138
- def _from_dict(cls, d: dict, components: list[Expr]) -> JsonMapper:
139
- assert len(components) == 2
140
- return cls(components[0], components[1])
186
+ def _from_dict(cls, d: dict, components: list[Expr]) -> JsonMapperDispatch:
187
+ raise AssertionError('this should never be called')
@@ -11,7 +11,7 @@ from pixeltable import catalog, exceptions as excs, type_system as ts
11
11
  from .data_row import DataRow
12
12
  from .expr import Expr
13
13
  from .globals import print_slice
14
- from .json_mapper import JsonMapper
14
+ from .json_mapper import JsonMapperDispatch
15
15
  from .object_ref import ObjectRef
16
16
  from .row_builder import RowBuilder
17
17
  from .sql_element_cache import SqlElementCache
@@ -80,11 +80,10 @@ class JsonPath(Expr):
80
80
  def is_relative_path(self) -> bool:
81
81
  return self._anchor is None
82
82
 
83
- @property
84
83
  def _has_relative_path(self) -> bool:
85
- return self.is_relative_path() or super()._has_relative_path
84
+ return self.is_relative_path() or super()._has_relative_path()
86
85
 
87
- def _bind_rel_paths(self, mapper: Optional['JsonMapper'] = None) -> None:
86
+ def _bind_rel_paths(self, mapper: Optional['JsonMapperDispatch'] = None) -> None:
88
87
  if self.is_relative_path():
89
88
  # TODO: take scope_idx into account
90
89
  self.set_anchor(mapper.scope_anchor)
@@ -23,7 +23,7 @@ class MethodRef(Expr):
23
23
  # TODO: Should this even be an `Expr`? It can't actually be evaluated directly (it has to be first
24
24
  # converted to a `FunctionCall` by binding any remaining parameters).
25
25
 
26
- def __init__(self, base_expr: Expr, method_name: str):
26
+ def __init__(self, base_expr: Expr, method_name: str) -> None:
27
27
  super().__init__(ts.InvalidType()) # The `MethodRef` is untyped until it is called.
28
28
  self.base_expr = base_expr
29
29
  self.method_name = method_name
@@ -43,7 +43,7 @@ class MethodRef(Expr):
43
43
  assert len(components) == 1
44
44
  return cls(components[0], d['method_name'])
45
45
 
46
- def __call__(self, *args, **kwargs) -> FunctionCall:
46
+ def __call__(self, *args: Any, **kwargs: Any) -> FunctionCall:
47
47
  result = self.fn(*[self.base_expr, *args], **kwargs)
48
48
  assert isinstance(result, FunctionCall)
49
49
  result.is_method_call = True
@@ -8,7 +8,7 @@ import pixeltable.type_system as ts
8
8
 
9
9
  from .data_row import DataRow
10
10
  from .expr import Expr, ExprScope
11
- from .json_mapper import JsonMapper
11
+ from .json_mapper import JsonMapperDispatch
12
12
  from .row_builder import RowBuilder
13
13
  from .sql_element_cache import SqlElementCache
14
14
 
@@ -19,7 +19,7 @@ class ObjectRef(Expr):
19
19
  The object is generated/materialized elsewhere and establishes a new scope.
20
20
  """
21
21
 
22
- def __init__(self, scope: ExprScope, owner: JsonMapper):
22
+ def __init__(self, scope: ExprScope, owner: JsonMapperDispatch):
23
23
  # TODO: do we need an Unknown type after all?
24
24
  super().__init__(ts.JsonType()) # JsonType: this could be anything
25
25
  self._scope = scope
@@ -77,6 +77,8 @@ class RowBuilder:
77
77
  transitive_dependents: np.ndarray # of bool
78
78
  # dependencies[i] = direct dependencies of expr with slot idx i; transpose of dependents
79
79
  dependencies: np.ndarray # of bool
80
+ # num_dependencies[i] = number of direct dependencies of expr with slot idx i
81
+ num_dependencies: np.ndarray # of int
80
82
 
81
83
  # records the output_expr that a subexpr belongs to
82
84
  # (a subexpr can be shared across multiple output exprs)
@@ -209,6 +211,7 @@ class RowBuilder:
209
211
  exc_dependencies[expr.slot_idx].add(d.slot_idx)
210
212
  exc_dependencies[expr.slot_idx].update(exc_dependencies[d.slot_idx])
211
213
 
214
+ self.num_dependencies = np.sum(self.dependencies, axis=1)
212
215
  self.dependents = self.dependencies.T
213
216
  self.transitive_dependents = np.zeros((self.num_materialized, self.num_materialized), dtype=bool)
214
217
  for i in reversed(range(self.num_materialized)):
@@ -275,8 +278,14 @@ class RowBuilder:
275
278
  for d in e.dependencies():
276
279
  self._record_output_expr_id(d, output_expr_id)
277
280
 
278
- def _compute_dependencies(self, target_slot_idxs: list[int], excluded_slot_idxs: list[int]) -> list[int]:
279
- """Compute exprs needed to materialize the given target slots, excluding 'excluded_slot_idxs'"""
281
+ def _compute_dependencies(
282
+ self, target_slot_idxs: list[int], excluded_slot_idxs: list[int], target_scope: Optional[ExprScope] = None
283
+ ) -> list[int]:
284
+ """Compute exprs needed to materialize the given target slots, excluding 'excluded_slot_idxs'
285
+
286
+ If target_scope != None, stops transitive dependency resolution when leaving target_scope (ie, includes
287
+ immediate dependents that aren't in target_scope, but doesn't resolve those).
288
+ """
280
289
  dependencies: list[set[int]] = [set() for _ in range(self.num_materialized)] # indexed by slot_idx
281
290
  # doing this front-to-back ensures that we capture transitive dependencies
282
291
  max_target_slot_idx = max(target_slot_idxs)
@@ -289,6 +298,9 @@ class RowBuilder:
289
298
  if expr.slot_idx in self.input_expr_slot_idxs:
290
299
  # this is input and therefore doesn't depend on other exprs
291
300
  continue
301
+ if target_scope is not None and expr.scope() != target_scope:
302
+ # don't resolve dependencies outside of target_scope
303
+ continue
292
304
  for d in expr.dependencies():
293
305
  assert d.slot_idx is not None, f'{expr}, {d}'
294
306
  if d.slot_idx in excluded_slot_idxs:
@@ -320,10 +332,15 @@ class RowBuilder:
320
332
  for c in e.components:
321
333
  self.__set_slot_idxs_aux(c)
322
334
 
323
- def get_dependencies(self, targets: Iterable[Expr], exclude: Optional[Iterable[Expr]] = None) -> list[Expr]:
335
+ def get_dependencies(
336
+ self, targets: Iterable[Expr], exclude: Optional[Iterable[Expr]] = None, limit_scope: bool = True
337
+ ) -> list[Expr]:
324
338
  """
325
339
  Return list of dependencies needed to evaluate the given target exprs (expressed as slot idxs).
326
340
  The exprs given in 'exclude' are excluded.
341
+ If limit_scope == True, only returns dependencies in the same scope and immediate (ie, not transitive)
342
+ dependencies from enclosing scopes.
343
+
327
344
  Returns:
328
345
  list of Exprs from unique_exprs (= with slot_idx set)
329
346
  """
@@ -334,23 +351,33 @@ class RowBuilder:
334
351
  return []
335
352
  # make sure we only refer to recorded exprs
336
353
  targets = [self.unique_exprs[e] for e in targets]
354
+ target_scope: Optional[ExprScope] = None
355
+ if limit_scope:
356
+ # make sure all targets are from the same scope
357
+ target_scopes = {e.scope() for e in targets}
358
+ assert len(target_scopes) == 1
359
+ target_scope = target_scopes.pop()
337
360
  exclude = [self.unique_exprs[e] for e in exclude]
338
361
  target_slot_idxs = [e.slot_idx for e in targets]
339
362
  excluded_slot_idxs = [e.slot_idx for e in exclude]
340
- all_dependencies = set(self._compute_dependencies(target_slot_idxs, excluded_slot_idxs))
363
+ all_dependencies = set(
364
+ self._compute_dependencies(target_slot_idxs, excluded_slot_idxs, target_scope=target_scope)
365
+ )
341
366
  all_dependencies.update(target_slot_idxs)
342
367
  result_ids = list(all_dependencies)
343
368
  result_ids.sort()
344
369
  return [self.unique_exprs[id] for id in result_ids]
345
370
 
346
- def create_eval_ctx(self, targets: Iterable[Expr], exclude: Optional[Iterable[Expr]] = None) -> EvalCtx:
371
+ def create_eval_ctx(
372
+ self, targets: Iterable[Expr], exclude: Optional[Iterable[Expr]] = None, limit_scope: bool = True
373
+ ) -> EvalCtx:
347
374
  """Return EvalCtx for targets"""
348
375
  targets = list(targets)
349
376
  if exclude is None:
350
377
  exclude = []
351
378
  if len(targets) == 0:
352
379
  return self.EvalCtx([], [], [], [])
353
- dependencies = self.get_dependencies(targets, exclude)
380
+ dependencies = self.get_dependencies(targets, exclude, limit_scope=limit_scope)
354
381
  targets = [self.unique_exprs[e] for e in targets]
355
382
  target_slot_idxs = [e.slot_idx for e in targets]
356
383
  ctx_slot_idxs = [e.slot_idx for e in dependencies]
@@ -47,7 +47,7 @@ class SimilarityExpr(Expr):
47
47
  def __repr__(self) -> str:
48
48
  return f'{self.components[0]}.similarity({self.components[1]})'
49
49
 
50
- def _id_attrs(self):
50
+ def _id_attrs(self) -> list[tuple[str, Any]]:
51
51
  return [*super()._id_attrs(), ('idx_name', self.idx_info.name)]
52
52
 
53
53
  def default_column_name(self) -> str:
@@ -17,7 +17,7 @@ class SqlElementCache:
17
17
  for e, el in elements.items():
18
18
  self.cache[e.id] = el
19
19
 
20
- def extend(self, elements: ExprDict[sql.ColumnElement]):
20
+ def extend(self, elements: ExprDict[sql.ColumnElement]) -> None:
21
21
  for e, el in elements.items():
22
22
  self.cache[e.id] = el
23
23
 
@@ -26,7 +26,7 @@ class StringOp(Expr):
26
26
  self.operator = operator
27
27
  self.components = [op1, op2]
28
28
  assert op1.col_type.is_string_type()
29
- if operator in {StringOperator.CONCAT, StringOperator.REPEAT}:
29
+ if operator in (StringOperator.CONCAT, StringOperator.REPEAT):
30
30
  if operator == StringOperator.CONCAT and not op2.col_type.is_string_type():
31
31
  raise excs.Error(
32
32
  f'{self}: {operator} on strings requires string type, but {op2} has type {op2.col_type}'
@@ -89,7 +89,7 @@ class StringOp(Expr):
89
89
  """
90
90
  Return the result of evaluating the expression on two int/float operands
91
91
  """
92
- assert self.operator in {StringOperator.CONCAT, StringOperator.REPEAT}
92
+ assert self.operator in (StringOperator.CONCAT, StringOperator.REPEAT)
93
93
  if self.operator == StringOperator.CONCAT:
94
94
  assert isinstance(op2_val, str)
95
95
  return op1_val + op2_val
@@ -13,5 +13,5 @@ from . import functions
13
13
  __all__ = local_public_names(__name__)
14
14
 
15
15
 
16
- def __dir__():
16
+ def __dir__() -> list[str]:
17
17
  return __all__
@@ -7,5 +7,5 @@ from . import whisperx, yolox
7
7
  __all__ = local_public_names(__name__)
8
8
 
9
9
 
10
- def __dir__():
10
+ def __dir__() -> list[str]:
11
11
  return __all__
@@ -73,5 +73,5 @@ _model_cache: dict[tuple[str, str, str], 'FasterWhisperPipeline'] = {}
73
73
  __all__ = local_public_names(__name__)
74
74
 
75
75
 
76
- def __dir__():
76
+ def __dir__() -> list[str]:
77
77
  return __all__
@@ -110,5 +110,5 @@ _processor_cache: dict[str, 'YoloxProcessor'] = {}
110
110
  __all__ = local_public_names(__name__)
111
111
 
112
112
 
113
- def __dir__():
113
+ def __dir__() -> list[str]:
114
114
  return __all__
@@ -252,7 +252,7 @@ def uda(
252
252
  ) -> Callable[[type[Aggregator]], AggregateFunction]: ...
253
253
 
254
254
 
255
- def uda(*args, **kwargs):
255
+ def uda(*args, **kwargs): # type: ignore[no-untyped-def]
256
256
  """Decorator for user-defined aggregate functions.
257
257
 
258
258
  The decorated class must inherit from Aggregator and implement the following methods:
@@ -1,6 +1,5 @@
1
1
  from __future__ import annotations
2
2
 
3
- import asyncio
4
3
  import inspect
5
4
  from typing import TYPE_CHECKING, Any, Callable, Optional, Sequence
6
5
  from uuid import UUID
@@ -127,12 +126,10 @@ class CallableFunction(Function):
127
126
  """
128
127
  assert self.is_batched
129
128
  assert not self.is_polymorphic
129
+ assert not self.is_async
130
130
  # Unpack the constant parameters
131
131
  constant_kwargs, batched_kwargs = self.create_batch_kwargs(kwargs)
132
- if inspect.iscoroutinefunction(self.py_fn):
133
- return asyncio.run(self.py_fn(*args, **constant_kwargs, **batched_kwargs))
134
- else:
135
- return self.py_fn(*args, **constant_kwargs, **batched_kwargs)
132
+ return self.py_fn(*args, **constant_kwargs, **batched_kwargs)
136
133
 
137
134
  def create_batch_kwargs(self, kwargs: dict[str, Any]) -> tuple[dict[str, Any], dict[str, list[Any]]]:
138
135
  """Converts kwargs containing lists into constant and batched kwargs in the format expected by a batched udf."""
@@ -1,6 +1,6 @@
1
1
  from typing import Any, Optional, Sequence
2
2
 
3
- from pixeltable import exceptions as excs, exprs
3
+ from pixeltable import exceptions as excs, exprs, type_system as ts
4
4
 
5
5
  from .function import Function
6
6
  from .signature import Signature
@@ -76,9 +76,25 @@ class ExprTemplateFunction(Function):
76
76
  arg_expr = arg
77
77
  arg_exprs[param_expr] = arg_expr
78
78
  result = result.substitute(arg_exprs)
79
- assert not result._contains(exprs.Variable)
80
79
  return result
81
80
 
81
+ def call_return_type(self, bound_args: dict[str, 'exprs.Expr']) -> ts.ColumnType:
82
+ """
83
+ The call_return_type of an ExprTemplateFunction is derived from the template expression's col_type after
84
+ substitution (unlike for UDFs, whose call_return_type is derived from an explicitly specified
85
+ conditional_return_type).
86
+ """
87
+ assert not self.is_polymorphic
88
+ template = self.template
89
+ with_defaults = bound_args.copy()
90
+ with_defaults.update(
91
+ {param_name: default for param_name, default in template.defaults.items() if param_name not in bound_args}
92
+ )
93
+ substituted_expr = self.template.expr.copy().substitute(
94
+ {template.param_exprs[name]: expr for name, expr in with_defaults.items()}
95
+ )
96
+ return substituted_expr.col_type
97
+
82
98
  def _docstring(self) -> Optional[str]:
83
99
  if isinstance(self.templates[0].expr, exprs.FunctionCall):
84
100
  return self.templates[0].expr.fn._docstring()
@@ -97,6 +113,10 @@ class ExprTemplateFunction(Function):
97
113
 
98
114
  @property
99
115
  def display_name(self) -> str:
116
+ if not self.self_name and isinstance(self.templates[0].expr, exprs.FunctionCall):
117
+ # In the common case where the templated expression is itself a FunctionCall,
118
+ # fall back on the display name of the underlying FunctionCall
119
+ return self.templates[0].expr.fn.display_name
100
120
  return self.self_name
101
121
 
102
122
  @property
@@ -10,7 +10,6 @@ from typing import TYPE_CHECKING, Any, Callable, Optional, Sequence, cast
10
10
  import sqlalchemy as sql
11
11
  from typing_extensions import Self
12
12
 
13
- import pixeltable as pxt
14
13
  import pixeltable.exceptions as excs
15
14
  import pixeltable.type_system as ts
16
15
 
@@ -155,7 +154,7 @@ class Function(ABC):
155
154
  """
156
155
  raise NotImplementedError()
157
156
 
158
- def __call__(self, *args: Any, **kwargs: Any) -> 'pxt.exprs.FunctionCall':
157
+ def __call__(self, *args: Any, **kwargs: Any) -> 'exprs.FunctionCall':
159
158
  from pixeltable import exprs
160
159
 
161
160
  args = [exprs.Expr.from_object(arg) for arg in args]
@@ -246,7 +245,7 @@ class Function(ABC):
246
245
  # `None` when any of its non-nullable inputs are `None`.
247
246
  for arg_name, arg in bound_args.items():
248
247
  param = self.signature.parameters[arg_name]
249
- if param.kind in {inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD}:
248
+ if param.kind in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD):
250
249
  continue
251
250
  if arg.col_type.nullable and not param.col_type.nullable:
252
251
  return_type = return_type.copy(nullable=True)
@@ -385,10 +384,10 @@ class Function(ABC):
385
384
  else:
386
385
  var = exprs.Variable(name, param.col_type)
387
386
  bindings[name] = var
388
- if args_ok and param.kind in {
387
+ if args_ok and param.kind in (
389
388
  inspect.Parameter.POSITIONAL_ONLY,
390
389
  inspect.Parameter.POSITIONAL_OR_KEYWORD,
391
- }:
390
+ ):
392
391
  template_args.append(var)
393
392
  else:
394
393
  template_kwargs[name] = var
@@ -31,7 +31,7 @@ class FunctionRegistry:
31
31
  cls._instance = FunctionRegistry()
32
32
  return cls._instance
33
33
 
34
- def __init__(self):
34
+ def __init__(self) -> None:
35
35
  self.stored_fns_by_id: dict[UUID, Function] = {}
36
36
  self.module_fns: dict[str, Function] = {} # fqn -> Function
37
37
  self.type_methods: dict[ts.ColumnType.Type, dict[str, Function]] = {}
@@ -253,7 +253,7 @@ class Signature:
253
253
  continue # skip 'self' or 'cls' parameter
254
254
  if param.name in cls.SPECIAL_PARAM_NAMES:
255
255
  raise excs.Error(f'{param.name!r} is a reserved parameter name')
256
- if param.kind in {inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD}:
256
+ if param.kind in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD):
257
257
  parameters.append(Parameter(param.name, col_type=None, kind=param.kind))
258
258
  continue
259
259
 
pixeltable/func/udf.py CHANGED
@@ -43,7 +43,7 @@ def udf(
43
43
  ) -> ExprTemplateFunction: ...
44
44
 
45
45
 
46
- def udf(*args, **kwargs):
46
+ def udf(*args, **kwargs): # type: ignore[no-untyped-def]
47
47
  """A decorator to create a Function from a function definition.
48
48
 
49
49
  Examples:
@@ -79,7 +79,7 @@ def udf(*args, **kwargs):
79
79
  if len(args) > 0:
80
80
  raise excs.Error('Unexpected @udf decorator arguments.')
81
81
 
82
- def decorator(decorated_fn: Callable):
82
+ def decorator(decorated_fn: Callable) -> CallableFunction:
83
83
  return make_function(
84
84
  decorated_fn,
85
85
  batch_size=batch_size,
@@ -29,5 +29,5 @@ from .globals import count, map, max, mean, min, sum
29
29
  __all__ = local_public_names(__name__, exclude=['globals']) + local_public_names(globals.__name__)
30
30
 
31
31
 
32
- def __dir__():
32
+ def __dir__() -> list[str]:
33
33
  return __all__
@@ -39,7 +39,7 @@ def _anthropic_client() -> 'anthropic.AsyncAnthropic':
39
39
 
40
40
 
41
41
  class AnthropicRateLimitsInfo(env.RateLimitsInfo):
42
- def __init__(self):
42
+ def __init__(self) -> None:
43
43
  super().__init__(self._get_request_resources)
44
44
 
45
45
  def _get_request_resources(self, messages: dict, max_tokens: int) -> dict[str, int]:
@@ -236,5 +236,5 @@ def _opt(arg: _T) -> Union[_T, 'anthropic.NotGiven']:
236
236
  __all__ = local_public_names(__name__)
237
237
 
238
238
 
239
- def __dir__():
239
+ def __dir__() -> list[str]:
240
240
  return __all__
@@ -26,5 +26,5 @@ def get_metadata(audio: pxt.Audio) -> dict:
26
26
  __all__ = local_public_names(__name__)
27
27
 
28
28
 
29
- def __dir__():
29
+ def __dir__() -> list[str]:
30
30
  return __all__
@@ -117,5 +117,5 @@ async def chat_completions(
117
117
  __all__ = local_public_names(__name__)
118
118
 
119
119
 
120
- def __dir__():
120
+ def __dir__() -> list[str]:
121
121
  return __all__
@@ -131,5 +131,5 @@ async def chat_completions(
131
131
  __all__ = local_public_names(__name__)
132
132
 
133
133
 
134
- def __dir__():
134
+ def __dir__() -> list[str]:
135
135
  return __all__