pixeltable 0.3.2__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (147) hide show
  1. pixeltable/__init__.py +64 -11
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +1 -1
  4. pixeltable/catalog/catalog.py +50 -27
  5. pixeltable/catalog/column.py +27 -11
  6. pixeltable/catalog/dir.py +6 -4
  7. pixeltable/catalog/globals.py +8 -1
  8. pixeltable/catalog/insertable_table.py +22 -12
  9. pixeltable/catalog/named_function.py +10 -6
  10. pixeltable/catalog/path.py +3 -2
  11. pixeltable/catalog/path_dict.py +8 -6
  12. pixeltable/catalog/schema_object.py +2 -1
  13. pixeltable/catalog/table.py +121 -101
  14. pixeltable/catalog/table_version.py +291 -142
  15. pixeltable/catalog/table_version_path.py +8 -5
  16. pixeltable/catalog/view.py +67 -26
  17. pixeltable/dataframe.py +102 -72
  18. pixeltable/env.py +20 -21
  19. pixeltable/exec/__init__.py +2 -2
  20. pixeltable/exec/aggregation_node.py +10 -4
  21. pixeltable/exec/cache_prefetch_node.py +5 -3
  22. pixeltable/exec/component_iteration_node.py +9 -8
  23. pixeltable/exec/data_row_batch.py +21 -10
  24. pixeltable/exec/exec_context.py +10 -3
  25. pixeltable/exec/exec_node.py +23 -12
  26. pixeltable/exec/expr_eval/evaluators.py +13 -7
  27. pixeltable/exec/expr_eval/expr_eval_node.py +24 -15
  28. pixeltable/exec/expr_eval/globals.py +30 -7
  29. pixeltable/exec/expr_eval/row_buffer.py +5 -6
  30. pixeltable/exec/expr_eval/schedulers.py +151 -31
  31. pixeltable/exec/in_memory_data_node.py +8 -7
  32. pixeltable/exec/row_update_node.py +15 -5
  33. pixeltable/exec/sql_node.py +56 -27
  34. pixeltable/exprs/__init__.py +2 -2
  35. pixeltable/exprs/arithmetic_expr.py +57 -26
  36. pixeltable/exprs/array_slice.py +1 -1
  37. pixeltable/exprs/column_property_ref.py +2 -1
  38. pixeltable/exprs/column_ref.py +20 -15
  39. pixeltable/exprs/comparison.py +6 -2
  40. pixeltable/exprs/compound_predicate.py +1 -3
  41. pixeltable/exprs/data_row.py +2 -2
  42. pixeltable/exprs/expr.py +101 -72
  43. pixeltable/exprs/expr_dict.py +2 -1
  44. pixeltable/exprs/expr_set.py +3 -1
  45. pixeltable/exprs/function_call.py +39 -41
  46. pixeltable/exprs/globals.py +1 -0
  47. pixeltable/exprs/in_predicate.py +2 -2
  48. pixeltable/exprs/inline_expr.py +20 -17
  49. pixeltable/exprs/json_mapper.py +4 -2
  50. pixeltable/exprs/json_path.py +12 -18
  51. pixeltable/exprs/literal.py +5 -9
  52. pixeltable/exprs/method_ref.py +1 -0
  53. pixeltable/exprs/object_ref.py +1 -1
  54. pixeltable/exprs/row_builder.py +32 -17
  55. pixeltable/exprs/rowid_ref.py +14 -5
  56. pixeltable/exprs/similarity_expr.py +11 -6
  57. pixeltable/exprs/sql_element_cache.py +1 -1
  58. pixeltable/exprs/type_cast.py +24 -9
  59. pixeltable/ext/__init__.py +1 -0
  60. pixeltable/ext/functions/__init__.py +1 -0
  61. pixeltable/ext/functions/whisperx.py +2 -2
  62. pixeltable/ext/functions/yolox.py +11 -11
  63. pixeltable/func/aggregate_function.py +17 -13
  64. pixeltable/func/callable_function.py +6 -6
  65. pixeltable/func/expr_template_function.py +15 -14
  66. pixeltable/func/function.py +16 -16
  67. pixeltable/func/function_registry.py +11 -8
  68. pixeltable/func/globals.py +4 -2
  69. pixeltable/func/query_template_function.py +12 -13
  70. pixeltable/func/signature.py +18 -9
  71. pixeltable/func/tools.py +10 -17
  72. pixeltable/func/udf.py +106 -11
  73. pixeltable/functions/__init__.py +21 -2
  74. pixeltable/functions/anthropic.py +16 -12
  75. pixeltable/functions/fireworks.py +63 -5
  76. pixeltable/functions/gemini.py +13 -3
  77. pixeltable/functions/globals.py +18 -6
  78. pixeltable/functions/huggingface.py +20 -38
  79. pixeltable/functions/image.py +7 -3
  80. pixeltable/functions/json.py +1 -0
  81. pixeltable/functions/llama_cpp.py +1 -4
  82. pixeltable/functions/mistralai.py +31 -20
  83. pixeltable/functions/ollama.py +4 -18
  84. pixeltable/functions/openai.py +201 -108
  85. pixeltable/functions/replicate.py +11 -10
  86. pixeltable/functions/string.py +70 -7
  87. pixeltable/functions/timestamp.py +21 -8
  88. pixeltable/functions/together.py +66 -52
  89. pixeltable/functions/video.py +1 -0
  90. pixeltable/functions/vision.py +14 -11
  91. pixeltable/functions/whisper.py +2 -1
  92. pixeltable/globals.py +60 -26
  93. pixeltable/index/__init__.py +1 -1
  94. pixeltable/index/btree.py +5 -3
  95. pixeltable/index/embedding_index.py +15 -14
  96. pixeltable/io/__init__.py +1 -1
  97. pixeltable/io/external_store.py +30 -25
  98. pixeltable/io/fiftyone.py +6 -14
  99. pixeltable/io/globals.py +33 -27
  100. pixeltable/io/hf_datasets.py +2 -1
  101. pixeltable/io/label_studio.py +77 -68
  102. pixeltable/io/pandas.py +33 -9
  103. pixeltable/io/parquet.py +9 -12
  104. pixeltable/iterators/__init__.py +1 -0
  105. pixeltable/iterators/audio.py +205 -0
  106. pixeltable/iterators/document.py +19 -8
  107. pixeltable/iterators/image.py +6 -24
  108. pixeltable/iterators/string.py +3 -6
  109. pixeltable/iterators/video.py +1 -7
  110. pixeltable/metadata/__init__.py +7 -1
  111. pixeltable/metadata/converters/convert_10.py +2 -2
  112. pixeltable/metadata/converters/convert_15.py +1 -5
  113. pixeltable/metadata/converters/convert_16.py +2 -4
  114. pixeltable/metadata/converters/convert_17.py +2 -4
  115. pixeltable/metadata/converters/convert_18.py +2 -4
  116. pixeltable/metadata/converters/convert_19.py +2 -5
  117. pixeltable/metadata/converters/convert_20.py +1 -4
  118. pixeltable/metadata/converters/convert_21.py +4 -6
  119. pixeltable/metadata/converters/convert_22.py +1 -0
  120. pixeltable/metadata/converters/convert_23.py +5 -5
  121. pixeltable/metadata/converters/convert_24.py +12 -13
  122. pixeltable/metadata/converters/convert_26.py +23 -0
  123. pixeltable/metadata/converters/util.py +3 -4
  124. pixeltable/metadata/notes.py +1 -0
  125. pixeltable/metadata/schema.py +13 -2
  126. pixeltable/plan.py +173 -98
  127. pixeltable/store.py +42 -26
  128. pixeltable/type_system.py +62 -54
  129. pixeltable/utils/arrow.py +1 -2
  130. pixeltable/utils/coco.py +16 -17
  131. pixeltable/utils/code.py +1 -1
  132. pixeltable/utils/console_output.py +6 -3
  133. pixeltable/utils/description_helper.py +7 -7
  134. pixeltable/utils/documents.py +3 -1
  135. pixeltable/utils/filecache.py +12 -7
  136. pixeltable/utils/http_server.py +9 -8
  137. pixeltable/utils/media_store.py +2 -1
  138. pixeltable/utils/pytorch.py +11 -14
  139. pixeltable/utils/s3.py +1 -0
  140. pixeltable/utils/sql.py +1 -0
  141. pixeltable/utils/transactional_directory.py +2 -2
  142. {pixeltable-0.3.2.dist-info → pixeltable-0.3.3.dist-info}/METADATA +6 -8
  143. pixeltable-0.3.3.dist-info/RECORD +163 -0
  144. pixeltable-0.3.2.dist-info/RECORD +0 -161
  145. {pixeltable-0.3.2.dist-info → pixeltable-0.3.3.dist-info}/LICENSE +0 -0
  146. {pixeltable-0.3.2.dist-info → pixeltable-0.3.3.dist-info}/WHEEL +0 -0
  147. {pixeltable-0.3.2.dist-info → pixeltable-0.3.3.dist-info}/entry_points.txt +0 -0
@@ -43,7 +43,7 @@ def yolox(images: Batch[PIL.Image.Image], *, model_id: str, threshold: float = 0
43
43
  Add a computed column that applies the model `yolox_m` to an existing
44
44
  Pixeltable column `tbl.image` of the table `tbl`:
45
45
 
46
- >>> tbl['detections'] = yolox(tbl.image, model_id='yolox_m', threshold=0.8)
46
+ >>> tbl.add_computed_column(detections=yolox(tbl.image, model_id='yolox_m', threshold=0.8))
47
47
  """
48
48
  import torch
49
49
  from yolox.utils import postprocess # type: ignore[import-untyped]
@@ -55,9 +55,7 @@ def yolox(images: Batch[PIL.Image.Image], *, model_id: str, threshold: float = 0
55
55
  with torch.no_grad():
56
56
  output_tensor = model(batch_tensor)
57
57
 
58
- outputs = postprocess(
59
- output_tensor, 80, threshold, exp.nmsthre, class_agnostic=False
60
- )
58
+ outputs = postprocess(output_tensor, 80, threshold, exp.nmsthre, class_agnostic=False)
61
59
 
62
60
  results: list[dict] = []
63
61
  for image in images:
@@ -65,11 +63,13 @@ def yolox(images: Batch[PIL.Image.Image], *, model_id: str, threshold: float = 0
65
63
  if outputs[0] is None:
66
64
  results.append({'bboxes': [], 'scores': [], 'labels': []})
67
65
  else:
68
- results.append({
69
- 'bboxes': [(output[:4] / ratio).tolist() for output in outputs[0]],
70
- 'scores': [output[4].item() * output[5].item() for output in outputs[0]],
71
- 'labels': [int(output[6]) for output in outputs[0]]
72
- })
66
+ results.append(
67
+ {
68
+ 'bboxes': [(output[:4] / ratio).tolist() for output in outputs[0]],
69
+ 'scores': [output[4].item() * output[5].item() for output in outputs[0]],
70
+ 'labels': [int(output[6]) for output in outputs[0]],
71
+ }
72
+ )
73
73
  return results
74
74
 
75
75
 
@@ -90,8 +90,8 @@ def yolo_to_coco(detections: dict) -> list:
90
90
  Add a computed column that converts the output `tbl.detections` to COCO format, where `tbl.image`
91
91
  is the image for which detections were computed:
92
92
 
93
- >>> tbl['detections'] = yolox(tbl.image, model_id='yolox_m', threshold=0.8)
94
- ... tbl['detections_coco'] = yolo_to_coco(tbl.detections)
93
+ >>> tbl.add_computed_column(detections=yolox(tbl.image, model_id='yolox_m', threshold=0.8))
94
+ ... tbl.add_computed_column(detections_coco=yolo_to_coco(tbl.detections))
95
95
  """
96
96
  bboxes, labels = detections['bboxes'], detections['labels']
97
97
  num_annotations = len(detections['bboxes'])
@@ -31,6 +31,7 @@ class AggregateFunction(Function):
31
31
  allows_std_agg: if True, the aggregate function can be used as a standard aggregate function w/o a window
32
32
  allows_window: if True, the aggregate function can be used with a window
33
33
  """
34
+
34
35
  ORDER_BY_PARAM = 'order_by'
35
36
  GROUP_BY_PARAM = 'group_by'
36
37
  RESERVED_PARAMS = {ORDER_BY_PARAM, GROUP_BY_PARAM}
@@ -45,7 +46,7 @@ class AggregateFunction(Function):
45
46
  self_path: str,
46
47
  requires_order_by: bool,
47
48
  allows_std_agg: bool,
48
- allows_window: bool
49
+ allows_window: bool,
49
50
  ) -> None:
50
51
  if type_substitutions is None:
51
52
  type_substitutions = [None] # single signature with no substitutions
@@ -80,8 +81,12 @@ class AggregateFunction(Function):
80
81
  inferred signature along with the list of init_param_names (for downstream error handling).
81
82
  """
82
83
  # infer type parameters; set return_type=InvalidType() because it has no meaning here
83
- init_sig = Signature.create(py_fn=cls.__init__, return_type=ts.InvalidType(), is_cls_method=True, type_substitutions=type_substitutions)
84
- update_sig = Signature.create(py_fn=cls.update, return_type=ts.InvalidType(), is_cls_method=True, type_substitutions=type_substitutions)
84
+ init_sig = Signature.create(
85
+ py_fn=cls.__init__, return_type=ts.InvalidType(), is_cls_method=True, type_substitutions=type_substitutions
86
+ )
87
+ update_sig = Signature.create(
88
+ py_fn=cls.update, return_type=ts.InvalidType(), is_cls_method=True, type_substitutions=type_substitutions
89
+ )
85
90
  value_sig = Signature.create(py_fn=cls.value, is_cls_method=True, type_substitutions=type_substitutions)
86
91
 
87
92
  init_types = [p.col_type for p in init_sig.parameters.values()]
@@ -110,8 +115,7 @@ class AggregateFunction(Function):
110
115
  duplicate_params = set(p.name for p in init_params) & set(p.name for p in update_params)
111
116
  if len(duplicate_params) > 0:
112
117
  raise excs.Error(
113
- f'__init__() and update() cannot have parameters with the same name: '
114
- f'{", ".join(duplicate_params)}'
118
+ f'__init__() and update() cannot have parameters with the same name: {", ".join(duplicate_params)}'
115
119
  )
116
120
  params = update_params + init_params # init_params are keyword-only and come last
117
121
  init_param_names = [p.name for p in init_params]
@@ -166,7 +170,8 @@ class AggregateFunction(Function):
166
170
  )
167
171
  if not self.allows_window:
168
172
  raise excs.Error(
169
- f'{self.display_name}(): order_by invalid with an aggregate function that does not allow windows')
173
+ f'{self.display_name}(): order_by invalid with an aggregate function that does not allow windows'
174
+ )
170
175
  order_by_clause = kwargs.pop(self.ORDER_BY_PARAM)
171
176
  elif self.requires_order_by:
172
177
  # the first argument is the order-by expr
@@ -185,7 +190,8 @@ class AggregateFunction(Function):
185
190
  if self.GROUP_BY_PARAM in kwargs:
186
191
  if not self.allows_window:
187
192
  raise excs.Error(
188
- f'{self.display_name}(): group_by invalid with an aggregate function that does not allow windows')
193
+ f'{self.display_name}(): group_by invalid with an aggregate function that does not allow windows'
194
+ )
189
195
  group_by_clause = kwargs.pop(self.GROUP_BY_PARAM)
190
196
 
191
197
  resolved_fn, bound_args = self._bind_to_matching_signature(args, kwargs)
@@ -195,7 +201,7 @@ class AggregateFunction(Function):
195
201
  bound_args,
196
202
  return_type,
197
203
  order_by_clause=[order_by_clause] if order_by_clause is not None else [],
198
- group_by_clause=[group_by_clause] if group_by_clause is not None else []
204
+ group_by_clause=[group_by_clause] if group_by_clause is not None else [],
199
205
  )
200
206
 
201
207
  def validate_call(self, bound_args: dict[str, Any]) -> None:
@@ -228,7 +234,7 @@ def uda(
228
234
  requires_order_by: bool = False,
229
235
  allows_std_agg: bool = True,
230
236
  allows_window: bool = False,
231
- type_substitutions: Optional[Sequence[dict]] = None
237
+ type_substitutions: Optional[Sequence[dict]] = None,
232
238
  ) -> Callable[[type[Aggregator]], AggregateFunction]: ...
233
239
 
234
240
 
@@ -249,13 +255,11 @@ def uda(*args, **kwargs):
249
255
  - allows_window: if True, the function can be used with a window
250
256
  """
251
257
  if len(args) == 1 and len(kwargs) == 0 and callable(args[0]):
252
-
253
258
  # Decorator invoked without parentheses: @pxt.uda
254
259
  # Simply call make_aggregator with defaults.
255
260
  return make_aggregator(cls=args[0])
256
261
 
257
262
  else:
258
-
259
263
  # Decorator schema invoked with parentheses: @pxt.uda(**kwargs)
260
264
  # Create a decorator for the specified schema.
261
265
  requires_order_by = kwargs.pop('requires_order_by', False)
@@ -273,7 +277,7 @@ def uda(*args, **kwargs):
273
277
  requires_order_by=requires_order_by,
274
278
  allows_std_agg=allows_std_agg,
275
279
  allows_window=allows_window,
276
- type_substitutions=type_substitutions
280
+ type_substitutions=type_substitutions,
277
281
  )
278
282
 
279
283
  return decorator
@@ -284,7 +288,7 @@ def make_aggregator(
284
288
  requires_order_by: bool = False,
285
289
  allows_std_agg: bool = True,
286
290
  allows_window: bool = False,
287
- type_substitutions: Optional[Sequence[dict]] = None
291
+ type_substitutions: Optional[Sequence[dict]] = None,
288
292
  ) -> AggregateFunction:
289
293
  class_path = f'{cls.__module__}.{cls.__qualname__}'
290
294
  instance = AggregateFunction(cls, type_substitutions, class_path, requires_order_by, allows_std_agg, allows_window)
@@ -1,9 +1,9 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import asyncio
3
4
  import inspect
4
5
  from typing import Any, Callable, Optional, Sequence
5
6
  from uuid import UUID
6
- import asyncio
7
7
 
8
8
  import cloudpickle # type: ignore[import-untyped]
9
9
 
@@ -20,6 +20,7 @@ class CallableFunction(Function):
20
20
  - references to lambdas and functions defined in notebooks, which are pickled and serialized to the store
21
21
  - functions that are defined in modules are serialized via the default mechanism
22
22
  """
23
+
23
24
  py_fns: list[Callable]
24
25
  self_name: Optional[str]
25
26
  batch_size: Optional[int]
@@ -32,7 +33,7 @@ class CallableFunction(Function):
32
33
  self_name: Optional[str] = None,
33
34
  batch_size: Optional[int] = None,
34
35
  is_method: bool = False,
35
- is_property: bool = False
36
+ is_property: bool = False,
36
37
  ):
37
38
  assert len(signatures) > 0
38
39
  assert len(signatures) == len(py_fns)
@@ -165,6 +166,7 @@ class CallableFunction(Function):
165
166
  # this is not a module function
166
167
  assert not self.is_method and not self.is_property
167
168
  from .function_registry import FunctionRegistry
169
+
168
170
  id = FunctionRegistry.get().create_stored_function(self)
169
171
  return {'id': id.hex}
170
172
  return super()._as_dict()
@@ -173,15 +175,13 @@ class CallableFunction(Function):
173
175
  def _from_dict(cls, d: dict) -> Function:
174
176
  if 'id' in d:
175
177
  from .function_registry import FunctionRegistry
178
+
176
179
  return FunctionRegistry.get().get_stored_function(UUID(hex=d['id']))
177
180
  return super()._from_dict(d)
178
181
 
179
182
  def to_store(self) -> tuple[dict, bytes]:
180
183
  assert not self.is_polymorphic # multi-signature UDFs not allowed for stored fns
181
- md = {
182
- 'signature': self.signature.as_dict(),
183
- 'batch_size': self.batch_size,
184
- }
184
+ md = {'signature': self.signature.as_dict(), 'batch_size': self.batch_size}
185
185
  return md, cloudpickle.dumps(self.py_fn)
186
186
 
187
187
  @classmethod
@@ -14,9 +14,10 @@ class ExprTemplate:
14
14
  along with various precomputed metadata. (This is analogous to a `Callable`-`Signature` pair in a
15
15
  `CallableFunction`.)
16
16
  """
17
+
17
18
  expr: 'pixeltable.exprs.Expr'
18
19
  signature: Signature
19
- param_exprs: list['pixeltable.exprs.Variable']
20
+ param_exprs: dict[str, 'pixeltable.exprs.Variable']
20
21
 
21
22
  def __init__(self, expr: 'pixeltable.exprs.Expr', signature: Signature):
22
23
  from pixeltable import exprs
@@ -24,17 +25,18 @@ class ExprTemplate:
24
25
  self.expr = expr
25
26
  self.signature = signature
26
27
 
27
- self.param_exprs = list(set(expr.subexprs(expr_class=exprs.Variable)))
28
- # make sure there are no duplicate names
29
- assert len(self.param_exprs) == len(set(p.name for p in self.param_exprs))
30
- self.param_exprs_by_name = {p.name: p for p in self.param_exprs}
28
+ self.param_exprs = {name: exprs.Variable(name, param.col_type) for name, param in signature.parameters.items()}
29
+
30
+ # validate that all variables in the expression are parameters
31
+ for var in expr.subexprs(expr_class=exprs.Variable):
32
+ assert var.name in self.param_exprs, f"Variable '{var.name}' in expression is not a parameter"
31
33
 
32
34
  # verify default values
33
35
  self.defaults: dict[str, exprs.Literal] = {} # key: param name, value: default value converted to a Literal
34
36
  for param in self.signature.parameters.values():
35
37
  if param.default is inspect.Parameter.empty:
36
38
  continue
37
- param_expr = self.param_exprs_by_name[param.name]
39
+ param_expr = self.param_exprs[param.name]
38
40
  try:
39
41
  literal_default = exprs.Literal(param.default, col_type=param_expr.col_type)
40
42
  self.defaults[param.name] = literal_default
@@ -45,6 +47,7 @@ class ExprTemplate:
45
47
 
46
48
  class ExprTemplateFunction(Function):
47
49
  """A parameterized expression from which an executable Expr is created with a function call."""
50
+
48
51
  templates: list[ExprTemplate]
49
52
  self_name: str
50
53
 
@@ -70,11 +73,12 @@ class ExprTemplateFunction(Function):
70
73
  bound_args = self.signature.py_signature.bind(*args, **kwargs).arguments
71
74
  # apply defaults, otherwise we might have Parameters left over
72
75
  bound_args.update(
73
- {param_name: default for param_name, default in template.defaults.items() if param_name not in bound_args})
76
+ {param_name: default for param_name, default in template.defaults.items() if param_name not in bound_args}
77
+ )
74
78
  result = template.expr.copy()
75
79
  arg_exprs: dict[exprs.Expr, exprs.Expr] = {}
76
80
  for param_name, arg in bound_args.items():
77
- param_expr = template.param_exprs_by_name[param_name]
81
+ param_expr = template.param_exprs[param_name]
78
82
  if not isinstance(arg, exprs.Expr):
79
83
  # TODO: use the available param_expr.col_type
80
84
  arg_expr = exprs.Expr.from_object(arg)
@@ -125,17 +129,14 @@ class ExprTemplateFunction(Function):
125
129
  return super()._as_dict()
126
130
  assert not self.is_polymorphic
127
131
  assert len(self.templates) == 1
128
- return {
129
- 'expr': self.template.expr.as_dict(),
130
- 'signature': self.signature.as_dict(),
131
- 'name': self.name,
132
- }
132
+ return {'expr': self.template.expr.as_dict(), 'signature': self.signature.as_dict(), 'name': self.name}
133
133
 
134
134
  @classmethod
135
135
  def _from_dict(cls, d: dict) -> Function:
136
136
  if 'expr' not in d:
137
- return super()._from_dict(d)
137
+ return super()._from_dict(d)
138
138
  assert 'signature' in d and 'name' in d
139
139
  import pixeltable.exprs as exprs
140
+
140
141
  template = ExprTemplate(exprs.Expr.from_dict(d['expr']), Signature.from_dict(d['signature']))
141
142
  return cls([template], name=d['name'])
@@ -2,7 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  import importlib
4
4
  import inspect
5
- from abc import abstractmethod, ABC
5
+ from abc import ABC, abstractmethod
6
6
  from copy import copy
7
7
  from typing import TYPE_CHECKING, Any, Callable, Optional, Sequence, cast
8
8
 
@@ -12,6 +12,7 @@ from typing_extensions import Self
12
12
  import pixeltable as pxt
13
13
  import pixeltable.exceptions as excs
14
14
  import pixeltable.type_system as ts
15
+
15
16
  from .globals import resolve_symbol
16
17
  from .signature import Signature
17
18
 
@@ -48,13 +49,12 @@ class Function(ABC):
48
49
  # of the parameters of the original function, with the same type.
49
50
  _resource_pool: Callable[..., Optional[str]]
50
51
 
51
-
52
52
  def __init__(
53
53
  self,
54
54
  signatures: list[Signature],
55
55
  self_path: Optional[str] = None,
56
56
  is_method: bool = False,
57
- is_property: bool = False
57
+ is_property: bool = False,
58
58
  ):
59
59
  # Check that stored functions cannot be declared using `is_method` or `is_property`:
60
60
  assert not ((is_method or is_property) and self_path is None)
@@ -80,7 +80,7 @@ class Function(ABC):
80
80
  return '<anonymous>'
81
81
  ptf_prefix = 'pixeltable.functions.'
82
82
  if self.self_path.startswith(ptf_prefix):
83
- return self.self_path[len(ptf_prefix):]
83
+ return self.self_path[len(ptf_prefix) :]
84
84
  return self.self_path
85
85
 
86
86
  @property
@@ -197,10 +197,13 @@ class Function(ABC):
197
197
  """Return the kwargs to pass to callable, given kwargs passed to this function"""
198
198
  bound_args = self.signature.py_signature.bind(**kwargs).arguments
199
199
  # add defaults to bound_args, if not already present
200
- bound_args.update({
201
- name: param.default
202
- for name, param in self.signature.parameters.items() if name not in bound_args and param.has_default()
203
- })
200
+ bound_args.update(
201
+ {
202
+ name: param.default
203
+ for name, param in self.signature.parameters.items()
204
+ if name not in bound_args and param.has_default()
205
+ }
206
+ )
204
207
  result: dict[str, Any] = {}
205
208
  sig = inspect.signature(callable)
206
209
  for param in sig.parameters.values():
@@ -233,7 +236,9 @@ class Function(ABC):
233
236
  for param in fn_sig.parameters.values():
234
237
  for self_sig in self.signatures:
235
238
  if param.name not in self_sig.parameters:
236
- raise ValueError(f'`conditional_return_type` has parameter `{param.name}` that is not in a signature')
239
+ raise ValueError(
240
+ f'`conditional_return_type` has parameter `{param.name}` that is not in a signature'
241
+ )
237
242
  self._conditional_return_type = fn
238
243
  return fn
239
244
 
@@ -279,9 +284,7 @@ class Function(ABC):
279
284
  raise excs.Error(f'Expected type `{param.col_type}` for parameter `{k}`; got `{expr.col_type}`')
280
285
  bindings[k] = v # Use the original value, not the Expr (The Expr is only for validation)
281
286
 
282
- residual_params = [
283
- p for p in self.signature.parameters.values() if p.name not in bindings
284
- ]
287
+ residual_params = [p for p in self.signature.parameters.values() if p.name not in bindings]
285
288
 
286
289
  # Bind each remaining parameter to a like-named variable
287
290
  for param in residual_params:
@@ -346,10 +349,7 @@ class Function(ABC):
346
349
  def _as_dict(self) -> dict:
347
350
  """Default serialization: store the path to self (which includes the module path) and signature."""
348
351
  assert self.self_path is not None
349
- return {
350
- 'path': self.self_path,
351
- 'signature': self.signature.as_dict(),
352
- }
352
+ return {'path': self.self_path, 'signature': self.signature.as_dict()}
353
353
 
354
354
  @classmethod
355
355
  def from_dict(cls, d: dict) -> Function:
@@ -18,11 +18,13 @@ from .function import Function
18
18
 
19
19
  _logger = logging.getLogger('pixeltable')
20
20
 
21
+
21
22
  class FunctionRegistry:
22
23
  """
23
24
  A central registry for all Functions. Handles interactions with the backing store.
24
25
  Function are loaded from the store on demand.
25
26
  """
27
+
26
28
  _instance: Optional[FunctionRegistry] = None
27
29
 
28
30
  @classmethod
@@ -151,14 +153,16 @@ class FunctionRegistry:
151
153
  return self.type_methods[base_type][name]
152
154
  return None
153
155
 
154
- #def create_function(self, md: schema.FunctionMd, binary_obj: bytes, dir_id: Optional[UUID] = None) -> UUID:
156
+ # def create_function(self, md: schema.FunctionMd, binary_obj: bytes, dir_id: Optional[UUID] = None) -> UUID:
155
157
  def create_stored_function(self, pxt_fn: Function, dir_id: Optional[UUID] = None) -> UUID:
156
158
  fn_md, binary_obj = pxt_fn.to_store()
157
159
  md = schema.FunctionMd(name=pxt_fn.name, md=fn_md, py_version=sys.version, class_name=pxt_fn.__class__.__name__)
158
160
  with env.Env.get().engine.begin() as conn:
159
161
  res = conn.execute(
160
- sql.insert(schema.Function.__table__)
161
- .values(dir_id=dir_id, md=dataclasses.asdict(md), binary_obj=binary_obj))
162
+ sql.insert(schema.Function.__table__).values(
163
+ dir_id=dir_id, md=dataclasses.asdict(md), binary_obj=binary_obj
164
+ )
165
+ )
162
166
  id = res.inserted_primary_key[0]
163
167
  _logger.info(f'Created function {pxt_fn.name} (id {id}) in store')
164
168
  self.stored_fns_by_id[id] = pxt_fn
@@ -167,8 +171,9 @@ class FunctionRegistry:
167
171
  def get_stored_function(self, id: UUID) -> Function:
168
172
  if id in self.stored_fns_by_id:
169
173
  return self.stored_fns_by_id[id]
170
- stmt = sql.select(schema.Function.md, schema.Function.binary_obj, schema.Function.dir_id)\
171
- .where(schema.Function.id == id)
174
+ stmt = sql.select(schema.Function.md, schema.Function.binary_obj, schema.Function.dir_id).where(
175
+ schema.Function.id == id
176
+ )
172
177
  with env.Env.get().engine.begin() as conn:
173
178
  row = conn.execute(stmt).fetchone()
174
179
  if row is None:
@@ -238,7 +243,5 @@ class FunctionRegistry:
238
243
  def delete_function(self, id: UUID) -> None:
239
244
  assert id is not None
240
245
  with env.Env.get().engine.begin() as conn:
241
- conn.execute(
242
- sql.delete(schema.Function.__table__)
243
- .where(schema.Function.id == id))
246
+ conn.execute(sql.delete(schema.Function.__table__).where(schema.Function.id == id))
244
247
  _logger.info(f'Deleted function with id {id} from store')
@@ -28,10 +28,12 @@ def validate_symbol_path(fn_path: str) -> None:
28
28
  fn_name = path_elems[-1]
29
29
  if any(el == '<locals>' for el in path_elems):
30
30
  raise excs.Error(
31
- f'{fn_name}(): nested functions are not supported. Move the function to the module level or into a class.')
31
+ f'{fn_name}(): nested functions are not supported. Move the function to the module level or into a class.'
32
+ )
32
33
  if any(not el.isidentifier() for el in path_elems):
33
34
  raise excs.Error(
34
- f'{fn_name}(): cannot resolve symbol path {fn_path}. Move the function to the module level or into a class.')
35
+ f'{fn_name}(): cannot resolve symbol path {fn_path}. Move the function to the module level or into a class.'
36
+ )
35
37
 
36
38
 
37
39
  def get_caller_module_path() -> str:
@@ -18,6 +18,7 @@ if TYPE_CHECKING:
18
18
 
19
19
  class QueryTemplateFunction(Function):
20
20
  """A parameterized query/DataFrame from which an executable DataFrame is created with a function call."""
21
+
21
22
  template_df: Optional['DataFrame']
22
23
  self_name: Optional[str]
23
24
  conn: Optional[sql.engine.Connection]
@@ -35,14 +36,14 @@ class QueryTemplateFunction(Function):
35
36
  var_exprs = [exprs.Variable(param.name, param.col_type) for param in params]
36
37
  template_df = template_callable(*var_exprs)
37
38
  from pixeltable import DataFrame
39
+
38
40
  assert isinstance(template_df, DataFrame)
39
41
  # we take params and return json
40
42
  sig = Signature(return_type=ts.JsonType(), parameters=params)
41
43
  return QueryTemplateFunction(template_df, sig, path=path, name=name)
42
44
 
43
45
  def __init__(
44
- self, template_df: Optional['DataFrame'], sig: Signature, path: Optional[str] = None,
45
- name: Optional[str] = None,
46
+ self, template_df: Optional['DataFrame'], sig: Signature, path: Optional[str] = None, name: Optional[str] = None
46
47
  ):
47
48
  assert sig is not None
48
49
  super().__init__([sig], self_path=path)
@@ -74,11 +75,12 @@ class QueryTemplateFunction(Function):
74
75
  return True
75
76
 
76
77
  async def aexec(self, *args: Any, **kwargs: Any) -> Any:
77
- #assert not self.is_polymorphic
78
+ # assert not self.is_polymorphic
78
79
  bound_args = self.signature.py_signature.bind(*args, **kwargs).arguments
79
80
  # apply defaults, otherwise we might have Parameters left over
80
81
  bound_args.update(
81
- {param_name: default for param_name, default in self.defaults.items() if param_name not in bound_args})
82
+ {param_name: default for param_name, default in self.defaults.items() if param_name not in bound_args}
83
+ )
82
84
  bound_df = self.template_df.bind(bound_args)
83
85
  result = await bound_df._acollect(self.conn)
84
86
  return list(result)
@@ -97,30 +99,27 @@ class QueryTemplateFunction(Function):
97
99
  @classmethod
98
100
  def _from_dict(cls, d: dict) -> Function:
99
101
  from pixeltable.dataframe import DataFrame
102
+
100
103
  return cls(DataFrame.from_dict(d['df']), Signature.from_dict(d['signature']), name=d['name'])
101
104
 
102
105
 
103
106
  @overload
104
107
  def query(py_fn: Callable) -> QueryTemplateFunction: ...
105
108
 
109
+
106
110
  @overload
107
- def query(
108
- *,
109
- param_types: Optional[list[ts.ColumnType]] = None
110
- ) -> Callable[[Callable], QueryTemplateFunction]: ...
111
+ def query(*, param_types: Optional[list[ts.ColumnType]] = None) -> Callable[[Callable], QueryTemplateFunction]: ...
112
+
111
113
 
112
114
  def query(*args: Any, **kwargs: Any) -> Any:
113
- def make_query_template(
114
- py_fn: Callable, param_types: Optional[list[ts.ColumnType]]
115
- ) -> QueryTemplateFunction:
115
+ def make_query_template(py_fn: Callable, param_types: Optional[list[ts.ColumnType]]) -> QueryTemplateFunction:
116
116
  if py_fn.__module__ != '__main__' and py_fn.__name__.isidentifier():
117
117
  # this is a named function in a module
118
118
  function_path = f'{py_fn.__module__}.{py_fn.__qualname__}'
119
119
  else:
120
120
  function_path = None
121
121
  query_name = py_fn.__name__
122
- query_fn = QueryTemplateFunction.create(
123
- py_fn, param_types=param_types, path=function_path, name=query_name)
122
+ query_fn = QueryTemplateFunction.create(py_fn, param_types=param_types, path=function_path, name=query_name)
124
123
  return query_fn
125
124
 
126
125
  # TODO: verify that the inferred return type matches that of the template
@@ -63,7 +63,7 @@ class Parameter:
63
63
  col_type=ts.ColumnType.from_dict(d['col_type']) if d['col_type'] is not None else None,
64
64
  kind=getattr(inspect.Parameter, d['kind']),
65
65
  is_batched=d['is_batched'],
66
- default=default
66
+ default=default,
67
67
  )
68
68
 
69
69
  def to_py_param(self) -> inspect.Parameter:
@@ -80,6 +80,7 @@ class Signature:
80
80
 
81
81
  - self.is_batched: return type is a Batch[...] type
82
82
  """
83
+
83
84
  SPECIAL_PARAM_NAMES = ['group_by', 'order_by']
84
85
 
85
86
  def __init__(self, return_type: ts.ColumnType, parameters: list[Parameter], is_batched: bool = False):
@@ -131,9 +132,12 @@ class Signature:
131
132
  if param_name not in other.parameters:
132
133
  return False
133
134
  other_param = other.parameters[param_name]
134
- if (param.kind != other_param.kind or
135
- (param.col_type is None) != (other_param.col_type is None) or # this can happen if they are varargs
136
- param.col_type is not None and not other_param.col_type.is_supertype_of(param.col_type, ignore_nullable=True)):
135
+ if (
136
+ param.kind != other_param.kind
137
+ or (param.col_type is None) != (other_param.col_type is None) # this can happen if they are varargs
138
+ or param.col_type is not None
139
+ and not other_param.col_type.is_supertype_of(param.col_type, ignore_nullable=True)
140
+ ):
137
141
  return False
138
142
 
139
143
  # Check (iii)
@@ -193,7 +197,7 @@ class Signature:
193
197
  py_params: Optional[list[inspect.Parameter]] = None,
194
198
  param_types: Optional[list[ts.ColumnType]] = None,
195
199
  type_substitutions: Optional[dict] = None,
196
- is_cls_method: bool = False
200
+ is_cls_method: bool = False,
197
201
  ) -> list[Parameter]:
198
202
  assert (py_fn is None) != (py_params is None)
199
203
  if py_fn is not None:
@@ -229,8 +233,11 @@ class Signature:
229
233
  if param_type is None:
230
234
  raise excs.Error(f'Cannot infer pixeltable type for parameter {param.name}')
231
235
 
232
- parameters.append(Parameter(
233
- param.name, col_type=param_type, kind=param.kind, is_batched=is_batched, default=param.default))
236
+ parameters.append(
237
+ Parameter(
238
+ param.name, col_type=param_type, kind=param.kind, is_batched=is_batched, default=param.default
239
+ )
240
+ )
234
241
 
235
242
  return parameters
236
243
 
@@ -241,7 +248,7 @@ class Signature:
241
248
  param_types: Optional[list[ts.ColumnType]] = None,
242
249
  return_type: Optional[ts.ColumnType] = None,
243
250
  type_substitutions: Optional[dict] = None,
244
- is_cls_method: bool = False
251
+ is_cls_method: bool = False,
245
252
  ) -> Signature:
246
253
  """Create a signature for the given Callable.
247
254
  Infer the parameter and return types, if none are specified.
@@ -250,7 +257,9 @@ class Signature:
250
257
  if type_substitutions is None:
251
258
  type_substitutions = {}
252
259
 
253
- parameters = cls.create_parameters(py_fn=py_fn, param_types=param_types, is_cls_method=is_cls_method, type_substitutions=type_substitutions)
260
+ parameters = cls.create_parameters(
261
+ py_fn=py_fn, param_types=param_types, is_cls_method=is_cls_method, type_substitutions=type_substitutions
262
+ )
254
263
  sig = inspect.signature(py_fn)
255
264
  if return_type is None:
256
265
  py_type: Optional[type]