pixeltable 0.2.8__py3-none-any.whl → 0.2.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (77)
  1. pixeltable/__init__.py +15 -33
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/catalog.py +1 -1
  4. pixeltable/catalog/column.py +29 -11
  5. pixeltable/catalog/dir.py +2 -2
  6. pixeltable/catalog/insertable_table.py +5 -55
  7. pixeltable/catalog/named_function.py +2 -2
  8. pixeltable/catalog/schema_object.py +2 -7
  9. pixeltable/catalog/table.py +307 -186
  10. pixeltable/catalog/table_version.py +109 -63
  11. pixeltable/catalog/table_version_path.py +28 -5
  12. pixeltable/catalog/view.py +20 -10
  13. pixeltable/dataframe.py +129 -26
  14. pixeltable/env.py +29 -18
  15. pixeltable/exec/exec_context.py +5 -0
  16. pixeltable/exec/exec_node.py +1 -0
  17. pixeltable/exec/in_memory_data_node.py +29 -24
  18. pixeltable/exec/sql_scan_node.py +1 -1
  19. pixeltable/exprs/column_ref.py +13 -8
  20. pixeltable/exprs/data_row.py +4 -0
  21. pixeltable/exprs/expr.py +16 -1
  22. pixeltable/exprs/function_call.py +4 -4
  23. pixeltable/exprs/row_builder.py +29 -20
  24. pixeltable/exprs/similarity_expr.py +4 -3
  25. pixeltable/ext/functions/yolox.py +2 -1
  26. pixeltable/func/__init__.py +1 -0
  27. pixeltable/func/aggregate_function.py +14 -12
  28. pixeltable/func/callable_function.py +8 -6
  29. pixeltable/func/expr_template_function.py +13 -19
  30. pixeltable/func/function.py +3 -6
  31. pixeltable/func/query_template_function.py +84 -0
  32. pixeltable/func/signature.py +68 -23
  33. pixeltable/func/udf.py +13 -10
  34. pixeltable/functions/__init__.py +6 -91
  35. pixeltable/functions/eval.py +26 -14
  36. pixeltable/functions/fireworks.py +25 -23
  37. pixeltable/functions/globals.py +62 -0
  38. pixeltable/functions/huggingface.py +20 -16
  39. pixeltable/functions/image.py +170 -1
  40. pixeltable/functions/openai.py +95 -128
  41. pixeltable/functions/string.py +10 -2
  42. pixeltable/functions/together.py +95 -84
  43. pixeltable/functions/util.py +16 -0
  44. pixeltable/functions/video.py +94 -16
  45. pixeltable/functions/whisper.py +74 -0
  46. pixeltable/globals.py +1 -1
  47. pixeltable/io/__init__.py +10 -0
  48. pixeltable/io/external_store.py +370 -0
  49. pixeltable/io/globals.py +51 -22
  50. pixeltable/io/label_studio.py +639 -0
  51. pixeltable/io/parquet.py +1 -1
  52. pixeltable/iterators/__init__.py +9 -0
  53. pixeltable/iterators/string.py +40 -0
  54. pixeltable/metadata/__init__.py +6 -8
  55. pixeltable/metadata/converters/convert_10.py +2 -4
  56. pixeltable/metadata/converters/convert_12.py +7 -2
  57. pixeltable/metadata/converters/convert_13.py +6 -8
  58. pixeltable/metadata/converters/convert_14.py +2 -4
  59. pixeltable/metadata/converters/convert_15.py +44 -0
  60. pixeltable/metadata/converters/convert_16.py +18 -0
  61. pixeltable/metadata/converters/util.py +66 -0
  62. pixeltable/metadata/schema.py +3 -3
  63. pixeltable/plan.py +8 -7
  64. pixeltable/store.py +1 -1
  65. pixeltable/tool/create_test_db_dump.py +147 -54
  66. pixeltable/tool/embed_udf.py +9 -0
  67. pixeltable/type_system.py +1 -2
  68. pixeltable/utils/code.py +34 -0
  69. {pixeltable-0.2.8.dist-info → pixeltable-0.2.10.dist-info}/METADATA +1 -1
  70. pixeltable-0.2.10.dist-info/RECORD +131 -0
  71. pixeltable/datatransfer/__init__.py +0 -1
  72. pixeltable/datatransfer/label_studio.py +0 -452
  73. pixeltable/datatransfer/remote.py +0 -85
  74. pixeltable/functions/pil/image.py +0 -147
  75. pixeltable-0.2.8.dist-info/RECORD +0 -124
  76. {pixeltable-0.2.8.dist-info → pixeltable-0.2.10.dist-info}/LICENSE +0 -0
  77. {pixeltable-0.2.8.dist-info → pixeltable-0.2.10.dist-info}/WHEEL +0 -0
@@ -1,16 +1,19 @@
1
1
  from __future__ import annotations
2
- from typing import Optional, List, Any, Dict, Tuple, Set, Iterable
3
- from dataclasses import dataclass
4
- import time
2
+
5
3
  import sys
4
+ import time
5
+ from dataclasses import dataclass
6
+ from typing import Optional, List, Any, Dict, Tuple, Set
7
+
8
+ import sqlalchemy as sql
6
9
 
10
+ import pixeltable.catalog as catalog
11
+ import pixeltable.exceptions as excs
12
+ import pixeltable.func as func
13
+ import pixeltable.utils as utils
14
+ from .data_row import DataRow
7
15
  from .expr import Expr
8
16
  from .expr_set import ExprSet
9
- from .data_row import DataRow
10
- import pixeltable.utils as utils
11
- import pixeltable.func as func
12
- import pixeltable.exceptions as excs
13
- import pixeltable.catalog as catalog
14
17
 
15
18
 
16
19
  class ExecProfile:
@@ -74,10 +77,10 @@ class RowBuilder:
74
77
  # - explicitly requested output_exprs
75
78
  # - values for computed columns
76
79
  resolve_cols = set(columns)
77
- self.output_exprs = [
80
+ self.output_exprs = ExprSet([
78
81
  self._record_unique_expr(e.copy().resolve_computed_cols(resolve_cols=resolve_cols), recursive=True)
79
82
  for e in output_exprs
80
- ]
83
+ ])
81
84
 
82
85
  # record columns for create_table_row()
83
86
  from .column_ref import ColumnRef
@@ -88,16 +91,15 @@ class RowBuilder:
88
91
  # create a copy here so we don't reuse execution state and resolve references to computed columns
89
92
  expr = col.value_expr.copy().resolve_computed_cols(resolve_cols=resolve_cols)
90
93
  expr = self._record_unique_expr(expr, recursive=True)
91
- self.add_table_column(col, expr.slot_idx)
92
- self.output_exprs.append(expr)
93
94
  else:
94
95
  # record a ColumnRef so that references to this column resolve to the same slot idx
95
- ref = ColumnRef(col)
96
- ref = self._record_unique_expr(ref, recursive=False)
97
- self.add_table_column(col, ref.slot_idx)
96
+ expr = ColumnRef(col)
97
+ expr = self._record_unique_expr(expr, recursive=False)
98
+ self.add_table_column(col, expr.slot_idx)
99
+ self.output_exprs.append(expr)
98
100
 
99
101
  # default eval ctx: all output exprs
100
- self.default_eval_ctx = self.create_eval_ctx(self.output_exprs, exclude=unique_input_exprs)
102
+ self.default_eval_ctx = self.create_eval_ctx(list(self.output_exprs), exclude=unique_input_exprs)
101
103
 
102
104
  # references to unstored iterator columns:
103
105
  # - those ColumnRefs need to instantiate iterators
@@ -107,9 +109,11 @@ class RowBuilder:
107
109
  # - the separate eval ctx allows the ColumnRef to materialize the iterator args only when the underlying
108
110
  # iterated object changes
109
111
  col_refs = [e for e in self.unique_exprs if isinstance(e, ColumnRef)]
112
+
110
113
  def refs_unstored_iter_col(col_ref: ColumnRef) -> bool:
111
114
  tbl = col_ref.col.tbl
112
115
  return tbl.is_component_view() and tbl.is_iterator_column(col_ref.col) and not col_ref.col.is_stored
116
+
113
117
  unstored_iter_col_refs = [col_ref for col_ref in col_refs if refs_unstored_iter_col(col_ref)]
114
118
  component_views = [col_ref.col.tbl for col_ref in unstored_iter_col_refs]
115
119
  unstored_iter_args = {view.id: view.iterator_args.copy() for view in component_views}
@@ -154,13 +158,19 @@ class RowBuilder:
154
158
  """Return ColumnSlotIdx for output columns"""
155
159
  return self.table_columns
156
160
 
161
+ def set_conn(self, conn: sql.engine.Connection) -> None:
162
+ from .function_call import FunctionCall
163
+ for expr in self.unique_exprs:
164
+ if isinstance(expr, FunctionCall) and isinstance(expr.fn, func.QueryTemplateFunction):
165
+ expr.fn.set_conn(conn)
166
+
157
167
  @property
158
168
  def num_materialized(self) -> int:
159
169
  return self.next_slot_idx
160
170
 
161
- def get_output_exprs(self) -> List[Expr]:
171
+ def get_output_exprs(self) -> list[Expr]:
162
172
  """Returns exprs that were requested in the c'tor and require evaluation"""
163
- return self.output_exprs
173
+ return list(self.output_exprs)
164
174
 
165
175
  def _next_slot_idx(self) -> int:
166
176
  result = self.next_slot_idx
@@ -252,7 +262,7 @@ class RowBuilder:
252
262
  result_ids.sort()
253
263
  return [self.unique_exprs[id] for id in result_ids]
254
264
 
255
- def create_eval_ctx(self, targets: List[Expr], exclude: Optional[List[Expr]] = None) -> EvalCtx:
265
+ def create_eval_ctx(self, targets: list[Expr], exclude: Optional[list[Expr]] = None) -> EvalCtx:
256
266
  """Return EvalCtx for targets"""
257
267
  if exclude is None:
258
268
  exclude = []
@@ -326,4 +336,3 @@ class RowBuilder:
326
336
  table_row[col.errormsg_store_name()] = None
327
337
 
328
338
  return table_row, num_excs
329
-
@@ -18,7 +18,6 @@ class SimilarityExpr(Expr):
18
18
  super().__init__(ts.FloatType())
19
19
  self.components = [col_ref, item]
20
20
  self.id = self._create_id()
21
- assert isinstance(item, Literal)
22
21
  assert item.col_type.is_string_type() or item.col_type.is_image_type()
23
22
 
24
23
  # determine index to use
@@ -47,12 +46,14 @@ class SimilarityExpr(Expr):
47
46
  return f'{self.components[0]}.similarity({self.components[1]})'
48
47
 
49
48
  def sql_expr(self) -> Optional[sql.ClauseElement]:
50
- assert isinstance(self.components[1], Literal)
49
+ if not isinstance(self.components[1], Literal):
50
+ raise excs.Error(f'similarity(): requires a string or a PIL.Image.Image object, not an expression')
51
51
  item = self.components[1].val
52
52
  return self.idx_info.idx.similarity_clause(self.idx_info.val_col, item)
53
53
 
54
54
  def as_order_by_clause(self, is_asc: bool) -> Optional[sql.ClauseElement]:
55
- assert isinstance(self.components[1], Literal)
55
+ if not isinstance(self.components[1], Literal):
56
+ raise excs.Error(f'similarity(): requires a string or a PIL.Image.Image object, not an expression')
56
57
  item = self.components[1].val
57
58
  return self.idx_info.idx.order_by_clause(self.idx_info.val_col, item, is_asc)
58
59
 
@@ -14,7 +14,7 @@ from yolox.utils import postprocess
14
14
  import pixeltable as pxt
15
15
  from pixeltable import env
16
16
  from pixeltable.func import Batch
17
- from pixeltable.functions.util import resolve_torch_device
17
+ from pixeltable.functions.util import normalize_image_mode
18
18
 
19
19
  _logger = logging.getLogger('pixeltable')
20
20
 
@@ -74,6 +74,7 @@ def yolo_to_coco(detections: dict) -> list:
74
74
 
75
75
  def _images_to_tensors(images: Iterable[PIL.Image.Image], exp: Exp) -> Iterator[torch.Tensor]:
76
76
  for image in images:
77
+ image = normalize_image_mode(image)
77
78
  image_transform, _ = _val_transform(np.array(image), None, exp.test_size)
78
79
  yield torch.from_numpy(image_transform)
79
80
 
@@ -3,5 +3,6 @@ from .callable_function import CallableFunction
3
3
  from .expr_template_function import ExprTemplateFunction
4
4
  from .function import Function
5
5
  from .function_registry import FunctionRegistry
6
+ from .query_template_function import QueryTemplateFunction
6
7
  from .signature import Signature, Parameter, Batch
7
8
  from .udf import udf, make_function, expr_udf
@@ -43,27 +43,29 @@ class AggregateFunction(Function):
43
43
 
44
44
  # our signature is the signature of 'update', but without self,
45
45
  # plus the parameters of 'init' as keyword-only parameters
46
- update_params = list(inspect.signature(self.agg_cls.update).parameters.values())[1:] # leave out self
47
- assert len(update_params) == len(update_types)
46
+ py_update_params = list(inspect.signature(self.agg_cls.update).parameters.values())[1:] # leave out self
47
+ assert len(py_update_params) == len(update_types)
48
+ update_params = [
49
+ Parameter(p.name, col_type=update_types[i], kind=p.kind, default=p.default)
50
+ for i, p in enumerate(py_update_params)
51
+ ]
52
+ # starting at 1: leave out self
53
+ py_init_params = list(inspect.signature(self.agg_cls.__init__).parameters.values())[1:]
54
+ assert len(py_init_params) == len(init_types)
48
55
  init_params = [
49
- inspect.Parameter(p.name, inspect.Parameter.KEYWORD_ONLY, default=p.default)
50
- # starting at 1: leave out self
51
- for p in itertools.islice(inspect.signature(self.agg_cls.__init__).parameters.values(), 1, None)
56
+ Parameter(p.name, col_type=init_types[i], kind=inspect.Parameter.KEYWORD_ONLY, default=p.default)
57
+ for i, p in enumerate(py_init_params)
52
58
  ]
53
- assert len(init_params) == len(init_types)
54
59
  duplicate_params = set(p.name for p in init_params) & set(p.name for p in update_params)
55
60
  if len(duplicate_params) > 0:
56
61
  raise excs.Error(
57
62
  f'__init__() and update() cannot have parameters with the same name: '
58
63
  f'{", ".join(duplicate_params)}'
59
64
  )
60
- py_params = update_params + init_params # init_params are keyword-only and come last
61
- py_signature = inspect.Signature(py_params)
65
+ params = update_params + init_params # init_params are keyword-only and come last
62
66
 
63
- params = [Parameter(p.name, update_types[i], p.kind, is_batched=False) for i, p in enumerate(update_params)]
64
- params.extend([Parameter(p.name, init_types[i], p.kind, is_batched=False) for i, p in enumerate(init_params)])
65
67
  signature = Signature(value_type, params)
66
- super().__init__(signature, py_signature=py_signature, self_path=self_path)
68
+ super().__init__(signature, self_path=self_path)
67
69
  self.init_param_names = [p.name for p in init_params]
68
70
 
69
71
  # make sure the signature doesn't contain reserved parameter names;
@@ -115,7 +117,7 @@ class AggregateFunction(Function):
115
117
  f'{self.display_name}(): group_by invalid with an aggregate function that does not allow windows')
116
118
  group_by_clause = kwargs.pop(self.GROUP_BY_PARAM)
117
119
 
118
- bound_args = self.py_signature.bind(*args, **kwargs)
120
+ bound_args = self.signature.py_signature.bind(*args, **kwargs)
119
121
  self.validate_call(bound_args.arguments)
120
122
  return exprs.FunctionCall(
121
123
  self, bound_args.arguments,
@@ -25,8 +25,7 @@ class CallableFunction(Function):
25
25
  self.py_fn = py_fn
26
26
  self.self_name = self_name
27
27
  self.batch_size = batch_size
28
- py_signature = inspect.signature(self.py_fn)
29
- super().__init__(signature, py_signature, self_path=self_path)
28
+ super().__init__(signature, self_path=self_path)
30
29
 
31
30
  @property
32
31
  def is_batched(self) -> bool:
@@ -91,16 +90,19 @@ class CallableFunction(Function):
91
90
  return super()._from_dict(d)
92
91
 
93
92
  def to_store(self) -> tuple[dict, bytes]:
94
- md = self.signature.as_dict()
95
- if self.batch_size is not None:
96
- md['batch_size'] = self.batch_size
93
+ md = {
94
+ 'signature': self.signature.as_dict(),
95
+ 'batch_size': self.batch_size,
96
+ }
97
97
  return md, cloudpickle.dumps(self.py_fn)
98
98
 
99
99
  @classmethod
100
100
  def from_store(cls, name: Optional[str], md: dict, binary_obj: bytes) -> Function:
101
101
  py_fn = cloudpickle.loads(binary_obj)
102
102
  assert isinstance(py_fn, Callable)
103
- return CallableFunction(Signature.from_dict(md), py_fn, self_name=name, batch_size=md.get('batch_size'))
103
+ sig = Signature.from_dict(md['signature'])
104
+ batch_size = md['batch_size']
105
+ return CallableFunction(sig, py_fn, self_name=name, batch_size=batch_size)
104
106
 
105
107
  def validate_call(self, bound_args: dict[str, Any]) -> None:
106
108
  import pixeltable.exprs as exprs
@@ -11,7 +11,7 @@ class ExprTemplateFunction(Function):
11
11
  """A parameterized expression from which an executable Expr is created with a function call."""
12
12
 
13
13
  def __init__(
14
- self, expr: 'pixeltable.exprs.Expr', py_signature: inspect.Signature, self_path: Optional[str] = None,
14
+ self, expr: 'pixeltable.exprs.Expr', signature: Signature, self_path: Optional[str] = None,
15
15
  name: Optional[str] = None):
16
16
  import pixeltable.exprs as exprs
17
17
  self.expr = expr
@@ -23,28 +23,21 @@ class ExprTemplateFunction(Function):
23
23
 
24
24
  # verify default values
25
25
  self.defaults: Dict[str, exprs.Literal] = {} # key: param name, value: default value converted to a Literal
26
- for py_param in py_signature.parameters.values():
27
- if py_param.default is inspect.Parameter.empty:
26
+ for param in signature.parameters.values():
27
+ if param.default is inspect.Parameter.empty:
28
28
  continue
29
- param_expr = self.param_exprs_by_name[py_param.name]
29
+ param_expr = self.param_exprs_by_name[param.name]
30
30
  try:
31
- literal_default = exprs.Literal(py_param.default, col_type=param_expr.col_type)
32
- self.defaults[py_param.name] = literal_default
31
+ literal_default = exprs.Literal(param.default, col_type=param_expr.col_type)
32
+ self.defaults[param.name] = literal_default
33
33
  except TypeError as e:
34
34
  msg = str(e)
35
- raise excs.Error(f"Default value for parameter '{py_param.name}': {msg[0].lower() + msg[1:]}")
36
- # construct signature
37
- assert len(self.param_exprs) == len(py_signature.parameters)
38
- fn_params = [
39
- Parameter(p.name, self.param_exprs_by_name[p.name].col_type, p.kind)
40
- for p in py_signature.parameters.values()
41
- ]
42
- signature = Signature(return_type=expr.col_type, parameters=fn_params)
35
+ raise excs.Error(f"Default value for parameter '{param.name}': {msg[0].lower() + msg[1:]}")
43
36
 
44
- super().__init__(signature, py_signature=py_signature, self_path=self_path)
37
+ super().__init__(signature, self_path=self_path)
45
38
 
46
39
  def instantiate(self, *args: object, **kwargs: object) -> 'pixeltable.exprs.Expr':
47
- bound_args = self.py_signature.bind(*args, **kwargs).arguments
40
+ bound_args = self.signature.py_signature.bind(*args, **kwargs).arguments
48
41
  # apply defaults, otherwise we might have Parameters left over
49
42
  bound_args.update(
50
43
  {param_name: default for param_name, default in self.defaults.items() if param_name not in bound_args})
@@ -88,14 +81,15 @@ class ExprTemplateFunction(Function):
88
81
  if self.self_path is not None:
89
82
  return super()._as_dict()
90
83
  return {
91
- 'name': self.name,
92
84
  'expr': self.expr.as_dict(),
93
- **super()._as_dict()
85
+ 'signature': self.signature.as_dict(),
86
+ 'name': self.name,
94
87
  }
95
88
 
96
89
  @classmethod
97
90
  def _from_dict(cls, d: Dict) -> Function:
98
91
  if 'expr' not in d:
99
92
  return super()._from_dict(d)
93
+ assert 'signature' in d and 'name' in d
100
94
  import pixeltable.exprs as exprs
101
- return cls(exprs.Expr.from_dict(d['expr']), name=d['name'])
95
+ return cls(exprs.Expr.from_dict(d['expr']), Signature.from_dict(d['signature']), name=d['name'])
@@ -19,11 +19,8 @@ class Function(abc.ABC):
19
19
  via the member self_path.
20
20
  """
21
21
 
22
- def __init__(
23
- self, signature: Signature, py_signature: inspect.Signature, self_path: Optional[str] = None
24
- ):
22
+ def __init__(self, signature: Signature, self_path: Optional[str] = None):
25
23
  self.signature = signature
26
- self.py_signature = py_signature
27
24
  self.self_path = self_path # fully-qualified path to self
28
25
  self._conditional_return_type: Optional[Callable[..., ts.ColumnType]] = None
29
26
 
@@ -46,7 +43,7 @@ class Function(abc.ABC):
46
43
 
47
44
  def __call__(self, *args: Any, **kwargs: Any) -> 'pixeltable.exprs.Expr':
48
45
  from pixeltable import exprs
49
- bound_args = self.py_signature.bind(*args, **kwargs)
46
+ bound_args = self.signature.py_signature.bind(*args, **kwargs)
50
47
  self.validate_call(bound_args.arguments)
51
48
  return exprs.FunctionCall(self, bound_args.arguments)
52
49
 
@@ -58,7 +55,7 @@ class Function(abc.ABC):
58
55
  """Return the type of the value returned by calling this function with the given arguments"""
59
56
  if self._conditional_return_type is None:
60
57
  return self.signature.return_type
61
- bound_args = self.py_signature.bind(**kwargs)
58
+ bound_args = self.signature.py_signature.bind(**kwargs)
62
59
  kw_args: dict[str, Any] = {}
63
60
  sig = inspect.signature(self._conditional_return_type)
64
61
  for param in sig.parameters.values():
@@ -0,0 +1,84 @@
1
+ from __future__ import annotations
2
+ import inspect
3
+ from typing import Dict, Optional, Any, Callable
4
+
5
+ import sqlalchemy as sql
6
+
7
+ import pixeltable
8
+ import pixeltable.exceptions as excs
9
+ import pixeltable.type_system as ts
10
+ from .function import Function
11
+ from .signature import Signature, Parameter
12
+
13
+
14
+ class QueryTemplateFunction(Function):
15
+ """A parameterized query/DataFrame from which an executable DataFrame is created with a function call."""
16
+
17
+ @classmethod
18
+ def create(
19
+ cls, template_callable: Callable, param_types: Optional[list[ts.ColumnType]], path: str, name: str
20
+ ) -> QueryTemplateFunction:
21
+ # we need to construct a template df and a signature
22
+ py_sig = inspect.signature(template_callable)
23
+ py_params = list(py_sig.parameters.values())
24
+ params = Signature.create_parameters(py_params=py_params, param_types=param_types)
25
+ # invoke template_callable with parameter expressions to construct a DataFrame with parameters
26
+ import pixeltable.exprs as exprs
27
+ var_exprs = [exprs.Variable(param.name, param.col_type) for param in params]
28
+ template_df = template_callable(*var_exprs)
29
+ from pixeltable import DataFrame
30
+ assert isinstance(template_df, DataFrame)
31
+ # we take params and return json
32
+ sig = Signature(return_type=ts.JsonType(), parameters=params)
33
+ return QueryTemplateFunction(template_df, sig, path=path, name=name)
34
+
35
+ def __init__(
36
+ self, template_df: Optional['pixeltable.DataFrame'], sig: Optional[Signature], path: Optional[str] = None,
37
+ name: Optional[str] = None,
38
+ ):
39
+ super().__init__(sig, self_path=path)
40
+ self.self_name = name
41
+ self.template_df = template_df
42
+
43
+ # if we're running as part of an ongoing update operation, we need to use the same connection, otherwise
44
+ # we end up with a deadlock
45
+ # TODO: figure out a more general way to make execution state available
46
+ self.conn: Optional[sql.engine.Connection] = None
47
+
48
+ # convert defaults to Literals
49
+ import pixeltable.exprs as exprs
50
+ self.defaults: dict[str, exprs.Literal] = {} # key: param name, value: default value converted to a Literal
51
+ param_types = self.template_df.parameters()
52
+ for param in [p for p in self.signature.parameters.values() if p.has_default()]:
53
+ assert param.name in param_types
54
+ param_type = param_types[param.name]
55
+ literal_default = exprs.Literal(param.default, col_type=param_type)
56
+ self.defaults[param.name] = literal_default
57
+
58
+ def set_conn(self, conn: Optional[sql.engine.Connection]) -> None:
59
+ self.conn = conn
60
+
61
+ def exec(self, *args: Any, **kwargs: Any) -> Any:
62
+ bound_args = self.signature.py_signature.bind(*args, **kwargs).arguments
63
+ # apply defaults, otherwise we might have Parameters left over
64
+ bound_args.update(
65
+ {param_name: default for param_name, default in self.defaults.items() if param_name not in bound_args})
66
+ bound_df = self.template_df.bind(bound_args)
67
+ result = bound_df._collect(self.conn)
68
+ return list(result)
69
+
70
+ @property
71
+ def display_name(self) -> str:
72
+ return self.self_name
73
+
74
+ @property
75
+ def name(self) -> str:
76
+ return self.self_name
77
+
78
+ def _as_dict(self) -> Dict:
79
+ return {'name': self.name, 'signature': self.signature.as_dict(), 'df': self.template_df.as_dict()}
80
+
81
+ @classmethod
82
+ def _from_dict(cls, d: Dict) -> Function:
83
+ from pixeltable.dataframe import DataFrame
84
+ return cls(DataFrame.from_dict(d['df']), Signature.from_dict(d['signature']), name=d['name'])
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import json
3
4
  import dataclasses
4
5
  import enum
5
6
  import inspect
@@ -18,8 +19,56 @@ class Parameter:
18
19
  name: str
19
20
  col_type: Optional[ts.ColumnType] # None for variable parameters
20
21
  kind: enum.Enum # inspect.Parameter.kind; inspect._ParameterKind is private
22
+ # for some reason, this needs to precede is_batched in the dataclass definition,
23
+ # otherwise Python complains that an argument with a default is followed by an argument without a default
24
+ default: Any = inspect.Parameter.empty # default value for the parameter
21
25
  is_batched: bool = False # True if the parameter is a batched parameter (eg, Batch[dict])
22
26
 
27
+ def __post_init__(self) -> None:
28
+ # make sure that default is json-serializable and of the correct type
29
+ if self.default is inspect.Parameter.empty or self.default is None:
30
+ return
31
+ try:
32
+ _ = json.dumps(self.default)
33
+ except TypeError:
34
+ raise excs.Error(f'Default value for parameter {self.name} is not JSON-serializable: {str(self.default)}')
35
+ if self.col_type is not None:
36
+ try:
37
+ self.col_type.validate_literal(self.default)
38
+ except TypeError as e:
39
+ raise excs.Error(f'Default value for parameter {self.name}: {str(e)}')
40
+
41
+ def has_default(self) -> bool:
42
+ return self.default is not inspect.Parameter.empty
43
+
44
+ def as_dict(self) -> dict[str, Any]:
45
+ return {
46
+ 'name': self.name,
47
+ 'col_type': self.col_type.as_dict() if self.col_type is not None else None,
48
+ 'kind': self.kind.name,
49
+ 'is_batched': self.is_batched,
50
+ 'has_default': self.has_default(),
51
+ 'default': self.default if self.has_default() else None,
52
+ }
53
+
54
+ @classmethod
55
+ def from_dict(cls, d: dict[str, Any]) -> Parameter:
56
+ has_default = d['has_default']
57
+ if has_default:
58
+ default = d['default']
59
+ else:
60
+ default = inspect.Parameter.empty
61
+ return cls(
62
+ name=d['name'],
63
+ col_type=ts.ColumnType.from_dict(d['col_type']) if d['col_type'] is not None else None,
64
+ kind=getattr(inspect.Parameter, d['kind']),
65
+ is_batched=d['is_batched'],
66
+ default=default
67
+ )
68
+
69
+ def to_py_param(self) -> inspect.Parameter:
70
+ return inspect.Parameter(self.name, self.kind, default=self.default)
71
+
23
72
 
24
73
  T = typing.TypeVar('T')
25
74
  Batch = typing.Annotated[list[T], 'pxt-batch']
@@ -42,6 +91,7 @@ class Signature:
42
91
  self.parameters_by_pos = parameters.copy()
43
92
  self.constant_parameters = [p for p in parameters if not p.is_batched]
44
93
  self.batched_parameters = [p for p in parameters if p.is_batched]
94
+ self.py_signature = inspect.Signature([p.to_py_param() for p in self.parameters_by_pos])
45
95
 
46
96
  def get_return_type(self) -> ts.ColumnType:
47
97
  assert isinstance(self.return_type, ts.ColumnType)
@@ -50,17 +100,15 @@ class Signature:
50
100
  def as_dict(self) -> Dict[str, Any]:
51
101
  result = {
52
102
  'return_type': self.get_return_type().as_dict(),
53
- 'parameters': [
54
- [p.name, p.col_type.as_dict() if p.col_type is not None else None, p.kind, p.is_batched]
55
- for p in self.parameters.values()
56
- ]
103
+ 'parameters': [p.as_dict() for p in self.parameters.values()],
104
+ 'is_batched': self.is_batched,
57
105
  }
58
106
  return result
59
107
 
60
108
  @classmethod
61
109
  def from_dict(cls, d: Dict[str, Any]) -> Signature:
62
- parameters = [Parameter(p[0], ts.ColumnType.from_dict(p[1]), p[2], p[3]) for p in d['parameters']]
63
- return cls(ts.ColumnType.from_dict(d['return_type']), parameters)
110
+ parameters = [Parameter.from_dict(param_dict) for param_dict in d['parameters']]
111
+ return cls(ts.ColumnType.from_dict(d['return_type']), parameters, d['is_batched'])
64
112
 
65
113
  def __eq__(self, other: Signature) -> bool:
66
114
  if self.get_return_type() != other.get_return_type():
@@ -105,16 +153,20 @@ class Signature:
105
153
 
106
154
  @classmethod
107
155
  def create_parameters(
108
- cls, c: Callable, param_types: Optional[List[ts.ColumnType]] = None) -> List[Parameter]:
109
- sig = inspect.signature(c)
110
- py_parameters = list(sig.parameters.values())
156
+ cls, py_fn: Optional[Callable] = None, py_params: Optional[list[inspect.Parameter]] = None,
157
+ param_types: Optional[List[ts.ColumnType]] = None
158
+ ) -> List[Parameter]:
159
+ assert (py_fn is None) != (py_params is None)
160
+ if py_fn is not None:
161
+ sig = inspect.signature(py_fn)
162
+ py_params = list(sig.parameters.values())
111
163
  parameters: List[Parameter] = []
112
164
 
113
- for idx, param in enumerate(py_parameters):
165
+ for idx, param in enumerate(py_params):
114
166
  if param.name in cls.SPECIAL_PARAM_NAMES:
115
167
  raise excs.Error(f"'{param.name}' is a reserved parameter name")
116
168
  if param.kind == inspect.Parameter.VAR_POSITIONAL or param.kind == inspect.Parameter.VAR_KEYWORD:
117
- parameters.append(Parameter(param.name, None, param.kind, False))
169
+ parameters.append(Parameter(param.name, col_type=None, kind=param.kind))
118
170
  continue
119
171
 
120
172
  # check non-var parameters for name collisions and default value compatibility
@@ -128,21 +180,14 @@ class Signature:
128
180
  if param_type is None:
129
181
  raise excs.Error(f'Cannot infer pixeltable type for parameter {param.name}')
130
182
 
131
- # check default value compatibility
132
- default_val = sig.parameters[param.name].default
133
- if default_val != inspect.Parameter.empty and default_val is not None:
134
- try:
135
- _ = param_type.create_literal(default_val)
136
- except TypeError as e:
137
- raise excs.Error(f'Default value for parameter {param.name}: {str(e)}')
138
-
139
- parameters.append(Parameter(param.name, param_type, param.kind, is_batched))
183
+ parameters.append(Parameter(
184
+ param.name, col_type=param_type, kind=param.kind, is_batched=is_batched, default=param.default))
140
185
 
141
186
  return parameters
142
187
 
143
188
  @classmethod
144
189
  def create(
145
- cls, c: Callable,
190
+ cls, py_fn: Callable,
146
191
  param_types: Optional[List[ts.ColumnType]] = None,
147
192
  return_type: Optional[Union[ts.ColumnType, Callable]] = None
148
193
  ) -> Signature:
@@ -150,8 +195,8 @@ class Signature:
150
195
  Infer the parameter and return types, if none are specified.
151
196
  Raises an exception if the types cannot be inferred.
152
197
  """
153
- parameters = cls.create_parameters(c, param_types)
154
- sig = inspect.signature(c)
198
+ parameters = cls.create_parameters(py_fn=py_fn, param_types=param_types)
199
+ sig = inspect.signature(py_fn)
155
200
  if return_type is None:
156
201
  return_type, return_is_batched = cls._infer_type(sig.return_annotation)
157
202
  if return_type is None:
pixeltable/func/udf.py CHANGED
@@ -1,6 +1,5 @@
1
1
  from __future__ import annotations
2
2
 
3
- import inspect
4
3
  from typing import List, Callable, Optional, overload, Any
5
4
 
6
5
  import pixeltable as pxt
@@ -56,8 +55,12 @@ def udf(*args, **kwargs):
56
55
  return_type = kwargs.pop('return_type', None)
57
56
  param_types = kwargs.pop('param_types', None)
58
57
  batch_size = kwargs.pop('batch_size', None)
59
- substitute_fn = kwargs.pop('py_fn', None)
58
+ substitute_fn = kwargs.pop('substitute_fn', None)
60
59
  force_stored = kwargs.pop('_force_stored', False)
60
+ if len(kwargs) > 0:
61
+ raise excs.Error(f'Invalid @udf decorator kwargs: {", ".join(kwargs.keys())}')
62
+ if len(args) > 0:
63
+ raise excs.Error('Unexpected @udf decorator arguments.')
61
64
 
62
65
  def decorator(decorated_fn: Callable):
63
66
  return make_function(
@@ -134,7 +137,7 @@ def expr_udf(py_fn: Callable) -> ExprTemplateFunction: ...
134
137
  def expr_udf(*, param_types: Optional[List[ts.ColumnType]] = None) -> Callable[[Callable], ExprTemplateFunction]: ...
135
138
 
136
139
  def expr_udf(*args: Any, **kwargs: Any) -> Any:
137
- def decorator(py_fn: Callable, param_types: Optional[List[ts.ColumnType]]) -> ExprTemplateFunction:
140
+ def make_expr_template(py_fn: Callable, param_types: Optional[List[ts.ColumnType]]) -> ExprTemplateFunction:
138
141
  if py_fn.__module__ != '__main__' and py_fn.__name__.isidentifier():
139
142
  # this is a named function in a module
140
143
  function_path = f'{py_fn.__module__}.{py_fn.__qualname__}'
@@ -144,21 +147,21 @@ def expr_udf(*args: Any, **kwargs: Any) -> Any:
144
147
  # TODO: verify that the inferred return type matches that of the template
145
148
  # TODO: verify that the signature doesn't contain batched parameters
146
149
 
147
- # construct Parameters from the function signature
148
- params = Signature.create_parameters(py_fn, param_types=param_types)
150
+ # construct Signature from the function signature
151
+ sig = Signature.create(py_fn=py_fn, param_types=param_types, return_type=ts.InvalidType())
149
152
  import pixeltable.exprs as exprs
150
- var_exprs = [exprs.Variable(param.name, param.col_type) for param in params]
153
+ var_exprs = [exprs.Variable(param.name, param.col_type) for param in sig.parameters.values()]
151
154
  # call the function with the parameter expressions to construct an Expr with parameters
152
155
  template = py_fn(*var_exprs)
153
156
  assert isinstance(template, exprs.Expr)
154
- py_sig = inspect.signature(py_fn)
157
+ sig.return_type = template.col_type
155
158
  if function_path is not None:
156
159
  validate_symbol_path(function_path)
157
- return ExprTemplateFunction(template, py_signature=py_sig, self_path=function_path, name=py_fn.__name__)
160
+ return ExprTemplateFunction(template, sig, self_path=function_path, name=py_fn.__name__)
158
161
 
159
162
  if len(args) == 1:
160
163
  assert len(kwargs) == 0 and callable(args[0])
161
- return decorator(args[0], None)
164
+ return make_expr_template(args[0], None)
162
165
  else:
163
166
  assert len(args) == 0 and len(kwargs) == 1 and 'param_types' in kwargs
164
- return lambda py_fn: decorator(py_fn, kwargs['param_types'])
167
+ return lambda py_fn: make_expr_template(py_fn, kwargs['param_types'])