pixeltable 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (119) hide show
  1. pixeltable/__init__.py +53 -0
  2. pixeltable/__version__.py +3 -0
  3. pixeltable/catalog/__init__.py +13 -0
  4. pixeltable/catalog/catalog.py +159 -0
  5. pixeltable/catalog/column.py +181 -0
  6. pixeltable/catalog/dir.py +32 -0
  7. pixeltable/catalog/globals.py +33 -0
  8. pixeltable/catalog/insertable_table.py +192 -0
  9. pixeltable/catalog/named_function.py +36 -0
  10. pixeltable/catalog/path.py +58 -0
  11. pixeltable/catalog/path_dict.py +139 -0
  12. pixeltable/catalog/schema_object.py +39 -0
  13. pixeltable/catalog/table.py +695 -0
  14. pixeltable/catalog/table_version.py +1026 -0
  15. pixeltable/catalog/table_version_path.py +133 -0
  16. pixeltable/catalog/view.py +203 -0
  17. pixeltable/dataframe.py +749 -0
  18. pixeltable/env.py +466 -0
  19. pixeltable/exceptions.py +17 -0
  20. pixeltable/exec/__init__.py +10 -0
  21. pixeltable/exec/aggregation_node.py +78 -0
  22. pixeltable/exec/cache_prefetch_node.py +116 -0
  23. pixeltable/exec/component_iteration_node.py +79 -0
  24. pixeltable/exec/data_row_batch.py +94 -0
  25. pixeltable/exec/exec_context.py +22 -0
  26. pixeltable/exec/exec_node.py +61 -0
  27. pixeltable/exec/expr_eval_node.py +217 -0
  28. pixeltable/exec/in_memory_data_node.py +73 -0
  29. pixeltable/exec/media_validation_node.py +43 -0
  30. pixeltable/exec/sql_scan_node.py +226 -0
  31. pixeltable/exprs/__init__.py +25 -0
  32. pixeltable/exprs/arithmetic_expr.py +102 -0
  33. pixeltable/exprs/array_slice.py +71 -0
  34. pixeltable/exprs/column_property_ref.py +77 -0
  35. pixeltable/exprs/column_ref.py +114 -0
  36. pixeltable/exprs/comparison.py +77 -0
  37. pixeltable/exprs/compound_predicate.py +98 -0
  38. pixeltable/exprs/data_row.py +199 -0
  39. pixeltable/exprs/expr.py +594 -0
  40. pixeltable/exprs/expr_set.py +39 -0
  41. pixeltable/exprs/function_call.py +382 -0
  42. pixeltable/exprs/globals.py +69 -0
  43. pixeltable/exprs/image_member_access.py +96 -0
  44. pixeltable/exprs/in_predicate.py +96 -0
  45. pixeltable/exprs/inline_array.py +109 -0
  46. pixeltable/exprs/inline_dict.py +103 -0
  47. pixeltable/exprs/is_null.py +38 -0
  48. pixeltable/exprs/json_mapper.py +121 -0
  49. pixeltable/exprs/json_path.py +159 -0
  50. pixeltable/exprs/literal.py +66 -0
  51. pixeltable/exprs/object_ref.py +41 -0
  52. pixeltable/exprs/predicate.py +44 -0
  53. pixeltable/exprs/row_builder.py +329 -0
  54. pixeltable/exprs/rowid_ref.py +94 -0
  55. pixeltable/exprs/similarity_expr.py +65 -0
  56. pixeltable/exprs/type_cast.py +53 -0
  57. pixeltable/exprs/variable.py +45 -0
  58. pixeltable/ext/__init__.py +5 -0
  59. pixeltable/ext/functions/yolox.py +92 -0
  60. pixeltable/func/__init__.py +7 -0
  61. pixeltable/func/aggregate_function.py +197 -0
  62. pixeltable/func/callable_function.py +113 -0
  63. pixeltable/func/expr_template_function.py +99 -0
  64. pixeltable/func/function.py +141 -0
  65. pixeltable/func/function_registry.py +227 -0
  66. pixeltable/func/globals.py +46 -0
  67. pixeltable/func/nos_function.py +202 -0
  68. pixeltable/func/signature.py +162 -0
  69. pixeltable/func/udf.py +164 -0
  70. pixeltable/functions/__init__.py +95 -0
  71. pixeltable/functions/eval.py +215 -0
  72. pixeltable/functions/fireworks.py +34 -0
  73. pixeltable/functions/huggingface.py +167 -0
  74. pixeltable/functions/image.py +16 -0
  75. pixeltable/functions/openai.py +289 -0
  76. pixeltable/functions/pil/image.py +147 -0
  77. pixeltable/functions/string.py +13 -0
  78. pixeltable/functions/together.py +143 -0
  79. pixeltable/functions/util.py +52 -0
  80. pixeltable/functions/video.py +62 -0
  81. pixeltable/globals.py +425 -0
  82. pixeltable/index/__init__.py +2 -0
  83. pixeltable/index/base.py +51 -0
  84. pixeltable/index/embedding_index.py +168 -0
  85. pixeltable/io/__init__.py +3 -0
  86. pixeltable/io/hf_datasets.py +188 -0
  87. pixeltable/io/pandas.py +148 -0
  88. pixeltable/io/parquet.py +192 -0
  89. pixeltable/iterators/__init__.py +3 -0
  90. pixeltable/iterators/base.py +52 -0
  91. pixeltable/iterators/document.py +432 -0
  92. pixeltable/iterators/video.py +88 -0
  93. pixeltable/metadata/__init__.py +58 -0
  94. pixeltable/metadata/converters/convert_10.py +18 -0
  95. pixeltable/metadata/converters/convert_12.py +3 -0
  96. pixeltable/metadata/converters/convert_13.py +41 -0
  97. pixeltable/metadata/schema.py +234 -0
  98. pixeltable/plan.py +620 -0
  99. pixeltable/store.py +424 -0
  100. pixeltable/tool/create_test_db_dump.py +184 -0
  101. pixeltable/tool/create_test_video.py +81 -0
  102. pixeltable/type_system.py +846 -0
  103. pixeltable/utils/__init__.py +17 -0
  104. pixeltable/utils/arrow.py +98 -0
  105. pixeltable/utils/clip.py +18 -0
  106. pixeltable/utils/coco.py +136 -0
  107. pixeltable/utils/documents.py +69 -0
  108. pixeltable/utils/filecache.py +195 -0
  109. pixeltable/utils/help.py +11 -0
  110. pixeltable/utils/http_server.py +70 -0
  111. pixeltable/utils/media_store.py +76 -0
  112. pixeltable/utils/pytorch.py +91 -0
  113. pixeltable/utils/s3.py +13 -0
  114. pixeltable/utils/sql.py +17 -0
  115. pixeltable/utils/transactional_directory.py +35 -0
  116. pixeltable-0.0.0.dist-info/LICENSE +18 -0
  117. pixeltable-0.0.0.dist-info/METADATA +131 -0
  118. pixeltable-0.0.0.dist-info/RECORD +119 -0
  119. pixeltable-0.0.0.dist-info/WHEEL +4 -0
@@ -0,0 +1,7 @@
1
+ from .aggregate_function import Aggregator, AggregateFunction, uda
2
+ from .callable_function import CallableFunction
3
+ from .expr_template_function import ExprTemplateFunction
4
+ from .function import Function
5
+ from .function_registry import FunctionRegistry
6
+ from .signature import Signature, Parameter, Batch
7
+ from .udf import udf, make_function, expr_udf
@@ -0,0 +1,197 @@
1
+ from __future__ import annotations
2
+
3
+ import abc
4
+ import importlib
5
+ import inspect
6
+ from typing import Optional, Any, Type, List, Dict, Callable
7
+ import itertools
8
+
9
+ import pixeltable.exceptions as excs
10
+ import pixeltable.type_system as ts
11
+ from .function import Function
12
+ from .signature import Signature, Parameter
13
+ from .globals import validate_symbol_path
14
+
15
+
16
class Aggregator(abc.ABC):
    """Base class for the stateful objects that implement an aggregate function.

    Both methods are no-op placeholders that return None; concrete aggregators
    are expected to supply their own update() and value().
    """
    # NOTE: the methods below are described with comments rather than docstrings
    # on purpose: AggregateFunction.help_str() calls inspect.getdoc() on a
    # subclass's update(), and a docstring here would be inherited by it.

    def update(self, *args: Any, **kwargs: Any) -> None:
        # placeholder: accepts arbitrary arguments and does nothing
        return None

    def value(self) -> Any:
        # placeholder: no state to report
        return None
21
+
22
+
23
class AggregateFunction(Function):
    """Function interface for an aggregation operation.

    requires_order_by: if True, the first parameter to an aggregate function defines the order in which the function
        sees rows in update()
    allows_std_agg: if True, the aggregate function can be used as a standard aggregate function w/o a window
    allows_window: if True, the aggregate function can be used with a window
    """
    ORDER_BY_PARAM = 'order_by'
    GROUP_BY_PARAM = 'group_by'
    # keyword names that __call__() interprets itself and that therefore cannot be used as parameter names
    RESERVED_PARAMS = {ORDER_BY_PARAM, GROUP_BY_PARAM}

    def __init__(
            self, aggregator_class: Type[Aggregator], self_path: str,
            init_types: List[ts.ColumnType], update_types: List[ts.ColumnType], value_type: ts.ColumnType,
            requires_order_by: bool, allows_std_agg: bool, allows_window: bool):
        """Build the function signature from the aggregator class.

        Args:
            aggregator_class: class whose __init__() and update() define the parameters
            self_path: fully-qualified path, passed on to Function
            init_types: one type per __init__() parameter (excluding self)
            update_types: one type per update() parameter (excluding self)
            value_type: return type of the aggregate
            requires_order_by: see class docstring
            allows_std_agg: see class docstring
            allows_window: see class docstring

        Raises:
            excs.Error: if __init__() and update() share a parameter name, or if a parameter uses a reserved name
        """
        self.agg_cls = aggregator_class
        self.requires_order_by = requires_order_by
        self.allows_std_agg = allows_std_agg
        self.allows_window = allows_window

        # our signature is the signature of 'update', but without self,
        # plus the parameters of 'init' as keyword-only parameters
        update_params = list(inspect.signature(self.agg_cls.update).parameters.values())[1:]  # leave out self
        assert len(update_params) == len(update_types)
        init_params = [
            inspect.Parameter(p.name, inspect.Parameter.KEYWORD_ONLY, default=p.default)
            # starting at 1: leave out self
            for p in itertools.islice(inspect.signature(self.agg_cls.__init__).parameters.values(), 1, None)
        ]
        assert len(init_params) == len(init_types)
        duplicate_params = {p.name for p in init_params} & {p.name for p in update_params}
        if duplicate_params:
            # sorted so that the error message is deterministic
            raise excs.Error(
                f'__init__() and update() cannot have parameters with the same name: '
                f'{", ".join(sorted(duplicate_params))}'
            )
        py_params = update_params + init_params  # init_params are keyword-only and come last
        py_signature = inspect.Signature(py_params)

        params = [Parameter(p.name, update_types[i], p.kind, is_batched=False) for i, p in enumerate(update_params)]
        params.extend([Parameter(p.name, init_types[i], p.kind, is_batched=False) for i, p in enumerate(init_params)])
        signature = Signature(value_type, params)
        super().__init__(signature, py_signature=py_signature, self_path=self_path)
        self.init_param_names = [p.name for p in init_params]

        # make sure the signature doesn't contain reserved parameter names;
        # do this after super().__init__(), otherwise self.name is invalid
        # NOTE(review): this relies on iterating signature.parameters yielding parameter names (str) - confirm
        for param in signature.parameters:
            if param.lower() in self.RESERVED_PARAMS:
                raise excs.Error(f'{self.name}(): parameter name {param} is reserved')

    def exec(self, *args: Any, **kwargs: Any) -> Any:
        """Not supported: always raises NotImplementedError."""
        raise NotImplementedError

    def help_str(self) -> str:
        """Return the base help string, followed by the docstring of the aggregator's update() if it has one."""
        res = super().help_str()
        # inspect.getdoc() returns None for an undocumented update(); concatenating that would raise TypeError
        doc = inspect.getdoc(self.agg_cls.update)
        if doc is not None:
            res += '\n\n' + doc
        return res

    def __call__(self, *args: object, **kwargs: object) -> 'pixeltable.exprs.Expr':
        """Create a FunctionCall expression for this aggregate.

        The keyword arguments 'order_by' and 'group_by' are consumed here and passed to the FunctionCall as
        clauses; they are not bound to the function's parameters.

        Raises:
            excs.Error: if order_by/group_by are used where they are not allowed, if a required ordering
                expression is missing or not an Expr, or if validate_call() rejects the arguments
        """
        from pixeltable import exprs

        # perform semantic analysis of special parameters 'order_by' and 'group_by'
        order_by_clause: Optional[Any] = None
        if self.ORDER_BY_PARAM in kwargs:
            if self.requires_order_by:
                raise excs.Error(
                    f'{self.display_name}(): order_by invalid, this function requires the first argument to be the '
                    f'ordering expression'
                )
            if not self.allows_window:
                raise excs.Error(
                    f'{self.display_name}(): order_by invalid with an aggregate function that does not allow windows')
            order_by_clause = kwargs.pop(self.ORDER_BY_PARAM)
        elif self.requires_order_by:
            # the first argument is the order-by expr
            if len(args) == 0:
                raise excs.Error(f'{self.display_name}(): requires an ordering expression as its first argument')
            order_by_clause = args[0]
            if not isinstance(order_by_clause, exprs.Expr):
                raise excs.Error(
                    f'{self.display_name}(): the first argument needs to be a Pixeltable expression, but instead is a '
                    f'{type(order_by_clause)}'
                )
            # don't pass the first parameter on, the Function doesn't get to see it
            args = args[1:]

        group_by_clause: Optional[Any] = None
        if self.GROUP_BY_PARAM in kwargs:
            if not self.allows_window:
                raise excs.Error(
                    f'{self.display_name}(): group_by invalid with an aggregate function that does not allow windows')
            group_by_clause = kwargs.pop(self.GROUP_BY_PARAM)

        bound_args = self.py_signature.bind(*args, **kwargs)
        self.validate_call(bound_args.arguments)
        return exprs.FunctionCall(
            self, bound_args.arguments,
            order_by_clause=[order_by_clause] if order_by_clause is not None else [],
            group_by_clause=[group_by_clause] if group_by_clause is not None else [])

    def validate_call(self, bound_args: Dict[str, Any]) -> None:
        """Reject calls in which an __init__() parameter is bound to a Pixeltable expression.

        Raises:
            excs.Error: if any init parameter in bound_args is an exprs.Expr
        """
        # check that init parameters are not Exprs
        # TODO: do this in the planner (check that init parameters are either constants or only refer to grouping exprs)
        import pixeltable.exprs as exprs
        for param_name in self.init_param_names:
            if param_name in bound_args and isinstance(bound_args[param_name], exprs.Expr):
                raise excs.Error(
                    f'{self.display_name}(): init() parameter {param_name} needs to be a constant, not a Pixeltable '
                    f'expression'
                )
135
+
136
+
137
def uda(
        *,
        value_type: ts.ColumnType,
        update_types: List[ts.ColumnType],
        init_types: Optional[List[ts.ColumnType]] = None,
        requires_order_by: bool = False, allows_std_agg: bool = True, allows_window: bool = False,
) -> Callable:
    """Decorator for user-defined aggregate functions.

    The decorated class must inherit from Aggregator and implement the following methods:
    - __init__(self, ...) to initialize the aggregator
    - update(self, ...) to update the aggregator with a new value
    - value(self) to return the final result

    The decorator creates an AggregateFunction instance from the class and returns it, so the
    decorated name refers to that instance rather than to the class.

    Parameters:
    - init_types: list of types for the __init__() parameters; must match the number of parameters
    - update_types: list of types for the update() parameters; must match the number of parameters
    - value_type: return type of the aggregator
    - requires_order_by: if True, the first parameter to the function is the order-by expression
    - allows_std_agg: if True, the function can be used as a standard aggregate function w/o a window
    - allows_window: if True, the function can be used with a window

    Raises (when the returned decorator is applied):
    - excs.Error if init_types/update_types don't match the parameter counts of __init__()/update(),
      or if update() has no parameters
    """
    if init_types is None:
        init_types = []

    def decorator(cls: Type[Aggregator]) -> AggregateFunction:
        # validate type parameters; '- 1' leaves out self
        num_init_params = len(inspect.signature(cls.__init__).parameters) - 1
        # checked unconditionally, so that surplus init_types for a parameterless __init__() are reported
        # here instead of tripping an assertion inside AggregateFunction
        if len(init_types) != num_init_params:
            raise excs.Error(
                f'init_types must be a list of {num_init_params} types, one for each parameter of __init__()')
        num_update_params = len(inspect.signature(cls.update).parameters) - 1
        if num_update_params == 0:
            raise excs.Error('update() must have at least one parameter')
        if len(update_types) != num_update_params:
            raise excs.Error(
                f'update_types must be a list of {num_update_params} types, one for each parameter of update()')
        assert value_type is not None

        # the AggregateFunction instance resides in the same module as cls
        class_path = f'{cls.__module__}.{cls.__qualname__}'

        # create the corresponding AggregateFunction instance
        instance = AggregateFunction(
            cls, class_path, init_types, update_types, value_type, requires_order_by, allows_std_agg, allows_window)
        # do the path validation at the very end, in order to be able to write tests for the other failure cases
        validate_symbol_path(class_path)

        return instance

    return decorator
@@ -0,0 +1,113 @@
1
+ from __future__ import annotations
2
+
3
+ import inspect
4
+ from typing import Optional, Callable, Tuple, Any
5
+ from uuid import UUID
6
+
7
+ import cloudpickle
8
+
9
+ from .function import Function
10
+ from .signature import Signature
11
+
12
+
13
class CallableFunction(Function):
    """Pixeltable Function backed by a Python Callable.

    CallableFunctions come in two flavors:
    - references to lambdas and functions defined in notebooks, which are pickled and serialized to the store
    - functions that are defined in modules are serialized via the default mechanism
    """

    def __init__(
            self, signature: Signature, py_fn: Callable, self_path: Optional[str] = None,
            self_name: Optional[str] = None, batch_size: Optional[int] = None):
        """
        Args:
            signature: Pixeltable signature of the function
            py_fn: the Python callable that implements the function; its inspect signature is used for binding
            self_path: fully-qualified path, passed on to Function
            self_name: value returned by the name and display_name properties
            batch_size: if not None, the function is treated as batched (see is_batched)
        """
        assert py_fn is not None
        self.py_fn = py_fn
        self.self_name = self_name
        self.batch_size = batch_size
        py_signature = inspect.signature(self.py_fn)
        super().__init__(signature, py_signature, self_path=self_path)

    @property
    def is_batched(self) -> bool:
        """True if a batch size was specified."""
        return self.batch_size is not None

    def exec(self, *args: Any, **kwargs: Any) -> Any:
        """Execute the function for a single set of (unbatched) arguments and return the single result.

        For a batched function, every positional argument and every non-constant keyword argument is wrapped
        in a one-element list, and the one-element result is unwrapped again.
        """
        if self.is_batched:
            # Pack the batched parameters into singleton lists
            constant_param_names = [p.name for p in self.signature.constant_parameters]
            batched_args = [[arg] for arg in args]
            constant_kwargs = {k: v for k, v in kwargs.items() if k in constant_param_names}
            batched_kwargs = {k: [v] for k, v in kwargs.items() if k not in constant_param_names}
            result = self.py_fn(*batched_args, **constant_kwargs, **batched_kwargs)
            assert len(result) == 1
            return result[0]
        else:
            return self.py_fn(*args, **kwargs)

    def exec_batch(self, *args: Any, **kwargs: Any) -> list:
        """Execute the function with the given arguments and return the result.
        The arguments are expected to be batched: if the corresponding parameter has type T,
        then the argument should have type T if it's a constant parameter, or list[T] if it's
        a batched parameter.
        """
        assert self.is_batched
        # Unpack the constant parameters
        # NOTE(review): v[0] implies constant keyword arguments arrive wrapped in a sequence here, which
        # differs from the docstring above - confirm against the caller
        constant_param_names = [p.name for p in self.signature.constant_parameters]
        constant_kwargs = {k: v[0] for k, v in kwargs.items() if k in constant_param_names}
        batched_kwargs = {k: v for k, v in kwargs.items() if k not in constant_param_names}
        return self.py_fn(*args, **constant_kwargs, **batched_kwargs)

    # TODO(aaron-siegel): Implement conditional batch sizing
    def get_batch_size(self, *args: Any, **kwargs: Any) -> Optional[int]:
        """Return the configured batch size; the arguments are currently ignored."""
        return self.batch_size

    @property
    def display_name(self) -> str:
        return self.self_name

    @property
    def name(self) -> str:
        return self.self_name

    def help_str(self) -> str:
        """Return the base help string, followed by the docstring of the wrapped callable if it has one."""
        res = super().help_str()
        # inspect.getdoc() returns None for an undocumented callable; concatenating that would raise TypeError
        doc = inspect.getdoc(self.py_fn)
        if doc is not None:
            res += '\n\n' + doc
        return res

    def _as_dict(self) -> dict:
        """Serialize a reference to this function.

        A function without a self_path is registered as a stored function and referenced by the hex form of
        the id returned by the registry; otherwise the default path-based serialization is used.
        """
        if self.self_path is None:
            # this is not a module function
            from .function_registry import FunctionRegistry
            fn_id = FunctionRegistry.get().create_stored_function(self)
            return {'id': fn_id.hex}
        return super()._as_dict()

    @classmethod
    def _from_dict(cls, d: dict) -> Function:
        """Inverse of _as_dict(): look up a stored function by id, or fall back to path-based resolution."""
        if 'id' in d:
            from .function_registry import FunctionRegistry
            return FunctionRegistry.get().get_stored_function(UUID(hex=d['id']))
        return super()._from_dict(d)

    def to_store(self) -> tuple[dict, bytes]:
        """Return (metadata dict, pickled callable); the metadata is the signature dict plus an optional
        'batch_size' entry."""
        md = self.signature.as_dict()
        if self.batch_size is not None:
            md['batch_size'] = self.batch_size
        return md, cloudpickle.dumps(self.py_fn)

    @classmethod
    def from_store(cls, name: Optional[str], md: dict, binary_obj: bytes) -> Function:
        """Recreate a CallableFunction from the output of to_store()."""
        # NOTE(security): unpickling executes arbitrary code; binary_obj must only ever come from a trusted store
        py_fn = cloudpickle.loads(binary_obj)
        assert callable(py_fn)
        return CallableFunction(Signature.from_dict(md), py_fn, self_name=name, batch_size=md.get('batch_size'))

    def validate_call(self, bound_args: dict[str, Any]) -> None:
        """For batched functions, reject calls in which a constant parameter is bound to a Pixeltable expression.

        Raises:
            ValueError: if a constant parameter in bound_args is an exprs.Expr
        """
        import pixeltable.exprs as exprs
        if self.is_batched:
            for param in self.signature.constant_parameters:
                if param.name in bound_args and isinstance(bound_args[param.name], exprs.Expr):
                    raise ValueError(
                        f'{self.display_name}(): '
                        f'parameter {param.name} must be a constant value, not a Pixeltable expression'
                    )
@@ -0,0 +1,99 @@
1
+ import inspect
2
+ from typing import Dict, Optional, Any
3
+
4
+ import pixeltable
5
+ import pixeltable.exceptions as excs
6
+ from .function import Function
7
+ from .signature import Signature, Parameter
8
+
9
+
10
class ExprTemplateFunction(Function):
    """A parameterized expression from which an executable Expr is created with a function call."""

    def __init__(
            self, expr: 'pixeltable.exprs.Expr', py_signature: inspect.Signature, self_path: Optional[str] = None,
            name: Optional[str] = None):
        """
        Args:
            expr: template expression; its Variable subexpressions are the function's parameters
            py_signature: Python signature whose parameter names match the Variables in expr
            self_path: fully-qualified path, passed on to Function
            name: value returned by the name and display_name properties

        Raises:
            excs.Error: if a default value in py_signature cannot be converted to a Literal of the parameter's type
        """
        import pixeltable.exprs as exprs
        self.expr = expr
        self.self_name = name
        self.param_exprs = list(set(expr.subexprs(expr_class=exprs.Variable)))
        # make sure there are no duplicate names
        assert len(self.param_exprs) == len(set(p.name for p in self.param_exprs))
        self.param_exprs_by_name = {p.name: p for p in self.param_exprs}

        # verify default values
        self.defaults: Dict[str, exprs.Literal] = {}  # key: param name, value: default value converted to a Literal
        for py_param in py_signature.parameters.values():
            if py_param.default is inspect.Parameter.empty:
                continue
            param_expr = self.param_exprs_by_name[py_param.name]
            try:
                literal_default = exprs.Literal(py_param.default, col_type=param_expr.col_type)
                self.defaults[py_param.name] = literal_default
            except TypeError as e:
                msg = str(e)
                # msg[:1] instead of msg[0]: an empty message must not raise IndexError
                raise excs.Error(
                    f"Default value for parameter '{py_param.name}': {msg[:1].lower() + msg[1:]}") from e
        # construct signature
        assert len(self.param_exprs) == len(py_signature.parameters)
        fn_params = [
            Parameter(p.name, self.param_exprs_by_name[p.name].col_type, p.kind)
            for p in py_signature.parameters.values()
        ]
        signature = Signature(return_type=expr.col_type, parameters=fn_params)

        super().__init__(signature, py_signature=py_signature, self_path=self_path)

    def instantiate(self, *args: object, **kwargs: object) -> 'pixeltable.exprs.Expr':
        """Return a copy of the template expression in which every Variable is replaced by its argument.

        Arguments that are not already Exprs are converted with Expr.from_object(); parameters that were not
        supplied take their default Literal.

        Raises:
            excs.Error: if an argument cannot be converted to a Pixeltable expression
        """
        import pixeltable.exprs as exprs
        bound_args = self.py_signature.bind(*args, **kwargs).arguments
        # apply defaults, otherwise we might have Parameters left over
        bound_args.update(
            {param_name: default for param_name, default in self.defaults.items() if param_name not in bound_args})
        result = self.expr.copy()
        for param_name, arg in bound_args.items():
            param_expr = self.param_exprs_by_name[param_name]
            if not isinstance(arg, exprs.Expr):
                # TODO: use the available param_expr.col_type
                arg_expr = exprs.Expr.from_object(arg)
                if arg_expr is None:
                    raise excs.Error(f'{self.self_name}(): cannot convert argument {arg} to a Pixeltable expression')
            else:
                arg_expr = arg
            result = result.substitute(param_expr, arg_expr)
        assert not result.contains(exprs.Variable)
        return result

    def exec(self, *args: Any, **kwargs: Any) -> Any:
        """Instantiate the template with the given arguments and evaluate it on a single, table-less row."""
        import pixeltable.exprs as exprs
        # aliased so that the module doesn't shadow the builtin exec() inside this method
        import pixeltable.exec as pt_exec
        expr = self.instantiate(*args, **kwargs)
        row_builder = exprs.RowBuilder(output_exprs=[expr], columns=[], input_exprs=[])
        row_batch = pt_exec.DataRowBatch(tbl=None, row_builder=row_builder, len=1)
        row = row_batch[0]
        row_builder.eval(row, ctx=row_builder.default_eval_ctx)
        return row[row_builder.get_output_exprs()[0].slot_idx]

    @property
    def display_name(self) -> str:
        return self.self_name

    @property
    def name(self) -> str:
        return self.self_name

    def _as_dict(self) -> Dict:
        """Serialize a reference to this function.

        With a self_path, the default path-based serialization is used. Otherwise the name, the serialized
        template expression and the Python parameters (name, kind and, if present, raw default value) are
        stored, which is what _from_dict() needs in order to rebuild the instance.
        """
        if self.self_path is not None:
            return super()._as_dict()
        # don't delegate to super()._as_dict() here: it asserts that self_path is set
        params = []
        for p in self.py_signature.parameters.values():
            param_md: Dict[str, Any] = {'name': p.name, 'kind': p.kind.name}
            if p.default is not inspect.Parameter.empty:
                # NOTE(review): stored as the raw Python value; assumes defaults are JSON-serializable - confirm
                param_md['default'] = p.default
            params.append(param_md)
        return {
            'name': self.name,
            'expr': self.expr.as_dict(),
            'params': params,
        }

    @classmethod
    def _from_dict(cls, d: Dict) -> Function:
        """Inverse of _as_dict(): rebuild the function from its serialized expression and parameters, or fall
        back to path-based resolution if no expression was stored."""
        if 'expr' not in d:
            return super()._from_dict(d)
        import pixeltable.exprs as exprs
        # rebuild the Python signature that __init__() requires; 'kind' holds the name of the
        # inspect.Parameter kind constant (e.g. 'POSITIONAL_OR_KEYWORD')
        py_params = [
            inspect.Parameter(
                p['name'], getattr(inspect.Parameter, p['kind']),
                default=p['default'] if 'default' in p else inspect.Parameter.empty)
            for p in d['params']
        ]
        return cls(exprs.Expr.from_dict(d['expr']), inspect.Signature(py_params), name=d['name'])
@@ -0,0 +1,141 @@
1
+ from __future__ import annotations
2
+
3
+ import abc
4
+ import importlib
5
+ import inspect
6
+ from typing import Optional, Dict, Any, Tuple, Callable
7
+
8
+ import pixeltable
9
+ import pixeltable.type_system as ts
10
+ from .globals import resolve_symbol
11
+ from .signature import Signature
12
+
13
+
14
class Function(abc.ABC):
    """Base class for Pixeltable's function interface.

    A function in Pixeltable is an object that has a signature and implements __call__().
    This base class provides a default serialization mechanism for Function instances provided by Python modules,
    via the member self_path.
    """

    def __init__(
            self, signature: Signature, py_signature: inspect.Signature, self_path: Optional[str] = None
    ):
        """
        Args:
            signature: Pixeltable signature (parameter and return types)
            py_signature: Python signature used to bind call arguments
            self_path: fully-qualified path to this instance, or None if it isn't reachable via a module path
        """
        self.signature = signature
        self.py_signature = py_signature
        self.self_path = self_path  # fully-qualified path to self
        # set via the conditional_return_type() decorator
        self._conditional_return_type: Optional[Callable[..., ts.ColumnType]] = None

    @property
    def name(self) -> str:
        """Last component of self_path; requires self_path to be set."""
        assert self.self_path is not None
        return self.self_path.split('.')[-1]

    @property
    def display_name(self) -> str:
        """self_path with a leading 'pixeltable.functions.' removed, or '<anonymous>' without a self_path."""
        if self.self_path is None:
            return '<anonymous>'
        ptf_prefix = 'pixeltable.functions.'
        if self.self_path.startswith(ptf_prefix):
            return self.self_path[len(ptf_prefix):]
        return self.self_path

    def help_str(self) -> str:
        """Return the display name followed by the string form of the signature."""
        return self.display_name + str(self.signature)

    def __call__(self, *args: Any, **kwargs: Any) -> 'pixeltable.exprs.Expr':
        """Bind the arguments to the Python signature, validate them, and return a FunctionCall expression.

        Raises:
            TypeError: if the arguments don't match the Python signature (raised by inspect.Signature.bind())
        """
        from pixeltable import exprs
        bound_args = self.py_signature.bind(*args, **kwargs)
        self.validate_call(bound_args.arguments)
        return exprs.FunctionCall(self, bound_args.arguments)

    def validate_call(self, bound_args: Dict[str, Any]) -> None:
        """Override this to do custom validation of the arguments"""
        pass

    def call_return_type(self, kwargs: dict[str, Any]) -> ts.ColumnType:
        """Return the type of the value returned by calling this function with the given arguments"""
        if self._conditional_return_type is None:
            return self.signature.return_type
        bound_args = self.py_signature.bind(**kwargs)
        # only pass on those arguments that the conditional-return-type callable declares as parameters
        kw_args: dict[str, Any] = {}
        sig = inspect.signature(self._conditional_return_type)
        for param in sig.parameters.values():
            if param.name in bound_args.arguments:
                kw_args[param.name] = bound_args.arguments[param.name]
        return self._conditional_return_type(**kw_args)

    def conditional_return_type(self, fn: Callable[..., ts.ColumnType]) -> Callable[..., ts.ColumnType]:
        """Instance decorator for specifying a conditional return type for this function

        Raises:
            ValueError: if fn has a parameter that is not in this function's signature
        """
        # verify that call_return_type only has parameters that are also present in the signature
        sig = inspect.signature(fn)
        for param in sig.parameters.values():
            if param.name not in self.signature.parameters:
                raise ValueError(f'`conditional_return_type` has parameter `{param.name}` that is not in the signature')
        self._conditional_return_type = fn
        return fn

    @abc.abstractmethod
    def exec(self, *args: Any, **kwargs: Any) -> Any:
        """Execute the function with the given arguments and return the result."""
        pass

    def __eq__(self, other: object) -> bool:
        """Two functions are equal if other is an instance of self's class and both have the same self_path."""
        if not isinstance(other, self.__class__):
            return False
        return self.self_path == other.self_path

    def __hash__(self) -> int:
        # Defining __eq__() without __hash__() makes instances unhashable. Hash on self_path only (and not on
        # the class), so that any two instances that compare equal are guaranteed to have the same hash.
        return hash(self.self_path)

    def source(self) -> None:
        """Print source code"""
        print('source not available')

    def as_dict(self) -> Dict:
        """
        Return a serialized reference to the instance that can be passed to json.dumps() and converted back
        to an instance with from_dict().
        Subclasses can override _as_dict().
        """
        classpath = f'{self.__class__.__module__}.{self.__class__.__qualname__}'
        return {'_classpath': classpath, **self._as_dict()}

    def _as_dict(self) -> Dict:
        """Default serialization: store the path to self (which includes the module path)"""
        assert self.self_path is not None
        return {'path': self.self_path}

    @classmethod
    def from_dict(cls, d: Dict) -> Function:
        """
        Turn dict that was produced by calling as_dict() into an instance of the correct Function subclass.
        """
        assert '_classpath' in d
        # import the module that defines the subclass and let that subclass do the actual deserialization
        module_path, class_name = d['_classpath'].rsplit('.', 1)
        class_module = importlib.import_module(module_path)
        func_class = getattr(class_module, class_name)
        return func_class._from_dict(d)

    @classmethod
    def _from_dict(cls, d: Dict) -> Function:
        """Default deserialization: load the symbol indicated by the stored symbol_path"""
        assert 'path' in d and d['path'] is not None
        instance = resolve_symbol(d['path'])
        assert isinstance(instance, Function)
        return instance

    def to_store(self) -> Tuple[Dict, bytes]:
        """
        Serialize the function to a format that can be stored in the Pixeltable store
        Returns:
            - a dict that can be passed to json.dumps()
            - additional binary data
        Only Function subclasses that can be stored need to override this.
        """
        raise NotImplementedError()

    @classmethod
    def from_store(cls, name: Optional[str], md: Dict, binary_obj: bytes) -> Function:
        """
        Create a Function instance from the serialized representation returned by to_store()
        """
        raise NotImplementedError()