pixeltable-0.2.8-py3-none-any.whl → pixeltable-0.2.10-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +15 -33
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/catalog.py +1 -1
- pixeltable/catalog/column.py +29 -11
- pixeltable/catalog/dir.py +2 -2
- pixeltable/catalog/insertable_table.py +5 -55
- pixeltable/catalog/named_function.py +2 -2
- pixeltable/catalog/schema_object.py +2 -7
- pixeltable/catalog/table.py +307 -186
- pixeltable/catalog/table_version.py +109 -63
- pixeltable/catalog/table_version_path.py +28 -5
- pixeltable/catalog/view.py +20 -10
- pixeltable/dataframe.py +129 -26
- pixeltable/env.py +29 -18
- pixeltable/exec/exec_context.py +5 -0
- pixeltable/exec/exec_node.py +1 -0
- pixeltable/exec/in_memory_data_node.py +29 -24
- pixeltable/exec/sql_scan_node.py +1 -1
- pixeltable/exprs/column_ref.py +13 -8
- pixeltable/exprs/data_row.py +4 -0
- pixeltable/exprs/expr.py +16 -1
- pixeltable/exprs/function_call.py +4 -4
- pixeltable/exprs/row_builder.py +29 -20
- pixeltable/exprs/similarity_expr.py +4 -3
- pixeltable/ext/functions/yolox.py +2 -1
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/aggregate_function.py +14 -12
- pixeltable/func/callable_function.py +8 -6
- pixeltable/func/expr_template_function.py +13 -19
- pixeltable/func/function.py +3 -6
- pixeltable/func/query_template_function.py +84 -0
- pixeltable/func/signature.py +68 -23
- pixeltable/func/udf.py +13 -10
- pixeltable/functions/__init__.py +6 -91
- pixeltable/functions/eval.py +26 -14
- pixeltable/functions/fireworks.py +25 -23
- pixeltable/functions/globals.py +62 -0
- pixeltable/functions/huggingface.py +20 -16
- pixeltable/functions/image.py +170 -1
- pixeltable/functions/openai.py +95 -128
- pixeltable/functions/string.py +10 -2
- pixeltable/functions/together.py +95 -84
- pixeltable/functions/util.py +16 -0
- pixeltable/functions/video.py +94 -16
- pixeltable/functions/whisper.py +74 -0
- pixeltable/globals.py +1 -1
- pixeltable/io/__init__.py +10 -0
- pixeltable/io/external_store.py +370 -0
- pixeltable/io/globals.py +51 -22
- pixeltable/io/label_studio.py +639 -0
- pixeltable/io/parquet.py +1 -1
- pixeltable/iterators/__init__.py +9 -0
- pixeltable/iterators/string.py +40 -0
- pixeltable/metadata/__init__.py +6 -8
- pixeltable/metadata/converters/convert_10.py +2 -4
- pixeltable/metadata/converters/convert_12.py +7 -2
- pixeltable/metadata/converters/convert_13.py +6 -8
- pixeltable/metadata/converters/convert_14.py +2 -4
- pixeltable/metadata/converters/convert_15.py +44 -0
- pixeltable/metadata/converters/convert_16.py +18 -0
- pixeltable/metadata/converters/util.py +66 -0
- pixeltable/metadata/schema.py +3 -3
- pixeltable/plan.py +8 -7
- pixeltable/store.py +1 -1
- pixeltable/tool/create_test_db_dump.py +147 -54
- pixeltable/tool/embed_udf.py +9 -0
- pixeltable/type_system.py +1 -2
- pixeltable/utils/code.py +34 -0
- {pixeltable-0.2.8.dist-info → pixeltable-0.2.10.dist-info}/METADATA +1 -1
- pixeltable-0.2.10.dist-info/RECORD +131 -0
- pixeltable/datatransfer/__init__.py +0 -1
- pixeltable/datatransfer/label_studio.py +0 -452
- pixeltable/datatransfer/remote.py +0 -85
- pixeltable/functions/pil/image.py +0 -147
- pixeltable-0.2.8.dist-info/RECORD +0 -124
- {pixeltable-0.2.8.dist-info → pixeltable-0.2.10.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.8.dist-info → pixeltable-0.2.10.dist-info}/WHEEL +0 -0
pixeltable/exprs/row_builder.py
CHANGED
|
@@ -1,16 +1,19 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from dataclasses import dataclass
|
|
4
|
-
import time
|
|
2
|
+
|
|
5
3
|
import sys
|
|
4
|
+
import time
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import Optional, List, Any, Dict, Tuple, Set
|
|
7
|
+
|
|
8
|
+
import sqlalchemy as sql
|
|
6
9
|
|
|
10
|
+
import pixeltable.catalog as catalog
|
|
11
|
+
import pixeltable.exceptions as excs
|
|
12
|
+
import pixeltable.func as func
|
|
13
|
+
import pixeltable.utils as utils
|
|
14
|
+
from .data_row import DataRow
|
|
7
15
|
from .expr import Expr
|
|
8
16
|
from .expr_set import ExprSet
|
|
9
|
-
from .data_row import DataRow
|
|
10
|
-
import pixeltable.utils as utils
|
|
11
|
-
import pixeltable.func as func
|
|
12
|
-
import pixeltable.exceptions as excs
|
|
13
|
-
import pixeltable.catalog as catalog
|
|
14
17
|
|
|
15
18
|
|
|
16
19
|
class ExecProfile:
|
|
@@ -74,10 +77,10 @@ class RowBuilder:
|
|
|
74
77
|
# - explicitly requested output_exprs
|
|
75
78
|
# - values for computed columns
|
|
76
79
|
resolve_cols = set(columns)
|
|
77
|
-
self.output_exprs = [
|
|
80
|
+
self.output_exprs = ExprSet([
|
|
78
81
|
self._record_unique_expr(e.copy().resolve_computed_cols(resolve_cols=resolve_cols), recursive=True)
|
|
79
82
|
for e in output_exprs
|
|
80
|
-
]
|
|
83
|
+
])
|
|
81
84
|
|
|
82
85
|
# record columns for create_table_row()
|
|
83
86
|
from .column_ref import ColumnRef
|
|
@@ -88,16 +91,15 @@ class RowBuilder:
|
|
|
88
91
|
# create a copy here so we don't reuse execution state and resolve references to computed columns
|
|
89
92
|
expr = col.value_expr.copy().resolve_computed_cols(resolve_cols=resolve_cols)
|
|
90
93
|
expr = self._record_unique_expr(expr, recursive=True)
|
|
91
|
-
self.add_table_column(col, expr.slot_idx)
|
|
92
|
-
self.output_exprs.append(expr)
|
|
93
94
|
else:
|
|
94
95
|
# record a ColumnRef so that references to this column resolve to the same slot idx
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
96
|
+
expr = ColumnRef(col)
|
|
97
|
+
expr = self._record_unique_expr(expr, recursive=False)
|
|
98
|
+
self.add_table_column(col, expr.slot_idx)
|
|
99
|
+
self.output_exprs.append(expr)
|
|
98
100
|
|
|
99
101
|
# default eval ctx: all output exprs
|
|
100
|
-
self.default_eval_ctx = self.create_eval_ctx(self.output_exprs, exclude=unique_input_exprs)
|
|
102
|
+
self.default_eval_ctx = self.create_eval_ctx(list(self.output_exprs), exclude=unique_input_exprs)
|
|
101
103
|
|
|
102
104
|
# references to unstored iterator columns:
|
|
103
105
|
# - those ColumnRefs need to instantiate iterators
|
|
@@ -107,9 +109,11 @@ class RowBuilder:
|
|
|
107
109
|
# - the separate eval ctx allows the ColumnRef to materialize the iterator args only when the underlying
|
|
108
110
|
# iterated object changes
|
|
109
111
|
col_refs = [e for e in self.unique_exprs if isinstance(e, ColumnRef)]
|
|
112
|
+
|
|
110
113
|
def refs_unstored_iter_col(col_ref: ColumnRef) -> bool:
|
|
111
114
|
tbl = col_ref.col.tbl
|
|
112
115
|
return tbl.is_component_view() and tbl.is_iterator_column(col_ref.col) and not col_ref.col.is_stored
|
|
116
|
+
|
|
113
117
|
unstored_iter_col_refs = [col_ref for col_ref in col_refs if refs_unstored_iter_col(col_ref)]
|
|
114
118
|
component_views = [col_ref.col.tbl for col_ref in unstored_iter_col_refs]
|
|
115
119
|
unstored_iter_args = {view.id: view.iterator_args.copy() for view in component_views}
|
|
@@ -154,13 +158,19 @@ class RowBuilder:
|
|
|
154
158
|
"""Return ColumnSlotIdx for output columns"""
|
|
155
159
|
return self.table_columns
|
|
156
160
|
|
|
161
|
+
def set_conn(self, conn: sql.engine.Connection) -> None:
|
|
162
|
+
from .function_call import FunctionCall
|
|
163
|
+
for expr in self.unique_exprs:
|
|
164
|
+
if isinstance(expr, FunctionCall) and isinstance(expr.fn, func.QueryTemplateFunction):
|
|
165
|
+
expr.fn.set_conn(conn)
|
|
166
|
+
|
|
157
167
|
@property
|
|
158
168
|
def num_materialized(self) -> int:
|
|
159
169
|
return self.next_slot_idx
|
|
160
170
|
|
|
161
|
-
def get_output_exprs(self) ->
|
|
171
|
+
def get_output_exprs(self) -> list[Expr]:
|
|
162
172
|
"""Returns exprs that were requested in the c'tor and require evaluation"""
|
|
163
|
-
return self.output_exprs
|
|
173
|
+
return list(self.output_exprs)
|
|
164
174
|
|
|
165
175
|
def _next_slot_idx(self) -> int:
|
|
166
176
|
result = self.next_slot_idx
|
|
@@ -252,7 +262,7 @@ class RowBuilder:
|
|
|
252
262
|
result_ids.sort()
|
|
253
263
|
return [self.unique_exprs[id] for id in result_ids]
|
|
254
264
|
|
|
255
|
-
def create_eval_ctx(self, targets:
|
|
265
|
+
def create_eval_ctx(self, targets: list[Expr], exclude: Optional[list[Expr]] = None) -> EvalCtx:
|
|
256
266
|
"""Return EvalCtx for targets"""
|
|
257
267
|
if exclude is None:
|
|
258
268
|
exclude = []
|
|
@@ -326,4 +336,3 @@ class RowBuilder:
|
|
|
326
336
|
table_row[col.errormsg_store_name()] = None
|
|
327
337
|
|
|
328
338
|
return table_row, num_excs
|
|
329
|
-
|
|
pixeltable/exprs/similarity_expr.py
CHANGED
|
@@ -18,7 +18,6 @@ class SimilarityExpr(Expr):
|
|
|
18
18
|
super().__init__(ts.FloatType())
|
|
19
19
|
self.components = [col_ref, item]
|
|
20
20
|
self.id = self._create_id()
|
|
21
|
-
assert isinstance(item, Literal)
|
|
22
21
|
assert item.col_type.is_string_type() or item.col_type.is_image_type()
|
|
23
22
|
|
|
24
23
|
# determine index to use
|
|
@@ -47,12 +46,14 @@ class SimilarityExpr(Expr):
|
|
|
47
46
|
return f'{self.components[0]}.similarity({self.components[1]})'
|
|
48
47
|
|
|
49
48
|
def sql_expr(self) -> Optional[sql.ClauseElement]:
|
|
50
|
-
|
|
49
|
+
if not isinstance(self.components[1], Literal):
|
|
50
|
+
raise excs.Error(f'similarity(): requires a string or a PIL.Image.Image object, not an expression')
|
|
51
51
|
item = self.components[1].val
|
|
52
52
|
return self.idx_info.idx.similarity_clause(self.idx_info.val_col, item)
|
|
53
53
|
|
|
54
54
|
def as_order_by_clause(self, is_asc: bool) -> Optional[sql.ClauseElement]:
|
|
55
|
-
|
|
55
|
+
if not isinstance(self.components[1], Literal):
|
|
56
|
+
raise excs.Error(f'similarity(): requires a string or a PIL.Image.Image object, not an expression')
|
|
56
57
|
item = self.components[1].val
|
|
57
58
|
return self.idx_info.idx.order_by_clause(self.idx_info.val_col, item, is_asc)
|
|
58
59
|
|
|
pixeltable/ext/functions/yolox.py
CHANGED
|
@@ -14,7 +14,7 @@ from yolox.utils import postprocess
|
|
|
14
14
|
import pixeltable as pxt
|
|
15
15
|
from pixeltable import env
|
|
16
16
|
from pixeltable.func import Batch
|
|
17
|
-
from pixeltable.functions.util import
|
|
17
|
+
from pixeltable.functions.util import normalize_image_mode
|
|
18
18
|
|
|
19
19
|
_logger = logging.getLogger('pixeltable')
|
|
20
20
|
|
|
@@ -74,6 +74,7 @@ def yolo_to_coco(detections: dict) -> list:
|
|
|
74
74
|
|
|
75
75
|
def _images_to_tensors(images: Iterable[PIL.Image.Image], exp: Exp) -> Iterator[torch.Tensor]:
|
|
76
76
|
for image in images:
|
|
77
|
+
image = normalize_image_mode(image)
|
|
77
78
|
image_transform, _ = _val_transform(np.array(image), None, exp.test_size)
|
|
78
79
|
yield torch.from_numpy(image_transform)
|
|
79
80
|
|
pixeltable/func/__init__.py
CHANGED
|
@@ -3,5 +3,6 @@ from .callable_function import CallableFunction
|
|
|
3
3
|
from .expr_template_function import ExprTemplateFunction
|
|
4
4
|
from .function import Function
|
|
5
5
|
from .function_registry import FunctionRegistry
|
|
6
|
+
from .query_template_function import QueryTemplateFunction
|
|
6
7
|
from .signature import Signature, Parameter, Batch
|
|
7
8
|
from .udf import udf, make_function, expr_udf
|
|
pixeltable/func/aggregate_function.py
CHANGED
|
@@ -43,27 +43,29 @@ class AggregateFunction(Function):
|
|
|
43
43
|
|
|
44
44
|
# our signature is the signature of 'update', but without self,
|
|
45
45
|
# plus the parameters of 'init' as keyword-only parameters
|
|
46
|
-
|
|
47
|
-
assert len(
|
|
46
|
+
py_update_params = list(inspect.signature(self.agg_cls.update).parameters.values())[1:] # leave out self
|
|
47
|
+
assert len(py_update_params) == len(update_types)
|
|
48
|
+
update_params = [
|
|
49
|
+
Parameter(p.name, col_type=update_types[i], kind=p.kind, default=p.default)
|
|
50
|
+
for i, p in enumerate(py_update_params)
|
|
51
|
+
]
|
|
52
|
+
# starting at 1: leave out self
|
|
53
|
+
py_init_params = list(inspect.signature(self.agg_cls.__init__).parameters.values())[1:]
|
|
54
|
+
assert len(py_init_params) == len(init_types)
|
|
48
55
|
init_params = [
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
for p in itertools.islice(inspect.signature(self.agg_cls.__init__).parameters.values(), 1, None)
|
|
56
|
+
Parameter(p.name, col_type=init_types[i], kind=inspect.Parameter.KEYWORD_ONLY, default=p.default)
|
|
57
|
+
for i, p in enumerate(py_init_params)
|
|
52
58
|
]
|
|
53
|
-
assert len(init_params) == len(init_types)
|
|
54
59
|
duplicate_params = set(p.name for p in init_params) & set(p.name for p in update_params)
|
|
55
60
|
if len(duplicate_params) > 0:
|
|
56
61
|
raise excs.Error(
|
|
57
62
|
f'__init__() and update() cannot have parameters with the same name: '
|
|
58
63
|
f'{", ".join(duplicate_params)}'
|
|
59
64
|
)
|
|
60
|
-
|
|
61
|
-
py_signature = inspect.Signature(py_params)
|
|
65
|
+
params = update_params + init_params # init_params are keyword-only and come last
|
|
62
66
|
|
|
63
|
-
params = [Parameter(p.name, update_types[i], p.kind, is_batched=False) for i, p in enumerate(update_params)]
|
|
64
|
-
params.extend([Parameter(p.name, init_types[i], p.kind, is_batched=False) for i, p in enumerate(init_params)])
|
|
65
67
|
signature = Signature(value_type, params)
|
|
66
|
-
super().__init__(signature,
|
|
68
|
+
super().__init__(signature, self_path=self_path)
|
|
67
69
|
self.init_param_names = [p.name for p in init_params]
|
|
68
70
|
|
|
69
71
|
# make sure the signature doesn't contain reserved parameter names;
|
|
@@ -115,7 +117,7 @@ class AggregateFunction(Function):
|
|
|
115
117
|
f'{self.display_name}(): group_by invalid with an aggregate function that does not allow windows')
|
|
116
118
|
group_by_clause = kwargs.pop(self.GROUP_BY_PARAM)
|
|
117
119
|
|
|
118
|
-
bound_args = self.py_signature.bind(*args, **kwargs)
|
|
120
|
+
bound_args = self.signature.py_signature.bind(*args, **kwargs)
|
|
119
121
|
self.validate_call(bound_args.arguments)
|
|
120
122
|
return exprs.FunctionCall(
|
|
121
123
|
self, bound_args.arguments,
|
|
pixeltable/func/callable_function.py
CHANGED
|
@@ -25,8 +25,7 @@ class CallableFunction(Function):
|
|
|
25
25
|
self.py_fn = py_fn
|
|
26
26
|
self.self_name = self_name
|
|
27
27
|
self.batch_size = batch_size
|
|
28
|
-
|
|
29
|
-
super().__init__(signature, py_signature, self_path=self_path)
|
|
28
|
+
super().__init__(signature, self_path=self_path)
|
|
30
29
|
|
|
31
30
|
@property
|
|
32
31
|
def is_batched(self) -> bool:
|
|
@@ -91,16 +90,19 @@ class CallableFunction(Function):
|
|
|
91
90
|
return super()._from_dict(d)
|
|
92
91
|
|
|
93
92
|
def to_store(self) -> tuple[dict, bytes]:
|
|
94
|
-
md =
|
|
95
|
-
|
|
96
|
-
|
|
93
|
+
md = {
|
|
94
|
+
'signature': self.signature.as_dict(),
|
|
95
|
+
'batch_size': self.batch_size,
|
|
96
|
+
}
|
|
97
97
|
return md, cloudpickle.dumps(self.py_fn)
|
|
98
98
|
|
|
99
99
|
@classmethod
|
|
100
100
|
def from_store(cls, name: Optional[str], md: dict, binary_obj: bytes) -> Function:
|
|
101
101
|
py_fn = cloudpickle.loads(binary_obj)
|
|
102
102
|
assert isinstance(py_fn, Callable)
|
|
103
|
-
|
|
103
|
+
sig = Signature.from_dict(md['signature'])
|
|
104
|
+
batch_size = md['batch_size']
|
|
105
|
+
return CallableFunction(sig, py_fn, self_name=name, batch_size=batch_size)
|
|
104
106
|
|
|
105
107
|
def validate_call(self, bound_args: dict[str, Any]) -> None:
|
|
106
108
|
import pixeltable.exprs as exprs
|
|
pixeltable/func/expr_template_function.py
CHANGED
|
@@ -11,7 +11,7 @@ class ExprTemplateFunction(Function):
|
|
|
11
11
|
"""A parameterized expression from which an executable Expr is created with a function call."""
|
|
12
12
|
|
|
13
13
|
def __init__(
|
|
14
|
-
self, expr: 'pixeltable.exprs.Expr',
|
|
14
|
+
self, expr: 'pixeltable.exprs.Expr', signature: Signature, self_path: Optional[str] = None,
|
|
15
15
|
name: Optional[str] = None):
|
|
16
16
|
import pixeltable.exprs as exprs
|
|
17
17
|
self.expr = expr
|
|
@@ -23,28 +23,21 @@ class ExprTemplateFunction(Function):
|
|
|
23
23
|
|
|
24
24
|
# verify default values
|
|
25
25
|
self.defaults: Dict[str, exprs.Literal] = {} # key: param name, value: default value converted to a Literal
|
|
26
|
-
for
|
|
27
|
-
if
|
|
26
|
+
for param in signature.parameters.values():
|
|
27
|
+
if param.default is inspect.Parameter.empty:
|
|
28
28
|
continue
|
|
29
|
-
param_expr = self.param_exprs_by_name[
|
|
29
|
+
param_expr = self.param_exprs_by_name[param.name]
|
|
30
30
|
try:
|
|
31
|
-
literal_default = exprs.Literal(
|
|
32
|
-
self.defaults[
|
|
31
|
+
literal_default = exprs.Literal(param.default, col_type=param_expr.col_type)
|
|
32
|
+
self.defaults[param.name] = literal_default
|
|
33
33
|
except TypeError as e:
|
|
34
34
|
msg = str(e)
|
|
35
|
-
raise excs.Error(f"Default value for parameter '{
|
|
36
|
-
# construct signature
|
|
37
|
-
assert len(self.param_exprs) == len(py_signature.parameters)
|
|
38
|
-
fn_params = [
|
|
39
|
-
Parameter(p.name, self.param_exprs_by_name[p.name].col_type, p.kind)
|
|
40
|
-
for p in py_signature.parameters.values()
|
|
41
|
-
]
|
|
42
|
-
signature = Signature(return_type=expr.col_type, parameters=fn_params)
|
|
35
|
+
raise excs.Error(f"Default value for parameter '{param.name}': {msg[0].lower() + msg[1:]}")
|
|
43
36
|
|
|
44
|
-
super().__init__(signature,
|
|
37
|
+
super().__init__(signature, self_path=self_path)
|
|
45
38
|
|
|
46
39
|
def instantiate(self, *args: object, **kwargs: object) -> 'pixeltable.exprs.Expr':
|
|
47
|
-
bound_args = self.py_signature.bind(*args, **kwargs).arguments
|
|
40
|
+
bound_args = self.signature.py_signature.bind(*args, **kwargs).arguments
|
|
48
41
|
# apply defaults, otherwise we might have Parameters left over
|
|
49
42
|
bound_args.update(
|
|
50
43
|
{param_name: default for param_name, default in self.defaults.items() if param_name not in bound_args})
|
|
@@ -88,14 +81,15 @@ class ExprTemplateFunction(Function):
|
|
|
88
81
|
if self.self_path is not None:
|
|
89
82
|
return super()._as_dict()
|
|
90
83
|
return {
|
|
91
|
-
'name': self.name,
|
|
92
84
|
'expr': self.expr.as_dict(),
|
|
93
|
-
|
|
85
|
+
'signature': self.signature.as_dict(),
|
|
86
|
+
'name': self.name,
|
|
94
87
|
}
|
|
95
88
|
|
|
96
89
|
@classmethod
|
|
97
90
|
def _from_dict(cls, d: Dict) -> Function:
|
|
98
91
|
if 'expr' not in d:
|
|
99
92
|
return super()._from_dict(d)
|
|
93
|
+
assert 'signature' in d and 'name' in d
|
|
100
94
|
import pixeltable.exprs as exprs
|
|
101
|
-
return cls(exprs.Expr.from_dict(d['expr']), name=d['name'])
|
|
95
|
+
return cls(exprs.Expr.from_dict(d['expr']), Signature.from_dict(d['signature']), name=d['name'])
|
pixeltable/func/function.py
CHANGED
|
@@ -19,11 +19,8 @@ class Function(abc.ABC):
|
|
|
19
19
|
via the member self_path.
|
|
20
20
|
"""
|
|
21
21
|
|
|
22
|
-
def __init__(
|
|
23
|
-
self, signature: Signature, py_signature: inspect.Signature, self_path: Optional[str] = None
|
|
24
|
-
):
|
|
22
|
+
def __init__(self, signature: Signature, self_path: Optional[str] = None):
|
|
25
23
|
self.signature = signature
|
|
26
|
-
self.py_signature = py_signature
|
|
27
24
|
self.self_path = self_path # fully-qualified path to self
|
|
28
25
|
self._conditional_return_type: Optional[Callable[..., ts.ColumnType]] = None
|
|
29
26
|
|
|
@@ -46,7 +43,7 @@ class Function(abc.ABC):
|
|
|
46
43
|
|
|
47
44
|
def __call__(self, *args: Any, **kwargs: Any) -> 'pixeltable.exprs.Expr':
|
|
48
45
|
from pixeltable import exprs
|
|
49
|
-
bound_args = self.py_signature.bind(*args, **kwargs)
|
|
46
|
+
bound_args = self.signature.py_signature.bind(*args, **kwargs)
|
|
50
47
|
self.validate_call(bound_args.arguments)
|
|
51
48
|
return exprs.FunctionCall(self, bound_args.arguments)
|
|
52
49
|
|
|
@@ -58,7 +55,7 @@ class Function(abc.ABC):
|
|
|
58
55
|
"""Return the type of the value returned by calling this function with the given arguments"""
|
|
59
56
|
if self._conditional_return_type is None:
|
|
60
57
|
return self.signature.return_type
|
|
61
|
-
bound_args = self.py_signature.bind(**kwargs)
|
|
58
|
+
bound_args = self.signature.py_signature.bind(**kwargs)
|
|
62
59
|
kw_args: dict[str, Any] = {}
|
|
63
60
|
sig = inspect.signature(self._conditional_return_type)
|
|
64
61
|
for param in sig.parameters.values():
|
|
pixeltable/func/query_template_function.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
import inspect
|
|
3
|
+
from typing import Dict, Optional, Any, Callable
|
|
4
|
+
|
|
5
|
+
import sqlalchemy as sql
|
|
6
|
+
|
|
7
|
+
import pixeltable
|
|
8
|
+
import pixeltable.exceptions as excs
|
|
9
|
+
import pixeltable.type_system as ts
|
|
10
|
+
from .function import Function
|
|
11
|
+
from .signature import Signature, Parameter
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class QueryTemplateFunction(Function):
|
|
15
|
+
"""A parameterized query/DataFrame from which an executable DataFrame is created with a function call."""
|
|
16
|
+
|
|
17
|
+
@classmethod
|
|
18
|
+
def create(
|
|
19
|
+
cls, template_callable: Callable, param_types: Optional[list[ts.ColumnType]], path: str, name: str
|
|
20
|
+
) -> QueryTemplateFunction:
|
|
21
|
+
# we need to construct a template df and a signature
|
|
22
|
+
py_sig = inspect.signature(template_callable)
|
|
23
|
+
py_params = list(py_sig.parameters.values())
|
|
24
|
+
params = Signature.create_parameters(py_params=py_params, param_types=param_types)
|
|
25
|
+
# invoke template_callable with parameter expressions to construct a DataFrame with parameters
|
|
26
|
+
import pixeltable.exprs as exprs
|
|
27
|
+
var_exprs = [exprs.Variable(param.name, param.col_type) for param in params]
|
|
28
|
+
template_df = template_callable(*var_exprs)
|
|
29
|
+
from pixeltable import DataFrame
|
|
30
|
+
assert isinstance(template_df, DataFrame)
|
|
31
|
+
# we take params and return json
|
|
32
|
+
sig = Signature(return_type=ts.JsonType(), parameters=params)
|
|
33
|
+
return QueryTemplateFunction(template_df, sig, path=path, name=name)
|
|
34
|
+
|
|
35
|
+
def __init__(
|
|
36
|
+
self, template_df: Optional['pixeltable.DataFrame'], sig: Optional[Signature], path: Optional[str] = None,
|
|
37
|
+
name: Optional[str] = None,
|
|
38
|
+
):
|
|
39
|
+
super().__init__(sig, self_path=path)
|
|
40
|
+
self.self_name = name
|
|
41
|
+
self.template_df = template_df
|
|
42
|
+
|
|
43
|
+
# if we're running as part of an ongoing update operation, we need to use the same connection, otherwise
|
|
44
|
+
# we end up with a deadlock
|
|
45
|
+
# TODO: figure out a more general way to make execution state available
|
|
46
|
+
self.conn: Optional[sql.engine.Connection] = None
|
|
47
|
+
|
|
48
|
+
# convert defaults to Literals
|
|
49
|
+
import pixeltable.exprs as exprs
|
|
50
|
+
self.defaults: dict[str, exprs.Literal] = {} # key: param name, value: default value converted to a Literal
|
|
51
|
+
param_types = self.template_df.parameters()
|
|
52
|
+
for param in [p for p in self.signature.parameters.values() if p.has_default()]:
|
|
53
|
+
assert param.name in param_types
|
|
54
|
+
param_type = param_types[param.name]
|
|
55
|
+
literal_default = exprs.Literal(param.default, col_type=param_type)
|
|
56
|
+
self.defaults[param.name] = literal_default
|
|
57
|
+
|
|
58
|
+
def set_conn(self, conn: Optional[sql.engine.Connection]) -> None:
|
|
59
|
+
self.conn = conn
|
|
60
|
+
|
|
61
|
+
def exec(self, *args: Any, **kwargs: Any) -> Any:
|
|
62
|
+
bound_args = self.signature.py_signature.bind(*args, **kwargs).arguments
|
|
63
|
+
# apply defaults, otherwise we might have Parameters left over
|
|
64
|
+
bound_args.update(
|
|
65
|
+
{param_name: default for param_name, default in self.defaults.items() if param_name not in bound_args})
|
|
66
|
+
bound_df = self.template_df.bind(bound_args)
|
|
67
|
+
result = bound_df._collect(self.conn)
|
|
68
|
+
return list(result)
|
|
69
|
+
|
|
70
|
+
@property
|
|
71
|
+
def display_name(self) -> str:
|
|
72
|
+
return self.self_name
|
|
73
|
+
|
|
74
|
+
@property
|
|
75
|
+
def name(self) -> str:
|
|
76
|
+
return self.self_name
|
|
77
|
+
|
|
78
|
+
def _as_dict(self) -> Dict:
|
|
79
|
+
return {'name': self.name, 'signature': self.signature.as_dict(), 'df': self.template_df.as_dict()}
|
|
80
|
+
|
|
81
|
+
@classmethod
|
|
82
|
+
def _from_dict(cls, d: Dict) -> Function:
|
|
83
|
+
from pixeltable.dataframe import DataFrame
|
|
84
|
+
return cls(DataFrame.from_dict(d['df']), Signature.from_dict(d['signature']), name=d['name'])
|
pixeltable/func/signature.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import json
|
|
3
4
|
import dataclasses
|
|
4
5
|
import enum
|
|
5
6
|
import inspect
|
|
@@ -18,8 +19,56 @@ class Parameter:
|
|
|
18
19
|
name: str
|
|
19
20
|
col_type: Optional[ts.ColumnType] # None for variable parameters
|
|
20
21
|
kind: enum.Enum # inspect.Parameter.kind; inspect._ParameterKind is private
|
|
22
|
+
# for some reason, this needs to precede is_batched in the dataclass definition,
|
|
23
|
+
# otherwise Python complains that an argument with a default is followed by an argument without a default
|
|
24
|
+
default: Any = inspect.Parameter.empty # default value for the parameter
|
|
21
25
|
is_batched: bool = False # True if the parameter is a batched parameter (eg, Batch[dict])
|
|
22
26
|
|
|
27
|
+
def __post_init__(self) -> None:
|
|
28
|
+
# make sure that default is json-serializable and of the correct type
|
|
29
|
+
if self.default is inspect.Parameter.empty or self.default is None:
|
|
30
|
+
return
|
|
31
|
+
try:
|
|
32
|
+
_ = json.dumps(self.default)
|
|
33
|
+
except TypeError:
|
|
34
|
+
raise excs.Error(f'Default value for parameter {self.name} is not JSON-serializable: {str(self.default)}')
|
|
35
|
+
if self.col_type is not None:
|
|
36
|
+
try:
|
|
37
|
+
self.col_type.validate_literal(self.default)
|
|
38
|
+
except TypeError as e:
|
|
39
|
+
raise excs.Error(f'Default value for parameter {self.name}: {str(e)}')
|
|
40
|
+
|
|
41
|
+
def has_default(self) -> bool:
|
|
42
|
+
return self.default is not inspect.Parameter.empty
|
|
43
|
+
|
|
44
|
+
def as_dict(self) -> dict[str, Any]:
|
|
45
|
+
return {
|
|
46
|
+
'name': self.name,
|
|
47
|
+
'col_type': self.col_type.as_dict() if self.col_type is not None else None,
|
|
48
|
+
'kind': self.kind.name,
|
|
49
|
+
'is_batched': self.is_batched,
|
|
50
|
+
'has_default': self.has_default(),
|
|
51
|
+
'default': self.default if self.has_default() else None,
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
@classmethod
|
|
55
|
+
def from_dict(cls, d: dict[str, Any]) -> Parameter:
|
|
56
|
+
has_default = d['has_default']
|
|
57
|
+
if has_default:
|
|
58
|
+
default = d['default']
|
|
59
|
+
else:
|
|
60
|
+
default = inspect.Parameter.empty
|
|
61
|
+
return cls(
|
|
62
|
+
name=d['name'],
|
|
63
|
+
col_type=ts.ColumnType.from_dict(d['col_type']) if d['col_type'] is not None else None,
|
|
64
|
+
kind=getattr(inspect.Parameter, d['kind']),
|
|
65
|
+
is_batched=d['is_batched'],
|
|
66
|
+
default=default
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
def to_py_param(self) -> inspect.Parameter:
|
|
70
|
+
return inspect.Parameter(self.name, self.kind, default=self.default)
|
|
71
|
+
|
|
23
72
|
|
|
24
73
|
T = typing.TypeVar('T')
|
|
25
74
|
Batch = typing.Annotated[list[T], 'pxt-batch']
|
|
@@ -42,6 +91,7 @@ class Signature:
|
|
|
42
91
|
self.parameters_by_pos = parameters.copy()
|
|
43
92
|
self.constant_parameters = [p for p in parameters if not p.is_batched]
|
|
44
93
|
self.batched_parameters = [p for p in parameters if p.is_batched]
|
|
94
|
+
self.py_signature = inspect.Signature([p.to_py_param() for p in self.parameters_by_pos])
|
|
45
95
|
|
|
46
96
|
def get_return_type(self) -> ts.ColumnType:
|
|
47
97
|
assert isinstance(self.return_type, ts.ColumnType)
|
|
@@ -50,17 +100,15 @@ class Signature:
|
|
|
50
100
|
def as_dict(self) -> Dict[str, Any]:
|
|
51
101
|
result = {
|
|
52
102
|
'return_type': self.get_return_type().as_dict(),
|
|
53
|
-
'parameters': [
|
|
54
|
-
|
|
55
|
-
for p in self.parameters.values()
|
|
56
|
-
]
|
|
103
|
+
'parameters': [p.as_dict() for p in self.parameters.values()],
|
|
104
|
+
'is_batched': self.is_batched,
|
|
57
105
|
}
|
|
58
106
|
return result
|
|
59
107
|
|
|
60
108
|
@classmethod
|
|
61
109
|
def from_dict(cls, d: Dict[str, Any]) -> Signature:
|
|
62
|
-
parameters = [Parameter
|
|
63
|
-
return cls(ts.ColumnType.from_dict(d['return_type']), parameters)
|
|
110
|
+
parameters = [Parameter.from_dict(param_dict) for param_dict in d['parameters']]
|
|
111
|
+
return cls(ts.ColumnType.from_dict(d['return_type']), parameters, d['is_batched'])
|
|
64
112
|
|
|
65
113
|
def __eq__(self, other: Signature) -> bool:
|
|
66
114
|
if self.get_return_type() != other.get_return_type():
|
|
@@ -105,16 +153,20 @@ class Signature:
|
|
|
105
153
|
|
|
106
154
|
@classmethod
|
|
107
155
|
def create_parameters(
|
|
108
|
-
cls,
|
|
109
|
-
|
|
110
|
-
|
|
156
|
+
cls, py_fn: Optional[Callable] = None, py_params: Optional[list[inspect.Parameter]] = None,
|
|
157
|
+
param_types: Optional[List[ts.ColumnType]] = None
|
|
158
|
+
) -> List[Parameter]:
|
|
159
|
+
assert (py_fn is None) != (py_params is None)
|
|
160
|
+
if py_fn is not None:
|
|
161
|
+
sig = inspect.signature(py_fn)
|
|
162
|
+
py_params = list(sig.parameters.values())
|
|
111
163
|
parameters: List[Parameter] = []
|
|
112
164
|
|
|
113
|
-
for idx, param in enumerate(
|
|
165
|
+
for idx, param in enumerate(py_params):
|
|
114
166
|
if param.name in cls.SPECIAL_PARAM_NAMES:
|
|
115
167
|
raise excs.Error(f"'{param.name}' is a reserved parameter name")
|
|
116
168
|
if param.kind == inspect.Parameter.VAR_POSITIONAL or param.kind == inspect.Parameter.VAR_KEYWORD:
|
|
117
|
-
parameters.append(Parameter(param.name, None, param.kind
|
|
169
|
+
parameters.append(Parameter(param.name, col_type=None, kind=param.kind))
|
|
118
170
|
continue
|
|
119
171
|
|
|
120
172
|
# check non-var parameters for name collisions and default value compatibility
|
|
@@ -128,21 +180,14 @@ class Signature:
|
|
|
128
180
|
if param_type is None:
|
|
129
181
|
raise excs.Error(f'Cannot infer pixeltable type for parameter {param.name}')
|
|
130
182
|
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
if default_val != inspect.Parameter.empty and default_val is not None:
|
|
134
|
-
try:
|
|
135
|
-
_ = param_type.create_literal(default_val)
|
|
136
|
-
except TypeError as e:
|
|
137
|
-
raise excs.Error(f'Default value for parameter {param.name}: {str(e)}')
|
|
138
|
-
|
|
139
|
-
parameters.append(Parameter(param.name, param_type, param.kind, is_batched))
|
|
183
|
+
parameters.append(Parameter(
|
|
184
|
+
param.name, col_type=param_type, kind=param.kind, is_batched=is_batched, default=param.default))
|
|
140
185
|
|
|
141
186
|
return parameters
|
|
142
187
|
|
|
143
188
|
@classmethod
|
|
144
189
|
def create(
|
|
145
|
-
cls,
|
|
190
|
+
cls, py_fn: Callable,
|
|
146
191
|
param_types: Optional[List[ts.ColumnType]] = None,
|
|
147
192
|
return_type: Optional[Union[ts.ColumnType, Callable]] = None
|
|
148
193
|
) -> Signature:
|
|
@@ -150,8 +195,8 @@ class Signature:
|
|
|
150
195
|
Infer the parameter and return types, if none are specified.
|
|
151
196
|
Raises an exception if the types cannot be inferred.
|
|
152
197
|
"""
|
|
153
|
-
parameters = cls.create_parameters(
|
|
154
|
-
sig = inspect.signature(
|
|
198
|
+
parameters = cls.create_parameters(py_fn=py_fn, param_types=param_types)
|
|
199
|
+
sig = inspect.signature(py_fn)
|
|
155
200
|
if return_type is None:
|
|
156
201
|
return_type, return_is_batched = cls._infer_type(sig.return_annotation)
|
|
157
202
|
if return_type is None:
|
pixeltable/func/udf.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
import inspect
|
|
4
3
|
from typing import List, Callable, Optional, overload, Any
|
|
5
4
|
|
|
6
5
|
import pixeltable as pxt
|
|
@@ -56,8 +55,12 @@ def udf(*args, **kwargs):
|
|
|
56
55
|
return_type = kwargs.pop('return_type', None)
|
|
57
56
|
param_types = kwargs.pop('param_types', None)
|
|
58
57
|
batch_size = kwargs.pop('batch_size', None)
|
|
59
|
-
substitute_fn = kwargs.pop('
|
|
58
|
+
substitute_fn = kwargs.pop('substitute_fn', None)
|
|
60
59
|
force_stored = kwargs.pop('_force_stored', False)
|
|
60
|
+
if len(kwargs) > 0:
|
|
61
|
+
raise excs.Error(f'Invalid @udf decorator kwargs: {", ".join(kwargs.keys())}')
|
|
62
|
+
if len(args) > 0:
|
|
63
|
+
raise excs.Error('Unexpected @udf decorator arguments.')
|
|
61
64
|
|
|
62
65
|
def decorator(decorated_fn: Callable):
|
|
63
66
|
return make_function(
|
|
@@ -134,7 +137,7 @@ def expr_udf(py_fn: Callable) -> ExprTemplateFunction: ...
|
|
|
134
137
|
def expr_udf(*, param_types: Optional[List[ts.ColumnType]] = None) -> Callable[[Callable], ExprTemplateFunction]: ...
|
|
135
138
|
|
|
136
139
|
def expr_udf(*args: Any, **kwargs: Any) -> Any:
|
|
137
|
-
def
|
|
140
|
+
def make_expr_template(py_fn: Callable, param_types: Optional[List[ts.ColumnType]]) -> ExprTemplateFunction:
|
|
138
141
|
if py_fn.__module__ != '__main__' and py_fn.__name__.isidentifier():
|
|
139
142
|
# this is a named function in a module
|
|
140
143
|
function_path = f'{py_fn.__module__}.{py_fn.__qualname__}'
|
|
@@ -144,21 +147,21 @@ def expr_udf(*args: Any, **kwargs: Any) -> Any:
|
|
|
144
147
|
# TODO: verify that the inferred return type matches that of the template
|
|
145
148
|
# TODO: verify that the signature doesn't contain batched parameters
|
|
146
149
|
|
|
147
|
-
# construct
|
|
148
|
-
|
|
150
|
+
# construct Signature from the function signature
|
|
151
|
+
sig = Signature.create(py_fn=py_fn, param_types=param_types, return_type=ts.InvalidType())
|
|
149
152
|
import pixeltable.exprs as exprs
|
|
150
|
-
var_exprs = [exprs.Variable(param.name, param.col_type) for param in
|
|
153
|
+
var_exprs = [exprs.Variable(param.name, param.col_type) for param in sig.parameters.values()]
|
|
151
154
|
# call the function with the parameter expressions to construct an Expr with parameters
|
|
152
155
|
template = py_fn(*var_exprs)
|
|
153
156
|
assert isinstance(template, exprs.Expr)
|
|
154
|
-
|
|
157
|
+
sig.return_type = template.col_type
|
|
155
158
|
if function_path is not None:
|
|
156
159
|
validate_symbol_path(function_path)
|
|
157
|
-
return ExprTemplateFunction(template,
|
|
160
|
+
return ExprTemplateFunction(template, sig, self_path=function_path, name=py_fn.__name__)
|
|
158
161
|
|
|
159
162
|
if len(args) == 1:
|
|
160
163
|
assert len(kwargs) == 0 and callable(args[0])
|
|
161
|
-
return
|
|
164
|
+
return make_expr_template(args[0], None)
|
|
162
165
|
else:
|
|
163
166
|
assert len(args) == 0 and len(kwargs) == 1 and 'param_types' in kwargs
|
|
164
|
-
return lambda py_fn:
|
|
167
|
+
return lambda py_fn: make_expr_template(py_fn, kwargs['param_types'])
|