pixeltable 0.3.6__py3-none-any.whl → 0.3.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +5 -3
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +1 -0
- pixeltable/catalog/catalog.py +335 -128
- pixeltable/catalog/column.py +22 -5
- pixeltable/catalog/dir.py +19 -6
- pixeltable/catalog/insertable_table.py +34 -37
- pixeltable/catalog/named_function.py +0 -4
- pixeltable/catalog/schema_object.py +28 -42
- pixeltable/catalog/table.py +193 -158
- pixeltable/catalog/table_version.py +191 -232
- pixeltable/catalog/table_version_handle.py +50 -0
- pixeltable/catalog/table_version_path.py +49 -33
- pixeltable/catalog/view.py +56 -96
- pixeltable/config.py +103 -0
- pixeltable/dataframe.py +89 -89
- pixeltable/env.py +98 -168
- pixeltable/exec/aggregation_node.py +5 -4
- pixeltable/exec/cache_prefetch_node.py +1 -1
- pixeltable/exec/component_iteration_node.py +13 -9
- pixeltable/exec/data_row_batch.py +3 -3
- pixeltable/exec/exec_context.py +0 -4
- pixeltable/exec/exec_node.py +3 -2
- pixeltable/exec/expr_eval/schedulers.py +2 -1
- pixeltable/exec/in_memory_data_node.py +9 -4
- pixeltable/exec/row_update_node.py +1 -2
- pixeltable/exec/sql_node.py +20 -16
- pixeltable/exprs/__init__.py +2 -0
- pixeltable/exprs/arithmetic_expr.py +7 -11
- pixeltable/exprs/array_slice.py +1 -1
- pixeltable/exprs/column_property_ref.py +3 -3
- pixeltable/exprs/column_ref.py +12 -13
- pixeltable/exprs/comparison.py +3 -6
- pixeltable/exprs/compound_predicate.py +4 -4
- pixeltable/exprs/expr.py +31 -22
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +1 -1
- pixeltable/exprs/function_call.py +110 -80
- pixeltable/exprs/globals.py +3 -3
- pixeltable/exprs/in_predicate.py +1 -1
- pixeltable/exprs/inline_expr.py +3 -3
- pixeltable/exprs/is_null.py +1 -1
- pixeltable/exprs/json_mapper.py +2 -2
- pixeltable/exprs/json_path.py +17 -10
- pixeltable/exprs/literal.py +1 -1
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/row_builder.py +8 -17
- pixeltable/exprs/rowid_ref.py +21 -10
- pixeltable/exprs/similarity_expr.py +5 -5
- pixeltable/exprs/sql_element_cache.py +1 -1
- pixeltable/exprs/type_cast.py +2 -3
- pixeltable/exprs/variable.py +2 -2
- pixeltable/ext/__init__.py +2 -0
- pixeltable/ext/functions/__init__.py +2 -0
- pixeltable/ext/functions/yolox.py +3 -3
- pixeltable/func/__init__.py +3 -1
- pixeltable/func/aggregate_function.py +9 -9
- pixeltable/func/callable_function.py +3 -4
- pixeltable/func/expr_template_function.py +6 -16
- pixeltable/func/function.py +48 -14
- pixeltable/func/function_registry.py +1 -3
- pixeltable/func/query_template_function.py +5 -12
- pixeltable/func/signature.py +23 -22
- pixeltable/func/tools.py +3 -3
- pixeltable/func/udf.py +6 -4
- pixeltable/functions/__init__.py +2 -0
- pixeltable/functions/fireworks.py +7 -4
- pixeltable/functions/globals.py +4 -5
- pixeltable/functions/huggingface.py +1 -5
- pixeltable/functions/image.py +17 -7
- pixeltable/functions/llama_cpp.py +1 -1
- pixeltable/functions/mistralai.py +1 -1
- pixeltable/functions/ollama.py +4 -4
- pixeltable/functions/openai.py +19 -19
- pixeltable/functions/string.py +23 -30
- pixeltable/functions/timestamp.py +11 -6
- pixeltable/functions/together.py +14 -12
- pixeltable/functions/util.py +1 -1
- pixeltable/functions/video.py +5 -4
- pixeltable/functions/vision.py +6 -9
- pixeltable/functions/whisper.py +3 -3
- pixeltable/globals.py +246 -260
- pixeltable/index/__init__.py +2 -0
- pixeltable/index/base.py +1 -1
- pixeltable/index/btree.py +3 -1
- pixeltable/index/embedding_index.py +11 -5
- pixeltable/io/external_store.py +11 -12
- pixeltable/io/label_studio.py +4 -3
- pixeltable/io/parquet.py +57 -56
- pixeltable/iterators/__init__.py +4 -2
- pixeltable/iterators/audio.py +11 -11
- pixeltable/iterators/document.py +10 -10
- pixeltable/iterators/string.py +1 -2
- pixeltable/iterators/video.py +14 -15
- pixeltable/metadata/__init__.py +9 -5
- pixeltable/metadata/converters/convert_10.py +0 -1
- pixeltable/metadata/converters/convert_15.py +0 -2
- pixeltable/metadata/converters/convert_23.py +0 -2
- pixeltable/metadata/converters/convert_24.py +3 -3
- pixeltable/metadata/converters/convert_25.py +1 -1
- pixeltable/metadata/converters/convert_27.py +0 -2
- pixeltable/metadata/converters/convert_28.py +0 -2
- pixeltable/metadata/converters/convert_29.py +7 -8
- pixeltable/metadata/converters/util.py +7 -7
- pixeltable/metadata/schema.py +27 -19
- pixeltable/plan.py +68 -40
- pixeltable/share/__init__.py +2 -0
- pixeltable/share/packager.py +15 -12
- pixeltable/share/publish.py +3 -5
- pixeltable/store.py +37 -38
- pixeltable/type_system.py +41 -28
- pixeltable/utils/coco.py +4 -4
- pixeltable/utils/console_output.py +1 -3
- pixeltable/utils/description_helper.py +1 -1
- pixeltable/utils/documents.py +3 -3
- pixeltable/utils/filecache.py +20 -9
- pixeltable/utils/formatter.py +2 -3
- pixeltable/utils/media_store.py +1 -1
- pixeltable/utils/pytorch.py +1 -1
- pixeltable/utils/sql.py +4 -4
- pixeltable/utils/transactional_directory.py +2 -1
- {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/METADATA +1 -1
- pixeltable-0.3.8.dist-info/RECORD +174 -0
- pixeltable-0.3.6.dist-info/RECORD +0 -172
- {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/WHEEL +0 -0
- {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/entry_points.txt +0 -0
|
@@ -62,14 +62,14 @@ class SimilarityExpr(Expr):
|
|
|
62
62
|
return f'{self.components[0]}.similarity({self.components[1]})'
|
|
63
63
|
|
|
64
64
|
def _id_attrs(self):
|
|
65
|
-
return super()._id_attrs()
|
|
65
|
+
return [*super()._id_attrs(), ('idx_name', self.idx_info.name)]
|
|
66
66
|
|
|
67
67
|
def default_column_name(self) -> str:
|
|
68
68
|
return 'similarity'
|
|
69
69
|
|
|
70
70
|
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
71
71
|
if not isinstance(self.components[1], Literal):
|
|
72
|
-
raise excs.Error(
|
|
72
|
+
raise excs.Error('similarity(): requires a string or a PIL.Image.Image object, not an expression')
|
|
73
73
|
item = self.components[1].val
|
|
74
74
|
from pixeltable import index
|
|
75
75
|
|
|
@@ -78,7 +78,7 @@ class SimilarityExpr(Expr):
|
|
|
78
78
|
|
|
79
79
|
def as_order_by_clause(self, is_asc: bool) -> Optional[sql.ColumnElement]:
|
|
80
80
|
if not isinstance(self.components[1], Literal):
|
|
81
|
-
raise excs.Error(
|
|
81
|
+
raise excs.Error('similarity(): requires a string or a PIL.Image.Image object, not an expression')
|
|
82
82
|
item = self.components[1].val
|
|
83
83
|
from pixeltable import index
|
|
84
84
|
|
|
@@ -87,14 +87,14 @@ class SimilarityExpr(Expr):
|
|
|
87
87
|
|
|
88
88
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
89
89
|
# this should never get called
|
|
90
|
-
|
|
90
|
+
raise AssertionError()
|
|
91
91
|
|
|
92
92
|
def _as_dict(self) -> dict:
|
|
93
93
|
return {'idx_name': self.idx_info.name, **super()._as_dict()}
|
|
94
94
|
|
|
95
95
|
@classmethod
|
|
96
96
|
def _from_dict(cls, d: dict, components: list[Expr]) -> 'SimilarityExpr':
|
|
97
|
-
iname = d
|
|
97
|
+
iname = d.get('idx_name')
|
|
98
98
|
assert len(components) == 2
|
|
99
99
|
assert isinstance(components[0], ColumnRef)
|
|
100
100
|
return cls(components[0], components[1], idx_name=iname)
|
pixeltable/exprs/type_cast.py
CHANGED
|
@@ -1,9 +1,8 @@
|
|
|
1
|
-
from typing import
|
|
1
|
+
from typing import Optional
|
|
2
2
|
|
|
3
3
|
import sqlalchemy as sql
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
import pixeltable.type_system as ts
|
|
5
|
+
from pixeltable import type_system as ts
|
|
7
6
|
|
|
8
7
|
from .expr import DataRow, Expr
|
|
9
8
|
from .literal import Literal
|
pixeltable/exprs/variable.py
CHANGED
|
@@ -2,7 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
from typing import Any, NoReturn
|
|
4
4
|
|
|
5
|
-
import
|
|
5
|
+
from pixeltable import type_system as ts
|
|
6
6
|
|
|
7
7
|
from .data_row import DataRow
|
|
8
8
|
from .expr import Expr
|
|
@@ -22,7 +22,7 @@ class Variable(Expr):
|
|
|
22
22
|
self.id = self._create_id()
|
|
23
23
|
|
|
24
24
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
25
|
-
return super()._id_attrs()
|
|
25
|
+
return [*super()._id_attrs(), ('name', self.name)]
|
|
26
26
|
|
|
27
27
|
def default_column_name(self) -> NoReturn:
|
|
28
28
|
raise NotImplementedError()
|
pixeltable/ext/__init__.py
CHANGED
|
@@ -111,10 +111,10 @@ def _images_to_tensors(images: Iterable[PIL.Image.Image], exp: 'Exp') -> Iterato
|
|
|
111
111
|
import torch
|
|
112
112
|
from yolox.data import ValTransform # type: ignore[import-untyped]
|
|
113
113
|
|
|
114
|
-
|
|
114
|
+
val_transform = ValTransform(legacy=False)
|
|
115
115
|
for image in images:
|
|
116
|
-
|
|
117
|
-
image_transform, _ =
|
|
116
|
+
normalized_image = normalize_image_mode(image)
|
|
117
|
+
image_transform, _ = val_transform(np.array(normalized_image), None, exp.test_size)
|
|
118
118
|
yield torch.from_numpy(image_transform)
|
|
119
119
|
|
|
120
120
|
|
pixeltable/func/__init__.py
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
|
+
# ruff: noqa: F401
|
|
2
|
+
|
|
1
3
|
from .aggregate_function import AggregateFunction, Aggregator, uda
|
|
2
4
|
from .callable_function import CallableFunction
|
|
3
5
|
from .expr_template_function import ExprTemplateFunction
|
|
4
|
-
from .function import Function
|
|
6
|
+
from .function import Function, InvalidFunction
|
|
5
7
|
from .function_registry import FunctionRegistry
|
|
6
8
|
from .query_template_function import QueryTemplateFunction, query
|
|
7
9
|
from .signature import Batch, Parameter, Signature
|
|
@@ -2,7 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import abc
|
|
4
4
|
import inspect
|
|
5
|
-
from typing import TYPE_CHECKING, Any, Callable, Optional, Sequence, overload
|
|
5
|
+
from typing import TYPE_CHECKING, Any, Callable, ClassVar, Optional, Sequence, overload
|
|
6
6
|
|
|
7
7
|
import pixeltable.exceptions as excs
|
|
8
8
|
import pixeltable.type_system as ts
|
|
@@ -16,11 +16,11 @@ if TYPE_CHECKING:
|
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
class Aggregator(abc.ABC):
|
|
19
|
-
|
|
20
|
-
|
|
19
|
+
@abc.abstractmethod
|
|
20
|
+
def update(self, *args: Any, **kwargs: Any) -> None: ...
|
|
21
21
|
|
|
22
|
-
|
|
23
|
-
|
|
22
|
+
@abc.abstractmethod
|
|
23
|
+
def value(self) -> Any: ...
|
|
24
24
|
|
|
25
25
|
|
|
26
26
|
class AggregateFunction(Function):
|
|
@@ -32,9 +32,9 @@ class AggregateFunction(Function):
|
|
|
32
32
|
allows_window: if True, the aggregate function can be used with a window
|
|
33
33
|
"""
|
|
34
34
|
|
|
35
|
-
ORDER_BY_PARAM = 'order_by'
|
|
36
|
-
GROUP_BY_PARAM = 'group_by'
|
|
37
|
-
RESERVED_PARAMS = {ORDER_BY_PARAM, GROUP_BY_PARAM}
|
|
35
|
+
ORDER_BY_PARAM: ClassVar[str] = 'order_by'
|
|
36
|
+
GROUP_BY_PARAM: ClassVar[str] = 'group_by'
|
|
37
|
+
RESERVED_PARAMS: ClassVar[set[str]] = {ORDER_BY_PARAM, GROUP_BY_PARAM}
|
|
38
38
|
|
|
39
39
|
agg_classes: list[type[Aggregator]] # classes for each signature, in signature order
|
|
40
40
|
init_param_names: list[list[str]] # names of the __init__ parameters for each signature
|
|
@@ -124,7 +124,7 @@ class AggregateFunction(Function):
|
|
|
124
124
|
)
|
|
125
125
|
for i, p in enumerate(py_init_params)
|
|
126
126
|
]
|
|
127
|
-
duplicate_params =
|
|
127
|
+
duplicate_params = {p.name for p in init_params} & {p.name for p in update_params}
|
|
128
128
|
if len(duplicate_params) > 0:
|
|
129
129
|
raise excs.Error(
|
|
130
130
|
f'__init__() and update() cannot have parameters with the same name: {", ".join(duplicate_params)}'
|
|
@@ -98,11 +98,10 @@ class CallableFunction(Function):
|
|
|
98
98
|
result = self.py_fn(*batched_args, **constant_kwargs, **batched_kwargs)
|
|
99
99
|
assert len(result) == 1
|
|
100
100
|
return result[0]
|
|
101
|
+
elif inspect.iscoroutinefunction(self.py_fn):
|
|
102
|
+
return asyncio.run(self.py_fn(*args, **kwargs))
|
|
101
103
|
else:
|
|
102
|
-
|
|
103
|
-
return asyncio.run(self.py_fn(*args, **kwargs))
|
|
104
|
-
else:
|
|
105
|
-
return self.py_fn(*args, **kwargs)
|
|
104
|
+
return self.py_fn(*args, **kwargs)
|
|
106
105
|
|
|
107
106
|
async def aexec_batch(self, *args: Any, **kwargs: Any) -> list:
|
|
108
107
|
"""Execute the function with the given arguments and return the result.
|
|
@@ -1,8 +1,6 @@
|
|
|
1
|
-
import inspect
|
|
2
1
|
from typing import Any, Optional, Sequence
|
|
3
2
|
|
|
4
|
-
import
|
|
5
|
-
import pixeltable.exceptions as excs
|
|
3
|
+
from pixeltable import exceptions as excs, exprs
|
|
6
4
|
|
|
7
5
|
from .function import Function
|
|
8
6
|
from .signature import Signature
|
|
@@ -15,13 +13,11 @@ class ExprTemplate:
|
|
|
15
13
|
`CallableFunction`.)
|
|
16
14
|
"""
|
|
17
15
|
|
|
18
|
-
expr: '
|
|
16
|
+
expr: 'exprs.Expr'
|
|
19
17
|
signature: Signature
|
|
20
|
-
param_exprs: dict[str, '
|
|
21
|
-
|
|
22
|
-
def __init__(self, expr: 'pixeltable.exprs.Expr', signature: Signature):
|
|
23
|
-
from pixeltable import exprs
|
|
18
|
+
param_exprs: dict[str, 'exprs.Variable']
|
|
24
19
|
|
|
20
|
+
def __init__(self, expr: 'exprs.Expr', signature: Signature):
|
|
25
21
|
self.expr = expr
|
|
26
22
|
self.signature = signature
|
|
27
23
|
|
|
@@ -59,9 +55,7 @@ class ExprTemplateFunction(Function):
|
|
|
59
55
|
assert not self.is_polymorphic
|
|
60
56
|
return self.templates[0]
|
|
61
57
|
|
|
62
|
-
def instantiate(self, args: Sequence[Any], kwargs: dict[str, Any]) -> '
|
|
63
|
-
from pixeltable import exprs
|
|
64
|
-
|
|
58
|
+
def instantiate(self, args: Sequence[Any], kwargs: dict[str, Any]) -> 'exprs.Expr':
|
|
65
59
|
assert not self.is_polymorphic
|
|
66
60
|
template = self.template
|
|
67
61
|
bound_args = self.signature.py_signature.bind(*args, **kwargs).arguments
|
|
@@ -86,14 +80,12 @@ class ExprTemplateFunction(Function):
|
|
|
86
80
|
return result
|
|
87
81
|
|
|
88
82
|
def _docstring(self) -> Optional[str]:
|
|
89
|
-
from pixeltable import exprs
|
|
90
|
-
|
|
91
83
|
if isinstance(self.templates[0].expr, exprs.FunctionCall):
|
|
92
84
|
return self.templates[0].expr.fn._docstring()
|
|
93
85
|
return None
|
|
94
86
|
|
|
95
87
|
def exec(self, args: Sequence[Any], kwargs: dict[str, Any]) -> Any:
|
|
96
|
-
from pixeltable import exec
|
|
88
|
+
from pixeltable import exec
|
|
97
89
|
|
|
98
90
|
assert not self.is_polymorphic
|
|
99
91
|
expr = self.instantiate(args, kwargs)
|
|
@@ -130,7 +122,5 @@ class ExprTemplateFunction(Function):
|
|
|
130
122
|
if 'expr' not in d:
|
|
131
123
|
return super()._from_dict(d)
|
|
132
124
|
assert 'signature' in d and 'name' in d
|
|
133
|
-
import pixeltable.exprs as exprs
|
|
134
|
-
|
|
135
125
|
template = ExprTemplate(exprs.Expr.from_dict(d['expr']), Signature.from_dict(d['signature']))
|
|
136
126
|
return cls([template], name=d['name'])
|
pixeltable/func/function.py
CHANGED
|
@@ -62,7 +62,6 @@ class Function(ABC):
|
|
|
62
62
|
# Check that stored functions cannot be declared using `is_method` or `is_property`:
|
|
63
63
|
assert not ((is_method or is_property) and self_path is None)
|
|
64
64
|
assert isinstance(signatures, list)
|
|
65
|
-
assert len(signatures) > 0
|
|
66
65
|
self.signatures = signatures
|
|
67
66
|
self.self_path = self_path # fully-qualified path to self
|
|
68
67
|
self.is_method = is_method
|
|
@@ -72,6 +71,10 @@ class Function(ABC):
|
|
|
72
71
|
self._to_sql = self.__default_to_sql
|
|
73
72
|
self._resource_pool = self.__default_resource_pool
|
|
74
73
|
|
|
74
|
+
@property
|
|
75
|
+
def is_valid(self) -> bool:
|
|
76
|
+
return len(self.signatures) > 0
|
|
77
|
+
|
|
75
78
|
@property
|
|
76
79
|
def name(self) -> str:
|
|
77
80
|
assert self.self_path is not None
|
|
@@ -243,7 +246,7 @@ class Function(ABC):
|
|
|
243
246
|
# `None` when any of its non-nullable inputs are `None`.
|
|
244
247
|
for arg_name, arg in bound_args.items():
|
|
245
248
|
param = self.signature.parameters[arg_name]
|
|
246
|
-
if param.kind in
|
|
249
|
+
if param.kind in {inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD}:
|
|
247
250
|
continue
|
|
248
251
|
if arg.col_type.nullable and not param.col_type.nullable:
|
|
249
252
|
return_type = return_type.copy(nullable=True)
|
|
@@ -301,13 +304,12 @@ class Function(ABC):
|
|
|
301
304
|
callable_args[param.name] = arg
|
|
302
305
|
else:
|
|
303
306
|
return None
|
|
304
|
-
|
|
307
|
+
elif isinstance(arg, exprs.Literal):
|
|
305
308
|
# The callable is expecting `param.name` to be a constant Python value. Unpack a Literal if we find
|
|
306
309
|
# one; otherwise return None.
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
return None
|
|
310
|
+
callable_args[param.name] = arg.val
|
|
311
|
+
else:
|
|
312
|
+
return None
|
|
311
313
|
|
|
312
314
|
return callable_args
|
|
313
315
|
|
|
@@ -383,10 +385,10 @@ class Function(ABC):
|
|
|
383
385
|
else:
|
|
384
386
|
var = exprs.Variable(name, param.col_type)
|
|
385
387
|
bindings[name] = var
|
|
386
|
-
if args_ok and param.kind in
|
|
388
|
+
if args_ok and param.kind in {
|
|
387
389
|
inspect.Parameter.POSITIONAL_ONLY,
|
|
388
390
|
inspect.Parameter.POSITIONAL_OR_KEYWORD,
|
|
389
|
-
|
|
391
|
+
}:
|
|
390
392
|
template_args.append(var)
|
|
391
393
|
else:
|
|
392
394
|
template_kwargs[name] = var
|
|
@@ -433,6 +435,9 @@ class Function(ABC):
|
|
|
433
435
|
return False
|
|
434
436
|
return self.self_path == other.self_path
|
|
435
437
|
|
|
438
|
+
def __hash__(self) -> int:
|
|
439
|
+
return hash(self.self_path)
|
|
440
|
+
|
|
436
441
|
def source(self) -> None:
|
|
437
442
|
"""Print source code"""
|
|
438
443
|
print('source not available')
|
|
@@ -468,11 +473,18 @@ class Function(ABC):
|
|
|
468
473
|
@classmethod
|
|
469
474
|
def _from_dict(cls, d: dict) -> Function:
|
|
470
475
|
"""Default deserialization: load the symbol indicated by the stored symbol_path"""
|
|
471
|
-
|
|
472
|
-
assert
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
+
path = d.get('path')
|
|
477
|
+
assert path is not None
|
|
478
|
+
try:
|
|
479
|
+
instance = resolve_symbol(path)
|
|
480
|
+
if isinstance(instance, Function):
|
|
481
|
+
return instance
|
|
482
|
+
else:
|
|
483
|
+
return InvalidFunction(
|
|
484
|
+
path, d, f'the symbol {path!r} is no longer a UDF. (Was the `@pxt.udf` decorator removed?)'
|
|
485
|
+
)
|
|
486
|
+
except (AttributeError, ImportError):
|
|
487
|
+
return InvalidFunction(path, d, f'the symbol {path!r} no longer exists. (Was the UDF moved or renamed?)')
|
|
476
488
|
|
|
477
489
|
def to_store(self) -> tuple[dict, bytes]:
|
|
478
490
|
"""
|
|
@@ -490,3 +502,25 @@ class Function(ABC):
|
|
|
490
502
|
Create a Function instance from the serialized representation returned by to_store()
|
|
491
503
|
"""
|
|
492
504
|
raise NotImplementedError()
|
|
505
|
+
|
|
506
|
+
|
|
507
|
+
class InvalidFunction(Function):
|
|
508
|
+
fn_dict: dict[str, Any]
|
|
509
|
+
errormsg: str
|
|
510
|
+
|
|
511
|
+
def __init__(self, self_path: str, fn_dict: dict[str, Any], errormsg: str):
|
|
512
|
+
super().__init__([], self_path)
|
|
513
|
+
self.fn_dict = fn_dict
|
|
514
|
+
self.errormsg = errormsg
|
|
515
|
+
|
|
516
|
+
def _as_dict(self) -> dict:
|
|
517
|
+
"""
|
|
518
|
+
Here we write out (verbatim) the original metadata that failed to load (and that resulted in the
|
|
519
|
+
InvalidFunction). Note that the InvalidFunction itself is never serialized, so there is no corresponding
|
|
520
|
+
from_dict() method.
|
|
521
|
+
"""
|
|
522
|
+
return self.fn_dict
|
|
523
|
+
|
|
524
|
+
@property
|
|
525
|
+
def is_async(self) -> bool:
|
|
526
|
+
return False
|
|
@@ -9,9 +9,7 @@ from uuid import UUID
|
|
|
9
9
|
|
|
10
10
|
import sqlalchemy as sql
|
|
11
11
|
|
|
12
|
-
import
|
|
13
|
-
import pixeltable.exceptions as excs
|
|
14
|
-
import pixeltable.type_system as ts
|
|
12
|
+
from pixeltable import env, exceptions as excs, type_system as ts
|
|
15
13
|
from pixeltable.metadata import schema
|
|
16
14
|
|
|
17
15
|
from .function import Function
|
|
@@ -1,13 +1,9 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import inspect
|
|
4
|
-
from typing import TYPE_CHECKING, Any, Callable, Optional,
|
|
4
|
+
from typing import TYPE_CHECKING, Any, Callable, Optional, overload
|
|
5
5
|
|
|
6
|
-
import
|
|
7
|
-
|
|
8
|
-
import pixeltable.exceptions as excs
|
|
9
|
-
import pixeltable.type_system as ts
|
|
10
|
-
from pixeltable import exprs
|
|
6
|
+
from pixeltable import exprs, type_system as ts
|
|
11
7
|
|
|
12
8
|
from .function import Function
|
|
13
9
|
from .signature import Signature
|
|
@@ -21,7 +17,7 @@ class QueryTemplateFunction(Function):
|
|
|
21
17
|
|
|
22
18
|
template_df: Optional['DataFrame']
|
|
23
19
|
self_name: Optional[str]
|
|
24
|
-
conn: Optional[sql.engine.Connection]
|
|
20
|
+
# conn: Optional[sql.engine.Connection]
|
|
25
21
|
defaults: dict[str, exprs.Literal]
|
|
26
22
|
|
|
27
23
|
@classmethod
|
|
@@ -53,7 +49,7 @@ class QueryTemplateFunction(Function):
|
|
|
53
49
|
# if we're running as part of an ongoing update operation, we need to use the same connection, otherwise
|
|
54
50
|
# we end up with a deadlock
|
|
55
51
|
# TODO: figure out a more general way to make execution state available
|
|
56
|
-
self.conn = None
|
|
52
|
+
# self.conn = None
|
|
57
53
|
|
|
58
54
|
# convert defaults to Literals
|
|
59
55
|
self.defaults = {} # key: param name, value: default value converted to a Literal
|
|
@@ -67,9 +63,6 @@ class QueryTemplateFunction(Function):
|
|
|
67
63
|
def _update_as_overload_resolution(self, signature_idx: int) -> None:
|
|
68
64
|
pass # only one signature supported for QueryTemplateFunction
|
|
69
65
|
|
|
70
|
-
def set_conn(self, conn: Optional[sql.engine.Connection]) -> None:
|
|
71
|
-
self.conn = conn
|
|
72
|
-
|
|
73
66
|
@property
|
|
74
67
|
def is_async(self) -> bool:
|
|
75
68
|
return True
|
|
@@ -82,7 +75,7 @@ class QueryTemplateFunction(Function):
|
|
|
82
75
|
{param_name: default for param_name, default in self.defaults.items() if param_name not in bound_args}
|
|
83
76
|
)
|
|
84
77
|
bound_df = self.template_df.bind(bound_args)
|
|
85
|
-
result = await bound_df._acollect(
|
|
78
|
+
result = await bound_df._acollect()
|
|
86
79
|
return list(result)
|
|
87
80
|
|
|
88
81
|
@property
|
pixeltable/func/signature.py
CHANGED
|
@@ -2,10 +2,9 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import dataclasses
|
|
4
4
|
import inspect
|
|
5
|
-
import json
|
|
6
5
|
import logging
|
|
7
6
|
import typing
|
|
8
|
-
from typing import TYPE_CHECKING, Any, Callable, Optional
|
|
7
|
+
from typing import TYPE_CHECKING, Any, Callable, ClassVar, Optional
|
|
9
8
|
|
|
10
9
|
import pixeltable.exceptions as excs
|
|
11
10
|
import pixeltable.type_system as ts
|
|
@@ -69,6 +68,9 @@ class Parameter:
|
|
|
69
68
|
py_default = self.default.val if self.default is not None else inspect.Parameter.empty
|
|
70
69
|
return inspect.Parameter(self.name, self.kind, default=py_default)
|
|
71
70
|
|
|
71
|
+
def __hash__(self) -> int:
|
|
72
|
+
return hash((self.name, self.col_type, self.kind, self.default, self.is_batched))
|
|
73
|
+
|
|
72
74
|
|
|
73
75
|
T = typing.TypeVar('T')
|
|
74
76
|
Batch = typing.Annotated[list[T], 'pxt-batch']
|
|
@@ -81,7 +83,7 @@ class Signature:
|
|
|
81
83
|
- self.is_batched: return type is a Batch[...] type
|
|
82
84
|
"""
|
|
83
85
|
|
|
84
|
-
SPECIAL_PARAM_NAMES = ['group_by', 'order_by']
|
|
86
|
+
SPECIAL_PARAM_NAMES: ClassVar[list[str]] = ['group_by', 'order_by']
|
|
85
87
|
|
|
86
88
|
def __init__(self, return_type: ts.ColumnType, parameters: list[Parameter], is_batched: bool = False):
|
|
87
89
|
assert isinstance(return_type, ts.ColumnType)
|
|
@@ -135,26 +137,28 @@ class Signature:
|
|
|
135
137
|
if (
|
|
136
138
|
param.kind != other_param.kind
|
|
137
139
|
or (param.col_type is None) != (other_param.col_type is None) # this can happen if they are varargs
|
|
138
|
-
or
|
|
139
|
-
|
|
140
|
+
or (
|
|
141
|
+
param.col_type is not None
|
|
142
|
+
and not other_param.col_type.is_supertype_of(param.col_type, ignore_nullable=True)
|
|
143
|
+
)
|
|
140
144
|
):
|
|
141
145
|
return False
|
|
142
146
|
|
|
143
147
|
# Check (iii)
|
|
144
|
-
for other_param in other.required_parameters:
|
|
148
|
+
for other_param in other.required_parameters: # noqa: SIM110
|
|
145
149
|
if other_param.name not in self.parameters:
|
|
146
150
|
return False
|
|
147
151
|
|
|
148
152
|
return True
|
|
149
153
|
|
|
150
154
|
def validate_args(self, bound_args: dict[str, Optional['exprs.Expr']], context: str = '') -> None:
|
|
151
|
-
if context
|
|
155
|
+
if context:
|
|
152
156
|
context = f' ({context})'
|
|
153
157
|
|
|
154
158
|
for param_name, arg in bound_args.items():
|
|
155
159
|
assert param_name in self.parameters
|
|
156
160
|
param = self.parameters[param_name]
|
|
157
|
-
is_var_param = param.kind in
|
|
161
|
+
is_var_param = param.kind in {inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD}
|
|
158
162
|
if is_var_param:
|
|
159
163
|
continue
|
|
160
164
|
assert param.col_type is not None
|
|
@@ -191,6 +195,9 @@ class Signature:
|
|
|
191
195
|
return False
|
|
192
196
|
return True
|
|
193
197
|
|
|
198
|
+
def __hash__(self) -> int:
|
|
199
|
+
return hash((self.return_type, self.parameters))
|
|
200
|
+
|
|
194
201
|
def __str__(self) -> str:
|
|
195
202
|
param_strs: list[str] = []
|
|
196
203
|
for p in self.parameters.values():
|
|
@@ -199,8 +206,8 @@ class Signature:
|
|
|
199
206
|
elif p.kind == inspect.Parameter.VAR_KEYWORD:
|
|
200
207
|
param_strs.append(f'**{p.name}')
|
|
201
208
|
else:
|
|
202
|
-
param_strs.append(f'{p.name}: {
|
|
203
|
-
return f'({", ".join(param_strs)}) -> {
|
|
209
|
+
param_strs.append(f'{p.name}: {p.col_type}')
|
|
210
|
+
return f'({", ".join(param_strs)}) -> {self.get_return_type()}'
|
|
204
211
|
|
|
205
212
|
@classmethod
|
|
206
213
|
def _infer_type(cls, annotation: Optional[type]) -> tuple[Optional[ts.ColumnType], Optional[bool]]:
|
|
@@ -213,7 +220,7 @@ class Signature:
|
|
|
213
220
|
type_args = typing.get_args(annotation)
|
|
214
221
|
if len(type_args) == 2 and type_args[1] == 'pxt-batch':
|
|
215
222
|
# this is our Batch
|
|
216
|
-
assert typing.get_origin(type_args[0])
|
|
223
|
+
assert typing.get_origin(type_args[0]) is list
|
|
217
224
|
is_batched = True
|
|
218
225
|
py_type = typing.get_args(type_args[0])[0]
|
|
219
226
|
if py_type is None:
|
|
@@ -246,7 +253,7 @@ class Signature:
|
|
|
246
253
|
continue # skip 'self' or 'cls' parameter
|
|
247
254
|
if param.name in cls.SPECIAL_PARAM_NAMES:
|
|
248
255
|
raise excs.Error(f'{param.name!r} is a reserved parameter name')
|
|
249
|
-
if param.kind
|
|
256
|
+
if param.kind in {inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD}:
|
|
250
257
|
parameters.append(Parameter(param.name, col_type=None, kind=param.kind))
|
|
251
258
|
continue
|
|
252
259
|
|
|
@@ -257,11 +264,8 @@ class Signature:
|
|
|
257
264
|
param_type = param_types[idx]
|
|
258
265
|
is_batched = False
|
|
259
266
|
else:
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
py_type = type_substitutions[param.annotation]
|
|
263
|
-
else:
|
|
264
|
-
py_type = param.annotation
|
|
267
|
+
# Look up the substitution for param.annotation, defaulting to param.annotation if there is none
|
|
268
|
+
py_type = type_substitutions.get(param.annotation, param.annotation)
|
|
265
269
|
param_type, is_batched = cls._infer_type(py_type)
|
|
266
270
|
if param_type is None:
|
|
267
271
|
raise excs.Error(f'Cannot infer pixeltable type for parameter {param.name!r}')
|
|
@@ -297,11 +301,8 @@ class Signature:
|
|
|
297
301
|
)
|
|
298
302
|
sig = inspect.signature(py_fn)
|
|
299
303
|
if return_type is None:
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
py_type = type_substitutions[sig.return_annotation]
|
|
303
|
-
else:
|
|
304
|
-
py_type = sig.return_annotation
|
|
304
|
+
# Look up the substitution for sig.return_annotation, defaulting to return_annotation if there is none
|
|
305
|
+
py_type = type_substitutions.get(sig.return_annotation, sig.return_annotation)
|
|
305
306
|
return_type, return_is_batched = cls._infer_type(py_type)
|
|
306
307
|
if return_type is None:
|
|
307
308
|
raise excs.Error('Cannot infer pixeltable return type')
|
pixeltable/func/tools.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import TYPE_CHECKING, Any, Callable,
|
|
1
|
+
from typing import TYPE_CHECKING, Any, Callable, Optional, TypeVar, Union
|
|
2
2
|
|
|
3
3
|
import pydantic
|
|
4
4
|
|
|
@@ -69,7 +69,7 @@ class Tool(pydantic.BaseModel):
|
|
|
69
69
|
return _extract_float_tool_arg(kwargs, param_name=param.name)
|
|
70
70
|
if param.col_type.is_bool_type():
|
|
71
71
|
return _extract_bool_tool_arg(kwargs, param_name=param.name)
|
|
72
|
-
|
|
72
|
+
raise AssertionError()
|
|
73
73
|
|
|
74
74
|
|
|
75
75
|
class ToolChoice(pydantic.BaseModel):
|
|
@@ -113,7 +113,7 @@ class Tools(pydantic.BaseModel):
|
|
|
113
113
|
)
|
|
114
114
|
tool_name = tool_obj.name or tool_obj.fn.name
|
|
115
115
|
except StopIteration:
|
|
116
|
-
raise excs.Error(f'That tool is not in the specified list of tools: {tool}')
|
|
116
|
+
raise excs.Error(f'That tool is not in the specified list of tools: {tool}') from None
|
|
117
117
|
return ToolChoice(auto=auto, required=required, tool=tool_name, parallel_tool_calls=parallel_tool_calls)
|
|
118
118
|
|
|
119
119
|
|
pixeltable/func/udf.py
CHANGED
|
@@ -146,7 +146,8 @@ def make_function(
|
|
|
146
146
|
raise excs.Error(f'Cannot specify both `is_method` and `is_property` (in function `{function_name}`)')
|
|
147
147
|
if is_property and len(sig.parameters) != 1:
|
|
148
148
|
raise excs.Error(
|
|
149
|
-
|
|
149
|
+
'`is_property=True` expects a UDF with exactly 1 parameter, but '
|
|
150
|
+
f'`{function_name}` has {len(sig.parameters)}'
|
|
150
151
|
)
|
|
151
152
|
if (is_method or is_property) and function_path is None:
|
|
152
153
|
raise excs.Error('Stored functions cannot be declared using `is_method` or `is_property`')
|
|
@@ -205,6 +206,8 @@ def expr_udf(*, param_types: Optional[list[ts.ColumnType]] = None) -> Callable[[
|
|
|
205
206
|
|
|
206
207
|
def expr_udf(*args: Any, **kwargs: Any) -> Any:
|
|
207
208
|
def make_expr_template(py_fn: Callable, param_types: Optional[list[ts.ColumnType]]) -> ExprTemplateFunction:
|
|
209
|
+
from pixeltable import exprs
|
|
210
|
+
|
|
208
211
|
if py_fn.__module__ != '__main__' and py_fn.__name__.isidentifier():
|
|
209
212
|
# this is a named function in a module
|
|
210
213
|
function_path = f'{py_fn.__module__}.{py_fn.__qualname__}'
|
|
@@ -216,7 +219,6 @@ def expr_udf(*args: Any, **kwargs: Any) -> Any:
|
|
|
216
219
|
|
|
217
220
|
# construct Signature from the function signature
|
|
218
221
|
sig = Signature.create(py_fn=py_fn, param_types=param_types, return_type=ts.InvalidType())
|
|
219
|
-
import pixeltable.exprs as exprs
|
|
220
222
|
|
|
221
223
|
var_exprs = [exprs.Variable(param.name, param.col_type) for param in sig.parameters.values()]
|
|
222
224
|
# call the function with the parameter expressions to construct an Expr with parameters
|
|
@@ -260,7 +262,7 @@ def from_table(
|
|
|
260
262
|
"""
|
|
261
263
|
from pixeltable import exprs
|
|
262
264
|
|
|
263
|
-
ancestors = [tbl
|
|
265
|
+
ancestors = [tbl, *tbl._bases]
|
|
264
266
|
ancestors.reverse() # We must traverse the ancestors in order from base to derived
|
|
265
267
|
|
|
266
268
|
subst: dict[exprs.Expr, exprs.Expr] = {}
|
|
@@ -268,7 +270,7 @@ def from_table(
|
|
|
268
270
|
params: list[Parameter] = []
|
|
269
271
|
|
|
270
272
|
for t in ancestors:
|
|
271
|
-
for name, col in t._tbl_version.cols_by_name.items():
|
|
273
|
+
for name, col in t._tbl_version.get().cols_by_name.items():
|
|
272
274
|
assert name not in result_dict, f'Column name is not unique: {name}'
|
|
273
275
|
if col.is_computed:
|
|
274
276
|
# Computed column. Apply any existing substitutions and add the new expression to the subst dict.
|