pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +83 -19
- pixeltable/_query.py +1444 -0
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +7 -4
- pixeltable/catalog/catalog.py +2394 -119
- pixeltable/catalog/column.py +225 -104
- pixeltable/catalog/dir.py +38 -9
- pixeltable/catalog/globals.py +53 -34
- pixeltable/catalog/insertable_table.py +265 -115
- pixeltable/catalog/path.py +80 -17
- pixeltable/catalog/schema_object.py +28 -43
- pixeltable/catalog/table.py +1270 -677
- pixeltable/catalog/table_metadata.py +103 -0
- pixeltable/catalog/table_version.py +1270 -751
- pixeltable/catalog/table_version_handle.py +109 -0
- pixeltable/catalog/table_version_path.py +137 -42
- pixeltable/catalog/tbl_ops.py +53 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +251 -134
- pixeltable/config.py +215 -0
- pixeltable/env.py +736 -285
- pixeltable/exceptions.py +26 -2
- pixeltable/exec/__init__.py +7 -2
- pixeltable/exec/aggregation_node.py +39 -21
- pixeltable/exec/cache_prefetch_node.py +87 -109
- pixeltable/exec/cell_materialization_node.py +268 -0
- pixeltable/exec/cell_reconstruction_node.py +168 -0
- pixeltable/exec/component_iteration_node.py +25 -28
- pixeltable/exec/data_row_batch.py +11 -46
- pixeltable/exec/exec_context.py +26 -11
- pixeltable/exec/exec_node.py +35 -27
- pixeltable/exec/expr_eval/__init__.py +3 -0
- pixeltable/exec/expr_eval/evaluators.py +365 -0
- pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
- pixeltable/exec/expr_eval/globals.py +200 -0
- pixeltable/exec/expr_eval/row_buffer.py +74 -0
- pixeltable/exec/expr_eval/schedulers.py +413 -0
- pixeltable/exec/globals.py +35 -0
- pixeltable/exec/in_memory_data_node.py +35 -27
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +44 -29
- pixeltable/exec/sql_node.py +414 -115
- pixeltable/exprs/__init__.py +8 -5
- pixeltable/exprs/arithmetic_expr.py +79 -45
- pixeltable/exprs/array_slice.py +5 -5
- pixeltable/exprs/column_property_ref.py +40 -26
- pixeltable/exprs/column_ref.py +254 -61
- pixeltable/exprs/comparison.py +14 -9
- pixeltable/exprs/compound_predicate.py +9 -10
- pixeltable/exprs/data_row.py +213 -72
- pixeltable/exprs/expr.py +270 -104
- pixeltable/exprs/expr_dict.py +6 -5
- pixeltable/exprs/expr_set.py +20 -11
- pixeltable/exprs/function_call.py +383 -284
- pixeltable/exprs/globals.py +18 -5
- pixeltable/exprs/in_predicate.py +7 -7
- pixeltable/exprs/inline_expr.py +37 -37
- pixeltable/exprs/is_null.py +8 -4
- pixeltable/exprs/json_mapper.py +120 -54
- pixeltable/exprs/json_path.py +90 -60
- pixeltable/exprs/literal.py +61 -16
- pixeltable/exprs/method_ref.py +7 -6
- pixeltable/exprs/object_ref.py +19 -8
- pixeltable/exprs/row_builder.py +238 -75
- pixeltable/exprs/rowid_ref.py +53 -15
- pixeltable/exprs/similarity_expr.py +65 -50
- pixeltable/exprs/sql_element_cache.py +5 -5
- pixeltable/exprs/string_op.py +107 -0
- pixeltable/exprs/type_cast.py +25 -13
- pixeltable/exprs/variable.py +2 -2
- pixeltable/func/__init__.py +9 -5
- pixeltable/func/aggregate_function.py +197 -92
- pixeltable/func/callable_function.py +119 -35
- pixeltable/func/expr_template_function.py +101 -48
- pixeltable/func/function.py +375 -62
- pixeltable/func/function_registry.py +20 -19
- pixeltable/func/globals.py +6 -5
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +151 -35
- pixeltable/func/signature.py +178 -49
- pixeltable/func/tools.py +164 -0
- pixeltable/func/udf.py +176 -53
- pixeltable/functions/__init__.py +44 -4
- pixeltable/functions/anthropic.py +226 -47
- pixeltable/functions/audio.py +148 -11
- pixeltable/functions/bedrock.py +137 -0
- pixeltable/functions/date.py +188 -0
- pixeltable/functions/deepseek.py +113 -0
- pixeltable/functions/document.py +81 -0
- pixeltable/functions/fal.py +76 -0
- pixeltable/functions/fireworks.py +72 -20
- pixeltable/functions/gemini.py +249 -0
- pixeltable/functions/globals.py +208 -53
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1088 -95
- pixeltable/functions/image.py +155 -84
- pixeltable/functions/json.py +8 -11
- pixeltable/functions/llama_cpp.py +31 -19
- pixeltable/functions/math.py +169 -0
- pixeltable/functions/mistralai.py +50 -75
- pixeltable/functions/net.py +70 -0
- pixeltable/functions/ollama.py +29 -36
- pixeltable/functions/openai.py +548 -160
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +15 -14
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +310 -85
- pixeltable/functions/timestamp.py +37 -19
- pixeltable/functions/together.py +77 -120
- pixeltable/functions/twelvelabs.py +188 -0
- pixeltable/functions/util.py +7 -2
- pixeltable/functions/uuid.py +30 -0
- pixeltable/functions/video.py +1528 -117
- pixeltable/functions/vision.py +26 -26
- pixeltable/functions/voyageai.py +289 -0
- pixeltable/functions/whisper.py +19 -10
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/functions/yolox.py +112 -0
- pixeltable/globals.py +716 -236
- pixeltable/index/__init__.py +3 -1
- pixeltable/index/base.py +17 -21
- pixeltable/index/btree.py +32 -22
- pixeltable/index/embedding_index.py +155 -92
- pixeltable/io/__init__.py +12 -7
- pixeltable/io/datarows.py +140 -0
- pixeltable/io/external_store.py +83 -125
- pixeltable/io/fiftyone.py +24 -33
- pixeltable/io/globals.py +47 -182
- pixeltable/io/hf_datasets.py +96 -127
- pixeltable/io/label_studio.py +171 -156
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +136 -115
- pixeltable/io/parquet.py +40 -153
- pixeltable/io/table_data_conduit.py +702 -0
- pixeltable/io/utils.py +100 -0
- pixeltable/iterators/__init__.py +8 -4
- pixeltable/iterators/audio.py +207 -0
- pixeltable/iterators/base.py +9 -3
- pixeltable/iterators/document.py +144 -87
- pixeltable/iterators/image.py +17 -38
- pixeltable/iterators/string.py +15 -12
- pixeltable/iterators/video.py +523 -127
- pixeltable/metadata/__init__.py +33 -8
- pixeltable/metadata/converters/convert_10.py +2 -3
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_15.py +15 -11
- pixeltable/metadata/converters/convert_16.py +4 -5
- pixeltable/metadata/converters/convert_17.py +4 -5
- pixeltable/metadata/converters/convert_18.py +4 -6
- pixeltable/metadata/converters/convert_19.py +6 -9
- pixeltable/metadata/converters/convert_20.py +3 -6
- pixeltable/metadata/converters/convert_21.py +6 -8
- pixeltable/metadata/converters/convert_22.py +3 -2
- pixeltable/metadata/converters/convert_23.py +33 -0
- pixeltable/metadata/converters/convert_24.py +55 -0
- pixeltable/metadata/converters/convert_25.py +19 -0
- pixeltable/metadata/converters/convert_26.py +23 -0
- pixeltable/metadata/converters/convert_27.py +29 -0
- pixeltable/metadata/converters/convert_28.py +13 -0
- pixeltable/metadata/converters/convert_29.py +110 -0
- pixeltable/metadata/converters/convert_30.py +63 -0
- pixeltable/metadata/converters/convert_31.py +11 -0
- pixeltable/metadata/converters/convert_32.py +15 -0
- pixeltable/metadata/converters/convert_33.py +17 -0
- pixeltable/metadata/converters/convert_34.py +21 -0
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/convert_41.py +12 -0
- pixeltable/metadata/converters/convert_42.py +9 -0
- pixeltable/metadata/converters/convert_43.py +44 -0
- pixeltable/metadata/converters/util.py +44 -18
- pixeltable/metadata/notes.py +21 -0
- pixeltable/metadata/schema.py +185 -42
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +616 -225
- pixeltable/share/__init__.py +3 -0
- pixeltable/share/packager.py +797 -0
- pixeltable/share/protocol/__init__.py +33 -0
- pixeltable/share/protocol/common.py +165 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +119 -0
- pixeltable/share/publish.py +349 -0
- pixeltable/store.py +398 -232
- pixeltable/type_system.py +730 -267
- pixeltable/utils/__init__.py +40 -0
- pixeltable/utils/arrow.py +201 -29
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +346 -0
- pixeltable/utils/coco.py +26 -27
- pixeltable/utils/code.py +4 -4
- pixeltable/utils/console_output.py +46 -0
- pixeltable/utils/coroutine.py +24 -0
- pixeltable/utils/dbms.py +92 -0
- pixeltable/utils/description_helper.py +11 -12
- pixeltable/utils/documents.py +60 -61
- pixeltable/utils/exception_handler.py +36 -0
- pixeltable/utils/filecache.py +38 -22
- pixeltable/utils/formatter.py +88 -51
- pixeltable/utils/gcs_store.py +295 -0
- pixeltable/utils/http.py +133 -0
- pixeltable/utils/http_server.py +14 -13
- pixeltable/utils/iceberg.py +13 -0
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +90 -0
- pixeltable/utils/local_store.py +322 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +573 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +20 -20
- pixeltable/utils/s3_store.py +527 -0
- pixeltable/utils/sql.py +32 -5
- pixeltable/utils/system.py +30 -0
- pixeltable/utils/transactional_directory.py +4 -3
- pixeltable-0.5.7.dist-info/METADATA +579 -0
- pixeltable-0.5.7.dist-info/RECORD +227 -0
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
- pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/catalog/named_function.py +0 -36
- pixeltable/catalog/path_dict.py +0 -141
- pixeltable/dataframe.py +0 -894
- pixeltable/exec/expr_eval_node.py +0 -232
- pixeltable/ext/__init__.py +0 -14
- pixeltable/ext/functions/__init__.py +0 -8
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/ext/functions/yolox.py +0 -157
- pixeltable/tool/create_test_db_dump.py +0 -311
- pixeltable/tool/create_test_video.py +0 -81
- pixeltable/tool/doc_plugins/griffe.py +0 -50
- pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
- pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
- pixeltable/tool/embed_udf.py +0 -9
- pixeltable/tool/mypy_plugin.py +0 -55
- pixeltable/utils/media_store.py +0 -76
- pixeltable/utils/s3.py +0 -16
- pixeltable-0.2.26.dist-info/METADATA +0 -400
- pixeltable-0.2.26.dist-info/RECORD +0 -156
- pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
pixeltable/func/signature.py
CHANGED
|
@@ -1,45 +1,45 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import dataclasses
|
|
4
|
-
import enum
|
|
5
4
|
import inspect
|
|
6
|
-
import json
|
|
7
5
|
import logging
|
|
8
6
|
import typing
|
|
9
|
-
from typing import Any, Callable,
|
|
7
|
+
from typing import TYPE_CHECKING, Any, Callable, ClassVar
|
|
10
8
|
|
|
11
9
|
import pixeltable.exceptions as excs
|
|
12
10
|
import pixeltable.type_system as ts
|
|
13
11
|
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from pixeltable import exprs
|
|
14
|
+
|
|
14
15
|
_logger = logging.getLogger('pixeltable')
|
|
15
16
|
|
|
16
17
|
|
|
17
18
|
@dataclasses.dataclass
|
|
18
19
|
class Parameter:
|
|
19
20
|
name: str
|
|
20
|
-
col_type:
|
|
21
|
+
col_type: ts.ColumnType | None # None for variable parameters
|
|
21
22
|
kind: inspect._ParameterKind
|
|
22
23
|
# for some reason, this needs to precede is_batched in the dataclass definition,
|
|
23
24
|
# otherwise Python complains that an argument with a default is followed by an argument without a default
|
|
24
|
-
default:
|
|
25
|
+
default: 'exprs.Literal' | None = None # default value for the parameter
|
|
25
26
|
is_batched: bool = False # True if the parameter is a batched parameter (eg, Batch[dict])
|
|
26
27
|
|
|
27
28
|
def __post_init__(self) -> None:
    """Validate the default value, if one was supplied.

    Raises:
        excs.Error: if a default is given for a parameter without a column type (a variable
            parameter), if the default is not an `exprs.Literal`, or if the parameter's column
            type is not a supertype of the default's column type.
    """
    # imported locally: at module level `exprs` is only imported under TYPE_CHECKING
    from pixeltable import exprs

    default = self.default
    if default is None:
        # no default, nothing to validate
        return

    if self.col_type is None:
        raise excs.Error(f'Cannot have a default value for variable parameter {self.name!r}')
    if not isinstance(default, exprs.Literal):
        raise excs.Error(f'Default value for parameter {self.name!r} is not a constant')
    if self.col_type.is_supertype_of(default.col_type):
        return
    raise excs.Error(
        f'Default value for parameter {self.name!r} is not of type {self.col_type!r}: {self.default}'
    )
|
|
40
40
|
|
|
41
41
|
def has_default(self) -> bool:
    """Return True if this parameter carries a default value (i.e. `default` is not None)."""
    default_missing = self.default is None
    return not default_missing
|
|
43
43
|
|
|
44
44
|
def as_dict(self) -> dict[str, Any]:
|
|
45
45
|
return {
|
|
@@ -47,27 +47,29 @@ class Parameter:
|
|
|
47
47
|
'col_type': self.col_type.as_dict() if self.col_type is not None else None,
|
|
48
48
|
'kind': self.kind.name,
|
|
49
49
|
'is_batched': self.is_batched,
|
|
50
|
-
'
|
|
51
|
-
'default': self.default if self.has_default() else None,
|
|
50
|
+
'default': None if self.default is None else self.default.as_dict(),
|
|
52
51
|
}
|
|
53
52
|
|
|
54
53
|
@classmethod
def from_dict(cls, d: dict[str, Any]) -> Parameter:
    """Rebuild a Parameter from its dict form.

    Reads the keys 'default', 'name', 'col_type', 'kind' and 'is_batched'. 'default' and 'col_type'
    are each either None or a nested dict; 'kind' is the name of an `inspect.Parameter` kind attribute.
    """
    # imported locally: at module level `exprs` is only imported under TYPE_CHECKING
    from pixeltable import exprs

    default_dict = d['default']
    assert default_dict is None or isinstance(default_dict, dict), d
    default = exprs.Literal.from_dict(default_dict) if default_dict is not None else None

    name = d['name']
    col_type_dict = d['col_type']
    col_type = None if col_type_dict is None else ts.ColumnType.from_dict(col_type_dict)
    # translate the stored kind name back into the inspect.Parameter constant
    kind = getattr(inspect.Parameter, d['kind'])
    return cls(name=name, col_type=col_type, kind=kind, is_batched=d['is_batched'], default=default)
|
|
68
66
|
|
|
69
67
|
def to_py_param(self) -> inspect.Parameter:
    """Convert to an `inspect.Parameter` with the same name and kind.

    The Python-level default is the `val` of the default literal, or `inspect.Parameter.empty`
    when there is no default.
    """
    if self.default is None:
        py_default = inspect.Parameter.empty
    else:
        py_default = self.default.val
    return inspect.Parameter(name=self.name, kind=self.kind, default=py_default)
|
|
70
|
+
|
|
71
|
+
def __hash__(self) -> int:
|
|
72
|
+
return hash((self.name, self.col_type, self.kind, self.default, self.is_batched))
|
|
71
73
|
|
|
72
74
|
|
|
73
75
|
T = typing.TypeVar('T')
|
|
@@ -80,9 +82,30 @@ class Signature:
|
|
|
80
82
|
|
|
81
83
|
- self.is_batched: return type is a Batch[...] type
|
|
82
84
|
"""
|
|
83
|
-
SPECIAL_PARAM_NAMES = ['group_by', 'order_by']
|
|
84
85
|
|
|
85
|
-
|
|
86
|
+
SPECIAL_PARAM_NAMES: ClassVar[list[str]] = ['group_by', 'order_by']
|
|
87
|
+
SYSTEM_PARAM_NAMES: ClassVar[list[str]] = ['_runtime_ctx']
|
|
88
|
+
|
|
89
|
+
return_type: ts.ColumnType
|
|
90
|
+
is_batched: bool
|
|
91
|
+
parameters: dict[str, Parameter] # name -> Parameter
|
|
92
|
+
parameters_by_pos: list[Parameter] # ordered by position in the signature
|
|
93
|
+
constant_parameters: list[Parameter] # parameters that are not batched
|
|
94
|
+
batched_parameters: list[Parameter] # parameters that are batched
|
|
95
|
+
required_parameters: list[Parameter] # parameters that do not have a default value
|
|
96
|
+
|
|
97
|
+
# the names of recognized system parameters in the signature; these are excluded from self.parameters
|
|
98
|
+
system_parameters: list[str]
|
|
99
|
+
|
|
100
|
+
py_signature: inspect.Signature
|
|
101
|
+
|
|
102
|
+
def __init__(
|
|
103
|
+
self,
|
|
104
|
+
return_type: ts.ColumnType,
|
|
105
|
+
parameters: list[Parameter],
|
|
106
|
+
is_batched: bool = False,
|
|
107
|
+
system_parameters: list[str] | None = None,
|
|
108
|
+
):
|
|
86
109
|
assert isinstance(return_type, ts.ColumnType)
|
|
87
110
|
self.return_type = return_type
|
|
88
111
|
self.is_batched = is_batched
|
|
@@ -92,6 +115,7 @@ class Signature:
|
|
|
92
115
|
self.constant_parameters = [p for p in parameters if not p.is_batched]
|
|
93
116
|
self.batched_parameters = [p for p in parameters if p.is_batched]
|
|
94
117
|
self.required_parameters = [p for p in parameters if not p.has_default()]
|
|
118
|
+
self.system_parameters = system_parameters if system_parameters is not None else []
|
|
95
119
|
self.py_signature = inspect.Signature([p.to_py_param() for p in self.parameters_by_pos])
|
|
96
120
|
|
|
97
121
|
def get_return_type(self) -> ts.ColumnType:
|
|
@@ -111,6 +135,74 @@ class Signature:
|
|
|
111
135
|
parameters = [Parameter.from_dict(param_dict) for param_dict in d['parameters']]
|
|
112
136
|
return cls(ts.ColumnType.from_dict(d['return_type']), parameters, d['is_batched'])
|
|
113
137
|
|
|
138
|
+
def is_consistent_with(self, other: Signature) -> bool:
    """
    Returns True if this signature (S) is consistent with the other signature (T), meaning that S could
    safely be replaced by T in any call where S is used. All of the following must hold:
    (i) S.return_type is a supertype of T.return_type
    (ii) Every parameter p of S has a counterpart q in T with the same name and kind, and q.col_type is
        a supertype of p.col_type (either both have a column type or neither does)
    (iii) Every *required* parameter of T also exists (by name) in S
    Nullability is ignored in all type comparisons.
    """
    # (i) return types
    if not self.get_return_type().is_supertype_of(other.get_return_type(), ignore_nullable=True):
        return False

    # (ii) each of our parameters needs a compatible counterpart
    for name, mine in self.parameters.items():
        if name not in other.parameters:
            return False
        theirs = other.parameters[name]
        if mine.kind != theirs.kind:
            return False
        # one side typed and the other untyped: this can happen if they are varargs
        if (mine.col_type is None) != (theirs.col_type is None):
            return False
        if mine.col_type is not None and not theirs.col_type.is_supertype_of(mine.col_type, ignore_nullable=True):
            return False

    # (iii) the other signature must not require anything we don't have
    return all(required.name in self.parameters for required in other.required_parameters)
|
|
174
|
+
|
|
175
|
+
def validate_args(self, bound_args: dict[str, 'exprs.Expr' | None], context: str = '') -> None:
    """Check that each bound argument is type-compatible with its parameter.

    Args:
        bound_args: parameter name -> argument expression; every name must be a key of `self.parameters`
        context: optional text that is appended, in parentheses, to the parameter name in error messages

    Raises:
        excs.Error: if the argument for a non-variable parameter is None, or if its type does not
            match the parameter type.
    """
    context = f' ({context})' if context else context
    var_kinds = (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD)

    for param_name, arg in bound_args.items():
        assert param_name in self.parameters, f'{param_name!r} not in {list(self.parameters.keys())}'
        param = self.parameters[param_name]
        if param.kind in var_kinds:
            # *args / **kwargs are not type-checked here
            continue
        assert param.col_type is not None

        if arg is None:
            raise excs.Error(f'Parameter {param_name!r}{context}: invalid argument')

        # The argument must be consistent with the expected parameter type. Nullability is ignored:
        # non-nullable parameters can still accept nullable arguments (since in that event,
        # FunctionCall.eval() detects the Nones and skips evaluation).
        type_matches = param.col_type.is_supertype_of(arg.col_type, ignore_nullable=True)
        # TODO: this is a hack to allow JSON columns to be passed to functions that accept scalar
        # types. It's necessary to avoid littering notebooks with `apply(str)` calls or equivalent.
        # (Previously, this wasn't necessary because `is_supertype_of()` was improperly implemented.)
        # We need to think through the right way to handle this scenario.
        if not type_matches and not (arg.col_type.is_json_type() and param.col_type.is_scalar_type()):
            raise excs.Error(
                f'Parameter {param_name!r}{context}: argument type {arg.col_type} does not'
                f' match parameter type {param.col_type}'
            )
|
|
205
|
+
|
|
114
206
|
def __eq__(self, other: object) -> bool:
|
|
115
207
|
if not isinstance(other, Signature):
|
|
116
208
|
return False
|
|
@@ -124,6 +216,9 @@ class Signature:
|
|
|
124
216
|
return False
|
|
125
217
|
return True
|
|
126
218
|
|
|
219
|
+
def __hash__(self) -> int:
|
|
220
|
+
return hash((self.return_type, self.parameters))
|
|
221
|
+
|
|
127
222
|
def __str__(self) -> str:
|
|
128
223
|
param_strs: list[str] = []
|
|
129
224
|
for p in self.parameters.values():
|
|
@@ -132,21 +227,21 @@ class Signature:
|
|
|
132
227
|
elif p.kind == inspect.Parameter.VAR_KEYWORD:
|
|
133
228
|
param_strs.append(f'**{p.name}')
|
|
134
229
|
else:
|
|
135
|
-
param_strs.append(f'{p.name}: {
|
|
136
|
-
return f'({", ".join(param_strs)}) -> {
|
|
230
|
+
param_strs.append(f'{p.name}: pxt.{p.col_type}')
|
|
231
|
+
return f'({", ".join(param_strs)}) -> pxt.{self.get_return_type()}'
|
|
137
232
|
|
|
138
233
|
@classmethod
|
|
139
|
-
def _infer_type(cls, annotation:
|
|
234
|
+
def _infer_type(cls, annotation: type | None) -> tuple[ts.ColumnType | None, bool | None]:
|
|
140
235
|
"""Returns: (column type, is_batched) or (None, ...) if the type cannot be inferred"""
|
|
141
236
|
if annotation is None:
|
|
142
237
|
return (None, None)
|
|
143
|
-
py_type:
|
|
238
|
+
py_type: type | None = None
|
|
144
239
|
is_batched = False
|
|
145
240
|
if typing.get_origin(annotation) == typing.Annotated:
|
|
146
241
|
type_args = typing.get_args(annotation)
|
|
147
242
|
if len(type_args) == 2 and type_args[1] == 'pxt-batch':
|
|
148
243
|
# this is our Batch
|
|
149
|
-
assert typing.get_origin(type_args[0])
|
|
244
|
+
assert typing.get_origin(type_args[0]) is list
|
|
150
245
|
is_batched = True
|
|
151
246
|
py_type = typing.get_args(type_args[0])[0]
|
|
152
247
|
if py_type is None:
|
|
@@ -156,55 +251,89 @@ class Signature:
|
|
|
156
251
|
|
|
157
252
|
@classmethod
def create_parameters(
    cls,
    py_fn: Callable | None = None,
    py_params: list[inspect.Parameter] | None = None,
    param_types: list[ts.ColumnType] | None = None,
    type_substitutions: dict | None = None,
    is_cls_method: bool = False,
) -> list[Parameter]:
    """Build the Parameter list for a Python callable or for an explicit list of Python parameters.

    Exactly one of `py_fn` and `py_params` must be given.

    - If `is_cls_method` is set, the first parameter ('self' or 'cls') is skipped.
    - Parameters named in SYSTEM_PARAM_NAMES are skipped.
    - Any other parameter whose name starts with '_', or that is named in SPECIAL_PARAM_NAMES, is
      rejected with an error.
    - *args / **kwargs parameters are recorded with `col_type=None`.
    - All other parameters take their type from `param_types` (indexed by the parameter's position
      in the full Python parameter list, skipped parameters included) or, if `param_types` is None,
      from their annotation after applying `type_substitutions`.

    Raises:
        excs.Error: for reserved parameter names, missing or non-inferrable types, and default
            values that are not constants.
    """
    # imported locally: at module level `exprs` is only imported under TYPE_CHECKING
    from pixeltable import exprs

    assert (py_fn is None) != (py_params is None)
    if py_fn is not None:
        py_params = list(inspect.signature(py_fn).parameters.values())
    if type_substitutions is None:
        type_substitutions = {}

    var_kinds = (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD)
    result: list[Parameter] = []

    for idx, param in enumerate(py_params):
        name = param.name
        if is_cls_method and idx == 0:
            # 'self' or 'cls'
            continue
        if name in cls.SYSTEM_PARAM_NAMES:
            # system parameters are not part of the parameter list
            continue
        if name.startswith('_'):
            raise excs.Error(f"{param.name!r}: parameters starting with '_' are reserved")
        if name in cls.SPECIAL_PARAM_NAMES:
            raise excs.Error(f'{param.name!r} is a reserved parameter name')
        if param.kind in var_kinds:
            result.append(Parameter(name, col_type=None, kind=param.kind))
            continue

        # determine the column type: an explicit type list wins over annotation-based inference
        if param_types is None:
            # Use the substitution for param.annotation if there is one, else the annotation itself
            annotation = type_substitutions.get(param.annotation, param.annotation)
            param_type, is_batched = cls._infer_type(annotation)
            if param_type is None:
                raise excs.Error(f'Cannot infer pixeltable type for parameter {param.name!r}')
        else:
            if idx >= len(param_types):
                raise excs.Error(f'Missing type for parameter {param.name!r}')
            param_type = param_types[idx]
            is_batched = False

        # a Python-level default is only accepted if it converts to a Literal
        if param.default is inspect.Parameter.empty:
            default = None
        else:
            default = exprs.Expr.from_object(param.default)
        if default is not None and not isinstance(default, exprs.Literal):
            raise excs.Error(f'Default value for parameter {param.name!r} must be a constant')

        result.append(
            Parameter(name, col_type=param_type, kind=param.kind, is_batched=is_batched, default=default)
        )

    return result
|
|
190
308
|
|
|
191
309
|
@classmethod
def create(
    cls,
    py_fn: Callable,
    param_types: list[ts.ColumnType] | None = None,
    return_type: ts.ColumnType | None = None,
    type_substitutions: dict | None = None,
    is_cls_method: bool = False,
) -> Signature:
    """Create a signature for the given Callable.

    Parameter and return types that are not specified are inferred from the annotations of `py_fn`
    (after applying `type_substitutions`). Whether the return value is batched is always taken from
    the return annotation, even when `return_type` is given explicitly.

    Raises:
        excs.Error: if a type cannot be inferred.
    """
    substitutions = {} if type_substitutions is None else type_substitutions

    parameters = cls.create_parameters(
        py_fn=py_fn, param_types=param_types, is_cls_method=is_cls_method, type_substitutions=substitutions
    )
    py_sig = inspect.signature(py_fn)
    return_annotation = py_sig.return_annotation

    if return_type is not None:
        # explicit return type: the annotation only decides whether the return value is batched
        _, return_is_batched = cls._infer_type(return_annotation)
    else:
        # Use the substitution for the return annotation if there is one, else the annotation itself
        substituted = substitutions.get(return_annotation, return_annotation)
        return_type, return_is_batched = cls._infer_type(substituted)
        if return_type is None:
            raise excs.Error('Cannot infer pixeltable return type')

    # system parameters present in the Python signature, in declaration order
    system_params = [name for name in py_sig.parameters if name in cls.SYSTEM_PARAM_NAMES]

    return Signature(return_type, parameters, return_is_batched, system_parameters=system_params)
|
pixeltable/func/tools.py
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import uuid
|
|
3
|
+
from typing import TYPE_CHECKING, Any, Callable, TypeVar
|
|
4
|
+
|
|
5
|
+
import pydantic
|
|
6
|
+
|
|
7
|
+
from pixeltable import exceptions as excs, type_system as ts
|
|
8
|
+
|
|
9
|
+
from .function import Function
|
|
10
|
+
from .signature import Parameter
|
|
11
|
+
from .udf import udf
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from pixeltable import exprs
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# The Tool and Tools classes are containers that hold Pixeltable UDFs and related metadata, so that they can be
|
|
18
|
+
# realized as LLM tools. They are implemented as Pydantic models in order to provide a canonical way of converting
|
|
19
|
+
# to JSON, via the Pydantic `model_serializer` interface. In this way, they can be passed directly as UDF
|
|
20
|
+
# parameters as described in the `pixeltable.tools` and `pixeltable.tool` docstrings.
|
|
21
|
+
#
|
|
22
|
+
# (The dataclass dict serializer is insufficiently flexible for this purpose: `Tool` contains a member of type
|
|
23
|
+
# `Function`, which is not natively JSON-serializable; Pydantic provides a way of customizing its default
|
|
24
|
+
# serialization behavior, whereas dataclasses do not.)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class Tool(pydantic.BaseModel):
    # Wraps a single Pixeltable `Function` together with an optional name and description override.
    # (Documented with `#` comments rather than a docstring, so the class `__doc__` stays unset.)

    # Arbitrary types are allowed so that the model can hold a Pixeltable function; serialization of
    # the model is handled by the `model_serializer` below.
    model_config = pydantic.ConfigDict(arbitrary_types_allowed=True)

    fn: Function
    name: str | None = None  # falls back to fn.name when unset or empty
    description: str | None = None  # falls back to fn.comment() when unset or empty

    @property
    def parameters(self) -> dict[str, Parameter]:
        """The parameters of the wrapped function's signature, keyed by name."""
        return self.fn.signature.parameters

    @pydantic.model_serializer
    def ser_model(self) -> dict[str, Any]:
        """Serialize to a dict with 'name', 'description', 'parameters', 'required' and 'additionalProperties'.

        'required' lists the parameters whose column type is not nullable. Note that 'required' and
        'additionalProperties' sit next to 'parameters', not inside it.
        """
        tool_name = self.name or self.fn.name
        tool_description = self.description or self.fn.comment()
        properties = {p.name: p.col_type._to_json_schema() for p in self.parameters.values()}
        required = [p.name for p in self.parameters.values() if not p.col_type.nullable]
        return {
            'name': tool_name,
            'description': tool_description,
            'parameters': {
                'type': 'object',
                'properties': properties,
            },
            'required': required,
            'additionalProperties': False,  # TODO Handle kwargs?
        }

    # The output of `tool_calls` must be a dict in standardized tool invocation format:
    # {tool_name: [{'args': {name1: value1, name2: value2, ...}}, ...], ...}
    def invoke(self, tool_calls: 'exprs.Expr') -> 'exprs.Expr':
        """Return an expression that applies the wrapped function to every call recorded for this tool."""
        import pixeltable.functions as pxtf

        calls_for_tool = tool_calls[self.name or self.fn.name]['*']
        return pxtf.map(calls_for_tool, lambda x: self.__invoke_kwargs(x.args))

    def __invoke_kwargs(self, kwargs: 'exprs.Expr') -> 'exprs.FunctionCall':
        """Call the wrapped function with one extracted argument per declared parameter."""
        call_args = {}
        for param in self.parameters.values():
            call_args[param.name] = self.__extract_tool_arg(param, kwargs)
        return self.fn(**call_args)

    def __extract_tool_arg(self, param: Parameter, kwargs: 'exprs.Expr') -> 'exprs.FunctionCall':
        """Pick the extraction UDF that matches the parameter's column type and apply it to `kwargs`."""
        col_type = param.col_type
        # (type predicate, extraction UDF) pairs; they are tried in this order and the first match wins
        extractors = (
            ('is_string_type', _extract_str_tool_arg),
            ('is_int_type', _extract_int_tool_arg),
            ('is_float_type', _extract_float_tool_arg),
            ('is_bool_type', _extract_bool_tool_arg),
            ('is_json_type', _extract_json_tool_arg),
            ('is_uuid_type', _extract_uuid_tool_arg),
        )
        for predicate_name, extractor in extractors:
            if getattr(col_type, predicate_name)():
                return extractor(kwargs, param_name=param.name)
        # no extractor exists for this column type
        raise AssertionError(param.col_type)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class ToolChoice(pydantic.BaseModel):
    # Plain record of a tool-choice setting; `Tools.choice()` constructs instances of it.
    auto: bool
    required: bool
    # name of one specific tool, or None (`Tools.choice()` stores the resolved tool name here)
    tool: str | None
    parallel_tool_calls: bool
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
class Tools(pydantic.BaseModel):
    # A list of `Tool`s that is serialized, invoked and selected from as a group.
    tools: list[Tool]

    @pydantic.model_serializer
    def ser_model(self) -> list[dict[str, Any]]:
        """Serialize to a list holding the serialized form of each tool, in order."""
        serialized = []
        for tool in self.tools:
            serialized.append(tool.ser_model())
        return serialized

    # `tool_calls` must be in standardized tool invocation format:
    # {tool_name: [{'args': {name1: value1, name2: value2, ...}}, ...], ...}
    def _invoke(self, tool_calls: 'exprs.Expr') -> 'exprs.InlineDict':
        """Return an inline dict mapping each tool's effective name to its invocation expression."""
        from pixeltable import exprs

        invocations = {}
        for tool in self.tools:
            invocations[tool.name or tool.fn.name] = tool.invoke(tool_calls)
        return exprs.InlineDict(invocations)

    def choice(
        self,
        auto: bool = False,
        required: bool = False,
        tool: str | Function | None = None,
        parallel_tool_calls: bool = True,
    ) -> ToolChoice:
        """Build a ToolChoice; exactly one of `auto`, `required` and `tool` must be specified.

        `tool` may be a tool name or a `Function`; it must correspond to one of `self.tools`, and the
        resulting ToolChoice stores that tool's effective name.

        Raises:
            excs.Error: if not exactly one option is specified, or if `tool` is not in `self.tools`.
        """
        if sum([auto, required, tool is not None]) != 1:
            raise excs.Error('Exactly one of `auto`, `required`, or `tool` must be specified.')

        def _matches(candidate: Tool) -> bool:
            # a Function is matched against the wrapped function, a string against the effective name
            return (isinstance(tool, Function) and candidate.fn == tool) or (
                isinstance(tool, str) and (candidate.name or candidate.fn.name) == tool
            )

        tool_name: str | None = None
        if tool is not None:
            try:
                selected = next(candidate for candidate in self.tools if _matches(candidate))
            except StopIteration:
                raise excs.Error(f'That tool is not in the specified list of tools: {tool}') from None
            tool_name = selected.name or selected.fn.name
        return ToolChoice(auto=auto, required=required, tool=tool_name, parallel_tool_calls=parallel_tool_calls)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
@udf
def _extract_str_tool_arg(kwargs: dict[str, Any], param_name: str) -> str | None:
    # str(...) of the named argument, or None if `kwargs` has no such key
    return _extract_arg(eval_fn=str, kwargs=kwargs, param_name=param_name)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
@udf
def _extract_int_tool_arg(kwargs: dict[str, Any], param_name: str) -> int | None:
    # int(...) of the named argument, or None if `kwargs` has no such key
    return _extract_arg(eval_fn=int, kwargs=kwargs, param_name=param_name)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
@udf
def _extract_float_tool_arg(kwargs: dict[str, Any], param_name: str) -> float | None:
    # float(...) of the named argument, or None if `kwargs` has no such key
    return _extract_arg(eval_fn=float, kwargs=kwargs, param_name=param_name)
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
@udf
def _extract_bool_tool_arg(kwargs: dict[str, Any], param_name: str) -> bool | None:
    # bool(...) of the named argument, or None if `kwargs` has no such key
    return _extract_arg(eval_fn=bool, kwargs=kwargs, param_name=param_name)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
@udf
def _extract_json_tool_arg(kwargs: dict[str, Any], param_name: str) -> ts.Json | None:
    # Returns the named argument as a JSON value, or None if `kwargs` has no such key.
    if param_name not in kwargs:
        return None
    value = kwargs[param_name]
    if isinstance(value, (str, bytes, bytearray)):
        # textual JSON still has to be decoded
        return json.loads(value)
    # json.loads() only accepts str/bytes/bytearray and raises TypeError for anything else; a value
    # that is not text (dict, list, number, ...) has already been decoded, so it is returned as-is.
    return value
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
@udf
def _extract_uuid_tool_arg(kwargs: dict[str, Any], param_name: str) -> uuid.UUID | None:
    # uuid.UUID(...) of the named argument, or None if `kwargs` has no such key
    return _extract_arg(eval_fn=uuid.UUID, kwargs=kwargs, param_name=param_name)
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
T = TypeVar('T')
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def _extract_arg(eval_fn: Callable[[Any], T], kwargs: dict[str, Any], param_name: str) -> T | None:
|
|
162
|
+
if param_name in kwargs:
|
|
163
|
+
return eval_fn(kwargs[param_name])
|
|
164
|
+
return None
|