pixeltable 0.3.14__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +42 -8
- pixeltable/{dataframe.py → _query.py} +470 -206
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +5 -4
- pixeltable/catalog/catalog.py +1785 -432
- pixeltable/catalog/column.py +190 -113
- pixeltable/catalog/dir.py +2 -4
- pixeltable/catalog/globals.py +19 -46
- pixeltable/catalog/insertable_table.py +191 -98
- pixeltable/catalog/path.py +63 -23
- pixeltable/catalog/schema_object.py +11 -15
- pixeltable/catalog/table.py +843 -436
- pixeltable/catalog/table_metadata.py +103 -0
- pixeltable/catalog/table_version.py +978 -657
- pixeltable/catalog/table_version_handle.py +72 -16
- pixeltable/catalog/table_version_path.py +112 -43
- pixeltable/catalog/tbl_ops.py +53 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +134 -90
- pixeltable/config.py +134 -22
- pixeltable/env.py +471 -157
- pixeltable/exceptions.py +6 -0
- pixeltable/exec/__init__.py +4 -1
- pixeltable/exec/aggregation_node.py +7 -8
- pixeltable/exec/cache_prefetch_node.py +83 -110
- pixeltable/exec/cell_materialization_node.py +268 -0
- pixeltable/exec/cell_reconstruction_node.py +168 -0
- pixeltable/exec/component_iteration_node.py +4 -3
- pixeltable/exec/data_row_batch.py +8 -65
- pixeltable/exec/exec_context.py +16 -4
- pixeltable/exec/exec_node.py +13 -36
- pixeltable/exec/expr_eval/evaluators.py +11 -7
- pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
- pixeltable/exec/expr_eval/globals.py +8 -5
- pixeltable/exec/expr_eval/row_buffer.py +1 -2
- pixeltable/exec/expr_eval/schedulers.py +106 -56
- pixeltable/exec/globals.py +35 -0
- pixeltable/exec/in_memory_data_node.py +19 -19
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +16 -9
- pixeltable/exec/sql_node.py +351 -84
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/arithmetic_expr.py +27 -22
- pixeltable/exprs/array_slice.py +3 -3
- pixeltable/exprs/column_property_ref.py +36 -23
- pixeltable/exprs/column_ref.py +213 -89
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +5 -4
- pixeltable/exprs/data_row.py +164 -54
- pixeltable/exprs/expr.py +70 -44
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +17 -10
- pixeltable/exprs/function_call.py +100 -40
- pixeltable/exprs/globals.py +2 -2
- pixeltable/exprs/in_predicate.py +4 -4
- pixeltable/exprs/inline_expr.py +18 -32
- pixeltable/exprs/is_null.py +7 -3
- pixeltable/exprs/json_mapper.py +8 -8
- pixeltable/exprs/json_path.py +56 -22
- pixeltable/exprs/literal.py +27 -5
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +167 -67
- pixeltable/exprs/rowid_ref.py +25 -10
- pixeltable/exprs/similarity_expr.py +58 -40
- pixeltable/exprs/sql_element_cache.py +4 -4
- pixeltable/exprs/string_op.py +5 -5
- pixeltable/exprs/type_cast.py +3 -5
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/aggregate_function.py +8 -8
- pixeltable/func/callable_function.py +9 -9
- pixeltable/func/expr_template_function.py +17 -11
- pixeltable/func/function.py +18 -20
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/globals.py +2 -3
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +29 -27
- pixeltable/func/signature.py +46 -19
- pixeltable/func/tools.py +31 -13
- pixeltable/func/udf.py +18 -20
- pixeltable/functions/__init__.py +16 -0
- pixeltable/functions/anthropic.py +123 -77
- pixeltable/functions/audio.py +147 -10
- pixeltable/functions/bedrock.py +13 -6
- pixeltable/functions/date.py +7 -4
- pixeltable/functions/deepseek.py +35 -43
- pixeltable/functions/document.py +81 -0
- pixeltable/functions/fal.py +76 -0
- pixeltable/functions/fireworks.py +11 -20
- pixeltable/functions/gemini.py +195 -39
- pixeltable/functions/globals.py +142 -14
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1056 -24
- pixeltable/functions/image.py +115 -57
- pixeltable/functions/json.py +1 -1
- pixeltable/functions/llama_cpp.py +28 -13
- pixeltable/functions/math.py +67 -5
- pixeltable/functions/mistralai.py +18 -55
- pixeltable/functions/net.py +70 -0
- pixeltable/functions/ollama.py +20 -13
- pixeltable/functions/openai.py +240 -226
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +4 -4
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +239 -69
- pixeltable/functions/timestamp.py +16 -16
- pixeltable/functions/together.py +24 -84
- pixeltable/functions/twelvelabs.py +188 -0
- pixeltable/functions/util.py +6 -1
- pixeltable/functions/uuid.py +30 -0
- pixeltable/functions/video.py +1515 -107
- pixeltable/functions/vision.py +8 -8
- pixeltable/functions/voyageai.py +289 -0
- pixeltable/functions/whisper.py +16 -8
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/{ext/functions → functions}/yolox.py +2 -4
- pixeltable/globals.py +362 -115
- pixeltable/index/base.py +17 -21
- pixeltable/index/btree.py +28 -22
- pixeltable/index/embedding_index.py +100 -118
- pixeltable/io/__init__.py +4 -2
- pixeltable/io/datarows.py +8 -7
- pixeltable/io/external_store.py +56 -105
- pixeltable/io/fiftyone.py +13 -13
- pixeltable/io/globals.py +31 -30
- pixeltable/io/hf_datasets.py +61 -16
- pixeltable/io/label_studio.py +74 -70
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +21 -12
- pixeltable/io/parquet.py +25 -105
- pixeltable/io/table_data_conduit.py +250 -123
- pixeltable/io/utils.py +4 -4
- pixeltable/iterators/__init__.py +2 -1
- pixeltable/iterators/audio.py +26 -25
- pixeltable/iterators/base.py +9 -3
- pixeltable/iterators/document.py +112 -78
- pixeltable/iterators/image.py +12 -15
- pixeltable/iterators/string.py +11 -4
- pixeltable/iterators/video.py +523 -120
- pixeltable/metadata/__init__.py +14 -3
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_18.py +2 -2
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_20.py +2 -2
- pixeltable/metadata/converters/convert_21.py +2 -2
- pixeltable/metadata/converters/convert_22.py +2 -2
- pixeltable/metadata/converters/convert_24.py +2 -2
- pixeltable/metadata/converters/convert_25.py +2 -2
- pixeltable/metadata/converters/convert_26.py +2 -2
- pixeltable/metadata/converters/convert_29.py +4 -4
- pixeltable/metadata/converters/convert_30.py +34 -21
- pixeltable/metadata/converters/convert_34.py +2 -2
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/convert_41.py +12 -0
- pixeltable/metadata/converters/convert_42.py +9 -0
- pixeltable/metadata/converters/convert_43.py +44 -0
- pixeltable/metadata/converters/util.py +20 -31
- pixeltable/metadata/notes.py +9 -0
- pixeltable/metadata/schema.py +140 -53
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +382 -115
- pixeltable/share/__init__.py +1 -1
- pixeltable/share/packager.py +547 -83
- pixeltable/share/protocol/__init__.py +33 -0
- pixeltable/share/protocol/common.py +165 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +119 -0
- pixeltable/share/publish.py +257 -59
- pixeltable/store.py +311 -194
- pixeltable/type_system.py +373 -211
- pixeltable/utils/__init__.py +2 -3
- pixeltable/utils/arrow.py +131 -17
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +346 -0
- pixeltable/utils/coco.py +6 -6
- pixeltable/utils/code.py +3 -3
- pixeltable/utils/console_output.py +4 -1
- pixeltable/utils/coroutine.py +6 -23
- pixeltable/utils/dbms.py +32 -6
- pixeltable/utils/description_helper.py +4 -5
- pixeltable/utils/documents.py +7 -18
- pixeltable/utils/exception_handler.py +7 -30
- pixeltable/utils/filecache.py +6 -6
- pixeltable/utils/formatter.py +86 -48
- pixeltable/utils/gcs_store.py +295 -0
- pixeltable/utils/http.py +133 -0
- pixeltable/utils/http_server.py +2 -3
- pixeltable/utils/iceberg.py +1 -2
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +90 -0
- pixeltable/utils/local_store.py +322 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +573 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +5 -6
- pixeltable/utils/s3_store.py +527 -0
- pixeltable/utils/sql.py +26 -0
- pixeltable/utils/system.py +30 -0
- pixeltable-0.5.7.dist-info/METADATA +579 -0
- pixeltable-0.5.7.dist-info/RECORD +227 -0
- {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
- pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/catalog/named_function.py +0 -40
- pixeltable/ext/__init__.py +0 -17
- pixeltable/ext/functions/__init__.py +0 -11
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/utils/media_store.py +0 -77
- pixeltable/utils/s3.py +0 -17
- pixeltable-0.3.14.dist-info/METADATA +0 -434
- pixeltable-0.3.14.dist-info/RECORD +0 -186
- pixeltable-0.3.14.dist-info/entry_points.txt +0 -3
- {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
pixeltable/func/mcp.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import inspect
|
|
3
|
+
from typing import TYPE_CHECKING, Any
|
|
4
|
+
|
|
5
|
+
import pixeltable as pxt
|
|
6
|
+
from pixeltable import exceptions as excs, type_system as ts
|
|
7
|
+
from pixeltable.func.signature import Parameter
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
import mcp
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def mcp_udfs(url: str) -> list['pxt.func.Function']:
|
|
14
|
+
return asyncio.run(mcp_udfs_async(url))
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
async def mcp_udfs_async(url: str) -> list['pxt.func.Function']:
|
|
18
|
+
import mcp
|
|
19
|
+
from mcp.client.streamable_http import streamablehttp_client
|
|
20
|
+
|
|
21
|
+
list_tools_result: mcp.types.ListToolsResult | None = None
|
|
22
|
+
async with (
|
|
23
|
+
streamablehttp_client(url) as (read_stream, write_stream, _),
|
|
24
|
+
mcp.ClientSession(read_stream, write_stream) as session,
|
|
25
|
+
):
|
|
26
|
+
await session.initialize()
|
|
27
|
+
list_tools_result = await session.list_tools()
|
|
28
|
+
assert list_tools_result is not None
|
|
29
|
+
|
|
30
|
+
return [mcp_tool_to_udf(url, tool) for tool in list_tools_result.tools]
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def mcp_tool_to_udf(url: str, mcp_tool: 'mcp.types.Tool') -> 'pxt.func.Function':
|
|
34
|
+
import mcp
|
|
35
|
+
from mcp.client.streamable_http import streamablehttp_client
|
|
36
|
+
|
|
37
|
+
async def invoke(**kwargs: Any) -> str:
|
|
38
|
+
# TODO: Cache session objects rather than creating a new one each time?
|
|
39
|
+
async with (
|
|
40
|
+
streamablehttp_client(url) as (read_stream, write_stream, _),
|
|
41
|
+
mcp.ClientSession(read_stream, write_stream) as session,
|
|
42
|
+
):
|
|
43
|
+
await session.initialize()
|
|
44
|
+
res = await session.call_tool(name=mcp_tool.name, arguments=kwargs)
|
|
45
|
+
# TODO Handle image/audio responses?
|
|
46
|
+
return res.content[0].text # type: ignore[union-attr]
|
|
47
|
+
|
|
48
|
+
if mcp_tool.description is not None:
|
|
49
|
+
invoke.__doc__ = mcp_tool.description
|
|
50
|
+
|
|
51
|
+
input_schema = mcp_tool.inputSchema
|
|
52
|
+
params = {
|
|
53
|
+
name: __mcp_param_to_pxt_type(mcp_tool.name, name, param) for name, param in input_schema['properties'].items()
|
|
54
|
+
}
|
|
55
|
+
required = input_schema.get('required', [])
|
|
56
|
+
|
|
57
|
+
# Ensure that any params not appearing in `required` are nullable.
|
|
58
|
+
# (A required param might or might not be nullable, since its type might be an 'anyOf' containing a null.)
|
|
59
|
+
for name in params.keys() - required:
|
|
60
|
+
params[name] = params[name].copy(nullable=True)
|
|
61
|
+
|
|
62
|
+
signature = pxt.func.Signature(
|
|
63
|
+
return_type=ts.StringType(), # Return type is always string
|
|
64
|
+
parameters=[Parameter(name, col_type, inspect.Parameter.KEYWORD_ONLY) for name, col_type in params.items()],
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
return pxt.func.CallableFunction(signatures=[signature], py_fns=[invoke], self_name=mcp_tool.name)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def __mcp_param_to_pxt_type(tool_name: str, name: str, param: dict[str, Any]) -> ts.ColumnType:
|
|
71
|
+
pxt_type = ts.ColumnType.from_json_schema(param)
|
|
72
|
+
if pxt_type is None:
|
|
73
|
+
raise excs.Error(f'Unknown type schema for MCP parameter {name!r} of tool {tool_name!r}: {param}')
|
|
74
|
+
return pxt_type
|
|
@@ -2,7 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import inspect
|
|
4
4
|
from functools import reduce
|
|
5
|
-
from typing import TYPE_CHECKING, Any, Callable, Iterable,
|
|
5
|
+
from typing import TYPE_CHECKING, Any, Callable, Iterable, overload
|
|
6
6
|
|
|
7
7
|
from pixeltable import catalog, exceptions as excs, exprs, func, type_system as ts
|
|
8
8
|
|
|
@@ -10,41 +10,41 @@ from .function import Function
|
|
|
10
10
|
from .signature import Signature
|
|
11
11
|
|
|
12
12
|
if TYPE_CHECKING:
|
|
13
|
-
from pixeltable import
|
|
13
|
+
from pixeltable import Query
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
class QueryTemplateFunction(Function):
|
|
17
|
-
"""A parameterized query
|
|
17
|
+
"""A parameterized query from which an executable Query is created with a function call."""
|
|
18
18
|
|
|
19
|
-
template_df:
|
|
20
|
-
self_name:
|
|
21
|
-
_comment:
|
|
19
|
+
template_df: 'Query' | None
|
|
20
|
+
self_name: str | None
|
|
21
|
+
_comment: str | None
|
|
22
22
|
|
|
23
23
|
@classmethod
|
|
24
24
|
def create(
|
|
25
|
-
cls, template_callable: Callable, param_types:
|
|
25
|
+
cls, template_callable: Callable, param_types: list[ts.ColumnType] | None, path: str, name: str
|
|
26
26
|
) -> QueryTemplateFunction:
|
|
27
27
|
# we need to construct a template df and a signature
|
|
28
28
|
py_sig = inspect.signature(template_callable)
|
|
29
29
|
py_params = list(py_sig.parameters.values())
|
|
30
30
|
params = Signature.create_parameters(py_params=py_params, param_types=param_types)
|
|
31
|
-
# invoke template_callable with parameter expressions to construct a
|
|
31
|
+
# invoke template_callable with parameter expressions to construct a Query with parameters
|
|
32
32
|
var_exprs = [exprs.Variable(param.name, param.col_type) for param in params]
|
|
33
33
|
template_df = template_callable(*var_exprs)
|
|
34
|
-
from pixeltable import
|
|
34
|
+
from pixeltable import Query
|
|
35
35
|
|
|
36
|
-
assert isinstance(template_df,
|
|
36
|
+
assert isinstance(template_df, Query)
|
|
37
37
|
# we take params and return json
|
|
38
38
|
sig = Signature(return_type=ts.JsonType(), parameters=params)
|
|
39
39
|
return QueryTemplateFunction(template_df, sig, path=path, name=name, comment=inspect.getdoc(template_callable))
|
|
40
40
|
|
|
41
41
|
def __init__(
|
|
42
42
|
self,
|
|
43
|
-
template_df:
|
|
43
|
+
template_df: 'Query' | None,
|
|
44
44
|
sig: Signature,
|
|
45
|
-
path:
|
|
46
|
-
name:
|
|
47
|
-
comment:
|
|
45
|
+
path: str | None = None,
|
|
46
|
+
name: str | None = None,
|
|
47
|
+
comment: str | None = None,
|
|
48
48
|
):
|
|
49
49
|
assert sig is not None
|
|
50
50
|
super().__init__([sig], self_path=path)
|
|
@@ -82,7 +82,7 @@ class QueryTemplateFunction(Function):
|
|
|
82
82
|
def name(self) -> str:
|
|
83
83
|
return self.self_name
|
|
84
84
|
|
|
85
|
-
def comment(self) ->
|
|
85
|
+
def comment(self) -> str | None:
|
|
86
86
|
return self._comment
|
|
87
87
|
|
|
88
88
|
def _as_dict(self) -> dict:
|
|
@@ -90,9 +90,9 @@ class QueryTemplateFunction(Function):
|
|
|
90
90
|
|
|
91
91
|
@classmethod
|
|
92
92
|
def _from_dict(cls, d: dict) -> Function:
|
|
93
|
-
from pixeltable.
|
|
93
|
+
from pixeltable._query import Query
|
|
94
94
|
|
|
95
|
-
return cls(
|
|
95
|
+
return cls(Query.from_dict(d['df']), Signature.from_dict(d['signature']), name=d['name'])
|
|
96
96
|
|
|
97
97
|
|
|
98
98
|
@overload
|
|
@@ -100,11 +100,11 @@ def query(py_fn: Callable) -> QueryTemplateFunction: ...
|
|
|
100
100
|
|
|
101
101
|
|
|
102
102
|
@overload
|
|
103
|
-
def query(*, param_types:
|
|
103
|
+
def query(*, param_types: list[ts.ColumnType] | None = None) -> Callable[[Callable], QueryTemplateFunction]: ...
|
|
104
104
|
|
|
105
105
|
|
|
106
106
|
def query(*args: Any, **kwargs: Any) -> Any:
|
|
107
|
-
def make_query_template(py_fn: Callable, param_types:
|
|
107
|
+
def make_query_template(py_fn: Callable, param_types: list[ts.ColumnType] | None) -> QueryTemplateFunction:
|
|
108
108
|
if py_fn.__module__ != '__main__' and py_fn.__name__.isidentifier():
|
|
109
109
|
# this is a named function in a module
|
|
110
110
|
function_path = f'{py_fn.__module__}.{py_fn.__qualname__}'
|
|
@@ -127,10 +127,10 @@ def query(*args: Any, **kwargs: Any) -> Any:
|
|
|
127
127
|
|
|
128
128
|
def retrieval_udf(
|
|
129
129
|
table: catalog.Table,
|
|
130
|
-
name:
|
|
131
|
-
description:
|
|
132
|
-
parameters:
|
|
133
|
-
limit:
|
|
130
|
+
name: str | None = None,
|
|
131
|
+
description: str | None = None,
|
|
132
|
+
parameters: Iterable[str | exprs.ColumnRef] | None = None,
|
|
133
|
+
limit: int | None = 10,
|
|
134
134
|
) -> func.QueryTemplateFunction:
|
|
135
135
|
"""
|
|
136
136
|
Constructs a retrieval UDF for the given table. The retrieval UDF is a UDF whose parameters are
|
|
@@ -157,18 +157,20 @@ def retrieval_udf(
|
|
|
157
157
|
"""
|
|
158
158
|
# Argument validation
|
|
159
159
|
col_refs: list[exprs.ColumnRef]
|
|
160
|
+
# TODO: get rid of references to ColumnRef internals and replace instead with a public interface
|
|
161
|
+
col_names = table.columns()
|
|
160
162
|
if parameters is None:
|
|
161
|
-
col_refs = [table[col_name] for col_name in
|
|
163
|
+
col_refs = [table[col_name] for col_name in col_names if not table[col_name].col.is_computed]
|
|
162
164
|
else:
|
|
163
165
|
for param in parameters:
|
|
164
|
-
if isinstance(param, str) and param not in
|
|
165
|
-
raise excs.Error(f'The specified parameter {param!r} is not a column of the table {table._path!r}')
|
|
166
|
+
if isinstance(param, str) and param not in col_names:
|
|
167
|
+
raise excs.Error(f'The specified parameter {param!r} is not a column of the table {table._path()!r}')
|
|
166
168
|
col_refs = [table[param] if isinstance(param, str) else param for param in parameters]
|
|
167
169
|
|
|
168
170
|
if len(col_refs) == 0:
|
|
169
171
|
raise excs.Error('Parameter list cannot be empty.')
|
|
170
172
|
|
|
171
|
-
# Construct the
|
|
173
|
+
# Construct the Query
|
|
172
174
|
predicates = [col_ref == exprs.Variable(col_ref.col.name, col_ref.col.col_type) for col_ref in col_refs]
|
|
173
175
|
where_clause = reduce(lambda c1, c2: c1 & c2, predicates)
|
|
174
176
|
df = table.select().where(where_clause)
|
pixeltable/func/signature.py
CHANGED
|
@@ -4,7 +4,7 @@ import dataclasses
|
|
|
4
4
|
import inspect
|
|
5
5
|
import logging
|
|
6
6
|
import typing
|
|
7
|
-
from typing import TYPE_CHECKING, Any, Callable, ClassVar
|
|
7
|
+
from typing import TYPE_CHECKING, Any, Callable, ClassVar
|
|
8
8
|
|
|
9
9
|
import pixeltable.exceptions as excs
|
|
10
10
|
import pixeltable.type_system as ts
|
|
@@ -18,11 +18,11 @@ _logger = logging.getLogger('pixeltable')
|
|
|
18
18
|
@dataclasses.dataclass
|
|
19
19
|
class Parameter:
|
|
20
20
|
name: str
|
|
21
|
-
col_type:
|
|
21
|
+
col_type: ts.ColumnType | None # None for variable parameters
|
|
22
22
|
kind: inspect._ParameterKind
|
|
23
23
|
# for some reason, this needs to precede is_batched in the dataclass definition,
|
|
24
24
|
# otherwise Python complains that an argument with a default is followed by an argument without a default
|
|
25
|
-
default:
|
|
25
|
+
default: 'exprs.Literal' | None = None # default value for the parameter
|
|
26
26
|
is_batched: bool = False # True if the parameter is a batched parameter (eg, Batch[dict])
|
|
27
27
|
|
|
28
28
|
def __post_init__(self) -> None:
|
|
@@ -84,8 +84,28 @@ class Signature:
|
|
|
84
84
|
"""
|
|
85
85
|
|
|
86
86
|
SPECIAL_PARAM_NAMES: ClassVar[list[str]] = ['group_by', 'order_by']
|
|
87
|
-
|
|
88
|
-
|
|
87
|
+
SYSTEM_PARAM_NAMES: ClassVar[list[str]] = ['_runtime_ctx']
|
|
88
|
+
|
|
89
|
+
return_type: ts.ColumnType
|
|
90
|
+
is_batched: bool
|
|
91
|
+
parameters: dict[str, Parameter] # name -> Parameter
|
|
92
|
+
parameters_by_pos: list[Parameter] # ordered by position in the signature
|
|
93
|
+
constant_parameters: list[Parameter] # parameters that are not batched
|
|
94
|
+
batched_parameters: list[Parameter] # parameters that are batched
|
|
95
|
+
required_parameters: list[Parameter] # parameters that do not have a default value
|
|
96
|
+
|
|
97
|
+
# the names of recognized system parameters in the signature; these are excluded from self.parameters
|
|
98
|
+
system_parameters: list[str]
|
|
99
|
+
|
|
100
|
+
py_signature: inspect.Signature
|
|
101
|
+
|
|
102
|
+
def __init__(
|
|
103
|
+
self,
|
|
104
|
+
return_type: ts.ColumnType,
|
|
105
|
+
parameters: list[Parameter],
|
|
106
|
+
is_batched: bool = False,
|
|
107
|
+
system_parameters: list[str] | None = None,
|
|
108
|
+
):
|
|
89
109
|
assert isinstance(return_type, ts.ColumnType)
|
|
90
110
|
self.return_type = return_type
|
|
91
111
|
self.is_batched = is_batched
|
|
@@ -95,6 +115,7 @@ class Signature:
|
|
|
95
115
|
self.constant_parameters = [p for p in parameters if not p.is_batched]
|
|
96
116
|
self.batched_parameters = [p for p in parameters if p.is_batched]
|
|
97
117
|
self.required_parameters = [p for p in parameters if not p.has_default()]
|
|
118
|
+
self.system_parameters = system_parameters if system_parameters is not None else []
|
|
98
119
|
self.py_signature = inspect.Signature([p.to_py_param() for p in self.parameters_by_pos])
|
|
99
120
|
|
|
100
121
|
def get_return_type(self) -> ts.ColumnType:
|
|
@@ -151,12 +172,12 @@ class Signature:
|
|
|
151
172
|
|
|
152
173
|
return True
|
|
153
174
|
|
|
154
|
-
def validate_args(self, bound_args: dict[str,
|
|
175
|
+
def validate_args(self, bound_args: dict[str, 'exprs.Expr' | None], context: str = '') -> None:
|
|
155
176
|
if context:
|
|
156
177
|
context = f' ({context})'
|
|
157
178
|
|
|
158
179
|
for param_name, arg in bound_args.items():
|
|
159
|
-
assert param_name in self.parameters
|
|
180
|
+
assert param_name in self.parameters, f'{param_name!r} not in {list(self.parameters.keys())}'
|
|
160
181
|
param = self.parameters[param_name]
|
|
161
182
|
is_var_param = param.kind in {inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD}
|
|
162
183
|
if is_var_param:
|
|
@@ -206,15 +227,15 @@ class Signature:
|
|
|
206
227
|
elif p.kind == inspect.Parameter.VAR_KEYWORD:
|
|
207
228
|
param_strs.append(f'**{p.name}')
|
|
208
229
|
else:
|
|
209
|
-
param_strs.append(f'{p.name}: {p.col_type}')
|
|
210
|
-
return f'({", ".join(param_strs)}) -> {self.get_return_type()}'
|
|
230
|
+
param_strs.append(f'{p.name}: pxt.{p.col_type}')
|
|
231
|
+
return f'({", ".join(param_strs)}) -> pxt.{self.get_return_type()}'
|
|
211
232
|
|
|
212
233
|
@classmethod
|
|
213
|
-
def _infer_type(cls, annotation:
|
|
234
|
+
def _infer_type(cls, annotation: type | None) -> tuple[ts.ColumnType | None, bool | None]:
|
|
214
235
|
"""Returns: (column type, is_batched) or (None, ...) if the type cannot be inferred"""
|
|
215
236
|
if annotation is None:
|
|
216
237
|
return (None, None)
|
|
217
|
-
py_type:
|
|
238
|
+
py_type: type | None = None
|
|
218
239
|
is_batched = False
|
|
219
240
|
if typing.get_origin(annotation) == typing.Annotated:
|
|
220
241
|
type_args = typing.get_args(annotation)
|
|
@@ -231,12 +252,13 @@ class Signature:
|
|
|
231
252
|
@classmethod
|
|
232
253
|
def create_parameters(
|
|
233
254
|
cls,
|
|
234
|
-
py_fn:
|
|
235
|
-
py_params:
|
|
236
|
-
param_types:
|
|
237
|
-
type_substitutions:
|
|
255
|
+
py_fn: Callable | None = None,
|
|
256
|
+
py_params: list[inspect.Parameter] | None = None,
|
|
257
|
+
param_types: list[ts.ColumnType] | None = None,
|
|
258
|
+
type_substitutions: dict | None = None,
|
|
238
259
|
is_cls_method: bool = False,
|
|
239
260
|
) -> list[Parameter]:
|
|
261
|
+
"""Ignores parameters starting with '_'."""
|
|
240
262
|
from pixeltable import exprs
|
|
241
263
|
|
|
242
264
|
assert (py_fn is None) != (py_params is None)
|
|
@@ -251,6 +273,10 @@ class Signature:
|
|
|
251
273
|
for idx, param in enumerate(py_params):
|
|
252
274
|
if is_cls_method and idx == 0:
|
|
253
275
|
continue # skip 'self' or 'cls' parameter
|
|
276
|
+
if param.name in cls.SYSTEM_PARAM_NAMES:
|
|
277
|
+
continue # skip system parameters
|
|
278
|
+
if param.name.startswith('_'):
|
|
279
|
+
raise excs.Error(f"{param.name!r}: parameters starting with '_' are reserved")
|
|
254
280
|
if param.name in cls.SPECIAL_PARAM_NAMES:
|
|
255
281
|
raise excs.Error(f'{param.name!r} is a reserved parameter name')
|
|
256
282
|
if param.kind in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD):
|
|
@@ -284,9 +310,9 @@ class Signature:
|
|
|
284
310
|
def create(
|
|
285
311
|
cls,
|
|
286
312
|
py_fn: Callable,
|
|
287
|
-
param_types:
|
|
288
|
-
return_type:
|
|
289
|
-
type_substitutions:
|
|
313
|
+
param_types: list[ts.ColumnType] | None = None,
|
|
314
|
+
return_type: ts.ColumnType | None = None,
|
|
315
|
+
type_substitutions: dict | None = None,
|
|
290
316
|
is_cls_method: bool = False,
|
|
291
317
|
) -> Signature:
|
|
292
318
|
"""Create a signature for the given Callable.
|
|
@@ -308,5 +334,6 @@ class Signature:
|
|
|
308
334
|
raise excs.Error('Cannot infer pixeltable return type')
|
|
309
335
|
else:
|
|
310
336
|
_, return_is_batched = cls._infer_type(sig.return_annotation)
|
|
337
|
+
system_params = [param_name for param_name in sig.parameters if param_name in cls.SYSTEM_PARAM_NAMES]
|
|
311
338
|
|
|
312
|
-
return Signature(return_type, parameters, return_is_batched)
|
|
339
|
+
return Signature(return_type, parameters, return_is_batched, system_parameters=system_params)
|
pixeltable/func/tools.py
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
|
-
|
|
1
|
+
import json
|
|
2
|
+
import uuid
|
|
3
|
+
from typing import TYPE_CHECKING, Any, Callable, TypeVar
|
|
2
4
|
|
|
3
5
|
import pydantic
|
|
4
6
|
|
|
5
|
-
import
|
|
7
|
+
from pixeltable import exceptions as excs, type_system as ts
|
|
6
8
|
|
|
7
9
|
from .function import Function
|
|
8
10
|
from .signature import Parameter
|
|
@@ -28,8 +30,8 @@ class Tool(pydantic.BaseModel):
|
|
|
28
30
|
model_config = pydantic.ConfigDict(arbitrary_types_allowed=True)
|
|
29
31
|
|
|
30
32
|
fn: Function
|
|
31
|
-
name:
|
|
32
|
-
description:
|
|
33
|
+
name: str | None = None
|
|
34
|
+
description: str | None = None
|
|
33
35
|
|
|
34
36
|
@property
|
|
35
37
|
def parameters(self) -> dict[str, Parameter]:
|
|
@@ -69,13 +71,17 @@ class Tool(pydantic.BaseModel):
|
|
|
69
71
|
return _extract_float_tool_arg(kwargs, param_name=param.name)
|
|
70
72
|
if param.col_type.is_bool_type():
|
|
71
73
|
return _extract_bool_tool_arg(kwargs, param_name=param.name)
|
|
72
|
-
|
|
74
|
+
if param.col_type.is_json_type():
|
|
75
|
+
return _extract_json_tool_arg(kwargs, param_name=param.name)
|
|
76
|
+
if param.col_type.is_uuid_type():
|
|
77
|
+
return _extract_uuid_tool_arg(kwargs, param_name=param.name)
|
|
78
|
+
raise AssertionError(param.col_type)
|
|
73
79
|
|
|
74
80
|
|
|
75
81
|
class ToolChoice(pydantic.BaseModel):
|
|
76
82
|
auto: bool
|
|
77
83
|
required: bool
|
|
78
|
-
tool:
|
|
84
|
+
tool: str | None
|
|
79
85
|
parallel_tool_calls: bool
|
|
80
86
|
|
|
81
87
|
|
|
@@ -97,12 +103,12 @@ class Tools(pydantic.BaseModel):
|
|
|
97
103
|
self,
|
|
98
104
|
auto: bool = False,
|
|
99
105
|
required: bool = False,
|
|
100
|
-
tool:
|
|
106
|
+
tool: str | Function | None = None,
|
|
101
107
|
parallel_tool_calls: bool = True,
|
|
102
108
|
) -> ToolChoice:
|
|
103
109
|
if sum([auto, required, tool is not None]) != 1:
|
|
104
110
|
raise excs.Error('Exactly one of `auto`, `required`, or `tool` must be specified.')
|
|
105
|
-
tool_name:
|
|
111
|
+
tool_name: str | None = None
|
|
106
112
|
if tool is not None:
|
|
107
113
|
try:
|
|
108
114
|
tool_obj = next(
|
|
@@ -118,29 +124,41 @@ class Tools(pydantic.BaseModel):
|
|
|
118
124
|
|
|
119
125
|
|
|
120
126
|
@udf
|
|
121
|
-
def _extract_str_tool_arg(kwargs: dict[str, Any], param_name: str) ->
|
|
127
|
+
def _extract_str_tool_arg(kwargs: dict[str, Any], param_name: str) -> str | None:
|
|
122
128
|
return _extract_arg(str, kwargs, param_name)
|
|
123
129
|
|
|
124
130
|
|
|
125
131
|
@udf
|
|
126
|
-
def _extract_int_tool_arg(kwargs: dict[str, Any], param_name: str) ->
|
|
132
|
+
def _extract_int_tool_arg(kwargs: dict[str, Any], param_name: str) -> int | None:
|
|
127
133
|
return _extract_arg(int, kwargs, param_name)
|
|
128
134
|
|
|
129
135
|
|
|
130
136
|
@udf
|
|
131
|
-
def _extract_float_tool_arg(kwargs: dict[str, Any], param_name: str) ->
|
|
137
|
+
def _extract_float_tool_arg(kwargs: dict[str, Any], param_name: str) -> float | None:
|
|
132
138
|
return _extract_arg(float, kwargs, param_name)
|
|
133
139
|
|
|
134
140
|
|
|
135
141
|
@udf
|
|
136
|
-
def _extract_bool_tool_arg(kwargs: dict[str, Any], param_name: str) ->
|
|
142
|
+
def _extract_bool_tool_arg(kwargs: dict[str, Any], param_name: str) -> bool | None:
|
|
137
143
|
return _extract_arg(bool, kwargs, param_name)
|
|
138
144
|
|
|
139
145
|
|
|
146
|
+
@udf
|
|
147
|
+
def _extract_json_tool_arg(kwargs: dict[str, Any], param_name: str) -> ts.Json | None:
|
|
148
|
+
if param_name in kwargs:
|
|
149
|
+
return json.loads(kwargs[param_name])
|
|
150
|
+
return None
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
@udf
|
|
154
|
+
def _extract_uuid_tool_arg(kwargs: dict[str, Any], param_name: str) -> uuid.UUID | None:
|
|
155
|
+
return _extract_arg(uuid.UUID, kwargs, param_name)
|
|
156
|
+
|
|
157
|
+
|
|
140
158
|
T = TypeVar('T')
|
|
141
159
|
|
|
142
160
|
|
|
143
|
-
def _extract_arg(eval_fn: Callable[[Any], T], kwargs: dict[str, Any], param_name: str) ->
|
|
161
|
+
def _extract_arg(eval_fn: Callable[[Any], T], kwargs: dict[str, Any], param_name: str) -> T | None:
|
|
144
162
|
if param_name in kwargs:
|
|
145
163
|
return eval_fn(kwargs[param_name])
|
|
146
164
|
return None
|
pixeltable/func/udf.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import inspect
|
|
4
|
-
from typing import TYPE_CHECKING, Any, Callable,
|
|
4
|
+
from typing import TYPE_CHECKING, Any, Callable, Sequence, overload
|
|
5
5
|
|
|
6
6
|
import pixeltable.exceptions as excs
|
|
7
7
|
import pixeltable.type_system as ts
|
|
@@ -26,12 +26,12 @@ def udf(decorated_fn: Callable) -> CallableFunction: ...
|
|
|
26
26
|
@overload
|
|
27
27
|
def udf(
|
|
28
28
|
*,
|
|
29
|
-
batch_size:
|
|
30
|
-
substitute_fn:
|
|
29
|
+
batch_size: int | None = None,
|
|
30
|
+
substitute_fn: Callable | None = None,
|
|
31
31
|
is_method: bool = False,
|
|
32
32
|
is_property: bool = False,
|
|
33
|
-
resource_pool:
|
|
34
|
-
type_substitutions:
|
|
33
|
+
resource_pool: str | None = None,
|
|
34
|
+
type_substitutions: Sequence[dict] | None = None,
|
|
35
35
|
_force_stored: bool = False,
|
|
36
36
|
) -> Callable[[Callable], CallableFunction]: ...
|
|
37
37
|
|
|
@@ -39,7 +39,7 @@ def udf(
|
|
|
39
39
|
# pxt.udf() called explicitly on a Table:
|
|
40
40
|
@overload
|
|
41
41
|
def udf(
|
|
42
|
-
table: catalog.Table, /, *, return_value: Any = None, description:
|
|
42
|
+
table: catalog.Table, /, *, return_value: Any = None, description: str | None = None
|
|
43
43
|
) -> ExprTemplateFunction: ...
|
|
44
44
|
|
|
45
45
|
|
|
@@ -96,15 +96,15 @@ def udf(*args, **kwargs): # type: ignore[no-untyped-def]
|
|
|
96
96
|
|
|
97
97
|
def make_function(
|
|
98
98
|
decorated_fn: Callable,
|
|
99
|
-
return_type:
|
|
100
|
-
param_types:
|
|
101
|
-
batch_size:
|
|
102
|
-
substitute_fn:
|
|
99
|
+
return_type: ts.ColumnType | None = None,
|
|
100
|
+
param_types: list[ts.ColumnType] | None = None,
|
|
101
|
+
batch_size: int | None = None,
|
|
102
|
+
substitute_fn: Callable | None = None,
|
|
103
103
|
is_method: bool = False,
|
|
104
104
|
is_property: bool = False,
|
|
105
|
-
resource_pool:
|
|
106
|
-
type_substitutions:
|
|
107
|
-
function_name:
|
|
105
|
+
resource_pool: str | None = None,
|
|
106
|
+
type_substitutions: Sequence[dict] | None = None,
|
|
107
|
+
function_name: str | None = None,
|
|
108
108
|
force_stored: bool = False,
|
|
109
109
|
) -> CallableFunction:
|
|
110
110
|
"""
|
|
@@ -201,11 +201,11 @@ def expr_udf(py_fn: Callable) -> ExprTemplateFunction: ...
|
|
|
201
201
|
|
|
202
202
|
|
|
203
203
|
@overload
|
|
204
|
-
def expr_udf(*, param_types:
|
|
204
|
+
def expr_udf(*, param_types: list[ts.ColumnType] | None = None) -> Callable[[Callable], ExprTemplateFunction]: ...
|
|
205
205
|
|
|
206
206
|
|
|
207
207
|
def expr_udf(*args: Any, **kwargs: Any) -> Any:
|
|
208
|
-
def make_expr_template(py_fn: Callable, param_types:
|
|
208
|
+
def make_expr_template(py_fn: Callable, param_types: list[ts.ColumnType] | None) -> ExprTemplateFunction:
|
|
209
209
|
from pixeltable import exprs
|
|
210
210
|
|
|
211
211
|
if py_fn.__module__ != '__main__' and py_fn.__name__.isidentifier():
|
|
@@ -237,9 +237,7 @@ def expr_udf(*args: Any, **kwargs: Any) -> Any:
|
|
|
237
237
|
return lambda py_fn: make_expr_template(py_fn, kwargs['param_types'])
|
|
238
238
|
|
|
239
239
|
|
|
240
|
-
def from_table(
|
|
241
|
-
tbl: catalog.Table, return_value: Optional['exprs.Expr'], description: Optional[str]
|
|
242
|
-
) -> ExprTemplateFunction:
|
|
240
|
+
def from_table(tbl: catalog.Table, return_value: 'exprs.Expr' | None, description: str | None) -> ExprTemplateFunction:
|
|
243
241
|
"""
|
|
244
242
|
Constructs an `ExprTemplateFunction` from a `Table`.
|
|
245
243
|
|
|
@@ -262,7 +260,7 @@ def from_table(
|
|
|
262
260
|
"""
|
|
263
261
|
from pixeltable import exprs
|
|
264
262
|
|
|
265
|
-
ancestors = [tbl, *tbl.
|
|
263
|
+
ancestors = [tbl, *tbl._get_base_tables()]
|
|
266
264
|
ancestors.reverse() # We must traverse the ancestors in order from base to derived
|
|
267
265
|
|
|
268
266
|
subst: dict[exprs.Expr, exprs.Expr] = {}
|
|
@@ -297,7 +295,7 @@ def from_table(
|
|
|
297
295
|
|
|
298
296
|
if description is None:
|
|
299
297
|
# Default description is the table comment
|
|
300
|
-
description = tbl.
|
|
298
|
+
description = tbl._get_comment()
|
|
301
299
|
if len(description) == 0:
|
|
302
300
|
description = f"UDF for table '{tbl._name}'"
|
|
303
301
|
|
pixeltable/functions/__init__.py
CHANGED
|
@@ -1,3 +1,9 @@
|
|
|
1
|
+
"""
|
|
2
|
+
General Pixeltable UDFs.
|
|
3
|
+
|
|
4
|
+
This parent module contains general-purpose UDFs that apply to multiple data types.
|
|
5
|
+
"""
|
|
6
|
+
|
|
1
7
|
# ruff: noqa: F401
|
|
2
8
|
|
|
3
9
|
from pixeltable.utils.code import local_public_names
|
|
@@ -8,23 +14,33 @@ from . import (
|
|
|
8
14
|
bedrock,
|
|
9
15
|
date,
|
|
10
16
|
deepseek,
|
|
17
|
+
fal,
|
|
11
18
|
fireworks,
|
|
12
19
|
gemini,
|
|
20
|
+
groq,
|
|
13
21
|
huggingface,
|
|
14
22
|
image,
|
|
15
23
|
json,
|
|
16
24
|
llama_cpp,
|
|
17
25
|
math,
|
|
18
26
|
mistralai,
|
|
27
|
+
net,
|
|
19
28
|
ollama,
|
|
20
29
|
openai,
|
|
30
|
+
openrouter,
|
|
21
31
|
replicate,
|
|
32
|
+
reve,
|
|
22
33
|
string,
|
|
23
34
|
timestamp,
|
|
24
35
|
together,
|
|
36
|
+
twelvelabs,
|
|
37
|
+
uuid,
|
|
25
38
|
video,
|
|
26
39
|
vision,
|
|
40
|
+
voyageai,
|
|
27
41
|
whisper,
|
|
42
|
+
whisperx,
|
|
43
|
+
yolox,
|
|
28
44
|
)
|
|
29
45
|
from .globals import count, map, max, mean, min, sum
|
|
30
46
|
|