pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +83 -19
- pixeltable/_query.py +1444 -0
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +7 -4
- pixeltable/catalog/catalog.py +2394 -119
- pixeltable/catalog/column.py +225 -104
- pixeltable/catalog/dir.py +38 -9
- pixeltable/catalog/globals.py +53 -34
- pixeltable/catalog/insertable_table.py +265 -115
- pixeltable/catalog/path.py +80 -17
- pixeltable/catalog/schema_object.py +28 -43
- pixeltable/catalog/table.py +1270 -677
- pixeltable/catalog/table_metadata.py +103 -0
- pixeltable/catalog/table_version.py +1270 -751
- pixeltable/catalog/table_version_handle.py +109 -0
- pixeltable/catalog/table_version_path.py +137 -42
- pixeltable/catalog/tbl_ops.py +53 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +251 -134
- pixeltable/config.py +215 -0
- pixeltable/env.py +736 -285
- pixeltable/exceptions.py +26 -2
- pixeltable/exec/__init__.py +7 -2
- pixeltable/exec/aggregation_node.py +39 -21
- pixeltable/exec/cache_prefetch_node.py +87 -109
- pixeltable/exec/cell_materialization_node.py +268 -0
- pixeltable/exec/cell_reconstruction_node.py +168 -0
- pixeltable/exec/component_iteration_node.py +25 -28
- pixeltable/exec/data_row_batch.py +11 -46
- pixeltable/exec/exec_context.py +26 -11
- pixeltable/exec/exec_node.py +35 -27
- pixeltable/exec/expr_eval/__init__.py +3 -0
- pixeltable/exec/expr_eval/evaluators.py +365 -0
- pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
- pixeltable/exec/expr_eval/globals.py +200 -0
- pixeltable/exec/expr_eval/row_buffer.py +74 -0
- pixeltable/exec/expr_eval/schedulers.py +413 -0
- pixeltable/exec/globals.py +35 -0
- pixeltable/exec/in_memory_data_node.py +35 -27
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +44 -29
- pixeltable/exec/sql_node.py +414 -115
- pixeltable/exprs/__init__.py +8 -5
- pixeltable/exprs/arithmetic_expr.py +79 -45
- pixeltable/exprs/array_slice.py +5 -5
- pixeltable/exprs/column_property_ref.py +40 -26
- pixeltable/exprs/column_ref.py +254 -61
- pixeltable/exprs/comparison.py +14 -9
- pixeltable/exprs/compound_predicate.py +9 -10
- pixeltable/exprs/data_row.py +213 -72
- pixeltable/exprs/expr.py +270 -104
- pixeltable/exprs/expr_dict.py +6 -5
- pixeltable/exprs/expr_set.py +20 -11
- pixeltable/exprs/function_call.py +383 -284
- pixeltable/exprs/globals.py +18 -5
- pixeltable/exprs/in_predicate.py +7 -7
- pixeltable/exprs/inline_expr.py +37 -37
- pixeltable/exprs/is_null.py +8 -4
- pixeltable/exprs/json_mapper.py +120 -54
- pixeltable/exprs/json_path.py +90 -60
- pixeltable/exprs/literal.py +61 -16
- pixeltable/exprs/method_ref.py +7 -6
- pixeltable/exprs/object_ref.py +19 -8
- pixeltable/exprs/row_builder.py +238 -75
- pixeltable/exprs/rowid_ref.py +53 -15
- pixeltable/exprs/similarity_expr.py +65 -50
- pixeltable/exprs/sql_element_cache.py +5 -5
- pixeltable/exprs/string_op.py +107 -0
- pixeltable/exprs/type_cast.py +25 -13
- pixeltable/exprs/variable.py +2 -2
- pixeltable/func/__init__.py +9 -5
- pixeltable/func/aggregate_function.py +197 -92
- pixeltable/func/callable_function.py +119 -35
- pixeltable/func/expr_template_function.py +101 -48
- pixeltable/func/function.py +375 -62
- pixeltable/func/function_registry.py +20 -19
- pixeltable/func/globals.py +6 -5
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +151 -35
- pixeltable/func/signature.py +178 -49
- pixeltable/func/tools.py +164 -0
- pixeltable/func/udf.py +176 -53
- pixeltable/functions/__init__.py +44 -4
- pixeltable/functions/anthropic.py +226 -47
- pixeltable/functions/audio.py +148 -11
- pixeltable/functions/bedrock.py +137 -0
- pixeltable/functions/date.py +188 -0
- pixeltable/functions/deepseek.py +113 -0
- pixeltable/functions/document.py +81 -0
- pixeltable/functions/fal.py +76 -0
- pixeltable/functions/fireworks.py +72 -20
- pixeltable/functions/gemini.py +249 -0
- pixeltable/functions/globals.py +208 -53
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1088 -95
- pixeltable/functions/image.py +155 -84
- pixeltable/functions/json.py +8 -11
- pixeltable/functions/llama_cpp.py +31 -19
- pixeltable/functions/math.py +169 -0
- pixeltable/functions/mistralai.py +50 -75
- pixeltable/functions/net.py +70 -0
- pixeltable/functions/ollama.py +29 -36
- pixeltable/functions/openai.py +548 -160
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +15 -14
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +310 -85
- pixeltable/functions/timestamp.py +37 -19
- pixeltable/functions/together.py +77 -120
- pixeltable/functions/twelvelabs.py +188 -0
- pixeltable/functions/util.py +7 -2
- pixeltable/functions/uuid.py +30 -0
- pixeltable/functions/video.py +1528 -117
- pixeltable/functions/vision.py +26 -26
- pixeltable/functions/voyageai.py +289 -0
- pixeltable/functions/whisper.py +19 -10
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/functions/yolox.py +112 -0
- pixeltable/globals.py +716 -236
- pixeltable/index/__init__.py +3 -1
- pixeltable/index/base.py +17 -21
- pixeltable/index/btree.py +32 -22
- pixeltable/index/embedding_index.py +155 -92
- pixeltable/io/__init__.py +12 -7
- pixeltable/io/datarows.py +140 -0
- pixeltable/io/external_store.py +83 -125
- pixeltable/io/fiftyone.py +24 -33
- pixeltable/io/globals.py +47 -182
- pixeltable/io/hf_datasets.py +96 -127
- pixeltable/io/label_studio.py +171 -156
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +136 -115
- pixeltable/io/parquet.py +40 -153
- pixeltable/io/table_data_conduit.py +702 -0
- pixeltable/io/utils.py +100 -0
- pixeltable/iterators/__init__.py +8 -4
- pixeltable/iterators/audio.py +207 -0
- pixeltable/iterators/base.py +9 -3
- pixeltable/iterators/document.py +144 -87
- pixeltable/iterators/image.py +17 -38
- pixeltable/iterators/string.py +15 -12
- pixeltable/iterators/video.py +523 -127
- pixeltable/metadata/__init__.py +33 -8
- pixeltable/metadata/converters/convert_10.py +2 -3
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_15.py +15 -11
- pixeltable/metadata/converters/convert_16.py +4 -5
- pixeltable/metadata/converters/convert_17.py +4 -5
- pixeltable/metadata/converters/convert_18.py +4 -6
- pixeltable/metadata/converters/convert_19.py +6 -9
- pixeltable/metadata/converters/convert_20.py +3 -6
- pixeltable/metadata/converters/convert_21.py +6 -8
- pixeltable/metadata/converters/convert_22.py +3 -2
- pixeltable/metadata/converters/convert_23.py +33 -0
- pixeltable/metadata/converters/convert_24.py +55 -0
- pixeltable/metadata/converters/convert_25.py +19 -0
- pixeltable/metadata/converters/convert_26.py +23 -0
- pixeltable/metadata/converters/convert_27.py +29 -0
- pixeltable/metadata/converters/convert_28.py +13 -0
- pixeltable/metadata/converters/convert_29.py +110 -0
- pixeltable/metadata/converters/convert_30.py +63 -0
- pixeltable/metadata/converters/convert_31.py +11 -0
- pixeltable/metadata/converters/convert_32.py +15 -0
- pixeltable/metadata/converters/convert_33.py +17 -0
- pixeltable/metadata/converters/convert_34.py +21 -0
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/convert_41.py +12 -0
- pixeltable/metadata/converters/convert_42.py +9 -0
- pixeltable/metadata/converters/convert_43.py +44 -0
- pixeltable/metadata/converters/util.py +44 -18
- pixeltable/metadata/notes.py +21 -0
- pixeltable/metadata/schema.py +185 -42
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +616 -225
- pixeltable/share/__init__.py +3 -0
- pixeltable/share/packager.py +797 -0
- pixeltable/share/protocol/__init__.py +33 -0
- pixeltable/share/protocol/common.py +165 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +119 -0
- pixeltable/share/publish.py +349 -0
- pixeltable/store.py +398 -232
- pixeltable/type_system.py +730 -267
- pixeltable/utils/__init__.py +40 -0
- pixeltable/utils/arrow.py +201 -29
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +346 -0
- pixeltable/utils/coco.py +26 -27
- pixeltable/utils/code.py +4 -4
- pixeltable/utils/console_output.py +46 -0
- pixeltable/utils/coroutine.py +24 -0
- pixeltable/utils/dbms.py +92 -0
- pixeltable/utils/description_helper.py +11 -12
- pixeltable/utils/documents.py +60 -61
- pixeltable/utils/exception_handler.py +36 -0
- pixeltable/utils/filecache.py +38 -22
- pixeltable/utils/formatter.py +88 -51
- pixeltable/utils/gcs_store.py +295 -0
- pixeltable/utils/http.py +133 -0
- pixeltable/utils/http_server.py +14 -13
- pixeltable/utils/iceberg.py +13 -0
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +90 -0
- pixeltable/utils/local_store.py +322 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +573 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +20 -20
- pixeltable/utils/s3_store.py +527 -0
- pixeltable/utils/sql.py +32 -5
- pixeltable/utils/system.py +30 -0
- pixeltable/utils/transactional_directory.py +4 -3
- pixeltable-0.5.7.dist-info/METADATA +579 -0
- pixeltable-0.5.7.dist-info/RECORD +227 -0
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
- pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/catalog/named_function.py +0 -36
- pixeltable/catalog/path_dict.py +0 -141
- pixeltable/dataframe.py +0 -894
- pixeltable/exec/expr_eval_node.py +0 -232
- pixeltable/ext/__init__.py +0 -14
- pixeltable/ext/functions/__init__.py +0 -8
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/ext/functions/yolox.py +0 -157
- pixeltable/tool/create_test_db_dump.py +0 -311
- pixeltable/tool/create_test_video.py +0 -81
- pixeltable/tool/doc_plugins/griffe.py +0 -50
- pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
- pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
- pixeltable/tool/embed_udf.py +0 -9
- pixeltable/tool/mypy_plugin.py +0 -55
- pixeltable/utils/media_store.py +0 -76
- pixeltable/utils/s3.py +0 -16
- pixeltable-0.2.26.dist-info/METADATA +0 -400
- pixeltable-0.2.26.dist-info/RECORD +0 -156
- pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
|
@@ -4,25 +4,25 @@ import dataclasses
|
|
|
4
4
|
import importlib
|
|
5
5
|
import logging
|
|
6
6
|
import sys
|
|
7
|
-
from typing import Optional
|
|
8
7
|
from uuid import UUID
|
|
9
8
|
|
|
10
9
|
import sqlalchemy as sql
|
|
11
10
|
|
|
12
|
-
import
|
|
13
|
-
import pixeltable.exceptions as excs
|
|
14
|
-
import pixeltable.type_system as ts
|
|
11
|
+
from pixeltable import env, exceptions as excs, type_system as ts
|
|
15
12
|
from pixeltable.metadata import schema
|
|
13
|
+
|
|
16
14
|
from .function import Function
|
|
17
15
|
|
|
18
16
|
_logger = logging.getLogger('pixeltable')
|
|
19
17
|
|
|
18
|
+
|
|
20
19
|
class FunctionRegistry:
|
|
21
20
|
"""
|
|
22
21
|
A central registry for all Functions. Handles interactions with the backing store.
|
|
23
22
|
Function are loaded from the store on demand.
|
|
24
23
|
"""
|
|
25
|
-
|
|
24
|
+
|
|
25
|
+
_instance: FunctionRegistry | None = None
|
|
26
26
|
|
|
27
27
|
@classmethod
|
|
28
28
|
def get(cls) -> FunctionRegistry:
|
|
@@ -30,7 +30,7 @@ class FunctionRegistry:
|
|
|
30
30
|
cls._instance = FunctionRegistry()
|
|
31
31
|
return cls._instance
|
|
32
32
|
|
|
33
|
-
def __init__(self):
|
|
33
|
+
def __init__(self) -> None:
|
|
34
34
|
self.stored_fns_by_id: dict[UUID, Function] = {}
|
|
35
35
|
self.module_fns: dict[str, Function] = {} # fqn -> Function
|
|
36
36
|
self.type_methods: dict[ts.ColumnType.Type, dict[str, Function]] = {}
|
|
@@ -68,7 +68,7 @@ class FunctionRegistry:
|
|
|
68
68
|
raise excs.Error(f'A UDF with that name already exists: {fqn}')
|
|
69
69
|
self.module_fns[fqn] = fn
|
|
70
70
|
if fn.is_method or fn.is_property:
|
|
71
|
-
base_type = fn.
|
|
71
|
+
base_type = fn.signatures[0].parameters_by_pos[0].col_type.type_enum
|
|
72
72
|
if base_type not in self.type_methods:
|
|
73
73
|
self.type_methods[base_type] = {}
|
|
74
74
|
if fn.name in self.type_methods[base_type]:
|
|
@@ -93,7 +93,7 @@ class FunctionRegistry:
|
|
|
93
93
|
# stored_fn_md.append(md)
|
|
94
94
|
return list(self.module_fns.values())
|
|
95
95
|
|
|
96
|
-
# def get_function(self, *, id:
|
|
96
|
+
# def get_function(self, *, id: UUID | None = None, fqn: str | None = None) -> Function:
|
|
97
97
|
# assert (id is not None) != (fqn is not None)
|
|
98
98
|
# if id is not None:
|
|
99
99
|
# if id not in self.stored_fns_by_id:
|
|
@@ -142,7 +142,7 @@ class FunctionRegistry:
|
|
|
142
142
|
return list(self.type_methods[base_type].values())
|
|
143
143
|
return []
|
|
144
144
|
|
|
145
|
-
def lookup_type_method(self, base_type: ts.ColumnType.Type, name: str) ->
|
|
145
|
+
def lookup_type_method(self, base_type: ts.ColumnType.Type, name: str) -> Function | None:
|
|
146
146
|
"""
|
|
147
147
|
Look up a method (or property) by name for a given base type. If no such method is registered, return None.
|
|
148
148
|
"""
|
|
@@ -150,14 +150,16 @@ class FunctionRegistry:
|
|
|
150
150
|
return self.type_methods[base_type][name]
|
|
151
151
|
return None
|
|
152
152
|
|
|
153
|
-
#def create_function(self, md: schema.FunctionMd, binary_obj: bytes, dir_id:
|
|
154
|
-
def create_stored_function(self, pxt_fn: Function, dir_id:
|
|
153
|
+
# def create_function(self, md: schema.FunctionMd, binary_obj: bytes, dir_id: UUID | None = None) -> UUID:
|
|
154
|
+
def create_stored_function(self, pxt_fn: Function, dir_id: UUID | None = None) -> UUID:
|
|
155
155
|
fn_md, binary_obj = pxt_fn.to_store()
|
|
156
156
|
md = schema.FunctionMd(name=pxt_fn.name, md=fn_md, py_version=sys.version, class_name=pxt_fn.__class__.__name__)
|
|
157
157
|
with env.Env.get().engine.begin() as conn:
|
|
158
158
|
res = conn.execute(
|
|
159
|
-
sql.insert(schema.Function.__table__)
|
|
160
|
-
|
|
159
|
+
sql.insert(schema.Function.__table__).values(
|
|
160
|
+
dir_id=dir_id, md=dataclasses.asdict(md), binary_obj=binary_obj
|
|
161
|
+
)
|
|
162
|
+
)
|
|
161
163
|
id = res.inserted_primary_key[0]
|
|
162
164
|
_logger.info(f'Created function {pxt_fn.name} (id {id}) in store')
|
|
163
165
|
self.stored_fns_by_id[id] = pxt_fn
|
|
@@ -166,8 +168,9 @@ class FunctionRegistry:
|
|
|
166
168
|
def get_stored_function(self, id: UUID) -> Function:
|
|
167
169
|
if id in self.stored_fns_by_id:
|
|
168
170
|
return self.stored_fns_by_id[id]
|
|
169
|
-
stmt = sql.select(schema.Function.md, schema.Function.binary_obj, schema.Function.dir_id)
|
|
170
|
-
|
|
171
|
+
stmt = sql.select(schema.Function.md, schema.Function.binary_obj, schema.Function.dir_id).where(
|
|
172
|
+
schema.Function.id == id
|
|
173
|
+
)
|
|
171
174
|
with env.Env.get().engine.begin() as conn:
|
|
172
175
|
row = conn.execute(stmt).fetchone()
|
|
173
176
|
if row is None:
|
|
@@ -180,7 +183,7 @@ class FunctionRegistry:
|
|
|
180
183
|
self.stored_fns_by_id[id] = instance
|
|
181
184
|
return instance
|
|
182
185
|
|
|
183
|
-
# def create_function(self, fn: Function, dir_id:
|
|
186
|
+
# def create_function(self, fn: Function, dir_id: UUID | None = None, name: str | None = None) -> None:
|
|
184
187
|
# with env.Env.get().engine.begin() as conn:
|
|
185
188
|
# _logger.debug(f'Pickling function {name}')
|
|
186
189
|
# eval_fn_str = cloudpickle.dumps(fn.eval_fn) if fn.eval_fn is not None else None
|
|
@@ -237,7 +240,5 @@ class FunctionRegistry:
|
|
|
237
240
|
def delete_function(self, id: UUID) -> None:
|
|
238
241
|
assert id is not None
|
|
239
242
|
with env.Env.get().engine.begin() as conn:
|
|
240
|
-
conn.execute(
|
|
241
|
-
sql.delete(schema.Function.__table__)
|
|
242
|
-
.where(schema.Function.id == id))
|
|
243
|
+
conn.execute(sql.delete(schema.Function.__table__).where(schema.Function.id == id))
|
|
243
244
|
_logger.info(f'Deleted function with id {id} from store')
|
pixeltable/func/globals.py
CHANGED
|
@@ -1,14 +1,13 @@
|
|
|
1
1
|
import importlib
|
|
2
2
|
import inspect
|
|
3
3
|
from types import ModuleType
|
|
4
|
-
from typing import Optional
|
|
5
4
|
|
|
6
5
|
import pixeltable.exceptions as excs
|
|
7
6
|
|
|
8
7
|
|
|
9
|
-
def resolve_symbol(symbol_path: str) ->
|
|
8
|
+
def resolve_symbol(symbol_path: str) -> object | None:
|
|
10
9
|
path_elems = symbol_path.split('.')
|
|
11
|
-
module:
|
|
10
|
+
module: ModuleType | None = None
|
|
12
11
|
i = len(path_elems) - 1
|
|
13
12
|
while i > 0 and module is None:
|
|
14
13
|
try:
|
|
@@ -28,10 +27,12 @@ def validate_symbol_path(fn_path: str) -> None:
|
|
|
28
27
|
fn_name = path_elems[-1]
|
|
29
28
|
if any(el == '<locals>' for el in path_elems):
|
|
30
29
|
raise excs.Error(
|
|
31
|
-
f'{fn_name}(): nested functions are not supported. Move the function to the module level or into a class.'
|
|
30
|
+
f'{fn_name}(): nested functions are not supported. Move the function to the module level or into a class.'
|
|
31
|
+
)
|
|
32
32
|
if any(not el.isidentifier() for el in path_elems):
|
|
33
33
|
raise excs.Error(
|
|
34
|
-
f'{fn_name}(): cannot resolve symbol path {fn_path}. Move the function to the module level or into a class.'
|
|
34
|
+
f'{fn_name}(): cannot resolve symbol path {fn_path}. Move the function to the module level or into a class.'
|
|
35
|
+
)
|
|
35
36
|
|
|
36
37
|
|
|
37
38
|
def get_caller_module_path() -> str:
|
pixeltable/func/mcp.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import inspect
|
|
3
|
+
from typing import TYPE_CHECKING, Any
|
|
4
|
+
|
|
5
|
+
import pixeltable as pxt
|
|
6
|
+
from pixeltable import exceptions as excs, type_system as ts
|
|
7
|
+
from pixeltable.func.signature import Parameter
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
import mcp
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def mcp_udfs(url: str) -> list['pxt.func.Function']:
    """
    Synchronous entry point for `mcp_udfs_async()`.

    Runs the coroutine to completion via `asyncio.run()` and returns the list it produces.

    Args:
        url: Passed through unchanged to `mcp_udfs_async()`.

    Returns:
        The list of functions returned by `mcp_udfs_async(url)`.
    """
    udfs = asyncio.run(mcp_udfs_async(url))
    return udfs
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
async def mcp_udfs_async(url: str) -> list['pxt.func.Function']:
    """
    List the tools offered at `url` and convert each one into a function via `mcp_tool_to_udf()`.

    Opens a streamable-HTTP client connection and an MCP client session, initializes the session,
    and requests the tool list. Both the connection and the session are closed before the tools
    are converted.

    Args:
        url: Address handed to `streamablehttp_client()`.

    Returns:
        One function per entry in the server's `tools` list, in the order the server reported them.
    """
    # `mcp` is imported lazily inside the function, as in the rest of this module.
    import mcp
    from mcp.client.streamable_http import streamablehttp_client

    tools_response: mcp.types.ListToolsResult | None = None
    async with streamablehttp_client(url) as (reader, writer, _):
        async with mcp.ClientSession(reader, writer) as session:
            await session.initialize()
            tools_response = await session.list_tools()
    # NOTE(review): presumably guards against a context manager swallowing an exception — confirm
    assert tools_response is not None

    udfs: list['pxt.func.Function'] = []
    for tool in tools_response.tools:
        udfs.append(mcp_tool_to_udf(url, tool))
    return udfs
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def mcp_tool_to_udf(url: str, mcp_tool: 'mcp.types.Tool') -> 'pxt.func.Function':
    """
    Wrap a single MCP tool as a `CallableFunction` whose parameters mirror the tool's input schema.

    Every property in the tool's `inputSchema` becomes a keyword-only parameter; properties that are
    not listed under `required` are made nullable. The function's return type is always string.

    Args:
        url: Address of the MCP server; a new connection and session are opened on every invocation.
        mcp_tool: Tool descriptor as returned by the server's tool listing.

    Returns:
        A `CallableFunction` named after the tool that forwards its keyword arguments to the tool
        and returns the text of the first content item of the response.

    Raises:
        excs.Error: If a parameter's JSON schema cannot be mapped to a Pixeltable type.
    """
    import mcp
    from mcp.client.streamable_http import streamablehttp_client

    async def invoke(**kwargs: Any) -> str:
        # TODO: Cache session objects rather than creating a new one each time?
        async with (
            streamablehttp_client(url) as (read_stream, write_stream, _),
            mcp.ClientSession(read_stream, write_stream) as session,
        ):
            await session.initialize()
            res = await session.call_tool(name=mcp_tool.name, arguments=kwargs)
            # TODO Handle image/audio responses?
            # NOTE(review): only the first content item is returned; an empty `content` list raises IndexError
            return res.content[0].text  # type: ignore[union-attr]

    if mcp_tool.description is not None:
        invoke.__doc__ = mcp_tool.description

    input_schema = mcp_tool.inputSchema
    # A tool that takes no arguments may omit 'properties' from its schema; treat that as "no parameters"
    # instead of failing with a KeyError.
    properties = input_schema.get('properties') or {}
    params = {name: __mcp_param_to_pxt_type(mcp_tool.name, name, param) for name, param in properties.items()}
    required = input_schema.get('required') or []

    # Ensure that any params not appearing in `required` are nullable.
    # (A required param might or might not be nullable, since its type might be an 'anyOf' containing a null.)
    for name in params.keys() - required:
        params[name] = params[name].copy(nullable=True)

    signature = pxt.func.Signature(
        return_type=ts.StringType(),  # Return type is always string
        parameters=[Parameter(name, col_type, inspect.Parameter.KEYWORD_ONLY) for name, col_type in params.items()],
    )

    return pxt.func.CallableFunction(signatures=[signature], py_fns=[invoke], self_name=mcp_tool.name)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def __mcp_param_to_pxt_type(tool_name: str, name: str, param: dict[str, Any]) -> ts.ColumnType:
    """
    Convert the JSON schema of one MCP tool parameter into a Pixeltable column type.

    Args:
        tool_name: Name of the tool the parameter belongs to (used only in the error message).
        name: Name of the parameter (used only in the error message).
        param: JSON schema fragment describing the parameter.

    Returns:
        The type produced by `ts.ColumnType.from_json_schema(param)`.

    Raises:
        excs.Error: If `from_json_schema()` returns `None` for the given schema.
    """
    converted = ts.ColumnType.from_json_schema(param)
    if converted is not None:
        return converted
    raise excs.Error(f'Unknown type schema for MCP parameter {name!r} of tool {tool_name!r}: {param}')
|
|
@@ -1,69 +1,77 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import inspect
|
|
4
|
-
from
|
|
4
|
+
from functools import reduce
|
|
5
|
+
from typing import TYPE_CHECKING, Any, Callable, Iterable, overload
|
|
5
6
|
|
|
6
|
-
import
|
|
7
|
-
|
|
8
|
-
import pixeltable as pxt
|
|
9
|
-
from pixeltable import exprs
|
|
7
|
+
from pixeltable import catalog, exceptions as excs, exprs, func, type_system as ts
|
|
10
8
|
|
|
11
9
|
from .function import Function
|
|
12
10
|
from .signature import Signature
|
|
13
11
|
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from pixeltable import Query
|
|
14
|
+
|
|
14
15
|
|
|
15
16
|
class QueryTemplateFunction(Function):
|
|
16
|
-
"""A parameterized query
|
|
17
|
+
"""A parameterized query from which an executable Query is created with a function call."""
|
|
18
|
+
|
|
19
|
+
template_df: 'Query' | None
|
|
20
|
+
self_name: str | None
|
|
21
|
+
_comment: str | None
|
|
17
22
|
|
|
18
23
|
@classmethod
|
|
19
24
|
def create(
|
|
20
|
-
cls, template_callable: Callable, param_types:
|
|
25
|
+
cls, template_callable: Callable, param_types: list[ts.ColumnType] | None, path: str, name: str
|
|
21
26
|
) -> QueryTemplateFunction:
|
|
22
27
|
# we need to construct a template df and a signature
|
|
23
28
|
py_sig = inspect.signature(template_callable)
|
|
24
29
|
py_params = list(py_sig.parameters.values())
|
|
25
30
|
params = Signature.create_parameters(py_params=py_params, param_types=param_types)
|
|
26
|
-
# invoke template_callable with parameter expressions to construct a
|
|
31
|
+
# invoke template_callable with parameter expressions to construct a Query with parameters
|
|
27
32
|
var_exprs = [exprs.Variable(param.name, param.col_type) for param in params]
|
|
28
33
|
template_df = template_callable(*var_exprs)
|
|
29
|
-
from pixeltable import
|
|
30
|
-
|
|
34
|
+
from pixeltable import Query
|
|
35
|
+
|
|
36
|
+
assert isinstance(template_df, Query)
|
|
31
37
|
# we take params and return json
|
|
32
|
-
sig = Signature(return_type=
|
|
33
|
-
return QueryTemplateFunction(template_df, sig, path=path, name=name)
|
|
38
|
+
sig = Signature(return_type=ts.JsonType(), parameters=params)
|
|
39
|
+
return QueryTemplateFunction(template_df, sig, path=path, name=name, comment=inspect.getdoc(template_callable))
|
|
34
40
|
|
|
35
41
|
def __init__(
|
|
36
|
-
|
|
37
|
-
|
|
42
|
+
self,
|
|
43
|
+
template_df: 'Query' | None,
|
|
44
|
+
sig: Signature,
|
|
45
|
+
path: str | None = None,
|
|
46
|
+
name: str | None = None,
|
|
47
|
+
comment: str | None = None,
|
|
38
48
|
):
|
|
39
|
-
|
|
49
|
+
assert sig is not None
|
|
50
|
+
super().__init__([sig], self_path=path)
|
|
40
51
|
self.self_name = name
|
|
41
52
|
self.template_df = template_df
|
|
53
|
+
self._comment = comment
|
|
42
54
|
|
|
43
|
-
|
|
44
|
-
#
|
|
45
|
-
# TODO: figure out a more general way to make execution state available
|
|
46
|
-
self.conn: Optional[sql.engine.Connection] = None
|
|
55
|
+
def _update_as_overload_resolution(self, signature_idx: int) -> None:
|
|
56
|
+
pass # only one signature supported for QueryTemplateFunction
|
|
47
57
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
for param in [p for p in self.signature.parameters.values() if p.has_default()]:
|
|
52
|
-
assert param.name in param_types
|
|
53
|
-
param_type = param_types[param.name]
|
|
54
|
-
literal_default = exprs.Literal(param.default, col_type=param_type)
|
|
55
|
-
self.defaults[param.name] = literal_default
|
|
56
|
-
|
|
57
|
-
def set_conn(self, conn: Optional[sql.engine.Connection]) -> None:
|
|
58
|
-
self.conn = conn
|
|
58
|
+
@property
|
|
59
|
+
def is_async(self) -> bool:
|
|
60
|
+
return True
|
|
59
61
|
|
|
60
|
-
def
|
|
62
|
+
async def aexec(self, *args: Any, **kwargs: Any) -> Any:
|
|
63
|
+
# assert not self.is_polymorphic
|
|
61
64
|
bound_args = self.signature.py_signature.bind(*args, **kwargs).arguments
|
|
62
65
|
# apply defaults, otherwise we might have Parameters left over
|
|
63
66
|
bound_args.update(
|
|
64
|
-
{
|
|
67
|
+
{
|
|
68
|
+
param.name: param.default
|
|
69
|
+
for param in self.signature.parameters.values()
|
|
70
|
+
if param.has_default() and param.name not in bound_args
|
|
71
|
+
}
|
|
72
|
+
)
|
|
65
73
|
bound_df = self.template_df.bind(bound_args)
|
|
66
|
-
result = bound_df.
|
|
74
|
+
result = await bound_df._acollect()
|
|
67
75
|
return list(result)
|
|
68
76
|
|
|
69
77
|
@property
|
|
@@ -74,10 +82,118 @@ class QueryTemplateFunction(Function):
|
|
|
74
82
|
def name(self) -> str:
|
|
75
83
|
return self.self_name
|
|
76
84
|
|
|
85
|
+
def comment(self) -> str | None:
|
|
86
|
+
return self._comment
|
|
87
|
+
|
|
77
88
|
def _as_dict(self) -> dict:
|
|
78
89
|
return {'name': self.name, 'signature': self.signature.as_dict(), 'df': self.template_df.as_dict()}
|
|
79
90
|
|
|
80
91
|
@classmethod
|
|
81
92
|
def _from_dict(cls, d: dict) -> Function:
|
|
82
|
-
from pixeltable.
|
|
83
|
-
|
|
93
|
+
from pixeltable._query import Query
|
|
94
|
+
|
|
95
|
+
return cls(Query.from_dict(d['df']), Signature.from_dict(d['signature']), name=d['name'])
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
@overload
def query(py_fn: Callable) -> QueryTemplateFunction: ...


@overload
def query(*, param_types: list[ts.ColumnType] | None = None) -> Callable[[Callable], QueryTemplateFunction]: ...


def query(*args: Any, **kwargs: Any) -> Any:
    """
    Decorator that turns a Python callable into a `QueryTemplateFunction`.

    Supports two forms:

    - `@query`: the decorated callable is passed as the single positional argument.
    - `@query()` / `@query(param_types=...)`: returns a decorator; `param_types` is forwarded to
      `QueryTemplateFunction.create()` and defaults to `None`.

    Returns:
        A `QueryTemplateFunction` (first form) or a decorator producing one (second form).
    """

    def make_query_template(py_fn: Callable, param_types: list[ts.ColumnType] | None) -> QueryTemplateFunction:
        if py_fn.__module__ != '__main__' and py_fn.__name__.isidentifier():
            # this is a named function in a module
            function_path = f'{py_fn.__module__}.{py_fn.__qualname__}'
        else:
            function_path = None
        query_name = py_fn.__name__
        query_fn = QueryTemplateFunction.create(py_fn, param_types=param_types, path=function_path, name=query_name)
        return query_fn

    # TODO: verify that the inferred return type matches that of the template
    # TODO: verify that the signature doesn't contain batched parameters

    if len(args) == 1:
        assert len(kwargs) == 0 and callable(args[0])
        return make_query_template(args[0], None)
    else:
        # `param_types` is optional in the declared overload, so `@query()` with no arguments must work too.
        assert len(args) == 0 and set(kwargs) <= {'param_types'}
        param_types = kwargs.get('param_types')
        return lambda py_fn: make_query_template(py_fn, param_types)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def retrieval_udf(
    table: catalog.Table,
    name: str | None = None,
    description: str | None = None,
    parameters: Iterable[str | exprs.ColumnRef] | None = None,
    limit: int | None = 10,
) -> func.QueryTemplateFunction:
    """
    Constructs a retrieval UDF for the given table. The retrieval UDF is a UDF whose parameters are
    columns of the table and whose return value is a list of rows from the table. The return value of
    ```python
    f(col1=x, col2=y, ...)
    ```
    will be a list of all rows from the table that match the specified arguments.

    Args:
        table: The table to use as the dataset for the retrieval tool.
        name: The name of the tool. If not specified, then the name of the table will be used by default.
        description: The description of the tool. If not specified, then a default description will be generated.
        parameters: The columns of the table to use as parameters. If not specified, all data columns
            (non-computed columns) will be used as parameters.

            All of the specified parameters will be required parameters of the tool, regardless of their status
            as columns.
        limit: The maximum number of rows to return (default 10). If `None`, all matching rows will be returned.

    Returns:
        A list of dictionaries containing data from the table, one per row that matches the input arguments.
        If there are no matching rows, an empty list will be returned.

    Raises:
        excs.Error: If a parameter given by name is not a column of the table, or if the resulting
            parameter list is empty.
    """
    # Argument validation
    col_refs: list[exprs.ColumnRef]
    # TODO: get rid of references to ColumnRef internals and replace instead with a public interface
    col_names = table.columns()
    if parameters is None:
        col_refs = [table[col_name] for col_name in col_names if not table[col_name].col.is_computed]
    else:
        # Materialize once: `parameters` is only required to be an Iterable, and a one-shot iterator
        # (e.g. a generator) would be exhausted by the validation loop before the column refs are built.
        param_list = list(parameters)
        for param in param_list:
            if isinstance(param, str) and param not in col_names:
                raise excs.Error(f'The specified parameter {param!r} is not a column of the table {table._path()!r}')
        col_refs = [table[param] if isinstance(param, str) else param for param in param_list]

    if len(col_refs) == 0:
        raise excs.Error('Parameter list cannot be empty.')

    # Construct the Query
    predicates = [col_ref == exprs.Variable(col_ref.col.name, col_ref.col.col_type) for col_ref in col_refs]
    where_clause = reduce(lambda c1, c2: c1 & c2, predicates)
    df = table.select().where(where_clause)
    if limit is not None:
        df = df.limit(limit)

    # Construct the signature
    query_params = [
        func.Parameter(col_ref.col.name, col_ref.col.col_type, inspect.Parameter.POSITIONAL_OR_KEYWORD)
        for col_ref in col_refs
    ]
    query_signature = func.Signature(return_type=ts.JsonType(), parameters=query_params)

    # Construct a name and/or description if not provided
    if name is None:
        name = table._name
    if description is None:
        description = (
            f'Retrieves an entry from the dataset {name!r} that matches the given parameters.\n\nParameters:\n'
        )
        description += '\n'.join(
            [f' {col_ref.col.name}: of type `{col_ref.col.col_type._to_base_str()}`' for col_ref in col_refs]
        )

    fn = func.QueryTemplateFunction(df, query_signature, name=name, comment=description)
    return fn
|