uer-mcp 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +633 -0
- package/bin/uer-mcp.js +118 -0
- package/package.json +46 -0
- package/python/README.md +633 -0
- package/python/pyproject.toml +127 -0
- package/python/src/uer/__init__.py +17 -0
- package/python/src/uer/__main__.py +12 -0
- package/python/src/uer/__pycache__/__init__.cpython-313.pyc +0 -0
- package/python/src/uer/__pycache__/server.cpython-313.pyc +0 -0
- package/python/src/uer/llm/__init__.py +0 -0
- package/python/src/uer/llm/__pycache__/__init__.cpython-313.pyc +0 -0
- package/python/src/uer/llm/__pycache__/gateway.cpython-313.pyc +0 -0
- package/python/src/uer/llm/gateway.py +87 -0
- package/python/src/uer/models/__init__.py +0 -0
- package/python/src/uer/models/__pycache__/__init__.cpython-313.pyc +0 -0
- package/python/src/uer/models/__pycache__/llm.cpython-313.pyc +0 -0
- package/python/src/uer/models/llm.py +109 -0
- package/python/src/uer/server.py +186 -0
- package/python/uv.lock +2922 -0
package/python/pyproject.toml
@@ -0,0 +1,127 @@
+[project]
+name = "uer"
+version = "0.1.0"
+description = "UER - Universal Expert Registry: Manipulation detection for AI systems"
+readme = "README.md"
+requires-python = ">=3.11"
+license = { text = "MIT" }
+authors = [
+    { name = "Margus Martsepp" },
+    { name = "Marco Lackovic" },
+    { name = "Martin Martsepp" },
+    { name = "Zane Estere" },
+    { name = "Anirudh" },
+]
+
+dependencies = [
+    "litellm>=1.40.0",
+    "datasets>=2.18.0",
+    "huggingface-hub>=0.20.0",
+    "mcp>=1.0.0",
+]
+
+[project.optional-dependencies]
+dev = [
+    "black>=24.1.1",
+    "ruff>=0.1.14",
+    "mypy>=1.8.0",
+    "pytest>=8.0.0",
+    "pytest-asyncio>=0.23.0",
+    "pytest-cov>=4.1.0",
+    "pre-commit>=3.6.0",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[tool.hatch.build.targets.wheel]
+packages = ["src/uer"]
+
+[tool.black]
+line-length = 100
+target-version = ['py311']
+skip-string-normalization = false
+
+[tool.ruff]
+line-length = 100
+target-version = "py311"
+
+[tool.ruff.lint]
+select = [
+    "E",    # pycodestyle errors
+    "F",    # pyflakes
+    "I",    # isort
+    "N",    # pep8-naming
+    "W",    # pycodestyle warnings
+    "UP",   # pyupgrade
+    "B",    # flake8-bugbear
+    "C4",   # flake8-comprehensions
+    "SIM",  # flake8-simplify
+]
+ignore = []
+
+[tool.ruff.lint.per-file-ignores]
+"__init__.py" = ["F401"]  # Allow unused imports in __init__.py
+"tests/**/*.py" = ["S101"]  # Allow assert statements in tests
+"context/scripts/*.py" = ["E501"]  # Allow long lines in test scripts
+"seed_datasets.py" = ["E501"]  # Allow long lines in setup script
+"src/uer/models/*.py" = ["E501", "N805"]  # Allow long lines in descriptions, Pydantic validators use cls
+"src/uer/server.py" = ["E501"]  # Allow long lines in tool descriptions
+
+[tool.mypy]
+python_version = "3.11"
+strict = true
+warn_return_any = true
+warn_unused_configs = true
+disallow_untyped_defs = true
+disallow_any_generics = true
+check_untyped_defs = true
+no_implicit_optional = true
+warn_redundant_casts = true
+warn_unused_ignores = true
+
+[[tool.mypy.overrides]]
+module = [
+    "datasets.*",
+    "huggingface_hub.*",
+    "litellm.*",
+    "mcp.*",
+    "pydantic.*",
+]
+ignore_missing_imports = true
+
+[[tool.mypy.overrides]]
+module = [
+    "seed_datasets",
+    "context.scripts.*",
+]
+disallow_untyped_defs = false
+check_untyped_defs = false
+
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+asyncio_mode = "auto"
+addopts = [
+    "--cov=.",
+    "--cov-report=term-missing",
+    "--cov-report=html",
+    "-v",
+]
+
+[tool.coverage.run]
+omit = [
+    "tests/*",
+    "context/*",
+    "*/site-packages/*",
+]
+
+[tool.coverage.report]
+exclude_lines = [
+    "pragma: no cover",
+    "def __repr__",
+    "raise AssertionError",
+    "raise NotImplementedError",
+    "if __name__ == .__main__.:",
+    "if TYPE_CHECKING:",
+]
package/python/src/uer/__init__.py
@@ -0,0 +1,17 @@
+"""
+UER - Universal Expert Registry.
+
+An MCP server providing universal LLM access via LiteLLM.
+"""
+
+__version__ = "0.1.0"
+
+# Export main components for programmatic use
+from uer.llm.gateway import LLMGateway
+from uer.models.llm import LLMCallRequest, LLMCallResponse
+
+__all__ = [
+    "LLMGateway",
+    "LLMCallRequest",
+    "LLMCallResponse",
+]
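Because `__init__.py` re-exports the gateway and the request/response models, the package can also be used programmatically outside the MCP server. A minimal sketch of that path, assuming the package is installed and a matching API key (e.g. GEMINI_API_KEY) is exported:

import asyncio

from uer import LLMCallRequest, LLMGateway

async def main() -> None:
    # Validate the arguments first, then forward them to the gateway.
    request = LLMCallRequest(
        model="gemini/gemini-3-flash-preview",
        messages=[{"role": "user", "content": "Say hello in one sentence."}],
    )
    response = await LLMGateway().call(
        model=request.model,
        messages=request.messages,
        temperature=request.temperature,
        max_tokens=request.max_tokens,
    )
    print(response["choices"][0]["message"]["content"])

asyncio.run(main())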
Binary file
Binary file
File without changes
Binary file
Binary file
package/python/src/uer/llm/gateway.py
@@ -0,0 +1,87 @@
+"""LiteLLM Gateway - Unified interface to 100+ LLM providers."""
+
+import os
+from typing import Any
+
+import litellm
+from litellm import acompletion
+
+
+class LLMGateway:
+    """Unified gateway to LLM providers via LiteLLM."""
+
+    def __init__(self) -> None:
+        """Initialize gateway and detect available providers."""
+        self.available_providers = self._detect_providers()
+
+        # Configure LiteLLM
+        litellm.set_verbose = False
+        litellm.drop_params = True  # Drop unsupported params instead of erroring
+
+    def _detect_providers(self) -> list[str]:
+        """Detect which LLM providers have API keys configured."""
+        providers = []
+
+        if os.getenv("GEMINI_API_KEY"):
+            providers.append("gemini")
+        if os.getenv("ANTHROPIC_API_KEY"):
+            providers.append("anthropic")
+        if os.getenv("OPENAI_API_KEY"):
+            providers.append("openai")
+        if os.getenv("AWS_ACCESS_KEY_ID"):
+            providers.append("bedrock")
+        if os.getenv("AZURE_API_KEY"):
+            providers.append("azure")
+
+        return providers
+
+    async def call(
+        self, model: str, messages: list[dict[str, str]], **kwargs: Any
+    ) -> dict[str, Any]:
+        """
+        Call any LLM via LiteLLM.
+
+        Args:
+            model: LiteLLM identifier (e.g., "gemini/gemini-3-flash-preview")
+            messages: List of chat messages with role and content
+            **kwargs: Additional parameters (temperature, max_tokens, response_format,
+                thinking_level, thinking_budget, etc.)
+
+        Returns:
+            OpenAI-compatible response dict
+
+        Raises:
+            ValueError: Invalid model format
+            RuntimeError: API key missing or call failed
+        """
+        # Validate model format
+        if "/" not in model:
+            raise ValueError(
+                f"Invalid model format: '{model}'. "
+                f"Expected 'provider/model' (e.g., 'gemini/gemini-3-flash-preview')"
+            )
+
+        provider = model.split("/")[0]
+
+        # Check if provider is available
+        if provider not in self.available_providers:
+            raise RuntimeError(
+                f"Provider '{provider}' not available. "
+                f"Please set the appropriate API key environment variable. "
+                f"Available providers: {', '.join(self.available_providers) or 'none'}"
+            )
+
+        try:
+            # Call LiteLLM with all parameters
+            # Note: response_format, thinking_level, thinking_budget are passed through **kwargs
+            response = await acompletion(model=model, messages=messages, **kwargs)
+
+            # Convert to dict (LiteLLM returns ModelResponse object)
+            return response.model_dump()
+
+        except Exception as e:
+            raise RuntimeError(f"LLM call failed for model '{model}': {str(e)}") from e
+
+    def get_available_providers(self) -> list[str]:
+        """Get list of available providers."""
+        return self.available_providers
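The gateway enforces two preconditions before delegating to LiteLLM: the model string must look like 'provider/model', and that provider's API key must have been detected at construction time. A short sketch of the resulting error behaviour (hypothetical model names; assumes no API keys are set in the environment):

import asyncio

from uer.llm.gateway import LLMGateway

async def demo() -> None:
    gateway = LLMGateway()

    try:
        # Missing provider prefix -> rejected before any network call.
        await gateway.call("gpt-4", [{"role": "user", "content": "hi"}])
    except ValueError as exc:
        print(exc)

    try:
        # Well-formed model, but OPENAI_API_KEY is not set -> provider unavailable.
        await gateway.call("openai/gpt-4", [{"role": "user", "content": "hi"}])
    except RuntimeError as exc:
        print(exc)

asyncio.run(demo())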
File without changes
Binary file
Binary file
package/python/src/uer/models/llm.py
@@ -0,0 +1,109 @@
+"""Pydantic models for LLM call requests and responses."""
+
+from typing import Any, Literal
+
+from pydantic import BaseModel, Field, validator
+
+
+class LLMCallRequest(BaseModel):
+    """Request for llm_call tool with validation."""
+
+    model: str = Field(
+        ..., description="LiteLLM model identifier (e.g., 'gemini/gemini-3-flash-preview')"
+    )
+    messages: list[dict[str, str]] = Field(
+        ..., min_length=1, description="List of chat messages with 'role' and 'content' keys"
+    )
+    temperature: float | None = Field(
+        default=0.7, ge=0.0, le=2.0, description="Sampling temperature"
+    )
+    max_tokens: int | None = Field(default=4096, ge=1, description="Maximum tokens to generate")
+
+    # Structured Output Support
+    response_format: dict[str, Any] | None = Field(
+        default=None,
+        description=(
+            "Force structured JSON output matching a schema. "
+            "Format: {'type': 'json_schema', 'json_schema': {'name': '...', 'schema': {...}, 'strict': True}}. "
+            "The calling LLM can generate schemas dynamically based on the task."
+        ),
+    )
+
+    # Chain of Thought Support (Gemini-specific)
+    thinking_level: Literal["minimal", "low", "medium", "high"] | None = Field(
+        default=None,
+        description=(
+            "Gemini 3 reasoning level. Use 'high' for complex tasks requiring deep reasoning, "
+            "'low' for simple tasks. Defaults to 'high' if not specified."
+        ),
+    )
+    thinking_budget: int | None = Field(
+        default=None,
+        ge=-1,
+        le=32768,
+        description=(
+            "Gemini 2.5 thinking tokens budget (128-32768, or -1 for dynamic). "
+            "Use -1 for automatic complexity-based allocation."
+        ),
+    )
+
+    # Tool Use Support
+    tools: list[dict[str, Any]] | None = Field(
+        default=None,
+        description=(
+            "List of tools the model can use in OpenAI format. "
+            "For Claude models, use built-in tools like 'web_search_20250305' for web search. "
+            "For Gemini, tools include code_execution, google_search_retrieval, etc. "
+            "Example: [{'type': 'web_search_20250305'}] for Claude web search."
+        ),
+    )
+
+    @validator("model")
+    def validate_model_format(cls, v: str) -> str:
+        """Ensure model is in 'provider/model' format."""
+        if "/" not in v:
+            raise ValueError(f"Model must be in format 'provider/model', got '{v}'")
+        return v
+
+    @validator("messages")
+    def validate_messages(cls, v: list[dict[str, str]]) -> list[dict[str, str]]:
+        """Ensure messages have required fields."""
+        for msg in v:
+            if "role" not in msg:
+                raise ValueError("Each message must have 'role' field")
+            if "content" not in msg:
+                raise ValueError("Each message must have 'content' field")
+        return v
+
+    @validator("response_format")
+    def validate_response_format(cls, v: dict[str, Any] | None) -> dict[str, Any] | None:
+        """Validate response_format structure."""
+        if v is not None:
+            if "type" not in v:
+                raise ValueError("response_format must have 'type' field")
+            if v["type"] == "json_schema" and "json_schema" not in v:
+                raise ValueError("json_schema response_format must have 'json_schema' field")
+        return v
+
+
+class LLMCallResponse(BaseModel):
+    """Response from llm_call tool."""
+
+    model: str
+    choices: list[dict[str, Any]]
+    usage: dict[str, int] | None = None
+
+    class Config:
+        """Pydantic config."""
+
+        json_schema_extra = {
+            "examples": [
+                {
+                    "model": "gemini/gemini-3-flash-preview",
+                    "choices": [
+                        {"message": {"role": "assistant", "content": "Hello! How can I help you?"}}
+                    ],
+                    "usage": {"total_tokens": 25},
+                }
+            ]
+        }
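LLMCallRequest is the validation layer applied to tool arguments before they reach the gateway. A short sketch of a structured-output request that passes the validators above (the schema name and fields are illustrative only, not defined by the package):

from uer.models.llm import LLMCallRequest

request = LLMCallRequest(
    model="gemini/gemini-3-flash-preview",
    messages=[{"role": "user", "content": "Summarise the risks in this email."}],
    response_format={
        "type": "json_schema",
        "json_schema": {
            "name": "risk_summary",  # illustrative name
            "schema": {
                "type": "object",
                "properties": {"risks": {"type": "array", "items": {"type": "string"}}},
                "required": ["risks"],
            },
            "strict": True,
        },
    },
    thinking_level="high",
)

# Malformed input is rejected at this layer, e.g. a model without a provider prefix
# ("gpt-4") fails validation with "Model must be in format 'provider/model'".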
package/python/src/uer/server.py
@@ -0,0 +1,186 @@
+"""UER MCP Server - Universal Expert Registry."""
+
+import asyncio
+import json
+import logging
+from collections.abc import Sequence
+from typing import Any
+
+from mcp.server import Server
+from mcp.server.stdio import stdio_server
+from mcp.types import TextContent, Tool
+
+from uer.llm.gateway import LLMGateway
+from uer.models.llm import LLMCallRequest
+
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+)
+logger = logging.getLogger("uer.server")
+
+# Initialize MCP server
+app = Server("uer")
+gateway = LLMGateway()
+
+
+@app.list_tools()
+async def list_tools() -> list[Tool]:
+    """List available MCP tools."""
+    available = gateway.get_available_providers()
+
+    return [
+        Tool(
+            name="llm_call",
+            description=(
+                "Call any LLM via LiteLLM unified interface. "
+                f"Available providers: {', '.join(available) or 'none'}. "
+                "Supports Anthropic (Claude), OpenAI (GPT), Google (Gemini), and 100+ more. "
+                "Features: Structured output (response_format), Chain of Thought (thinking_level/thinking_budget), "
+                "Tool use (tools) for web search, code execution, grounding, etc."
+            ),
+            inputSchema={
+                "type": "object",
+                "properties": {
+                    "model": {
+                        "type": "string",
+                        "description": "LiteLLM model identifier (e.g., 'gemini/gemini-3-flash-preview')",
+                    },
+                    "messages": {
+                        "type": "array",
+                        "description": "List of chat messages",
+                        "items": {
+                            "type": "object",
+                            "properties": {
+                                "role": {
+                                    "type": "string",
+                                    "enum": ["system", "user", "assistant"],
+                                },
+                                "content": {"type": "string"},
+                            },
+                            "required": ["role", "content"],
+                        },
+                        "minItems": 1,
+                    },
+                    "temperature": {
+                        "type": "number",
+                        "default": 0.7,
+                        "minimum": 0,
+                        "maximum": 2,
+                    },
+                    "max_tokens": {
+                        "type": "integer",
+                        "default": 4096,
+                        "minimum": 1,
+                    },
+                    "response_format": {
+                        "type": "object",
+                        "description": (
+                            "Force structured JSON output. The calling LLM can generate schemas dynamically. "
+                            "Format: {'type': 'json_schema', 'json_schema': {'name': '...', 'schema': {...}, 'strict': True}}"
+                        ),
+                    },
+                    "thinking_level": {
+                        "type": "string",
+                        "enum": ["minimal", "low", "medium", "high"],
+                        "description": "Gemini 3 reasoning level: 'high' for complex reasoning tasks, 'low' for simple ones",
+                    },
+                    "thinking_budget": {
+                        "type": "integer",
+                        "minimum": -1,
+                        "maximum": 32768,
+                        "description": "Gemini 2.5 thinking tokens (128-32768, or -1 for dynamic allocation)",
+                    },
+                    "tools": {
+                        "type": "array",
+                        "description": (
+                            "List of tools the model can use. "
+                            "Claude: [{'type': 'web_search_20250305'}] for web search, [{'type': 'bash_20250305'}] for code execution. "
+                            "Gemini: [{'type': 'code_execution'}] or [{'type': 'google_search_retrieval'}]. "
+                            "OpenAI: Standard function calling format."
+                        ),
+                        "items": {"type": "object"},
+                    },
+                },
+                "required": ["model", "messages"],
+            },
+        )
+    ]
+
+
+@app.call_tool()
+async def call_tool(name: str, arguments: Any) -> Sequence[TextContent]:
+    """Handle tool invocation."""
+    if name != "llm_call":
+        raise ValueError(f"Unknown tool: {name}")
+
+    try:
+        # Validate input using Pydantic
+        request = LLMCallRequest(**arguments)
+
+        logger.info(
+            f"Calling LLM: model={request.model}, messages={len(request.messages)}, "
+            f"structured_output={request.response_format is not None}, "
+            f"thinking={request.thinking_level or request.thinking_budget}, "
+            f"tools={len(request.tools) if request.tools else 0}"
+        )
+
+        # Call LLM via gateway with all parameters
+        response = await gateway.call(
+            model=request.model,
+            messages=request.messages,
+            temperature=request.temperature,
+            max_tokens=request.max_tokens,
+            response_format=request.response_format,
+            thinking_level=request.thinking_level,
+            thinking_budget=request.thinking_budget,
+            tools=request.tools,
+        )
+
+        # Log result
+        usage = response.get("usage", {})
+        logger.info(
+            f"LLM call successful: tokens={usage.get('total_tokens', 'unknown')}, "
+            f"model={response.get('model', 'unknown')}"
+        )
+
+        # Return response as text content
+        return [TextContent(type="text", text=json.dumps(response, indent=2))]
+
+    except ValueError as e:
+        # Validation error from Pydantic
+        logger.error(f"Invalid input: {str(e)}")
+        return [
+            TextContent(type="text", text=json.dumps({"error": "Invalid input", "message": str(e)}))
+        ]
+
+    except RuntimeError as e:
+        # Error from LLMGateway
+        logger.error(f"LLM call failed: {str(e)}")
+        return [
+            TextContent(
+                type="text", text=json.dumps({"error": "LLM call failed", "message": str(e)})
+            )
+        ]
+
+    except Exception as e:
+        # Unexpected error
+        logger.exception(f"Unexpected error: {str(e)}")
+        return [
+            TextContent(
+                type="text", text=json.dumps({"error": "Internal error", "message": str(e)})
+            )
+        ]
+
+
+async def main() -> None:
+    """Run the MCP server via stdio transport."""
+    logger.info("Starting UER MCP server...")
+    logger.info(f"Available providers: {gateway.get_available_providers()}")
+
+    async with stdio_server() as (read_stream, write_stream):
+        await app.run(read_stream, write_stream, app.create_initialization_options())
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
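Putting it together, an MCP client calling the llm_call tool sends arguments matching the inputSchema above; the handler validates them with LLMCallRequest, forwards them through the gateway, and returns the provider response as pretty-printed JSON text, or a small error envelope on failure. A sketch of an arguments payload, with illustrative message content:

# Illustrative arguments for the "llm_call" tool, matching the inputSchema above.
arguments = {
    "model": "gemini/gemini-3-flash-preview",
    "messages": [
        {"role": "system", "content": "You are a concise assistant."},
        {"role": "user", "content": "List three manipulation tactics to watch for."},
    ],
    "temperature": 0.7,
    "max_tokens": 1024,
}

# On validation or upstream failure the handler returns JSON like:
# {"error": "LLM call failed", "message": "..."}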