PyPI - ailoy-py - Versions diffs - 0.0.1__cp310-cp310-win_amd64.whl → 0.0.3__cp310-cp310-win_amd64.whl - Mend

ailoy-py 0.0.1__cp310-cp310-win_amd64.whl → 0.0.3__cp310-cp310-win_amd64.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (23)

ailoy/__init__.py +20 -1
ailoy/agent.py +349 -309
ailoy/ailoy_py.cp310-win_amd64.pyd +0 -0
ailoy/mcp.py +171 -0
ailoy/models/__init__.py +7 -0
ailoy/models/api_model.py +71 -0
ailoy/models/local_model.py +44 -0
ailoy/runtime.py +34 -19
ailoy/tools.py +205 -0
ailoy/utils/__init__.py +0 -0
ailoy/utils/image.py +11 -0
ailoy/vector_store.py +10 -9
ailoy_py-0.0.3.dist-info/DELVEWHEEL +2 -0
{ailoy_py-0.0.1.dist-info → ailoy_py-0.0.3.dist-info}/METADATA +5 -4
ailoy_py-0.0.3.dist-info/RECORD +27 -0
{ailoy_py-0.0.1.dist-info → ailoy_py-0.0.3.dist-info}/WHEEL +1 -1
ailoy_py.libs/msvcp140-0c97ddc05c5b9024aa6af9538804ea77.dll +0 -0
ailoy_py.libs/tvm_runtime-b9e3c7109c2f4b1e95a6f576ff368094.dll +0 -0
ailoy_py-0.0.1.dist-info/DELVEWHEEL +0 -2
ailoy_py-0.0.1.dist-info/RECORD +0 -20
ailoy_py.libs/msvcp140-9867ece6bcf7e4746fa7e6671b0a17bd.dll +0 -0
ailoy_py.libs/tvm_runtime-781b77698d9c76cd695ed4ae13795465.dll +0 -0
{ailoy_py-0.0.1.dist-info → ailoy_py-0.0.3.dist-info}/entry_points.txt +0 -0

ailoy/agent.py CHANGED

@@ -1,211 +1,185 @@
+import base64
 import json
-import subprocess
 import warnings
 from abc import ABC, abstractmethod
-from collections.abc import Awaitable, Callable, Generator
+from collections.abc import Callable, Generator
+from functools import partial
 from pathlib import Path
 from typing import (
+    Annotated,
     Any,
     Literal,
     Optional,
-    TypeVar,
     Union,
 )
 from urllib.parse import urlencode, urlparse, urlunparse
 import jmespath
-import mcp
-import mcp.types as mcp_types
-from pydantic import BaseModel, ConfigDict, Field
+from PIL.Image import Image
+from pydantic import BaseModel, ConfigDict, Field, TypeAdapter
 from rich.console import Console
 from rich.panel import Panel
 from ailoy.ailoy_py import generate_uuid
+from ailoy.mcp import MCPServer, MCPTool, StdioServerParameters
+from ailoy.models import APIModel, LocalModel
 from ailoy.runtime import Runtime
+from ailoy.tools import DocstringParsingException, TypeHintParsingException, get_json_schema
+from ailoy.utils.image import pillow_image_to_base64
-__all__ = ["Agent"]
+## Types for internal data structures
-## Types for OpenAI API-compatible data structures
-class SystemMessage(BaseModel):
-    role: Literal["system"]
-    content: str
+class TextContent(BaseModel):
+    type: Literal["text"] = "text"
+    text: str
-class UserMessage(BaseModel):
-    role: Literal["user"]
-    content: str
+class ImageContent(BaseModel):
+    class UrlData(BaseModel):
+        url: str
+    type: Literal["image_url"] = "image_url"
+    image_url: UrlData
-class AIOutputTextMessage(BaseModel):
-    role: Literal["assistant"]
-    content: str
-    reasoning: Optional[bool] = None
-class AIToolCallMessage(BaseModel):
-    role: Literal["assistant"]
-    content: None
-    tool_calls: list["ToolCall"]
-class ToolCall(BaseModel):
-    id: str
-    type: Literal["function"] = "function"
-    function: "ToolCallFunction"
+    @staticmethod
+    def from_url(url: str):
+        return ImageContent(image_url={"url": url})
+    @staticmethod
+    def from_pillow(image: Image):
+        return ImageContent(image_url={"url": pillow_image_to_base64(image)})
-class ToolCallFunction(BaseModel):
-    name: str
-    arguments: dict[str, Any]
+class AudioContent(BaseModel):
+    class AudioData(BaseModel):
+        data: str
+        format: Literal["mp3", "wav"]
-class ToolCallResultMessage(BaseModel):
-    role: Literal["tool"]
-    name: str
-    tool_call_id: str
-    content: str
+    type: Literal["input_audio"] = "input_audio"
+    input_audio: AudioData
+    @staticmethod
+    def from_bytes(data: bytes, format: Literal["mp3", "wav"]):
+        return AudioContent(input_audio={"data": base64.b64encode(data).decode("utf-8"), "format": format})
-Message = Union[
-    SystemMessage,
-    UserMessage,
-    AIOutputTextMessage,
-    AIToolCallMessage,
-    ToolCallResultMessage,
-]
+class FunctionData(BaseModel):
+    class FunctionBody(BaseModel):
+        name: str
+        arguments: Any
-class MessageDelta(BaseModel):
-    finish_reason: Optional[Literal["stop", "tool_calls", "length", "error"]]
-    message: Message
+    type: Literal["function"] = "function"
+    id: Optional[str] = None
+    function: FunctionBody
-## Types for LLM Model Definitions
+class SystemMessage(BaseModel):
+    role: Literal["system"] = "system"
+    content: str | list[TextContent]
-TVMModelName = Literal["Qwen/Qwen3-0.6B", "Qwen/Qwen3-1.7B", "Qwen/Qwen3-4B", "Qwen/Qwen3-8B"]
-OpenAIModelName = Literal["gpt-4o"]
-ModelName = Union[TVMModelName, OpenAIModelName]
+class UserMessage(BaseModel):
+    role: Literal["user"] = "user"
+    content: str | list[TextContent | ImageContent | AudioContent]
-class TVMModel(BaseModel):
-    name: TVMModelName
-    quantization: Optional[Literal["q4f16_1"]] = None
-    mode: Optional[Literal["interactive"]] = None
+class AssistantMessage(BaseModel):
+    role: Literal["assistant"] = "assistant"
+    content: Optional[str | list[TextContent]] = None
+    name: Optional[str] = None
+    tool_calls: Optional[list[FunctionData]] = None
-class OpenAIModel(BaseModel):
-    name: OpenAIModelName
-    api_key: str
+    # Non-OpenAI fields
+    reasoning: Optional[list[TextContent]] = None
-class ModelDescription(BaseModel):
-    model_id: str
-    component_type: str
-    default_system_message: Optional[str] = None
+class ToolMessage(BaseModel):
+    role: Literal["tool"] = "tool"
+    content: str | list[TextContent]
+    tool_call_id: Optional[str] = None
-model_descriptions: dict[ModelName, ModelDescription] = {
-    "Qwen/Qwen3-0.6B": ModelDescription(
-        model_id="Qwen/Qwen3-0.6B",
-        component_type="tvm_language_model",
-        default_system_message="You are Qwen, created by Alibaba Cloud. You are a helpful assistant.",
-    ),
-    "Qwen/Qwen3-1.7B": ModelDescription(
-        model_id="Qwen/Qwen3-1.7B",
-        component_type="tvm_language_model",
-        default_system_message="You are Qwen, created by Alibaba Cloud. You are a helpful assistant.",
-    ),
-    "Qwen/Qwen3-4B": ModelDescription(
-        model_id="Qwen/Qwen3-4B",
-        component_type="tvm_language_model",
-        default_system_message="You are Qwen, created by Alibaba Cloud. You are a helpful assistant.",
-    ),
-    "Qwen/Qwen3-8B": ModelDescription(
-        model_id="Qwen/Qwen3-8B",
-        component_type="tvm_language_model",
-        default_system_message="You are Qwen, created by Alibaba Cloud. You are a helpful assistant.",
-    ),
-    "gpt-4o": ModelDescription(
-        model_id="gpt-4o",
-        component_type="openai",
-    ),
-}
+Message = Union[
+    SystemMessage,
+    UserMessage,
+    AssistantMessage,
+    ToolMessage,
+]
-class ComponentState(BaseModel):
-    name: str
-    valid: bool
+class MessageOutput(BaseModel):
+    message: AssistantMessage
+    finish_reason: Optional[Literal["stop", "tool_calls", "invalid_tool_call", "length", "error"]] = None
 ## Types for agent's responses
-_console = Console(highlight=False)
+_console = Console(highlight=False, force_jupyter=False, force_terminal=True)
-class AgentResponseBase(BaseModel):
-    type: Literal["output_text", "tool_call", "tool_call_result", "reasoning", "error"]
-    end_of_turn: bool
-    role: Literal["assistant", "tool"]
-    content: Any
-    def print(self):
-        raise NotImplementedError
-class AgentResponseOutputText(AgentResponseBase):
+class AgentResponseOutputText(BaseModel):
     type: Literal["output_text", "reasoning"]
-    role: Literal["assistant"]
+    role: Literal["assistant"] = "assistant"
+    is_type_switched: bool = False
     content: str
     def print(self):
+        if self.is_type_switched:
+            _console.print()  # add newline if type has been switched
         _console.print(self.content, end="", style=("yellow" if self.type == "reasoning" else None))
-        if self.end_of_turn:
-            _console.print()
-class AgentResponseToolCall(AgentResponseBase):
-    type: Literal["tool_call"]
-    role: Literal["assistant"]
-    content: ToolCall
+class AgentResponseToolCall(BaseModel):
+    type: Literal["tool_call"] = "tool_call"
+    role: Literal["assistant"] = "assistant"
+    is_type_switched: bool = False
+    content: FunctionData
     def print(self):
+        title = f"[magenta]Tool Call[/magenta]: [bold]{self.content.function.name}[/bold]"
+        if self.content.id is not None and len(self.content.id) > 0:
+            title += f" ({self.content.id})"
         panel = Panel(
             json.dumps(self.content.function.arguments, indent=2),
-            title=f"[magenta]Tool Call[/magenta]: [bold]{self.content.function.name}[/bold] ({self.content.id})",
+            title=title,
             title_align="left",
         )
         _console.print(panel)
-class AgentResponseToolCallResult(AgentResponseBase):
-    type: Literal["tool_call_result"]
-    role: Literal["tool"]
-    content: ToolCallResultMessage
+class AgentResponseToolResult(BaseModel):
+    type: Literal["tool_call_result"] = "tool_call_result"
+    role: Literal["tool"] = "tool"
+    is_type_switched: bool = False
+    content: ToolMessage
     def print(self):
         try:
             # Try to parse as json
-            content = json.dumps(json.loads(self.content.content), indent=2)
+            content = json.dumps(json.loads(self.content.content[0].text), indent=2)
         except json.JSONDecodeError:
             # Use original content if not json deserializable
-            content = self.content.content
+            content = self.content.content[0].text
         # Truncate long contents
         if len(content) > 500:
             content = content[:500] + "...(truncated)"
+        title = "[green]Tool Result[/green]"
+        if self.content.tool_call_id is not None and len(self.content.tool_call_id) > 0:
+            title += f" ({self.content.tool_call_id})"
         panel = Panel(
             content,
-            title=f"[green]Tool Result[/green]: [bold]{self.content.name}[/bold] ({self.content.tool_call_id})",
+            title=title,
             title_align="left",
         )
         _console.print(panel)
-class AgentResponseError(AgentResponseBase):
-    type: Literal["error"]
-    role: Literal["assistant"]
+class AgentResponseError(BaseModel):
+    type: Literal["error"] = "error"
+    role: Literal["assistant"] = "assistant"
+    is_type_switched: bool = False
     content: str
     def print(self):
@@ -219,7 +193,7 @@ class AgentResponseError(AgentResponseBase):
 AgentResponse = Union[
     AgentResponseOutputText,
     AgentResponseToolCall,
-    AgentResponseToolCallResult,
+    AgentResponseToolResult,
     AgentResponseError,
 ]
@@ -305,22 +279,6 @@ class BearerAuthenticator(ToolAuthenticator):
         return {**request, "headers": headers}
-T_Retval = TypeVar("T_Retval")
-def run_async(coro: Callable[..., Awaitable[T_Retval]]) -> T_Retval:
-    try:
-        import anyio
-        # Running outside async loop
-        return anyio.run(lambda: coro)
-    except RuntimeError:
-        import anyio.from_thread
-        # Already in a running event loop: use anyio from_thread
-        return anyio.from_thread.run(coro)
 class Agent:
     """
     The `Agent` class provides a high-level interface for interacting with large language models (LLMs) in Ailoy.
@@ -334,39 +292,37 @@ class Agent:
     def __init__(
         self,
         runtime: Runtime,
-        model_name: ModelName,
+        model: APIModel | LocalModel,
         system_message: Optional[str] = None,
-        api_key: Optional[str] = None,
-        attrs: dict[str, Any] = dict(),
     ):
         """
         Create an instance.
         :param runtime: The runtime environment associated with the agent.
-        :param model_name: The name of the LLM model to use.
+        :param model: The model instance.
         :param system_message: Optional system message to set the initial assistant context.
-        :param api_key: (web agent only) The API key for AI API.
-        :param attrs: Additional initialization parameters (for `define_component` runtime call)
         :raises ValueError: If model name is not supported or validation fails.
         """
         self._runtime = runtime
         # Initialize component state
-        self._component_state = ComponentState(
-            name=generate_uuid(),
-            valid=False,
-        )
+        self._component_name = generate_uuid()
+        self._component_ready = False
         # Initialize messages
         self._messages: list[Message] = []
-        if system_message:
-            self._messages.append(SystemMessage(role="system", content=system_message))
+        # Initialize system message
+        self._system_message = system_message
         # Initialize tools
         self._tools: list[Tool] = []
+        # Initialize MCP servers
+        self._mcp_servers: list[MCPServer] = []
         # Define the component
-        self.define(model_name, api_key=api_key, attrs=attrs)
+        self.define(model)
     def __del__(self):
         self.delete()
@@ -377,151 +333,216 @@ class Agent:
     def __exit__(self, type, value, traceback):
         self.delete()
-    def define(self, model_name: ModelName, api_key: Optional[str] = None, attrs: dict[str, Any] = dict()) -> None:
+    def define(self, model: APIModel | LocalModel) -> None:
         """
         Initializes the agent by defining its model in the runtime.
         This must be called before running the agent. If already initialized, this is a no-op.
-        :param model_name: The name of the LLM model to use.
-        :param api_key: (web agent only) The API key for AI API.
-        :param attrs: Additional initialization parameters (for `define_component` runtime call)
+        :param model: The model instance.
         """
-        if self._component_state.valid:
+        if self._component_ready:
             return
-        if model_name not in model_descriptions:
-            raise ValueError(f"Model `{model_name}` not supported")
+        if not self._runtime.is_alive():
+            raise ValueError("Runtime is currently stopped.")
-        model_desc = model_descriptions[model_name]
+        # Set default system message if not given; still can be None
+        if self._system_message is None:
+            self._system_message = getattr(model, "default_system_message", None)
-        # Add model name into attrs
-        if "model" not in attrs:
-            attrs["model"] = model_desc.model_id
-        # Set default system message
-        if len(self._messages) == 0 and model_desc.default_system_message:
-            self._messages.append(SystemMessage(role="system", content=model_desc.default_system_message))
-        # Add API key
-        if api_key:
-            attrs["api_key"] = api_key
+        self.clear_messages()
         # Call runtime's define
         self._runtime.define(
-            model_descriptions[model_name].component_type,
-            self._component_state.name,
-            attrs,
+            model.component_type,
+            self._component_name,
+            model.to_attrs(),
         )
         # Mark as defined
-        self._component_state.valid = True
+        self._component_ready = True
     def delete(self) -> None:
         """
         Deinitializes the agent and releases resources in the runtime.
         This should be called when the agent is no longer needed. If already deinitialized, this is a no-op.
         """
-        if not self._component_state.valid:
+        if not self._component_ready:
             return
-        self._runtime.delete(self._component_state.name)
-        if len(self._messages) > 0 and self._messages[0].role == "system":
-            self._messages = [self._messages[0]]
-        else:
-            self._messages = []
-        self._component_state.valid = False
+        if self._runtime.is_alive():
+            self._runtime.delete(self._component_name)
+        self.clear_messages()
+        for mcp_server in self._mcp_servers:
+            mcp_server.cleanup()
+        self._component_ready = False
     def query(
         self,
-        message: str,
-        enable_reasoning: bool = False,
-        ignore_reasoning_messages: bool = False,
+        message: str | list[str | Image | dict | TextContent | ImageContent | AudioContent],
+        reasoning: bool = False,
     ) -> Generator[AgentResponse, None, None]:
         """
         Runs the agent with a new user message and yields streamed responses.
         :param message: The user message to send to the model.
-        :param enable_reasoning: If True, enables reasoning capabilities. (default: False)
-        :param ignore_reasoning_messages: If True, reasoning steps are not included in the response stream. (default: False)
-        :yield: AgentResponse output of the LLM inference or tool calls
+        :param reasoning: If True, enables reasoning capabilities. (Default: False)
+        :return: An iterator over the output, where each item represents either a generated token from the assistant or a tool call.
+        :rtype: Iterator[:class:`AgentResponse`]
         """  # noqa: E501
-        self._messages.append(UserMessage(role="user", content=message))
+        if not self._component_ready:
+            raise ValueError("Agent is not valid. Create one or define newly.")
+        if not self._runtime.is_alive():
+            raise ValueError("Runtime is currently stopped.")
+        if isinstance(message, str):
+            self._messages.append(UserMessage(content=[TextContent(text=message)]))
+        elif isinstance(message, list):
+            if len(message) == 0:
+                raise ValueError("Message is empty")
+            contents = []
+            for content in message:
+                if isinstance(content, str):
+                    contents.append(TextContent(text=content))
+                elif isinstance(content, Image):
+                    contents.append(ImageContent.from_pillow(image=content))
+                elif isinstance(content, dict):
+                    ta: TypeAdapter[TextContent | ImageContent | AudioContent] = TypeAdapter(
+                        Annotated[TextContent | ImageContent | AudioContent, Field(discriminator="type")]
+                    )
+                    validated_content = ta.validate_python(content)
+                    contents.append(validated_content)
+                else:
+                    contents.append(content)
+            self._messages.append(UserMessage(content=contents))
+        else:
+            raise ValueError(f"Invalid message type: {type(message)}")
+        prev_resp_type = None
         while True:
             infer_args = {
-                "messages": [msg.model_dump() for msg in self._messages],
-                "tools": [{"type": "function", "function": t.desc.model_dump()} for t in self._tools],
+                "messages": [msg.model_dump(exclude_none=True) for msg in self._messages],
+                "tools": [{"type": "function", "function": t.desc.model_dump(exclude_none=True)} for t in self._tools],
             }
-            if enable_reasoning:
-                infer_args["enable_reasoning"] = enable_reasoning
-            if ignore_reasoning_messages:
-                infer_args["ignore_reasoning_messages"] = ignore_reasoning_messages
-            for resp in self._runtime.call_iter_method(self._component_state.name, "infer", infer_args):
-                delta = MessageDelta.model_validate(resp)
-                if delta.finish_reason is None:
-                    output_msg = AIOutputTextMessage.model_validate(delta.message)
-                    yield AgentResponseOutputText(
-                        type="reasoning" if output_msg.reasoning else "output_text",
-                        end_of_turn=False,
-                        role="assistant",
-                        content=output_msg.content,
-                    )
-                    continue
-                if delta.finish_reason == "tool_calls":
-                    tool_call_message = AIToolCallMessage.model_validate(delta.message)
-                    self._messages.append(tool_call_message)
-                    for tool_call in tool_call_message.tool_calls:
-                        yield AgentResponseToolCall(
-                            type="tool_call",
-                            end_of_turn=True,
-                            role="assistant",
-                            content=tool_call,
+            if reasoning:
+                infer_args["reasoning"] = reasoning
+            assistant_reasoning = None
+            assistant_content = None
+            assistant_tool_calls = None
+            finish_reason = ""
+            for result in self._runtime.call_iter_method(self._component_name, "infer", infer_args):
+                msg = MessageOutput.model_validate(result)
+                if msg.message.reasoning:
+                    for v in msg.message.reasoning:
+                        if not assistant_reasoning:
+                            assistant_reasoning = [v]
+                        else:
+                            assistant_reasoning[0].text += v.text
+                        resp = AgentResponseOutputText(
+                            type="reasoning",
+                            is_type_switched=(prev_resp_type != "reasoning"),
+                            content=v.text,
                         )
-                    tool_call_results: list[ToolCallResultMessage] = []
-                    def run_tool(tool_call: ToolCall):
-                        tool_ = next(
-                            (t for t in self._tools if t.desc.name == tool_call.function.name),
-                            None,
+                        prev_resp_type = resp.type
+                        yield resp
+                if msg.message.content is not None:
+                    # Canonicalize message content to the array of TextContent
+                    if isinstance(msg.message.content, str):
+                        msg.message.content = [TextContent(text=msg.message.content)]
+                    for v in msg.message.content:
+                        if not assistant_content:
+                            assistant_content = [v]
+                        else:
+                            assistant_content[0].text += v.text
+                        resp = AgentResponseOutputText(
+                            type="output_text",
+                            is_type_switched=(prev_resp_type != "output_text"),
+                            content=v.text,
                         )
-                        if not tool_:
-                            raise RuntimeError("Tool not found")
-                        resp = tool_.call(**tool_call.function.arguments)
-                        return ToolCallResultMessage(
-                            role="tool",
-                            name=tool_call.function.name,
-                            tool_call_id=tool_call.id,
-                            content=json.dumps(resp),
+                        prev_resp_type = resp.type
+                        yield resp
+                if msg.message.tool_calls:
+                    for v in msg.message.tool_calls:
+                        if not assistant_tool_calls:
+                            assistant_tool_calls = [v]
+                        else:
+                            assistant_tool_calls.append(v)
+                        resp = AgentResponseToolCall(
+                            is_type_switched=True,
+                            content=v,
                         )
+                        prev_resp_type = resp.type
+                        yield resp
+                if msg.finish_reason:
+                    finish_reason = msg.finish_reason
+                    break
+            # Append output
+            self._messages.append(
+                AssistantMessage(
+                    reasoning=assistant_reasoning,
+                    content=assistant_content,
+                    tool_calls=assistant_tool_calls,
+                )
+            )
+            if finish_reason == "tool_calls":
+                def run_tool(tool_call: FunctionData) -> ToolMessage:
+                    tool_ = next(
+                        (t for t in self._tools if t.desc.name == tool_call.function.name),
+                        None,
+                    )
+                    if not tool_:
+                        raise RuntimeError("Tool not found")
+                    tool_result = tool_.call(**tool_call.function.arguments)
+                    return ToolMessage(
+                        content=[TextContent(text=json.dumps(tool_result))],
+                        tool_call_id=tool_call.id,
+                    )
-                    tool_call_results = [run_tool(tc) for tc in tool_call_message.tool_calls]
+                tool_call_results = [run_tool(tc) for tc in assistant_tool_calls]
+                for result_msg in tool_call_results:
+                    self._messages.append(result_msg)
+                    resp = AgentResponseToolResult(
+                        is_type_switched=True,
+                        content=result_msg,
+                    )
+                    prev_resp_type = resp.type
+                    yield resp
+                # Infer again if tool calls happened
+                continue
-                    for result_msg in tool_call_results:
-                        self._messages.append(result_msg)
-                        yield AgentResponseToolCallResult(
-                            type="tool_call_result",
-                            end_of_turn=True,
-                            role="tool",
-                            content=result_msg,
-                        )
+            # Finish this generator
+            yield AgentResponseOutputText(type="output_text", content="\n")
+            break
-                    # Run infer again with new messages
-                    break
+    def get_messages(self) -> list[Message]:
+        """
+        Get the current conversation history.
+        Each item in the list represents a message from either the user or the assistant.
-                if delta.finish_reason in ["stop", "length", "error"]:
-                    output_msg = AIOutputTextMessage.model_validate(delta.message)
-                    yield AgentResponseOutputText(
-                        type="reasoning" if output_msg.reasoning else "output_text",
-                        end_of_turn=True,
-                        role="assistant",
-                        content=output_msg.content,
-                    )
+        :return: The conversation history so far in the form of a list.
+        :rtype: list[Message]
+        """
+        return self._messages
-                    # finish this Generator
-                    return
+    def clear_messages(self):
+        """
+        Clear the history of conversation messages.
+        """
+        self._messages.clear()
+        if self._system_message is not None:
+            self._messages.append(SystemMessage(role="system", content=[TextContent(text=self._system_message)]))
     def print(self, resp: AgentResponse):
         resp.print()
@@ -537,14 +558,29 @@ class Agent:
             return
         self._tools.append(tool)
-    def add_py_function_tool(self, desc: dict, f: Callable[..., Any]):
+    def add_py_function_tool(self, f: Callable[..., Any], desc: Optional[dict] = None):
         """
         Adds a Python function as a tool using callable.
-        :param desc: Tool descriotion.
         :param f: Function will be called when the tool invocation occured.
+        :param desc: Tool description to override. If not given, parsed from docstring of function `f`.
+        :raises ValueError: Docstring parsing is failed.
+        :raises ValidationError: Given or parsed description is not a valid `ToolDescription`.
         """
-        self.add_tool(Tool(desc=ToolDescription.model_validate(desc), call_fn=f))
+        tool_description = None
+        if desc is not None:
+            tool_description = ToolDescription.model_validate(desc)
+        if tool_description is None:
+            try:
+                json_schema = get_json_schema(f)
+            except (TypeHintParsingException, DocstringParsingException) as e:
+                raise ValueError("Failed to parse docstring", e)
+            tool_description = ToolDescription.model_validate(json_schema.get("function", {}))
+        self.add_tool(Tool(desc=tool_description, call_fn=f))
     def add_builtin_tool(self, tool_def: BuiltinToolDefinition) -> bool:
         """
@@ -669,61 +705,65 @@ class Agent:
             else:
                 warnings.warn(f'Tool type "{tool_type}" is not supported. Skip adding tool "{tool_name}".')
-    def add_mcp_tool(self, params: mcp.StdioServerParameters, tool: mcp_types.Tool):
+    def add_tools_from_mcp_server(
+        self, name: str, params: StdioServerParameters, tools_to_add: Optional[list[str]] = None
+    ):
         """
-        Adds a tool from an MCP (Model Context Protocol) server.
+        Create a MCP server and register its tools to agent.
+        :param name: The unique name of the MCP server.
+                     If there's already a MCP server with the same name, it raises RuntimeError.
         :param params: Parameters for connecting to the MCP stdio server.
-        :param tool: Tool metadata as defined by MCP.
-        :returns: True if the tool was successfully added.
+        :param tools_to_add: Optional list of tool names to add. If None, all tools are added.
         """
-        from mcp.client.stdio import stdio_client
+        if any([s.name == name for s in self._mcp_servers]):
+            raise RuntimeError(f"MCP server with name '{name}' is already registered")
-        def call(**inputs: dict[str, Any]) -> Any:
-            async def _inner():
-                async with stdio_client(params, errlog=subprocess.STDOUT) as streams:
-                    async with mcp.ClientSession(*streams) as session:
-                        await session.initialize()
-                        result = await session.call_tool(tool.name, inputs)
-                        contents: list[str] = []
-                        for item in result.content:
-                            if isinstance(item, mcp_types.TextContent):
-                                contents.append(item.text)
-                            elif isinstance(item, mcp_types.ImageContent):
-                                contents.append(item.data)
-                            elif isinstance(item, mcp_types.EmbeddedResource):
-                                if isinstance(item.resource, mcp_types.TextResourceContents):
-                                    contents.append(item.resource.text)
-                                else:
-                                    contents.append(item.resource.blob)
-                        return contents
-            return run_async(_inner())
-        desc = ToolDescription(name=tool.name, description=tool.description, parameters=tool.inputSchema)
-        return self.add_tool(Tool(desc=desc, call_fn=call))
-    def add_tools_from_mcp_server(self, params: mcp.StdioServerParameters, tools_to_add: Optional[list[str]] = None):
+        # Create and register MCP server
+        mcp_server = MCPServer(name, params)
+        self._mcp_servers.append(mcp_server)
+        # Register tools
+        for tool in mcp_server.list_tools():
+            # Skip if this tool is not in the whitelist
+            if tools_to_add is not None and tool.name not in tools_to_add:
+                continue
+            desc = ToolDescription(
+                name=f"{name}-{tool.name}", description=tool.description, parameters=tool.inputSchema
+            )
+            def call(tool: MCPTool, **inputs: dict[str, Any]) -> list[str]:
+                return mcp_server.call_tool(tool, inputs)
+            self.add_tool(Tool(desc=desc, call_fn=partial(call, tool)))
+    def remove_mcp_server(self, name: str):
         """
-        Fetches tools from an MCP stdio server and registers them with the agent.
+        Removes the MCP server and its tools from the agent, with terminating the MCP server process.
-        :param params: Parameters for connecting to the MCP stdio server.
-        :param tools_to_add: Optional list of tool names to add. If None, all tools are added.
-        :returns: list of all tools returned by the server.
+        :param name: The unique name of the MCP server.
+                     If there's no MCP server matches the name, it raises RuntimeError.
+        """
+        if all([s.name != name for s in self._mcp_servers]):
+            raise RuntimeError(f"MCP server with name '{name}' does not exist")
+        # Remove the MCP server
+        mcp_server = next(filter(lambda s: s.name == name, self._mcp_servers))
+        self._mcp_servers.remove(mcp_server)
+        mcp_server.cleanup()
+        # Remove tools registered from the MCP server
+        self._tools = list(filter(lambda t: not t.desc.name.startswith(f"{mcp_server.name}-"), self._tools))
+    def get_tools(self):
+        """
+        Get the list of registered tools.
+        """
+        return self._tools
+    def clear_tools(self):
+        """
+        Clear the registered tools.
         """
-        from mcp.client.stdio import stdio_client
-        async def _inner():
-            async with stdio_client(params, errlog=subprocess.STDOUT) as streams:
-                async with mcp.ClientSession(*streams) as session:
-                    await session.initialize()
-                    resp = await session.list_tools()
-                    for tool in resp.tools:
-                        if tools_to_add is None or tool.name in tools_to_add:
-                            self.add_mcp_tool(params, tool)
-                    return resp.tools
-        tools = run_async(_inner())
-        return tools
+        self._tools.clear()