aixtools 0.1.11__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aixtools might be problematic. Click here for more details.

aixtools/mcp/client.py CHANGED
@@ -1,6 +1,7 @@
1
1
  """MCP server utilities with caching and robust error handling."""
2
2
 
3
3
  import asyncio
4
+ import logging
4
5
  from contextlib import asynccontextmanager
5
6
  from datetime import timedelta
6
7
  from typing import Any, AsyncGenerator
@@ -9,12 +10,13 @@ import anyio
9
10
  import httpx
10
11
  from anyio.streams.memory import MemoryObjectReceiveStream, MemoryObjectSendStream
11
12
  from cachebox import TTLCache
13
+ from fastmcp.client.logging import LogMessage
12
14
  from mcp import types as mcp_types
13
15
  from mcp.client import streamable_http
14
16
  from mcp.shared.exceptions import McpError
15
17
  from mcp.shared.message import SessionMessage
16
18
  from pydantic_ai import RunContext, exceptions
17
- from pydantic_ai.mcp import MCPServerStreamableHTTP, ToolResult
19
+ from pydantic_ai.mcp import MCPServerStdio, MCPServerStreamableHTTP, ToolResult
18
20
  from pydantic_ai.toolsets.abstract import ToolsetTool
19
21
 
20
22
  from aixtools.context import SessionIdTuple
@@ -28,6 +30,49 @@ CACHE_KEY = "TOOL_LIST"
28
30
  logger = get_logger(__name__)
29
31
 
30
32
 
33
+ # Default log_handler for MCP clients
34
+ LOGGING_LEVEL_MAP = logging.getLevelNamesMapping()
35
+
36
+
37
# MCP uses syslog-style severities; these three have no counterpart among
# Python's level names, so they are mapped to the closest standard level.
_MCP_ONLY_LEVELS = {
    "NOTICE": logging.INFO,
    "ALERT": logging.CRITICAL,
    "EMERGENCY": logging.CRITICAL,
}


async def default_mcp_log_handler(message: LogMessage):
    """
    Handles incoming logs from the MCP server and forwards them
    to the standard Python logging system.

    When `message.data` is a dict, its "msg" entry is logged and its "extra"
    entry is passed along as the logging `extra`. Any other payload is logged
    as-is, without `extra`.

    Args:
        message (LogMessage): The log notification received from the MCP server.
    """
    data = message.data
    if isinstance(data, dict):
        msg = data.get("msg")
        extra = data.get("extra")
    else:
        # Payload is not a mapping: there is no "msg"/"extra" to extract
        msg, extra = data, None

    # Convert the MCP log level to a Python log level. Levels that Python does
    # not know are looked up in the MCP-only table; anything else is INFO.
    level_name = message.level.upper()
    level = LOGGING_LEVEL_MAP.get(level_name, _MCP_ONLY_LEVELS.get(level_name, logging.INFO))

    # Log the message using the standard logging library
    logger.log(level, msg, extra=extra)
50
+
51
+
52
def get_mcp_client(
    url: str | None = None,
    command: str | None = None,
    args: list[str] | None = None,
    log_handler: callable = default_mcp_log_handler,  # type: ignore
) -> MCPServerStreamableHTTP | MCPServerStdio:
    """
    Create an MCP client instance based on the provided URL or command.
    By providing a log_handler, incoming logs from the MCP server can be shown, which improves debugging.

    If both `url` and `command` are given, `url` takes precedence.

    Args:
        url (str | None): The URL of the MCP server.
        command (str | None): The command to start a local MCP server (STDIO MCP).
        args (list[str] | None): Additional arguments for the command (STDIO MCP).
        log_handler (callable): Handler passed to the client as `log_handler`.

    Returns: An HTTP client when `url` is given, otherwise a STDIO client for `command`.

    Raises:
        ValueError: If neither `url` nor `command` is provided.
    """
    if url:
        # Connect to the server the caller asked for (not a fixed local address)
        return MCPServerStreamableHTTP(url=url, log_handler=log_handler)
    if command:
        command_args = [] if args is None else args
        return MCPServerStdio(command=command, args=command_args, log_handler=log_handler)
    raise ValueError("Either url or command must be provided to create MCP client.")
74
+
75
+
31
76
  def get_mcp_headers(session_id_tuple: SessionIdTuple) -> dict[str, str] | None:
32
77
  """
33
78
  Generate headers for MCP server requests.
@@ -13,10 +13,6 @@ from .utils import (
13
13
  get_session_id_tuple,
14
14
  run_in_thread,
15
15
  )
16
- from .workspace_privacy import (
17
- is_session_private,
18
- set_session_private,
19
- )
20
16
 
21
17
  __all__ = [
22
18
  "get_workspace_path",
@@ -24,6 +20,4 @@ __all__ = [
24
20
  "container_to_host_path",
25
21
  "host_to_container_path",
26
22
  "run_in_thread",
27
- "is_session_private",
28
- "set_session_private",
29
23
  ]
aixtools/server/path.py CHANGED
@@ -2,47 +2,82 @@
2
2
  Workspace path handling for user sessions.
3
3
  """
4
4
 
5
- from pathlib import Path, PurePath, PurePosixPath
5
+ import posixpath
6
+ from pathlib import Path, PurePosixPath
6
7
 
7
8
  from fastmcp import Context
8
9
 
9
10
  from ..utils.config import DATA_DIR
10
11
  from .utils import get_session_id_tuple
11
12
 
12
- WORKSPACES_ROOT_DIR = DATA_DIR / "workspaces" # Path on the host where workspaces are stored
13
+ WORKSPACES_ROOT_DIR = (DATA_DIR / "workspaces").resolve() # Path on the host where workspaces are stored
13
14
  CONTAINER_WORKSPACE_PATH = PurePosixPath("/workspace") # Path inside the sandbox container where workspace is mounted
14
15
 
15
16
 
16
def get_workspace_path(ctx: Context | tuple | None = None) -> Path:
    """
    Get the workspace path (on the host) of a user session.

    The path is built from the user and session IDs:

        <WORKSPACES_ROOT_DIR>/<user_id>/<session_id>

    Example workspace path:

        /data/workspaces/foo-user/bar-session

    Args:
        ctx: Either a `(user_id, session_id)` tuple, which is used directly, or a
            FastMCP context (or None), which is handed to `get_session_id_tuple`
            to obtain the IDs.

    Returns: The workspace path as a Path object.
    """
    if isinstance(ctx, tuple):
        session_ids = ctx
    else:
        session_ids = get_session_id_tuple(ctx)
    user_id, session_id = session_ids
    return WORKSPACES_ROOT_DIR.joinpath(user_id, session_id)
40
+
41
+
42
def get_workspace_path_sandbox() -> PurePosixPath:
    """
    Get the path at which the workspace is mounted inside the sandbox container.

    The result is a PurePosixPath on purpose: sandbox paths do not exist on the
    host, so they cannot be resolved there, and a concrete Path could be a
    WindowsPath on a Windows host, which would be wrong for a Linux container.

    Returns: The workspace path as a PurePosixPath object.
    """
    sandbox_root = CONTAINER_WORKSPACE_PATH
    return sandbox_root
43
55
 
44
56
 
45
- def container_to_host_path(path: PurePosixPath, *, ctx: Context | tuple = None) -> Path | None:
57
def path_normalize(p: PurePosixPath) -> PurePosixPath:
    """
    Return `p` with redundant separators and `.`/`..` segments collapsed.

    The work is done on the path text only (via `posixpath.normpath`).
    """
    as_text = p.as_posix()
    collapsed = posixpath.normpath(as_text)
    return PurePosixPath(collapsed)
62
+
63
+
64
def path_chroot(path: Path, old_root: Path, new_root: Path) -> Path:
    """
    Change the root of a given path from old_root to new_root.
    If the path is not absolute (e.g. 'my_file.txt', './my_file.txt', 'my_dir/file.txt')
    we treat it as relative to the 'new_root'.

    Args:
        path: The path to re-root.
        old_root: The root that an absolute `path` must be located under.
        new_root: The root the returned path is located under.

    Returns: The re-rooted path. For a relative `path` this is the resolved path.

    Raises:
        ValueError: If an absolute `path` is not under `old_root`, or if the
            result would be located outside of `new_root`.
    """
    resolved_root = Path(new_root).resolve()
    if not Path(path).is_absolute():
        new_path = Path(new_root / path).resolve()
        if not new_path.is_relative_to(resolved_root):
            raise ValueError(f"Path must not escape the workspace root: '{path}'")
        return new_path
    # Otherwise, we treat it as absolute and change the root
    new_path = new_root / Path(path).relative_to(old_root)
    # relative_to() only compares path components, so '<old_root>/../x' passes it.
    # Resolve the result to make sure '..' segments do not lead out of new_root.
    if not Path(new_path).resolve().is_relative_to(resolved_root):
        raise ValueError(f"Path must not escape the workspace root: '{path}'")
    return new_path
78
+
79
+
80
+ def container_to_host_path(path: PurePosixPath, *, ctx: Context | tuple | None = None) -> Path | None:
46
81
  """
47
82
  Convert a path in a sandbox container to a host path
48
83
 
@@ -54,19 +89,41 @@ def container_to_host_path(path: PurePosixPath, *, ctx: Context | tuple = None)
54
89
  Returns:
55
90
  Path to the file on the host, or None if the conversion fails.
56
91
  """
57
- old_root = CONTAINER_WORKSPACE_PATH
92
+ # Try without service name (maybe the LLM forgot to put the SERVICE_NAME in the path)
93
+ old_root = get_workspace_path_sandbox()
58
94
  new_root = get_workspace_path(ctx=ctx)
59
95
  try:
60
- return new_root / PurePosixPath(path).relative_to(old_root)
96
+ # Relative paths are treated as relative to the new_root
97
+ if not PurePosixPath(path).is_absolute():
98
+ # Resolve paths to prevent escaping the workspace root
99
+ new_path = Path(new_root / path).resolve()
100
+ new_root = Path(new_root.resolve())
101
+ if not new_path.is_relative_to(new_root):
102
+ raise ValueError(f"Path must not escape the workspace root: '{path}'")
103
+ return new_path
104
+ # Otherwise, we treat it as absolute and change the root
105
+ return new_root / Path(path).relative_to(old_root)
61
106
  except ValueError as e:
62
107
  raise ValueError(f"Container path must be a subdir of '{old_root}', got '{path}' instead") from e
63
108
 
64
109
 
65
def host_to_container_path(path: Path, *, ctx: Context | tuple | None = None) -> PurePosixPath:
    """
    Convert a host path to a path in a sandbox container.
    Paths inside the sandbox MUST be PurePosixPath (i.e. we use Linux containers).

    A relative `path` is treated as relative to the sandbox workspace root.
    An absolute `path` must be located under the host workspace of the session.

    Args:
        path: The host path to convert.
        ctx: Passed to `get_workspace_path` to locate the host workspace.

    Returns: The corresponding path inside the sandbox container.

    Raises:
        ValueError: If an absolute `path` is not under the host workspace, or if
            the converted path would be located outside of the sandbox workspace.
    """
    old_root = get_workspace_path(ctx=ctx)
    new_root = get_workspace_path_sandbox()
    if Path(path).is_absolute():
        try:
            relative = Path(path).relative_to(old_root)
        except ValueError as e:
            raise ValueError(f"Host path must be a subdir of '{old_root}', got '{path}' instead") from e
        new_path = new_root / relative
    else:
        # Relative paths are treated as relative to the new_root
        new_path = path_normalize(new_root / path)
    # Normalize paths to prevent escaping the workspace root (we cannot resolve PurePosixPaths).
    # This check sits outside the try block so its message is not replaced by the "subdir" one.
    if not path_normalize(new_path).is_relative_to(path_normalize(new_root)):
        raise ValueError(f"Path must not escape the workspace root: '{path}'")
    return new_path
@@ -112,7 +112,13 @@ class AixTestModel(Model):
112
112
  **kwargs, # pylint: disable=unused-argument
113
113
  ) -> AsyncIterator[StreamedResponse]:
114
114
  model_response = await self._request(messages, model_settings, model_request_parameters)
115
- yield TestStreamedResponse(_model_name=self.model_name, _structured_response=model_response, _messages=messages)
115
+ yield TestStreamedResponse(
116
+ _model_name=self.model_name,
117
+ _structured_response=model_response,
118
+ _messages=messages,
119
+ model_request_parameters=model_request_parameters,
120
+ _provider_name="",
121
+ )
116
122
 
117
123
  @property
118
124
  def model_name(self) -> str:
@@ -0,0 +1,79 @@
1
+ import argparse
2
+ import asyncio
3
+
4
+ from pydantic_ai.mcp import MCPServerStdio, MCPServerStreamableHTTP
5
+
6
+ from aixtools.agents import get_agent, run_agent
7
+ from aixtools.tools.doctor.tool_doctor import TOOL_DOCTOR_PROMPT
8
+ from aixtools.tools.doctor.tool_recommendation import ToolRecommendation
9
+
10
+
11
async def tool_doctor_mcp(
    mcp_url: str = "http://127.0.0.1:8000/mcp",
    mcp_server: MCPServerStreamableHTTP | MCPServerStdio | None = None,
    verbose: bool = False,
    debug: bool = False,
) -> list[ToolRecommendation]:
    """
    Run the tool doctor agent against the tools of an MCP server and return its recommendations.

    If `mcp_server` is not given, an HTTP MCP server object is created from `mcp_url`.

    Usage examples:
        # Using an http MCP server
        ret = await tool_doctor_mcp(mcp_url='http://127.0.0.1:8000/mcp')
        print(ret)

        # Using a stdio MCP server
        server = MCPServerStdio(command='fastmcp', args=['run', 'my_mcp_server.py'])
        ret = await tool_doctor_mcp(mcp_server=server)
        print(ret)
    """
    server = MCPServerStreamableHTTP(url=mcp_url) if mcp_server is None else mcp_server
    doctor_agent = get_agent(toolsets=[server], output_type=list[ToolRecommendation])
    async with doctor_agent:
        # Only the agent output is needed; the execution nodes are discarded
        recommendations, _ = await run_agent(doctor_agent, TOOL_DOCTOR_PROMPT, verbose=verbose, debug=debug)
        return recommendations  # type: ignore
36
+
37
+
38
def main_cli():
    """Parse the command line, run the tool doctor on the selected MCP server and print the result."""
    parser = argparse.ArgumentParser(description="Analyze tools from an MCP server and provide recommendations")

    # The HTTP URL and the STDIO command are alternative ways to reach a server
    connection = parser.add_mutually_exclusive_group()
    connection.add_argument(
        "--mcp-url",
        default="http://127.0.0.1:8000/mcp",
        help="URL of the HTTP MCP server (default: http://127.0.0.1:8000/mcp)",
    )
    connection.add_argument("--stdio-command", help="Command to run STDIO MCP server (e.g., 'fastmcp')")

    parser.add_argument(
        "--stdio-args",
        nargs="*",
        default=[],
        help="Arguments for STDIO MCP server command (e.g., 'run', 'my_server.py')",
    )
    parser.add_argument("--verbose", action="store_true", help="Enable verbose output")
    parser.add_argument("--debug", action="store_true", help="Enable debug output")

    cli = parser.parse_args()

    async def run():
        if cli.stdio_command:
            # A STDIO command was given: start that server instead of using the URL
            stdio_server = MCPServerStdio(command=cli.stdio_command, args=cli.stdio_args)
            found = await tool_doctor_mcp(mcp_server=stdio_server, verbose=cli.verbose, debug=cli.debug)
        else:
            found = await tool_doctor_mcp(mcp_url=cli.mcp_url, verbose=cli.verbose, debug=cli.debug)

        print("Tool Doctor Recommendations:")
        print("=" * 50)
        for position, rec in enumerate(found, start=1):
            print(f"\n{position}. {rec}")

    asyncio.run(run())
76
+
77
+
78
+ if __name__ == "__main__":
79
+ main_cli()
@@ -1,3 +1,7 @@
1
+ """
2
+ Tool doctor: Analyze tools and give recommendations for improvement.
3
+ """
4
+
1
5
  from aixtools.agents import get_agent, run_agent
2
6
  from aixtools.tools.doctor.tool_recommendation import ToolRecommendation
3
7
 
@@ -1,5 +1,10 @@
1
1
  from pydantic import BaseModel
2
2
 
3
+ """
4
+ These are classes that represent recommendations for improving tools.
5
+ They are used by ToolDoctor
6
+ """
7
+
3
8
 
4
9
  class ArgumentRecommendation(BaseModel):
5
10
  """A recommendation for an argument"""
aixtools/utils/config.py CHANGED
@@ -71,7 +71,6 @@ logging.warning("Using DATA_DIR='%s'", DATA_DIR)
71
71
  VDB_CHROMA_PATH = DATA_DB_DIR / "chroma.db"
72
72
  VDB_DEFAULT_SIMILARITY_THRESHOLD = 0.85
73
73
 
74
-
75
74
  # ---
76
75
  # Variables in '.env' file
77
76
  # Explicitly load specific variables
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aixtools
3
- Version: 0.1.11
3
+ Version: 0.2.1
4
4
  Summary: Tools for AI exploration and debugging
5
5
  Requires-Python: >=3.11.2
6
6
  Description-Content-Type: text/markdown
@@ -18,7 +18,8 @@ Requires-Dist: langchain-openai>=0.3.14
18
18
  Requires-Dist: mcp>=1.11.0
19
19
  Requires-Dist: mypy>=1.18.2
20
20
  Requires-Dist: pandas>=2.2.3
21
- Requires-Dist: pydantic-ai>=0.4.10
21
+ Requires-Dist: pydantic-evals>=0.4.10
22
+ Requires-Dist: pydantic-ai>=1.0.9
22
23
  Requires-Dist: pylint>=3.3.7
23
24
  Requires-Dist: rich>=14.0.0
24
25
  Requires-Dist: ruff>=0.11.6
@@ -45,20 +46,16 @@ A2A
45
46
  - Google SDK Integration for A2A - `aixtools/a2a/google_sdk/`
46
47
  - PydanticAI Adapter for Google SDK - `aixtools/a2a/google_sdk/pydantic_ai_adapter/`
47
48
 
48
- Databases
49
- - Database Integration - `aixtools/db/`
50
- - Vector Database Support - `aixtools/db/vector_db.py`
51
-
52
49
  Logging & Debugging
53
- - Log Viewing Application - `aixtools/log_view/`
50
+ - Log Viewing Web Application - `log_view`
54
51
  - Object Logging System - `aixtools/logging/`
55
52
  - Model Patch Logging - `aixtools/logging/model_patch_logging.py`
56
53
  - Log Filtering System - `aixtools/logfilters/`
57
54
  - FastMCP Logging - `aixtools/mcp/fast_mcp_log.py`
58
- - Command Line Interface for Log Viewing - Entry point: `log_view`
59
55
  - MCP (Model Context Protocol) Support - `aixtools/logging/mcp_log_models.py`, `aixtools/logging/mcp_logger.py`
60
56
 
61
- Testing & Tools
57
+ Testing Tools & Evals
58
+ - Evaluations - `aixtools/evals/` - Entry point: `evals`
62
59
  - Testing Utilities - `aixtools/testing/`
63
60
  - Mock Tool System - `aixtools/testing/mock_tool.py`
64
61
  - Model Patch Caching - `aixtools/testing/model_patch_cache.py`
@@ -66,6 +63,10 @@ Testing & Tools
66
63
  - Tool Recommendation Engine - `aixtools/tools/doctor/tool_recommendation.py`
67
64
  - FaultyMCP - `aixtools/mcp/faulty_mcp.py`
68
65
 
66
+ Databases
67
+ - Database Integration - `aixtools/db/`
68
+ - Vector Database Support - `aixtools/db/vector_db.py`
69
+
69
70
  Chainlit & HTTP Server
70
71
  - Chainlit Integration - `aixtools/app.py`, `aixtools/chainlit.md`
71
72
  - Chainlit Utilities - `aixtools/utils/chainlit/`
@@ -81,28 +82,11 @@ Programming utils
81
82
 
82
83
  ## Installation
83
84
 
84
- ### From GitHub
85
-
86
85
  ```bash
87
86
  uv add aixtools
88
87
  ```
89
88
 
90
- ### Development Setup
91
-
92
- ```bash
93
- # Create a new project
94
- uv init MyNewProject
95
- cd MyNewProject
96
-
97
- # Add virtual environment and activate it
98
- uv venv .venv
99
- source .venv/bin/activate
100
-
101
- # Add this package
102
- uv add aixtools
103
- ```
104
-
105
- ### Updating
89
+ **Updating**
106
90
 
107
91
  ```bash
108
92
  uv add --upgrade aixtools
@@ -114,6 +98,7 @@ AIXtools requires environment variables for model providers.
114
98
 
115
99
  **IMPORTANT:** Create a `.env` file based on [`.env_template`](./.env_template):
116
100
 
101
+ Here is an example configuration:
117
102
  ```bash
118
103
  # Model family (azure, openai, or ollama)
119
104
  MODEL_FAMILY=azure
@@ -161,6 +146,33 @@ agent = get_agent(system_prompt="You are a helpful assistant.")
161
146
  result, nodes = await run_agent(agent, "Tell me about AI")
162
147
  ```
163
148
 
149
+ ### Node Debugging and Visualization
150
+
151
+ The [`print_nodes`](aixtools/agents/print_nodes.py) module prints the nodes produced by an agent execution as clean, indented output that is easy to read.
152
+
153
+ ```python
154
+ from aixtools.agents.print_nodes import print_nodes, print_node
155
+ from aixtools.agents.agent import get_agent, run_agent
156
+
157
+ agent = get_agent(system_prompt="You are a helpful assistant.")
158
+ result, nodes = await run_agent(agent, "Explain quantum computing")
159
+ # Print all execution nodes for debugging
160
+ print_nodes(nodes)
161
+ ```
162
+
163
+ **Features:**
164
+ - **Node Type Detection**: Automatically handles different node types (`UserPromptNode`, `CallToolsNode`, `ModelRequestNode`, `End`)
165
+ - **Formatted Output**: Provides clean, indented output for easy reading
166
+ - **Tool Call Visualization**: Shows tool names and arguments for tool calls
167
+ - **Text Content Display**: Formats text parts with proper indentation
168
+ - **Model Request Summary**: Shows character count for model requests to avoid verbose output
169
+
170
+ **Node Types Supported:**
171
+ - `UserPromptNode` - Displays user prompts with indentation
172
+ - `CallToolsNode` - Shows tool calls with names and arguments
173
+ - `ModelRequestNode` - Summarizes model requests with character count
174
+ - `End` - Marks the end of execution (output suppressed by default)
175
+
164
176
  ### Agent Batch Processing
165
177
 
166
178
  Process multiple agent queries simultaneously with built-in concurrency control and result aggregation.
@@ -299,9 +311,39 @@ with ObjectLogger() as logger:
299
311
  logger.log(agent_response)
300
312
  ```
301
313
 
302
- ### MCP Logger
314
+ ### MCP logging
315
+
316
+ AIXtools provides MCP support for both client and server implementations, with easier logging for debugging purposes.
317
+
318
+ **Example:**
319
+
320
+ Let's assume we have an MCP server that runs an agent tool.
321
+
322
+ Note that the `ctx: Context` parameter is passed to `run_agent`; this enables logging to the MCP client.
323
+
324
+ ```python
325
+ @mcp.tool
326
+ async def my_tool_with_agent(query: str, ctx: Context) -> str:
327
+ """ A tool that uses an agent to process the query """
328
+ agent = get_agent()
329
+ async with get_qb_agent() as agent:
330
+ ret, nodes = await run_agent(agent=agent, prompt=query, ctx=ctx) # Enable MCP logging
331
+ return str(ret)
332
+ ```
333
+
334
+ On the client side, you can create an agent connected to the MCP server. The "nodes" from the MCP server will be shown on STDOUT, so you can see what is going on in the MCP server's agent loop.
335
+
336
+ ```python
337
+ mcp = get_mcp_client("http://localhost:8000") # Get an MCP client with a default log handler that prints to STDOUT
338
+ agent = get_agent(toolsets=[mcp])
339
+ async with agent:
340
+ # The messages from the MCP server will be printed to the STDOUT
341
+ ret, nodes = await run_agent(agent, prompt="...")
342
+ ```
343
+
344
+ #### MCP Server Logging
303
345
 
304
- This is an MCP server that can log MCP requests and responses.
346
+ Create MCP servers with built-in logging capabilities.
305
347
 
306
348
  ```python
307
349
  from aixtools.mcp.fast_mcp_log import FastMcpLog
@@ -371,6 +413,77 @@ By default, the "FaultyMCP" includes several tools you can use in your tests:
371
413
  - `freeze_server(seconds)` - Simulates server freeze
372
414
  - `throw_404_exception()` - Throws HTTP 404 error
373
415
 
416
+ ### Evals
417
+
418
+ Run comprehensive Agent/LLM evaluations using the built-in evaluation discovery, which is based on the Pydantic-AI framework.
419
+
420
+ ```bash
421
+ # Run all evaluations
422
+ evals
423
+
424
+ # Run evaluations with filtering
425
+ evals --filter "specific_test"
426
+
427
+ # Run with verbose output and detailed reporting
428
+ evals --verbose --include-input --include-output --include-reasons
429
+
430
+ # Specify custom evaluations directory
431
+ evals --evals-dir /path/to/evals
432
+
433
+ # Set minimum assertions threshold
434
+ evals --min-assertions 0.8
435
+ ```
436
+
437
+ **Command Line Options:**
438
+ - `--evals-dir` - Directory containing eval_*.py files (default: evals)
439
+ - `--filter` - Filter to run only matching evaluations
440
+ - `--include-input` - Include input in report output
441
+ - `--include-output` - Include output in report output
442
+ - `--include-evaluator-failures` - Include evaluator failures in report
443
+ - `--include-reasons` - Include reasons in report output
444
+ - `--min-assertions` - Minimum assertions average required for success (default: 1.0)
445
+ - `--verbose` - Print detailed information about discovery and processing
446
+
447
+ The evaluation system discovers and runs all Dataset objects from eval_*.py files in the specified directory, similar to test runners but specifically designed for LLM evaluations using pydantic_evals.
448
+
449
+ **Discovery Mechanism:**
450
+
451
+ The evaluation framework uses an automatic discovery system that:
452
+
453
+ 1. **File Discovery**: Scans the specified directory for files matching the pattern `eval_*.py`
454
+ 2. **Dataset Discovery**: Within each file, looks for variables named `dataset_*` that are instances of `pydantic_evals.Dataset`
455
+ 3. **Target Function Discovery**: Automatically finds the first async function in each module that doesn't start with an underscore (`_`) to use as the evaluation target
456
+ 4. **Filtering**: Supports filtering by module name, file name, dataset name, or fully qualified name
457
+
458
+ **Example Evaluation File Structure:**
459
+ ```python
460
+ # eval_math_operations.py
461
+ from pydantic_evals import Dataset, Case
462
+
463
+ # This dataset will be discovered automatically
464
+ dataset_addition = Dataset(
465
+ name="Addition Tests",
466
+ cases=[
467
+ Case(input="What is 2 + 2?", expected="4"),
468
+ Case(input="What is 10 + 5?", expected="15"),
469
+ ],
470
+ evaluators=[...]
471
+ )
472
+
473
+ # This function will be used as the evaluation target
474
+ async def evaluate_math_agent(input_text: str) -> str:
475
+ # Your agent evaluation logic here
476
+ agent = get_agent(system_prompt="You are a math assistant.")
477
+ result, _ = await run_agent(agent, input_text)
478
+ return result
479
+ ```
480
+
481
+ The discovery system will:
482
+ - Find `eval_math_operations.py` in the evals directory
483
+ - Discover `dataset_addition` as an evaluation dataset
484
+ - Use `evaluate_math_agent` as the target function for evaluation
485
+ - Run each case through the target function and evaluate results
486
+
374
487
  ## Testing & Tools
375
488
 
376
489
  AIXtools provides comprehensive testing utilities and diagnostic tools for AI agent development and debugging.
@@ -397,7 +510,49 @@ test_model = AixTestModel()
397
510
 
398
511
  ### Tool Doctor System
399
512
 
400
- Automated tool analysis and recommendation system for optimizing agent tool usage.
513
+ Automated tool analysis and recommendation system for optimizing agent tool usage and analyzing MCP servers.
514
+
515
+ #### MCP Tool Doctor
516
+
517
+ Analyze tools from MCP (Model Context Protocol) servers and receive AI-powered recommendations for improvement.
518
+
519
+ ```python
520
+ from aixtools.tools.doctor.mcp_tool_doctor import tool_doctor_mcp
521
+ from pydantic_ai.mcp import MCPServerStreamableHTTP, MCPServerStdio
522
+
523
+ # Analyze HTTP MCP server
524
+ recommendations = await tool_doctor_mcp(mcp_url='http://127.0.0.1:8000/mcp')
525
+ for rec in recommendations:
526
+ print(rec)
527
+
528
+ # Analyze STDIO MCP server
529
+ server = MCPServerStdio(command='fastmcp', args=['run', 'my_server.py'])
530
+ recommendations = await tool_doctor_mcp(mcp_server=server, verbose=True)
531
+ ```
532
+
533
+ **Command Line Usage:**
534
+
535
+ ```bash
536
+ # Analyze HTTP MCP server (default)
537
+ tool_doctor_mcp
538
+
539
+ # Analyze specific HTTP MCP server
540
+ tool_doctor_mcp --mcp-url http://localhost:9000/mcp --verbose
541
+
542
+ # Analyze STDIO MCP server
543
+ tool_doctor_mcp --stdio-command fastmcp --stdio-args run my_server.py --debug
544
+
545
+ # Available options:
546
+ # --mcp-url URL URL of HTTP MCP server (default: http://127.0.0.1:8000/mcp)
547
+ # --stdio-command CMD Command to run STDIO MCP server
548
+ # --stdio-args ARGS Arguments for STDIO MCP server command
549
+ # --verbose Enable verbose output
550
+ # --debug Enable debug output
551
+ ```
552
+
553
+ #### Traditional Tool Doctor
554
+
555
+ Analyze tool usage patterns from agent logs and get optimization recommendations.
401
556
 
402
557
  ```python
403
558
  from aixtools.tools.doctor.tool_doctor import ToolDoctor