PyPI - aixtools - Versions diffs - 0.1.11__py3-none-any.whl → 0.2.0__py3-none-any.whl - Mend

aixtools 0.1.11py3-none-any.whl → 0.2.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of aixtools might be problematic. Click here for more details.

Files changed (25) hide show

aixtools/_version.py +2 -2
aixtools/agents/agent.py +26 -7
aixtools/agents/print_nodes.py +54 -0
aixtools/agents/prompt.py +2 -2
aixtools/compliance/private_data.py +1 -1
aixtools/evals/__init__.py +0 -0
aixtools/evals/discovery.py +174 -0
aixtools/evals/evals.py +74 -0
aixtools/evals/run_evals.py +110 -0
aixtools/logging/log_objects.py +24 -23
aixtools/mcp/client.py +46 -1
aixtools/server/__init__.py +0 -6
aixtools/server/path.py +88 -31
aixtools/testing/aix_test_model.py +7 -1
aixtools/tools/doctor/mcp_tool_doctor.py +79 -0
aixtools/tools/doctor/tool_doctor.py +4 -0
aixtools/tools/doctor/tool_recommendation.py +5 -0
aixtools/utils/config.py +0 -1
{aixtools-0.1.11.dist-info → aixtools-0.2.0.dist-info}/METADATA +185 -30
{aixtools-0.1.11.dist-info → aixtools-0.2.0.dist-info}/RECORD +23 -18
aixtools-0.2.0.dist-info/entry_points.txt +4 -0
aixtools/server/workspace_privacy.py +0 -65
aixtools-0.1.11.dist-info/entry_points.txt +0 -2
{aixtools-0.1.11.dist-info → aixtools-0.2.0.dist-info}/WHEEL +0 -0
{aixtools-0.1.11.dist-info → aixtools-0.2.0.dist-info}/top_level.txt +0 -0

aixtools/mcp/client.py CHANGED Viewed

@@ -1,6 +1,7 @@
 """MCP server utilities with caching and robust error handling."""
 import asyncio
+import logging
 from contextlib import asynccontextmanager
 from datetime import timedelta
 from typing import Any, AsyncGenerator
@@ -9,12 +10,13 @@ import anyio
 import httpx
 from anyio.streams.memory import MemoryObjectReceiveStream, MemoryObjectSendStream
 from cachebox import TTLCache
+from fastmcp.client.logging import LogMessage
 from mcp import types as mcp_types
 from mcp.client import streamable_http
 from mcp.shared.exceptions import McpError
 from mcp.shared.message import SessionMessage
 from pydantic_ai import RunContext, exceptions
-from pydantic_ai.mcp import MCPServerStreamableHTTP, ToolResult
+from pydantic_ai.mcp import MCPServerStdio, MCPServerStreamableHTTP, ToolResult
 from pydantic_ai.toolsets.abstract import ToolsetTool
 from aixtools.context import SessionIdTuple
@@ -28,6 +30,49 @@ CACHE_KEY = "TOOL_LIST"
 logger = get_logger(__name__)
+# Default log_handler for MCP clients
+LOGGING_LEVEL_MAP = logging.getLevelNamesMapping()
+async def default_mcp_log_handler(message: LogMessage):
+    """
+    Forward a log message received from an MCP server to this module's logger.
+    The text is taken from the "msg" entry of the message data and the "extra"
+    entry is handed to the logging call as its `extra` mapping. Level names that
+    Python's logging module does not know are logged at INFO.
+    """
+    payload = message.data
+    text, extra_fields = payload.get("msg"), payload.get("extra")
+    # Translate the MCP level name (e.g. "warning") into a numeric Python level
+    py_level = LOGGING_LEVEL_MAP.get(message.level.upper(), logging.INFO)
+    logger.log(py_level, text, extra=extra_fields)
+def get_mcp_client(
+    url: str | None = None,
+    command: str | None = None,
+    args: list[str] | None = None,
+    log_handler: callable = default_mcp_log_handler,  # type: ignore
+) -> MCPServerStreamableHTTP | MCPServerStdio:
+    """
+    Create an MCP client instance based on the provided URL or command.
+    By providing a log_handler, incoming logs from the MCP server can be shown, which improves debugging.
+    If both `url` and `command` are given, `url` takes precedence.
+    Args:
+        url (str | None): The URL of the MCP server (streamable HTTP MCP).
+        command (str | None): The command to start a local MCP server (STDIO MCP).
+        args (list[str] | None): Additional arguments for the command (STDIO MCP).
+        log_handler (callable): Callback that receives the log messages sent by the MCP server
+            (defaults to `default_mcp_log_handler`).
+    Returns: An HTTP client when `url` is given, otherwise a STDIO client for `command`.
+    Raises:
+        ValueError: If neither `url` nor `command` is provided.
+    """
+    if url:
+        # Connect to the server the caller asked for, not to a fixed local address
+        return MCPServerStreamableHTTP(url=url, log_handler=log_handler)
+    if command:
+        command_args = [] if args is None else args
+        return MCPServerStdio(command=command, args=command_args, log_handler=log_handler)
+    raise ValueError("Either url or command must be provided to create MCP client.")
 def get_mcp_headers(session_id_tuple: SessionIdTuple) -> dict[str, str] | None:
     """
     Generate headers for MCP server requests.

aixtools/server/__init__.py CHANGED Viewed

@@ -13,10 +13,6 @@ from .utils import (
     get_session_id_tuple,
     run_in_thread,
 )
-from .workspace_privacy import (
-    is_session_private,
-    set_session_private,
-)
 __all__ = [
     "get_workspace_path",
@@ -24,6 +20,4 @@ __all__ = [
     "container_to_host_path",
     "host_to_container_path",
     "run_in_thread",
-    "is_session_private",
-    "set_session_private",
 ]

aixtools/server/path.py CHANGED Viewed

@@ -2,47 +2,82 @@
 Workspace path handling for user sessions.
 """
-from pathlib import Path, PurePath, PurePosixPath
+import posixpath
+from pathlib import Path, PurePosixPath
 from fastmcp import Context
 from ..utils.config import DATA_DIR
 from .utils import get_session_id_tuple
-WORKSPACES_ROOT_DIR = DATA_DIR / "workspaces"  # Path on the host where workspaces are stored
+WORKSPACES_ROOT_DIR = (DATA_DIR / "workspaces").resolve()  # Path on the host where workspaces are stored
 CONTAINER_WORKSPACE_PATH = PurePosixPath("/workspace")  # Path inside the sandbox container where workspace is mounted
-def get_workspace_path(service_name: str = None, *, in_sandbox: bool = False, ctx: Context | tuple = None) -> PurePath:
+def get_workspace_path(ctx: Context | tuple | None = None) -> Path:
     """
-    Get the workspace path for a specific service (e.g. MCP server).
-    If `service_name` is None, then the path to entire workspace folder (as mounted to a sandbox) is returned.
-    If `in_sandbox` is True, it returns a path in sandbox, e.g.: `/workspace/mcp_repl`.
-    If `in_sandbox` is False, it returns the path based on user and session IDs in the format:
-    `<DATA_DIR>/workspaces/<user_id>/<session_id>/<service_name>`, where `DATA_DIR` should come from
-    the environment variables, e.g.:
-    `/data/workspaces/foo-user/bar-session/mcp_repl`.
-    The `ctx` is used to get user and session IDs tuple. It can be passed directly or via HTTP headers from `Context`.
-    If `ctx` is None, the current FastMCP request HTTP headers are used.
+    Get the workspace path (in the host) for a specific service (e.g. MCP server).
+    Returns the path based on user and session IDs in the format:
+        <DATA_DIR>/workspaces/<user_id>/<session_id>
+    where `DATA_DIR` should come from the environment variables
+    Example workspace path:
+        /data/workspaces/foo-user/bar-session
+    The `ctx` is used to get user and session IDs tuple. It can be passed directly
+    or via HTTP headers from `Context`. If `ctx` is None, the current FastMCP
+    request HTTP headers are used.
     Args:
         ctx: The FastMCP context, which contains the user session.
-        service_name: The name of the service (e.g. "mcp_server").
-        in_sandbox: If True, use a sandbox path; otherwise, use user/session-based path.
-    Returns: The workspace path as a PurePath object.
+    Returns: The workspace path as a Path object.
+    """
+    user_id, session_id = ctx if isinstance(ctx, tuple) else get_session_id_tuple(ctx)
+    return WORKSPACES_ROOT_DIR / user_id / session_id
+def get_workspace_path_sandbox() -> PurePosixPath:
+    """
+    Get the workspace path in the sandbox container.
+    We return PurePosixPath to ensure compatibility with Linux containers.
+    The paths inside the sandbox cannot be resolved (because they don't exist
+    on the host), so we use PurePosixPath instead of Path. Also Path could be
+    a WindowsPath on Windows hosts, which would be incorrect for Linux containers.
+    Returns: The workspace path as a PurePosixPath object.
     """
-    if in_sandbox:
-        path = CONTAINER_WORKSPACE_PATH
-    else:
-        user_id, session_id = ctx if isinstance(ctx, tuple) else get_session_id_tuple(ctx)
-        path = WORKSPACES_ROOT_DIR / user_id / session_id
-    if service_name:
-        path = path / service_name
-    return path
+    return CONTAINER_WORKSPACE_PATH
-def container_to_host_path(path: PurePosixPath, *, ctx: Context | tuple = None) -> Path | None:
+def path_normalize(p: PurePosixPath) -> PurePosixPath:
+    """
+    Return a lexically normalized copy of a PurePosixPath.
+    Redundant separators and '.' / '..' components are collapsed with
+    posixpath.normpath (a purely textual operation).
+    """
+    normalized = posixpath.normpath(p.as_posix())
+    return PurePosixPath(normalized)
+def path_chroot(path: Path, old_root: Path, new_root: Path) -> Path:
+    """
+    Change the root of a given path from old_root to new_root.
+    If the path is not absolute (e.g. 'my_file.txt', './my_file.txt', 'my_dir/file.txt')
+    we treat it as relative to the 'new_root' and return the resolved path.
+    Absolute paths must be located under 'old_root'; the part below 'old_root'
+    is re-attached to 'new_root'.
+    Raises:
+        ValueError: If an absolute path is not under 'old_root', or if the result
+            would end up outside 'new_root' (e.g. through '..' components).
+    """
+    resolved_root = Path(new_root).resolve()
+    if not Path(path).is_absolute():
+        new_path = Path(new_root / path).resolve()
+    else:
+        # Absolute path: swap the root, the returned path itself is left unresolved
+        new_path = new_root / Path(path).relative_to(old_root)
+    # relative_to() keeps '..' components, so an absolute path such as
+    # '<old_root>/../etc' could leave new_root; compare resolved paths in both cases
+    if not Path(new_path).resolve().is_relative_to(resolved_root):
+        raise ValueError(f"Path must not escape the workspace root: '{path}'")
+    return Path(new_path)
+def container_to_host_path(path: PurePosixPath, *, ctx: Context | tuple | None = None) -> Path | None:
     """
     Convert a path in a sandbox container to a host path
@@ -54,19 +89,41 @@ def container_to_host_path(path: PurePosixPath, *, ctx: Context | tuple = None)
     Returns:
         Path to the file on the host, or None if the conversion fails.
     """
-    old_root = CONTAINER_WORKSPACE_PATH
+    # Try without service name (maybe the LLM forgot to put the SERVICE_NAME in the path)
+    old_root = get_workspace_path_sandbox()
     new_root = get_workspace_path(ctx=ctx)
     try:
-        return new_root / PurePosixPath(path).relative_to(old_root)
+        # Relative paths are treated as relative to the new_root
+        if not PurePosixPath(path).is_absolute():
+            # Resolve paths to prevent escaping the workspace root
+            new_path = Path(new_root / path).resolve()
+            new_root = Path(new_root.resolve())
+            if not new_path.is_relative_to(new_root):
+                raise ValueError(f"Path must not escape the workspace root: '{path}'")
+            return new_path
+        # Otherwise, we treat it as absolute and change the root
+        return new_root / Path(path).relative_to(old_root)
     except ValueError as e:
         raise ValueError(f"Container path must be a subdir of '{old_root}', got '{path}' instead") from e
-def host_to_container_path(path: Path, *, ctx: Context | tuple = None) -> PurePosixPath:
-    """Convert a host path to a path in a sandbox container."""
+def host_to_container_path(path: Path, *, ctx: Context | tuple | None = None) -> PurePosixPath:
+    """
+    Convert a host path to a path in a sandbox container.
+    Paths inside the sandbox MUST be PurePosixPath (i.e. we use Linux containers).
+    """
     old_root = get_workspace_path(ctx=ctx)
-    new_root = CONTAINER_WORKSPACE_PATH
+    new_root = get_workspace_path_sandbox()
     try:
+        # Relative paths are treated as relative to the new_root
+        if not Path(path).is_absolute():
+            # Normalize paths to prevent escaping the workspace root (we cannot resolve PurePosixPaths)
+            new_path = path_normalize(new_root / path)
+            new_root = path_normalize(new_root)
+            if not new_path.is_relative_to(new_root):
+                raise ValueError(f"Path must not escape the workspace root: '{path}'")
+            return new_path
+        # Otherwise, we treat it as absolute and change the root
         return new_root / Path(path).relative_to(old_root)
-    except ValueError as exc:
-        raise ValueError(f"Host path must be a subdir of '{old_root}', got '{path}' instead") from exc
+    except ValueError as e:
+        raise ValueError(f"Host path must be a subdir of either '{old_root}', got '{path}' instead") from e

aixtools/testing/aix_test_model.py CHANGED Viewed

@@ -112,7 +112,13 @@ class AixTestModel(Model):
         **kwargs,  # pylint: disable=unused-argument
     ) -> AsyncIterator[StreamedResponse]:
         model_response = await self._request(messages, model_settings, model_request_parameters)
-        yield TestStreamedResponse(_model_name=self.model_name, _structured_response=model_response, _messages=messages)
+        yield TestStreamedResponse(
+            _model_name=self.model_name,
+            _structured_response=model_response,
+            _messages=messages,
+            model_request_parameters=model_request_parameters,
+            _provider_name="",
+        )
     @property
     def model_name(self) -> str:

aixtools/tools/doctor/mcp_tool_doctor.py ADDED Viewed

@@ -0,0 +1,79 @@
+import argparse
+import asyncio
+from pydantic_ai.mcp import MCPServerStdio, MCPServerStreamableHTTP
+from aixtools.agents import get_agent, run_agent
+from aixtools.tools.doctor.tool_doctor import TOOL_DOCTOR_PROMPT
+from aixtools.tools.doctor.tool_recommendation import ToolRecommendation
+async def tool_doctor_mcp(
+    mcp_url: str = "http://127.0.0.1:8000/mcp",
+    mcp_server: MCPServerStreamableHTTP | MCPServerStdio | None = None,
+    verbose: bool = False,
+    debug: bool = False,
+) -> list[ToolRecommendation]:
+    """
+    Ask the tool doctor agent to review the tools of an MCP server.
+    The agent gets the MCP server as a toolset and is prompted with
+    TOOL_DOCTOR_PROMPT; its output (a list of ToolRecommendation) is returned.
+    Args:
+        mcp_url: URL of an HTTP MCP server. Only used when `mcp_server` is None.
+        mcp_server: An already constructed MCP server object (HTTP or STDIO).
+        verbose: Forwarded to `run_agent`.
+        debug: Forwarded to `run_agent`.
+    Examples:
+        # HTTP MCP server
+        recommendations = await tool_doctor_mcp(mcp_url='http://127.0.0.1:8000/mcp')
+        # STDIO MCP server
+        server = MCPServerStdio(command='fastmcp', args=['run', 'my_mcp_server.py'])
+        recommendations = await tool_doctor_mcp(mcp_server=server)
+    """
+    server = MCPServerStreamableHTTP(url=mcp_url) if mcp_server is None else mcp_server
+    agent = get_agent(toolsets=[server], output_type=list[ToolRecommendation])
+    async with agent:
+        # run_agent also returns the execution nodes, which are not needed here
+        recommendations, _ = await run_agent(agent, TOOL_DOCTOR_PROMPT, verbose=verbose, debug=debug)
+    return recommendations  # type: ignore
+def main_cli():
+    """Command line entry point: parse the options, run the tool doctor and print its recommendations."""
+    parser = argparse.ArgumentParser(description="Analyze tools from an MCP server and provide recommendations")
+    # The HTTP URL and the STDIO command are alternative ways to reach a server
+    connection = parser.add_mutually_exclusive_group()
+    connection.add_argument(
+        "--mcp-url",
+        default="http://127.0.0.1:8000/mcp",
+        help="URL of the HTTP MCP server (default: http://127.0.0.1:8000/mcp)",
+    )
+    connection.add_argument("--stdio-command", help="Command to run STDIO MCP server (e.g., 'fastmcp')")
+    parser.add_argument(
+        "--stdio-args",
+        nargs="*",
+        default=[],
+        help="Arguments for STDIO MCP server command (e.g., 'run', 'my_server.py')",
+    )
+    for flag, description in (("--verbose", "Enable verbose output"), ("--debug", "Enable debug output")):
+        parser.add_argument(flag, action="store_true", help=description)
+    opts = parser.parse_args()
+    async def run():
+        # A STDIO command takes priority; otherwise connect over HTTP
+        if opts.stdio_command:
+            target = {"mcp_server": MCPServerStdio(command=opts.stdio_command, args=opts.stdio_args)}
+        else:
+            target = {"mcp_url": opts.mcp_url}
+        recommendations = await tool_doctor_mcp(**target, verbose=opts.verbose, debug=opts.debug)
+        print("Tool Doctor Recommendations:")
+        print("=" * 50)
+        for position, recommendation in enumerate(recommendations, start=1):
+            print(f"\n{position}. {recommendation}")
+    asyncio.run(run())
+if __name__ == "__main__":
+    main_cli()

aixtools/tools/doctor/tool_doctor.py CHANGED Viewed

@@ -1,3 +1,7 @@
+"""
+Tool doctor: Analyze tools and give recommendations for improvement.
+"""
 from aixtools.agents import get_agent, run_agent
 from aixtools.tools.doctor.tool_recommendation import ToolRecommendation

aixtools/tools/doctor/tool_recommendation.py CHANGED Viewed

@@ -1,5 +1,10 @@
 from pydantic import BaseModel
+"""
+These are classes that represent recommendations for improving tools.
+They are ued by ToolDoctor
+"""
 class ArgumentRecommendation(BaseModel):
     """A recommendation for an argument"""

aixtools/utils/config.py CHANGED Viewed

@@ -71,7 +71,6 @@ logging.warning("Using         DATA_DIR='%s'", DATA_DIR)
 VDB_CHROMA_PATH = DATA_DB_DIR / "chroma.db"
 VDB_DEFAULT_SIMILARITY_THRESHOLD = 0.85
 # ---
 # Variables in '.env' file
 # Explicitly load specific variables

{aixtools-0.1.11.dist-info → aixtools-0.2.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: aixtools
-Version: 0.1.11
+Version: 0.2.0
 Summary: Tools for AI exploration and debugging
 Requires-Python: >=3.11.2
 Description-Content-Type: text/markdown
@@ -18,7 +18,8 @@ Requires-Dist: langchain-openai>=0.3.14
 Requires-Dist: mcp>=1.11.0
 Requires-Dist: mypy>=1.18.2
 Requires-Dist: pandas>=2.2.3
-Requires-Dist: pydantic-ai>=0.4.10
+Requires-Dist: pydantic-evals>=0.4.10
+Requires-Dist: pydantic-ai>=1.0.9
 Requires-Dist: pylint>=3.3.7
 Requires-Dist: rich>=14.0.0
 Requires-Dist: ruff>=0.11.6
@@ -45,20 +46,16 @@ A2A
 - Google SDK Integration for A2A - `aixtools/a2a/google_sdk/`
 - PydanticAI Adapter for Google SDK - `aixtools/a2a/google_sdk/pydantic_ai_adapter/`
-Databases
-- Database Integration - `aixtools/db/`
-- Vector Database Support - `aixtools/db/vector_db.py`
 Logging & Debugging
-- Log Viewing Application - `aixtools/log_view/`
+- Log Viewing Web Application - `log_view`
 - Object Logging System - `aixtools/logging/`
 - Model Patch Logging - `aixtools/logging/model_patch_logging.py`
 - Log Filtering System - `aixtools/logfilters/`
 - FastMCP Logging - `aixtools/mcp/fast_mcp_log.py`
-- Command Line Interface for Log Viewing - Entry point: `log_view`
 - MCP (Model Context Protocol) Support - `aixtools/logging/mcp_log_models.py`, `aixtools/logging/mcp_logger.py`
-Testing & Tools
+Testing Tools & Evals
+- Evaluations - `aixtools/evals/` - Entry point: `evals`
 - Testing Utilities - `aixtools/testing/`
 - Mock Tool System - `aixtools/testing/mock_tool.py`
 - Model Patch Caching - `aixtools/testing/model_patch_cache.py`
@@ -66,6 +63,10 @@ Testing & Tools
 - Tool Recommendation Engine - `aixtools/tools/doctor/tool_recommendation.py`
 - FaultyMCP - `aixtools/mcp/faulty_mcp.py`
+Databases
+- Database Integration - `aixtools/db/`
+- Vector Database Support - `aixtools/db/vector_db.py`
 Chainlit & HTTP Server
 - Chainlit Integration - `aixtools/app.py`, `aixtools/chainlit.md`
 - Chainlit Utilities - `aixtools/utils/chainlit/`
@@ -81,28 +82,11 @@ Programming utils
 ## Installation
-### From GitHub
 ```bash
 uv add aixtools
 ```
-### Development Setup
-```bash
-# Create a new project
-uv init MyNewProject
-cd MyNewProject
-# Add virtual environment and activate it
-uv venv .venv
-source .venv/bin/activate
-# Add this package
-uv add aixtools
-```
-### Updating
+**Updating**
 ```bash
 uv add --upgrade aixtools
@@ -114,6 +98,7 @@ AIXtools requires environment variables for model providers.
 **IMPORTANT:** Create a `.env` file based on [`.env_template`](./.env_template):
+Here is an example configuration:
 ```bash
 # Model family (azure, openai, or ollama)
 MODEL_FAMILY=azure
@@ -161,6 +146,33 @@ agent = get_agent(system_prompt="You are a helpful assistant.")
 result, nodes = await run_agent(agent, "Tell me about AI")
 ```
+### Node Debugging and Visualization
+The [`print_nodes`](aixtools/agents/print_nodes.py) module prints the nodes produced by an agent run as clean, indented output that is easy to read.
+```python
+from aixtools.agents.print_nodes import print_nodes, print_node
+from aixtools.agents.agent import get_agent, run_agent
+agent = get_agent(system_prompt="You are a helpful assistant.")
+result, nodes = await run_agent(agent, "Explain quantum computing")
+# Print all execution nodes for debugging
+print_nodes(nodes)
+```
+**Features:**
+- **Node Type Detection**: Automatically handles different node types (`UserPromptNode`, `CallToolsNode`, `ModelRequestNode`, `End`)
+- **Formatted Output**: Provides clean, indented output for easy reading
+- **Tool Call Visualization**: Shows tool names and arguments for tool calls
+- **Text Content Display**: Formats text parts with proper indentation
+- **Model Request Summary**: Shows character count for model requests to avoid verbose output
+**Node Types Supported:**
+- `UserPromptNode` - Displays user prompts with indentation
+- `CallToolsNode` - Shows tool calls with names and arguments
+- `ModelRequestNode` - Summarizes model requests with character count
+- `End` - Marks the end of execution (output suppressed by default)
 ### Agent Batch Processing
 Process multiple agent queries simultaneously with built-in concurrency control and result aggregation.
@@ -299,9 +311,39 @@ with ObjectLogger() as logger:
     logger.log(agent_response)
 ```
-### MCP Logger
+### MCP logging
+AIXtools provides MCP support for both client and server implementations, with easier logging for debugging purposes.
+**Example:**
+Let's assume we have an MCP server that runs an agent tool.
+Note that the `ctx: Context` parameter is passed to `run_agent`; this enables logging to the MCP client.
+```python
+@mcp.tool
+async def my_tool_with_agent(query: str, ctx: Context) -> str:
+    """ A tool that uses an agent to process the query """
+    agent = get_agent()
+    async with agent:
+        ret, nodes = await run_agent(agent=agent, prompt=query, ctx=ctx)    # Enable MCP logging
+        return str(ret)
+```
+On the client side, you can create an agent connected to the MCP server. The "nodes" from the MCP server's agent loop will be printed to STDOUT, so you can see what is going on inside the server.
+```python
+mcp = get_mcp_client("http://localhost:8000")   # Get an MCP client with a default log handler that prints to STDOUT
+agent = get_agent(toolsets=[mcp])
+async with agent:
+    # The messages from the MCP server will be printed to the STDOUT
+    ret, nodes = await run_agent(agent, prompt="...")
+```
+#### MCP Server Logging
-This is an MCP server that can log MCP requests and responses.
+Create MCP servers with built-in logging capabilities.
 ```python
 from aixtools.mcp.fast_mcp_log import FastMcpLog
@@ -371,6 +413,77 @@ By default, the "FaultyMCP" includes several tools you can use in your tests:
 - `freeze_server(seconds)` - Simulates server freeze
 - `throw_404_exception()` - Throws HTTP 404 error
+### Evals
+Run comprehensive Agent/LLM evaluations using the built-in evaluation discovery based on the Pydantic-AI framework.
+```bash
+# Run all evaluations
+evals
+# Run evaluations with filtering
+evals --filter "specific_test"
+# Run with verbose output and detailed reporting
+evals --verbose --include-input --include-output --include-reasons
+# Specify custom evaluations directory
+evals --evals-dir /path/to/evals
+# Set minimum assertions threshold
+evals --min-assertions 0.8
+```
+**Command Line Options:**
+- `--evals-dir` - Directory containing eval_*.py files (default: evals)
+- `--filter` - Filter to run only matching evaluations
+- `--include-input` - Include input in report output
+- `--include-output` - Include output in report output
+- `--include-evaluator-failures` - Include evaluator failures in report
+- `--include-reasons` - Include reasons in report output
+- `--min-assertions` - Minimum assertions average required for success (default: 1.0)
+- `--verbose` - Print detailed information about discovery and processing
+The evaluation system discovers and runs all Dataset objects from eval_*.py files in the specified directory, similar to test runners but specifically designed for LLM evaluations using pydantic_evals.
+**Discovery Mechanism:**
+The evaluation framework uses an automatic discovery system that:
+1. **File Discovery**: Scans the specified directory for files matching the pattern `eval_*.py`
+2. **Dataset Discovery**: Within each file, looks for variables named `dataset_*` that are instances of `pydantic_evals.Dataset`
+3. **Target Function Discovery**: Automatically finds the first async function in each module that doesn't start with an underscore (`_`) to use as the evaluation target
+4. **Filtering**: Supports filtering by module name, file name, dataset name, or fully qualified name
+**Example Evaluation File Structure:**
+```python
+# eval_math_operations.py
+from pydantic_evals import Dataset, Case
+# This dataset will be discovered automatically
+dataset_addition = Dataset(
+    name="Addition Tests",
+    cases=[
+        Case(input="What is 2 + 2?", expected="4"),
+        Case(input="What is 10 + 5?", expected="15"),
+    ],
+    evaluators=[...]
+)
+# This function will be used as the evaluation target
+async def evaluate_math_agent(input_text: str) -> str:
+    # Your agent evaluation logic here
+    agent = get_agent(system_prompt="You are a math assistant.")
+    result, _ = await run_agent(agent, input_text)
+    return result
+```
+The discovery system will:
+- Find `eval_math_operations.py` in the evals directory
+- Discover `dataset_addition` as an evaluation dataset
+- Use `evaluate_math_agent` as the target function for evaluation
+- Run each case through the target function and evaluate results
 ## Testing & Tools
 AIXtools provides comprehensive testing utilities and diagnostic tools for AI agent development and debugging.
@@ -397,7 +510,49 @@ test_model = AixTestModel()
 ### Tool Doctor System
-Automated tool analysis and recommendation system for optimizing agent tool usage.
+Automated tool analysis and recommendation system for optimizing agent tool usage and analyzing MCP servers.
+#### MCP Tool Doctor
+Analyze tools from MCP (Model Context Protocol) servers and receive AI-powered recommendations for improvement.
+```python
+from aixtools.tools.doctor.mcp_tool_doctor import tool_doctor_mcp
+from pydantic_ai.mcp import MCPServerStreamableHTTP, MCPServerStdio
+# Analyze HTTP MCP server
+recommendations = await tool_doctor_mcp(mcp_url='http://127.0.0.1:8000/mcp')
+for rec in recommendations:
+    print(rec)
+# Analyze STDIO MCP server
+server = MCPServerStdio(command='fastmcp', args=['run', 'my_server.py'])
+recommendations = await tool_doctor_mcp(mcp_server=server, verbose=True)
+```
+**Command Line Usage:**
+```bash
+# Analyze HTTP MCP server (default)
+tool_doctor_mcp
+# Analyze specific HTTP MCP server
+tool_doctor_mcp --mcp-url http://localhost:9000/mcp --verbose
+# Analyze STDIO MCP server
+tool_doctor_mcp --stdio-command fastmcp --stdio-args run my_server.py --debug
+# Available options:
+# --mcp-url URL          URL of HTTP MCP server (default: http://127.0.0.1:8000/mcp)
+# --stdio-command CMD    Command to run STDIO MCP server
+# --stdio-args ARGS      Arguments for STDIO MCP server command
+# --verbose              Enable verbose output
+# --debug                Enable debug output
+```
+#### Traditional Tool Doctor
+Analyze tool usage patterns from agent logs and get optimization recommendations.
 ```python
 from aixtools.tools.doctor.tool_doctor import ToolDoctor

aixtools 0.1.11__py3-none-any.whl → 0.2.0__py3-none-any.whl

Potentially problematic release.

aixtools 0.1.11py3-none-any.whl → 0.2.0py3-none-any.whl