aixtools 0.1.11__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aixtools might be problematic. Click here for more details.
- aixtools/_version.py +2 -2
- aixtools/agents/agent.py +26 -7
- aixtools/agents/print_nodes.py +54 -0
- aixtools/agents/prompt.py +2 -2
- aixtools/compliance/private_data.py +1 -1
- aixtools/evals/__init__.py +0 -0
- aixtools/evals/discovery.py +174 -0
- aixtools/evals/evals.py +74 -0
- aixtools/evals/run_evals.py +110 -0
- aixtools/logging/log_objects.py +24 -23
- aixtools/mcp/client.py +46 -1
- aixtools/server/__init__.py +0 -6
- aixtools/server/path.py +88 -31
- aixtools/testing/aix_test_model.py +7 -1
- aixtools/tools/doctor/mcp_tool_doctor.py +79 -0
- aixtools/tools/doctor/tool_doctor.py +4 -0
- aixtools/tools/doctor/tool_recommendation.py +5 -0
- aixtools/utils/config.py +0 -1
- {aixtools-0.1.11.dist-info → aixtools-0.2.0.dist-info}/METADATA +185 -30
- {aixtools-0.1.11.dist-info → aixtools-0.2.0.dist-info}/RECORD +23 -18
- aixtools-0.2.0.dist-info/entry_points.txt +4 -0
- aixtools/server/workspace_privacy.py +0 -65
- aixtools-0.1.11.dist-info/entry_points.txt +0 -2
- {aixtools-0.1.11.dist-info → aixtools-0.2.0.dist-info}/WHEEL +0 -0
- {aixtools-0.1.11.dist-info → aixtools-0.2.0.dist-info}/top_level.txt +0 -0
aixtools/mcp/client.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"""MCP server utilities with caching and robust error handling."""
|
|
2
2
|
|
|
3
3
|
import asyncio
|
|
4
|
+
import logging
|
|
4
5
|
from contextlib import asynccontextmanager
|
|
5
6
|
from datetime import timedelta
|
|
6
7
|
from typing import Any, AsyncGenerator
|
|
@@ -9,12 +10,13 @@ import anyio
|
|
|
9
10
|
import httpx
|
|
10
11
|
from anyio.streams.memory import MemoryObjectReceiveStream, MemoryObjectSendStream
|
|
11
12
|
from cachebox import TTLCache
|
|
13
|
+
from fastmcp.client.logging import LogMessage
|
|
12
14
|
from mcp import types as mcp_types
|
|
13
15
|
from mcp.client import streamable_http
|
|
14
16
|
from mcp.shared.exceptions import McpError
|
|
15
17
|
from mcp.shared.message import SessionMessage
|
|
16
18
|
from pydantic_ai import RunContext, exceptions
|
|
17
|
-
from pydantic_ai.mcp import MCPServerStreamableHTTP, ToolResult
|
|
19
|
+
from pydantic_ai.mcp import MCPServerStdio, MCPServerStreamableHTTP, ToolResult
|
|
18
20
|
from pydantic_ai.toolsets.abstract import ToolsetTool
|
|
19
21
|
|
|
20
22
|
from aixtools.context import SessionIdTuple
|
|
@@ -28,6 +30,49 @@ CACHE_KEY = "TOOL_LIST"
|
|
|
28
30
|
logger = get_logger(__name__)
|
|
29
31
|
|
|
30
32
|
|
|
33
|
+
# Default log_handler for MCP clients
LOGGING_LEVEL_MAP = logging.getLevelNamesMapping()

# MCP log severities that have no same-named level in Python's logging module.
# Without this table they would all fall through to INFO.
_MCP_ONLY_LEVEL_MAP = {
    "NOTICE": logging.INFO,
    "ALERT": logging.CRITICAL,
    "EMERGENCY": logging.CRITICAL,
}


async def default_mcp_log_handler(message: LogMessage):
    """
    Handles incoming logs from the MCP server and forwards them
    to the standard Python logging system.

    Args:
        message: Log notification received from the MCP server. When
            ``message.data`` is a dict, its ``"msg"`` and ``"extra"`` entries
            are used as the log text and the ``extra`` mapping. Any other
            payload (e.g. a plain string) is logged as-is, with no extra.
    """
    data = message.data
    if isinstance(data, dict):
        msg = data.get("msg")
        extra = data.get("extra")
    else:
        # Payload is not the {"msg", "extra"} dict; log it verbatim instead of crashing on `.get`
        msg, extra = data, None

    # Convert the MCP log level to a Python log level (unknown levels become INFO)
    level_name = message.level.upper()
    level = LOGGING_LEVEL_MAP.get(level_name, _MCP_ONLY_LEVEL_MAP.get(level_name, logging.INFO))

    # Log the message using the standard logging library
    logger.log(level, msg, extra=extra)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def get_mcp_client(
    url: str | None = None,
    command: str | None = None,
    args: list[str] | None = None,
    log_handler: callable = default_mcp_log_handler,  # type: ignore
) -> MCPServerStreamableHTTP | MCPServerStdio:
    """
    Create an MCP client instance based on the provided URL or command.
    By providing a log_handler, incoming logs from the MCP server can be shown, which improves debugging.

    If both `url` and `command` are given, `url` takes precedence.

    Args:
        url (str | None): The URL of the MCP server.
        command (str | None): The command to start a local MCP server (STDIO MCP).
        args (list[str] | None): Additional arguments for the command (STDIO MCP).
        log_handler (callable): Handler invoked for log messages sent by the MCP server.

    Returns:
        An `MCPServerStreamableHTTP` when `url` is given, otherwise an `MCPServerStdio`.

    Raises:
        ValueError: If neither `url` nor `command` is provided.
    """
    if args is None:
        args = []
    if url:
        # Connect to the URL the caller asked for (it was previously ignored in favour of a fixed address)
        return MCPServerStreamableHTTP(url=url, log_handler=log_handler)
    if command:
        return MCPServerStdio(command=command, args=args, log_handler=log_handler)
    raise ValueError("Either url or command must be provided to create MCP client.")
|
|
74
|
+
|
|
75
|
+
|
|
31
76
|
def get_mcp_headers(session_id_tuple: SessionIdTuple) -> dict[str, str] | None:
|
|
32
77
|
"""
|
|
33
78
|
Generate headers for MCP server requests.
|
aixtools/server/__init__.py
CHANGED
|
@@ -13,10 +13,6 @@ from .utils import (
|
|
|
13
13
|
get_session_id_tuple,
|
|
14
14
|
run_in_thread,
|
|
15
15
|
)
|
|
16
|
-
from .workspace_privacy import (
|
|
17
|
-
is_session_private,
|
|
18
|
-
set_session_private,
|
|
19
|
-
)
|
|
20
16
|
|
|
21
17
|
__all__ = [
|
|
22
18
|
"get_workspace_path",
|
|
@@ -24,6 +20,4 @@ __all__ = [
|
|
|
24
20
|
"container_to_host_path",
|
|
25
21
|
"host_to_container_path",
|
|
26
22
|
"run_in_thread",
|
|
27
|
-
"is_session_private",
|
|
28
|
-
"set_session_private",
|
|
29
23
|
]
|
aixtools/server/path.py
CHANGED
|
@@ -2,47 +2,82 @@
|
|
|
2
2
|
Workspace path handling for user sessions.
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
import posixpath
|
|
6
|
+
from pathlib import Path, PurePosixPath
|
|
6
7
|
|
|
7
8
|
from fastmcp import Context
|
|
8
9
|
|
|
9
10
|
from ..utils.config import DATA_DIR
|
|
10
11
|
from .utils import get_session_id_tuple
|
|
11
12
|
|
|
12
|
-
WORKSPACES_ROOT_DIR = DATA_DIR / "workspaces" # Path on the host where workspaces are stored
|
|
13
|
+
WORKSPACES_ROOT_DIR = (DATA_DIR / "workspaces").resolve() # Path on the host where workspaces are stored
|
|
13
14
|
CONTAINER_WORKSPACE_PATH = PurePosixPath("/workspace") # Path inside the sandbox container where workspace is mounted
|
|
14
15
|
|
|
15
16
|
|
|
16
|
-
def get_workspace_path(
|
|
17
|
+
def get_workspace_path(ctx: Context | tuple | None = None) -> Path:
|
|
17
18
|
"""
|
|
18
|
-
Get the workspace path for a specific service (e.g. MCP server).
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
the environment variables
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
19
|
+
Get the workspace path (in the host) for a specific service (e.g. MCP server).
|
|
20
|
+
Returns the path based on user and session IDs in the format:
|
|
21
|
+
|
|
22
|
+
<DATA_DIR>/workspaces/<user_id>/<session_id>
|
|
23
|
+
|
|
24
|
+
where `DATA_DIR` should come from the environment variables
|
|
25
|
+
Example workspace path:
|
|
26
|
+
|
|
27
|
+
/data/workspaces/foo-user/bar-session
|
|
28
|
+
|
|
29
|
+
The `ctx` is used to get user and session IDs tuple. It can be passed directly
|
|
30
|
+
or via HTTP headers from `Context`. If `ctx` is None, the current FastMCP
|
|
31
|
+
request HTTP headers are used.
|
|
27
32
|
|
|
28
33
|
Args:
|
|
29
34
|
ctx: The FastMCP context, which contains the user session.
|
|
30
|
-
service_name: The name of the service (e.g. "mcp_server").
|
|
31
|
-
in_sandbox: If True, use a sandbox path; otherwise, use user/session-based path.
|
|
32
35
|
|
|
33
|
-
Returns: The workspace path as a
|
|
36
|
+
Returns: The workspace path as a Path object.
|
|
37
|
+
"""
|
|
38
|
+
user_id, session_id = ctx if isinstance(ctx, tuple) else get_session_id_tuple(ctx)
|
|
39
|
+
return WORKSPACES_ROOT_DIR / user_id / session_id
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def get_workspace_path_sandbox() -> PurePosixPath:
|
|
43
|
+
"""
|
|
44
|
+
Get the workspace path in the sandbox container.
|
|
45
|
+
|
|
46
|
+
We return PurePosixPath to ensure compatibility with Linux containers.
|
|
47
|
+
|
|
48
|
+
The paths inside the sandbox cannot be resolved (because they don't exist
|
|
49
|
+
on the host), so we use PurePosixPath instead of Path. Also Path could be
|
|
50
|
+
a WindowsPath on Windows hosts, which would be incorrect for Linux containers.
|
|
51
|
+
|
|
52
|
+
Returns: The workspace path as a PurePosixPath object.
|
|
34
53
|
"""
|
|
35
|
-
|
|
36
|
-
path = CONTAINER_WORKSPACE_PATH
|
|
37
|
-
else:
|
|
38
|
-
user_id, session_id = ctx if isinstance(ctx, tuple) else get_session_id_tuple(ctx)
|
|
39
|
-
path = WORKSPACES_ROOT_DIR / user_id / session_id
|
|
40
|
-
if service_name:
|
|
41
|
-
path = path / service_name
|
|
42
|
-
return path
|
|
54
|
+
return CONTAINER_WORKSPACE_PATH
|
|
43
55
|
|
|
44
56
|
|
|
45
|
-
def
|
|
57
|
+
def path_normalize(p: PurePosixPath) -> PurePosixPath:
|
|
58
|
+
"""
|
|
59
|
+
Normalize a PurePosixPath (remove redundant separators and up-level references).
|
|
60
|
+
"""
|
|
61
|
+
return PurePosixPath(posixpath.normpath(p.as_posix()))
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def path_chroot(path: Path, old_root: Path, new_root: Path) -> Path:
    """
    Change the root of a given path from old_root to new_root.
    If the path is not absolute (e.g. 'my_file.txt', './my_file.txt', 'my_dir/file.txt')
    we treat it as relative to the 'new_root'

    In both cases the result must stay inside 'new_root'; '..' components that
    would lead outside of it are rejected.

    Args:
        path: The path to re-root (absolute under old_root, or relative).
        old_root: The root that an absolute `path` is currently under.
        new_root: The root the returned path will be under.

    Returns: The re-rooted path. Relative inputs are returned resolved.

    Raises:
        ValueError: If an absolute `path` is not under `old_root`, or if the
            resulting path would escape `new_root`.
    """
    path = Path(path)
    resolved_root = Path(new_root).resolve()

    if not path.is_absolute():
        new_path = Path(new_root / path).resolve()
        if not new_path.is_relative_to(resolved_root):
            raise ValueError(f"Path must not escape the workspace root: '{path}'")
        return new_path

    # Otherwise, we treat it as absolute and change the root
    new_path = new_root / path.relative_to(old_root)
    # relative_to() keeps '..' components, so '<old_root>/../x' would otherwise escape new_root
    if not Path(new_path).resolve().is_relative_to(resolved_root):
        raise ValueError(f"Path must not escape the workspace root: '{path}'")
    return new_path
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def container_to_host_path(path: PurePosixPath, *, ctx: Context | tuple | None = None) -> Path | None:
|
|
46
81
|
"""
|
|
47
82
|
Convert a path in a sandbox container to a host path
|
|
48
83
|
|
|
@@ -54,19 +89,41 @@ def container_to_host_path(path: PurePosixPath, *, ctx: Context | tuple = None)
|
|
|
54
89
|
Returns:
|
|
55
90
|
Path to the file on the host, or None if the conversion fails.
|
|
56
91
|
"""
|
|
57
|
-
|
|
92
|
+
# Try without service name (maybe the LLM forgot to put the SERVICE_NAME in the path)
|
|
93
|
+
old_root = get_workspace_path_sandbox()
|
|
58
94
|
new_root = get_workspace_path(ctx=ctx)
|
|
59
95
|
try:
|
|
60
|
-
|
|
96
|
+
# Relative paths are treated as relative to the new_root
|
|
97
|
+
if not PurePosixPath(path).is_absolute():
|
|
98
|
+
# Resolve paths to prevent escaping the workspace root
|
|
99
|
+
new_path = Path(new_root / path).resolve()
|
|
100
|
+
new_root = Path(new_root.resolve())
|
|
101
|
+
if not new_path.is_relative_to(new_root):
|
|
102
|
+
raise ValueError(f"Path must not escape the workspace root: '{path}'")
|
|
103
|
+
return new_path
|
|
104
|
+
# Otherwise, we treat it as absolute and change the root
|
|
105
|
+
return new_root / Path(path).relative_to(old_root)
|
|
61
106
|
except ValueError as e:
|
|
62
107
|
raise ValueError(f"Container path must be a subdir of '{old_root}', got '{path}' instead") from e
|
|
63
108
|
|
|
64
109
|
|
|
65
|
-
def host_to_container_path(path: Path, *, ctx: Context | tuple = None) -> PurePosixPath:
|
|
66
|
-
"""
|
|
110
|
+
def host_to_container_path(path: Path, *, ctx: Context | tuple | None = None) -> PurePosixPath:
|
|
111
|
+
"""
|
|
112
|
+
Convert a host path to a path in a sandbox container.
|
|
113
|
+
Paths inside the sandbox MUST be PurePosixPath (i.e. we use Linux containers).
|
|
114
|
+
"""
|
|
67
115
|
old_root = get_workspace_path(ctx=ctx)
|
|
68
|
-
new_root =
|
|
116
|
+
new_root = get_workspace_path_sandbox()
|
|
69
117
|
try:
|
|
118
|
+
# Relative paths are treated as relative to the new_root
|
|
119
|
+
if not Path(path).is_absolute():
|
|
120
|
+
# Normalize paths to prevent escaping the workspace root (we cannot resolve PurePosixPaths)
|
|
121
|
+
new_path = path_normalize(new_root / path)
|
|
122
|
+
new_root = path_normalize(new_root)
|
|
123
|
+
if not new_path.is_relative_to(new_root):
|
|
124
|
+
raise ValueError(f"Path must not escape the workspace root: '{path}'")
|
|
125
|
+
return new_path
|
|
126
|
+
# Otherwise, we treat it as absolute and change the root
|
|
70
127
|
return new_root / Path(path).relative_to(old_root)
|
|
71
|
-
except ValueError as
|
|
72
|
-
raise ValueError(f"Host path must be a subdir of '{old_root}', got '{path}' instead") from
|
|
128
|
+
except ValueError as e:
|
|
129
|
+
raise ValueError(f"Host path must be a subdir of either '{old_root}', got '{path}' instead") from e
|
|
@@ -112,7 +112,13 @@ class AixTestModel(Model):
|
|
|
112
112
|
**kwargs, # pylint: disable=unused-argument
|
|
113
113
|
) -> AsyncIterator[StreamedResponse]:
|
|
114
114
|
model_response = await self._request(messages, model_settings, model_request_parameters)
|
|
115
|
-
yield TestStreamedResponse(
|
|
115
|
+
yield TestStreamedResponse(
|
|
116
|
+
_model_name=self.model_name,
|
|
117
|
+
_structured_response=model_response,
|
|
118
|
+
_messages=messages,
|
|
119
|
+
model_request_parameters=model_request_parameters,
|
|
120
|
+
_provider_name="",
|
|
121
|
+
)
|
|
116
122
|
|
|
117
123
|
@property
|
|
118
124
|
def model_name(self) -> str:
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import asyncio
|
|
3
|
+
|
|
4
|
+
from pydantic_ai.mcp import MCPServerStdio, MCPServerStreamableHTTP
|
|
5
|
+
|
|
6
|
+
from aixtools.agents import get_agent, run_agent
|
|
7
|
+
from aixtools.tools.doctor.tool_doctor import TOOL_DOCTOR_PROMPT
|
|
8
|
+
from aixtools.tools.doctor.tool_recommendation import ToolRecommendation
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
async def tool_doctor_mcp(
    mcp_url: str = "http://127.0.0.1:8000/mcp",
    mcp_server: MCPServerStreamableHTTP | MCPServerStdio | None = None,
    verbose: bool = False,
    debug: bool = False,
) -> list[ToolRecommendation]:
    """
    Run the tool doctor agent to analyze tools from an MCP server and give recommendations.

    Args:
        mcp_url: URL of an HTTP MCP server. Only used when `mcp_server` is None.
        mcp_server: An already-constructed MCP server (HTTP or STDIO). When given,
            it takes precedence over `mcp_url`.
        verbose: Forwarded to `run_agent`.
        debug: Forwarded to `run_agent`.

    Returns:
        The agent's output, requested as a list of `ToolRecommendation`.

    Usage examples:
        # Using an http MCP server
        ret = await tool_doctor_mcp(mcp_url='http://127.0.0.1:8000/mcp')
        print(ret)

        # Using a stdio MCP server
        server = MCPServerStdio(command='fastmcp', args=['run', 'my_mcp_server.py'])
        ret = await tool_doctor_mcp(mcp_server=server)
        print(ret)
    """
    if mcp_server is None:
        mcp_server = MCPServerStreamableHTTP(url=mcp_url)
    agent = get_agent(toolsets=[mcp_server], output_type=list[ToolRecommendation])
    async with agent:
        # Only the agent output is needed here; the execution nodes are discarded
        ret, _ = await run_agent(agent, TOOL_DOCTOR_PROMPT, verbose=verbose, debug=debug)
        return ret  # type: ignore
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def main_cli():
    """
    Command line interface for tool doctor MCP.

    Parses the command line, analyzes either an HTTP MCP server (`--mcp-url`)
    or a STDIO MCP server (`--stdio-command` plus optional `--stdio-args`),
    and prints the numbered recommendations to STDOUT.
    """
    parser = argparse.ArgumentParser(description="Analyze tools from an MCP server and provide recommendations")

    # MCP server connection options
    server_group = parser.add_mutually_exclusive_group()
    server_group.add_argument(
        "--mcp-url",
        default="http://127.0.0.1:8000/mcp",
        help="URL of the HTTP MCP server (default: http://127.0.0.1:8000/mcp)",
    )
    server_group.add_argument("--stdio-command", help="Command to run STDIO MCP server (e.g., 'fastmcp')")

    parser.add_argument(
        "--stdio-args",
        nargs="*",
        default=[],
        help="Arguments for STDIO MCP server command (e.g., 'run', 'my_server.py')",
    )
    parser.add_argument("--verbose", action="store_true", help="Enable verbose output")
    parser.add_argument("--debug", action="store_true", help="Enable debug output")

    args = parser.parse_args()

    # STDIO arguments are meaningless without a command; fail loudly instead of silently
    # ignoring them and analyzing the default HTTP server.
    if args.stdio_args and not args.stdio_command:
        parser.error("--stdio-args requires --stdio-command")

    async def run():
        # With no STDIO command, mcp_server stays None and tool_doctor_mcp falls back to mcp_url
        mcp_server = None
        if args.stdio_command:
            mcp_server = MCPServerStdio(command=args.stdio_command, args=args.stdio_args)
        recommendations = await tool_doctor_mcp(
            mcp_url=args.mcp_url,
            mcp_server=mcp_server,
            verbose=args.verbose,
            debug=args.debug,
        )

        print("Tool Doctor Recommendations:")
        print("=" * 50)
        for i, rec in enumerate(recommendations, 1):
            print(f"\n{i}. {rec}")

    asyncio.run(run())
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
if __name__ == "__main__":
|
|
79
|
+
main_cli()
|
aixtools/utils/config.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: aixtools
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: Tools for AI exploration and debugging
|
|
5
5
|
Requires-Python: >=3.11.2
|
|
6
6
|
Description-Content-Type: text/markdown
|
|
@@ -18,7 +18,8 @@ Requires-Dist: langchain-openai>=0.3.14
|
|
|
18
18
|
Requires-Dist: mcp>=1.11.0
|
|
19
19
|
Requires-Dist: mypy>=1.18.2
|
|
20
20
|
Requires-Dist: pandas>=2.2.3
|
|
21
|
-
Requires-Dist: pydantic-
|
|
21
|
+
Requires-Dist: pydantic-evals>=0.4.10
|
|
22
|
+
Requires-Dist: pydantic-ai>=1.0.9
|
|
22
23
|
Requires-Dist: pylint>=3.3.7
|
|
23
24
|
Requires-Dist: rich>=14.0.0
|
|
24
25
|
Requires-Dist: ruff>=0.11.6
|
|
@@ -45,20 +46,16 @@ A2A
|
|
|
45
46
|
- Google SDK Integration for A2A - `aixtools/a2a/google_sdk/`
|
|
46
47
|
- PydanticAI Adapter for Google SDK - `aixtools/a2a/google_sdk/pydantic_ai_adapter/`
|
|
47
48
|
|
|
48
|
-
Databases
|
|
49
|
-
- Database Integration - `aixtools/db/`
|
|
50
|
-
- Vector Database Support - `aixtools/db/vector_db.py`
|
|
51
|
-
|
|
52
49
|
Logging & Debugging
|
|
53
|
-
- Log Viewing Application - `
|
|
50
|
+
- Log Viewing Web Application - `log_view`
|
|
54
51
|
- Object Logging System - `aixtools/logging/`
|
|
55
52
|
- Model Patch Logging - `aixtools/logging/model_patch_logging.py`
|
|
56
53
|
- Log Filtering System - `aixtools/logfilters/`
|
|
57
54
|
- FastMCP Logging - `aixtools/mcp/fast_mcp_log.py`
|
|
58
|
-
- Command Line Interface for Log Viewing - Entry point: `log_view`
|
|
59
55
|
- MCP (Model Context Protocol) Support - `aixtools/logging/mcp_log_models.py`, `aixtools/logging/mcp_logger.py`
|
|
60
56
|
|
|
61
|
-
Testing &
|
|
57
|
+
Testing Tools & Evals
|
|
58
|
+
- Evaluations - `aixtools/evals/` - Entry point: `evals`
|
|
62
59
|
- Testing Utilities - `aixtools/testing/`
|
|
63
60
|
- Mock Tool System - `aixtools/testing/mock_tool.py`
|
|
64
61
|
- Model Patch Caching - `aixtools/testing/model_patch_cache.py`
|
|
@@ -66,6 +63,10 @@ Testing & Tools
|
|
|
66
63
|
- Tool Recommendation Engine - `aixtools/tools/doctor/tool_recommendation.py`
|
|
67
64
|
- FaultyMCP - `aixtools/mcp/faulty_mcp.py`
|
|
68
65
|
|
|
66
|
+
Databases
|
|
67
|
+
- Database Integration - `aixtools/db/`
|
|
68
|
+
- Vector Database Support - `aixtools/db/vector_db.py`
|
|
69
|
+
|
|
69
70
|
Chainlit & HTTP Server
|
|
70
71
|
- Chainlit Integration - `aixtools/app.py`, `aixtools/chainlit.md`
|
|
71
72
|
- Chainlit Utilities - `aixtools/utils/chainlit/`
|
|
@@ -81,28 +82,11 @@ Programming utils
|
|
|
81
82
|
|
|
82
83
|
## Installation
|
|
83
84
|
|
|
84
|
-
### From GitHub
|
|
85
|
-
|
|
86
85
|
```bash
|
|
87
86
|
uv add aixtools
|
|
88
87
|
```
|
|
89
88
|
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
```bash
|
|
93
|
-
# Create a new project
|
|
94
|
-
uv init MyNewProject
|
|
95
|
-
cd MyNewProject
|
|
96
|
-
|
|
97
|
-
# Add virtual environment and activate it
|
|
98
|
-
uv venv .venv
|
|
99
|
-
source .venv/bin/activate
|
|
100
|
-
|
|
101
|
-
# Add this package
|
|
102
|
-
uv add aixtools
|
|
103
|
-
```
|
|
104
|
-
|
|
105
|
-
### Updating
|
|
89
|
+
**Updating**
|
|
106
90
|
|
|
107
91
|
```bash
|
|
108
92
|
uv add --upgrade aixtools
|
|
@@ -114,6 +98,7 @@ AIXtools requires environment variables for model providers.
|
|
|
114
98
|
|
|
115
99
|
**IMPORTANT:** Create a `.env` file based on [`.env_template`](./.env_template):
|
|
116
100
|
|
|
101
|
+
Here is an example configuration:
|
|
117
102
|
```bash
|
|
118
103
|
# Model family (azure, openai, or ollama)
|
|
119
104
|
MODEL_FAMILY=azure
|
|
@@ -161,6 +146,33 @@ agent = get_agent(system_prompt="You are a helpful assistant.")
|
|
|
161
146
|
result, nodes = await run_agent(agent, "Tell me about AI")
|
|
162
147
|
```
|
|
163
148
|
|
|
149
|
+
### Node Debugging and Visualization
|
|
150
|
+
|
|
151
|
+
The [`print_nodes`](aixtools/agents/print_nodes.py) module provides clean, indented output that makes the nodes from an agent execution easy to read.
|
|
152
|
+
|
|
153
|
+
```python
|
|
154
|
+
from aixtools.agents.print_nodes import print_nodes, print_node
|
|
155
|
+
from aixtools.agents.agent import get_agent, run_agent
|
|
156
|
+
|
|
157
|
+
agent = get_agent(system_prompt="You are a helpful assistant.")
|
|
158
|
+
result, nodes = await run_agent(agent, "Explain quantum computing")
|
|
159
|
+
# Print all execution nodes for debugging
|
|
160
|
+
print_nodes(nodes)
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
**Features:**
|
|
164
|
+
- **Node Type Detection**: Automatically handles different node types (`UserPromptNode`, `CallToolsNode`, `ModelRequestNode`, `End`)
|
|
165
|
+
- **Formatted Output**: Provides clean, indented output for easy reading
|
|
166
|
+
- **Tool Call Visualization**: Shows tool names and arguments for tool calls
|
|
167
|
+
- **Text Content Display**: Formats text parts with proper indentation
|
|
168
|
+
- **Model Request Summary**: Shows character count for model requests to avoid verbose output
|
|
169
|
+
|
|
170
|
+
**Node Types Supported:**
|
|
171
|
+
- `UserPromptNode` - Displays user prompts with indentation
|
|
172
|
+
- `CallToolsNode` - Shows tool calls with names and arguments
|
|
173
|
+
- `ModelRequestNode` - Summarizes model requests with character count
|
|
174
|
+
- `End` - Marks the end of execution (output suppressed by default)
|
|
175
|
+
|
|
164
176
|
### Agent Batch Processing
|
|
165
177
|
|
|
166
178
|
Process multiple agent queries simultaneously with built-in concurrency control and result aggregation.
|
|
@@ -299,9 +311,39 @@ with ObjectLogger() as logger:
|
|
|
299
311
|
logger.log(agent_response)
|
|
300
312
|
```
|
|
301
313
|
|
|
302
|
-
### MCP
|
|
314
|
+
### MCP logging
|
|
315
|
+
|
|
316
|
+
AIXtools provides MCP support for both client and server implementations, with easier logging for debugging purposes.
|
|
317
|
+
|
|
318
|
+
**Example:**
|
|
319
|
+
|
|
320
|
+
Let's assume we have an MCP server that runs an agent tool.
|
|
321
|
+
|
|
322
|
+
Note that the `ctx: Context` parameter is passed to `run_agent`; this enables logging to the MCP client.
|
|
323
|
+
|
|
324
|
+
```python
|
|
325
|
+
@mcp.tool
|
|
326
|
+
async def my_tool_with_agent(query: str, ctx: Context) -> str:
|
|
327
|
+
""" A tool that uses an agent to process the query """
|
|
328
|
+
agent = get_agent()
|
|
329
|
+
async with agent:
|
|
330
|
+
ret, nodes = await run_agent(agent=agent, prompt=query, ctx=ctx) # Enable MCP logging
|
|
331
|
+
return str(ret)
|
|
332
|
+
```
|
|
333
|
+
|
|
334
|
+
On the client side, you can create an agent connected to the MCP server. The "nodes" from the MCP server will be shown on STDOUT, so you can see what is going on in the MCP server's agent loop.
|
|
335
|
+
|
|
336
|
+
```python
|
|
337
|
+
mcp = get_mcp_client("http://localhost:8000") # Get an MCP client with a default log handler that prints to STDOUT
|
|
338
|
+
agent = get_agent(toolsets=[mcp])
|
|
339
|
+
async with agent:
|
|
340
|
+
# The messages from the MCP server will be printed to the STDOUT
|
|
341
|
+
ret, nodes = await run_agent(agent, prompt="...")
|
|
342
|
+
```
|
|
343
|
+
|
|
344
|
+
#### MCP Server Logging
|
|
303
345
|
|
|
304
|
-
|
|
346
|
+
Create MCP servers with built-in logging capabilities.
|
|
305
347
|
|
|
306
348
|
```python
|
|
307
349
|
from aixtools.mcp.fast_mcp_log import FastMcpLog
|
|
@@ -371,6 +413,77 @@ By default, the "FaultyMCP" includes several tools you can use in your tests:
|
|
|
371
413
|
- `freeze_server(seconds)` - Simulates server freeze
|
|
372
414
|
- `throw_404_exception()` - Throws HTTP 404 error
|
|
373
415
|
|
|
416
|
+
### Evals
|
|
417
|
+
|
|
418
|
+
Run comprehensive Agent/LLM evaluations using the built-in evaluation discovery based on the Pydantic-AI framework.
|
|
419
|
+
|
|
420
|
+
```bash
|
|
421
|
+
# Run all evaluations
|
|
422
|
+
evals
|
|
423
|
+
|
|
424
|
+
# Run evaluations with filtering
|
|
425
|
+
evals --filter "specific_test"
|
|
426
|
+
|
|
427
|
+
# Run with verbose output and detailed reporting
|
|
428
|
+
evals --verbose --include-input --include-output --include-reasons
|
|
429
|
+
|
|
430
|
+
# Specify custom evaluations directory
|
|
431
|
+
evals --evals-dir /path/to/evals
|
|
432
|
+
|
|
433
|
+
# Set minimum assertions threshold
|
|
434
|
+
evals --min-assertions 0.8
|
|
435
|
+
```
|
|
436
|
+
|
|
437
|
+
**Command Line Options:**
|
|
438
|
+
- `--evals-dir` - Directory containing eval_*.py files (default: evals)
|
|
439
|
+
- `--filter` - Filter to run only matching evaluations
|
|
440
|
+
- `--include-input` - Include input in report output
|
|
441
|
+
- `--include-output` - Include output in report output
|
|
442
|
+
- `--include-evaluator-failures` - Include evaluator failures in report
|
|
443
|
+
- `--include-reasons` - Include reasons in report output
|
|
444
|
+
- `--min-assertions` - Minimum assertions average required for success (default: 1.0)
|
|
445
|
+
- `--verbose` - Print detailed information about discovery and processing
|
|
446
|
+
|
|
447
|
+
The evaluation system discovers and runs all Dataset objects from eval_*.py files in the specified directory, similar to test runners but specifically designed for LLM evaluations using pydantic_evals.
|
|
448
|
+
|
|
449
|
+
**Discovery Mechanism:**
|
|
450
|
+
|
|
451
|
+
The evaluation framework uses an automatic discovery system that:
|
|
452
|
+
|
|
453
|
+
1. **File Discovery**: Scans the specified directory for files matching the pattern `eval_*.py`
|
|
454
|
+
2. **Dataset Discovery**: Within each file, looks for variables named `dataset_*` that are instances of `pydantic_evals.Dataset`
|
|
455
|
+
3. **Target Function Discovery**: Automatically finds the first async function in each module that doesn't start with an underscore (`_`) to use as the evaluation target
|
|
456
|
+
4. **Filtering**: Supports filtering by module name, file name, dataset name, or fully qualified name
|
|
457
|
+
|
|
458
|
+
**Example Evaluation File Structure:**
|
|
459
|
+
```python
|
|
460
|
+
# eval_math_operations.py
|
|
461
|
+
from pydantic_evals import Dataset, Case
|
|
462
|
+
|
|
463
|
+
# This dataset will be discovered automatically
|
|
464
|
+
dataset_addition = Dataset(
|
|
465
|
+
name="Addition Tests",
|
|
466
|
+
cases=[
|
|
467
|
+
Case(input="What is 2 + 2?", expected="4"),
|
|
468
|
+
Case(input="What is 10 + 5?", expected="15"),
|
|
469
|
+
],
|
|
470
|
+
evaluators=[...]
|
|
471
|
+
)
|
|
472
|
+
|
|
473
|
+
# This function will be used as the evaluation target
|
|
474
|
+
async def evaluate_math_agent(input_text: str) -> str:
|
|
475
|
+
# Your agent evaluation logic here
|
|
476
|
+
agent = get_agent(system_prompt="You are a math assistant.")
|
|
477
|
+
result, _ = await run_agent(agent, input_text)
|
|
478
|
+
return result
|
|
479
|
+
```
|
|
480
|
+
|
|
481
|
+
The discovery system will:
|
|
482
|
+
- Find `eval_math_operations.py` in the evals directory
|
|
483
|
+
- Discover `dataset_addition` as an evaluation dataset
|
|
484
|
+
- Use `evaluate_math_agent` as the target function for evaluation
|
|
485
|
+
- Run each case through the target function and evaluate results
|
|
486
|
+
|
|
374
487
|
## Testing & Tools
|
|
375
488
|
|
|
376
489
|
AIXtools provides comprehensive testing utilities and diagnostic tools for AI agent development and debugging.
|
|
@@ -397,7 +510,49 @@ test_model = AixTestModel()
|
|
|
397
510
|
|
|
398
511
|
### Tool Doctor System
|
|
399
512
|
|
|
400
|
-
Automated tool analysis and recommendation system for optimizing agent tool usage.
|
|
513
|
+
Automated tool analysis and recommendation system for optimizing agent tool usage and analyzing MCP servers.
|
|
514
|
+
|
|
515
|
+
#### MCP Tool Doctor
|
|
516
|
+
|
|
517
|
+
Analyze tools from MCP (Model Context Protocol) servers and receive AI-powered recommendations for improvement.
|
|
518
|
+
|
|
519
|
+
```python
|
|
520
|
+
from aixtools.tools.doctor.mcp_tool_doctor import tool_doctor_mcp
|
|
521
|
+
from pydantic_ai.mcp import MCPServerStreamableHTTP, MCPServerStdio
|
|
522
|
+
|
|
523
|
+
# Analyze HTTP MCP server
|
|
524
|
+
recommendations = await tool_doctor_mcp(mcp_url='http://127.0.0.1:8000/mcp')
|
|
525
|
+
for rec in recommendations:
|
|
526
|
+
print(rec)
|
|
527
|
+
|
|
528
|
+
# Analyze STDIO MCP server
|
|
529
|
+
server = MCPServerStdio(command='fastmcp', args=['run', 'my_server.py'])
|
|
530
|
+
recommendations = await tool_doctor_mcp(mcp_server=server, verbose=True)
|
|
531
|
+
```
|
|
532
|
+
|
|
533
|
+
**Command Line Usage:**
|
|
534
|
+
|
|
535
|
+
```bash
|
|
536
|
+
# Analyze HTTP MCP server (default)
|
|
537
|
+
tool_doctor_mcp
|
|
538
|
+
|
|
539
|
+
# Analyze specific HTTP MCP server
|
|
540
|
+
tool_doctor_mcp --mcp-url http://localhost:9000/mcp --verbose
|
|
541
|
+
|
|
542
|
+
# Analyze STDIO MCP server
|
|
543
|
+
tool_doctor_mcp --stdio-command fastmcp --stdio-args run my_server.py --debug
|
|
544
|
+
|
|
545
|
+
# Available options:
|
|
546
|
+
# --mcp-url URL URL of HTTP MCP server (default: http://127.0.0.1:8000/mcp)
|
|
547
|
+
# --stdio-command CMD Command to run STDIO MCP server
|
|
548
|
+
# --stdio-args ARGS Arguments for STDIO MCP server command
|
|
549
|
+
# --verbose Enable verbose output
|
|
550
|
+
# --debug Enable debug output
|
|
551
|
+
```
|
|
552
|
+
|
|
553
|
+
#### Traditional Tool Doctor
|
|
554
|
+
|
|
555
|
+
Analyze tool usage patterns from agent logs and get optimization recommendations.
|
|
401
556
|
|
|
402
557
|
```python
|
|
403
558
|
from aixtools.tools.doctor.tool_doctor import ToolDoctor
|