aixtools 0.1.11__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aixtools might be problematic. Click here for more details.

Files changed (95) hide show
  1. {aixtools-0.1.11 → aixtools-0.2.0}/PKG-INFO +185 -30
  2. {aixtools-0.1.11 → aixtools-0.2.0}/README.md +182 -28
  3. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/_version.py +3 -3
  4. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/agents/agent.py +26 -7
  5. aixtools-0.2.0/aixtools/agents/print_nodes.py +54 -0
  6. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/agents/prompt.py +2 -2
  7. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/compliance/private_data.py +1 -1
  8. aixtools-0.2.0/aixtools/evals/__init__.py +0 -0
  9. aixtools-0.2.0/aixtools/evals/discovery.py +174 -0
  10. aixtools-0.2.0/aixtools/evals/evals.py +74 -0
  11. aixtools-0.2.0/aixtools/evals/run_evals.py +110 -0
  12. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/logging/log_objects.py +24 -23
  13. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/mcp/client.py +46 -1
  14. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/server/__init__.py +0 -6
  15. aixtools-0.2.0/aixtools/server/path.py +129 -0
  16. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/testing/aix_test_model.py +7 -1
  17. aixtools-0.2.0/aixtools/tools/doctor/mcp_tool_doctor.py +79 -0
  18. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/tools/doctor/tool_doctor.py +4 -0
  19. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/tools/doctor/tool_recommendation.py +5 -0
  20. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/utils/config.py +0 -1
  21. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools.egg-info/SOURCES.txt +6 -1
  22. {aixtools-0.1.11 → aixtools-0.2.0}/pyproject.toml +4 -1
  23. aixtools-0.1.11/aixtools/server/path.py +0 -72
  24. aixtools-0.1.11/aixtools/server/workspace_privacy.py +0 -65
  25. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/.chainlit/config.toml +0 -0
  26. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/.chainlit/translations/bn.json +0 -0
  27. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/.chainlit/translations/en-US.json +0 -0
  28. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/.chainlit/translations/gu.json +0 -0
  29. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/.chainlit/translations/he-IL.json +0 -0
  30. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/.chainlit/translations/hi.json +0 -0
  31. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/.chainlit/translations/ja.json +0 -0
  32. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/.chainlit/translations/kn.json +0 -0
  33. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/.chainlit/translations/ml.json +0 -0
  34. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/.chainlit/translations/mr.json +0 -0
  35. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/.chainlit/translations/nl.json +0 -0
  36. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/.chainlit/translations/ta.json +0 -0
  37. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/.chainlit/translations/te.json +0 -0
  38. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/.chainlit/translations/zh-CN.json +0 -0
  39. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/__init__.py +0 -0
  40. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/a2a/app.py +0 -0
  41. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/a2a/google_sdk/__init__.py +0 -0
  42. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/a2a/google_sdk/card.py +0 -0
  43. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/a2a/google_sdk/pydantic_ai_adapter/agent_executor.py +0 -0
  44. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/a2a/google_sdk/pydantic_ai_adapter/storage.py +0 -0
  45. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/a2a/google_sdk/remote_agent_connection.py +0 -0
  46. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/a2a/google_sdk/utils.py +0 -0
  47. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/a2a/utils.py +0 -0
  48. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/agents/__init__.py +0 -0
  49. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/agents/agent_batch.py +0 -0
  50. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/app.py +0 -0
  51. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/chainlit.md +0 -0
  52. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/compliance/__init__.py +0 -0
  53. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/context.py +0 -0
  54. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/db/__init__.py +0 -0
  55. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/db/database.py +0 -0
  56. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/db/vector_db.py +0 -0
  57. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/google/client.py +0 -0
  58. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/log_view/__init__.py +0 -0
  59. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/log_view/app.py +0 -0
  60. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/log_view/display.py +0 -0
  61. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/log_view/export.py +0 -0
  62. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/log_view/filters.py +0 -0
  63. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/log_view/log_utils.py +0 -0
  64. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/log_view/node_summary.py +0 -0
  65. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/logfilters/__init__.py +0 -0
  66. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/logfilters/context_filter.py +0 -0
  67. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/logging/__init__.py +0 -0
  68. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/logging/logging_config.py +0 -0
  69. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/logging/mcp_log_models.py +0 -0
  70. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/logging/mcp_logger.py +0 -0
  71. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/logging/model_patch_logging.py +0 -0
  72. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/logging/open_telemetry.py +0 -0
  73. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/mcp/__init__.py +0 -0
  74. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/mcp/example_client.py +0 -0
  75. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/mcp/example_server.py +0 -0
  76. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/mcp/fast_mcp_log.py +0 -0
  77. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/mcp/faulty_mcp.py +0 -0
  78. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/model_patch/model_patch.py +0 -0
  79. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/server/app_mounter.py +0 -0
  80. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/server/utils.py +0 -0
  81. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/testing/__init__.py +0 -0
  82. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/testing/mock_tool.py +0 -0
  83. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/testing/model_patch_cache.py +0 -0
  84. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/tools/doctor/__init__.py +0 -0
  85. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/utils/__init__.py +0 -0
  86. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/utils/chainlit/cl_agent_show.py +0 -0
  87. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/utils/chainlit/cl_utils.py +0 -0
  88. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/utils/config_util.py +0 -0
  89. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/utils/enum_with_description.py +0 -0
  90. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/utils/files.py +0 -0
  91. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/utils/persisted_dict.py +0 -0
  92. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/utils/utils.py +0 -0
  93. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/vault/__init__.py +0 -0
  94. {aixtools-0.1.11 → aixtools-0.2.0}/aixtools/vault/vault.py +0 -0
  95. {aixtools-0.1.11 → aixtools-0.2.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aixtools
3
- Version: 0.1.11
3
+ Version: 0.2.0
4
4
  Summary: Tools for AI exploration and debugging
5
5
  Requires-Python: >=3.11.2
6
6
  Description-Content-Type: text/markdown
@@ -18,7 +18,8 @@ Requires-Dist: langchain-openai>=0.3.14
18
18
  Requires-Dist: mcp>=1.11.0
19
19
  Requires-Dist: mypy>=1.18.2
20
20
  Requires-Dist: pandas>=2.2.3
21
- Requires-Dist: pydantic-ai>=0.4.10
21
+ Requires-Dist: pydantic-evals>=0.4.10
22
+ Requires-Dist: pydantic-ai>=1.0.9
22
23
  Requires-Dist: pylint>=3.3.7
23
24
  Requires-Dist: rich>=14.0.0
24
25
  Requires-Dist: ruff>=0.11.6
@@ -45,20 +46,16 @@ A2A
45
46
  - Google SDK Integration for A2A - `aixtools/a2a/google_sdk/`
46
47
  - PydanticAI Adapter for Google SDK - `aixtools/a2a/google_sdk/pydantic_ai_adapter/`
47
48
 
48
- Databases
49
- - Database Integration - `aixtools/db/`
50
- - Vector Database Support - `aixtools/db/vector_db.py`
51
-
52
49
  Logging & Debugging
53
- - Log Viewing Application - `aixtools/log_view/`
50
+ - Log Viewing Web Application - `log_view`
54
51
  - Object Logging System - `aixtools/logging/`
55
52
  - Model Patch Logging - `aixtools/logging/model_patch_logging.py`
56
53
  - Log Filtering System - `aixtools/logfilters/`
57
54
  - FastMCP Logging - `aixtools/mcp/fast_mcp_log.py`
58
- - Command Line Interface for Log Viewing - Entry point: `log_view`
59
55
  - MCP (Model Context Protocol) Support - `aixtools/logging/mcp_log_models.py`, `aixtools/logging/mcp_logger.py`
60
56
 
61
- Testing & Tools
57
+ Testing Tools & Evals
58
+ - Evaluations - `aixtools/evals/` - Entry point: `evals`
62
59
  - Testing Utilities - `aixtools/testing/`
63
60
  - Mock Tool System - `aixtools/testing/mock_tool.py`
64
61
  - Model Patch Caching - `aixtools/testing/model_patch_cache.py`
@@ -66,6 +63,10 @@ Testing & Tools
66
63
  - Tool Recommendation Engine - `aixtools/tools/doctor/tool_recommendation.py`
67
64
  - FaultyMCP - `aixtools/mcp/faulty_mcp.py`
68
65
 
66
+ Databases
67
+ - Database Integration - `aixtools/db/`
68
+ - Vector Database Support - `aixtools/db/vector_db.py`
69
+
69
70
  Chainlit & HTTP Server
70
71
  - Chainlit Integration - `aixtools/app.py`, `aixtools/chainlit.md`
71
72
  - Chainlit Utilities - `aixtools/utils/chainlit/`
@@ -81,28 +82,11 @@ Programming utils
81
82
 
82
83
  ## Installation
83
84
 
84
- ### From GitHub
85
-
86
85
  ```bash
87
86
  uv add aixtools
88
87
  ```
89
88
 
90
- ### Development Setup
91
-
92
- ```bash
93
- # Create a new project
94
- uv init MyNewProject
95
- cd MyNewProject
96
-
97
- # Add virtual environment and activate it
98
- uv venv .venv
99
- source .venv/bin/activate
100
-
101
- # Add this package
102
- uv add aixtools
103
- ```
104
-
105
- ### Updating
89
+ **Updating**
106
90
 
107
91
  ```bash
108
92
  uv add --upgrade aixtools
@@ -114,6 +98,7 @@ AIXtools requires environment variables for model providers.
114
98
 
115
99
  **IMPORTANT:** Create a `.env` file based on [`.env_template`](./.env_template):
116
100
 
101
+ Here is an example configuration:
117
102
  ```bash
118
103
  # Model family (azure, openai, or ollama)
119
104
  MODEL_FAMILY=azure
@@ -161,6 +146,33 @@ agent = get_agent(system_prompt="You are a helpful assistant.")
161
146
  result, nodes = await run_agent(agent, "Tell me about AI")
162
147
  ```
163
148
 
149
+ ### Node Debugging and Visualization
150
+
151
+ The [`print_nodes`](aixtools/agents/print_nodes.py) module provides clean, indented output that makes the nodes from an agent execution easy to read.
152
+
153
+ ```python
154
+ from aixtools.agents.print_nodes import print_nodes, print_node
155
+ from aixtools.agents.agent import get_agent, run_agent
156
+
157
+ agent = get_agent(system_prompt="You are a helpful assistant.")
158
+ result, nodes = await run_agent(agent, "Explain quantum computing")
159
+ # Print all execution nodes for debugging
160
+ print_nodes(nodes)
161
+ ```
162
+
163
+ **Features:**
164
+ - **Node Type Detection**: Automatically handles different node types (`UserPromptNode`, `CallToolsNode`, `ModelRequestNode`, `End`)
165
+ - **Formatted Output**: Provides clean, indented output for easy reading
166
+ - **Tool Call Visualization**: Shows tool names and arguments for tool calls
167
+ - **Text Content Display**: Formats text parts with proper indentation
168
+ - **Model Request Summary**: Shows character count for model requests to avoid verbose output
169
+
170
+ **Node Types Supported:**
171
+ - `UserPromptNode` - Displays user prompts with indentation
172
+ - `CallToolsNode` - Shows tool calls with names and arguments
173
+ - `ModelRequestNode` - Summarizes model requests with character count
174
+ - `End` - Marks the end of execution (output suppressed by default)
175
+
164
176
  ### Agent Batch Processing
165
177
 
166
178
  Process multiple agent queries simultaneously with built-in concurrency control and result aggregation.
@@ -299,9 +311,39 @@ with ObjectLogger() as logger:
299
311
  logger.log(agent_response)
300
312
  ```
301
313
 
302
- ### MCP Logger
314
+ ### MCP logging
315
+
316
+ AIXtools provides MCP support for both client and server implementations with easier logging for debugging purposes.
317
+
318
+ **Example:**
319
+
320
+ Let's assume we have an MCP server that runs an agent tool.
321
+
322
+ Note that the `ctx: Context` parameter is passed to `run_agent`, this will enable logging to the MCP client.
323
+
324
+ ```python
325
+ @mcp.tool
326
+ async def my_tool_with_agent(query: str, ctx: Context) -> str:
327
+ """ A tool that uses an agent to process the query """
328
+ agent = get_agent()
329
+ async with get_qb_agent() as agent:
330
+ ret, nodes = await run_agent(agent=agent, prompt=query, ctx=ctx) # Enable MCP logging
331
+ return str(ret)
332
+ ```
333
+
334
+ On the client side, you can create an agent connected to the MCP server. The "nodes" from the MCP server will be shown on STDOUT, so you can see what's going on in the MCP server's agent loop.
335
+
336
+ ```python
337
+ mcp = get_mcp_client("http://localhost:8000") # Get an MCP client with a default log handler that prints to STDOUT
338
+ agent = get_agent(toolsets=[mcp])
339
+ async with agent:
340
+ # The messages from the MCP server will be printed to the STDOUT
341
+ ret, nodes = await run_agent(agent, prompt="...")
342
+ ```
343
+
344
+ #### MCP Server Logging
303
345
 
304
- This is an MCP server that can log MCP requests and responses.
346
+ Create MCP servers with built-in logging capabilities.
305
347
 
306
348
  ```python
307
349
  from aixtools.mcp.fast_mcp_log import FastMcpLog
@@ -371,6 +413,77 @@ By default, the "FaultyMCP" includes several tools you can use in your tests:
371
413
  - `freeze_server(seconds)` - Simulates server freeze
372
414
  - `throw_404_exception()` - Throws HTTP 404 error
373
415
 
416
+ ### Evals
417
+
418
+ Run comprehensive Agent/LLM evaluations using the built-in evaluation discovery based on the Pydantic-AI framework.
419
+
420
+ ```bash
421
+ # Run all evaluations
422
+ evals
423
+
424
+ # Run evaluations with filtering
425
+ evals --filter "specific_test"
426
+
427
+ # Run with verbose output and detailed reporting
428
+ evals --verbose --include-input --include-output --include-reasons
429
+
430
+ # Specify custom evaluations directory
431
+ evals --evals-dir /path/to/evals
432
+
433
+ # Set minimum assertions threshold
434
+ evals --min-assertions 0.8
435
+ ```
436
+
437
+ **Command Line Options:**
438
+ - `--evals-dir` - Directory containing eval_*.py files (default: evals)
439
+ - `--filter` - Filter to run only matching evaluations
440
+ - `--include-input` - Include input in report output
441
+ - `--include-output` - Include output in report output
442
+ - `--include-evaluator-failures` - Include evaluator failures in report
443
+ - `--include-reasons` - Include reasons in report output
444
+ - `--min-assertions` - Minimum assertions average required for success (default: 1.0)
445
+ - `--verbose` - Print detailed information about discovery and processing
446
+
447
+ The evaluation system discovers and runs all Dataset objects from eval_*.py files in the specified directory, similar to test runners but specifically designed for LLM evaluations using pydantic_evals.
448
+
449
+ **Discovery Mechanism:**
450
+
451
+ The evaluation framework uses an automatic discovery system that:
452
+
453
+ 1. **File Discovery**: Scans the specified directory for files matching the pattern `eval_*.py`
454
+ 2. **Dataset Discovery**: Within each file, looks for variables named `dataset_*` that are instances of `pydantic_evals.Dataset`
455
+ 3. **Target Function Discovery**: Automatically finds the first async function in each module that doesn't start with an underscore (`_`) to use as the evaluation target
456
+ 4. **Filtering**: Supports filtering by module name, file name, dataset name, or fully qualified name
457
+
458
+ **Example Evaluation File Structure:**
459
+ ```python
460
+ # eval_math_operations.py
461
+ from pydantic_evals import Dataset, Case
462
+
463
+ # This dataset will be discovered automatically
464
+ dataset_addition = Dataset(
465
+ name="Addition Tests",
466
+ cases=[
467
+ Case(input="What is 2 + 2?", expected="4"),
468
+ Case(input="What is 10 + 5?", expected="15"),
469
+ ],
470
+ evaluators=[...]
471
+ )
472
+
473
+ # This function will be used as the evaluation target
474
+ async def evaluate_math_agent(input_text: str) -> str:
475
+ # Your agent evaluation logic here
476
+ agent = get_agent(system_prompt="You are a math assistant.")
477
+ result, _ = await run_agent(agent, input_text)
478
+ return result
479
+ ```
480
+
481
+ The discovery system will:
482
+ - Find `eval_math_operations.py` in the evals directory
483
+ - Discover `dataset_addition` as an evaluation dataset
484
+ - Use `evaluate_math_agent` as the target function for evaluation
485
+ - Run each case through the target function and evaluate results
486
+
374
487
  ## Testing & Tools
375
488
 
376
489
  AIXtools provides comprehensive testing utilities and diagnostic tools for AI agent development and debugging.
@@ -397,7 +510,49 @@ test_model = AixTestModel()
397
510
 
398
511
  ### Tool Doctor System
399
512
 
400
- Automated tool analysis and recommendation system for optimizing agent tool usage.
513
+ Automated tool analysis and recommendation system for optimizing agent tool usage and analyzing MCP servers.
514
+
515
+ #### MCP Tool Doctor
516
+
517
+ Analyze tools from MCP (Model Context Protocol) servers and receive AI-powered recommendations for improvement.
518
+
519
+ ```python
520
+ from aixtools.tools.doctor.mcp_tool_doctor import tool_doctor_mcp
521
+ from pydantic_ai.mcp import MCPServerStreamableHTTP, MCPServerStdio
522
+
523
+ # Analyze HTTP MCP server
524
+ recommendations = await tool_doctor_mcp(mcp_url='http://127.0.0.1:8000/mcp')
525
+ for rec in recommendations:
526
+ print(rec)
527
+
528
+ # Analyze STDIO MCP server
529
+ server = MCPServerStdio(command='fastmcp', args=['run', 'my_server.py'])
530
+ recommendations = await tool_doctor_mcp(mcp_server=server, verbose=True)
531
+ ```
532
+
533
+ **Command Line Usage:**
534
+
535
+ ```bash
536
+ # Analyze HTTP MCP server (default)
537
+ tool_doctor_mcp
538
+
539
+ # Analyze specific HTTP MCP server
540
+ tool_doctor_mcp --mcp-url http://localhost:9000/mcp --verbose
541
+
542
+ # Analyze STDIO MCP server
543
+ tool_doctor_mcp --stdio-command fastmcp --stdio-args run my_server.py --debug
544
+
545
+ # Available options:
546
+ # --mcp-url URL URL of HTTP MCP server (default: http://127.0.0.1:8000/mcp)
547
+ # --stdio-command CMD Command to run STDIO MCP server
548
+ # --stdio-args ARGS Arguments for STDIO MCP server command
549
+ # --verbose Enable verbose output
550
+ # --debug Enable debug output
551
+ ```
552
+
553
+ #### Traditional Tool Doctor
554
+
555
+ Analyze tool usage patterns from agent logs and get optimization recommendations.
401
556
 
402
557
  ```python
403
558
  from aixtools.tools.doctor.tool_doctor import ToolDoctor
@@ -14,20 +14,16 @@ A2A
14
14
  - Google SDK Integration for A2A - `aixtools/a2a/google_sdk/`
15
15
  - PydanticAI Adapter for Google SDK - `aixtools/a2a/google_sdk/pydantic_ai_adapter/`
16
16
 
17
- Databases
18
- - Database Integration - `aixtools/db/`
19
- - Vector Database Support - `aixtools/db/vector_db.py`
20
-
21
17
  Logging & Debugging
22
- - Log Viewing Application - `aixtools/log_view/`
18
+ - Log Viewing Web Application - `log_view`
23
19
  - Object Logging System - `aixtools/logging/`
24
20
  - Model Patch Logging - `aixtools/logging/model_patch_logging.py`
25
21
  - Log Filtering System - `aixtools/logfilters/`
26
22
  - FastMCP Logging - `aixtools/mcp/fast_mcp_log.py`
27
- - Command Line Interface for Log Viewing - Entry point: `log_view`
28
23
  - MCP (Model Context Protocol) Support - `aixtools/logging/mcp_log_models.py`, `aixtools/logging/mcp_logger.py`
29
24
 
30
- Testing & Tools
25
+ Testing Tools & Evals
26
+ - Evaluations - `aixtools/evals/` - Entry point: `evals`
31
27
  - Testing Utilities - `aixtools/testing/`
32
28
  - Mock Tool System - `aixtools/testing/mock_tool.py`
33
29
  - Model Patch Caching - `aixtools/testing/model_patch_cache.py`
@@ -35,6 +31,10 @@ Testing & Tools
35
31
  - Tool Recommendation Engine - `aixtools/tools/doctor/tool_recommendation.py`
36
32
  - FaultyMCP - `aixtools/mcp/faulty_mcp.py`
37
33
 
34
+ Databases
35
+ - Database Integration - `aixtools/db/`
36
+ - Vector Database Support - `aixtools/db/vector_db.py`
37
+
38
38
  Chainlit & HTTP Server
39
39
  - Chainlit Integration - `aixtools/app.py`, `aixtools/chainlit.md`
40
40
  - Chainlit Utilities - `aixtools/utils/chainlit/`
@@ -50,28 +50,11 @@ Programming utils
50
50
 
51
51
  ## Installation
52
52
 
53
- ### From GitHub
54
-
55
53
  ```bash
56
54
  uv add aixtools
57
55
  ```
58
56
 
59
- ### Development Setup
60
-
61
- ```bash
62
- # Create a new project
63
- uv init MyNewProject
64
- cd MyNewProject
65
-
66
- # Add virtual environment and activate it
67
- uv venv .venv
68
- source .venv/bin/activate
69
-
70
- # Add this package
71
- uv add aixtools
72
- ```
73
-
74
- ### Updating
57
+ **Updating**
75
58
 
76
59
  ```bash
77
60
  uv add --upgrade aixtools
@@ -83,6 +66,7 @@ AIXtools requires environment variables for model providers.
83
66
 
84
67
  **IMPORTANT:** Create a `.env` file based on [`.env_template`](./.env_template):
85
68
 
69
+ Here is an example configuration:
86
70
  ```bash
87
71
  # Model family (azure, openai, or ollama)
88
72
  MODEL_FAMILY=azure
@@ -130,6 +114,33 @@ agent = get_agent(system_prompt="You are a helpful assistant.")
130
114
  result, nodes = await run_agent(agent, "Tell me about AI")
131
115
  ```
132
116
 
117
+ ### Node Debugging and Visualization
118
+
119
+ The [`print_nodes`](aixtools/agents/print_nodes.py) module provides clean, indented output that makes the nodes from an agent execution easy to read.
120
+
121
+ ```python
122
+ from aixtools.agents.print_nodes import print_nodes, print_node
123
+ from aixtools.agents.agent import get_agent, run_agent
124
+
125
+ agent = get_agent(system_prompt="You are a helpful assistant.")
126
+ result, nodes = await run_agent(agent, "Explain quantum computing")
127
+ # Print all execution nodes for debugging
128
+ print_nodes(nodes)
129
+ ```
130
+
131
+ **Features:**
132
+ - **Node Type Detection**: Automatically handles different node types (`UserPromptNode`, `CallToolsNode`, `ModelRequestNode`, `End`)
133
+ - **Formatted Output**: Provides clean, indented output for easy reading
134
+ - **Tool Call Visualization**: Shows tool names and arguments for tool calls
135
+ - **Text Content Display**: Formats text parts with proper indentation
136
+ - **Model Request Summary**: Shows character count for model requests to avoid verbose output
137
+
138
+ **Node Types Supported:**
139
+ - `UserPromptNode` - Displays user prompts with indentation
140
+ - `CallToolsNode` - Shows tool calls with names and arguments
141
+ - `ModelRequestNode` - Summarizes model requests with character count
142
+ - `End` - Marks the end of execution (output suppressed by default)
143
+
133
144
  ### Agent Batch Processing
134
145
 
135
146
  Process multiple agent queries simultaneously with built-in concurrency control and result aggregation.
@@ -268,9 +279,39 @@ with ObjectLogger() as logger:
268
279
  logger.log(agent_response)
269
280
  ```
270
281
 
271
- ### MCP Logger
282
+ ### MCP logging
283
+
284
+ AIXtools provides MCP support for both client and server implementations with easier logging for debugging purposes.
285
+
286
+ **Example:**
287
+
288
+ Let's assume we have an MCP server that runs an agent tool.
289
+
290
+ Note that the `ctx: Context` parameter is passed to `run_agent`, this will enable logging to the MCP client.
291
+
292
+ ```python
293
+ @mcp.tool
294
+ async def my_tool_with_agent(query: str, ctx: Context) -> str:
295
+ """ A tool that uses an agent to process the query """
296
+ agent = get_agent()
297
+ async with get_qb_agent() as agent:
298
+ ret, nodes = await run_agent(agent=agent, prompt=query, ctx=ctx) # Enable MCP logging
299
+ return str(ret)
300
+ ```
301
+
302
+ On the client side, you can create an agent connected to the MCP server. The "nodes" from the MCP server will be shown on STDOUT, so you can see what's going on in the MCP server's agent loop.
303
+
304
+ ```python
305
+ mcp = get_mcp_client("http://localhost:8000") # Get an MCP client with a default log handler that prints to STDOUT
306
+ agent = get_agent(toolsets=[mcp])
307
+ async with agent:
308
+ # The messages from the MCP server will be printed to the STDOUT
309
+ ret, nodes = await run_agent(agent, prompt="...")
310
+ ```
311
+
312
+ #### MCP Server Logging
272
313
 
273
- This is an MCP server that can log MCP requests and responses.
314
+ Create MCP servers with built-in logging capabilities.
274
315
 
275
316
  ```python
276
317
  from aixtools.mcp.fast_mcp_log import FastMcpLog
@@ -340,6 +381,77 @@ By default, the "FaultyMCP" includes several tools you can use in your tests:
340
381
  - `freeze_server(seconds)` - Simulates server freeze
341
382
  - `throw_404_exception()` - Throws HTTP 404 error
342
383
 
384
+ ### Evals
385
+
386
+ Run comprehensive Agent/LLM evaluations using the built-in evaluation discovery based on the Pydantic-AI framework.
387
+
388
+ ```bash
389
+ # Run all evaluations
390
+ evals
391
+
392
+ # Run evaluations with filtering
393
+ evals --filter "specific_test"
394
+
395
+ # Run with verbose output and detailed reporting
396
+ evals --verbose --include-input --include-output --include-reasons
397
+
398
+ # Specify custom evaluations directory
399
+ evals --evals-dir /path/to/evals
400
+
401
+ # Set minimum assertions threshold
402
+ evals --min-assertions 0.8
403
+ ```
404
+
405
+ **Command Line Options:**
406
+ - `--evals-dir` - Directory containing eval_*.py files (default: evals)
407
+ - `--filter` - Filter to run only matching evaluations
408
+ - `--include-input` - Include input in report output
409
+ - `--include-output` - Include output in report output
410
+ - `--include-evaluator-failures` - Include evaluator failures in report
411
+ - `--include-reasons` - Include reasons in report output
412
+ - `--min-assertions` - Minimum assertions average required for success (default: 1.0)
413
+ - `--verbose` - Print detailed information about discovery and processing
414
+
415
+ The evaluation system discovers and runs all Dataset objects from eval_*.py files in the specified directory, similar to test runners but specifically designed for LLM evaluations using pydantic_evals.
416
+
417
+ **Discovery Mechanism:**
418
+
419
+ The evaluation framework uses an automatic discovery system that:
420
+
421
+ 1. **File Discovery**: Scans the specified directory for files matching the pattern `eval_*.py`
422
+ 2. **Dataset Discovery**: Within each file, looks for variables named `dataset_*` that are instances of `pydantic_evals.Dataset`
423
+ 3. **Target Function Discovery**: Automatically finds the first async function in each module that doesn't start with an underscore (`_`) to use as the evaluation target
424
+ 4. **Filtering**: Supports filtering by module name, file name, dataset name, or fully qualified name
425
+
426
+ **Example Evaluation File Structure:**
427
+ ```python
428
+ # eval_math_operations.py
429
+ from pydantic_evals import Dataset, Case
430
+
431
+ # This dataset will be discovered automatically
432
+ dataset_addition = Dataset(
433
+ name="Addition Tests",
434
+ cases=[
435
+ Case(input="What is 2 + 2?", expected="4"),
436
+ Case(input="What is 10 + 5?", expected="15"),
437
+ ],
438
+ evaluators=[...]
439
+ )
440
+
441
+ # This function will be used as the evaluation target
442
+ async def evaluate_math_agent(input_text: str) -> str:
443
+ # Your agent evaluation logic here
444
+ agent = get_agent(system_prompt="You are a math assistant.")
445
+ result, _ = await run_agent(agent, input_text)
446
+ return result
447
+ ```
448
+
449
+ The discovery system will:
450
+ - Find `eval_math_operations.py` in the evals directory
451
+ - Discover `dataset_addition` as an evaluation dataset
452
+ - Use `evaluate_math_agent` as the target function for evaluation
453
+ - Run each case through the target function and evaluate results
454
+
343
455
  ## Testing & Tools
344
456
 
345
457
  AIXtools provides comprehensive testing utilities and diagnostic tools for AI agent development and debugging.
@@ -366,7 +478,49 @@ test_model = AixTestModel()
366
478
 
367
479
  ### Tool Doctor System
368
480
 
369
- Automated tool analysis and recommendation system for optimizing agent tool usage.
481
+ Automated tool analysis and recommendation system for optimizing agent tool usage and analyzing MCP servers.
482
+
483
+ #### MCP Tool Doctor
484
+
485
+ Analyze tools from MCP (Model Context Protocol) servers and receive AI-powered recommendations for improvement.
486
+
487
+ ```python
488
+ from aixtools.tools.doctor.mcp_tool_doctor import tool_doctor_mcp
489
+ from pydantic_ai.mcp import MCPServerStreamableHTTP, MCPServerStdio
490
+
491
+ # Analyze HTTP MCP server
492
+ recommendations = await tool_doctor_mcp(mcp_url='http://127.0.0.1:8000/mcp')
493
+ for rec in recommendations:
494
+ print(rec)
495
+
496
+ # Analyze STDIO MCP server
497
+ server = MCPServerStdio(command='fastmcp', args=['run', 'my_server.py'])
498
+ recommendations = await tool_doctor_mcp(mcp_server=server, verbose=True)
499
+ ```
500
+
501
+ **Command Line Usage:**
502
+
503
+ ```bash
504
+ # Analyze HTTP MCP server (default)
505
+ tool_doctor_mcp
506
+
507
+ # Analyze specific HTTP MCP server
508
+ tool_doctor_mcp --mcp-url http://localhost:9000/mcp --verbose
509
+
510
+ # Analyze STDIO MCP server
511
+ tool_doctor_mcp --stdio-command fastmcp --stdio-args run my_server.py --debug
512
+
513
+ # Available options:
514
+ # --mcp-url URL URL of HTTP MCP server (default: http://127.0.0.1:8000/mcp)
515
+ # --stdio-command CMD Command to run STDIO MCP server
516
+ # --stdio-args ARGS Arguments for STDIO MCP server command
517
+ # --verbose Enable verbose output
518
+ # --debug Enable debug output
519
+ ```
520
+
521
+ #### Traditional Tool Doctor
522
+
523
+ Analyze tool usage patterns from agent logs and get optimization recommendations.
370
524
 
371
525
  ```python
372
526
  from aixtools.tools.doctor.tool_doctor import ToolDoctor
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.1.11'
32
- __version_tuple__ = version_tuple = (0, 1, 11)
31
+ __version__ = version = '0.2.0'
32
+ __version_tuple__ = version_tuple = (0, 2, 0)
33
33
 
34
- __commit_id__ = commit_id = 'g32157e2f8'
34
+ __commit_id__ = commit_id = 'g9a5eeee69'
@@ -5,10 +5,11 @@ Core agent implementation providing model selection and configuration for AI age
5
5
  from types import NoneType
6
6
  from typing import Any
7
7
 
8
+ from fastmcp import Context
8
9
  from openai import AsyncAzureOpenAI
9
10
  from pydantic_ai import Agent
10
11
  from pydantic_ai.models.bedrock import BedrockConverseModel
11
- from pydantic_ai.models.openai import OpenAIModel
12
+ from pydantic_ai.models.openai import OpenAIChatModel
12
13
  from pydantic_ai.providers.bedrock import BedrockProvider
13
14
  from pydantic_ai.providers.openai import OpenAIProvider
14
15
  from pydantic_ai.settings import ModelSettings
@@ -54,14 +55,14 @@ def _get_model_ollama(model_name=OLLAMA_MODEL_NAME, ollama_url=OLLAMA_URL):
54
55
  assert ollama_url, "OLLAMA_URL is not set"
55
56
  assert model_name, "Model name is not set"
56
57
  provider = OpenAIProvider(base_url=ollama_url)
57
- return OpenAIModel(model_name=model_name, provider=provider)
58
+ return OpenAIChatModel(model_name=model_name, provider=provider)
58
59
 
59
60
 
60
61
  def _get_model_openai(model_name=OPENAI_MODEL_NAME, openai_api_key=OPENAI_API_KEY):
61
62
  assert openai_api_key, "OPENAI_API_KEY is not set"
62
63
  assert model_name, "Model name is not set"
63
64
  provider = OpenAIProvider(api_key=openai_api_key)
64
- return OpenAIModel(model_name=model_name, provider=provider)
65
+ return OpenAIChatModel(model_name=model_name, provider=provider)
65
66
 
66
67
 
67
68
  def _get_model_openai_azure(
@@ -77,7 +78,7 @@ def _get_model_openai_azure(
77
78
  client = AsyncAzureOpenAI(
78
79
  azure_endpoint=azure_openai_endpoint, api_version=azure_openai_api_version, api_key=azure_openai_api_key
79
80
  )
80
- return OpenAIModel(model_name=model_name, provider=OpenAIProvider(openai_client=client))
81
+ return OpenAIChatModel(model_name=model_name, provider=OpenAIProvider(openai_client=client))
81
82
 
82
83
 
83
84
  def _get_model_open_router(
@@ -87,7 +88,7 @@ def _get_model_open_router(
87
88
  assert openrouter_api_key, "OPENROUTER_API_KEY is not set"
88
89
  assert model_name, "Model name is not set, missing 'OPENROUTER_MODEL_NAME' environment variable?"
89
90
  provider = OpenAIProvider(base_url=openrouter_api_url, api_key=openrouter_api_key)
90
- return OpenAIModel(model_name, provider=provider)
91
+ return OpenAIChatModel(model_name, provider=provider)
91
92
 
92
93
 
93
94
  def get_model(model_family=MODEL_FAMILY, model_name=None, **kwargs):
@@ -146,8 +147,22 @@ async def run_agent( # noqa: PLR0913, pylint: disable=too-many-arguments,too-ma
146
147
  debug: bool = False,
147
148
  log_model_requests: bool = False,
148
149
  parent_logger: ObjectLogger | None = None,
150
+ ctx: Context | None = None,
149
151
  ):
150
- """Query the LLM"""
152
+ """
153
+ Run the agent with the given prompt and log the execution details.
154
+ Args:
155
+ agent (Agent): The PydanticAI agent to run.
156
+ prompt (str | list[str]): The input prompt(s) for the agent.
157
+ usage_limits (UsageLimits | None): Optional usage limits for the agent.
158
+ verbose (bool): If True, enables verbose logging.
159
+ debug (bool): If True, enables debug logging.
160
+ log_model_requests (bool): If True, logs model requests and responses.
161
+ parent_logger (ObjectLogger | None): Optional parent logger for hierarchical logging.
162
+ ctx (Context | None): Optional FastMCP context for logging messages to the MCP client.
163
+ Returns:
164
+ tuple[final_output, nodes]: A tuple containing the agent's final output and a list of all logged nodes.
165
+ """
151
166
  # Results
152
167
  nodes, result = [], None
153
168
  async with agent.iter(prompt, usage_limits=usage_limits) as agent_run:
@@ -158,7 +173,11 @@ async def run_agent( # noqa: PLR0913, pylint: disable=too-many-arguments,too-ma
158
173
  agent.model = model_patch_logging(agent.model, agent_logger)
159
174
  # Run the agent
160
175
  async for node in agent_run:
161
- agent_logger.log(node)
176
+ await agent_logger.log(node) # Log each node
177
+ if ctx:
178
+ # If we are executing in an MCP server, send info messages to the client for better debugging
179
+ server_name = ctx.fastmcp.name
180
+ await ctx.info(f"MCP server {server_name}: {node}")
162
181
  nodes.append(node)
163
182
  result = agent_run.result
164
183
  return result.output if result else None, nodes