jarviscore-framework 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff shows the content changes between publicly released versions of the package as they appear in the public registry, and is provided for informational purposes only.
- examples/autoagent_distributed_example.py +211 -0
- examples/custom_profile_decorator.py +134 -0
- examples/custom_profile_wrap.py +168 -0
- examples/customagent_distributed_example.py +362 -0
- examples/customagent_p2p_example.py +347 -0
- jarviscore/__init__.py +49 -36
- jarviscore/adapter/__init__.py +15 -9
- jarviscore/adapter/decorator.py +23 -19
- jarviscore/adapter/wrapper.py +303 -0
- jarviscore/cli/scaffold.py +1 -1
- jarviscore/cli/smoketest.py +3 -2
- jarviscore/core/agent.py +44 -1
- jarviscore/core/mesh.py +196 -35
- jarviscore/data/examples/autoagent_distributed_example.py +211 -0
- jarviscore/data/examples/customagent_distributed_example.py +362 -0
- jarviscore/data/examples/customagent_p2p_example.py +347 -0
- jarviscore/docs/API_REFERENCE.md +264 -51
- jarviscore/docs/AUTOAGENT_GUIDE.md +198 -0
- jarviscore/docs/CONFIGURATION.md +35 -21
- jarviscore/docs/CUSTOMAGENT_GUIDE.md +415 -0
- jarviscore/docs/GETTING_STARTED.md +106 -13
- jarviscore/docs/TROUBLESHOOTING.md +144 -6
- jarviscore/docs/USER_GUIDE.md +138 -361
- jarviscore/orchestration/engine.py +20 -8
- jarviscore/p2p/__init__.py +10 -0
- jarviscore/p2p/coordinator.py +129 -0
- jarviscore/p2p/messages.py +87 -0
- jarviscore/p2p/peer_client.py +576 -0
- jarviscore/p2p/peer_tool.py +268 -0
- {jarviscore_framework-0.1.1.dist-info → jarviscore_framework-0.2.0.dist-info}/METADATA +60 -54
- jarviscore_framework-0.2.0.dist-info/RECORD +132 -0
- {jarviscore_framework-0.1.1.dist-info → jarviscore_framework-0.2.0.dist-info}/WHEEL +1 -1
- {jarviscore_framework-0.1.1.dist-info → jarviscore_framework-0.2.0.dist-info}/top_level.txt +1 -0
- test_logs/code_registry/functions/data_generator-558779ed_560ebc37.py +7 -0
- test_logs/code_registry/functions/data_generator-5ed3609e_560ebc37.py +7 -0
- test_logs/code_registry/functions/data_generator-66da0356_43970bb9.py +25 -0
- test_logs/code_registry/functions/data_generator-7a2fac83_583709d9.py +36 -0
- test_logs/code_registry/functions/data_generator-888b670f_aa235863.py +9 -0
- test_logs/code_registry/functions/data_generator-9ca5f642_aa235863.py +9 -0
- test_logs/code_registry/functions/data_generator-bfd90775_560ebc37.py +7 -0
- test_logs/code_registry/functions/data_generator-e95d2f7d_aa235863.py +9 -0
- test_logs/code_registry/functions/data_generator-f60ca8a2_327eb8c2.py +29 -0
- test_logs/code_registry/functions/mathematician-02adf9ee_958658d9.py +19 -0
- test_logs/code_registry/functions/mathematician-0706fb57_5df13441.py +23 -0
- test_logs/code_registry/functions/mathematician-153c9c4a_ba59c918.py +83 -0
- test_logs/code_registry/functions/mathematician-287e61c0_41daa793.py +18 -0
- test_logs/code_registry/functions/mathematician-2967af5a_863c2cc6.py +17 -0
- test_logs/code_registry/functions/mathematician-303ca6d6_5df13441.py +23 -0
- test_logs/code_registry/functions/mathematician-308a4afd_cbf5064d.py +73 -0
- test_logs/code_registry/functions/mathematician-353f16e2_0968bcf5.py +18 -0
- test_logs/code_registry/functions/mathematician-3c22475a_41daa793.py +17 -0
- test_logs/code_registry/functions/mathematician-5bac1029_0968bcf5.py +18 -0
- test_logs/code_registry/functions/mathematician-640f76b2_9198780b.py +19 -0
- test_logs/code_registry/functions/mathematician-752fa7ea_863c2cc6.py +17 -0
- test_logs/code_registry/functions/mathematician-baf9ef39_0968bcf5.py +18 -0
- test_logs/code_registry/functions/mathematician-bc8b2a2f_5df13441.py +23 -0
- test_logs/code_registry/functions/mathematician-c31e4686_41daa793.py +18 -0
- test_logs/code_registry/functions/mathematician-cc84c84c_863c2cc6.py +17 -0
- test_logs/code_registry/functions/mathematician-dd7c7144_9198780b.py +19 -0
- test_logs/code_registry/functions/mathematician-e671c256_41ea4487.py +74 -0
- test_logs/code_registry/functions/report_generator-1a878fcc_18d44bdc.py +47 -0
- test_logs/code_registry/functions/report_generator-25c1c331_cea57d0d.py +35 -0
- test_logs/code_registry/functions/report_generator-37552117_e711c2b9.py +35 -0
- test_logs/code_registry/functions/report_generator-bc662768_e711c2b9.py +35 -0
- test_logs/code_registry/functions/report_generator-d6c0e76b_5e7722ec.py +44 -0
- test_logs/code_registry/functions/report_generator-f270fb02_680529c3.py +44 -0
- test_logs/code_registry/functions/text_processor-11393b14_4370d3ed.py +40 -0
- test_logs/code_registry/functions/text_processor-7d02dfc3_d3b569be.py +37 -0
- test_logs/code_registry/functions/text_processor-8adb5e32_9168c5fe.py +13 -0
- test_logs/code_registry/functions/text_processor-c58ffc19_78b4ceac.py +42 -0
- test_logs/code_registry/functions/text_processor-cd5977b1_9168c5fe.py +13 -0
- test_logs/code_registry/functions/text_processor-ec1c8773_9168c5fe.py +13 -0
- tests/test_01_analyst_standalone.py +124 -0
- tests/test_02_assistant_standalone.py +164 -0
- tests/test_03_analyst_with_framework.py +945 -0
- tests/test_04_assistant_with_framework.py +1002 -0
- tests/test_05_integration.py +1301 -0
- tests/test_06_real_llm_integration.py +760 -0
- tests/test_07_distributed_single_node.py +578 -0
- tests/test_08_distributed_multi_node.py +454 -0
- tests/test_09_distributed_autoagent.py +509 -0
- tests/test_10_distributed_customagent.py +787 -0
- tests/test_mesh.py +35 -4
- jarviscore_framework-0.1.1.dist-info/RECORD +0 -69
- {jarviscore_framework-0.1.1.dist-info → jarviscore_framework-0.2.0.dist-info}/licenses/LICENSE +0 -0
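For orientation before the diff below: a minimal sketch of the peer-wiring pattern that the new test file exercises. It assumes the Mesh / PeerClient API exactly as used in that test; EchoAgent is a hypothetical stand-in for the test's AnalystAgent and AssistantAgent and is not part of the library.

    from jarviscore.core.agent import Agent
    from jarviscore.core.mesh import Mesh
    from jarviscore.p2p.peer_client import PeerClient

    class EchoAgent(Agent):
        """Minimal agent; the base-class contract is inferred from the test below."""
        role = "echo"
        capabilities = ["chat"]

        async def run(self):
            # Listen for peer requests and echo them back.
            while not self.shutdown_requested:
                msg = await self.peers.receive(timeout=0.5)
                if msg and msg.is_request:
                    await self.peers.respond(msg, {"response": f"Received: {msg.data}"})

        async def execute_task(self, task):
            return {}

    mesh = Mesh(mode="p2p")
    first = mesh.add(EchoAgent)
    second = mesh.add(EchoAgent)

    # Wire a PeerClient into each agent, as the new test's fixture does.
    for agent in mesh.agents:
        agent.peers = PeerClient(
            coordinator=None,                     # single-process mesh, no coordinator
            agent_id=agent.agent_id,
            agent_role=agent.role,
            agent_registry=mesh._agent_registry,
            node_id="local",
        )

    # Peer tool definitions (ask_peer, broadcast_update, list_peers) in LLM tool format:
    peer_tools = first.peers.as_tool().schema

These peer tool schemas can then be handed to an LLM alongside an agent's own tools, which is exactly what the test below verifies.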
tests/test_06_real_llm_integration.py (new file)
@@ -0,0 +1,760 @@
"""
Test 6: Real LLM Integration Test

This test uses ACTUAL LLM API calls (not mocks) to verify that:
1. The LLM correctly sees peer tools in the tool list
2. The LLM decides to use ask_peer when appropriate
3. The tool execution works end-to-end
4. The response flows back correctly

IMPORTANT: This test makes real API calls and costs money.
Run with: pytest tests/test_06_real_llm_integration.py -v -s

Prerequisites:
- .env file with CLAUDE_API_KEY (or other provider keys)
- Network connectivity
"""
import asyncio
import os
import sys
import pytest
import logging

sys.path.insert(0, '.')

from jarviscore.core.agent import Agent
from jarviscore.core.mesh import Mesh
from jarviscore.p2p.peer_client import PeerClient

# Setup logging to see what's happening
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Skip all tests if no API key is configured
try:
    from jarviscore.config import settings
    HAS_API_KEY = bool(
        settings.claude_api_key or
        settings.azure_api_key or
        settings.gemini_api_key
    )
except Exception:
    HAS_API_KEY = False

pytestmark = pytest.mark.skipif(
    not HAS_API_KEY,
    reason="No LLM API key configured in .env"
)


# ═══════════════════════════════════════════════════════════════════════════════
# REAL LLM CLIENT WITH TOOL SUPPORT
# ═══════════════════════════════════════════════════════════════════════════════

class RealLLMClient:
    """
    Real LLM client with native tool calling support.

    Uses Anthropic Claude API directly for proper tool_use handling.
    """

    def __init__(self):
        from anthropic import Anthropic
        from jarviscore.config import settings

        # Get API key and endpoint
        api_key = settings.claude_api_key
        endpoint = settings.claude_endpoint

        if not api_key:
            raise RuntimeError("No Claude API key found in settings")

        # Initialize client
        if endpoint:
            self.client = Anthropic(api_key=api_key, base_url=endpoint)
        else:
            self.client = Anthropic(api_key=api_key)

        self.model = settings.claude_model or "claude-sonnet-4-20250514"
        logger.info(f"RealLLMClient initialized with model: {self.model}")

    def chat_with_tools(
        self,
        messages: list,
        tools: list,
        system: str = None,
        max_tokens: int = 1024
    ) -> dict:
        """
        Send chat with tools and get response.

        Args:
            messages: List of message dicts [{"role": "user", "content": "..."}]
            tools: List of tool definitions in Anthropic format
            system: Optional system prompt
            max_tokens: Max tokens to generate

        Returns:
            {
                "type": "text" | "tool_use",
                "content": str,  # if text
                "tool_name": str,  # if tool_use
                "tool_args": dict,  # if tool_use
                "tool_use_id": str  # if tool_use
            }
        """
        # Build request
        request_kwargs = {
            "model": self.model,
            "max_tokens": max_tokens,
            "messages": messages,
        }

        if system:
            request_kwargs["system"] = system

        if tools:
            request_kwargs["tools"] = tools

        # Make the call
        logger.info(f"Calling LLM with {len(messages)} messages and {len(tools)} tools")
        response = self.client.messages.create(**request_kwargs)

        # Parse response
        result = {"stop_reason": response.stop_reason}

        for block in response.content:
            if block.type == "text":
                result["type"] = "text"
                result["content"] = block.text
            elif block.type == "tool_use":
                result["type"] = "tool_use"
                result["tool_name"] = block.name
                result["tool_args"] = block.input
                result["tool_use_id"] = block.id

        logger.info(f"LLM response type: {result.get('type')}")
        return result

    def continue_with_tool_result(
        self,
        messages: list,
        tool_use_id: str,
        tool_result: str,
        tools: list = None,
        system: str = None,
        max_tokens: int = 1024
    ) -> dict:
        """
        Continue conversation with tool result.

        Args:
            messages: Previous messages
            tool_use_id: The tool_use block ID
            tool_result: Result from tool execution
            tools: Tool definitions (for potential further calls)
            system: System prompt
            max_tokens: Max tokens

        Returns:
            Same format as chat_with_tools
        """
        # Add tool result to messages
        messages = messages + [
            {
                "role": "user",
                "content": [
                    {
                        "type": "tool_result",
                        "tool_use_id": tool_use_id,
                        "content": tool_result
                    }
                ]
            }
        ]

        return self.chat_with_tools(messages, tools or [], system, max_tokens)


# ═══════════════════════════════════════════════════════════════════════════════
# TEST AGENTS
# ═══════════════════════════════════════════════════════════════════════════════

class AnalystAgent(Agent):
    """Analyst agent that can analyze data - NOW WITH REAL LLM."""
    role = "analyst"
    capabilities = ["analysis", "data_interpretation", "reporting"]

    def __init__(self, agent_id=None):
        super().__init__(agent_id)
        self.requests_received = []
        self.llm = None  # Will be set to use real LLM

    def get_tools(self) -> list:
        """Return tools including peer tools."""
        tools = [
            {
                "name": "statistical_analysis",
                "description": "Run statistical analysis on numeric data",
                "input_schema": {
                    "type": "object",
                    "properties": {
                        "data": {"type": "string", "description": "Data to analyze"}
                    },
                    "required": ["data"]
                }
            },
            {
                "name": "trend_detection",
                "description": "Detect trends and patterns in time series data",
                "input_schema": {
                    "type": "object",
                    "properties": {
                        "data": {"type": "string", "description": "Time series data"}
                    },
                    "required": ["data"]
                }
            }
        ]
        if self.peers:
            tools.extend(self.peers.as_tool().schema)
        return tools

    async def execute_tool(self, tool_name: str, args: dict) -> str:
        """Execute tool."""
        if self.peers and tool_name in self.peers.as_tool().tool_names:
            return await self.peers.as_tool().execute(tool_name, args)
        if tool_name == "statistical_analysis":
            return f"Statistical analysis of '{args.get('data', '')}': mean=150.3, std=23.4, variance=547.6"
        if tool_name == "trend_detection":
            return "Trend analysis: Upward trend detected with 92% confidence, growth rate 3.2% month-over-month"
        return f"Unknown tool: {tool_name}"

    async def process_with_llm(self, query: str) -> str:
        """Process request using real LLM."""
        if not self.llm:
            # Fallback to simple response if no LLM
            return f"Analysis of '{query}': Positive trends detected with 87% confidence."

        system_prompt = (
            "You are an expert data analyst. You specialize in analyzing data, "
            "finding patterns, and providing insights. You have tools for statistical "
            "analysis and trend detection. Be concise but thorough in your analysis. "
            "If you need more data, say so. Respond directly without using tools if "
            "you can provide a good analysis from the data given."
        )

        tools = self.get_tools()
        # Remove peer tools for analyst's own processing (avoid infinite loops)
        tools = [t for t in tools if t["name"] not in ["ask_peer", "broadcast_update", "list_peers"]]

        messages = [{"role": "user", "content": query}]

        logger.info(f"[analyst] Processing with LLM: {query[:50]}...")

        response = self.llm.chat_with_tools(messages, tools, system_prompt)

        # Handle tool use - simpler approach to avoid message format issues
        if response.get("type") == "tool_use":
            tool_name = response["tool_name"]
            tool_args = response["tool_args"]
            tool_use_id = response["tool_use_id"]

            logger.info(f"[analyst] Using tool: {tool_name}")

            tool_result = await self.execute_tool(tool_name, tool_args)

            # Add assistant tool use message
            messages.append({
                "role": "assistant",
                "content": [{"type": "tool_use", "id": tool_use_id, "name": tool_name, "input": tool_args}]
            })

            # Add tool result message
            messages.append({
                "role": "user",
                "content": [{"type": "tool_result", "tool_use_id": tool_use_id, "content": tool_result}]
            })

            # Get final response (don't use more tools to keep it simple)
            response = self.llm.chat_with_tools(messages, [], system_prompt)

        return response.get("content", "Analysis complete.")

    async def run(self):
        """Listen for incoming requests."""
        logger.info(f"[{self.role}] Starting run loop")
        while not self.shutdown_requested:
            msg = await self.peers.receive(timeout=0.5)
            if msg is None:
                continue
            if msg.is_request:
                query = msg.data.get("query", "")
                logger.info(f"[{self.role}] ===== RECEIVED REQUEST =====")
                logger.info(f"[{self.role}] From: {msg.data.get('from', 'unknown')}")
                logger.info(f"[{self.role}] Query: {query}")

                self.requests_received.append(query)

                # Process with LLM
                result = await self.process_with_llm(query)

                logger.info(f"[{self.role}] ===== SENDING RESPONSE =====")
                logger.info(f"[{self.role}] Response: {result[:100]}...")

                await self.peers.respond(msg, {"response": result})

    async def execute_task(self, task):
        return {}


class AssistantAgent(Agent):
    """Assistant agent that coordinates with other agents."""
    role = "assistant"
    capabilities = ["chat", "coordination", "search"]

    def __init__(self, agent_id=None):
        super().__init__(agent_id)
        self.llm = None  # Will be set in test
        self.tool_calls = []

    def get_tools(self) -> list:
        """Return tools including peer tools."""
        tools = [
            {
                "name": "web_search",
                "description": "Search the web for information",
                "input_schema": {
                    "type": "object",
                    "properties": {
                        "query": {"type": "string", "description": "Search query"}
                    },
                    "required": ["query"]
                }
            }
        ]
        if self.peers:
            tools.extend(self.peers.as_tool().schema)
        return tools

    async def execute_tool(self, tool_name: str, args: dict) -> str:
        """Execute tool."""
        self.tool_calls.append({"tool": tool_name, "args": args})

        if self.peers and tool_name in self.peers.as_tool().tool_names:
            return await self.peers.as_tool().execute(tool_name, args)
        if tool_name == "web_search":
            return f"Search results for '{args.get('query', '')}': Found 10 relevant articles."
        return f"Unknown tool: {tool_name}"

    async def chat(self, user_message: str, system_prompt: str = None) -> str:
        """
        Complete LLM chat loop with real tool calling.

        This is the KEY method that demonstrates real LLM tool use.
        """
        if not self.llm:
            raise RuntimeError("LLM not initialized")

        # Default system prompt
        if not system_prompt:
            system_prompt = (
                "You are a helpful assistant. You have access to tools including "
                "the ability to ask other specialist agents for help. "
                "If a user asks for data analysis, you should use the ask_peer tool "
                "to ask the analyst for help. Be concise in your responses."
            )

        # Get tools
        tools = self.get_tools()
        logger.info(f"[assistant] Tools available: {[t['name'] for t in tools]}")

        # Initial message
        messages = [{"role": "user", "content": user_message}]

        # Call LLM
        response = self.llm.chat_with_tools(messages, tools, system_prompt)

        # Handle tool use loop (max 3 iterations)
        iterations = 0
        while response.get("type") == "tool_use" and iterations < 3:
            iterations += 1

            tool_name = response["tool_name"]
            tool_args = response["tool_args"]
            tool_use_id = response["tool_use_id"]

            logger.info(f"[assistant] LLM decided to use tool: {tool_name}")
            logger.info(f"[assistant] Tool args: {tool_args}")

            # Execute the tool
            tool_result = await self.execute_tool(tool_name, tool_args)
            logger.info(f"[assistant] Tool result: {tool_result[:100]}...")

            # Add assistant's tool use to messages
            messages.append({
                "role": "assistant",
                "content": [
                    {
                        "type": "tool_use",
                        "id": tool_use_id,
                        "name": tool_name,
                        "input": tool_args
                    }
                ]
            })

            # Continue with tool result
            response = self.llm.continue_with_tool_result(
                messages, tool_use_id, tool_result, tools, system_prompt
            )

        # Return final text response
        return response.get("content", "No response generated")

    async def run(self):
        """Listen for incoming requests."""
        while not self.shutdown_requested:
            msg = await self.peers.receive(timeout=0.5)
            if msg is None:
                continue
            if msg.is_request:
                # For simplicity, just echo back
                await self.peers.respond(msg, {"response": f"Received: {msg.data}"})

    async def execute_task(self, task):
        return {}


# ═══════════════════════════════════════════════════════════════════════════════
# TESTS
# ═══════════════════════════════════════════════════════════════════════════════

class TestRealLLMIntegration:
    """
    Tests that use REAL LLM API calls.

    These tests verify that the entire tool-use flow works with actual LLM.
    """

    @pytest.fixture
    def real_mesh(self):
        """Create mesh with real LLM-powered agents."""
        mesh = Mesh(mode="p2p")

        analyst = mesh.add(AnalystAgent)
        assistant = mesh.add(AssistantAgent)

        # Wire up peers
        for agent in mesh.agents:
            agent.peers = PeerClient(
                coordinator=None,
                agent_id=agent.agent_id,
                agent_role=agent.role,
                agent_registry=mesh._agent_registry,
                node_id="local"
            )

        # Initialize real LLM for BOTH agents
        assistant.llm = RealLLMClient()
        analyst.llm = RealLLMClient()

        return mesh, analyst, assistant

    @pytest.mark.asyncio
    async def test_llm_sees_peer_tools(self, real_mesh):
        """
        Test 1: Verify LLM receives correct tool schemas.

        This confirms the tool definitions are properly formatted.
        """
        mesh, analyst, assistant = real_mesh

        tools = assistant.get_tools()
        tool_names = [t["name"] for t in tools]

        print("\n" + "="*60)
        print("TEST: LLM sees peer tools")
        print("="*60)
        print(f"Tools available: {tool_names}")

        # Verify peer tools are present
        assert "ask_peer" in tool_names, "ask_peer tool should be available"
        assert "broadcast_update" in tool_names, "broadcast_update should be available"
        assert "list_peers" in tool_names, "list_peers should be available"

        # Verify ask_peer shows analyst
        ask_peer_tool = next(t for t in tools if t["name"] == "ask_peer")
        roles_enum = ask_peer_tool["input_schema"]["properties"]["role"]["enum"]
        print(f"ask_peer roles enum: {roles_enum}")
        assert "analyst" in roles_enum, "analyst should be in ask_peer roles"

        print("PASSED: LLM sees correct peer tools")

    @pytest.mark.asyncio
    async def test_llm_delegates_to_analyst(self, real_mesh):
        """
        Test 2: LLM decides to delegate analysis to analyst peer.

        This is THE key test - proves real LLM uses ask_peer correctly.
        """
        mesh, analyst, assistant = real_mesh

        print("\n" + "="*60)
        print("TEST: LLM delegates to analyst")
        print("="*60)

        # Start analyst listening
        analyst_task = asyncio.create_task(analyst.run())
        await asyncio.sleep(0.2)  # Give time to start

        try:
            # Send a message that SHOULD trigger delegation
            user_message = "Please analyze the Q4 sales data and tell me if there are any concerning trends."

            print(f"\nUser message: {user_message}")
            print("\nCalling LLM...")

            response = await assistant.chat(user_message)

            print(f"\nFinal response: {response}")
            print(f"\nTool calls made: {assistant.tool_calls}")

            # Verify ask_peer was called
            peer_calls = [c for c in assistant.tool_calls if c["tool"] == "ask_peer"]

            assert len(peer_calls) >= 1, "LLM should have used ask_peer tool"
            assert peer_calls[0]["args"]["role"] == "analyst", "Should have asked analyst"

            # Verify analyst received the request
            assert len(analyst.requests_received) >= 1, "Analyst should have received request"

            print("\nPASSED: LLM correctly delegated to analyst!")

        finally:
            analyst.request_shutdown()
            analyst_task.cancel()
            try:
                await analyst_task
            except asyncio.CancelledError:
                pass

    @pytest.mark.asyncio
    async def test_llm_uses_local_tool_when_appropriate(self, real_mesh):
        """
        Test 3: LLM uses local tool (web_search) when appropriate.

        This verifies LLM doesn't ALWAYS delegate - it chooses correctly.
        """
        mesh, analyst, assistant = real_mesh

        print("\n" + "="*60)
        print("TEST: LLM uses local tool when appropriate")
        print("="*60)

        # Send a message that should use web_search, not ask_peer
        user_message = "Search the web for the latest Python 3.12 features."

        print(f"\nUser message: {user_message}")
        print("\nCalling LLM...")

        response = await assistant.chat(user_message)

        print(f"\nFinal response: {response}")
        print(f"\nTool calls made: {assistant.tool_calls}")

        # Check if web_search was used
        search_calls = [c for c in assistant.tool_calls if c["tool"] == "web_search"]

        # Note: LLM might not use any tool, or might use search
        # The key is it shouldn't use ask_peer for a search request
        peer_calls = [c for c in assistant.tool_calls if c["tool"] == "ask_peer"]

        print(f"\nSearch calls: {len(search_calls)}, Peer calls: {len(peer_calls)}")

        # If LLM used tools, it should prefer search over analyst for this query
        if assistant.tool_calls:
            # Either used search, or if it used ask_peer it should be rare
            print("LLM made tool calls - checking they were appropriate")
        else:
            print("LLM responded directly without tools (also valid)")

        print("\nPASSED: LLM made appropriate tool choice")

    @pytest.mark.asyncio
    async def test_llm_responds_directly_when_no_tools_needed(self, real_mesh):
        """
        Test 4: LLM responds directly when no tools are needed.
        """
        mesh, analyst, assistant = real_mesh

        print("\n" + "="*60)
        print("TEST: LLM responds directly when no tools needed")
        print("="*60)

        # Send a simple message that doesn't need tools
        user_message = "Hello! How are you today?"

        print(f"\nUser message: {user_message}")
        print("\nCalling LLM...")

        response = await assistant.chat(user_message)

        print(f"\nFinal response: {response}")
        print(f"\nTool calls made: {assistant.tool_calls}")

        # For a greeting, LLM typically shouldn't need tools
        # But this isn't a hard requirement - just informational
        print(f"\nTool calls count: {len(assistant.tool_calls)}")

        assert response, "Should have gotten a response"
        print("\nPASSED: LLM responded appropriately")

    @pytest.mark.asyncio
    async def test_full_conversation_flow(self, real_mesh):
        """
        Test 5: Full conversation with multiple turns and tool use.
        """
        mesh, analyst, assistant = real_mesh

        print("\n" + "="*60)
        print("TEST: Full conversation flow")
        print("="*60)

        # Start analyst
        analyst_task = asyncio.create_task(analyst.run())
        await asyncio.sleep(0.2)

        try:
            # Turn 1: Analysis request
            print("\n--- Turn 1: Analysis Request ---")
            response1 = await assistant.chat(
                "I need you to analyze our customer retention data."
            )
            print(f"Response: {response1[:200]}...")

            # Reset tool calls for next turn
            assistant.tool_calls = []

            # Turn 2: Follow-up (might or might not need tools)
            print("\n--- Turn 2: Follow-up ---")
            response2 = await assistant.chat(
                "What does that analysis suggest we should do?"
            )
            print(f"Response: {response2[:200]}...")

            print("\n" + "="*60)
            print("FULL CONVERSATION TEST COMPLETE")
            print("="*60)

        finally:
            analyst.request_shutdown()
            analyst_task.cancel()
            try:
                await analyst_task
            except asyncio.CancelledError:
                pass


# ═══════════════════════════════════════════════════════════════════════════════
# MANUAL RUN
# ═══════════════════════════════════════════════════════════════════════════════

async def run_manual_test():
    """Run a manual test with verbose output."""
    print("\n" + "="*70)
    print("REAL LLM INTEGRATION TEST - BOTH AGENTS USE LLM")
    print("="*70)

    # Setup mesh
    mesh = Mesh(mode="p2p")

    analyst = mesh.add(AnalystAgent)
    assistant = mesh.add(AssistantAgent)

    for agent in mesh.agents:
        agent.peers = PeerClient(
            coordinator=None,
            agent_id=agent.agent_id,
            agent_role=agent.role,
            agent_registry=mesh._agent_registry,
            node_id="local"
        )

    # Initialize real LLM for BOTH agents
    assistant.llm = RealLLMClient()
    analyst.llm = RealLLMClient()
    print("\n[SETUP] Both assistant AND analyst have their own LLM!")

    # Start analyst
    analyst_task = asyncio.create_task(analyst.run())
    await asyncio.sleep(0.2)

    print("\n[SETUP] Mesh created with:")
    print(f"  - Analyst (role: {analyst.role})")
    print(f"  - Assistant (role: {assistant.role})")
    print(f"  - Assistant sees peers: {[p['role'] for p in assistant.peers.list_peers()]}")

    print("\n[TOOLS] Assistant has these tools:")
    for tool in assistant.get_tools():
        print(f"  - {tool['name']}: {tool['description'][:60]}...")

    # Test conversation
    print("\n" + "-"*70)
    print("CONVERSATION TEST")
    print("-"*70)

    try:
        # Message that should trigger ask_peer - WITH ACTUAL DATA
        message = """Please analyze this monthly revenue data and identify any anomalies:

Jan: $120,000
Feb: $125,000
Mar: $118,000
Apr: $245,000 (big spike!)
May: $130,000
Jun: $128,000
Jul: $15,000 (big drop!)
Aug: $135,000
Sep: $140,000
Oct: $142,000
Nov: $155,000
Dec: $180,000

What patterns do you see? Are April and July anomalies?"""

        print(f"\n[USER] {message}")
        print("\n[ASSISTANT LLM PROCESSING...]")

        response = await assistant.chat(message)

        print(f"\n[ASSISTANT] {response}")

        print("\n[TOOL CALLS MADE]")
        for call in assistant.tool_calls:
            print(f"  - {call['tool']}: {call['args']}")

        print("\n[ANALYST RECEIVED]")
        for req in analyst.requests_received:
            print(f"  - {req}")

    finally:
        analyst.request_shutdown()
        analyst_task.cancel()
        try:
            await analyst_task
        except asyncio.CancelledError:
            pass

    print("\n" + "="*70)
    print("TEST COMPLETE")
    print("="*70)


if __name__ == "__main__":
    # Load environment
    from dotenv import load_dotenv
    load_dotenv()

    # Run manual test
    asyncio.run(run_manual_test())