jarviscore-framework 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as published to their public registry. It is provided for informational purposes only.
Files changed (85)
  1. examples/autoagent_distributed_example.py +211 -0
  2. examples/custom_profile_decorator.py +134 -0
  3. examples/custom_profile_wrap.py +168 -0
  4. examples/customagent_distributed_example.py +362 -0
  5. examples/customagent_p2p_example.py +347 -0
  6. jarviscore/__init__.py +49 -36
  7. jarviscore/adapter/__init__.py +15 -9
  8. jarviscore/adapter/decorator.py +23 -19
  9. jarviscore/adapter/wrapper.py +303 -0
  10. jarviscore/cli/scaffold.py +1 -1
  11. jarviscore/cli/smoketest.py +3 -2
  12. jarviscore/core/agent.py +44 -1
  13. jarviscore/core/mesh.py +196 -35
  14. jarviscore/data/examples/autoagent_distributed_example.py +211 -0
  15. jarviscore/data/examples/customagent_distributed_example.py +362 -0
  16. jarviscore/data/examples/customagent_p2p_example.py +347 -0
  17. jarviscore/docs/API_REFERENCE.md +264 -51
  18. jarviscore/docs/AUTOAGENT_GUIDE.md +198 -0
  19. jarviscore/docs/CONFIGURATION.md +35 -21
  20. jarviscore/docs/CUSTOMAGENT_GUIDE.md +415 -0
  21. jarviscore/docs/GETTING_STARTED.md +106 -13
  22. jarviscore/docs/TROUBLESHOOTING.md +144 -6
  23. jarviscore/docs/USER_GUIDE.md +138 -361
  24. jarviscore/orchestration/engine.py +20 -8
  25. jarviscore/p2p/__init__.py +10 -0
  26. jarviscore/p2p/coordinator.py +129 -0
  27. jarviscore/p2p/messages.py +87 -0
  28. jarviscore/p2p/peer_client.py +576 -0
  29. jarviscore/p2p/peer_tool.py +268 -0
  30. {jarviscore_framework-0.1.1.dist-info → jarviscore_framework-0.2.0.dist-info}/METADATA +60 -54
  31. jarviscore_framework-0.2.0.dist-info/RECORD +132 -0
  32. {jarviscore_framework-0.1.1.dist-info → jarviscore_framework-0.2.0.dist-info}/WHEEL +1 -1
  33. {jarviscore_framework-0.1.1.dist-info → jarviscore_framework-0.2.0.dist-info}/top_level.txt +1 -0
  34. test_logs/code_registry/functions/data_generator-558779ed_560ebc37.py +7 -0
  35. test_logs/code_registry/functions/data_generator-5ed3609e_560ebc37.py +7 -0
  36. test_logs/code_registry/functions/data_generator-66da0356_43970bb9.py +25 -0
  37. test_logs/code_registry/functions/data_generator-7a2fac83_583709d9.py +36 -0
  38. test_logs/code_registry/functions/data_generator-888b670f_aa235863.py +9 -0
  39. test_logs/code_registry/functions/data_generator-9ca5f642_aa235863.py +9 -0
  40. test_logs/code_registry/functions/data_generator-bfd90775_560ebc37.py +7 -0
  41. test_logs/code_registry/functions/data_generator-e95d2f7d_aa235863.py +9 -0
  42. test_logs/code_registry/functions/data_generator-f60ca8a2_327eb8c2.py +29 -0
  43. test_logs/code_registry/functions/mathematician-02adf9ee_958658d9.py +19 -0
  44. test_logs/code_registry/functions/mathematician-0706fb57_5df13441.py +23 -0
  45. test_logs/code_registry/functions/mathematician-153c9c4a_ba59c918.py +83 -0
  46. test_logs/code_registry/functions/mathematician-287e61c0_41daa793.py +18 -0
  47. test_logs/code_registry/functions/mathematician-2967af5a_863c2cc6.py +17 -0
  48. test_logs/code_registry/functions/mathematician-303ca6d6_5df13441.py +23 -0
  49. test_logs/code_registry/functions/mathematician-308a4afd_cbf5064d.py +73 -0
  50. test_logs/code_registry/functions/mathematician-353f16e2_0968bcf5.py +18 -0
  51. test_logs/code_registry/functions/mathematician-3c22475a_41daa793.py +17 -0
  52. test_logs/code_registry/functions/mathematician-5bac1029_0968bcf5.py +18 -0
  53. test_logs/code_registry/functions/mathematician-640f76b2_9198780b.py +19 -0
  54. test_logs/code_registry/functions/mathematician-752fa7ea_863c2cc6.py +17 -0
  55. test_logs/code_registry/functions/mathematician-baf9ef39_0968bcf5.py +18 -0
  56. test_logs/code_registry/functions/mathematician-bc8b2a2f_5df13441.py +23 -0
  57. test_logs/code_registry/functions/mathematician-c31e4686_41daa793.py +18 -0
  58. test_logs/code_registry/functions/mathematician-cc84c84c_863c2cc6.py +17 -0
  59. test_logs/code_registry/functions/mathematician-dd7c7144_9198780b.py +19 -0
  60. test_logs/code_registry/functions/mathematician-e671c256_41ea4487.py +74 -0
  61. test_logs/code_registry/functions/report_generator-1a878fcc_18d44bdc.py +47 -0
  62. test_logs/code_registry/functions/report_generator-25c1c331_cea57d0d.py +35 -0
  63. test_logs/code_registry/functions/report_generator-37552117_e711c2b9.py +35 -0
  64. test_logs/code_registry/functions/report_generator-bc662768_e711c2b9.py +35 -0
  65. test_logs/code_registry/functions/report_generator-d6c0e76b_5e7722ec.py +44 -0
  66. test_logs/code_registry/functions/report_generator-f270fb02_680529c3.py +44 -0
  67. test_logs/code_registry/functions/text_processor-11393b14_4370d3ed.py +40 -0
  68. test_logs/code_registry/functions/text_processor-7d02dfc3_d3b569be.py +37 -0
  69. test_logs/code_registry/functions/text_processor-8adb5e32_9168c5fe.py +13 -0
  70. test_logs/code_registry/functions/text_processor-c58ffc19_78b4ceac.py +42 -0
  71. test_logs/code_registry/functions/text_processor-cd5977b1_9168c5fe.py +13 -0
  72. test_logs/code_registry/functions/text_processor-ec1c8773_9168c5fe.py +13 -0
  73. tests/test_01_analyst_standalone.py +124 -0
  74. tests/test_02_assistant_standalone.py +164 -0
  75. tests/test_03_analyst_with_framework.py +945 -0
  76. tests/test_04_assistant_with_framework.py +1002 -0
  77. tests/test_05_integration.py +1301 -0
  78. tests/test_06_real_llm_integration.py +760 -0
  79. tests/test_07_distributed_single_node.py +578 -0
  80. tests/test_08_distributed_multi_node.py +454 -0
  81. tests/test_09_distributed_autoagent.py +509 -0
  82. tests/test_10_distributed_customagent.py +787 -0
  83. tests/test_mesh.py +35 -4
  84. jarviscore_framework-0.1.1.dist-info/RECORD +0 -69
  85. {jarviscore_framework-0.1.1.dist-info → jarviscore_framework-0.2.0.dist-info}/licenses/LICENSE +0 -0
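
The single hunk reproduced below is the new real-LLM integration test (item 78 above, tests/test_06_real_llm_integration.py). It exercises the peer-to-peer tooling introduced in 0.2.0 (jarviscore/p2p/peer_client.py, peer_tool.py, coordinator.py): each agent in a Mesh is given a PeerClient, and its peers are then exposed to the LLM as ordinary tools (ask_peer, broadcast_update, list_peers). As orientation before reading the full test, the wiring pattern it relies on looks roughly like the sketch below. This is distilled from the test code itself, not from documented API guarantees, and DemoAgent is a hypothetical stand-in for the Analyst/Assistant agents defined in the test.

    # Minimal sketch distilled from the test shown below; illustrative only.
    from jarviscore.core.agent import Agent
    from jarviscore.core.mesh import Mesh
    from jarviscore.p2p.peer_client import PeerClient

    class DemoAgent(Agent):
        """Hypothetical stand-in for the test's Analyst/Assistant agents."""
        role = "demo"
        capabilities = ["demo"]

        async def run(self):
            pass  # a real agent would poll self.peers.receive() here, as in the test

        async def execute_task(self, task):
            return {}

    mesh = Mesh(mode="p2p")
    first = mesh.add(DemoAgent)
    second = mesh.add(DemoAgent)

    # Same wiring the test fixture uses: every agent gets a PeerClient,
    # after which its peers surface to the LLM as ordinary tools.
    for agent in mesh.agents:
        agent.peers = PeerClient(
            coordinator=None,                     # single-process mesh, no coordinator
            agent_id=agent.agent_id,
            agent_role=agent.role,
            agent_registry=mesh._agent_registry,  # private attribute, used this way by the test
            node_id="local",
        )

    peer_tool = first.peers.as_tool()
    print([t["name"] for t in peer_tool.schema])  # expect: ask_peer, broadcast_update, list_peers
    # Inside an agent's LLM loop, delegation is then just another tool call:
    #     await peer_tool.execute("ask_peer", {"role": "analyst", "query": "..."})

The tests in the hunk drive this same surface through a real Anthropic client (RealLLMClient) rather than calling peer_tool.execute directly.
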
tests/test_06_real_llm_integration.py (new file)
@@ -0,0 +1,760 @@
+ """
+ Test 6: Real LLM Integration Test
+
+ This test uses ACTUAL LLM API calls (not mocks) to verify that:
+ 1. The LLM correctly sees peer tools in the tool list
+ 2. The LLM decides to use ask_peer when appropriate
+ 3. The tool execution works end-to-end
+ 4. The response flows back correctly
+
+ IMPORTANT: This test makes real API calls and costs money.
+ Run with: pytest tests/test_06_real_llm_integration.py -v -s
+
+ Prerequisites:
+ - .env file with CLAUDE_API_KEY (or other provider keys)
+ - Network connectivity
+ """
+ import asyncio
+ import os
+ import sys
+ import pytest
+ import logging
+
+ sys.path.insert(0, '.')
+
+ from jarviscore.core.agent import Agent
+ from jarviscore.core.mesh import Mesh
+ from jarviscore.p2p.peer_client import PeerClient
+
+ # Setup logging to see what's happening
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+ # Skip all tests if no API key is configured
+ try:
+     from jarviscore.config import settings
+     HAS_API_KEY = bool(
+         settings.claude_api_key or
+         settings.azure_api_key or
+         settings.gemini_api_key
+     )
+ except Exception:
+     HAS_API_KEY = False
+
+ pytestmark = pytest.mark.skipif(
+     not HAS_API_KEY,
+     reason="No LLM API key configured in .env"
+ )
+
+
+ # ═══════════════════════════════════════════════════════════════════════════════
+ # REAL LLM CLIENT WITH TOOL SUPPORT
+ # ═══════════════════════════════════════════════════════════════════════════════
+
+ class RealLLMClient:
+     """
+     Real LLM client with native tool calling support.
+
+     Uses Anthropic Claude API directly for proper tool_use handling.
+     """
+
+     def __init__(self):
+         from anthropic import Anthropic
+         from jarviscore.config import settings
+
+         # Get API key and endpoint
+         api_key = settings.claude_api_key
+         endpoint = settings.claude_endpoint
+
+         if not api_key:
+             raise RuntimeError("No Claude API key found in settings")
+
+         # Initialize client
+         if endpoint:
+             self.client = Anthropic(api_key=api_key, base_url=endpoint)
+         else:
+             self.client = Anthropic(api_key=api_key)
+
+         self.model = settings.claude_model or "claude-sonnet-4-20250514"
+         logger.info(f"RealLLMClient initialized with model: {self.model}")
+
+     def chat_with_tools(
+         self,
+         messages: list,
+         tools: list,
+         system: str = None,
+         max_tokens: int = 1024
+     ) -> dict:
+         """
+         Send chat with tools and get response.
+
+         Args:
+             messages: List of message dicts [{"role": "user", "content": "..."}]
+             tools: List of tool definitions in Anthropic format
+             system: Optional system prompt
+             max_tokens: Max tokens to generate
+
+         Returns:
+             {
+                 "type": "text" | "tool_use",
+                 "content": str,     # if text
+                 "tool_name": str,   # if tool_use
+                 "tool_args": dict,  # if tool_use
+                 "tool_use_id": str  # if tool_use
+             }
+         """
+         # Build request
+         request_kwargs = {
+             "model": self.model,
+             "max_tokens": max_tokens,
+             "messages": messages,
+         }
+
+         if system:
+             request_kwargs["system"] = system
+
+         if tools:
+             request_kwargs["tools"] = tools
+
+         # Make the call
+         logger.info(f"Calling LLM with {len(messages)} messages and {len(tools)} tools")
+         response = self.client.messages.create(**request_kwargs)
+
+         # Parse response
+         result = {"stop_reason": response.stop_reason}
+
+         for block in response.content:
+             if block.type == "text":
+                 result["type"] = "text"
+                 result["content"] = block.text
+             elif block.type == "tool_use":
+                 result["type"] = "tool_use"
+                 result["tool_name"] = block.name
+                 result["tool_args"] = block.input
+                 result["tool_use_id"] = block.id
+
+         logger.info(f"LLM response type: {result.get('type')}")
+         return result
+
+     def continue_with_tool_result(
+         self,
+         messages: list,
+         tool_use_id: str,
+         tool_result: str,
+         tools: list = None,
+         system: str = None,
+         max_tokens: int = 1024
+     ) -> dict:
+         """
+         Continue conversation with tool result.
+
+         Args:
+             messages: Previous messages
+             tool_use_id: The tool_use block ID
+             tool_result: Result from tool execution
+             tools: Tool definitions (for potential further calls)
+             system: System prompt
+             max_tokens: Max tokens
+
+         Returns:
+             Same format as chat_with_tools
+         """
+         # Add tool result to messages
+         messages = messages + [
+             {
+                 "role": "user",
+                 "content": [
+                     {
+                         "type": "tool_result",
+                         "tool_use_id": tool_use_id,
+                         "content": tool_result
+                     }
+                 ]
+             }
+         ]
+
+         return self.chat_with_tools(messages, tools or [], system, max_tokens)
+
+
+ # ═══════════════════════════════════════════════════════════════════════════════
+ # TEST AGENTS
+ # ═══════════════════════════════════════════════════════════════════════════════
+
+ class AnalystAgent(Agent):
+     """Analyst agent that can analyze data - NOW WITH REAL LLM."""
+     role = "analyst"
+     capabilities = ["analysis", "data_interpretation", "reporting"]
+
+     def __init__(self, agent_id=None):
+         super().__init__(agent_id)
+         self.requests_received = []
+         self.llm = None  # Will be set to use real LLM
+
+     def get_tools(self) -> list:
+         """Return tools including peer tools."""
+         tools = [
+             {
+                 "name": "statistical_analysis",
+                 "description": "Run statistical analysis on numeric data",
+                 "input_schema": {
+                     "type": "object",
+                     "properties": {
+                         "data": {"type": "string", "description": "Data to analyze"}
+                     },
+                     "required": ["data"]
+                 }
+             },
+             {
+                 "name": "trend_detection",
+                 "description": "Detect trends and patterns in time series data",
+                 "input_schema": {
+                     "type": "object",
+                     "properties": {
+                         "data": {"type": "string", "description": "Time series data"}
+                     },
+                     "required": ["data"]
+                 }
+             }
+         ]
+         if self.peers:
+             tools.extend(self.peers.as_tool().schema)
+         return tools
+
+     async def execute_tool(self, tool_name: str, args: dict) -> str:
+         """Execute tool."""
+         if self.peers and tool_name in self.peers.as_tool().tool_names:
+             return await self.peers.as_tool().execute(tool_name, args)
+         if tool_name == "statistical_analysis":
+             return f"Statistical analysis of '{args.get('data', '')}': mean=150.3, std=23.4, variance=547.6"
+         if tool_name == "trend_detection":
+             return f"Trend analysis: Upward trend detected with 92% confidence, growth rate 3.2% month-over-month"
+         return f"Unknown tool: {tool_name}"
+
+     async def process_with_llm(self, query: str) -> str:
+         """Process request using real LLM."""
+         if not self.llm:
+             # Fallback to simple response if no LLM
+             return f"Analysis of '{query}': Positive trends detected with 87% confidence."
+
+         system_prompt = (
+             "You are an expert data analyst. You specialize in analyzing data, "
+             "finding patterns, and providing insights. You have tools for statistical "
+             "analysis and trend detection. Be concise but thorough in your analysis. "
+             "If you need more data, say so. Respond directly without using tools if "
+             "you can provide a good analysis from the data given."
+         )
+
+         tools = self.get_tools()
+         # Remove peer tools for analyst's own processing (avoid infinite loops)
+         tools = [t for t in tools if t["name"] not in ["ask_peer", "broadcast_update", "list_peers"]]
+
+         messages = [{"role": "user", "content": query}]
+
+         logger.info(f"[analyst] Processing with LLM: {query[:50]}...")
+
+         response = self.llm.chat_with_tools(messages, tools, system_prompt)
+
+         # Handle tool use - simpler approach to avoid message format issues
+         if response.get("type") == "tool_use":
+             tool_name = response["tool_name"]
+             tool_args = response["tool_args"]
+             tool_use_id = response["tool_use_id"]
+
+             logger.info(f"[analyst] Using tool: {tool_name}")
+
+             tool_result = await self.execute_tool(tool_name, tool_args)
+
+             # Add assistant tool use message
+             messages.append({
+                 "role": "assistant",
+                 "content": [{"type": "tool_use", "id": tool_use_id, "name": tool_name, "input": tool_args}]
+             })
+
+             # Add tool result message
+             messages.append({
+                 "role": "user",
+                 "content": [{"type": "tool_result", "tool_use_id": tool_use_id, "content": tool_result}]
+             })
+
+             # Get final response (don't use more tools to keep it simple)
+             response = self.llm.chat_with_tools(messages, [], system_prompt)
+
+         return response.get("content", "Analysis complete.")
+
+     async def run(self):
+         """Listen for incoming requests."""
+         logger.info(f"[{self.role}] Starting run loop")
+         while not self.shutdown_requested:
+             msg = await self.peers.receive(timeout=0.5)
+             if msg is None:
+                 continue
+             if msg.is_request:
+                 query = msg.data.get("query", "")
+                 logger.info(f"[{self.role}] ===== RECEIVED REQUEST =====")
+                 logger.info(f"[{self.role}] From: {msg.data.get('from', 'unknown')}")
+                 logger.info(f"[{self.role}] Query: {query}")
+
+                 self.requests_received.append(query)
+
+                 # Process with LLM
+                 result = await self.process_with_llm(query)
+
+                 logger.info(f"[{self.role}] ===== SENDING RESPONSE =====")
+                 logger.info(f"[{self.role}] Response: {result[:100]}...")
+
+                 await self.peers.respond(msg, {"response": result})
+
+     async def execute_task(self, task):
+         return {}
+
+
+ class AssistantAgent(Agent):
+     """Assistant agent that coordinates with other agents."""
+     role = "assistant"
+     capabilities = ["chat", "coordination", "search"]
+
+     def __init__(self, agent_id=None):
+         super().__init__(agent_id)
+         self.llm = None  # Will be set in test
+         self.tool_calls = []
+
+     def get_tools(self) -> list:
+         """Return tools including peer tools."""
+         tools = [
+             {
+                 "name": "web_search",
+                 "description": "Search the web for information",
+                 "input_schema": {
+                     "type": "object",
+                     "properties": {
+                         "query": {"type": "string", "description": "Search query"}
+                     },
+                     "required": ["query"]
+                 }
+             }
+         ]
+         if self.peers:
+             tools.extend(self.peers.as_tool().schema)
+         return tools
+
+     async def execute_tool(self, tool_name: str, args: dict) -> str:
+         """Execute tool."""
+         self.tool_calls.append({"tool": tool_name, "args": args})
+
+         if self.peers and tool_name in self.peers.as_tool().tool_names:
+             return await self.peers.as_tool().execute(tool_name, args)
+         if tool_name == "web_search":
+             return f"Search results for '{args.get('query', '')}': Found 10 relevant articles."
+         return f"Unknown tool: {tool_name}"
+
+     async def chat(self, user_message: str, system_prompt: str = None) -> str:
+         """
+         Complete LLM chat loop with real tool calling.
+
+         This is the KEY method that demonstrates real LLM tool use.
+         """
+         if not self.llm:
+             raise RuntimeError("LLM not initialized")
+
+         # Default system prompt
+         if not system_prompt:
+             system_prompt = (
+                 "You are a helpful assistant. You have access to tools including "
+                 "the ability to ask other specialist agents for help. "
+                 "If a user asks for data analysis, you should use the ask_peer tool "
+                 "to ask the analyst for help. Be concise in your responses."
+             )
+
+         # Get tools
+         tools = self.get_tools()
+         logger.info(f"[assistant] Tools available: {[t['name'] for t in tools]}")
+
+         # Initial message
+         messages = [{"role": "user", "content": user_message}]
+
+         # Call LLM
+         response = self.llm.chat_with_tools(messages, tools, system_prompt)
+
+         # Handle tool use loop (max 3 iterations)
+         iterations = 0
+         while response.get("type") == "tool_use" and iterations < 3:
+             iterations += 1
+
+             tool_name = response["tool_name"]
+             tool_args = response["tool_args"]
+             tool_use_id = response["tool_use_id"]
+
+             logger.info(f"[assistant] LLM decided to use tool: {tool_name}")
+             logger.info(f"[assistant] Tool args: {tool_args}")
+
+             # Execute the tool
+             tool_result = await self.execute_tool(tool_name, tool_args)
+             logger.info(f"[assistant] Tool result: {tool_result[:100]}...")
+
+             # Add assistant's tool use to messages
+             messages.append({
+                 "role": "assistant",
+                 "content": [
+                     {
+                         "type": "tool_use",
+                         "id": tool_use_id,
+                         "name": tool_name,
+                         "input": tool_args
+                     }
+                 ]
+             })
+
+             # Continue with tool result
+             response = self.llm.continue_with_tool_result(
+                 messages, tool_use_id, tool_result, tools, system_prompt
+             )
+
+         # Return final text response
+         return response.get("content", "No response generated")
+
+     async def run(self):
+         """Listen for incoming requests."""
+         while not self.shutdown_requested:
+             msg = await self.peers.receive(timeout=0.5)
+             if msg is None:
+                 continue
+             if msg.is_request:
+                 # For simplicity, just echo back
+                 await self.peers.respond(msg, {"response": f"Received: {msg.data}"})
+
+     async def execute_task(self, task):
+         return {}
+
+
+ # ═══════════════════════════════════════════════════════════════════════════════
+ # TESTS
+ # ═══════════════════════════════════════════════════════════════════════════════
+
+ class TestRealLLMIntegration:
+     """
+     Tests that use REAL LLM API calls.
+
+     These tests verify that the entire tool-use flow works with actual LLM.
+     """
+
+     @pytest.fixture
+     def real_mesh(self):
+         """Create mesh with real LLM-powered agents."""
+         mesh = Mesh(mode="p2p")
+
+         analyst = mesh.add(AnalystAgent)
+         assistant = mesh.add(AssistantAgent)
+
+         # Wire up peers
+         for agent in mesh.agents:
+             agent.peers = PeerClient(
+                 coordinator=None,
+                 agent_id=agent.agent_id,
+                 agent_role=agent.role,
+                 agent_registry=mesh._agent_registry,
+                 node_id="local"
+             )
+
+         # Initialize real LLM for BOTH agents
+         assistant.llm = RealLLMClient()
+         analyst.llm = RealLLMClient()
+
+         return mesh, analyst, assistant
+
+     @pytest.mark.asyncio
+     async def test_llm_sees_peer_tools(self, real_mesh):
+         """
+         Test 1: Verify LLM receives correct tool schemas.
+
+         This confirms the tool definitions are properly formatted.
+         """
+         mesh, analyst, assistant = real_mesh
+
+         tools = assistant.get_tools()
+         tool_names = [t["name"] for t in tools]
+
+         print("\n" + "="*60)
+         print("TEST: LLM sees peer tools")
+         print("="*60)
+         print(f"Tools available: {tool_names}")
+
+         # Verify peer tools are present
+         assert "ask_peer" in tool_names, "ask_peer tool should be available"
+         assert "broadcast_update" in tool_names, "broadcast_update should be available"
+         assert "list_peers" in tool_names, "list_peers should be available"
+
+         # Verify ask_peer shows analyst
+         ask_peer_tool = next(t for t in tools if t["name"] == "ask_peer")
+         roles_enum = ask_peer_tool["input_schema"]["properties"]["role"]["enum"]
+         print(f"ask_peer roles enum: {roles_enum}")
+         assert "analyst" in roles_enum, "analyst should be in ask_peer roles"
+
+         print("PASSED: LLM sees correct peer tools")
+
+     @pytest.mark.asyncio
+     async def test_llm_delegates_to_analyst(self, real_mesh):
+         """
+         Test 2: LLM decides to delegate analysis to analyst peer.
+
+         This is THE key test - proves real LLM uses ask_peer correctly.
+         """
+         mesh, analyst, assistant = real_mesh
+
+         print("\n" + "="*60)
+         print("TEST: LLM delegates to analyst")
+         print("="*60)
+
+         # Start analyst listening
+         analyst_task = asyncio.create_task(analyst.run())
+         await asyncio.sleep(0.2)  # Give time to start
+
+         try:
+             # Send a message that SHOULD trigger delegation
+             user_message = "Please analyze the Q4 sales data and tell me if there are any concerning trends."
+
+             print(f"\nUser message: {user_message}")
+             print("\nCalling LLM...")
+
+             response = await assistant.chat(user_message)
+
+             print(f"\nFinal response: {response}")
+             print(f"\nTool calls made: {assistant.tool_calls}")
+
+             # Verify ask_peer was called
+             peer_calls = [c for c in assistant.tool_calls if c["tool"] == "ask_peer"]
+
+             assert len(peer_calls) >= 1, "LLM should have used ask_peer tool"
+             assert peer_calls[0]["args"]["role"] == "analyst", "Should have asked analyst"
+
+             # Verify analyst received the request
+             assert len(analyst.requests_received) >= 1, "Analyst should have received request"
+
+             print("\nPASSED: LLM correctly delegated to analyst!")
+
+         finally:
+             analyst.request_shutdown()
+             analyst_task.cancel()
+             try:
+                 await analyst_task
+             except asyncio.CancelledError:
+                 pass
+
+     @pytest.mark.asyncio
+     async def test_llm_uses_local_tool_when_appropriate(self, real_mesh):
+         """
+         Test 3: LLM uses local tool (web_search) when appropriate.
+
+         This verifies LLM doesn't ALWAYS delegate - it chooses correctly.
+         """
+         mesh, analyst, assistant = real_mesh
+
+         print("\n" + "="*60)
+         print("TEST: LLM uses local tool when appropriate")
+         print("="*60)
+
+         # Send a message that should use web_search, not ask_peer
+         user_message = "Search the web for the latest Python 3.12 features."
+
+         print(f"\nUser message: {user_message}")
+         print("\nCalling LLM...")
+
+         response = await assistant.chat(user_message)
+
+         print(f"\nFinal response: {response}")
+         print(f"\nTool calls made: {assistant.tool_calls}")
+
+         # Check if web_search was used
+         search_calls = [c for c in assistant.tool_calls if c["tool"] == "web_search"]
+
+         # Note: LLM might not use any tool, or might use search
+         # The key is it shouldn't use ask_peer for a search request
+         peer_calls = [c for c in assistant.tool_calls if c["tool"] == "ask_peer"]
+
+         print(f"\nSearch calls: {len(search_calls)}, Peer calls: {len(peer_calls)}")
+
+         # If LLM used tools, it should prefer search over analyst for this query
+         if assistant.tool_calls:
+             # Either used search, or if it used ask_peer it should be rare
+             print("LLM made tool calls - checking they were appropriate")
+         else:
+             print("LLM responded directly without tools (also valid)")
+
+         print("\nPASSED: LLM made appropriate tool choice")
+
+     @pytest.mark.asyncio
+     async def test_llm_responds_directly_when_no_tools_needed(self, real_mesh):
+         """
+         Test 4: LLM responds directly when no tools are needed.
+         """
+         mesh, analyst, assistant = real_mesh
+
+         print("\n" + "="*60)
+         print("TEST: LLM responds directly when no tools needed")
+         print("="*60)
+
+         # Send a simple message that doesn't need tools
+         user_message = "Hello! How are you today?"
+
+         print(f"\nUser message: {user_message}")
+         print("\nCalling LLM...")
+
+         response = await assistant.chat(user_message)
+
+         print(f"\nFinal response: {response}")
+         print(f"\nTool calls made: {assistant.tool_calls}")
+
+         # For a greeting, LLM typically shouldn't need tools
+         # But this isn't a hard requirement - just informational
+         print(f"\nTool calls count: {len(assistant.tool_calls)}")
+
+         assert response, "Should have gotten a response"
+         print("\nPASSED: LLM responded appropriately")
+
+     @pytest.mark.asyncio
+     async def test_full_conversation_flow(self, real_mesh):
+         """
+         Test 5: Full conversation with multiple turns and tool use.
+         """
+         mesh, analyst, assistant = real_mesh
+
+         print("\n" + "="*60)
+         print("TEST: Full conversation flow")
+         print("="*60)
+
+         # Start analyst
+         analyst_task = asyncio.create_task(analyst.run())
+         await asyncio.sleep(0.2)
+
+         try:
+             # Turn 1: Analysis request
+             print("\n--- Turn 1: Analysis Request ---")
+             response1 = await assistant.chat(
+                 "I need you to analyze our customer retention data."
+             )
+             print(f"Response: {response1[:200]}...")
+
+             # Reset tool calls for next turn
+             assistant.tool_calls = []
+
+             # Turn 2: Follow-up (might or might not need tools)
+             print("\n--- Turn 2: Follow-up ---")
+             response2 = await assistant.chat(
+                 "What does that analysis suggest we should do?"
+             )
+             print(f"Response: {response2[:200]}...")
+
+             print("\n" + "="*60)
+             print("FULL CONVERSATION TEST COMPLETE")
+             print("="*60)
+
+         finally:
+             analyst.request_shutdown()
+             analyst_task.cancel()
+             try:
+                 await analyst_task
+             except asyncio.CancelledError:
+                 pass
+
+
+ # ═══════════════════════════════════════════════════════════════════════════════
+ # MANUAL RUN
+ # ═══════════════════════════════════════════════════════════════════════════════
+
+ async def run_manual_test():
+     """Run a manual test with verbose output."""
+     print("\n" + "="*70)
+     print("REAL LLM INTEGRATION TEST - BOTH AGENTS USE LLM")
+     print("="*70)
+
+     # Setup mesh
+     mesh = Mesh(mode="p2p")
+
+     analyst = mesh.add(AnalystAgent)
+     assistant = mesh.add(AssistantAgent)
+
+     for agent in mesh.agents:
+         agent.peers = PeerClient(
+             coordinator=None,
+             agent_id=agent.agent_id,
+             agent_role=agent.role,
+             agent_registry=mesh._agent_registry,
+             node_id="local"
+         )
+
+     # Initialize real LLM for BOTH agents
+     assistant.llm = RealLLMClient()
+     analyst.llm = RealLLMClient()
+     print("\n[SETUP] Both assistant AND analyst have their own LLM!")
+
+     # Start analyst
+     analyst_task = asyncio.create_task(analyst.run())
+     await asyncio.sleep(0.2)
+
+     print("\n[SETUP] Mesh created with:")
+     print(f" - Analyst (role: {analyst.role})")
+     print(f" - Assistant (role: {assistant.role})")
+     print(f" - Assistant sees peers: {[p['role'] for p in assistant.peers.list_peers()]}")
+
+     print("\n[TOOLS] Assistant has these tools:")
+     for tool in assistant.get_tools():
+         print(f" - {tool['name']}: {tool['description'][:60]}...")
+
+     # Test conversation
+     print("\n" + "-"*70)
+     print("CONVERSATION TEST")
+     print("-"*70)
+
+     try:
+         # Message that should trigger ask_peer - WITH ACTUAL DATA
+         message = """Please analyze this monthly revenue data and identify any anomalies:
+
+ Jan: $120,000
+ Feb: $125,000
+ Mar: $118,000
+ Apr: $245,000 (big spike!)
+ May: $130,000
+ Jun: $128,000
+ Jul: $15,000 (big drop!)
+ Aug: $135,000
+ Sep: $140,000
+ Oct: $142,000
+ Nov: $155,000
+ Dec: $180,000
+
+ What patterns do you see? Are April and July anomalies?"""
+
+         print(f"\n[USER] {message}")
+         print("\n[ASSISTANT LLM PROCESSING...]")
+
+         response = await assistant.chat(message)
+
+         print(f"\n[ASSISTANT] {response}")
+
+         print("\n[TOOL CALLS MADE]")
+         for call in assistant.tool_calls:
+             print(f" - {call['tool']}: {call['args']}")
+
+         print("\n[ANALYST RECEIVED]")
+         for req in analyst.requests_received:
+             print(f" - {req}")
+
+     finally:
+         analyst.request_shutdown()
+         analyst_task.cancel()
+         try:
+             await analyst_task
+         except asyncio.CancelledError:
+             pass
+
+     print("\n" + "="*70)
+     print("TEST COMPLETE")
+     print("="*70)
+
+
+ if __name__ == "__main__":
+     # Load environment
+     from dotenv import load_dotenv
+     load_dotenv()
+
+     # Run manual test
+     asyncio.run(run_manual_test())
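
For a quick local run outside pytest, the module's __main__ block above already loads .env and calls run_manual_test(); an equivalent standalone driver is sketched below, assuming the repository root as the working directory and a .env containing CLAUDE_API_KEY as described in the test's docstring. The tests.test_06_real_llm_integration import path is an assumption; running the file directly with python gives the same result.

    # Sketch only: drives the manual scenario above from a separate script.
    # Assumes repo root as CWD and a .env with CLAUDE_API_KEY (see the test docstring).
    import asyncio
    from dotenv import load_dotenv

    load_dotenv()  # same environment loading the test's __main__ block performs

    # Hypothetical import path; `python tests/test_06_real_llm_integration.py` is equivalent.
    from tests.test_06_real_llm_integration import run_manual_test

    asyncio.run(run_manual_test())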