ambivo-agents 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ambivo_agents/__init__.py +91 -0
- ambivo_agents/agents/__init__.py +21 -0
- ambivo_agents/agents/assistant.py +203 -0
- ambivo_agents/agents/code_executor.py +133 -0
- ambivo_agents/agents/code_executor2.py +222 -0
- ambivo_agents/agents/knowledge_base.py +935 -0
- ambivo_agents/agents/media_editor.py +992 -0
- ambivo_agents/agents/moderator.py +617 -0
- ambivo_agents/agents/simple_web_search.py +404 -0
- ambivo_agents/agents/web_scraper.py +1027 -0
- ambivo_agents/agents/web_search.py +933 -0
- ambivo_agents/agents/youtube_download.py +784 -0
- ambivo_agents/cli.py +699 -0
- ambivo_agents/config/__init__.py +4 -0
- ambivo_agents/config/loader.py +301 -0
- ambivo_agents/core/__init__.py +33 -0
- ambivo_agents/core/base.py +1024 -0
- ambivo_agents/core/history.py +606 -0
- ambivo_agents/core/llm.py +333 -0
- ambivo_agents/core/memory.py +640 -0
- ambivo_agents/executors/__init__.py +8 -0
- ambivo_agents/executors/docker_executor.py +108 -0
- ambivo_agents/executors/media_executor.py +237 -0
- ambivo_agents/executors/youtube_executor.py +404 -0
- ambivo_agents/services/__init__.py +6 -0
- ambivo_agents/services/agent_service.py +605 -0
- ambivo_agents/services/factory.py +370 -0
- ambivo_agents-1.0.1.dist-info/METADATA +1090 -0
- ambivo_agents-1.0.1.dist-info/RECORD +33 -0
- ambivo_agents-1.0.1.dist-info/WHEEL +5 -0
- ambivo_agents-1.0.1.dist-info/entry_points.txt +3 -0
- ambivo_agents-1.0.1.dist-info/licenses/LICENSE +21 -0
- ambivo_agents-1.0.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,933 @@
|
|
1
|
+
# ambivo_agents/agents/web_search.py - Complete and Corrected LLM-Aware Web Search Agent
|
2
|
+
"""
|
3
|
+
LLM-Aware Web Search Agent with conversation history and intelligent intent detection
|
4
|
+
"""
|
5
|
+
|
6
|
+
import asyncio
|
7
|
+
import json
|
8
|
+
import uuid
|
9
|
+
import time
|
10
|
+
import requests
|
11
|
+
from typing import Dict, List, Any, Optional
|
12
|
+
from datetime import datetime
|
13
|
+
from dataclasses import dataclass
|
14
|
+
|
15
|
+
from ..core.base import BaseAgent, AgentRole, AgentMessage, MessageType, ExecutionContext, AgentTool
|
16
|
+
from ..config.loader import load_config, get_config_section
|
17
|
+
from ..core.history import WebAgentHistoryMixin, ContextType
|
18
|
+
|
19
|
+
|
20
|
+
@dataclass
class SearchResult:
    """Single search result data structure."""
    title: str                            # page title as returned by the provider
    url: str                              # result link
    snippet: str                          # short description / excerpt
    source: str = ""                      # provider name ('brave', 'aves', ...)
    rank: int = 0                         # 1-based position in the result list
    score: float = 0.0                    # relevance score (provider- or rank-derived)
    timestamp: Optional[datetime] = None  # when the result was fetched
|
30
|
+
|
31
|
+
|
32
|
+
@dataclass
class SearchResponse:
    """Search response containing multiple results."""
    query: str                    # the query string actually sent to the provider
    results: List[SearchResult]   # parsed results, possibly empty
    total_results: int            # total result count (may exceed len(results) for some providers)
    search_time: float            # wall-clock seconds spent on the request
    provider: str                 # provider name, or "none" when unavailable
    status: str = "success"       # "success" or "error"
    error: Optional[str] = None   # error description when status == "error"
|
42
|
+
|
43
|
+
|
44
|
+
class WebSearchServiceAdapter:
    """Adapter over multiple web-search providers (Brave, AVES).

    Selects the configured provider with the lowest 'priority' number,
    enforces a per-provider delay between requests, and falls back to a
    lower-priority provider when a request fails.
    """

    def __init__(self):
        # Load configuration from YAML
        config = load_config()
        self.search_config = get_config_section('web_search', config)

        # provider name -> config dict (api_key, base_url, priority, ...)
        self.providers = {}
        self.current_provider = None

        # Initialize available providers
        self._initialize_providers()

        # Set default provider (lowest priority number wins)
        self.current_provider = self._get_best_provider()

    def _initialize_providers(self):
        """Register every provider that has an API key in the config."""

        # Brave Search API
        if self.search_config.get('brave_api_key'):
            self.providers['brave'] = {
                'name': 'brave',
                'api_key': self.search_config['brave_api_key'],
                'base_url': 'https://api.search.brave.com/res/v1/web/search',
                'priority': 2,
                'available': True,
                'rate_limit_delay': 2.0  # seconds between consecutive requests
            }

        # AVES API
        if self.search_config.get('avesapi_api_key'):
            self.providers['aves'] = {
                'name': 'aves',
                'api_key': self.search_config['avesapi_api_key'],
                'base_url': 'https://api.avesapi.com/search',
                'priority': 1,  # preferred over Brave
                'available': True,
                'rate_limit_delay': 1.5
            }

        if not self.providers:
            raise ValueError("No search providers configured in web_search section")

    def _get_best_provider(self) -> Optional[str]:
        """Return the available provider with the lowest priority value, or None."""
        available_providers = [
            (name, config) for name, config in self.providers.items()
            if config.get('available', False)
        ]

        if not available_providers:
            return None

        available_providers.sort(key=lambda x: x[1]['priority'])
        return available_providers[0][0]

    async def search_web(self,
                         query: str,
                         max_results: int = 10,
                         country: str = "US",
                         language: str = "en") -> SearchResponse:
        """Perform web search using the current provider with rate limiting.

        On rate-limit style errors the failing provider is disabled and the
        call recurses once per remaining fallback provider. Never raises:
        failures are reported via SearchResponse.status == "error".

        NOTE(review): `language` is accepted but not forwarded to either
        provider (Brave hard-codes 'en') — confirm whether this is intended.
        """
        start_time = time.time()

        if not self.current_provider:
            return SearchResponse(
                query=query,
                results=[],
                total_results=0,
                search_time=0.0,
                provider="none",
                status="error",
                error="No search provider available"
            )

        # Rate limiting: sleep out the remainder of the provider's delay window
        provider_config = self.providers[self.current_provider]
        if 'last_request_time' in provider_config:
            elapsed = time.time() - provider_config['last_request_time']
            delay = provider_config.get('rate_limit_delay', 1.0)
            if elapsed < delay:
                await asyncio.sleep(delay - elapsed)

        provider_config['last_request_time'] = time.time()

        try:
            if self.current_provider == 'brave':
                return await self._search_brave(query, max_results, country)
            elif self.current_provider == 'aves':
                return await self._search_aves(query, max_results)
            else:
                raise ValueError(f"Unknown provider: {self.current_provider}")

        except Exception as e:
            search_time = time.time() - start_time

            # Mark provider as temporarily unavailable on certain errors
            error_str = str(e).lower()
            if any(keyword in error_str for keyword in ['429', 'rate limit', 'quota exceeded']):
                self.providers[self.current_provider]['available'] = False
                # NOTE(review): 'cooldown_until' is written but never read, so a
                # disabled provider stays disabled for the process lifetime.
                self.providers[self.current_provider]['cooldown_until'] = time.time() + 300

            # Try fallback provider (switches self.current_provider on success)
            fallback = self._try_fallback_provider()
            if fallback:
                return await self.search_web(query, max_results, country, language)

            return SearchResponse(
                query=query,
                results=[],
                total_results=0,
                search_time=search_time,
                provider=self.current_provider,
                status="error",
                error=str(e)
            )

    async def _search_brave(self, query: str, max_results: int, country: str) -> SearchResponse:
        """Search using Brave Search API.

        NOTE(review): requests.get is synchronous and blocks the event loop
        for up to the 15s timeout; consider running it in a thread executor
        or using an async HTTP client.
        """
        start_time = time.time()

        provider_config = self.providers['brave']

        headers = {
            'Accept': 'application/json',
            'Accept-Encoding': 'gzip',
            'X-Subscription-Token': provider_config['api_key']
        }

        params = {
            'q': query,
            'count': min(max_results, 20),  # Brave caps count at 20
            'country': country,
            'search_lang': 'en',
            'ui_lang': 'en-US',
            'freshness': 'pd'  # NOTE(review): restricts ALL results to the past day — confirm
        }

        try:
            response = requests.get(
                provider_config['base_url'],
                headers=headers,
                params=params,
                timeout=15
            )

            # Translate common HTTP failures into messages that search_web's
            # keyword matching ('429', 'rate limit', ...) can recognize.
            if response.status_code == 429:
                retry_after = response.headers.get('Retry-After', '300')
                raise Exception(f"Rate limit exceeded. Retry after {retry_after} seconds")
            elif response.status_code == 401:
                raise Exception(f"Authentication failed - check Brave API key")
            elif response.status_code == 403:
                raise Exception(f"Brave API access forbidden - check subscription")

            response.raise_for_status()

            data = response.json()
            search_time = time.time() - start_time

            results = []
            web_results = data.get('web', {}).get('results', [])

            for i, result in enumerate(web_results[:max_results]):
                results.append(SearchResult(
                    title=result.get('title', ''),
                    url=result.get('url', ''),
                    snippet=result.get('description', ''),
                    source='brave',
                    rank=i + 1,
                    score=1.0 - (i * 0.1),  # simple rank-derived relevance proxy
                    timestamp=datetime.now()
                ))

            return SearchResponse(
                query=query,
                results=results,
                total_results=len(results),
                search_time=search_time,
                provider='brave',
                status='success'
            )

        except Exception as e:
            search_time = time.time() - start_time
            raise Exception(f"Brave Search API error: {e}")

    async def _search_aves(self, query: str, max_results: int) -> SearchResponse:
        """Search using AVES API (same blocking-I/O caveat as _search_brave)."""
        start_time = time.time()

        provider_config = self.providers['aves']

        headers = {
            'User-Agent': 'AmbivoAgentSystem/1.0'
        }

        params = {
            'apikey': provider_config['api_key'],
            'type': 'web',
            'query': query,
            'device': 'desktop',
            'output': 'json',
            'num': min(max_results, 10)  # AVES caps results at 10
        }

        try:
            response = requests.get(
                provider_config['base_url'],
                headers=headers,
                params=params,
                timeout=15
            )

            if response.status_code == 403:
                raise Exception(f"AVES API access forbidden - check API key or quota")
            elif response.status_code == 401:
                raise Exception(f"AVES API authentication failed - invalid API key")
            elif response.status_code == 429:
                raise Exception(f"AVES API rate limit exceeded")

            response.raise_for_status()

            data = response.json()
            search_time = time.time() - start_time

            results = []

            # Preferred response shape: {"result": {"organic_results": [...]}};
            # fall back through several alternative top-level keys.
            result_section = data.get('result', {})
            search_results = result_section.get('organic_results', [])

            if not search_results:
                search_results = data.get('organic_results',
                                          data.get('results', data.get('items', data.get('data', []))))

            for i, result in enumerate(search_results[:max_results]):
                # Individual result fields also vary by API version; probe aliases.
                title = result.get('title', 'No Title')
                url = result.get('url', result.get('link', result.get('href', '')))
                snippet = result.get('description', result.get('snippet', result.get('summary', '')))
                position = result.get('position', i + 1)

                results.append(SearchResult(
                    title=title,
                    url=url,
                    snippet=snippet,
                    source='aves',
                    rank=position,
                    score=result.get('score', 1.0 - (i * 0.1)),
                    timestamp=datetime.now()
                ))

            total_results_count = result_section.get('total_results', len(results))

            return SearchResponse(
                query=query,
                results=results,
                total_results=total_results_count,
                search_time=search_time,
                provider='aves',
                status='success'
            )

        except Exception as e:
            search_time = time.time() - start_time
            raise Exception(f"AVES Search API error: {e}")

    def _try_fallback_provider(self) -> bool:
        """Switch to the next available lower-priority provider; True on success."""
        current_priority = self.providers[self.current_provider]['priority']

        fallback_providers = [
            (name, config) for name, config in self.providers.items()
            if config['priority'] > current_priority and config.get('available', False)
        ]

        if fallback_providers:
            fallback_providers.sort(key=lambda x: x[1]['priority'])
            self.current_provider = fallback_providers[0][0]
            return True

        return False

    async def search_news(self, query: str, max_results: int = 10, days_back: int = 7) -> SearchResponse:
        """Search for news articles by decorating the query with news keywords.

        NOTE(review): `days_back` is currently unused — confirm before relying on it.
        """
        news_query = f"{query} news latest recent"
        return await self.search_web(news_query, max_results)

    async def search_academic(self, query: str, max_results: int = 10) -> SearchResponse:
        """Search for academic content by decorating the query with academic keywords."""
        academic_query = f"{query} research paper study academic"
        return await self.search_web(academic_query, max_results)
|
336
|
+
|
337
|
+
|
338
|
+
class WebSearchAgent(BaseAgent, WebAgentHistoryMixin):
|
339
|
+
"""LLM-Aware Web Search Agent with conversation context and intelligent routing"""
|
340
|
+
|
341
|
+
    def __init__(self, agent_id: str = None, memory_manager=None, llm_service=None, **kwargs):
        """Create a web-search agent.

        Args:
            agent_id: Stable identifier; auto-generated ("search_<8 hex chars>")
                when None.
            memory_manager: Conversation memory backend, forwarded to BaseAgent.
            llm_service: Optional LLM used for intent analysis; without it the
                agent falls back to keyword heuristics.
            **kwargs: Forwarded to BaseAgent.__init__.

        Raises:
            RuntimeError: if the search service cannot be initialized (e.g. no
                provider API keys configured).
        """
        if agent_id is None:
            agent_id = f"search_{str(uuid.uuid4())[:8]}"

        super().__init__(
            agent_id=agent_id,
            role=AgentRole.RESEARCHER,
            memory_manager=memory_manager,
            llm_service=llm_service,
            name="Web Search Agent",
            description="LLM-aware web search agent with conversation history",
            **kwargs
        )

        # Initialize history mixin (must run before any history lookups below)
        self.setup_history_mixin()

        # Initialize search service; fail fast if configuration is unusable
        try:
            self.search_service = WebSearchServiceAdapter()
        except Exception as e:
            raise RuntimeError(f"Failed to initialize Web Search Service: {e}")

        # Add web search tools
        self._add_search_tools()
|
366
|
+
|
367
|
+
    async def _llm_analyze_intent(self, user_message: str, conversation_context: str = "") -> Dict[str, Any]:
        """Classify the user's intent with the LLM, falling back to keyword heuristics.

        Returns a dict with keys: primary_intent, search_query, search_type,
        uses_context_reference, context_type, requirements, confidence.
        Never raises — every failure path degrades to a heuristic analysis.
        """
        if not self.llm_service:
            # Fallback to keyword-based analysis
            return self._keyword_based_analysis(user_message)

        prompt = f"""
        Analyze this user message in the context of a web search conversation and extract:
        1. Primary intent (search_general, search_news, search_academic, refine_search, help_request)
        2. Search query/terms (clean and optimized for search)
        3. Search type preferences (web, news, academic, images)
        4. Context references (referring to previous searches, "this", "that", "more about")
        5. Specific requirements (time range, source type, country, etc.)

        Conversation Context:
        {conversation_context}

        Current User Message: {user_message}

        Respond in JSON format:
        {{
            "primary_intent": "search_general|search_news|search_academic|refine_search|help_request",
            "search_query": "optimized search terms",
            "search_type": "web|news|academic",
            "uses_context_reference": true/false,
            "context_type": "previous_search|previous_result|general",
            "requirements": {{
                "time_range": "recent|specific_date|any",
                "max_results": number,
                "country": "country_code",
                "language": "language_code"
            }},
            "confidence": 0.0-1.0
        }}
        """

        try:
            response = await self.llm_service.generate_response(prompt)
            # Try to parse JSON response: grab the outermost {...} span so
            # surrounding prose from the LLM does not break json.loads.
            import re
            json_match = re.search(r'\{.*\}', response, re.DOTALL)
            if json_match:
                return json.loads(json_match.group())
            else:
                # If LLM doesn't return JSON, extract key information from text
                return self._extract_intent_from_llm_response(response, user_message)
        except Exception as e:
            # Any LLM or JSON failure degrades to the keyword heuristic
            return self._keyword_based_analysis(user_message)
|
416
|
+
|
417
|
+
def _keyword_based_analysis(self, user_message: str) -> Dict[str, Any]:
|
418
|
+
"""Fallback keyword-based intent analysis"""
|
419
|
+
content_lower = user_message.lower()
|
420
|
+
|
421
|
+
# Determine intent
|
422
|
+
if any(word in content_lower for word in ['news', 'latest', 'recent', 'breaking']):
|
423
|
+
intent = 'search_news'
|
424
|
+
search_type = 'news'
|
425
|
+
elif any(word in content_lower for word in ['research', 'academic', 'paper', 'study', 'journal']):
|
426
|
+
intent = 'search_academic'
|
427
|
+
search_type = 'academic'
|
428
|
+
elif any(word in content_lower for word in ['search', 'find', 'look up', 'google']):
|
429
|
+
intent = 'search_general'
|
430
|
+
search_type = 'web'
|
431
|
+
elif any(word in content_lower for word in ['help', 'how to', 'what can']):
|
432
|
+
intent = 'help_request'
|
433
|
+
search_type = 'web'
|
434
|
+
else:
|
435
|
+
intent = 'search_general'
|
436
|
+
search_type = 'web'
|
437
|
+
|
438
|
+
# Extract query
|
439
|
+
query = self._extract_query_from_message(user_message)
|
440
|
+
|
441
|
+
# Check for context references
|
442
|
+
context_words = ['this', 'that', 'it', 'them', 'more', 'similar', 'related']
|
443
|
+
uses_context = any(word in content_lower for word in context_words)
|
444
|
+
|
445
|
+
return {
|
446
|
+
"primary_intent": intent,
|
447
|
+
"search_query": query,
|
448
|
+
"search_type": search_type,
|
449
|
+
"uses_context_reference": uses_context,
|
450
|
+
"context_type": "previous_search" if uses_context else "none",
|
451
|
+
"requirements": {
|
452
|
+
"time_range": "recent" if 'recent' in content_lower else "any",
|
453
|
+
"max_results": 5,
|
454
|
+
"country": "US",
|
455
|
+
"language": "en"
|
456
|
+
},
|
457
|
+
"confidence": 0.7
|
458
|
+
}
|
459
|
+
|
460
|
+
def _extract_intent_from_llm_response(self, llm_response: str, user_message: str) -> Dict[str, Any]:
|
461
|
+
"""Extract intent from LLM response that isn't JSON"""
|
462
|
+
# Simple extraction from LLM text response
|
463
|
+
content_lower = llm_response.lower()
|
464
|
+
|
465
|
+
if 'news' in content_lower:
|
466
|
+
intent = 'search_news'
|
467
|
+
search_type = 'news'
|
468
|
+
elif 'academic' in content_lower or 'research' in content_lower:
|
469
|
+
intent = 'search_academic'
|
470
|
+
search_type = 'academic'
|
471
|
+
else:
|
472
|
+
intent = 'search_general'
|
473
|
+
search_type = 'web'
|
474
|
+
|
475
|
+
return {
|
476
|
+
"primary_intent": intent,
|
477
|
+
"search_query": self._extract_query_from_message(user_message),
|
478
|
+
"search_type": search_type,
|
479
|
+
"uses_context_reference": False,
|
480
|
+
"context_type": "none",
|
481
|
+
"requirements": {"max_results": 5, "country": "US", "language": "en"},
|
482
|
+
"confidence": 0.6
|
483
|
+
}
|
484
|
+
|
485
|
+
    async def process_message(self, message: AgentMessage, context: ExecutionContext = None) -> AgentMessage:
        """Process a message with LLM-based intent detection and history context.

        Persists the inbound message (and, on success, the reply) to memory.
        Any exception is converted into an ERROR-typed response rather than
        propagated, so callers always receive an AgentMessage.
        """
        self.memory.store_message(message)

        try:
            user_message = message.content

            # Update conversation state (history mixin bookkeeping)
            self.update_conversation_state(user_message)

            # Get conversation context for LLM analysis
            conversation_context = self._get_conversation_context_summary()

            # Use LLM to analyze intent
            intent_analysis = await self._llm_analyze_intent(user_message, conversation_context)

            # Route request based on LLM analysis
            response_content = await self._route_with_llm_analysis(intent_analysis, user_message, context)

            response = self.create_response(
                content=response_content,
                recipient_id=message.sender_id,
                session_id=message.session_id,
                conversation_id=message.conversation_id
            )

            self.memory.store_message(response)
            return response

        except Exception as e:
            # NOTE(review): unlike the success path, the error response is not
            # stored to memory — confirm this asymmetry is intentional.
            error_response = self.create_response(
                content=f"Web Search Agent error: {str(e)}",
                recipient_id=message.sender_id,
                message_type=MessageType.ERROR,
                session_id=message.session_id,
                conversation_id=message.conversation_id
            )
            return error_response
|
523
|
+
|
524
|
+
def _get_conversation_context_summary(self) -> str:
|
525
|
+
"""Get a summary of recent conversation for LLM context"""
|
526
|
+
try:
|
527
|
+
recent_history = self.get_conversation_history_with_context(limit=3,
|
528
|
+
context_types=[ContextType.SEARCH_TERM])
|
529
|
+
|
530
|
+
context_summary = []
|
531
|
+
for msg in recent_history:
|
532
|
+
if msg.get('message_type') == 'user_input':
|
533
|
+
content = msg.get('content', '')
|
534
|
+
extracted_context = msg.get('extracted_context', {})
|
535
|
+
search_terms = extracted_context.get('search_term', [])
|
536
|
+
|
537
|
+
if search_terms:
|
538
|
+
context_summary.append(f"Previous search: {search_terms[0]}")
|
539
|
+
else:
|
540
|
+
context_summary.append(f"Previous message: {content[:50]}...")
|
541
|
+
|
542
|
+
return "\n".join(context_summary) if context_summary else "No previous context"
|
543
|
+
except:
|
544
|
+
return "No previous context"
|
545
|
+
|
546
|
+
async def _route_with_llm_analysis(self, intent_analysis: Dict[str, Any], user_message: str,
|
547
|
+
context: ExecutionContext) -> str:
|
548
|
+
"""Route request based on LLM intent analysis"""
|
549
|
+
|
550
|
+
primary_intent = intent_analysis.get("primary_intent", "search_general")
|
551
|
+
search_query = intent_analysis.get("search_query", "")
|
552
|
+
search_type = intent_analysis.get("search_type", "web")
|
553
|
+
uses_context = intent_analysis.get("uses_context_reference", False)
|
554
|
+
requirements = intent_analysis.get("requirements", {})
|
555
|
+
|
556
|
+
# Handle context references
|
557
|
+
if uses_context and not search_query:
|
558
|
+
search_query = self._resolve_contextual_query(user_message)
|
559
|
+
|
560
|
+
# Route based on intent
|
561
|
+
if primary_intent == "help_request":
|
562
|
+
return await self._handle_help_request(user_message)
|
563
|
+
elif primary_intent == "search_news":
|
564
|
+
return await self._handle_news_search(search_query, requirements)
|
565
|
+
elif primary_intent == "search_academic":
|
566
|
+
return await self._handle_academic_search(search_query, requirements)
|
567
|
+
elif primary_intent == "refine_search":
|
568
|
+
return await self._handle_search_refinement(search_query, user_message)
|
569
|
+
else: # search_general
|
570
|
+
return await self._handle_general_search(search_query, requirements)
|
571
|
+
|
572
|
+
def _resolve_contextual_query(self, user_message: str) -> str:
|
573
|
+
"""Resolve contextual references to create a search query"""
|
574
|
+
recent_search = self.get_recent_search_term()
|
575
|
+
|
576
|
+
if recent_search:
|
577
|
+
# Check for refinement patterns
|
578
|
+
refinement_words = ['more', 'additional', 'other', 'similar', 'related', 'about this']
|
579
|
+
if any(word in user_message.lower() for word in refinement_words):
|
580
|
+
return f"{recent_search} {user_message.replace('this', '').replace('that', '').strip()}"
|
581
|
+
else:
|
582
|
+
return recent_search
|
583
|
+
|
584
|
+
return self._extract_query_from_message(user_message)
|
585
|
+
|
586
|
+
async def _handle_general_search_old(self, query: str, requirements: Dict[str, Any]) -> str:
|
587
|
+
"""Handle general web search"""
|
588
|
+
if not query:
|
589
|
+
return self._get_search_help_message()
|
590
|
+
|
591
|
+
try:
|
592
|
+
max_results = requirements.get("max_results", 5)
|
593
|
+
result = await self._search_web(query, max_results=max_results)
|
594
|
+
|
595
|
+
if result['success']:
|
596
|
+
return self._format_search_results(result, "General Search")
|
597
|
+
else:
|
598
|
+
return f"❌ **Search failed:** {result['error']}"
|
599
|
+
|
600
|
+
except Exception as e:
|
601
|
+
return f"❌ **Error during search:** {str(e)}"
|
602
|
+
|
603
|
+
async def _handle_news_search(self, query: str, requirements: Dict[str, Any]) -> str:
|
604
|
+
"""Handle news search"""
|
605
|
+
if not query:
|
606
|
+
return "I can search for news articles. What news topic are you interested in?"
|
607
|
+
|
608
|
+
try:
|
609
|
+
max_results = requirements.get("max_results", 5)
|
610
|
+
result = await self._search_news(query, max_results=max_results)
|
611
|
+
|
612
|
+
if result['success']:
|
613
|
+
return self._format_search_results(result, "News Search")
|
614
|
+
else:
|
615
|
+
return f"❌ **News search failed:** {result['error']}"
|
616
|
+
|
617
|
+
except Exception as e:
|
618
|
+
return f"❌ **Error during news search:** {str(e)}"
|
619
|
+
|
620
|
+
async def _handle_academic_search(self, query: str, requirements: Dict[str, Any]) -> str:
|
621
|
+
"""Handle academic search"""
|
622
|
+
if not query:
|
623
|
+
return "I can search for academic papers and research. What research topic are you looking for?"
|
624
|
+
|
625
|
+
try:
|
626
|
+
max_results = requirements.get("max_results", 5)
|
627
|
+
result = await self._search_academic(query, max_results=max_results)
|
628
|
+
|
629
|
+
if result['success']:
|
630
|
+
return self._format_search_results(result, "Academic Search")
|
631
|
+
else:
|
632
|
+
return f"❌ **Academic search failed:** {result['error']}"
|
633
|
+
|
634
|
+
except Exception as e:
|
635
|
+
return f"❌ **Error during academic search:** {str(e)}"
|
636
|
+
|
637
|
+
async def _handle_search_refinement(self, query: str, user_message: str) -> str:
|
638
|
+
"""Handle search refinement requests"""
|
639
|
+
recent_search = self.get_recent_search_term()
|
640
|
+
|
641
|
+
if recent_search:
|
642
|
+
refined_query = f"{recent_search} {query}".strip()
|
643
|
+
result = await self._search_web(refined_query, max_results=5)
|
644
|
+
|
645
|
+
if result['success']:
|
646
|
+
return f"🔍 **Refined Search Results**\n\n" \
|
647
|
+
f"**Original:** {recent_search}\n" \
|
648
|
+
f"**Refined:** {refined_query}\n\n" + \
|
649
|
+
self._format_search_results(result, "Refined Search", show_header=False)
|
650
|
+
else:
|
651
|
+
return f"❌ **Refined search failed:** {result['error']}"
|
652
|
+
else:
|
653
|
+
return await self._handle_general_search(query, {"max_results": 5})
|
654
|
+
|
655
|
+
    async def _handle_help_request(self, user_message: str) -> str:
        """Answer a help request with the static capability/help text.

        The incoming message is currently unused; routing has already decided
        this is a help request.
        """
        return self._get_search_help_message()
|
658
|
+
|
659
|
+
def _format_search_results_old(self, result: Dict[str, Any], search_type: str, show_header: bool = True) -> str:
|
660
|
+
"""Format search results consistently"""
|
661
|
+
results = result.get('results', [])
|
662
|
+
query = result.get('query', '')
|
663
|
+
|
664
|
+
if show_header:
|
665
|
+
response = f"🔍 **{search_type} Results for:** {query}\n\n"
|
666
|
+
else:
|
667
|
+
response = ""
|
668
|
+
|
669
|
+
if results:
|
670
|
+
response += f"📊 **Found {len(results)} results:**\n\n"
|
671
|
+
for i, res in enumerate(results[:3], 1):
|
672
|
+
response += f"**{i}. {res['title']}**\n"
|
673
|
+
response += f"🔗 {res['url']}\n"
|
674
|
+
response += f"📝 {res['snippet'][:150]}...\n\n"
|
675
|
+
|
676
|
+
provider = result.get('provider', 'search engine')
|
677
|
+
search_time = result.get('search_time', 0)
|
678
|
+
response += f"⏱️ **Search completed in {search_time:.2f}s using {provider}**"
|
679
|
+
else:
|
680
|
+
response += "No results found. Try a different search term."
|
681
|
+
|
682
|
+
return response
|
683
|
+
|
684
|
+
def _format_search_results(self, result: Dict[str, Any], search_type: str, show_header: bool = True) -> str:
|
685
|
+
"""Format search results consistently - FIXED VERSION"""
|
686
|
+
results = result.get('results', [])
|
687
|
+
query = result.get('query', '')
|
688
|
+
|
689
|
+
if show_header:
|
690
|
+
response = f"🔍 **{search_type} Results for:** {query}\n\n"
|
691
|
+
else:
|
692
|
+
response = ""
|
693
|
+
|
694
|
+
if results:
|
695
|
+
response += f"📊 **Found {len(results)} results:**\n\n"
|
696
|
+
|
697
|
+
# FIXED: Safe iteration over results
|
698
|
+
for i, res in enumerate(results):
|
699
|
+
if i >= 3: # Limit to 3 results
|
700
|
+
break
|
701
|
+
|
702
|
+
# FIXED: Safe access to result properties
|
703
|
+
title = res.get('title', 'No title') or 'No title'
|
704
|
+
url = res.get('url', 'No URL') or 'No URL'
|
705
|
+
snippet = res.get('snippet', 'No description') or 'No description'
|
706
|
+
|
707
|
+
# FIXED: Safe string slicing
|
708
|
+
snippet_preview = str(snippet)[:150]
|
709
|
+
if len(str(snippet)) > 150:
|
710
|
+
snippet_preview += "..."
|
711
|
+
|
712
|
+
response += f"**{i + 1}. {title}**\n"
|
713
|
+
response += f"🔗 {url}\n"
|
714
|
+
response += f"📝 {snippet_preview}\n\n"
|
715
|
+
|
716
|
+
# FIXED: Safe access to result metadata
|
717
|
+
provider = result.get('provider', 'search engine')
|
718
|
+
search_time = result.get('search_time', 0)
|
719
|
+
|
720
|
+
# FIXED: Ensure search_time is a number
|
721
|
+
if not isinstance(search_time, (int, float)):
|
722
|
+
search_time = 0
|
723
|
+
|
724
|
+
response += f"⏱️ **Search completed in {search_time:.2f}s using {provider}**"
|
725
|
+
else:
|
726
|
+
response += "No results found. Try a different search term."
|
727
|
+
|
728
|
+
return response
|
729
|
+
|
730
|
+
async def _handle_general_search(self, query: str, requirements: Dict[str, Any]) -> str:
|
731
|
+
"""Handle general web search - FIXED VERSION"""
|
732
|
+
if not query:
|
733
|
+
return self._get_search_help_message()
|
734
|
+
|
735
|
+
try:
|
736
|
+
# FIXED: Safe access to requirements
|
737
|
+
max_results = requirements.get("max_results", 5)
|
738
|
+
if not isinstance(max_results, int) or max_results is None:
|
739
|
+
max_results = 5
|
740
|
+
|
741
|
+
result = await self._search_web(query, max_results=max_results)
|
742
|
+
|
743
|
+
if result['success']:
|
744
|
+
return self._format_search_results(result, "General Search")
|
745
|
+
else:
|
746
|
+
error_msg = result.get('error', 'Unknown error')
|
747
|
+
return f"❌ **Search failed:** {error_msg}"
|
748
|
+
|
749
|
+
except Exception as e:
|
750
|
+
return f"❌ **Error during search:** {str(e)}"
|
751
|
+
def _get_search_help_message(self) -> str:
|
752
|
+
"""Get contextual help message"""
|
753
|
+
recent_search = self.get_recent_search_term()
|
754
|
+
|
755
|
+
base_message = ("I'm your Web Search Agent! I can help you with:\n\n"
|
756
|
+
"🔍 **Web Search** - General information search\n"
|
757
|
+
"📰 **News Search** - Latest news and current events \n"
|
758
|
+
"🎓 **Academic Search** - Research papers and studies\n\n"
|
759
|
+
"💡 **Examples:**\n"
|
760
|
+
"• 'Search for AI trends in 2025'\n"
|
761
|
+
"• 'Find latest news about quantum computing'\n"
|
762
|
+
"• 'Look up machine learning research papers'\n")
|
763
|
+
|
764
|
+
if recent_search:
|
765
|
+
base_message += f"\n🎯 **Your last search:** {recent_search}\n"
|
766
|
+
base_message += "You can say things like 'more about this' or 'find similar topics'"
|
767
|
+
|
768
|
+
return base_message
|
769
|
+
|
770
|
+
def _extract_query_from_message(self, message: str) -> str:
|
771
|
+
"""Extract clean search query from message"""
|
772
|
+
# Remove common search prefixes
|
773
|
+
prefixes = ['search for', 'find', 'look up', 'search', 'find me', 'look for',
|
774
|
+
'google', 'search about', 'tell me about']
|
775
|
+
|
776
|
+
query = message.strip()
|
777
|
+
for prefix in prefixes:
|
778
|
+
if query.lower().startswith(prefix):
|
779
|
+
query = query[len(prefix):].strip()
|
780
|
+
break
|
781
|
+
|
782
|
+
return query
|
783
|
+
|
784
|
+
# Tool implementations
|
785
|
+
def _add_search_tools(self):
|
786
|
+
"""Add web search related tools"""
|
787
|
+
|
788
|
+
# General web search tool
|
789
|
+
self.add_tool(AgentTool(
|
790
|
+
name="search_web",
|
791
|
+
description="Search the web for information",
|
792
|
+
function=self._search_web,
|
793
|
+
parameters_schema={
|
794
|
+
"type": "object",
|
795
|
+
"properties": {
|
796
|
+
"query": {"type": "string", "description": "Search query"},
|
797
|
+
"max_results": {"type": "integer", "default": 10, "description": "Maximum number of results"},
|
798
|
+
"country": {"type": "string", "default": "US", "description": "Country for search results"},
|
799
|
+
"language": {"type": "string", "default": "en", "description": "Language for search results"}
|
800
|
+
},
|
801
|
+
"required": ["query"]
|
802
|
+
}
|
803
|
+
))
|
804
|
+
|
805
|
+
# News search tool
|
806
|
+
self.add_tool(AgentTool(
|
807
|
+
name="search_news",
|
808
|
+
description="Search for recent news articles",
|
809
|
+
function=self._search_news,
|
810
|
+
parameters_schema={
|
811
|
+
"type": "object",
|
812
|
+
"properties": {
|
813
|
+
"query": {"type": "string", "description": "News search query"},
|
814
|
+
"max_results": {"type": "integer", "default": 10, "description": "Maximum number of results"},
|
815
|
+
"days_back": {"type": "integer", "default": 7, "description": "How many days back to search"}
|
816
|
+
},
|
817
|
+
"required": ["query"]
|
818
|
+
}
|
819
|
+
))
|
820
|
+
|
821
|
+
# Academic search tool
|
822
|
+
self.add_tool(AgentTool(
|
823
|
+
name="search_academic",
|
824
|
+
description="Search for academic papers and research",
|
825
|
+
function=self._search_academic,
|
826
|
+
parameters_schema={
|
827
|
+
"type": "object",
|
828
|
+
"properties": {
|
829
|
+
"query": {"type": "string", "description": "Academic search query"},
|
830
|
+
"max_results": {"type": "integer", "default": 10, "description": "Maximum number of results"}
|
831
|
+
},
|
832
|
+
"required": ["query"]
|
833
|
+
}
|
834
|
+
))
|
835
|
+
|
836
|
+
async def _search_web(self, query: str, max_results: int = 10, country: str = "US", language: str = "en") -> Dict[
|
837
|
+
str, Any]:
|
838
|
+
"""Perform web search"""
|
839
|
+
try:
|
840
|
+
search_response = await self.search_service.search_web(
|
841
|
+
query=query,
|
842
|
+
max_results=max_results,
|
843
|
+
country=country,
|
844
|
+
language=language
|
845
|
+
)
|
846
|
+
|
847
|
+
if search_response.status == "success":
|
848
|
+
results_data = []
|
849
|
+
for result in search_response.results:
|
850
|
+
results_data.append({
|
851
|
+
"title": result.title,
|
852
|
+
"url": result.url,
|
853
|
+
"snippet": result.snippet,
|
854
|
+
"rank": result.rank,
|
855
|
+
"score": result.score
|
856
|
+
})
|
857
|
+
|
858
|
+
return {
|
859
|
+
"success": True,
|
860
|
+
"query": query,
|
861
|
+
"results": results_data,
|
862
|
+
"total_results": search_response.total_results,
|
863
|
+
"search_time": search_response.search_time,
|
864
|
+
"provider": search_response.provider
|
865
|
+
}
|
866
|
+
else:
|
867
|
+
return {
|
868
|
+
"success": False,
|
869
|
+
"error": search_response.error,
|
870
|
+
"provider": search_response.provider
|
871
|
+
}
|
872
|
+
|
873
|
+
except Exception as e:
|
874
|
+
return {"success": False, "error": str(e)}
|
875
|
+
|
876
|
+
async def _search_news(self, query: str, max_results: int = 10, days_back: int = 7) -> Dict[str, Any]:
|
877
|
+
"""Search for news articles"""
|
878
|
+
try:
|
879
|
+
search_response = await self.search_service.search_news(
|
880
|
+
query=query,
|
881
|
+
max_results=max_results,
|
882
|
+
days_back=days_back
|
883
|
+
)
|
884
|
+
|
885
|
+
return await self._format_search_response(search_response, "news")
|
886
|
+
|
887
|
+
except Exception as e:
|
888
|
+
return {"success": False, "error": str(e)}
|
889
|
+
|
890
|
+
async def _search_academic(self, query: str, max_results: int = 10) -> Dict[str, Any]:
|
891
|
+
"""Search for academic content"""
|
892
|
+
try:
|
893
|
+
search_response = await self.search_service.search_academic(
|
894
|
+
query=query,
|
895
|
+
max_results=max_results
|
896
|
+
)
|
897
|
+
|
898
|
+
return await self._format_search_response(search_response, "academic")
|
899
|
+
|
900
|
+
except Exception as e:
|
901
|
+
return {"success": False, "error": str(e)}
|
902
|
+
|
903
|
+
async def _format_search_response(self, search_response, search_type: str) -> Dict[str, Any]:
|
904
|
+
"""Format search response for consistent output"""
|
905
|
+
if search_response.status == "success":
|
906
|
+
results_data = []
|
907
|
+
for result in search_response.results:
|
908
|
+
results_data.append({
|
909
|
+
"title": result.title,
|
910
|
+
"url": result.url,
|
911
|
+
"snippet": result.snippet,
|
912
|
+
"rank": result.rank,
|
913
|
+
"score": result.score,
|
914
|
+
"source": result.source
|
915
|
+
})
|
916
|
+
|
917
|
+
return {
|
918
|
+
"success": True,
|
919
|
+
"search_type": search_type,
|
920
|
+
"query": search_response.query,
|
921
|
+
"results": results_data,
|
922
|
+
"total_results": search_response.total_results,
|
923
|
+
"search_time": search_response.search_time,
|
924
|
+
"provider": search_response.provider
|
925
|
+
}
|
926
|
+
else:
|
927
|
+
return {
|
928
|
+
"success": False,
|
929
|
+
"search_type": search_type,
|
930
|
+
"error": search_response.error,
|
931
|
+
"provider": search_response.provider
|
932
|
+
}
|
933
|
+
|