aiecs 1.2.1-py3-none-any.whl → 1.3.1-py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of aiecs might be problematic.

Files changed (56)
  1. aiecs/__init__.py +1 -1
  2. aiecs/config/config.py +2 -1
  3. aiecs/llm/clients/vertex_client.py +5 -0
  4. aiecs/main.py +2 -2
  5. aiecs/scripts/tools_develop/README.md +111 -2
  6. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  7. aiecs/scripts/tools_develop/validate_tool_schemas.py +80 -21
  8. aiecs/scripts/tools_develop/verify_tools.py +347 -0
  9. aiecs/tools/__init__.py +94 -30
  10. aiecs/tools/apisource/__init__.py +106 -0
  11. aiecs/tools/apisource/intelligence/__init__.py +20 -0
  12. aiecs/tools/apisource/intelligence/data_fusion.py +378 -0
  13. aiecs/tools/apisource/intelligence/query_analyzer.py +387 -0
  14. aiecs/tools/apisource/intelligence/search_enhancer.py +384 -0
  15. aiecs/tools/apisource/monitoring/__init__.py +12 -0
  16. aiecs/tools/apisource/monitoring/metrics.py +308 -0
  17. aiecs/tools/apisource/providers/__init__.py +114 -0
  18. aiecs/tools/apisource/providers/base.py +684 -0
  19. aiecs/tools/apisource/providers/census.py +412 -0
  20. aiecs/tools/apisource/providers/fred.py +575 -0
  21. aiecs/tools/apisource/providers/newsapi.py +402 -0
  22. aiecs/tools/apisource/providers/worldbank.py +346 -0
  23. aiecs/tools/apisource/reliability/__init__.py +14 -0
  24. aiecs/tools/apisource/reliability/error_handler.py +362 -0
  25. aiecs/tools/apisource/reliability/fallback_strategy.py +420 -0
  26. aiecs/tools/apisource/tool.py +814 -0
  27. aiecs/tools/apisource/utils/__init__.py +12 -0
  28. aiecs/tools/apisource/utils/validators.py +343 -0
  29. aiecs/tools/langchain_adapter.py +95 -17
  30. aiecs/tools/search_tool/__init__.py +102 -0
  31. aiecs/tools/search_tool/analyzers.py +583 -0
  32. aiecs/tools/search_tool/cache.py +280 -0
  33. aiecs/tools/search_tool/constants.py +127 -0
  34. aiecs/tools/search_tool/context.py +219 -0
  35. aiecs/tools/search_tool/core.py +773 -0
  36. aiecs/tools/search_tool/deduplicator.py +123 -0
  37. aiecs/tools/search_tool/error_handler.py +257 -0
  38. aiecs/tools/search_tool/metrics.py +375 -0
  39. aiecs/tools/search_tool/rate_limiter.py +177 -0
  40. aiecs/tools/search_tool/schemas.py +297 -0
  41. aiecs/tools/statistics/data_loader_tool.py +2 -2
  42. aiecs/tools/statistics/data_transformer_tool.py +1 -1
  43. aiecs/tools/task_tools/__init__.py +8 -8
  44. aiecs/tools/task_tools/report_tool.py +1 -1
  45. aiecs/tools/tool_executor/__init__.py +2 -0
  46. aiecs/tools/tool_executor/tool_executor.py +284 -14
  47. aiecs/utils/__init__.py +11 -0
  48. aiecs/utils/cache_provider.py +698 -0
  49. aiecs/utils/execution_utils.py +5 -5
  50. {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/METADATA +1 -1
  51. {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/RECORD +55 -23
  52. aiecs/tools/task_tools/search_tool.py +0 -1123
  53. {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/WHEEL +0 -0
  54. {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/entry_points.txt +0 -0
  55. {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/licenses/LICENSE +0 -0
  56. {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/top_level.txt +0 -0
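
The largest change is the new module aiecs/tools/search_tool/core.py (entry 35 above, +773 lines), which appears to replace the monolithic aiecs/tools/task_tools/search_tool.py removed in entry 52. Its full diff follows.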
@@ -0,0 +1,773 @@
+ """
+ Core SearchTool Implementation
+
+ Enhanced Google Custom Search Tool with quality analysis, intent understanding,
+ intelligent caching, and comprehensive metrics.
+ """
+
+ import asyncio
+ import logging
+ import os
+ import time
+ from typing import Any, Dict, List, Optional
+
+ from pydantic import BaseModel, Field, ConfigDict
+
+ from aiecs.tools.base_tool import BaseTool
+ from aiecs.tools.tool_executor import cache_result_with_strategy
+ from aiecs.config.config import get_settings
+
+ # Import Google API with graceful fallback
+ try:
+     from googleapiclient.discovery import build
+     from googleapiclient.errors import HttpError
+     from google.auth.exceptions import GoogleAuthError
+     from google.oauth2 import service_account
+     GOOGLE_API_AVAILABLE = True
+ except ImportError:
+     GOOGLE_API_AVAILABLE = False
+     HttpError = Exception
+     GoogleAuthError = Exception
+
+ # Import search tool components
+ from .constants import *
+ from .rate_limiter import RateLimiter, CircuitBreaker
+ from .analyzers import ResultQualityAnalyzer, QueryIntentAnalyzer, ResultSummarizer
+ from .deduplicator import ResultDeduplicator
+ from .context import SearchContext
+ from .cache import IntelligentCache
+ from .metrics import EnhancedMetrics
+ from .error_handler import AgentFriendlyErrorHandler
+ from .schemas import (
+     SearchWebSchema,
+     SearchImagesSchema,
+     SearchNewsSchema,
+     SearchVideosSchema,
+     SearchPaginatedSchema,
+     SearchBatchSchema,
+     ValidateCredentialsSchema,
+     GetQuotaStatusSchema,
+     GetMetricsSchema,
+     GetMetricsReportSchema,
+     GetHealthScoreSchema,
+     GetSearchContextSchema,
+ )
+
+
+ class SearchTool(BaseTool):
+     """
+     Enhanced web search tool using Google Custom Search API.
+
+     Provides intelligent search with:
+     - Quality scoring and ranking
+     - Query intent analysis
+     - Result deduplication
+     - Context-aware search
+     - Intelligent Redis caching
+     - Comprehensive metrics
+     - Agent-friendly error handling
+     """
+
+     # Configuration schema
+     class Config(BaseModel):
+         """Configuration for the search tool"""
+         model_config = ConfigDict(env_prefix="SEARCH_TOOL_")
+
+         google_api_key: Optional[str] = Field(
+             default=None,
+             description="Google API key for Custom Search"
+         )
+         google_cse_id: Optional[str] = Field(
+             default=None,
+             description="Custom Search Engine ID"
+         )
+         google_application_credentials: Optional[str] = Field(
+             default=None,
+             description="Path to service account JSON"
+         )
+         max_results_per_query: int = Field(
+             default=10,
+             description="Maximum results per single query"
+         )
+         cache_ttl: int = Field(
+             default=3600,
+             description="Default cache time-to-live in seconds"
+         )
+         rate_limit_requests: int = Field(
+             default=100,
+             description="Maximum requests per time window"
+         )
+         rate_limit_window: int = Field(
+             default=86400,
+             description="Time window for rate limiting in seconds"
+         )
+         circuit_breaker_threshold: int = Field(
+             default=5,
+             description="Failures before opening circuit"
+         )
+         circuit_breaker_timeout: int = Field(
+             default=60,
+             description="Timeout before trying half-open in seconds"
+         )
+         retry_attempts: int = Field(
+             default=3,
+             description="Number of retry attempts"
+         )
+         retry_backoff: float = Field(
+             default=2.0,
+             description="Exponential backoff factor"
+         )
+         timeout: int = Field(
+             default=30,
+             description="API request timeout in seconds"
+         )
+         user_agent: str = Field(
+             default="AIECS-SearchTool/2.0",
+             description="User agent string"
+         )
+
+         # Enhanced features
+         enable_quality_analysis: bool = Field(
+             default=True,
+             description="Enable result quality analysis"
+         )
+         enable_intent_analysis: bool = Field(
+             default=True,
+             description="Enable query intent analysis"
+         )
+         enable_deduplication: bool = Field(
+             default=True,
+             description="Enable result deduplication"
+         )
+         enable_context_tracking: bool = Field(
+             default=True,
+             description="Enable search context tracking"
+         )
+         enable_intelligent_cache: bool = Field(
+             default=True,
+             description="Enable intelligent Redis caching"
+         )
+         similarity_threshold: float = Field(
+             default=0.85,
+             description="Similarity threshold for deduplication"
+         )
+         max_search_history: int = Field(
+             default=10,
+             description="Maximum search history to maintain"
+         )
+
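Every Config field above can be overridden through the config dict accepted by __init__ (shown below), which merges user values over the Google credentials pulled from global settings. A minimal construction sketch, assuming the module is importable at its wheel path:

from aiecs.tools.search_tool.core import SearchTool

# Unlisted fields keep the defaults declared above (e.g. retry_attempts=3).
tool = SearchTool(config={
    "max_results_per_query": 5,
    "cache_ttl": 600,               # cache hits expire after 10 minutes
    "enable_deduplication": False,  # skip the similarity-based dedup pass
})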
+     # Operation schemas for input validation and documentation
+     SearchWebSchema = SearchWebSchema
+     SearchImagesSchema = SearchImagesSchema
+     SearchNewsSchema = SearchNewsSchema
+     SearchVideosSchema = SearchVideosSchema
+     SearchPaginatedSchema = SearchPaginatedSchema
+     SearchBatchSchema = SearchBatchSchema
+     ValidateCredentialsSchema = ValidateCredentialsSchema
+     GetQuotaStatusSchema = GetQuotaStatusSchema
+     GetMetricsSchema = GetMetricsSchema
+     GetMetricsReportSchema = GetMetricsReportSchema
+     GetHealthScoreSchema = GetHealthScoreSchema
+     GetSearchContextSchema = GetSearchContextSchema
+
+     # Tool metadata
+     description = "Comprehensive web search tool using Google Custom Search API."
+     category = "task"
+
+     def __init__(self, config: Optional[Dict[str, Any]] = None):
+         """
+         Initialize SearchTool with enhanced capabilities.
+
+         Args:
+             config: Optional configuration overrides
+
+         Raises:
+             AuthenticationError: If Google API libraries are not available
+             ValidationError: If configuration is invalid
+         """
+         super().__init__(config)
+
+         if not GOOGLE_API_AVAILABLE:
+             raise AuthenticationError(
+                 "Google API client libraries not available. "
+                 "Install with: pip install google-api-python-client google-auth google-auth-httplib2"
+             )
+
+         # Load settings
+         global_settings = get_settings()
+
+         # Merge configuration
+         merged_config = {
+             'google_api_key': global_settings.google_api_key,
+             'google_cse_id': global_settings.google_cse_id,
+             'google_application_credentials': global_settings.google_application_credentials
+         }
+         if config:
+             merged_config.update(config)
+
+         # Parse configuration
+         self.config = self.Config(**merged_config)
+
+         # Initialize logger
+         self.logger = logging.getLogger(__name__)
+         if not self.logger.handlers:
+             handler = logging.StreamHandler()
+             handler.setFormatter(
+                 logging.Formatter('%(asctime)s %(levelname)s [SearchTool] %(message)s')
+             )
+             self.logger.addHandler(handler)
+             self.logger.setLevel(logging.INFO)
+
+         # Initialize API client
+         self._service = None
+         self._credentials = None
+         self._init_credentials()
+
+         # Initialize core components
+         self.rate_limiter = RateLimiter(
+             self.config.rate_limit_requests,
+             self.config.rate_limit_window
+         )
+
+         self.circuit_breaker = CircuitBreaker(
+             self.config.circuit_breaker_threshold,
+             self.config.circuit_breaker_timeout
+         )
+
+         # Initialize enhanced components
+         self.quality_analyzer = ResultQualityAnalyzer() if self.config.enable_quality_analysis else None
+         self.intent_analyzer = QueryIntentAnalyzer() if self.config.enable_intent_analysis else None
+         self.deduplicator = ResultDeduplicator() if self.config.enable_deduplication else None
+         self.result_summarizer = ResultSummarizer() if self.config.enable_quality_analysis else None
+         self.search_context = SearchContext(self.config.max_search_history) if self.config.enable_context_tracking else None
+         self.error_handler = AgentFriendlyErrorHandler()
+
+         # Initialize intelligent cache (Redis)
+         self.intelligent_cache = None
+         if self.config.enable_intelligent_cache:
+             try:
+                 from aiecs.infrastructure.persistence import RedisClient
+                 redis_client = RedisClient()
+                 # Note: Redis client needs to be initialized asynchronously
+                 self.intelligent_cache = IntelligentCache(redis_client, enabled=True)
+             except Exception as e:
+                 self.logger.warning(f"Could not initialize Redis cache: {e}")
+                 self.intelligent_cache = IntelligentCache(None, enabled=False)
+
+         # Initialize enhanced metrics
+         self.metrics = EnhancedMetrics()
+
+         self.logger.info("SearchTool initialized with enhanced capabilities")
+
+     def _create_search_ttl_strategy(self):
+         """
+         Create an intelligent TTL strategy for search results.
+
+         This strategy calculates TTL based on:
+         1. Query intent type (from result metadata)
+         2. Result freshness score
+         3. Result quality score
+
+         Returns:
+             Callable: TTL strategy function compatible with cache_result_with_strategy
+         """
+         def calculate_search_ttl(result: Any, args: tuple, kwargs: dict) -> int:
+             """
+             Calculate intelligent TTL for search results.
+
+             Args:
+                 result: Search result (dict with 'results' and '_metadata')
+                 args: Positional arguments (not used)
+                 kwargs: Keyword arguments containing 'query', etc.
+
+             Returns:
+                 int: TTL in seconds
+             """
+             # Extract metadata from result
+             if not isinstance(result, dict):
+                 return 3600  # Default 1 hour for non-dict results
+
+             metadata = result.get('_metadata', {})
+             intent_type = metadata.get('intent_type', 'GENERAL')
+             results_list = result.get('results', [])
+             query = kwargs.get('query', '')
+
+             # Use IntelligentCache logic if available
+             if hasattr(self, 'intelligent_cache') and self.intelligent_cache:
+                 try:
+                     return self.intelligent_cache.calculate_ttl(
+                         query,
+                         intent_type,
+                         results_list
+                     )
+                 except Exception as e:
+                     self.logger.warning(f"Failed to calculate intelligent TTL: {e}")
+
+             # Fallback: use intent-based TTL
+             from .cache import IntelligentCache
+             ttl_strategies = IntelligentCache.TTL_STRATEGIES
+             base_ttl = ttl_strategies.get(intent_type, ttl_strategies.get('GENERAL', 3600))
+
+             # Adjust based on result count
+             if not results_list:
+                 return base_ttl // 2  # Shorter TTL for empty results
+
+             return base_ttl
+
+         return calculate_search_ttl
+
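When the IntelligentCache path is unavailable, the fallback above reduces to a lookup in IntelligentCache.TTL_STRATEGIES with a half-TTL penalty for empty result sets. A self-contained sketch of that fallback, with illustrative TTL values (the real table lives in search_tool/cache.py and is not part of this diff):

# Hypothetical values; the actual table is IntelligentCache.TTL_STRATEGIES.
TTL_STRATEGIES = {"NEWS": 300, "FACTUAL": 86400, "GENERAL": 3600}

def fallback_ttl(intent_type: str, results: list) -> int:
    base_ttl = TTL_STRATEGIES.get(intent_type, TTL_STRATEGIES["GENERAL"])
    # Empty result sets get half the TTL so a retry is not blocked for long.
    return base_ttl // 2 if not results else base_ttl

assert fallback_ttl("NEWS", [{"title": "headline"}]) == 300
assert fallback_ttl("UNRECOGNIZED", []) == 1800  # GENERAL base 3600, halved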
+     def _init_credentials(self):
+         """Initialize Google API credentials"""
+         # Method 1: API Key
+         if self.config.google_api_key and self.config.google_cse_id:
+             try:
+                 self._service = build(
+                     'customsearch',
+                     'v1',
+                     developerKey=self.config.google_api_key,
+                     cache_discovery=False
+                 )
+                 self.logger.info("Initialized with API key")
+                 return
+             except Exception as e:
+                 self.logger.warning(f"Failed to initialize with API key: {e}")
+
+         # Method 2: Service Account
+         if self.config.google_application_credentials:
+             creds_path = self.config.google_application_credentials
+             if os.path.exists(creds_path):
+                 try:
+                     credentials = service_account.Credentials.from_service_account_file(
+                         creds_path,
+                         scopes=['https://www.googleapis.com/auth/cse']
+                     )
+                     self._credentials = credentials
+                     self._service = build(
+                         'customsearch',
+                         'v1',
+                         credentials=credentials,
+                         cache_discovery=False
+                     )
+                     self.logger.info("Initialized with service account")
+                     return
+                 except Exception as e:
+                     self.logger.warning(f"Failed to initialize with service account: {e}")
+
+         raise AuthenticationError(
+             "No valid Google API credentials found. Set GOOGLE_API_KEY and GOOGLE_CSE_ID"
+         )
+
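Credential resolution tries the API key first and falls back to a service account file. A minimal environment sketch, assuming get_settings() maps these standard variable names onto the google_* config fields (the error message above names the first two):

import os

# Option 1: API key + Custom Search Engine ID (tried first).
os.environ["GOOGLE_API_KEY"] = "your-api-key"
os.environ["GOOGLE_CSE_ID"] = "your-cse-id"

# Option 2: service account JSON, used if the API-key path fails.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/path/to/service-account.json"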
+     def _execute_search(
+         self,
+         query: str,
+         num_results: int = 10,
+         start_index: int = 1,
+         **kwargs
+     ) -> Dict[str, Any]:
+         """Execute search with rate limiting and circuit breaker"""
+         # Check rate limit
+         self.rate_limiter.acquire()
+
+         # Prepare parameters
+         search_params = {
+             'q': query,
+             'cx': self.config.google_cse_id,
+             'num': min(num_results, 10),
+             'start': start_index,
+             **kwargs
+         }
+
+         # Execute with circuit breaker
+         def _do_search():
+             try:
+                 result = self._service.cse().list(**search_params).execute()
+                 return result
+             except HttpError as e:
+                 if e.resp.status == 429:
+                     raise QuotaExceededError(f"API quota exceeded: {e}")
+                 elif e.resp.status == 403:
+                     raise AuthenticationError(f"Authentication failed: {e}")
+                 else:
+                     raise SearchAPIError(f"Search API error: {e}")
+             except Exception as e:
+                 raise SearchAPIError(f"Unexpected error: {e}")
+
+         return self.circuit_breaker.call(_do_search)
+
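_execute_search translates HTTP 429 into QuotaExceededError and 403 into AuthenticationError before the circuit breaker sees them. A caller-side sketch, assuming these exception classes come from search_tool/constants.py via the wildcard import at the top of the module:

from aiecs.tools.search_tool.constants import (  # assumed export location
    QuotaExceededError,
    SearchAPIError,
)

try:
    data = tool.search_web("python packaging", num_results=5)
except QuotaExceededError:
    pass  # 429: back off until the rate-limit window resets
except SearchAPIError as exc:
    print(f"search failed: {exc}")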
+     def _retry_with_backoff(self, func, *args, **kwargs) -> Any:
+         """Execute with exponential backoff retry"""
+         last_exception = None
+
+         for attempt in range(self.config.retry_attempts):
+             try:
+                 return func(*args, **kwargs)
+             except (RateLimitError, CircuitBreakerOpenError) as e:
+                 # Don't retry these
+                 raise e
+             except Exception as e:
+                 last_exception = e
+                 if attempt < self.config.retry_attempts - 1:
+                     wait_time = self.config.retry_backoff ** attempt
+                     self.logger.warning(
+                         f"Attempt {attempt + 1} failed: {e}. Retrying in {wait_time}s..."
+                     )
+                     time.sleep(wait_time)
+
+         raise last_exception
+
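With the defaults (retry_attempts=3, retry_backoff=2.0), the waits between attempts are 2.0**0 = 1s and 2.0**1 = 2s, and the third failure is re-raised. The schedule in isolation:

retry_attempts, retry_backoff = 3, 2.0
waits = [retry_backoff ** attempt for attempt in range(retry_attempts - 1)]
print(waits)  # [1.0, 2.0] -- no sleep after the final attempt; the exception propagates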
+     def _parse_search_results(
+         self,
+         raw_results: Dict[str, Any],
+         query: str = "",
+         enable_quality_analysis: bool = True
+     ) -> List[Dict[str, Any]]:
+         """Parse and enhance search results"""
+         items = raw_results.get('items', [])
+         results = []
+
+         for position, item in enumerate(items, start=1):
+             result = {
+                 'title': item.get('title', ''),
+                 'link': item.get('link', ''),
+                 'snippet': item.get('snippet', ''),
+                 'displayLink': item.get('displayLink', ''),
+                 'formattedUrl': item.get('formattedUrl', ''),
+             }
+
+             # Add image metadata
+             if 'image' in item:
+                 result['image'] = {
+                     'contextLink': item['image'].get('contextLink', ''),
+                     'height': item['image'].get('height', 0),
+                     'width': item['image'].get('width', 0),
+                     'byteSize': item['image'].get('byteSize', 0),
+                     'thumbnailLink': item['image'].get('thumbnailLink', '')
+                 }
+
+             # Add page metadata
+             if 'pagemap' in item:
+                 result['metadata'] = item['pagemap']
+
+             # Add quality analysis
+             if enable_quality_analysis and self.quality_analyzer and query:
+                 quality_analysis = self.quality_analyzer.analyze_result_quality(
+                     result, query, position
+                 )
+                 result['_quality'] = quality_analysis
+
+                 # Add agent-friendly quality summary
+                 result['_quality_summary'] = {
+                     'score': quality_analysis['quality_score'],
+                     'level': quality_analysis['credibility_level'],
+                     'is_authoritative': quality_analysis['authority_score'] > 0.8,
+                     'is_relevant': quality_analysis['relevance_score'] > 0.7,
+                     'is_fresh': quality_analysis['freshness_score'] > 0.7,
+                     'warnings_count': len(quality_analysis['warnings'])
+                 }
+
+             results.append(result)
+
+         return results
+
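Because each result can carry the _quality_summary block built above, downstream consumers can filter without recomputing scores. A small sketch using exactly those keys:

def keep_strong_hits(results: list) -> list:
    strong = []
    for result in results:
        summary = result.get("_quality_summary")
        # Keep only authoritative, relevant hits with no quality warnings.
        if summary and summary["is_authoritative"] and summary["is_relevant"] \
                and summary["warnings_count"] == 0:
            strong.append(result)
    return strong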
+     # ========================================================================
+     # Core Search Methods
+     # ========================================================================
+
+     @cache_result_with_strategy(
+         ttl_strategy=lambda self, result, args, kwargs:
+             self._create_search_ttl_strategy()(result, args, kwargs)
+     )
+     def search_web(
+         self,
+         query: str,
+         num_results: int = 10,
+         start_index: int = 1,
+         language: str = "en",
+         country: str = "us",
+         safe_search: str = "medium",
+         date_restrict: Optional[str] = None,
+         file_type: Optional[str] = None,
+         exclude_terms: Optional[str] = None,
+         auto_enhance: bool = True,
+         return_summary: bool = False
+     ) -> Dict[str, Any]:
+         """
+         Search the web with enhanced intelligence.
+
+         Args:
+             query: Search query string
+             num_results: Number of results to return
+             start_index: Starting index for pagination
+             language: Language code
+             country: Country code
+             safe_search: Safe search level
+             date_restrict: Date restriction
+             file_type: File type filter
+             exclude_terms: Terms to exclude
+             auto_enhance: Enable automatic query enhancement
+             return_summary: Return summary metadata
+
+         Returns:
+             Dict with 'results', '_metadata', and (when return_summary=True) 'summary'
+         """
+         start_time = time.time()
+         intent_analysis = None
+
+         try:
+             if not query or not query.strip():
+                 raise ValidationError("Query cannot be empty")
+
+             if num_results < 1 or num_results > 100:
+                 raise ValidationError("num_results must be between 1 and 100")
+
+             # Analyze query intent
+             enhanced_query = query
+             if auto_enhance and self.intent_analyzer:
+                 intent_analysis = self.intent_analyzer.analyze_query_intent(query)
+                 enhanced_query = intent_analysis['enhanced_query']
+
+                 # Merge suggested parameters
+                 for param, value in intent_analysis['suggested_params'].items():
+                     if param == 'date_restrict' and not date_restrict:
+                         date_restrict = value
+                     elif param == 'file_type' and not file_type:
+                         file_type = value
+                     elif param == 'num_results':
+                         num_results = min(num_results, value)
+
+                 self.logger.info(
+                     f"Intent: {intent_analysis['intent_type']} "
+                     f"(confidence: {intent_analysis['confidence']:.2f})"
+                 )
+
+             # Note: caching is handled by the @cache_result_with_strategy decorator,
+             # so no manual cache check is needed here.
+
+             # Prepare search parameters
+             search_params = {
+                 'lr': f'lang_{language}',
+                 'cr': f'country{country.upper()}',
+                 'safe': safe_search,
+             }
+
+             if date_restrict:
+                 search_params['dateRestrict'] = date_restrict
+
+             if file_type:
+                 search_params['fileType'] = file_type
+
+             if exclude_terms:
+                 enhanced_query = f"{enhanced_query} -{exclude_terms}"
+
+             # Execute search
+             raw_results = self._retry_with_backoff(
+                 self._execute_search,
+                 enhanced_query,
+                 num_results,
+                 start_index,
+                 **search_params
+             )
+
+             # Parse results
+             results = self._parse_search_results(
+                 raw_results,
+                 query=query,
+                 enable_quality_analysis=self.config.enable_quality_analysis
+             )
+
+             # Deduplicate
+             if self.deduplicator:
+                 results = self.deduplicator.deduplicate_results(
+                     results,
+                     self.config.similarity_threshold
+                 )
+
+             # Add search metadata
+             if intent_analysis:
+                 for result in results:
+                     result['_search_metadata'] = {
+                         'original_query': query,
+                         'enhanced_query': enhanced_query,
+                         'intent_type': intent_analysis['intent_type'],
+                         'intent_confidence': intent_analysis['confidence'],
+                         'suggestions': intent_analysis['suggestions']
+                     }
+
+             # Update context
+             if self.search_context:
+                 self.search_context.add_search(query, results)
+
+             # Note: the caching decorator calls _create_search_ttl_strategy()
+             # to calculate the TTL for this result.
+
+             # Record metrics
+             response_time = (time.time() - start_time) * 1000
+             self.metrics.record_search(
+                 query, 'web', results, response_time, cached=False
+             )
+
+             # Prepare result with metadata for TTL calculation
+             result_data = {
+                 'results': results,
+                 '_metadata': {
+                     'intent_type': intent_analysis['intent_type'] if intent_analysis else 'GENERAL',
+                     'query': query,
+                     'enhanced_query': enhanced_query,
+                     'timestamp': time.time(),
+                     'response_time_ms': response_time
+                 }
+             }
+
+             # Generate summary if requested
+             if return_summary and self.result_summarizer:
+                 summary = self.result_summarizer.generate_summary(results, query)
+                 result_data['summary'] = summary
+
+             return result_data
+
+         except Exception as e:
+             response_time = (time.time() - start_time) * 1000
+             self.metrics.record_search(
+                 query, 'web', [], response_time, error=e
+             )
+
+             # Format error for agent
+             error_info = self.error_handler.format_error_for_agent(
+                 e,
+                 {'circuit_breaker_timeout': self.config.circuit_breaker_timeout}
+             )
+
+             self.logger.error(f"Search failed: {error_info['user_message']}")
+             raise
+
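A minimal end-to-end call, assuming valid credentials are configured; the return shape ('results', '_metadata', and an optional 'summary') follows the result_data construction above:

tool = SearchTool()
data = tool.search_web("site reliability engineering", num_results=5, return_summary=True)

for hit in data["results"]:
    print(hit["title"], "->", hit["link"])

meta = data["_metadata"]
print(meta["intent_type"], f"{meta['response_time_ms']:.0f} ms")
print(data.get("summary"))  # present only when return_summary=True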
+     def search_images(
+         self,
+         query: str,
+         num_results: int = 10,
+         image_size: Optional[str] = None,
+         image_type: Optional[str] = None,
+         image_color_type: Optional[str] = None,
+         safe_search: str = "medium"
+     ) -> List[Dict[str, Any]]:
+         """Search for images"""
+         if not query or not query.strip():
+             raise ValidationError("Query cannot be empty")
+
+         search_params = {
+             'searchType': 'image',
+             'safe': safe_search,
+         }
+
+         if image_size:
+             search_params['imgSize'] = image_size
+         if image_type:
+             search_params['imgType'] = image_type
+         if image_color_type:
+             search_params['imgColorType'] = image_color_type
+
+         raw_results = self._retry_with_backoff(
+             self._execute_search,
+             query,
+             num_results,
+             1,
+             **search_params
+         )
+
+         return self._parse_search_results(raw_results, query=query)
+
+     def search_news(
+         self,
+         query: str,
+         num_results: int = 10,
+         start_index: int = 1,
+         language: str = "en",
+         date_restrict: Optional[str] = None,
+         sort_by: str = "date"
+     ) -> List[Dict[str, Any]]:
+         """Search for news articles"""
+         if not query or not query.strip():
+             raise ValidationError("Query cannot be empty")
+
+         news_query = f"{query} news"
+
+         search_params = {
+             'lr': f'lang_{language}',
+             'sort': sort_by if sort_by == 'date' else '',
+         }
+
+         if date_restrict:
+             search_params['dateRestrict'] = date_restrict
+
+         raw_results = self._retry_with_backoff(
+             self._execute_search,
+             news_query,
+             num_results,
+             start_index,
+             **search_params
+         )
+
+         return self._parse_search_results(raw_results, query=query)
+
+     def search_videos(
+         self,
+         query: str,
+         num_results: int = 10,
+         start_index: int = 1,
+         language: str = "en",
+         safe_search: str = "medium"
+     ) -> List[Dict[str, Any]]:
+         """Search for videos"""
+         if not query or not query.strip():
+             raise ValidationError("Query cannot be empty")
+
+         video_query = f"{query} filetype:mp4 OR filetype:webm OR filetype:mov"
+
+         search_params = {
+             'lr': f'lang_{language}',
+             'safe': safe_search,
+         }
+
+         raw_results = self._retry_with_backoff(
+             self._execute_search,
+             video_query,
+             num_results,
+             start_index,
+             **search_params
+         )
+
+         return self._parse_search_results(raw_results, query=query)
+
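The specialized variants reuse the same retry and parse pipeline but return plain result lists rather than the dict that search_web builds. Illustrative calls, assuming parameter values follow the Google Custom Search conventions (e.g. imgSize "large", dateRestrict "d7"):

images = tool.search_images("golden gate bridge", num_results=3, image_size="large")
news = tool.search_news("semiconductor exports", date_restrict="d7", sort_by="date")
videos = tool.search_videos("kubernetes tutorial", num_results=3)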
+     # ========================================================================
+     # Utility Methods
+     # ========================================================================
+
+     def get_metrics(self) -> Dict[str, Any]:
+         """Get comprehensive metrics"""
+         return self.metrics.get_metrics()
+
+     def get_metrics_report(self) -> str:
+         """Get human-readable metrics report"""
+         return self.metrics.generate_report()
+
+     def get_health_score(self) -> float:
+         """Get system health score (0-1)"""
+         return self.metrics.get_health_score()
+
+     def get_quota_status(self) -> Dict[str, Any]:
+         """Get quota and rate limit status"""
+         return {
+             'remaining_quota': self.rate_limiter.get_remaining_quota(),
+             'max_requests': self.config.rate_limit_requests,
+             'time_window_seconds': self.config.rate_limit_window,
+             'circuit_breaker_state': self.circuit_breaker.get_state(),
+             'health_score': self.get_health_score()
+         }
+
+     def get_search_context(self) -> Optional[Dict[str, Any]]:
+         """Get search context information"""
+         if not self.search_context:
+             return None
+
+         return {
+             'history': self.search_context.get_history(5),
+             'preferences': self.search_context.get_preferences()
+         }
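
The utility methods expose operational state without spending quota. A quick monitoring sketch built only on the return keys shown above:

status = tool.get_quota_status()
print(f"quota: {status['remaining_quota']}/{status['max_requests']}")
print("circuit breaker:", status["circuit_breaker_state"])

if tool.get_health_score() < 0.5:
    print(tool.get_metrics_report())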