aiecs 1.2.1__py3-none-any.whl → 1.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiecs might be problematic. Click here for more details.

Files changed (56) hide show
  1. aiecs/__init__.py +1 -1
  2. aiecs/config/config.py +2 -1
  3. aiecs/llm/clients/vertex_client.py +5 -0
  4. aiecs/main.py +2 -2
  5. aiecs/scripts/tools_develop/README.md +111 -2
  6. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  7. aiecs/scripts/tools_develop/validate_tool_schemas.py +80 -21
  8. aiecs/scripts/tools_develop/verify_tools.py +347 -0
  9. aiecs/tools/__init__.py +94 -30
  10. aiecs/tools/apisource/__init__.py +106 -0
  11. aiecs/tools/apisource/intelligence/__init__.py +20 -0
  12. aiecs/tools/apisource/intelligence/data_fusion.py +378 -0
  13. aiecs/tools/apisource/intelligence/query_analyzer.py +387 -0
  14. aiecs/tools/apisource/intelligence/search_enhancer.py +384 -0
  15. aiecs/tools/apisource/monitoring/__init__.py +12 -0
  16. aiecs/tools/apisource/monitoring/metrics.py +308 -0
  17. aiecs/tools/apisource/providers/__init__.py +114 -0
  18. aiecs/tools/apisource/providers/base.py +684 -0
  19. aiecs/tools/apisource/providers/census.py +412 -0
  20. aiecs/tools/apisource/providers/fred.py +575 -0
  21. aiecs/tools/apisource/providers/newsapi.py +402 -0
  22. aiecs/tools/apisource/providers/worldbank.py +346 -0
  23. aiecs/tools/apisource/reliability/__init__.py +14 -0
  24. aiecs/tools/apisource/reliability/error_handler.py +362 -0
  25. aiecs/tools/apisource/reliability/fallback_strategy.py +420 -0
  26. aiecs/tools/apisource/tool.py +814 -0
  27. aiecs/tools/apisource/utils/__init__.py +12 -0
  28. aiecs/tools/apisource/utils/validators.py +343 -0
  29. aiecs/tools/langchain_adapter.py +95 -17
  30. aiecs/tools/search_tool/__init__.py +102 -0
  31. aiecs/tools/search_tool/analyzers.py +583 -0
  32. aiecs/tools/search_tool/cache.py +280 -0
  33. aiecs/tools/search_tool/constants.py +127 -0
  34. aiecs/tools/search_tool/context.py +219 -0
  35. aiecs/tools/search_tool/core.py +773 -0
  36. aiecs/tools/search_tool/deduplicator.py +123 -0
  37. aiecs/tools/search_tool/error_handler.py +257 -0
  38. aiecs/tools/search_tool/metrics.py +375 -0
  39. aiecs/tools/search_tool/rate_limiter.py +177 -0
  40. aiecs/tools/search_tool/schemas.py +297 -0
  41. aiecs/tools/statistics/data_loader_tool.py +2 -2
  42. aiecs/tools/statistics/data_transformer_tool.py +1 -1
  43. aiecs/tools/task_tools/__init__.py +8 -8
  44. aiecs/tools/task_tools/report_tool.py +1 -1
  45. aiecs/tools/tool_executor/__init__.py +2 -0
  46. aiecs/tools/tool_executor/tool_executor.py +284 -14
  47. aiecs/utils/__init__.py +11 -0
  48. aiecs/utils/cache_provider.py +698 -0
  49. aiecs/utils/execution_utils.py +5 -5
  50. {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/METADATA +1 -1
  51. {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/RECORD +55 -23
  52. aiecs/tools/task_tools/search_tool.py +0 -1123
  53. {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/WHEEL +0 -0
  54. {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/entry_points.txt +0 -0
  55. {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/licenses/LICENSE +0 -0
  56. {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/top_level.txt +0 -0
@@ -1,1123 +0,0 @@
1
- """
2
- Google Custom Search Tool for AI Agents
3
-
4
- A comprehensive, production-ready web search tool that integrates Google Custom Search API
5
- with advanced features including multiple search types, pagination, rate limiting, circuit
6
- breaker pattern, caching, and full AIECS architecture compliance.
7
-
8
- Features:
9
- - Multiple search types: web, image, news, video
10
- - Dual authentication: API key and service account
11
- - Rate limiting with token bucket algorithm
12
- - Circuit breaker pattern for API resilience
13
- - Intelligent caching with TTL
14
- - Comprehensive error handling and retry logic
15
- - Batch and paginated search support
16
- """
17
-
18
- import asyncio
19
- import hashlib
20
- import json
21
- import logging
22
- import os
23
- import time
24
- from collections import deque
25
- from datetime import datetime, timedelta
26
- from enum import Enum
27
- from threading import Lock
28
- from typing import Any, Dict, List, Optional, Union
29
-
30
- from pydantic import BaseModel, Field, ValidationError as PydanticValidationError, ConfigDict
31
-
32
- from aiecs.tools import register_tool
33
- from aiecs.tools.base_tool import BaseTool
34
- from aiecs.config.config import get_settings
35
-
36
- # Google API imports with graceful fallback
37
- try:
38
- from googleapiclient.discovery import build
39
- from googleapiclient.errors import HttpError
40
- from google.auth.exceptions import GoogleAuthError
41
- from google.oauth2 import service_account
42
- from google.auth.transport.requests import Request
43
- GOOGLE_API_AVAILABLE = True
44
- except ImportError:
45
- GOOGLE_API_AVAILABLE = False
46
- HttpError = Exception
47
- GoogleAuthError = Exception
48
-
49
-
50
- # ============================================================================
51
- # Enums and Constants
52
- # ============================================================================
53
-
54
class SearchType(str, Enum):
    """Search categories; each member is also a plain ``str`` equal to its value."""

    WEB = "web"
    IMAGE = "image"
    NEWS = "news"
    VIDEO = "video"
60
-
61
-
62
class SafeSearch(str, Enum):
    """Safe-search filter levels; members compare equal to their string value."""

    OFF = "off"
    MEDIUM = "medium"
    HIGH = "high"
67
-
68
-
69
class ImageSize(str, Enum):
    """Image size filter values; members compare equal to their string value."""

    ICON = "icon"
    SMALL = "small"
    MEDIUM = "medium"
    LARGE = "large"
    XLARGE = "xlarge"
    XXLARGE = "xxlarge"
    HUGE = "huge"
78
-
79
-
80
class ImageType(str, Enum):
    """Image type filter values; members compare equal to their string value."""

    CLIPART = "clipart"
    FACE = "face"
    LINEART = "lineart"
    STOCK = "stock"
    PHOTO = "photo"
    ANIMATED = "animated"
88
-
89
-
90
class ImageColorType(str, Enum):
    """Image color-type filter values; members compare equal to their string value."""

    COLOR = "color"
    GRAY = "gray"
    MONO = "mono"
    TRANS = "trans"
96
-
97
-
98
class CircuitState(str, Enum):
    """The three circuit breaker states; members compare equal to their string value."""

    CLOSED = "closed"
    OPEN = "open"
    HALF_OPEN = "half_open"
103
-
104
-
105
- # ============================================================================
106
- # Exception Hierarchy
107
- # ============================================================================
108
-
109
class SearchToolError(Exception):
    """Root of the SearchTool exception hierarchy; catch this to handle any tool error."""


class AuthenticationError(SearchToolError):
    """Raised for authentication-related failures."""


class QuotaExceededError(SearchToolError):
    """Raised when the API quota has been exceeded."""


class RateLimitError(SearchToolError):
    """Raised when the rate limit has been exceeded."""


class CircuitBreakerOpenError(SearchToolError):
    """Raised when a call is refused because the circuit breaker is open."""


class SearchAPIError(SearchToolError):
    """Raised for errors reported by the search API."""


class ValidationError(SearchToolError):
    """Raised when input validation fails."""
142
-
143
-
144
- # ============================================================================
145
- # Configuration
146
- # ============================================================================
147
-
148
-
149
-
150
- # ============================================================================
151
- # Rate Limiter
152
- # ============================================================================
153
-
154
class RateLimiter:
    """
    Token bucket rate limiter for API requests.

    The bucket starts full with ``max_requests`` tokens and refills
    continuously at ``max_requests / time_window`` tokens per second, never
    exceeding ``max_requests``. All state changes happen under ``self.lock``.
    """

    def __init__(self, max_requests: int, time_window: int):
        """
        Initialize rate limiter.

        Args:
            max_requests: Maximum number of requests allowed per window
            time_window: Time window in seconds

        Raises:
            ValueError: If either argument is not positive (the refill rate
                would otherwise be zero or undefined and fail later with
                ZeroDivisionError)
        """
        if max_requests <= 0:
            raise ValueError("max_requests must be positive")
        if time_window <= 0:
            raise ValueError("time_window must be positive")

        self.max_requests = max_requests
        self.time_window = time_window
        self.tokens = max_requests
        # Monotonic clock: elapsed-time arithmetic must not be affected by
        # wall-clock adjustments (NTP steps, manual changes).
        self.last_update = time.monotonic()
        self.lock = Lock()
        # Monotonic timestamps of granted requests inside the current window.
        self.request_history: deque = deque()

    def _refill_tokens(self):
        """Add the tokens earned since the last update, capped at ``max_requests``."""
        now = time.monotonic()
        elapsed = now - self.last_update

        refill_rate = self.max_requests / self.time_window
        self.tokens = min(self.max_requests, self.tokens + elapsed * refill_rate)
        self.last_update = now

    def acquire(self, tokens: int = 1) -> bool:
        """
        Take ``tokens`` from the bucket or fail immediately (never blocks).

        Args:
            tokens: Number of tokens to acquire

        Returns:
            True when the tokens were acquired

        Raises:
            RateLimitError: If the bucket does not hold enough tokens
        """
        with self.lock:
            self._refill_tokens()

            # Drop history entries that have fallen out of the window.
            now = time.monotonic()
            cutoff_time = now - self.time_window
            history = self.request_history
            while history and history[0] < cutoff_time:
                history.popleft()

            if self.tokens >= tokens:
                self.tokens -= tokens
                history.append(now)
                return True

            # Seconds until the missing tokens will have been refilled.
            wait_time = (tokens - self.tokens) / (self.max_requests / self.time_window)
            raise RateLimitError(
                f"Rate limit exceeded. {len(self.request_history)} requests in last "
                f"{self.time_window}s. Wait {wait_time:.1f}s before retrying."
            )

    def get_remaining_quota(self) -> int:
        """Return the number of whole tokens currently available."""
        with self.lock:
            self._refill_tokens()
            return int(self.tokens)
228
-
229
-
230
- # ============================================================================
231
- # Circuit Breaker
232
- # ============================================================================
233
-
234
class CircuitBreaker:
    """
    Circuit breaker pattern implementation for API resilience.

    State machine:
      * CLOSED    - calls pass through; consecutive failures are counted.
      * OPEN      - calls are refused until ``timeout`` seconds have passed
                    since the last failure.
      * HALF_OPEN - calls pass through as probes; a success closes the
                    circuit, a failure reopens it immediately.
    """

    def __init__(self, failure_threshold: int, timeout: int):
        """
        Initialize circuit breaker.

        Args:
            failure_threshold: Consecutive failures before opening circuit
            timeout: Seconds to stay open before allowing a probe call
        """
        self.failure_threshold = failure_threshold
        self.timeout = timeout
        self.failure_count = 0
        # Monotonic timestamp of the most recent failure (None until one occurs).
        self.last_failure_time: Optional[float] = None
        self.state = CircuitState.CLOSED
        self.lock = Lock()

    def call(self, func, *args, **kwargs):
        """
        Execute function with circuit breaker protection.

        Args:
            func: Function to execute
            *args: Positional arguments
            **kwargs: Keyword arguments

        Returns:
            Function result

        Raises:
            CircuitBreakerOpenError: If circuit is open and the timeout has
                not elapsed yet
            Exception: Whatever ``func`` raises, re-raised unchanged after
                being recorded as a failure
        """
        with self.lock:
            if self.state == CircuitState.OPEN:
                elapsed = time.monotonic() - self.last_failure_time
                if elapsed < self.timeout:
                    raise CircuitBreakerOpenError(
                        f"Circuit breaker is OPEN. Retry after "
                        f"{self.timeout - elapsed:.1f}s"
                    )
                # Timeout elapsed: let this call through as a probe.
                self.state = CircuitState.HALF_OPEN
                self.failure_count = 0

        # The lock is not held while func runs, so slow calls do not block
        # other threads' state checks.
        try:
            result = func(*args, **kwargs)
        except Exception:
            self._on_failure()
            raise  # bare raise keeps the original traceback intact
        self._on_success()
        return result

    def _on_success(self):
        """Record a success: clear the failure streak and close a half-open circuit."""
        with self.lock:
            # The threshold counts consecutive failures, so any success
            # ends the streak regardless of state.
            self.failure_count = 0
            if self.state == CircuitState.HALF_OPEN:
                self.state = CircuitState.CLOSED

    def _on_failure(self):
        """Record a failure and open the circuit when warranted."""
        with self.lock:
            self.failure_count += 1
            self.last_failure_time = time.monotonic()

            # A failed probe reopens immediately; otherwise open once the
            # consecutive-failure threshold is reached.
            if (
                self.state == CircuitState.HALF_OPEN
                or self.failure_count >= self.failure_threshold
            ):
                self.state = CircuitState.OPEN

    def get_state(self) -> str:
        """Get current circuit state as its string value."""
        return self.state.value
311
-
312
-
313
- # ============================================================================
314
- # Search Tool Implementation
315
- # ============================================================================
316
-
317
- @register_tool("search")
318
- class SearchTool(BaseTool):
319
- """
320
- Comprehensive web search tool using Google Custom Search API.
321
-
322
- Provides multiple search types (web, image, news, video) with advanced features
323
- including rate limiting, circuit breaker protection, caching, and comprehensive
324
- error handling.
325
-
326
- Features:
327
- - Web, image, news, and video search
328
- - Dual authentication (API key and service account)
329
- - Rate limiting and circuit breaker
330
- - Intelligent caching with TTL
331
- - Batch and paginated search
332
- - Comprehensive error handling
333
-
334
- Inherits from BaseTool to leverage ToolExecutor for caching, concurrency,
335
- and error handling.
336
- """
337
-
338
- # Configuration schema
339
class Config(BaseModel):
    """Configuration for the search tool"""
    # NOTE(review): env_prefix is normally a pydantic-settings option; confirm
    # it has any effect on a plain BaseModel.
    model_config = ConfigDict(env_prefix="SEARCH_TOOL_")

    # --- Credentials ---
    google_api_key: Optional[str] = Field(default=None, description="Google API key for Custom Search")
    google_cse_id: Optional[str] = Field(default=None, description="Custom Search Engine ID")
    google_application_credentials: Optional[str] = Field(default=None, description="Path to service account JSON")

    # --- Result size and caching ---
    max_results_per_query: int = Field(default=10, description="Maximum results per single query")
    cache_ttl: int = Field(default=3600, description="Cache time-to-live in seconds")

    # --- Rate limiting (default: 100 requests per 86400 s, i.e. per day) ---
    rate_limit_requests: int = Field(default=100, description="Maximum requests per time window")
    rate_limit_window: int = Field(default=86400, description="Time window for rate limiting in seconds")

    # --- Circuit breaker ---
    circuit_breaker_threshold: int = Field(default=5, description="Failures before opening circuit")
    circuit_breaker_timeout: int = Field(default=60, description="Timeout before trying half-open in seconds")

    # --- Retry and transport ---
    retry_attempts: int = Field(default=3, description="Number of retry attempts")
    retry_backoff: float = Field(default=2.0, description="Exponential backoff factor")
    timeout: int = Field(default=30, description="API request timeout in seconds")
    user_agent: str = Field(default="AIECS-SearchTool/1.0", description="User agent string")

    # --- Feature gating ---
    allowed_search_types: List[str] = Field(
        default=["web", "image", "news", "video"],
        description="Allowed search types",
    )
399
-
400
def __init__(self, config: Optional[Dict[str, Any]] = None):
    """
    Build a SearchTool: resolve configuration, then set up logging, the
    Google API client, the rate limiter, the circuit breaker and metrics.

    Args:
        config: Optional configuration overrides; these take precedence over
            the credential values read from the global settings

    Raises:
        AuthenticationError: If the Google API libraries are not installed,
            or (via ``_init_credentials``) no usable credentials are found
    """
    super().__init__(config)

    if not GOOGLE_API_AVAILABLE:
        raise AuthenticationError(
            "Google API client libraries not available. "
            "Install with: pip install google-api-python-client google-auth google-auth-httplib2"
        )

    # Credentials come from the global settings first; explicit overrides win.
    settings = get_settings()
    effective_config = {
        'google_api_key': settings.google_api_key,
        'google_cse_id': settings.google_cse_id,
        'google_application_credentials': settings.google_application_credentials,
    }
    effective_config.update(config or {})
    self.config = self.Config(**effective_config)

    # Attach a stream handler only once per module logger.
    self.logger = logging.getLogger(__name__)
    if not self.logger.handlers:
        stream_handler = logging.StreamHandler()
        stream_handler.setFormatter(
            logging.Formatter('%(asctime)s %(levelname)s [SearchTool] %(message)s')
        )
        self.logger.addHandler(stream_handler)
    self.logger.setLevel(logging.INFO)

    # API client; _init_credentials raises if nothing usable is configured.
    self._service = None
    self._credentials = None
    self._init_credentials()

    self.rate_limiter = RateLimiter(
        self.config.rate_limit_requests,
        self.config.rate_limit_window,
    )
    self.circuit_breaker = CircuitBreaker(
        self.config.circuit_breaker_threshold,
        self.config.circuit_breaker_timeout,
    )

    # Simple in-memory counters, all starting at zero.
    self.metrics = dict.fromkeys(
        (
            'total_requests',
            'successful_requests',
            'failed_requests',
            'cache_hits',
            'rate_limit_errors',
            'circuit_breaker_trips',
        ),
        0,
    )
470
-
471
def _init_credentials(self):
    """
    Create the Custom Search API client and store it on ``self._service``.

    Tries, in order: (1) API key plus search engine id, (2) a service account
    JSON file. A failure in one strategy is logged as a warning and the next
    strategy is tried.

    Raises:
        AuthenticationError: If neither strategy produced a client
    """
    cfg = self.config

    # Strategy 1: API key (needs the search engine id as well).
    if cfg.google_api_key and cfg.google_cse_id:
        try:
            self._service = build(
                'customsearch',
                'v1',
                developerKey=cfg.google_api_key,
                cache_discovery=False,
            )
            self.logger.info("Initialized Google Custom Search with API key")
            return
        except Exception as exc:
            self.logger.warning(f"Failed to initialize with API key: {exc}")

    # Strategy 2: service account file, only if the path actually exists.
    key_file = cfg.google_application_credentials
    if key_file and os.path.exists(key_file):
        try:
            account_credentials = service_account.Credentials.from_service_account_file(
                key_file,
                scopes=['https://www.googleapis.com/auth/cse'],
            )
            self._credentials = account_credentials
            self._service = build(
                'customsearch',
                'v1',
                credentials=account_credentials,
                cache_discovery=False,
            )
            self.logger.info("Initialized Google Custom Search with service account")
            return
        except Exception as exc:
            self.logger.warning(f"Failed to initialize with service account: {exc}")

    raise AuthenticationError(
        "No valid Google API credentials found. Please set either:\n"
        "1. GOOGLE_API_KEY and GOOGLE_CSE_ID environment variables, or\n"
        "2. GOOGLE_APPLICATION_CREDENTIALS pointing to service account JSON file"
    )
520
-
521
- def _execute_search(
522
- self,
523
- query: str,
524
- num_results: int = 10,
525
- start_index: int = 1,
526
- **kwargs
527
- ) -> Dict[str, Any]:
528
- """
529
- Execute a search request with retry logic.
530
-
531
- Args:
532
- query: Search query
533
- num_results: Number of results to return
534
- start_index: Starting index for pagination
535
- **kwargs: Additional search parameters
536
-
537
- Returns:
538
- Search results dictionary
539
-
540
- Raises:
541
- SearchAPIError: If search fails
542
- RateLimitError: If rate limit is exceeded
543
- CircuitBreakerOpenError: If circuit breaker is open
544
- """
545
- # Check rate limit
546
- self.rate_limiter.acquire()
547
-
548
- # Prepare search parameters
549
- search_params = {
550
- 'q': query,
551
- 'cx': self.config.google_cse_id,
552
- 'num': min(num_results, 10), # Google limits to 10 per request
553
- 'start': start_index,
554
- **kwargs
555
- }
556
-
557
- # Execute with circuit breaker protection
558
- def _do_search():
559
- try:
560
- self.metrics['total_requests'] += 1
561
- result = self._service.cse().list(**search_params).execute()
562
- self.metrics['successful_requests'] += 1
563
- return result
564
- except HttpError as e:
565
- self.metrics['failed_requests'] += 1
566
- if e.resp.status == 429:
567
- raise QuotaExceededError(f"API quota exceeded: {e}")
568
- elif e.resp.status == 403:
569
- raise AuthenticationError(f"Authentication failed: {e}")
570
- else:
571
- raise SearchAPIError(f"Search API error: {e}")
572
- except Exception as e:
573
- self.metrics['failed_requests'] += 1
574
- raise SearchAPIError(f"Unexpected error: {e}")
575
-
576
- try:
577
- return self.circuit_breaker.call(_do_search)
578
- except CircuitBreakerOpenError as e:
579
- self.metrics['circuit_breaker_trips'] += 1
580
- raise e
581
-
582
- def _retry_with_backoff(self, func, *args, **kwargs) -> Any:
583
- """
584
- Execute function with exponential backoff retry logic.
585
-
586
- Args:
587
- func: Function to execute
588
- *args: Positional arguments
589
- **kwargs: Keyword arguments
590
-
591
- Returns:
592
- Function result
593
-
594
- Raises:
595
- Exception: Last exception if all retries fail
596
- """
597
- last_exception = None
598
-
599
- for attempt in range(self.config.retry_attempts):
600
- try:
601
- return func(*args, **kwargs)
602
- except (RateLimitError, CircuitBreakerOpenError) as e:
603
- # Don't retry rate limit or circuit breaker errors
604
- raise e
605
- except Exception as e:
606
- last_exception = e
607
- if attempt < self.config.retry_attempts - 1:
608
- wait_time = self.config.retry_backoff ** attempt
609
- self.logger.warning(
610
- f"Attempt {attempt + 1} failed: {e}. "
611
- f"Retrying in {wait_time}s..."
612
- )
613
- time.sleep(wait_time)
614
- else:
615
- self.logger.error(f"All {self.config.retry_attempts} attempts failed")
616
-
617
- raise last_exception
618
-
619
- def _parse_search_results(self, raw_results: Dict[str, Any]) -> List[Dict[str, Any]]:
620
- """
621
- Parse and normalize search results.
622
-
623
- Args:
624
- raw_results: Raw API response
625
-
626
- Returns:
627
- List of normalized result dictionaries
628
- """
629
- items = raw_results.get('items', [])
630
- results = []
631
-
632
- for item in items:
633
- result = {
634
- 'title': item.get('title', ''),
635
- 'link': item.get('link', ''),
636
- 'snippet': item.get('snippet', ''),
637
- 'displayLink': item.get('displayLink', ''),
638
- 'formattedUrl': item.get('formattedUrl', ''),
639
- }
640
-
641
- # Add image-specific metadata if present
642
- if 'image' in item:
643
- result['image'] = {
644
- 'contextLink': item['image'].get('contextLink', ''),
645
- 'height': item['image'].get('height', 0),
646
- 'width': item['image'].get('width', 0),
647
- 'byteSize': item['image'].get('byteSize', 0),
648
- 'thumbnailLink': item['image'].get('thumbnailLink', '')
649
- }
650
-
651
- # Add page metadata if present
652
- if 'pagemap' in item:
653
- result['metadata'] = item['pagemap']
654
-
655
- results.append(result)
656
-
657
- return results
658
-
659
- # ========================================================================
660
- # Core Search Methods
661
- # ========================================================================
662
-
663
def search_web(
    self,
    query: str,
    num_results: int = 10,
    start_index: int = 1,
    language: str = "en",
    country: str = "us",
    safe_search: str = "medium",
    date_restrict: Optional[str] = None,
    file_type: Optional[str] = None,
    exclude_terms: Optional[str] = None
) -> List[Dict[str, Any]]:
    """
    Search the web using Google Custom Search API.

    Args:
        query: Search query string
        num_results: Number of results to return; validated as 1-100 here
        start_index: Starting index for pagination (1-based)
        language: Language code for results (e.g., 'en', 'zh-CN')
        country: Country code for results (e.g., 'us', 'cn')
        safe_search: Safe search level ('off', 'medium', 'high')
        date_restrict: Restrict results by date (e.g., 'd5' for last 5 days)
        file_type: Filter by file type (e.g., 'pdf', 'doc')
        exclude_terms: Whitespace-separated terms to exclude; every term is
            negated individually

    Returns:
        List of search result dictionaries with title, link, snippet, etc.

    Raises:
        ValidationError: If query is empty or num_results is out of range
        SearchAPIError: If search fails
        RateLimitError: If rate limit is exceeded

    Examples:
        >>> tool = SearchTool()
        >>> results = tool.search_web("artificial intelligence", num_results=5)
        >>> print(results[0]['title'])
    """
    if not query or not query.strip():
        raise ValidationError("Query cannot be empty")

    if num_results < 1 or num_results > 100:
        raise ValidationError("num_results must be between 1 and 100")

    search_params = {
        'lr': f'lang_{language}',
        'cr': f'country{country.upper()}',
        'safe': safe_search,
    }

    if date_restrict:
        search_params['dateRestrict'] = date_restrict

    if file_type:
        search_params['fileType'] = file_type

    if exclude_terms:
        # Prefix EVERY term with '-'. Prefixing only the whole string would
        # negate just the first word and turn the rest into required terms.
        exclusions = " ".join(f"-{term}" for term in exclude_terms.split())
        if exclusions:
            query = f"{query} {exclusions}"

    raw_results = self._retry_with_backoff(
        self._execute_search,
        query,
        num_results,
        start_index,
        **search_params
    )

    return self._parse_search_results(raw_results)
732
-
733
def search_images(
    self,
    query: str,
    num_results: int = 10,
    image_size: Optional[str] = None,
    image_type: Optional[str] = None,
    image_color_type: Optional[str] = None,
    safe_search: str = "medium",
    start_index: int = 1
) -> List[Dict[str, Any]]:
    """
    Search for images using Google Custom Search API.

    Args:
        query: Search query string
        num_results: Number of results to return (max 10 per request)
        image_size: Image size filter ('icon', 'small', 'medium', 'large', 'xlarge', 'xxlarge', 'huge')
        image_type: Image type filter ('clipart', 'face', 'lineart', 'stock', 'photo', 'animated')
        image_color_type: Color type filter ('color', 'gray', 'mono', 'trans')
        safe_search: Safe search level ('off', 'medium', 'high')
        start_index: Starting index for pagination (1-based). Added last so
            existing positional callers are unaffected; it lets paginating
            callers request pages beyond the first.

    Returns:
        List of image result dictionaries with URL, thumbnail, dimensions, etc.

    Raises:
        ValidationError: If query is invalid
        SearchAPIError: If search fails

    Examples:
        >>> tool = SearchTool()
        >>> results = tool.search_images("sunset beach", num_results=5, image_size="large")
        >>> print(results[0]['link'])
    """
    if not query or not query.strip():
        raise ValidationError("Query cannot be empty")

    search_params = {
        'searchType': 'image',
        'safe': safe_search,
    }

    # Optional filters are sent only when the caller supplied them.
    optional_filters = (
        ('imgSize', image_size),
        ('imgType', image_type),
        ('imgColorType', image_color_type),
    )
    for api_name, value in optional_filters:
        if value:
            search_params[api_name] = value

    raw_results = self._retry_with_backoff(
        self._execute_search,
        query,
        num_results,
        start_index,
        **search_params
    )

    return self._parse_search_results(raw_results)
791
-
792
def search_news(
    self,
    query: str,
    num_results: int = 10,
    start_index: int = 1,
    language: str = "en",
    date_restrict: Optional[str] = None,
    sort_by: str = "date"
) -> List[Dict[str, Any]]:
    """
    Search for news articles using Google Custom Search API.

    The word "news" is appended to the query to bias results toward news
    sources; no dedicated news endpoint is used.

    Args:
        query: Search query string
        num_results: Number of results to return (max 10 per request)
        start_index: Starting index for pagination (1-based)
        language: Language code for results (e.g., 'en', 'zh-CN')
        date_restrict: Restrict results by date (e.g., 'd5' for last 5 days, 'w2' for last 2 weeks)
        sort_by: 'date' sorts by date; any other value sends an empty sort

    Returns:
        List of news article dictionaries with title, link, snippet, etc.

    Raises:
        ValidationError: If query is invalid
        SearchAPIError: If search fails
    """
    if not (query and query.strip()):
        raise ValidationError("Query cannot be empty")

    # Only 'date' is forwarded as a sort key; everything else becomes ''.
    sort_value = sort_by if sort_by == 'date' else ''
    request_options = {'lr': f'lang_{language}', 'sort': sort_value}
    if date_restrict:
        request_options['dateRestrict'] = date_restrict

    response = self._retry_with_backoff(
        self._execute_search,
        f"{query} news",
        num_results,
        start_index,
        **request_options
    )
    return self._parse_search_results(response)
847
-
848
def search_videos(
    self,
    query: str,
    num_results: int = 10,
    start_index: int = 1,
    language: str = "en",
    safe_search: str = "medium"
) -> List[Dict[str, Any]]:
    """
    Search for videos using Google Custom Search API.

    Video results are approximated by appending ``filetype:`` operators for
    mp4, webm and mov to the query.

    Args:
        query: Search query string
        num_results: Number of results to return (max 10 per request)
        start_index: Starting index for pagination (1-based)
        language: Language code for results (e.g., 'en', 'zh-CN')
        safe_search: Safe search level ('off', 'medium', 'high')

    Returns:
        List of video result dictionaries with title, link, snippet, etc.

    Raises:
        ValidationError: If query is invalid
        SearchAPIError: If search fails
    """
    if not (query and query.strip()):
        raise ValidationError("Query cannot be empty")

    request_options = {'lr': f'lang_{language}', 'safe': safe_search}

    response = self._retry_with_backoff(
        self._execute_search,
        f"{query} filetype:mp4 OR filetype:webm OR filetype:mov",
        num_results,
        start_index,
        **request_options
    )
    return self._parse_search_results(response)
898
-
899
- # ========================================================================
900
- # Advanced Features
901
- # ========================================================================
902
-
903
def search_paginated(
    self,
    query: str,
    total_results: int,
    search_type: str = "web",
    **kwargs
) -> List[Dict[str, Any]]:
    """
    Perform paginated search to retrieve more than 10 results.

    Requests pages of up to 10 results each and concatenates them. Paging
    stops early when enough results have been collected, when a page comes
    back empty (the result set is exhausted), or when the API quota runs
    out; in the last two cases fewer than ``total_results`` items are
    returned.

    Args:
        query: Search query string
        total_results: Total number of results to retrieve (1-100)
        search_type: Type of search ('web', 'image', 'news', 'video')
        **kwargs: Additional search parameters for the specific search type

    Returns:
        List of all search results combined from multiple pages

    Raises:
        ValidationError: If parameters are invalid
        SearchAPIError: If search fails

    Examples:
        >>> tool = SearchTool()
        >>> results = tool.search_paginated("machine learning", total_results=25)
    """
    if total_results < 1 or total_results > 100:
        raise ValidationError("total_results must be between 1 and 100")

    # Select search method based on type
    search_methods = {
        'web': self.search_web,
        'image': self.search_images,
        'news': self.search_news,
        'video': self.search_videos,
    }

    if search_type not in search_methods:
        raise ValidationError(f"Invalid search_type: {search_type}")

    search_method = search_methods[search_type]
    all_results: List[Dict[str, Any]] = []
    results_per_page = 10

    # Start indices are 1-based: 1, 11, 21, ...
    for start_index in range(1, total_results + 1, results_per_page):
        page_size = min(results_per_page, total_results - len(all_results))

        try:
            page_results = search_method(
                query=query,
                num_results=page_size,
                start_index=start_index,
                **kwargs
            )
        except QuotaExceededError:
            self.logger.warning(
                f"Quota exceeded after {len(all_results)} results"
            )
            break

        if not page_results:
            # Nothing left to fetch; further requests would only burn quota.
            break

        all_results.extend(page_results)
        if len(all_results) >= total_results:
            break

    return all_results[:total_results]
979
-
980
async def search_batch(
    self,
    queries: List[str],
    search_type: str = "web",
    num_results: int = 10
) -> Dict[str, List[Dict[str, Any]]]:
    """
    Execute multiple search queries in batch with async execution.

    Each distinct query is dispatched to the running event loop's default
    executor, so the blocking search calls overlap instead of running one
    after another.

    Args:
        queries: List of search query strings. Duplicates are searched only
            once because the result is keyed by query.
        search_type: Type of search ('web', 'image', 'news', 'video')
        num_results: Number of results per query

    Returns:
        Dictionary mapping queries to their search results. A query whose
        search raised maps to an empty list; the failure is logged.

    Raises:
        ValidationError: If queries is empty or search_type is not supported

    Examples:
        >>> tool = SearchTool()
        >>> queries = ["AI", "machine learning", "deep learning"]
        >>> results = await tool.search_batch(queries, num_results=5)
        >>> print(results["AI"][0]['title'])
    """
    if not queries:
        raise ValidationError("queries list cannot be empty")

    # Select search method
    search_methods = {
        'web': self.search_web,
        'image': self.search_images,
        'news': self.search_news,
        'video': self.search_videos,
    }

    if search_type not in search_methods:
        raise ValidationError(f"Invalid search_type: {search_type}")

    search_method = search_methods[search_type]

    # Drop duplicates while keeping first-seen order: the result dict can
    # hold only one entry per query, so repeated calls would waste quota.
    unique_queries = list(dict.fromkeys(queries))

    # We are inside a coroutine, so a loop is guaranteed to be running.
    loop = asyncio.get_running_loop()
    pending = [
        loop.run_in_executor(None, search_method, query, num_results)
        for query in unique_queries
    ]
    outcomes = await asyncio.gather(*pending, return_exceptions=True)

    # Build results dictionary
    results_dict = {}
    for query, outcome in zip(unique_queries, outcomes):
        # gather(return_exceptions=True) can also hand back BaseException
        # subclasses (e.g. CancelledError); never expose those as results.
        if isinstance(outcome, BaseException):
            self.logger.error(
                "Search failed for query '%s': %s", query, outcome
            )
            results_dict[query] = []
        else:
            results_dict[query] = outcome

    return results_dict
1045
-
1046
- # ========================================================================
1047
- # Utility Methods
1048
- # ========================================================================
1049
-
1050
def validate_credentials(self) -> Dict[str, Any]:
    """
    Validate API credentials by performing a test search.

    Runs a single one-result search for the literal query "test". Any
    exception raised during the check is reported in the returned
    dictionary instead of being propagated.

    Returns:
        Dictionary with validation status and details. On success it holds
        'valid' (True), 'method' ('api_key' when an API key is configured,
        otherwise 'service_account'), 'cse_id' and 'message'. On failure it
        holds 'valid' (False), 'error' (the exception text) and 'message'.

    Examples:
        >>> tool = SearchTool()
        >>> status = tool.validate_credentials()
        >>> print(status['valid'])
        True
    """
    try:
        # Perform a minimal test search; only success or failure matters,
        # so the response itself is discarded.
        self._execute_search("test", num_results=1)

        return {
            'valid': True,
            'method': 'api_key' if self.config.google_api_key else 'service_account',
            'cse_id': self.config.google_cse_id,
            'message': 'Credentials are valid and working'
        }
    except Exception as e:
        # Deliberately broad: this is a status probe and must not raise.
        return {
            'valid': False,
            'error': str(e),
            'message': 'Credentials validation failed'
        }
1079
-
1080
def get_quota_status(self) -> Dict[str, Any]:
    """
    Report the current quota and rate-limit situation.

    Returns:
        Dictionary with the remaining quota reported by the rate limiter,
        the configured request limit and time window, the circuit breaker
        state, and a shallow copy of the usage metrics.

    Examples:
        >>> tool = SearchTool()
        >>> status = tool.get_quota_status()
        >>> print(f"Remaining quota: {status['remaining_quota']}")
    """
    remaining = self.rate_limiter.get_remaining_quota()
    settings = self.config
    max_requests = settings.rate_limit_requests
    window_seconds = settings.rate_limit_window
    breaker_state = self.circuit_breaker.get_state()
    # Copy so that callers cannot mutate the live counters through the report.
    metrics_snapshot = self.metrics.copy()

    status = {}
    status['remaining_quota'] = remaining
    status['max_requests'] = max_requests
    status['time_window_seconds'] = window_seconds
    status['circuit_breaker_state'] = breaker_state
    status['metrics'] = metrics_snapshot
    return status
1100
-
1101
def get_metrics(self) -> Dict[str, Any]:
    """
    Return the usage counters enriched with derived values.

    Returns:
        Dictionary containing every entry of the metrics mapping plus
        'success_rate' (successful requests divided by total requests, or 0
        when nothing has been requested yet), 'circuit_breaker_state' and
        'remaining_quota'.

    Examples:
        >>> tool = SearchTool()
        >>> metrics = tool.get_metrics()
        >>> print(f"Success rate: {metrics['success_rate']:.2%}")
    """
    total = self.metrics['total_requests']
    if total > 0:
        success_rate = self.metrics['successful_requests'] / total
    else:
        # No requests yet: avoid dividing by zero.
        success_rate = 0

    report = dict(self.metrics)
    report['success_rate'] = success_rate
    report['circuit_breaker_state'] = self.circuit_breaker.get_state()
    report['remaining_quota'] = self.rate_limiter.get_remaining_quota()
    return report