aiecs 1.2.2__py3-none-any.whl → 1.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aiecs might be problematic. Click here for more details.
- aiecs/__init__.py +1 -1
- aiecs/llm/clients/vertex_client.py +22 -2
- aiecs/main.py +2 -2
- aiecs/scripts/tools_develop/README.md +111 -2
- aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
- aiecs/scripts/tools_develop/validate_tool_schemas.py +80 -21
- aiecs/scripts/tools_develop/verify_tools.py +347 -0
- aiecs/tools/__init__.py +94 -30
- aiecs/tools/apisource/__init__.py +106 -0
- aiecs/tools/apisource/intelligence/__init__.py +20 -0
- aiecs/tools/apisource/intelligence/data_fusion.py +378 -0
- aiecs/tools/apisource/intelligence/query_analyzer.py +387 -0
- aiecs/tools/apisource/intelligence/search_enhancer.py +384 -0
- aiecs/tools/apisource/monitoring/__init__.py +12 -0
- aiecs/tools/apisource/monitoring/metrics.py +308 -0
- aiecs/tools/apisource/providers/__init__.py +114 -0
- aiecs/tools/apisource/providers/base.py +684 -0
- aiecs/tools/apisource/providers/census.py +412 -0
- aiecs/tools/apisource/providers/fred.py +575 -0
- aiecs/tools/apisource/providers/newsapi.py +402 -0
- aiecs/tools/apisource/providers/worldbank.py +346 -0
- aiecs/tools/apisource/reliability/__init__.py +14 -0
- aiecs/tools/apisource/reliability/error_handler.py +362 -0
- aiecs/tools/apisource/reliability/fallback_strategy.py +420 -0
- aiecs/tools/apisource/tool.py +814 -0
- aiecs/tools/apisource/utils/__init__.py +12 -0
- aiecs/tools/apisource/utils/validators.py +343 -0
- aiecs/tools/langchain_adapter.py +95 -17
- aiecs/tools/search_tool/__init__.py +102 -0
- aiecs/tools/search_tool/analyzers.py +583 -0
- aiecs/tools/search_tool/cache.py +280 -0
- aiecs/tools/search_tool/constants.py +127 -0
- aiecs/tools/search_tool/context.py +219 -0
- aiecs/tools/search_tool/core.py +773 -0
- aiecs/tools/search_tool/deduplicator.py +123 -0
- aiecs/tools/search_tool/error_handler.py +257 -0
- aiecs/tools/search_tool/metrics.py +375 -0
- aiecs/tools/search_tool/rate_limiter.py +177 -0
- aiecs/tools/search_tool/schemas.py +297 -0
- aiecs/tools/statistics/data_loader_tool.py +2 -2
- aiecs/tools/statistics/data_transformer_tool.py +1 -1
- aiecs/tools/task_tools/__init__.py +8 -8
- aiecs/tools/task_tools/report_tool.py +1 -1
- aiecs/tools/tool_executor/__init__.py +2 -0
- aiecs/tools/tool_executor/tool_executor.py +284 -14
- aiecs/utils/__init__.py +11 -0
- aiecs/utils/cache_provider.py +698 -0
- aiecs/utils/execution_utils.py +5 -5
- {aiecs-1.2.2.dist-info → aiecs-1.3.3.dist-info}/METADATA +1 -1
- {aiecs-1.2.2.dist-info → aiecs-1.3.3.dist-info}/RECORD +54 -22
- aiecs/tools/task_tools/search_tool.py +0 -1123
- {aiecs-1.2.2.dist-info → aiecs-1.3.3.dist-info}/WHEEL +0 -0
- {aiecs-1.2.2.dist-info → aiecs-1.3.3.dist-info}/entry_points.txt +0 -0
- {aiecs-1.2.2.dist-info → aiecs-1.3.3.dist-info}/licenses/LICENSE +0 -0
- {aiecs-1.2.2.dist-info → aiecs-1.3.3.dist-info}/top_level.txt +0 -0
|
@@ -1,1123 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Google Custom Search Tool for AI Agents
|
|
3
|
-
|
|
4
|
-
A comprehensive, production-ready web search tool that integrates Google Custom Search API
|
|
5
|
-
with advanced features including multiple search types, pagination, rate limiting, circuit
|
|
6
|
-
breaker pattern, caching, and full AIECS architecture compliance.
|
|
7
|
-
|
|
8
|
-
Features:
|
|
9
|
-
- Multiple search types: web, image, news, video
|
|
10
|
-
- Dual authentication: API key and service account
|
|
11
|
-
- Rate limiting with token bucket algorithm
|
|
12
|
-
- Circuit breaker pattern for API resilience
|
|
13
|
-
- Intelligent caching with TTL
|
|
14
|
-
- Comprehensive error handling and retry logic
|
|
15
|
-
- Batch and paginated search support
|
|
16
|
-
"""
|
|
17
|
-
|
|
18
|
-
import asyncio
|
|
19
|
-
import hashlib
|
|
20
|
-
import json
|
|
21
|
-
import logging
|
|
22
|
-
import os
|
|
23
|
-
import time
|
|
24
|
-
from collections import deque
|
|
25
|
-
from datetime import datetime, timedelta
|
|
26
|
-
from enum import Enum
|
|
27
|
-
from threading import Lock
|
|
28
|
-
from typing import Any, Dict, List, Optional, Union
|
|
29
|
-
|
|
30
|
-
from pydantic import BaseModel, Field, ValidationError as PydanticValidationError, ConfigDict
|
|
31
|
-
|
|
32
|
-
from aiecs.tools import register_tool
|
|
33
|
-
from aiecs.tools.base_tool import BaseTool
|
|
34
|
-
from aiecs.config.config import get_settings
|
|
35
|
-
|
|
36
|
-
# Google API imports with graceful fallback
|
|
37
|
-
try:
|
|
38
|
-
from googleapiclient.discovery import build
|
|
39
|
-
from googleapiclient.errors import HttpError
|
|
40
|
-
from google.auth.exceptions import GoogleAuthError
|
|
41
|
-
from google.oauth2 import service_account
|
|
42
|
-
from google.auth.transport.requests import Request
|
|
43
|
-
GOOGLE_API_AVAILABLE = True
|
|
44
|
-
except ImportError:
|
|
45
|
-
GOOGLE_API_AVAILABLE = False
|
|
46
|
-
HttpError = Exception
|
|
47
|
-
GoogleAuthError = Exception
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
# ============================================================================
|
|
51
|
-
# Enums and Constants
|
|
52
|
-
# ============================================================================
|
|
53
|
-
|
|
54
|
-
class SearchType(str, Enum):
    """Kinds of search the tool can run.

    Mixing in ``str`` makes every member compare equal to its plain
    string value (``SearchType.WEB == "web"``).
    """

    WEB = "web"
    IMAGE = "image"
    NEWS = "news"
    VIDEO = "video"
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
class SafeSearch(str, Enum):
    """Safe-search filtering levels; members compare equal to their string value."""

    OFF = "off"
    MEDIUM = "medium"
    HIGH = "high"
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
class ImageSize(str, Enum):
    """Image size filter values, listed from smallest to largest."""

    ICON = "icon"
    SMALL = "small"
    MEDIUM = "medium"
    LARGE = "large"
    XLARGE = "xlarge"
    XXLARGE = "xxlarge"
    HUGE = "huge"
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
class ImageType(str, Enum):
    """Image type filter values; members compare equal to their string value."""

    CLIPART = "clipart"
    FACE = "face"
    LINEART = "lineart"
    STOCK = "stock"
    PHOTO = "photo"
    ANIMATED = "animated"
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
class ImageColorType(str, Enum):
    """Image color type filter values; members compare equal to their string value."""

    COLOR = "color"
    GRAY = "gray"
    MONO = "mono"
    TRANS = "trans"
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
class CircuitState(str, Enum):
    """States a circuit breaker can be in; members compare equal to their string value."""

    CLOSED = "closed"
    OPEN = "open"
    HALF_OPEN = "half_open"
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
# ============================================================================
|
|
106
|
-
# Exception Hierarchy
|
|
107
|
-
# ============================================================================
|
|
108
|
-
|
|
109
|
-
class SearchToolError(Exception):
    """Root of the SearchTool exception hierarchy.

    Catching this type catches every error defined below it.
    """


class AuthenticationError(SearchToolError):
    """Raised for authentication-related problems."""


class QuotaExceededError(SearchToolError):
    """Raised when the API quota has been exceeded."""


class RateLimitError(SearchToolError):
    """Raised when the rate limit has been exceeded."""


class CircuitBreakerOpenError(SearchToolError):
    """Raised when a call is rejected because the circuit breaker is open."""


class SearchAPIError(SearchToolError):
    """Raised for errors coming from the search API."""


class ValidationError(SearchToolError):
    """Raised when input validation fails."""
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
# ============================================================================
|
|
145
|
-
# Configuration
|
|
146
|
-
# ============================================================================
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
# ============================================================================
|
|
151
|
-
# Rate Limiter
|
|
152
|
-
# ============================================================================
|
|
153
|
-
|
|
154
|
-
class RateLimiter:
    """
    Token bucket rate limiter for API requests.

    The bucket starts full with ``max_requests`` tokens and is refilled
    continuously at ``max_requests / time_window`` tokens per second, never
    exceeding ``max_requests``. Timestamps of granted requests are kept in
    ``request_history`` so the error message can report recent usage.
    """

    def __init__(self, max_requests: int, time_window: int):
        """
        Initialize rate limiter.

        Args:
            max_requests: Maximum number of requests allowed per window
            time_window: Time window in seconds

        Raises:
            ValueError: If max_requests or time_window is not positive
        """
        # A non-positive value would otherwise surface later as a
        # ZeroDivisionError inside acquire(); fail early with a clear message.
        if max_requests <= 0:
            raise ValueError("max_requests must be positive")
        if time_window <= 0:
            raise ValueError("time_window must be positive")

        self.max_requests = max_requests
        self.time_window = time_window
        self.tokens = max_requests
        self.last_update = time.time()
        self.lock = Lock()
        self.request_history: deque = deque()

    def _refill_tokens(self):
        """Refill tokens based on elapsed time (caller must hold ``self.lock``)."""
        now = time.time()
        # time.time() follows the wall clock, which can be stepped backwards;
        # clamp the elapsed time so such a step never removes tokens.
        time_passed = max(0.0, now - self.last_update)

        # Refill tokens proportionally to time passed
        refill_rate = self.max_requests / self.time_window
        self.tokens = min(self.max_requests, self.tokens + time_passed * refill_rate)
        self.last_update = now

    def acquire(self, tokens: int = 1) -> bool:
        """
        Attempt to acquire tokens.

        Args:
            tokens: Number of tokens to acquire

        Returns:
            True when the tokens were acquired. Failure is signalled by an
            exception, never by a False return value.

        Raises:
            RateLimitError: If rate limit is exceeded
        """
        with self.lock:
            self._refill_tokens()

            # Drop history entries that fell out of the current window
            cutoff_time = time.time() - self.time_window
            while self.request_history and self.request_history[0] < cutoff_time:
                self.request_history.popleft()

            if self.tokens >= tokens:
                self.tokens -= tokens
                self.request_history.append(time.time())
                return True

            # Not enough tokens: report how long the shortfall takes to refill
            refill_rate = self.max_requests / self.time_window
            wait_time = (tokens - self.tokens) / refill_rate
            raise RateLimitError(
                f"Rate limit exceeded. {len(self.request_history)} requests in last "
                f"{self.time_window}s. Wait {wait_time:.1f}s before retrying."
            )

    def get_remaining_quota(self) -> int:
        """Return the number of whole tokens currently available."""
        with self.lock:
            self._refill_tokens()
            return int(self.tokens)
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
# ============================================================================
|
|
231
|
-
# Circuit Breaker
|
|
232
|
-
# ============================================================================
|
|
233
|
-
|
|
234
|
-
class CircuitBreaker:
    """
    Circuit breaker pattern implementation for API resilience.

    State machine:
        CLOSED    -> OPEN       after ``failure_threshold`` failures
        OPEN      -> HALF_OPEN  once ``timeout`` seconds passed since the last failure
        HALF_OPEN -> CLOSED     on the next successful call
        HALF_OPEN -> OPEN       on the next failed call
    """

    def __init__(self, failure_threshold: int, timeout: int):
        """
        Initialize circuit breaker.

        Args:
            failure_threshold: Number of failures before opening circuit
            timeout: Timeout in seconds before trying half-open state
        """
        self.failure_threshold = failure_threshold
        self.timeout = timeout
        self.failure_count = 0
        self.last_failure_time: Optional[float] = None
        self.state = CircuitState.CLOSED
        self.lock = Lock()

    def call(self, func, *args, **kwargs):
        """
        Execute function with circuit breaker protection.

        Args:
            func: Function to execute
            *args: Positional arguments
            **kwargs: Keyword arguments

        Returns:
            Function result

        Raises:
            CircuitBreakerOpenError: If circuit is open
            Exception: Whatever ``func`` raises, re-raised unchanged after
                the failure has been recorded
        """
        with self.lock:
            if self.state == CircuitState.OPEN:
                elapsed = time.time() - self.last_failure_time
                if elapsed >= self.timeout:
                    # Timeout has passed: let a probe call through
                    self.state = CircuitState.HALF_OPEN
                    self.failure_count = 0
                else:
                    raise CircuitBreakerOpenError(
                        f"Circuit breaker is OPEN. Retry after "
                        f"{self.timeout - elapsed:.1f}s"
                    )

        # The call runs outside the lock: _on_success/_on_failure take the
        # same non-reentrant lock themselves.
        try:
            result = func(*args, **kwargs)
        except Exception:
            self._on_failure()
            raise  # bare raise keeps the original traceback
        self._on_success()
        return result

    def _on_success(self):
        """Handle successful call"""
        with self.lock:
            if self.state == CircuitState.HALF_OPEN:
                self.state = CircuitState.CLOSED
            self.failure_count = 0

    def _on_failure(self):
        """Handle failed call"""
        with self.lock:
            self.failure_count += 1
            self.last_failure_time = time.time()

            # A failed probe in HALF_OPEN re-opens the circuit immediately.
            # Without this, the counter reset done on entering HALF_OPEN would
            # let a further failure_threshold failures through.
            if (self.state == CircuitState.HALF_OPEN
                    or self.failure_count >= self.failure_threshold):
                self.state = CircuitState.OPEN

    def get_state(self) -> str:
        """Get current circuit state"""
        return self.state.value
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
# ============================================================================
|
|
314
|
-
# Search Tool Implementation
|
|
315
|
-
# ============================================================================
|
|
316
|
-
|
|
317
|
-
@register_tool("search")
|
|
318
|
-
class SearchTool(BaseTool):
|
|
319
|
-
"""
|
|
320
|
-
Comprehensive web search tool using Google Custom Search API.
|
|
321
|
-
|
|
322
|
-
Provides multiple search types (web, image, news, video) with advanced features
|
|
323
|
-
including rate limiting, circuit breaker protection, caching, and comprehensive
|
|
324
|
-
error handling.
|
|
325
|
-
|
|
326
|
-
Features:
|
|
327
|
-
- Web, image, news, and video search
|
|
328
|
-
- Dual authentication (API key and service account)
|
|
329
|
-
- Rate limiting and circuit breaker
|
|
330
|
-
- Intelligent caching with TTL
|
|
331
|
-
- Batch and paginated search
|
|
332
|
-
- Comprehensive error handling
|
|
333
|
-
|
|
334
|
-
Inherits from BaseTool to leverage ToolExecutor for caching, concurrency,
|
|
335
|
-
and error handling.
|
|
336
|
-
"""
|
|
337
|
-
|
|
338
|
-
# Configuration schema
|
|
339
|
-
class Config(BaseModel):
    """Configuration for the search tool"""

    # NOTE(review): env_prefix is documented as a pydantic-settings option;
    # on a plain BaseModel it presumably has no effect, so SEARCH_TOOL_*
    # environment variables would not be read here — confirm.
    model_config = ConfigDict(env_prefix="SEARCH_TOOL_")

    # Credentials
    google_api_key: Optional[str] = Field(default=None, description="Google API key for Custom Search")
    google_cse_id: Optional[str] = Field(default=None, description="Custom Search Engine ID")
    google_application_credentials: Optional[str] = Field(default=None, description="Path to service account JSON")

    # Result size and caching
    max_results_per_query: int = Field(default=10, description="Maximum results per single query")
    cache_ttl: int = Field(default=3600, description="Cache time-to-live in seconds")

    # Rate limiting (default: 100 requests per 86400 s)
    rate_limit_requests: int = Field(default=100, description="Maximum requests per time window")
    rate_limit_window: int = Field(default=86400, description="Time window for rate limiting in seconds")

    # Circuit breaker
    circuit_breaker_threshold: int = Field(default=5, description="Failures before opening circuit")
    circuit_breaker_timeout: int = Field(default=60, description="Timeout before trying half-open in seconds")

    # Retry behaviour
    retry_attempts: int = Field(default=3, description="Number of retry attempts")
    retry_backoff: float = Field(default=2.0, description="Exponential backoff factor")

    # Request settings
    timeout: int = Field(default=30, description="API request timeout in seconds")
    user_agent: str = Field(default="AIECS-SearchTool/1.0", description="User agent string")
    allowed_search_types: List[str] = Field(
        default=["web", "image", "news", "video"],
        description="Allowed search types"
    )
|
|
399
|
-
|
|
400
|
-
def __init__(self, config: Optional[Dict[str, Any]] = None):
    """
    Set up the search tool: configuration, logger, API client, rate
    limiter, circuit breaker and metric counters.

    Args:
        config: Optional configuration overrides; values given here win
            over the credentials read from the global settings

    Raises:
        AuthenticationError: If Google API libraries are not available
        ValidationError: If configuration is invalid
    """
    super().__init__(config)

    if not GOOGLE_API_AVAILABLE:
        raise AuthenticationError(
            "Google API client libraries not available. "
            "Install with: pip install google-api-python-client google-auth google-auth-httplib2"
        )

    # Credentials from the global settings form the base layer; explicit
    # overrides are applied on top.
    settings = get_settings()
    self.config = self.Config(**{
        'google_api_key': settings.google_api_key,
        'google_cse_id': settings.google_cse_id,
        'google_application_credentials': settings.google_application_credentials,
        **(config or {}),
    })

    # Attach a stream handler only once per logger so repeated
    # instantiation does not duplicate log output.
    self.logger = logging.getLogger(__name__)
    if not self.logger.handlers:
        stream_handler = logging.StreamHandler()
        stream_handler.setFormatter(
            logging.Formatter('%(asctime)s %(levelname)s [SearchTool] %(message)s')
        )
        self.logger.addHandler(stream_handler)
        self.logger.setLevel(logging.INFO)

    # API client (populated by _init_credentials)
    self._service = None
    self._credentials = None
    self._init_credentials()

    self.rate_limiter = RateLimiter(
        self.config.rate_limit_requests,
        self.config.rate_limit_window
    )
    self.circuit_breaker = CircuitBreaker(
        self.config.circuit_breaker_threshold,
        self.config.circuit_breaker_timeout
    )

    # Metric counters, all starting at zero
    self.metrics = dict.fromkeys(
        (
            'total_requests',
            'successful_requests',
            'failed_requests',
            'cache_hits',
            'rate_limit_errors',
            'circuit_breaker_trips',
        ),
        0,
    )
|
|
470
|
-
|
|
471
|
-
def _init_credentials(self):
    """
    Initialize Google API credentials.

    Tries the API key first (requires both ``google_api_key`` and
    ``google_cse_id``), then falls back to a service account file. On
    success ``self._service`` holds the Custom Search client.

    Raises:
        AuthenticationError: If credentials are not properly configured.
            When an initialization attempt failed with an exception, that
            exception is attached as the cause.
    """
    last_error: Optional[Exception] = None

    # Method 1: API Key (simpler, recommended for Custom Search)
    if self.config.google_api_key and self.config.google_cse_id:
        try:
            self._service = build(
                'customsearch',
                'v1',
                developerKey=self.config.google_api_key,
                cache_discovery=False
            )
        except Exception as e:
            # Best effort: remember the failure and try the service account
            last_error = e
            self.logger.warning(f"Failed to initialize with API key: {e}")
        else:
            self.logger.info("Initialized Google Custom Search with API key")
            return

    # Method 2: Service Account (more complex, supports additional features)
    creds_path = self.config.google_application_credentials
    if creds_path:
        if os.path.exists(creds_path):
            try:
                credentials = service_account.Credentials.from_service_account_file(
                    creds_path,
                    scopes=['https://www.googleapis.com/auth/cse']
                )
                self._credentials = credentials
                self._service = build(
                    'customsearch',
                    'v1',
                    credentials=credentials,
                    cache_discovery=False
                )
            except Exception as e:
                last_error = e
                self.logger.warning(f"Failed to initialize with service account: {e}")
            else:
                self.logger.info("Initialized Google Custom Search with service account")
                return
        else:
            # Previously a wrong path was skipped without any hint
            self.logger.warning(f"Service account credentials file not found: {creds_path}")

    # Chain the last underlying failure so the root cause stays visible
    raise AuthenticationError(
        "No valid Google API credentials found. Please set either:\n"
        "1. GOOGLE_API_KEY and GOOGLE_CSE_ID environment variables, or\n"
        "2. GOOGLE_APPLICATION_CREDENTIALS pointing to service account JSON file"
    ) from last_error
|
|
520
|
-
|
|
521
|
-
def _execute_search(
    self,
    query: str,
    num_results: int = 10,
    start_index: int = 1,
    **kwargs
) -> Dict[str, Any]:
    """
    Execute a single search request under rate-limit and circuit-breaker
    protection.

    Args:
        query: Search query
        num_results: Number of results to return (capped at 10 per request)
        start_index: Starting index for pagination
        **kwargs: Additional search parameters passed to the API as-is

    Returns:
        Raw search response dictionary

    Raises:
        QuotaExceededError: If the API answers with HTTP 429
        AuthenticationError: If the API answers with HTTP 403
        SearchAPIError: If search fails for any other reason
        RateLimitError: If rate limit is exceeded
        CircuitBreakerOpenError: If circuit breaker is open
    """
    # Check rate limit (raises RateLimitError when no token is available)
    self.rate_limiter.acquire()

    # Prepare search parameters
    search_params = {
        'q': query,
        'cx': self.config.google_cse_id,
        'num': min(num_results, 10),  # Google limits to 10 per request
        'start': start_index,
        **kwargs
    }

    def _do_search():
        try:
            self.metrics['total_requests'] += 1
            result = self._service.cse().list(**search_params).execute()
        except HttpError as e:
            self.metrics['failed_requests'] += 1
            # Map HTTP status codes onto the tool's exception hierarchy,
            # keeping the original error as the explicit cause.
            if e.resp.status == 429:
                raise QuotaExceededError(f"API quota exceeded: {e}") from e
            if e.resp.status == 403:
                raise AuthenticationError(f"Authentication failed: {e}") from e
            raise SearchAPIError(f"Search API error: {e}") from e
        except Exception as e:
            self.metrics['failed_requests'] += 1
            raise SearchAPIError(f"Unexpected error: {e}") from e
        self.metrics['successful_requests'] += 1
        return result

    # Execute with circuit breaker protection
    try:
        return self.circuit_breaker.call(_do_search)
    except CircuitBreakerOpenError:
        # Counts every call rejected by an open circuit
        self.metrics['circuit_breaker_trips'] += 1
        raise
|
|
581
|
-
|
|
582
|
-
def _retry_with_backoff(self, func, *args, **kwargs) -> Any:
    """
    Execute function with exponential backoff retry logic.

    The function is always called at least once, even when
    ``retry_attempts`` is configured as zero or negative. Between failed
    attempts the call sleeps for ``retry_backoff ** attempt`` seconds.

    Args:
        func: Function to execute
        *args: Positional arguments
        **kwargs: Keyword arguments

    Returns:
        Function result

    Raises:
        RateLimitError: Re-raised immediately, never retried
        CircuitBreakerOpenError: Re-raised immediately, never retried
        Exception: Last exception if all retries fail
    """
    # With retry_attempts <= 0 the loop used to be skipped entirely and the
    # method ended in `raise None` (a TypeError) without calling func.
    attempts = max(1, self.config.retry_attempts)

    for attempt in range(attempts):
        try:
            return func(*args, **kwargs)
        except (RateLimitError, CircuitBreakerOpenError):
            # Don't retry rate limit or circuit breaker errors
            raise
        except Exception as e:
            if attempt >= attempts - 1:
                self.logger.error(f"All {attempts} attempts failed")
                raise  # bare raise keeps the original traceback
            wait_time = self.config.retry_backoff ** attempt
            self.logger.warning(
                f"Attempt {attempt + 1} failed: {e}. "
                f"Retrying in {wait_time}s..."
            )
            time.sleep(wait_time)
|
|
618
|
-
|
|
619
|
-
def _parse_search_results(self, raw_results: Dict[str, Any]) -> List[Dict[str, Any]]:
    """
    Turn a raw API response into a list of normalized result dictionaries.

    Every result carries the text fields ``title``, ``link``, ``snippet``,
    ``displayLink`` and ``formattedUrl`` (empty string when absent). An
    ``image`` sub-dictionary and a ``metadata`` entry (the item's
    ``pagemap``) are added only when the item provides them.

    Args:
        raw_results: Raw API response

    Returns:
        List of normalized result dictionaries; empty when the response
        has no ``items``
    """
    text_fields = ('title', 'link', 'snippet', 'displayLink', 'formattedUrl')
    # (key, fallback) pairs for the image-specific metadata
    image_fields = (
        ('contextLink', ''),
        ('height', 0),
        ('width', 0),
        ('byteSize', 0),
        ('thumbnailLink', ''),
    )

    parsed = []
    for entry in raw_results.get('items', []):
        record = {name: entry.get(name, '') for name in text_fields}

        if 'image' in entry:
            image_info = entry['image']
            record['image'] = {
                key: image_info.get(key, fallback) for key, fallback in image_fields
            }

        if 'pagemap' in entry:
            record['metadata'] = entry['pagemap']

        parsed.append(record)

    return parsed
|
|
658
|
-
|
|
659
|
-
# ========================================================================
|
|
660
|
-
# Core Search Methods
|
|
661
|
-
# ========================================================================
|
|
662
|
-
|
|
663
|
-
def search_web(
    self,
    query: str,
    num_results: int = 10,
    start_index: int = 1,
    language: str = "en",
    country: str = "us",
    safe_search: str = "medium",
    date_restrict: Optional[str] = None,
    file_type: Optional[str] = None,
    exclude_terms: Optional[str] = None
) -> List[Dict[str, Any]]:
    """
    Search the web using Google Custom Search API.

    Args:
        query: Search query string
        num_results: Number of results to return (max 10 per request)
        start_index: Starting index for pagination (1-based)
        language: Language code for results (e.g., 'en', 'zh-CN')
        country: Country code for results (e.g., 'us', 'cn')
        safe_search: Safe search level ('off', 'medium', 'high')
        date_restrict: Restrict results by date (e.g., 'd5' for last 5 days)
        file_type: Filter by file type (e.g., 'pdf', 'doc')
        exclude_terms: Terms to exclude from results. Several
            whitespace-separated terms are each excluded; a double-quoted
            phrase is excluded as a phrase.

    Returns:
        List of search result dictionaries with title, link, snippet, etc.

    Raises:
        ValidationError: If query is invalid
        SearchAPIError: If search fails
        RateLimitError: If rate limit is exceeded

    Examples:
        >>> tool = SearchTool()
        >>> results = tool.search_web("artificial intelligence", num_results=5)
        >>> print(results[0]['title'])
    """
    if not query or not query.strip():
        raise ValidationError("Query cannot be empty")

    if num_results < 1 or num_results > 100:
        raise ValidationError("num_results must be between 1 and 100")

    # NOTE(review): the current Custom Search JSON API reference appears to
    # list only "active"/"off" for `safe` — confirm that "medium"/"high"
    # are still accepted.
    search_params = {
        'lr': f'lang_{language}',
        'cr': f'country{country.upper()}',
        'safe': safe_search,
    }

    if date_restrict:
        search_params['dateRestrict'] = date_restrict

    if file_type:
        search_params['fileType'] = file_type

    if exclude_terms:
        import shlex  # local import keeps this fix self-contained

        # Previously only the first word received the "-" prefix, so
        # "java ruby" excluded "java" but *added* "ruby" as a search term.
        try:
            terms = shlex.split(exclude_terms)
        except ValueError:
            # Unbalanced quote (e.g. an apostrophe): plain whitespace split
            terms = exclude_terms.split()

        negated = []
        for term in terms:
            term = term.lstrip('-').strip()
            if not term:
                continue
            # Multi-word terms come from quoted phrases; re-quote them
            negated.append(f'-"{term}"' if ' ' in term else f"-{term}")

        if negated:
            query = f"{query} {' '.join(negated)}"

    raw_results = self._retry_with_backoff(
        self._execute_search,
        query,
        num_results,
        start_index,
        **search_params
    )

    return self._parse_search_results(raw_results)
|
|
732
|
-
|
|
733
|
-
def search_images(
    self,
    query: str,
    num_results: int = 10,
    image_size: Optional[str] = None,
    image_type: Optional[str] = None,
    image_color_type: Optional[str] = None,
    safe_search: str = "medium"
) -> List[Dict[str, Any]]:
    """
    Run an image search through the Google Custom Search API.

    The request always starts at result index 1; optional filters are
    only sent when a value is given.

    Args:
        query: Search query string
        num_results: Number of results to return (max 10 per request)
        image_size: Image size filter ('icon', 'small', 'medium', 'large', 'xlarge', 'xxlarge', 'huge')
        image_type: Image type filter ('clipart', 'face', 'lineart', 'stock', 'photo', 'animated')
        image_color_type: Color type filter ('color', 'gray', 'mono', 'trans')
        safe_search: Safe search level ('off', 'medium', 'high')

    Returns:
        List of image result dictionaries with URL, thumbnail, dimensions, etc.

    Raises:
        ValidationError: If query is invalid
        SearchAPIError: If search fails

    Examples:
        >>> tool = SearchTool()
        >>> results = tool.search_images("sunset beach", num_results=5, image_size="large")
        >>> print(results[0]['link'])
    """
    if not (query and query.strip()):
        raise ValidationError("Query cannot be empty")

    request_params = {'searchType': 'image', 'safe': safe_search}

    # API parameter name -> caller-supplied value; falsy values are skipped
    optional_filters = (
        ('imgSize', image_size),
        ('imgType', image_type),
        ('imgColorType', image_color_type),
    )
    for api_name, value in optional_filters:
        if value:
            request_params[api_name] = value

    response = self._retry_with_backoff(
        self._execute_search, query, num_results, 1, **request_params
    )
    return self._parse_search_results(response)
|
|
791
|
-
|
|
792
|
-
def search_news(
    self,
    query: str,
    num_results: int = 10,
    start_index: int = 1,
    language: str = "en",
    date_restrict: Optional[str] = None,
    sort_by: str = "date"
) -> List[Dict[str, Any]]:
    """
    Run a news-oriented query through the Google Custom Search API.

    The word "news" is appended to the caller's query so that news sources
    are favoured, and the request is issued through the retry helper.

    Args:
        query: Search query string; must contain non-whitespace characters
        num_results: Number of results to return (max 10 per request)
        start_index: Starting index for pagination (1-based)
        language: Language code for results (e.g., 'en', 'zh-CN'); sent as
            the ``lr`` parameter in the form ``lang_<code>``
        date_restrict: Restrict results by date (e.g., 'd5' for last 5 days,
            'w2' for last 2 weeks); omitted from the request when falsy
        sort_by: Sort order ('date' or 'relevance'); only 'date' is forwarded,
            every other value is sent as an empty ``sort`` parameter

    Returns:
        List of news article dictionaries as produced by
        ``_parse_search_results``

    Raises:
        ValidationError: If query is empty or whitespace-only
        SearchAPIError: If the underlying search fails (raised by helpers
            that are not visible in this block)

    Examples:
        >>> tool = SearchTool()
        >>> results = tool.search_news("climate change", date_restrict="w1")
        >>> print(results[0]['title'])
    """
    if not (query and query.strip()):
        raise ValidationError("Query cannot be empty")

    # Anything other than 'date' results in an empty sort expression.
    sort_expression = sort_by if sort_by == 'date' else ''
    request_options = {
        'lr': f'lang_{language}',
        'sort': sort_expression,
    }
    if date_restrict:
        request_options['dateRestrict'] = date_restrict

    # Suffixing "news" nudges the engine towards news sources.
    response = self._retry_with_backoff(
        self._execute_search,
        f"{query} news",
        num_results,
        start_index,
        **request_options
    )
    return self._parse_search_results(response)
|
|
847
|
-
|
|
848
|
-
def search_videos(
    self,
    query: str,
    num_results: int = 10,
    start_index: int = 1,
    language: str = "en",
    safe_search: str = "medium"
) -> List[Dict[str, Any]]:
    """
    Run a video-oriented query through the Google Custom Search API.

    Video content is targeted by appending ``filetype:`` filters for mp4,
    webm and mov to the caller's query.

    Args:
        query: Search query string; must contain non-whitespace characters
        num_results: Number of results to return (max 10 per request)
        start_index: Starting index for pagination (1-based)
        language: Language code for results (e.g., 'en', 'zh-CN'); sent as
            the ``lr`` parameter in the form ``lang_<code>``
        safe_search: Safe search level ('off', 'medium', 'high'); sent as
            the ``safe`` parameter

    Returns:
        List of video result dictionaries as produced by
        ``_parse_search_results``

    Raises:
        ValidationError: If query is empty or whitespace-only
        SearchAPIError: If the underlying search fails (raised by helpers
            that are not visible in this block)

    Examples:
        >>> tool = SearchTool()
        >>> results = tool.search_videos("python tutorial", num_results=5)
        >>> print(results[0]['title'])
    """
    if not (query and query.strip()):
        raise ValidationError("Query cannot be empty")

    request_options = {}
    request_options['lr'] = f'lang_{language}'
    request_options['safe'] = safe_search

    # Restrict matches to common video container formats.
    filtered_query = f"{query} filetype:mp4 OR filetype:webm OR filetype:mov"

    response = self._retry_with_backoff(
        self._execute_search,
        filtered_query,
        num_results,
        start_index,
        **request_options
    )
    return self._parse_search_results(response)
|
|
898
|
-
|
|
899
|
-
# ========================================================================
|
|
900
|
-
# Advanced Features
|
|
901
|
-
# ========================================================================
|
|
902
|
-
|
|
903
|
-
def search_paginated(
    self,
    query: str,
    total_results: int,
    search_type: str = "web",
    **kwargs
) -> List[Dict[str, Any]]:
    """
    Perform paginated search to retrieve more than 10 results.

    Google Custom Search API limits each request to 10 results. This method
    issues one request per page of 10 and concatenates the pages. Pagination
    stops as soon as enough results are collected, a page comes back empty
    (nothing further to fetch), or the quota is exhausted; in the latter two
    cases the results gathered so far are returned.

    Args:
        query: Search query string
        total_results: Total number of results to retrieve (1 to 100)
        search_type: Type of search ('web', 'image', 'news', 'video')
        **kwargs: Additional search parameters forwarded unchanged to the
            selected search method. The method is always called with
            ``query``, ``num_results`` and ``start_index`` keywords, so
            those names must not appear in kwargs.

    Returns:
        List of search results combined from the fetched pages, truncated
        to at most ``total_results`` entries

    Raises:
        ValidationError: If total_results is outside 1..100 or search_type
            is not one of the supported types
        SearchAPIError: If a page request fails (propagated from the
            selected search method, which is not visible in this block)

    Examples:
        >>> tool = SearchTool()
        >>> results = tool.search_paginated("machine learning", total_results=25)
        >>> len(results)
        25
    """
    if total_results < 1 or total_results > 100:
        raise ValidationError("total_results must be between 1 and 100")

    # Select search method based on type
    search_methods = {
        'web': self.search_web,
        'image': self.search_images,
        'news': self.search_news,
        'video': self.search_videos,
    }

    if search_type not in search_methods:
        raise ValidationError(f"Invalid search_type: {search_type}")

    search_method = search_methods[search_type]
    results_per_page = 10
    all_results = []

    # Page start indices are 1-based: 1, 11, 21, ...
    for start_index in range(1, total_results + 1, results_per_page):
        page_size = min(results_per_page, total_results - len(all_results))

        try:
            page_results = search_method(
                query=query,
                num_results=page_size,
                start_index=start_index,
                **kwargs
            )
        except QuotaExceededError:
            self.logger.warning(
                f"Quota exceeded after {len(all_results)} results"
            )
            break

        # An empty page means the result set is exhausted; requesting later
        # pages would only burn quota without adding anything.
        if not page_results:
            break

        all_results.extend(page_results)

        if len(all_results) >= total_results:
            break

    return all_results[:total_results]
|
|
979
|
-
|
|
980
|
-
async def search_batch(
    self,
    queries: List[str],
    search_type: str = "web",
    num_results: int = 10
) -> Dict[str, List[Dict[str, Any]]]:
    """
    Execute multiple search queries concurrently.

    Each distinct query is dispatched to the running loop's default executor
    so the blocking search calls overlap. A query whose search fails is
    logged and mapped to an empty list rather than aborting the batch.

    Args:
        queries: List of search query strings; repeated entries are searched
            only once because they share a single key in the result
        search_type: Type of search ('web', 'image', 'news', 'video')
        num_results: Number of results per query

    Returns:
        Dictionary mapping each distinct query to its search results
        (an empty list for queries that failed)

    Raises:
        ValidationError: If queries is empty or search_type is not one of
            the supported types

    Examples:
        >>> tool = SearchTool()
        >>> queries = ["AI", "machine learning", "deep learning"]
        >>> results = await tool.search_batch(queries, num_results=5)
        >>> print(results["AI"][0]['title'])
    """
    if not queries:
        raise ValidationError("queries list cannot be empty")

    # Select search method
    search_methods = {
        'web': self.search_web,
        'image': self.search_images,
        'news': self.search_news,
        'video': self.search_videos,
    }

    if search_type not in search_methods:
        raise ValidationError(f"Invalid search_type: {search_type}")

    search_method = search_methods[search_type]

    # Duplicates would collapse onto one dict key anyway, so issue each
    # query once (first-occurrence order preserved) to save API quota.
    unique_queries = list(dict.fromkeys(queries))

    # We are inside a coroutine, so a running loop is guaranteed to exist.
    loop = asyncio.get_running_loop()
    pending = [
        loop.run_in_executor(None, search_method, query, num_results)
        for query in unique_queries
    ]
    outcomes = await asyncio.gather(*pending, return_exceptions=True)

    # Build results dictionary
    results_dict = {}
    for query, outcome in zip(unique_queries, outcomes):
        # gather() can hand back BaseException subclasses (e.g. a cancelled
        # child), which must never be stored as if they were result lists.
        if isinstance(outcome, BaseException):
            self.logger.error(f"Search failed for query '{query}': {outcome}")
            results_dict[query] = []
        else:
            results_dict[query] = outcome

    return results_dict
|
|
1045
|
-
|
|
1046
|
-
# ========================================================================
|
|
1047
|
-
# Utility Methods
|
|
1048
|
-
# ========================================================================
|
|
1049
|
-
|
|
1050
|
-
def validate_credentials(self) -> Dict[str, Any]:
    """
    Check the configured API credentials by issuing one minimal search.

    A single-result search for "test" is executed directly (without the
    retry helper). Any exception raised along the way is converted into a
    failure report instead of propagating.

    Returns:
        On success: ``valid`` (True), ``method`` ('api_key' when
        ``config.google_api_key`` is truthy, otherwise 'service_account'),
        ``cse_id`` and ``message``.
        On failure: ``valid`` (False), ``error`` (the exception text) and
        ``message``.

    Examples:
        >>> tool = SearchTool()
        >>> status = tool.validate_credentials()
        >>> print(status['valid'])
        True
    """
    try:
        # Only success/failure matters here; the search payload is discarded.
        self._execute_search("test", num_results=1)

        if self.config.google_api_key:
            auth_method = 'api_key'
        else:
            auth_method = 'service_account'

        report = {
            'valid': True,
            'method': auth_method,
            'cse_id': self.config.google_cse_id,
            'message': 'Credentials are valid and working'
        }
    except Exception as exc:
        report = {
            'valid': False,
            'error': str(exc),
            'message': 'Credentials validation failed'
        }
    return report
|
|
1079
|
-
|
|
1080
|
-
def get_quota_status(self) -> Dict[str, Any]:
    """
    Report the current quota and rate-limit situation.

    Returns:
        Dictionary with ``remaining_quota`` (from the rate limiter),
        ``max_requests`` and ``time_window_seconds`` (from the config),
        ``circuit_breaker_state`` and ``metrics`` (a shallow copy of the
        tool's metrics mapping)

    Examples:
        >>> tool = SearchTool()
        >>> status = tool.get_quota_status()
        >>> print(f"Remaining quota: {status['remaining_quota']}")
    """
    status = {}
    status['remaining_quota'] = self.rate_limiter.get_remaining_quota()
    status['max_requests'] = self.config.rate_limit_requests
    status['time_window_seconds'] = self.config.rate_limit_window
    status['circuit_breaker_state'] = self.circuit_breaker.get_state()
    # Copy so callers cannot mutate the live counters through the report.
    status['metrics'] = self.metrics.copy()
    return status
|
|
1100
|
-
|
|
1101
|
-
def get_metrics(self) -> Dict[str, Any]:
    """
    Summarise tool usage.

    Returns:
        A new dictionary containing every entry of the tool's metrics
        mapping plus ``success_rate`` (successful_requests divided by
        total_requests, or 0 when no requests have been made),
        ``circuit_breaker_state`` and ``remaining_quota``

    Examples:
        >>> tool = SearchTool()
        >>> metrics = tool.get_metrics()
        >>> print(f"Success rate: {metrics['success_rate']:.2%}")
    """
    request_count = self.metrics['total_requests']
    if request_count > 0:
        success_rate = self.metrics['successful_requests'] / request_count
    else:
        # Avoid dividing by zero before any request has been recorded.
        success_rate = 0

    summary = dict(self.metrics)
    summary['success_rate'] = success_rate
    summary['circuit_breaker_state'] = self.circuit_breaker.get_state()
    summary['remaining_quota'] = self.rate_limiter.get_remaining_quota()
    return summary
|