aiecs 1.2.1__py3-none-any.whl → 1.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiecs might be problematic. Click here for more details.

Files changed (56) hide show
  1. aiecs/__init__.py +1 -1
  2. aiecs/config/config.py +2 -1
  3. aiecs/llm/clients/vertex_client.py +5 -0
  4. aiecs/main.py +2 -2
  5. aiecs/scripts/tools_develop/README.md +111 -2
  6. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  7. aiecs/scripts/tools_develop/validate_tool_schemas.py +80 -21
  8. aiecs/scripts/tools_develop/verify_tools.py +347 -0
  9. aiecs/tools/__init__.py +94 -30
  10. aiecs/tools/apisource/__init__.py +106 -0
  11. aiecs/tools/apisource/intelligence/__init__.py +20 -0
  12. aiecs/tools/apisource/intelligence/data_fusion.py +378 -0
  13. aiecs/tools/apisource/intelligence/query_analyzer.py +387 -0
  14. aiecs/tools/apisource/intelligence/search_enhancer.py +384 -0
  15. aiecs/tools/apisource/monitoring/__init__.py +12 -0
  16. aiecs/tools/apisource/monitoring/metrics.py +308 -0
  17. aiecs/tools/apisource/providers/__init__.py +114 -0
  18. aiecs/tools/apisource/providers/base.py +684 -0
  19. aiecs/tools/apisource/providers/census.py +412 -0
  20. aiecs/tools/apisource/providers/fred.py +575 -0
  21. aiecs/tools/apisource/providers/newsapi.py +402 -0
  22. aiecs/tools/apisource/providers/worldbank.py +346 -0
  23. aiecs/tools/apisource/reliability/__init__.py +14 -0
  24. aiecs/tools/apisource/reliability/error_handler.py +362 -0
  25. aiecs/tools/apisource/reliability/fallback_strategy.py +420 -0
  26. aiecs/tools/apisource/tool.py +814 -0
  27. aiecs/tools/apisource/utils/__init__.py +12 -0
  28. aiecs/tools/apisource/utils/validators.py +343 -0
  29. aiecs/tools/langchain_adapter.py +95 -17
  30. aiecs/tools/search_tool/__init__.py +102 -0
  31. aiecs/tools/search_tool/analyzers.py +583 -0
  32. aiecs/tools/search_tool/cache.py +280 -0
  33. aiecs/tools/search_tool/constants.py +127 -0
  34. aiecs/tools/search_tool/context.py +219 -0
  35. aiecs/tools/search_tool/core.py +773 -0
  36. aiecs/tools/search_tool/deduplicator.py +123 -0
  37. aiecs/tools/search_tool/error_handler.py +257 -0
  38. aiecs/tools/search_tool/metrics.py +375 -0
  39. aiecs/tools/search_tool/rate_limiter.py +177 -0
  40. aiecs/tools/search_tool/schemas.py +297 -0
  41. aiecs/tools/statistics/data_loader_tool.py +2 -2
  42. aiecs/tools/statistics/data_transformer_tool.py +1 -1
  43. aiecs/tools/task_tools/__init__.py +8 -8
  44. aiecs/tools/task_tools/report_tool.py +1 -1
  45. aiecs/tools/tool_executor/__init__.py +2 -0
  46. aiecs/tools/tool_executor/tool_executor.py +284 -14
  47. aiecs/utils/__init__.py +11 -0
  48. aiecs/utils/cache_provider.py +698 -0
  49. aiecs/utils/execution_utils.py +5 -5
  50. {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/METADATA +1 -1
  51. {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/RECORD +55 -23
  52. aiecs/tools/task_tools/search_tool.py +0 -1123
  53. {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/WHEEL +0 -0
  54. {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/entry_points.txt +0 -0
  55. {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/licenses/LICENSE +0 -0
  56. {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,123 @@
1
+ """
2
+ Result Deduplication
3
+
4
+ This module handles detection and removal of duplicate and highly similar
5
+ search results.
6
+ """
7
+
8
+ import hashlib
9
+ from typing import Any, Dict, List
10
+ from urllib.parse import urlparse, urlunparse
11
+
12
+
13
class ResultDeduplicator:
    """Removes duplicate and similar search results.

    Deduplication is two-stage and order-preserving (first occurrence wins):
      1. URL-based: results whose normalized URLs match are duplicates.
      2. Content-based: results whose normalized title+snippet fingerprint
         matches an already-seen result are duplicates.
    """

    def deduplicate_results(
        self,
        results: List[Dict[str, Any]],
        similarity_threshold: float = 0.85
    ) -> List[Dict[str, Any]]:
        """
        Remove duplicate and highly similar results.

        Args:
            results: List of search results (dicts with optional 'link',
                'title' and 'snippet' keys)
            similarity_threshold: Similarity threshold (0-1) for considering
                results as duplicates

        Returns:
            Deduplicated list of results, preserving first-seen order
        """
        if not results:
            return []

        unique_results: List[Dict[str, Any]] = []
        seen_urls: set = set()
        seen_content_hashes: set = set()

        for result in results:
            url = result.get('link', '')

            # 1. URL deduplication (normalized).
            # Bug fix: results with an empty/missing 'link' all normalized
            # to '' and every one after the first was dropped regardless of
            # content. Skip URL dedup entirely when there is no URL; such
            # results are still subject to content dedup below.
            normalized_url = self._normalize_url(url) if url else None
            if normalized_url is not None and normalized_url in seen_urls:
                continue

            # 2. Content similarity deduplication.
            # _calculate_similarity is binary (1.0 on exact hash match,
            # 0.0 otherwise), so scanning every seen hash is equivalent to a
            # set-membership test; the original O(n^2) loop is replaced with
            # an O(1) lookup. A hash match exceeds any threshold < 1.0,
            # matching the original `1.0 > similarity_threshold` semantics.
            content_hash = self._calculate_content_hash(
                result.get('title', ''),
                result.get('snippet', '')
            )
            if content_hash in seen_content_hashes and similarity_threshold < 1.0:
                continue

            # Add to unique results
            unique_results.append(result)
            if normalized_url is not None:
                seen_urls.add(normalized_url)
            seen_content_hashes.add(content_hash)

        return unique_results

    def _normalize_url(self, url: str) -> str:
        """
        Normalize URL by removing query parameters and fragments.

        Keeps only scheme, lowercased host, and path (with trailing slash
        stripped) so e.g. 'http://a.com/x/?q=1' and 'http://A.com/x' compare
        equal.

        Args:
            url: URL to normalize

        Returns:
            Normalized URL (falls back to the lowercased input if parsing fails)
        """
        try:
            parsed = urlparse(url)
            # Keep only scheme, netloc, and path
            normalized = urlunparse((
                parsed.scheme,
                parsed.netloc.lower(),
                parsed.path.rstrip('/'),
                '', '', ''  # Remove params, query, fragment
            ))
            return normalized
        except Exception:
            return url.lower()

    def _calculate_content_hash(self, title: str, snippet: str) -> str:
        """
        Calculate content hash for similarity detection.

        Lowercases, strips punctuation and collapses whitespace before
        hashing, so near-identical text (differing only in case/punctuation)
        produces the same fingerprint. MD5 is used as a fast non-cryptographic
        fingerprint only.

        Args:
            title: Result title
            snippet: Result snippet

        Returns:
            Content hash string (hex digest)
        """
        content = f"{title.lower()} {snippet.lower()}"
        # Remove punctuation and normalize whitespace
        content = ''.join(c for c in content if c.isalnum() or c.isspace())
        content = ' '.join(content.split())
        return hashlib.md5(content.encode()).hexdigest()

    def _calculate_similarity(self, hash1: str, hash2: str) -> float:
        """
        Calculate similarity between two content hashes.

        Hashes carry no distance information, so this is binary:
        1.0 on exact match, 0.0 otherwise.

        Args:
            hash1: First content hash
            hash2: Second content hash

        Returns:
            Similarity score (0-1)
        """
        # Exact hash match
        return 1.0 if hash1 == hash2 else 0.0
@@ -0,0 +1,257 @@
1
+ """
2
+ Agent-Friendly Error Handling
3
+
4
+ This module formats errors in an agent-friendly way with clear messages,
5
+ suggested actions, and alternative approaches.
6
+ """
7
+
8
+ from typing import Any, Dict, List
9
+
10
+ from .constants import (
11
+ QuotaExceededError,
12
+ AuthenticationError,
13
+ RateLimitError,
14
+ CircuitBreakerOpenError,
15
+ ValidationError,
16
+ SearchAPIError
17
+ )
18
+
19
+
20
class AgentFriendlyErrorHandler:
    """Formats errors for agent consumption with actionable suggestions.

    Classification gives concrete exception types priority over
    message-substring heuristics. (Bug fix: the original OR-ed the substring
    checks into the same branches and ordered them so that, e.g., a
    RateLimitError whose message contained 'rate limit' was misclassified as
    quota_exceeded, and a ValidationError mentioning 'auth' was misclassified
    as authentication_failed.)
    """

    def format_error_for_agent(
        self,
        error: Exception,
        context: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Format error for agent-friendly consumption.

        Args:
            error: The exception that occurred
            context: Context information (circuit breaker timeout, etc.)

        Returns:
            Structured error information dictionary with keys: error_type,
            severity, user_message, technical_details, suggested_actions,
            alternative_approaches, can_retry, estimated_recovery_time
        """
        error_response = {
            'error_type': 'unknown',
            'severity': 'medium',
            'user_message': '',
            'technical_details': str(error),
            'suggested_actions': [],
            'alternative_approaches': [],
            'can_retry': False,
            'estimated_recovery_time': None
        }

        error_str = str(error).lower()
        error_type = type(error).__name__

        # 1) Typed classification first — exact exception types are
        # authoritative and must not be overridden by message contents.
        if isinstance(error, QuotaExceededError):
            self._handle_quota_exceeded(error_response)

        elif isinstance(error, AuthenticationError):
            self._handle_authentication_error(error_response)

        elif isinstance(error, RateLimitError):
            self._handle_rate_limit_error(error_response)

        elif isinstance(error, CircuitBreakerOpenError):
            self._handle_circuit_breaker_error(error_response, context)

        elif isinstance(error, ValidationError):
            self._handle_validation_error(error_response)

        # 2) Message-substring heuristics as a fallback for untyped errors.
        elif 'quota' in error_str or 'rate limit' in error_str:
            self._handle_quota_exceeded(error_response)

        elif 'auth' in error_str or 'credential' in error_str:
            self._handle_authentication_error(error_response)

        elif 'circuit breaker' in error_str:
            self._handle_circuit_breaker_error(error_response, context)

        elif 'invalid' in error_str or 'validation' in error_str:
            self._handle_validation_error(error_response)

        elif 'timeout' in error_str or 'connection' in error_str or 'network' in error_str:
            self._handle_network_error(error_response)

        elif 'no results' in error_str or 'not found' in error_str:
            self._handle_no_results(error_response)

        else:
            # Generic error handling
            error_response.update({
                'error_type': error_type,
                'severity': 'medium',
                'user_message': f'An unexpected error occurred: {str(error)}',
                'suggested_actions': [
                    'Check your query parameters',
                    'Try simplifying the query',
                    'Retry the operation'
                ],
                'can_retry': True
            })

        return error_response

    def _handle_quota_exceeded(self, response: Dict[str, Any]):
        """Populate *response* for quota-exceeded errors (high severity, retryable)."""
        response.update({
            'error_type': 'quota_exceeded',
            'severity': 'high',
            'user_message': (
                'Search API quota has been exceeded. '
                'The service has temporarily reached its usage limit.'
            ),
            'suggested_actions': [
                'Wait 60-120 seconds before retrying',
                'Reduce the number of results requested',
                'Use more specific queries to get better results with fewer searches',
                'Check if cached results are available'
            ],
            'alternative_approaches': [
                'Use the scraper tool to extract information from known URLs',
                'Query specific authoritative domains using site: operator',
                'Defer non-urgent searches to later'
            ],
            'can_retry': True,
            'estimated_recovery_time': '1-2 minutes'
        })

    def _handle_authentication_error(self, response: Dict[str, Any]):
        """Populate *response* for authentication errors (high severity, not retryable)."""
        response.update({
            'error_type': 'authentication_failed',
            'severity': 'high',
            'user_message': (
                'Search API authentication failed. '
                'The API credentials may be invalid or expired.'
            ),
            'suggested_actions': [
                'Verify that GOOGLE_API_KEY is set correctly in environment',
                'Check that GOOGLE_CSE_ID is valid',
                'Ensure API key has not expired',
                'Verify API key has Custom Search API enabled'
            ],
            'alternative_approaches': [
                'Use alternative data sources (apisource_tool)',
                'Request manual search from user'
            ],
            'can_retry': False,
            'estimated_recovery_time': None
        })

    def _handle_rate_limit_error(self, response: Dict[str, Any]):
        """Populate *response* for rate-limit errors (medium severity, retryable)."""
        response.update({
            'error_type': 'rate_limit_exceeded',
            'severity': 'medium',
            'user_message': (
                'Rate limit has been exceeded. '
                'Too many requests in a short time period.'
            ),
            'suggested_actions': [
                'Wait for the suggested time before retrying',
                'Reduce request frequency',
                'Use cached results when available',
                'Batch similar queries together'
            ],
            'alternative_approaches': [
                'Use cached or historical data',
                'Prioritize critical searches'
            ],
            'can_retry': True,
            'estimated_recovery_time': 'As indicated in error message'
        })

    def _handle_circuit_breaker_error(
        self,
        response: Dict[str, Any],
        context: Dict[str, Any]
    ):
        """Populate *response* for circuit-breaker-open errors.

        Reads 'circuit_breaker_timeout' from *context* (default 60 seconds)
        to tell the agent how long to wait before retrying.
        """
        timeout = context.get('circuit_breaker_timeout', 60)

        response.update({
            'error_type': 'circuit_breaker_open',
            'severity': 'high',
            'user_message': (
                'Search service is temporarily unavailable due to repeated failures. '
                'The circuit breaker has been triggered for protection.'
            ),
            'suggested_actions': [
                f'Wait {timeout} seconds for circuit to reset',
                'Check search service status',
                'Review recent error logs'
            ],
            'alternative_approaches': [
                'Use alternative data sources',
                'Defer search to later',
                'Use cached or historical data'
            ],
            'can_retry': True,
            'estimated_recovery_time': f'{timeout} seconds'
        })

    def _handle_validation_error(self, response: Dict[str, Any]):
        """Populate *response* for invalid query/parameter errors (low severity)."""
        response.update({
            'error_type': 'invalid_query',
            'severity': 'low',
            'user_message': (
                'The search query or parameters are invalid. '
                'Please check the query format.'
            ),
            'suggested_actions': [
                'Simplify the query - remove special characters',
                'Check that all parameters are within valid ranges',
                'Ensure query is not empty',
                'Review query syntax for search operators'
            ],
            'alternative_approaches': [
                'Break complex query into simpler parts',
                'Use basic search without advanced operators'
            ],
            'can_retry': True,
            'estimated_recovery_time': 'immediate (after fixing query)'
        })

    def _handle_network_error(self, response: Dict[str, Any]):
        """Populate *response* for network/timeout errors (usually transient)."""
        response.update({
            'error_type': 'network_error',
            'severity': 'medium',
            'user_message': (
                'Network connection to search API failed. '
                'This is usually a temporary issue.'
            ),
            'suggested_actions': [
                'Retry the search in 5-10 seconds',
                'Check internet connectivity',
                'Try with a shorter timeout if query is complex'
            ],
            'alternative_approaches': [
                'Use cached results if available',
                'Try alternative search parameters'
            ],
            'can_retry': True,
            'estimated_recovery_time': '10-30 seconds'
        })

    def _handle_no_results(self, response: Dict[str, Any]):
        """Populate *response* for empty result sets (low severity, retry with broader query)."""
        response.update({
            'error_type': 'no_results',
            'severity': 'low',
            'user_message': (
                'No search results found for the query. '
                'Try broadening your search terms.'
            ),
            'suggested_actions': [
                'Remove overly specific terms',
                'Try synonyms or related terms',
                'Remove date restrictions',
                'Broaden the search scope'
            ],
            'alternative_approaches': [
                'Search for related topics',
                'Try different search engines or sources',
                'Break down into sub-queries'
            ],
            'can_retry': True,
            'estimated_recovery_time': 'immediate (with modified query)'
        })