aiecs 1.2.2__py3-none-any.whl → 1.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiecs might be problematic. Click here for more details.

Files changed (55)
  1. aiecs/__init__.py +1 -1
  2. aiecs/llm/clients/vertex_client.py +22 -2
  3. aiecs/main.py +2 -2
  4. aiecs/scripts/tools_develop/README.md +111 -2
  5. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  6. aiecs/scripts/tools_develop/validate_tool_schemas.py +80 -21
  7. aiecs/scripts/tools_develop/verify_tools.py +347 -0
  8. aiecs/tools/__init__.py +94 -30
  9. aiecs/tools/apisource/__init__.py +106 -0
  10. aiecs/tools/apisource/intelligence/__init__.py +20 -0
  11. aiecs/tools/apisource/intelligence/data_fusion.py +378 -0
  12. aiecs/tools/apisource/intelligence/query_analyzer.py +387 -0
  13. aiecs/tools/apisource/intelligence/search_enhancer.py +384 -0
  14. aiecs/tools/apisource/monitoring/__init__.py +12 -0
  15. aiecs/tools/apisource/monitoring/metrics.py +308 -0
  16. aiecs/tools/apisource/providers/__init__.py +114 -0
  17. aiecs/tools/apisource/providers/base.py +684 -0
  18. aiecs/tools/apisource/providers/census.py +412 -0
  19. aiecs/tools/apisource/providers/fred.py +575 -0
  20. aiecs/tools/apisource/providers/newsapi.py +402 -0
  21. aiecs/tools/apisource/providers/worldbank.py +346 -0
  22. aiecs/tools/apisource/reliability/__init__.py +14 -0
  23. aiecs/tools/apisource/reliability/error_handler.py +362 -0
  24. aiecs/tools/apisource/reliability/fallback_strategy.py +420 -0
  25. aiecs/tools/apisource/tool.py +814 -0
  26. aiecs/tools/apisource/utils/__init__.py +12 -0
  27. aiecs/tools/apisource/utils/validators.py +343 -0
  28. aiecs/tools/langchain_adapter.py +95 -17
  29. aiecs/tools/search_tool/__init__.py +102 -0
  30. aiecs/tools/search_tool/analyzers.py +583 -0
  31. aiecs/tools/search_tool/cache.py +280 -0
  32. aiecs/tools/search_tool/constants.py +127 -0
  33. aiecs/tools/search_tool/context.py +219 -0
  34. aiecs/tools/search_tool/core.py +773 -0
  35. aiecs/tools/search_tool/deduplicator.py +123 -0
  36. aiecs/tools/search_tool/error_handler.py +257 -0
  37. aiecs/tools/search_tool/metrics.py +375 -0
  38. aiecs/tools/search_tool/rate_limiter.py +177 -0
  39. aiecs/tools/search_tool/schemas.py +297 -0
  40. aiecs/tools/statistics/data_loader_tool.py +2 -2
  41. aiecs/tools/statistics/data_transformer_tool.py +1 -1
  42. aiecs/tools/task_tools/__init__.py +8 -8
  43. aiecs/tools/task_tools/report_tool.py +1 -1
  44. aiecs/tools/tool_executor/__init__.py +2 -0
  45. aiecs/tools/tool_executor/tool_executor.py +284 -14
  46. aiecs/utils/__init__.py +11 -0
  47. aiecs/utils/cache_provider.py +698 -0
  48. aiecs/utils/execution_utils.py +5 -5
  49. {aiecs-1.2.2.dist-info → aiecs-1.3.3.dist-info}/METADATA +1 -1
  50. {aiecs-1.2.2.dist-info → aiecs-1.3.3.dist-info}/RECORD +54 -22
  51. aiecs/tools/task_tools/search_tool.py +0 -1123
  52. {aiecs-1.2.2.dist-info → aiecs-1.3.3.dist-info}/WHEEL +0 -0
  53. {aiecs-1.2.2.dist-info → aiecs-1.3.3.dist-info}/entry_points.txt +0 -0
  54. {aiecs-1.2.2.dist-info → aiecs-1.3.3.dist-info}/licenses/LICENSE +0 -0
  55. {aiecs-1.2.2.dist-info → aiecs-1.3.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,280 @@
1
+ """
2
+ Intelligent Caching with Redis
3
+
4
+ This module implements intelligent caching with intent-aware TTL strategies
5
+ using Redis as the backend.
6
+ """
7
+
8
+ import hashlib
9
+ import json
10
+ import logging
11
+ from datetime import datetime
12
+ from typing import Any, Dict, List, Optional
13
+
14
+ from .constants import QueryIntentType, CacheError
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
class IntelligentCache:
    """Redis-backed search-result cache with intent-aware TTLs.

    Results are stored as JSON under a key derived from the query and its
    parameters.  The TTL starts from the query's intent type and is then
    adjusted by the freshness and quality scores attached to the results
    (under each result's ``_quality`` key).  Every Redis failure is logged
    and swallowed so a cache outage can never break a search.
    """

    # Base TTL per intent type, in seconds.  Stable content (definitions,
    # academic papers) keeps for weeks; fast-moving content (news) for an hour.
    TTL_STRATEGIES = {
        QueryIntentType.DEFINITION.value: 86400 * 30,  # 30 days (rarely changes)
        QueryIntentType.HOW_TO.value: 86400 * 7,       # 7 days
        QueryIntentType.FACTUAL.value: 86400 * 7,      # 7 days
        QueryIntentType.ACADEMIC.value: 86400 * 30,    # 30 days (papers don't change)
        QueryIntentType.RECENT_NEWS.value: 3600,       # 1 hour (fast-changing)
        QueryIntentType.PRODUCT.value: 86400,          # 1 day
        QueryIntentType.COMPARISON.value: 86400 * 3,   # 3 days
        QueryIntentType.GENERAL.value: 3600            # 1 hour default
    }

    def __init__(self, redis_client: Optional[Any] = None, enabled: bool = True):
        """
        Initialize intelligent cache.

        Args:
            redis_client: Redis client wrapper exposing an async ``get_client()``
                method (optional; caching is disabled without it)
            enabled: Whether caching is enabled
        """
        self.redis_client = redis_client
        # Active only when explicitly enabled AND a client was supplied.
        self.enabled = enabled and redis_client is not None
        self.cache_prefix = "search_tool:"

        if not self.enabled:
            logger.info("Intelligent cache is disabled (no Redis client)")

    async def get(
        self,
        query: str,
        params: Dict[str, Any]
    ) -> Optional[Dict[str, Any]]:
        """
        Get cached search results.

        Args:
            query: Search query
            params: Search parameters

        Returns:
            Cached results dictionary (the full record written by :meth:`set`)
            or None if not found
        """
        if not self.enabled:
            return None

        try:
            cache_key = self._generate_cache_key(query, params)
            redis = await self.redis_client.get_client()
            cached_data = await redis.get(cache_key)

            if cached_data:
                logger.debug(f"Cache hit for query: {query}")
                return json.loads(cached_data)

            logger.debug(f"Cache miss for query: {query}")
            return None

        except Exception as e:
            # Best-effort: any cache failure degrades to a cache miss.
            logger.warning(f"Cache get error: {e}")
            return None

    async def set(
        self,
        query: str,
        params: Dict[str, Any],
        results: List[Dict[str, Any]],
        intent_type: str = QueryIntentType.GENERAL.value,
        metadata: Optional[Dict[str, Any]] = None
    ):
        """
        Cache search results with intelligent TTL.

        Args:
            query: Search query
            params: Search parameters
            results: Search results to cache
            intent_type: Query intent type for TTL calculation
            metadata: Optional metadata about the search
        """
        if not self.enabled:
            return

        # Local import: the module header only imports `datetime`.
        from datetime import timezone

        try:
            cache_key = self._generate_cache_key(query, params)

            # Calculate intelligent TTL
            ttl = self.calculate_ttl(query, intent_type, results)

            # Prepare cache data
            cache_data = {
                'query': query,
                'params': params,
                'results': results,
                'intent_type': intent_type,
                'metadata': metadata or {},
                # Timezone-aware UTC timestamp; datetime.utcnow() is deprecated
                # since Python 3.12.
                'cached_at': datetime.now(timezone.utc).isoformat(),
                'ttl': ttl
            }

            # Store in Redis with expiry = computed TTL.
            redis = await self.redis_client.get_client()
            await redis.set(
                cache_key,
                json.dumps(cache_data),
                ex=ttl
            )

            logger.debug(f"Cached results for query: {query} (TTL: {ttl}s)")

        except Exception as e:
            logger.warning(f"Cache set error: {e}")

    @staticmethod
    def _average_score(results: List[Dict[str, Any]], field: str) -> Optional[float]:
        """Average ``result['_quality'][field]`` over results (0.5 default per
        result); return None on any error (malformed results are tolerated)."""
        try:
            return sum(
                r.get('_quality', {}).get(field, 0.5)
                for r in results
            ) / len(results)
        except Exception:
            return None

    def calculate_ttl(
        self,
        query: str,
        intent_type: str,
        results: List[Dict[str, Any]]
    ) -> int:
        """
        Calculate intelligent TTL based on intent and result quality.

        Args:
            query: Search query
            intent_type: Query intent type
            results: Search results

        Returns:
            TTL in seconds
        """
        # Base TTL from intent type (fall back to the GENERAL strategy).
        base_ttl = self.TTL_STRATEGIES.get(
            intent_type,
            self.TTL_STRATEGIES[QueryIntentType.GENERAL.value]
        )

        if not results:
            # No results: shorter cache time
            return base_ttl // 2

        # Adjust based on result freshness.
        avg_freshness = self._average_score(results, 'freshness_score')
        if avg_freshness is not None:
            if avg_freshness > 0.9:
                # Very fresh results can be cached longer
                base_ttl = int(base_ttl * 2)
            elif avg_freshness < 0.3:
                # Old results should have shorter cache
                base_ttl = base_ttl // 2

        # Adjust based on result quality (applied after the freshness factor).
        avg_quality = self._average_score(results, 'quality_score')
        if avg_quality is not None and avg_quality > 0.8:
            # High quality results can be cached longer
            base_ttl = int(base_ttl * 1.5)

        return base_ttl

    async def invalidate(self, query: str, params: Dict[str, Any]):
        """
        Invalidate cached results.

        Args:
            query: Search query
            params: Search parameters
        """
        if not self.enabled:
            return

        try:
            cache_key = self._generate_cache_key(query, params)
            redis = await self.redis_client.get_client()
            await redis.delete(cache_key)
            logger.debug(f"Invalidated cache for query: {query}")
        except Exception as e:
            logger.warning(f"Cache invalidate error: {e}")

    async def clear_all(self):
        """Clear all cached search results (every key under the tool prefix)."""
        if not self.enabled:
            return

        try:
            redis = await self.redis_client.get_client()
            # Find all search_tool cache keys via non-blocking SCAN.
            pattern = f"{self.cache_prefix}*"
            keys = []
            async for key in redis.scan_iter(match=pattern):
                keys.append(key)

            if keys:
                await redis.delete(*keys)
                logger.info(f"Cleared {len(keys)} cached entries")
        except Exception as e:
            logger.warning(f"Cache clear error: {e}")

    def _generate_cache_key(self, query: str, params: Dict[str, Any]) -> str:
        """
        Generate unique cache key from query and parameters.

        Args:
            query: Search query
            params: Search parameters

        Returns:
            Cache key string
        """
        # Create deterministic string from query and params (sorted keys so
        # logically-equal param dicts hash identically).
        param_str = json.dumps(params, sort_keys=True)
        key_data = f"{query}:{param_str}"
        key_hash = hashlib.sha256(key_data.encode()).hexdigest()[:16]

        return f"{self.cache_prefix}{key_hash}"

    async def get_stats(self) -> Dict[str, Any]:
        """
        Get cache statistics.

        Returns:
            Cache statistics dictionary
        """
        if not self.enabled:
            return {
                'enabled': False,
                'total_keys': 0
            }

        try:
            redis = await self.redis_client.get_client()
            # Count cache keys without loading them.
            pattern = f"{self.cache_prefix}*"
            key_count = 0
            async for _ in redis.scan_iter(match=pattern):
                key_count += 1

            return {
                'enabled': True,
                'total_keys': key_count,
                'prefix': self.cache_prefix
            }
        except Exception as e:
            logger.warning(f"Cache stats error: {e}")
            return {
                'enabled': True,
                'error': str(e)
            }
@@ -0,0 +1,127 @@
1
+ """
2
+ Constants, Enums, and Exception Classes for Search Tool
3
+
4
+ This module contains all the shared constants, enumeration types, and
5
+ custom exception classes used across the search tool package.
6
+ """
7
+
8
+ from enum import Enum
9
+
10
+
11
+ # ============================================================================
12
+ # Enums
13
+ # ============================================================================
14
+
15
class SearchType(str, Enum):
    """Kinds of search the tool can perform."""

    WEB = "web"      # standard web-page search
    IMAGE = "image"  # image search
    NEWS = "news"    # news-article search
    VIDEO = "video"  # video search
21
+
22
+
23
class SafeSearch(str, Enum):
    """Levels of safe-search filtering."""

    OFF = "off"        # no filtering
    MEDIUM = "medium"  # moderate filtering
    HIGH = "high"      # strict filtering
28
+
29
+
30
class ImageSize(str, Enum):
    """Image size filters, smallest to largest."""

    ICON = "icon"
    SMALL = "small"
    MEDIUM = "medium"
    LARGE = "large"
    XLARGE = "xlarge"
    XXLARGE = "xxlarge"
    HUGE = "huge"
39
+
40
+
41
class ImageType(str, Enum):
    """Image content-type filters."""

    CLIPART = "clipart"
    FACE = "face"
    LINEART = "lineart"
    STOCK = "stock"
    PHOTO = "photo"
    ANIMATED = "animated"
49
+
50
+
51
class ImageColorType(str, Enum):
    """Image color-type filters."""

    COLOR = "color"  # full color
    GRAY = "gray"    # grayscale
    MONO = "mono"    # black and white
    TRANS = "trans"  # transparent background
57
+
58
+
59
class CircuitState(str, Enum):
    """States of the circuit breaker."""

    CLOSED = "closed"        # normal operation, requests flow through
    OPEN = "open"            # failing; requests are rejected
    HALF_OPEN = "half_open"  # probing whether the backend has recovered
64
+
65
+
66
class QueryIntentType(str, Enum):
    """Classified intents of a search query."""

    DEFINITION = "definition"    # "what is X"
    HOW_TO = "how_to"            # instructional queries
    COMPARISON = "comparison"    # "X vs Y"
    FACTUAL = "factual"          # fact lookups
    RECENT_NEWS = "recent_news"  # time-sensitive news
    ACADEMIC = "academic"        # scholarly material
    PRODUCT = "product"          # product/shopping queries
    GENERAL = "general"          # fallback intent
76
+
77
+
78
class CredibilityLevel(str, Enum):
    """Credibility rating assigned to a search result."""

    HIGH = "high"
    MEDIUM = "medium"
    LOW = "low"
83
+
84
+
85
+ # ============================================================================
86
+ # Exception Hierarchy
87
+ # ============================================================================
88
+
89
class SearchToolError(Exception):
    """Root of the search-tool exception hierarchy; catch this to handle any tool failure."""
92
+
93
+
94
class AuthenticationError(SearchToolError):
    """Raised when API credentials are missing or rejected."""
97
+
98
+
99
class QuotaExceededError(SearchToolError):
    """Raised when the API's usage quota has been exhausted."""
102
+
103
+
104
class RateLimitError(SearchToolError):
    """Raised when requests exceed the allowed rate."""
107
+
108
+
109
class CircuitBreakerOpenError(SearchToolError):
    """Raised when a call is rejected because the circuit breaker is open."""
112
+
113
+
114
class SearchAPIError(SearchToolError):
    """Raised when the upstream search API returns an error."""
117
+
118
+
119
class ValidationError(SearchToolError):
    """Raised when caller-supplied input fails validation."""
122
+
123
+
124
class CacheError(SearchToolError):
    """Raised for cache-layer failures."""
127
+
@@ -0,0 +1,219 @@
1
+ """
2
+ Search Context Management
3
+
4
+ This module tracks search history, learns user preferences, and provides
5
+ contextual suggestions for better search results.
6
+ """
7
+
8
+ from datetime import datetime
9
+ from typing import Any, Dict, List, Optional, Set
10
+
11
+
12
class SearchContext:
    """Tracks recent searches and learned user preferences.

    Keeps a bounded history of searches, derives a rough topic context from
    the latest query, and learns preferred/avoided domains from explicit
    user feedback.  All state is in-memory and per-instance (not persisted;
    not thread-safe).
    """

    def __init__(self, max_history: int = 10):
        """
        Initialize search context.

        Args:
            max_history: Maximum number of searches to keep in history
        """
        self.search_history: List[Dict[str, Any]] = []
        self.max_history = max_history
        # Words of the most recent query; a very rough "current topic" signal.
        self.topic_context: Optional[List[str]] = None
        self.user_preferences: Dict[str, Any] = {
            'preferred_domains': set(),
            'avoided_domains': set(),
            'preferred_content_types': [],
            'language': 'en'
        }

    def add_search(
        self,
        query: str,
        results: List[Dict[str, Any]],
        user_feedback: Optional[Dict[str, Any]] = None
    ):
        """
        Add search to history and update context.

        Args:
            query: Search query
            results: Search results
            user_feedback: Optional user feedback for learning; may contain
                'clicked_indices' and/or 'disliked_indices' lists
        """
        # Local import: the module header only imports `datetime`.
        from datetime import timezone

        search_record = {
            # Timezone-aware UTC timestamp; datetime.utcnow() is deprecated
            # since Python 3.12.
            'timestamp': datetime.now(timezone.utc).isoformat(),
            'query': query,
            'result_count': len(results),
            'clicked_results': [],
            'feedback': user_feedback
        }

        self.search_history.append(search_record)

        # Maintain history size limit (drop the oldest record).
        if len(self.search_history) > self.max_history:
            self.search_history.pop(0)

        # Update topic context
        self._update_topic_context(query, results)

        # Learn from feedback if provided
        if user_feedback:
            self._learn_preferences(results, user_feedback)

    def get_contextual_suggestions(self, current_query: str) -> Dict[str, Any]:
        """
        Generate context-aware suggestions for the current query.

        Args:
            current_query: Current search query

        Returns:
            Suggestions dictionary with related queries and parameters
        """
        suggestions: Dict[str, Any] = {
            'related_queries': [],
            'refinement_suggestions': [],
            'context_aware_params': {}
        }

        if not self.search_history:
            return suggestions

        # Scan the five most recent searches, newest first, for similar queries.
        for record in reversed(self.search_history[-5:]):
            prev_query = record['query']
            similarity = self._calculate_query_similarity(current_query, prev_query)

            if similarity > 0.5:
                suggestions['related_queries'].append({
                    'query': prev_query,
                    'similarity': similarity,
                    'timestamp': record['timestamp']
                })

        # Suggest preferred sites if available
        if self.user_preferences['preferred_domains']:
            suggestions['context_aware_params']['preferred_sites'] = list(
                self.user_preferences['preferred_domains']
            )

        return suggestions

    def get_history(self, limit: Optional[int] = None) -> List[Dict[str, Any]]:
        """
        Get search history (oldest first).

        Args:
            limit: Maximum number of most-recent records to return.  None
                returns everything; a non-positive limit returns an empty
                list.  (Previously `limit=0` incorrectly returned the full
                history because of a truthiness check.)

        Returns:
            A new list of search history records
        """
        if limit is None:
            return self.search_history.copy()
        if limit <= 0:
            return []
        # Slicing already produces a fresh list.
        return self.search_history[-limit:]

    def clear_history(self):
        """Clear search history and reset the topic context."""
        self.search_history.clear()
        self.topic_context = None

    def get_preferences(self) -> Dict[str, Any]:
        """
        Get current user preferences.

        Returns:
            User preferences dictionary (copies, so callers cannot mutate
            internal state)
        """
        return {
            'preferred_domains': list(self.user_preferences['preferred_domains']),
            'avoided_domains': list(self.user_preferences['avoided_domains']),
            'preferred_content_types': self.user_preferences['preferred_content_types'].copy(),
            'language': self.user_preferences['language']
        }

    def set_preference(self, key: str, value: Any):
        """
        Set a user preference.  Unknown keys are silently ignored.

        Args:
            key: Preference key (one of the keys created in __init__)
            value: Preference value; for set-valued preferences a list/set
                replaces the whole set, while a scalar is added to it
        """
        if key in self.user_preferences:
            if isinstance(self.user_preferences[key], set):
                if isinstance(value, (list, set)):
                    self.user_preferences[key] = set(value)
                else:
                    self.user_preferences[key].add(value)
            else:
                self.user_preferences[key] = value

    def _update_topic_context(self, query: str, results: List[Dict[str, Any]]):
        """
        Update topic context from the query.

        Args:
            query: Search query
            results: Search results
        """
        # Simple implementation: the context is just the query's lowercased
        # words.  NOTE(review): `results` is currently unused here — presumably
        # intended for future entity extraction; confirm before relying on it.
        self.topic_context = query.lower().split()

    def _learn_preferences(
        self,
        results: List[Dict[str, Any]],
        feedback: Dict[str, Any]
    ):
        """
        Learn user preferences from feedback.

        Args:
            results: Search results
            feedback: User feedback with optional 'clicked_indices' /
                'disliked_indices' lists of result positions
        """
        def _domains_at(indices: List[int]) -> List[str]:
            # Collect non-empty domains for in-range indices only.
            return [
                results[idx].get('displayLink', '')
                for idx in indices
                if 0 <= idx < len(results) and results[idx].get('displayLink', '')
            ]

        # Clicked/used results mark their domains as preferred.
        for domain in _domains_at(feedback.get('clicked_indices', [])):
            self.user_preferences['preferred_domains'].add(domain)

        # Disliked results mark their domains as avoided.
        for domain in _domains_at(feedback.get('disliked_indices', [])):
            self.user_preferences['avoided_domains'].add(domain)

    def _calculate_query_similarity(self, query1: str, query2: str) -> float:
        """
        Calculate similarity between two queries using the Jaccard index
        over lowercased word sets.

        Args:
            query1: First query
            query2: Second query

        Returns:
            Similarity score (0-1)
        """
        words1 = set(query1.lower().split())
        words2 = set(query2.lower().split())

        if not words1 or not words2:
            return 0.0

        intersection = words1 & words2
        union = words1 | words2

        return len(intersection) / len(union) if union else 0.0