aiecs 1.2.1__py3-none-any.whl → 1.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiecs might be problematic. Click here for more details.

Files changed (56) hide show
  1. aiecs/__init__.py +1 -1
  2. aiecs/config/config.py +2 -1
  3. aiecs/llm/clients/vertex_client.py +5 -0
  4. aiecs/main.py +2 -2
  5. aiecs/scripts/tools_develop/README.md +111 -2
  6. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  7. aiecs/scripts/tools_develop/validate_tool_schemas.py +80 -21
  8. aiecs/scripts/tools_develop/verify_tools.py +347 -0
  9. aiecs/tools/__init__.py +94 -30
  10. aiecs/tools/apisource/__init__.py +106 -0
  11. aiecs/tools/apisource/intelligence/__init__.py +20 -0
  12. aiecs/tools/apisource/intelligence/data_fusion.py +378 -0
  13. aiecs/tools/apisource/intelligence/query_analyzer.py +387 -0
  14. aiecs/tools/apisource/intelligence/search_enhancer.py +384 -0
  15. aiecs/tools/apisource/monitoring/__init__.py +12 -0
  16. aiecs/tools/apisource/monitoring/metrics.py +308 -0
  17. aiecs/tools/apisource/providers/__init__.py +114 -0
  18. aiecs/tools/apisource/providers/base.py +684 -0
  19. aiecs/tools/apisource/providers/census.py +412 -0
  20. aiecs/tools/apisource/providers/fred.py +575 -0
  21. aiecs/tools/apisource/providers/newsapi.py +402 -0
  22. aiecs/tools/apisource/providers/worldbank.py +346 -0
  23. aiecs/tools/apisource/reliability/__init__.py +14 -0
  24. aiecs/tools/apisource/reliability/error_handler.py +362 -0
  25. aiecs/tools/apisource/reliability/fallback_strategy.py +420 -0
  26. aiecs/tools/apisource/tool.py +814 -0
  27. aiecs/tools/apisource/utils/__init__.py +12 -0
  28. aiecs/tools/apisource/utils/validators.py +343 -0
  29. aiecs/tools/langchain_adapter.py +95 -17
  30. aiecs/tools/search_tool/__init__.py +102 -0
  31. aiecs/tools/search_tool/analyzers.py +583 -0
  32. aiecs/tools/search_tool/cache.py +280 -0
  33. aiecs/tools/search_tool/constants.py +127 -0
  34. aiecs/tools/search_tool/context.py +219 -0
  35. aiecs/tools/search_tool/core.py +773 -0
  36. aiecs/tools/search_tool/deduplicator.py +123 -0
  37. aiecs/tools/search_tool/error_handler.py +257 -0
  38. aiecs/tools/search_tool/metrics.py +375 -0
  39. aiecs/tools/search_tool/rate_limiter.py +177 -0
  40. aiecs/tools/search_tool/schemas.py +297 -0
  41. aiecs/tools/statistics/data_loader_tool.py +2 -2
  42. aiecs/tools/statistics/data_transformer_tool.py +1 -1
  43. aiecs/tools/task_tools/__init__.py +8 -8
  44. aiecs/tools/task_tools/report_tool.py +1 -1
  45. aiecs/tools/tool_executor/__init__.py +2 -0
  46. aiecs/tools/tool_executor/tool_executor.py +284 -14
  47. aiecs/utils/__init__.py +11 -0
  48. aiecs/utils/cache_provider.py +698 -0
  49. aiecs/utils/execution_utils.py +5 -5
  50. {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/METADATA +1 -1
  51. {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/RECORD +55 -23
  52. aiecs/tools/task_tools/search_tool.py +0 -1123
  53. {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/WHEEL +0 -0
  54. {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/entry_points.txt +0 -0
  55. {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/licenses/LICENSE +0 -0
  56. {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,375 @@
1
+ """
2
+ Enhanced Metrics Collection
3
+
4
+ This module provides comprehensive performance and quality metrics tracking
5
+ for the search tool.
6
+ """
7
+
8
+ from datetime import datetime
9
+ from typing import Any, Dict, List, Optional
10
+
11
+
12
class EnhancedMetrics:
    """Comprehensive metrics collection for search operations.

    Tracks request counters, response-time percentiles, result-quality
    running averages, error breakdowns, cache efficiency, and query
    patterns. All state lives in the nested ``self.metrics`` dict so it
    can be exported as a single structure via :meth:`get_metrics`.
    """

    def __init__(self):
        """Initialize enhanced metrics with a zeroed structure."""
        self.metrics = self._initial_metrics()

    @staticmethod
    def _initial_metrics() -> Dict[str, Any]:
        """Build the zeroed metrics structure (shared by __init__ and reset)."""
        return {
            # Basic counters
            'requests': {
                'total': 0,
                'successful': 0,
                'failed': 0,
                'cached': 0
            },

            # Performance metrics
            'performance': {
                'response_times_ms': [],  # rolling window: most recent 100
                'avg_response_time_ms': 0,
                'p50_response_time_ms': 0,
                'p95_response_time_ms': 0,
                'p99_response_time_ms': 0,
                'slowest_query': None,  # {'query', 'time', 'type'} once recorded
                'fastest_query': None
            },

            # Quality metrics (running averages over successful searches)
            'quality': {
                'avg_results_per_query': 0,
                'avg_quality_score': 0,
                'high_quality_results_pct': 0,
                'queries_with_no_results': 0
            },

            # Error analysis
            'errors': {
                'by_type': {},
                'recent_errors': [],  # most recent 10
                'error_rate': 0.0
            },

            # Cache efficiency
            'cache': {
                'hit_rate': 0.0,
                'total_hits': 0,
                'total_misses': 0,
                'avg_age_seconds': 0
            },

            # Rate limiting
            'rate_limiting': {
                'throttled_requests': 0,
                'avg_wait_time_ms': 0,
                'quota_utilization_pct': 0
            },

            # Query patterns
            'patterns': {
                'top_query_types': {},
                'top_domains_returned': {},
                'avg_query_length': 0
            }
        }

    def record_search(
        self,
        query: str,
        search_type: str,
        results: List[Dict[str, Any]],
        response_time_ms: float,
        cached: bool = False,
        error: Optional[Exception] = None
    ):
        """
        Record a search operation.

        Args:
            query: Search query
            search_type: Type of search performed
            results: Search results
            response_time_ms: Response time in milliseconds
            cached: Whether result was from cache
            error: Error if search failed
        """
        # Update request counts
        self.metrics['requests']['total'] += 1

        if error:
            self.metrics['requests']['failed'] += 1
            self._record_error(error)
        else:
            self.metrics['requests']['successful'] += 1

        if cached:
            self.metrics['requests']['cached'] += 1
            self.metrics['cache']['total_hits'] += 1
        else:
            self.metrics['cache']['total_misses'] += 1

        # Update performance metrics (bounded window of the last 100 samples)
        times = self.metrics['performance']['response_times_ms']
        times.append(response_time_ms)
        if len(times) > 100:
            times.pop(0)

        self._update_percentiles()

        # Track slowest/fastest queries
        slowest = self.metrics['performance']['slowest_query']
        if not slowest or response_time_ms > slowest['time']:
            self.metrics['performance']['slowest_query'] = {
                'query': query,
                'time': response_time_ms,
                'type': search_type
            }

        fastest = self.metrics['performance']['fastest_query']
        if not fastest or response_time_ms < fastest['time']:
            self.metrics['performance']['fastest_query'] = {
                'query': query,
                'time': response_time_ms,
                'type': search_type
            }

        # Update quality metrics.
        # Quality averages are only folded in for successful searches: if an
        # error was recorded, `requests.successful` was not incremented and
        # could be 0, which previously risked a ZeroDivisionError when a
        # non-empty result list accompanied an error.
        if not results:
            self.metrics['quality']['queries_with_no_results'] += 1
        elif error is None:
            result_count = len(results)

            # Average quality across this batch (0.5 default when unscored)
            avg_quality = sum(
                r.get('_quality', {}).get('quality_score', 0.5)
                for r in results
            ) / result_count

            # Count high quality results (score above 0.75)
            high_quality_count = sum(
                1 for r in results
                if r.get('_quality', {}).get('quality_score', 0) > 0.75
            )

            # Update running averages; `total` >= 1 because this call just
            # incremented the successful counter.
            total = self.metrics['requests']['successful']

            current_avg_results = self.metrics['quality']['avg_results_per_query']
            self.metrics['quality']['avg_results_per_query'] = (
                (current_avg_results * (total - 1) + result_count) / total
            )

            current_avg_quality = self.metrics['quality']['avg_quality_score']
            self.metrics['quality']['avg_quality_score'] = (
                (current_avg_quality * (total - 1) + avg_quality) / total
            )

            current_high_pct = self.metrics['quality']['high_quality_results_pct']
            high_pct = high_quality_count / result_count
            self.metrics['quality']['high_quality_results_pct'] = (
                (current_high_pct * (total - 1) + high_pct) / total
            )

        # Update query patterns
        query_type = self._detect_query_type(query)
        self.metrics['patterns']['top_query_types'][query_type] = (
            self.metrics['patterns']['top_query_types'].get(query_type, 0) + 1
        )

        # Track returned domains
        for result in results:
            domain = result.get('displayLink', 'unknown')
            self.metrics['patterns']['top_domains_returned'][domain] = (
                self.metrics['patterns']['top_domains_returned'].get(domain, 0) + 1
            )

        # Update average query length (in words, over ALL requests)
        total = self.metrics['requests']['total']
        current_avg_len = self.metrics['patterns']['avg_query_length']
        self.metrics['patterns']['avg_query_length'] = (
            (current_avg_len * (total - 1) + len(query.split())) / total
        )

        # Update cache hit rate
        total_cache_requests = (
            self.metrics['cache']['total_hits'] +
            self.metrics['cache']['total_misses']
        )
        if total_cache_requests > 0:
            self.metrics['cache']['hit_rate'] = (
                self.metrics['cache']['total_hits'] / total_cache_requests
            )

    def _update_percentiles(self):
        """Recompute avg/p50/p95/p99 over the recent response-time window."""
        times = sorted(self.metrics['performance']['response_times_ms'])
        if not times:
            return

        perf = self.metrics['performance']
        perf['avg_response_time_ms'] = sum(times) / len(times)
        # Nearest-rank style indexing; int() truncation keeps indices in range
        perf['p50_response_time_ms'] = times[len(times) // 2]
        perf['p95_response_time_ms'] = times[int(len(times) * 0.95)]
        perf['p99_response_time_ms'] = times[int(len(times) * 0.99)]

    def _record_error(self, error: Exception):
        """Record an error: per-type counter, recent list, and error rate."""
        error_type = type(error).__name__

        self.metrics['errors']['by_type'][error_type] = (
            self.metrics['errors']['by_type'].get(error_type, 0) + 1
        )

        self.metrics['errors']['recent_errors'].append({
            'type': error_type,
            'message': str(error),
            # NOTE(review): datetime.utcnow() is deprecated since Python 3.12;
            # consider datetime.now(timezone.utc) — changes the ISO string
            # (adds an offset), so confirm no consumer parses the old format.
            'timestamp': datetime.utcnow().isoformat()
        })

        # Keep only the 10 most recent errors
        if len(self.metrics['errors']['recent_errors']) > 10:
            self.metrics['errors']['recent_errors'].pop(0)

        # Update error rate
        total = self.metrics['requests']['total']
        failed = self.metrics['requests']['failed']
        self.metrics['errors']['error_rate'] = failed / total if total > 0 else 0

    def _detect_query_type(self, query: str) -> str:
        """Classify a query by simple keyword heuristics (first match wins)."""
        query_lower = query.lower()

        if any(kw in query_lower for kw in ['how to', 'tutorial', 'guide']):
            return 'how_to'
        elif any(kw in query_lower for kw in ['what is', 'define', 'meaning']):
            return 'definition'
        elif any(kw in query_lower for kw in ['vs', 'versus', 'compare']):
            return 'comparison'
        elif any(kw in query_lower for kw in ['latest', 'news', 'recent']):
            return 'news'
        else:
            return 'general'

    def get_health_score(self) -> float:
        """
        Calculate system health score (0-1).

        Weighted blend: success rate 40%, response-time 25%, result
        quality 25%, cache hit rate 10%.

        Returns:
            Health score based on success rate, performance, quality, and cache efficiency
        """
        total = self.metrics['requests']['total']
        if total == 0:
            return 1.0  # no data yet: report fully healthy

        # Success rate score (40%)
        success_rate = self.metrics['requests']['successful'] / total
        success_score = success_rate * 0.4

        # Performance score (25%)
        avg_time = self.metrics['performance']['avg_response_time_ms']
        # < 500ms excellent, > 3000ms poor
        performance_score = max(0, min(1, (3000 - avg_time) / 2500)) * 0.25

        # Quality score (25%)
        quality_score = self.metrics['quality']['avg_quality_score'] * 0.25

        # Cache efficiency score (10%)
        cache_score = self.metrics['cache']['hit_rate'] * 0.1

        return success_score + performance_score + quality_score + cache_score

    def generate_report(self) -> str:
        """
        Generate human-readable metrics report.

        Returns:
            Formatted report string
        """
        health = self.get_health_score()
        total = self.metrics['requests']['total']

        if total == 0:
            return "No search operations recorded yet."

        health_indicator = '✅' if health > 0.8 else '⚠️' if health > 0.6 else '❌'

        # Format top error types
        top_errors = sorted(
            self.metrics['errors']['by_type'].items(),
            key=lambda x: x[1],
            reverse=True
        )[:3]
        error_str = ', '.join(f"{k}({v})" for k, v in top_errors) if top_errors else 'None'

        # Format top query types
        top_types = sorted(
            self.metrics['patterns']['top_query_types'].items(),
            key=lambda x: x[1],
            reverse=True
        )[:3]
        types_str = ', '.join(f"{k}({v})" for k, v in top_types)

        # Format top domains
        top_domains = sorted(
            self.metrics['patterns']['top_domains_returned'].items(),
            key=lambda x: x[1],
            reverse=True
        )[:5]
        domains_str = ', '.join(f"{k}({v})" for k, v in top_domains)

        # Extract slowest query info (may be unset before any search)
        slowest_query = self.metrics['performance']['slowest_query']
        slowest_query_str = 'N/A'
        slowest_time_str = '0ms'
        if slowest_query:
            slowest_query_str = slowest_query['query']
            slowest_time_str = f"{slowest_query['time']:.0f}ms"

        report = f"""
Search Tool Performance Report
{'='*50}

Overall Health Score: {health:.2%} {health_indicator}

Requests:
  Total: {total}
  Successful: {self.metrics['requests']['successful']} ({self.metrics['requests']['successful']/total:.1%})
  Failed: {self.metrics['requests']['failed']}
  Cached: {self.metrics['requests']['cached']}

Performance:
  Avg Response: {self.metrics['performance']['avg_response_time_ms']:.0f}ms
  P95 Response: {self.metrics['performance']['p95_response_time_ms']:.0f}ms
  Slowest: {slowest_query_str} ({slowest_time_str})

Quality:
  Avg Results/Query: {self.metrics['quality']['avg_results_per_query']:.1f}
  Avg Quality Score: {self.metrics['quality']['avg_quality_score']:.2f}
  High Quality %: {self.metrics['quality']['high_quality_results_pct']:.1%}
  No Results: {self.metrics['quality']['queries_with_no_results']}

Cache:
  Hit Rate: {self.metrics['cache']['hit_rate']:.1%}
  Hits: {self.metrics['cache']['total_hits']}
  Misses: {self.metrics['cache']['total_misses']}

Errors:
  Error Rate: {self.metrics['errors']['error_rate']:.1%}
  Top Types: {error_str}

Query Patterns:
  Top Types: {types_str}
  Avg Query Length: {self.metrics['patterns']['avg_query_length']:.1f} words
  Top Domains: {domains_str}
"""
        return report

    def get_metrics(self) -> Dict[str, Any]:
        """
        Get all metrics.

        Returns:
            A deep copy of the complete metrics dictionary, so callers
            cannot mutate internal state through nested sub-dicts (the
            previous shallow .copy() did not protect nested structures).
        """
        import copy  # local import keeps the change self-contained
        return copy.deepcopy(self.metrics)

    def reset(self):
        """Reset all metrics to their initial zeroed state."""
        # Rebuild the structure directly instead of re-invoking __init__
        self.metrics = self._initial_metrics()
@@ -0,0 +1,177 @@
1
+ """
2
+ Rate Limiting and Circuit Breaker Components
3
+
4
+ This module implements rate limiting using token bucket algorithm and
5
+ circuit breaker pattern for API resilience.
6
+ """
7
+
8
+ import time
9
+ from collections import deque
10
+ from threading import Lock
11
+ from typing import Optional
12
+
13
+ from .constants import CircuitState, RateLimitError, CircuitBreakerOpenError
14
+
15
+
16
+ # ============================================================================
17
+ # Rate Limiter
18
+ # ============================================================================
19
+
20
class RateLimiter:
    """
    Token bucket rate limiter for API requests.

    Implements a token bucket algorithm to limit the rate of API requests
    and prevent quota exhaustion. The bucket starts full and refills
    continuously at ``max_requests / time_window`` tokens per second.
    Thread-safe: all state mutations happen under an internal lock.
    """

    def __init__(self, max_requests: int, time_window: int):
        """
        Initialize rate limiter.

        Args:
            max_requests: Maximum number of requests allowed
            time_window: Time window in seconds
        """
        self.max_requests = max_requests
        self.time_window = time_window
        self.tokens = max_requests  # bucket starts full
        self.last_update = time.time()
        self.lock = Lock()
        # Timestamps of granted requests; used only for the error message
        self.request_history: deque = deque()

    def _refill_tokens(self):
        """Refill tokens proportionally to elapsed time (caller holds the lock)."""
        now = time.time()
        time_passed = now - self.last_update

        # Refill tokens proportionally to time passed, capped at bucket size
        refill_rate = self.max_requests / self.time_window
        tokens_to_add = time_passed * refill_rate

        self.tokens = min(self.max_requests, self.tokens + tokens_to_add)
        self.last_update = now

    def acquire(self, tokens: int = 1) -> bool:
        """
        Attempt to acquire tokens.

        Args:
            tokens: Number of tokens to acquire

        Returns:
            True when the tokens were acquired. (This method never returns
            False — on exhaustion it raises instead; the previous docstring
            incorrectly documented a False return.)

        Raises:
            RateLimitError: If the rate limit is exceeded; the message
                includes the recent request count and a suggested wait time.
        """
        with self.lock:
            self._refill_tokens()

            # Drop request timestamps older than the window so the error
            # message reports an accurate recent-request count
            cutoff_time = time.time() - self.time_window
            while self.request_history and self.request_history[0] < cutoff_time:
                self.request_history.popleft()

            # Check if we have enough tokens
            if self.tokens >= tokens:
                self.tokens -= tokens
                self.request_history.append(time.time())
                return True

            # Not enough tokens: estimate time until the deficit refills
            refill_rate = self.max_requests / self.time_window
            wait_time = (tokens - self.tokens) / refill_rate
            raise RateLimitError(
                f"Rate limit exceeded. {len(self.request_history)} requests in last "
                f"{self.time_window}s. Wait {wait_time:.1f}s before retrying."
            )

    def get_remaining_quota(self) -> int:
        """Get remaining quota as a whole-token count (after refill)."""
        with self.lock:
            self._refill_tokens()
            return int(self.tokens)
95
+
96
+ # ============================================================================
97
+ # Circuit Breaker
98
+ # ============================================================================
99
+
100
class CircuitBreaker:
    """
    Circuit breaker pattern implementation for API resilience.

    Implements a circuit breaker to prevent cascading failures when
    the API is experiencing issues: after ``failure_threshold``
    consecutive failures the circuit opens and calls fail fast until
    ``timeout`` seconds elapse, at which point a single trial call is
    allowed (half-open state).
    """

    def __init__(self, failure_threshold: int, timeout: int):
        """
        Initialize circuit breaker.

        Args:
            failure_threshold: Number of failures before opening circuit
            timeout: Timeout in seconds before trying half-open state
        """
        self.failure_threshold = failure_threshold
        self.timeout = timeout
        self.failure_count = 0
        # Set on first failure; always non-None whenever state is OPEN
        self.last_failure_time: Optional[float] = None
        self.state = CircuitState.CLOSED
        self.lock = Lock()

    def call(self, func, *args, **kwargs):
        """
        Execute function with circuit breaker protection.

        Args:
            func: Function to execute
            *args: Positional arguments
            **kwargs: Keyword arguments

        Returns:
            Function result

        Raises:
            CircuitBreakerOpenError: If circuit is open
        """
        with self.lock:
            if self.state == CircuitState.OPEN:
                elapsed = time.time() - self.last_failure_time
                if elapsed >= self.timeout:
                    # Timeout elapsed: allow a probe call in half-open state
                    self.state = CircuitState.HALF_OPEN
                    self.failure_count = 0
                else:
                    raise CircuitBreakerOpenError(
                        f"Circuit breaker is OPEN. Retry after "
                        f"{self.timeout - elapsed:.1f}s"
                    )

        # Invoke OUTSIDE the lock: func may block for a long time, and
        # _on_success/_on_failure re-acquire self.lock, which is a
        # non-reentrant threading.Lock (holding it here would deadlock).
        try:
            result = func(*args, **kwargs)
        except Exception:
            self._on_failure()
            raise  # bare raise preserves the original traceback
        self._on_success()
        return result

    def _on_success(self):
        """Handle successful call: a half-open probe success closes the circuit."""
        with self.lock:
            if self.state == CircuitState.HALF_OPEN:
                self.state = CircuitState.CLOSED
                self.failure_count = 0

    def _on_failure(self):
        """Handle failed call: count it and open the circuit at the threshold."""
        with self.lock:
            self.failure_count += 1
            self.last_failure_time = time.time()

            if self.failure_count >= self.failure_threshold:
                self.state = CircuitState.OPEN

    def get_state(self) -> str:
        """Get current circuit state as its string value."""
        return self.state.value