aiecs 1.2.1__py3-none-any.whl → 1.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aiecs might be problematic. Click here for more details.
- aiecs/__init__.py +1 -1
- aiecs/config/config.py +2 -1
- aiecs/llm/clients/vertex_client.py +5 -0
- aiecs/main.py +2 -2
- aiecs/scripts/tools_develop/README.md +111 -2
- aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
- aiecs/scripts/tools_develop/validate_tool_schemas.py +80 -21
- aiecs/scripts/tools_develop/verify_tools.py +347 -0
- aiecs/tools/__init__.py +94 -30
- aiecs/tools/apisource/__init__.py +106 -0
- aiecs/tools/apisource/intelligence/__init__.py +20 -0
- aiecs/tools/apisource/intelligence/data_fusion.py +378 -0
- aiecs/tools/apisource/intelligence/query_analyzer.py +387 -0
- aiecs/tools/apisource/intelligence/search_enhancer.py +384 -0
- aiecs/tools/apisource/monitoring/__init__.py +12 -0
- aiecs/tools/apisource/monitoring/metrics.py +308 -0
- aiecs/tools/apisource/providers/__init__.py +114 -0
- aiecs/tools/apisource/providers/base.py +684 -0
- aiecs/tools/apisource/providers/census.py +412 -0
- aiecs/tools/apisource/providers/fred.py +575 -0
- aiecs/tools/apisource/providers/newsapi.py +402 -0
- aiecs/tools/apisource/providers/worldbank.py +346 -0
- aiecs/tools/apisource/reliability/__init__.py +14 -0
- aiecs/tools/apisource/reliability/error_handler.py +362 -0
- aiecs/tools/apisource/reliability/fallback_strategy.py +420 -0
- aiecs/tools/apisource/tool.py +814 -0
- aiecs/tools/apisource/utils/__init__.py +12 -0
- aiecs/tools/apisource/utils/validators.py +343 -0
- aiecs/tools/langchain_adapter.py +95 -17
- aiecs/tools/search_tool/__init__.py +102 -0
- aiecs/tools/search_tool/analyzers.py +583 -0
- aiecs/tools/search_tool/cache.py +280 -0
- aiecs/tools/search_tool/constants.py +127 -0
- aiecs/tools/search_tool/context.py +219 -0
- aiecs/tools/search_tool/core.py +773 -0
- aiecs/tools/search_tool/deduplicator.py +123 -0
- aiecs/tools/search_tool/error_handler.py +257 -0
- aiecs/tools/search_tool/metrics.py +375 -0
- aiecs/tools/search_tool/rate_limiter.py +177 -0
- aiecs/tools/search_tool/schemas.py +297 -0
- aiecs/tools/statistics/data_loader_tool.py +2 -2
- aiecs/tools/statistics/data_transformer_tool.py +1 -1
- aiecs/tools/task_tools/__init__.py +8 -8
- aiecs/tools/task_tools/report_tool.py +1 -1
- aiecs/tools/tool_executor/__init__.py +2 -0
- aiecs/tools/tool_executor/tool_executor.py +284 -14
- aiecs/utils/__init__.py +11 -0
- aiecs/utils/cache_provider.py +698 -0
- aiecs/utils/execution_utils.py +5 -5
- {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/METADATA +1 -1
- {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/RECORD +55 -23
- aiecs/tools/task_tools/search_tool.py +0 -1123
- {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/WHEEL +0 -0
- {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/entry_points.txt +0 -0
- {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/licenses/LICENSE +0 -0
- {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,384 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Search Result Enhancement and Ranking
|
|
3
|
+
|
|
4
|
+
Intelligently scores and filters search results:
|
|
5
|
+
- Calculate relevance scores using keyword matching
|
|
6
|
+
- Compute popularity scores
|
|
7
|
+
- Calculate recency/freshness scores
|
|
8
|
+
- Apply composite scoring with configurable weights
|
|
9
|
+
- Filter by quality, relevance, and date ranges
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import logging
|
|
13
|
+
from datetime import datetime, timedelta
|
|
14
|
+
from typing import Any, Dict, List, Optional
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class SearchEnhancer:
    """
    Enhances search results with relevance scoring and intelligent filtering.

    Each result receives a relevance score (keyword matching), a popularity
    score (usage indicators), and a recency score (freshness), which are
    combined into a composite score using configurable, auto-normalized
    weights. Results can then be filtered by threshold, quality, and date
    range, and sorted by any of the component scores.
    """

    def __init__(
        self,
        relevance_weight: float = 0.5,
        popularity_weight: float = 0.3,
        recency_weight: float = 0.2
    ):
        """
        Initialize search enhancer.

        Args:
            relevance_weight: Weight for relevance score in composite score
            popularity_weight: Weight for popularity score in composite score
            recency_weight: Weight for recency score in composite score

        Raises:
            ValueError: If the weights sum to zero or a negative value.
        """
        total_weight = relevance_weight + popularity_weight + recency_weight
        # Guard: a non-positive sum would raise ZeroDivisionError (or flip
        # score signs) in the normalization below.
        if total_weight <= 0:
            raise ValueError("Score weights must sum to a positive value")

        # Normalize weights so they always sum to 1.0.
        self.relevance_weight = relevance_weight / total_weight
        self.popularity_weight = popularity_weight / total_weight
        self.recency_weight = recency_weight / total_weight

    def enhance_search_results(
        self,
        query: str,
        results: List[Dict[str, Any]],
        options: Optional[Dict[str, Any]] = None
    ) -> List[Dict[str, Any]]:
        """
        Enhance search results with scoring and filtering.

        Args:
            query: Original search query
            results: Raw search results
            options: Enhancement options:
                - relevance_threshold: Minimum composite score (0-1)
                - sort_by: Sort method ('relevance', 'popularity', 'recency', 'composite')
                - date_range: {'start': 'YYYY-MM-DD', 'end': 'YYYY-MM-DD'}
                - min_quality_score: Minimum quality score (0-1)
                - max_results: Maximum number of results to return

        Returns:
            Enhanced and filtered results. Input dicts are not mutated: each
            output item is a shallow copy with a '_search_metadata' key added.
        """
        if not results:
            return []

        options = options or {}
        enhanced = []

        for result in results:
            # Component scores, each in [0, 1].
            relevance = self._calculate_relevance(query, result)
            popularity = self._get_popularity_score(result)
            recency = self._calculate_recency(result)

            # Weighted combination; weights were normalized in __init__.
            composite_score = (
                relevance * self.relevance_weight +
                popularity * self.popularity_weight +
                recency * self.recency_weight
            )

            # Shallow copy so the caller's dicts are left untouched.
            result_copy = result.copy()
            result_copy['_search_metadata'] = {
                'relevance_score': round(relevance, 3),
                'popularity_score': round(popularity, 3),
                'recency_score': round(recency, 3),
                'composite_score': round(composite_score, 3),
                'match_type': self._get_match_type(query, result)
            }

            if self._passes_filters(result_copy, options):
                enhanced.append(result_copy)

        # Sort results (default: by composite score, descending).
        enhanced = self._sort_results(enhanced, options.get('sort_by', 'composite'))

        # Apply max results limit, if requested.
        max_results = options.get('max_results')
        if max_results and max_results > 0:
            enhanced = enhanced[:max_results]

        return enhanced

    def _calculate_relevance(self, query: str, result: Dict[str, Any]) -> float:
        """
        Calculate relevance score using keyword matching.

        Title/name matches are weighted more heavily (0.7) than
        description-style fields (0.3); an exact phrase match in the title
        text boosts the final score by 1.5x, capped at 1.0.

        Args:
            query: Search query
            result: Result item

        Returns:
            Relevance score (0-1)
        """
        query_terms = set(query.lower().split())
        if not query_terms:
            return 0.0

        # Build searchable text from title-like and description-like fields.
        title_text = ""
        description_text = ""

        for field in ['title', 'name']:
            if field in result:
                title_text += " " + str(result[field]).lower()

        for field in ['description', 'notes', 'sourceNote']:
            if field in result:
                description_text += " " + str(result[field]).lower()

        # Fraction of query terms found in each text, capped at 1.0.
        title_matches = sum(1 for term in query_terms if term in title_text)
        title_score = min(title_matches / len(query_terms), 1.0)

        desc_matches = sum(1 for term in query_terms if term in description_text)
        desc_score = min(desc_matches / len(query_terms), 1.0)

        # Weight title matches more heavily than description matches.
        relevance = title_score * 0.7 + desc_score * 0.3

        # Boost for exact phrase match in the title text.
        query_lower = query.lower()
        if query_lower in title_text:
            relevance = min(relevance * 1.5, 1.0)

        return relevance

    def _get_popularity_score(self, result: Dict[str, Any]) -> float:
        """
        Calculate popularity score based on usage indicators.

        Args:
            result: Result item

        Returns:
            Popularity score (0-1); defaults to 0.5 when no indicator exists.
        """
        # Numeric popularity indicators, normalized assuming a max of 100.
        popularity_fields = ['popularity', 'usage_count', 'frequency', 'popularity_rank']

        for field in popularity_fields:
            if field in result:
                value = result[field]
                if isinstance(value, (int, float)):
                    return min(value / 100, 1.0)

        # Heuristic: more frequent updates suggest a more popular series.
        if result.get('frequency') in ['Daily', 'Weekly', 'Monthly']:
            frequency_scores = {'Daily': 1.0, 'Weekly': 0.8, 'Monthly': 0.6}
            return frequency_scores.get(result.get('frequency'), 0.5)

        # Default: medium popularity.
        return 0.5

    def _calculate_recency(self, result: Dict[str, Any]) -> float:
        """
        Calculate recency/freshness score from the newest date field found.

        Args:
            result: Result item

        Returns:
            Recency score (0-1); 0.5 when no parseable date is present.
        """
        date_fields = [
            'updated', 'last_updated', 'observation_end',
            'date', 'publishedAt', 'last_modified'
        ]

        latest_date = None

        for field in date_fields:
            if field in result:
                date_str = result[field]
                try:
                    if 'T' in str(date_str):
                        # ISO format, possibly timezone-aware.
                        date_obj = datetime.fromisoformat(str(date_str).replace('Z', '+00:00'))
                        # Drop tzinfo: subtracting an aware datetime from the
                        # naive utcnow() below raises TypeError. The offset
                        # error is < 1 day — negligible for day-granularity
                        # recency scoring.
                        if date_obj.tzinfo is not None:
                            date_obj = date_obj.replace(tzinfo=None)
                    else:
                        # Simple YYYY-MM-DD format.
                        date_obj = datetime.strptime(str(date_str)[:10], '%Y-%m-%d')

                    if latest_date is None or date_obj > latest_date:
                        latest_date = date_obj
                except (ValueError, TypeError):
                    continue

        if latest_date is None:
            # No date found, assume moderate recency.
            return 0.5

        # Age in days relative to now (naive UTC, matching the parsed dates).
        now = datetime.utcnow()
        age_days = (now - latest_date).days

        # Piecewise score: newer data scores higher, decaying slowly past
        # two years down to a floor of 0.1.
        if age_days < 7:
            return 1.0  # Very recent
        elif age_days < 30:
            return 0.9  # Recent
        elif age_days < 90:
            return 0.7  # Somewhat recent
        elif age_days < 365:
            return 0.5  # This year
        elif age_days < 365 * 2:
            return 0.3  # Last 2 years
        else:
            return max(0.1, 0.3 - (age_days - 365 * 2) / (365 * 10))

    def _get_match_type(self, query: str, result: Dict[str, Any]) -> str:
        """
        Determine the type of match against identifier/title fields.

        Args:
            query: Search query
            result: Result item

        Returns:
            Match type string ('exact', 'partial', 'fuzzy')
        """
        query_lower = query.lower()

        for field in ['title', 'name', 'id', 'series_id']:
            if field in result:
                value = str(result[field]).lower()

                if value == query_lower:
                    return 'exact'
                elif query_lower in value or value in query_lower:
                    return 'partial'

        return 'fuzzy'

    def _passes_filters(
        self,
        result: Dict[str, Any],
        options: Dict[str, Any]
    ) -> bool:
        """
        Check if result passes filter criteria.

        Args:
            result: Result with _search_metadata
            options: Filter options (see enhance_search_results)

        Returns:
            True if result passes all filters
        """
        # Composite score threshold.
        threshold = options.get('relevance_threshold', 0.0)
        composite_score = result['_search_metadata']['composite_score']
        if composite_score < threshold:
            return False

        # Quality score threshold (checked only when quality metadata exists).
        min_quality = options.get('min_quality_score')
        if min_quality is not None:
            quality_score = result.get('_quality', {}).get('score')
            if quality_score is None:
                quality_score = result.get('metadata', {}).get('quality', {}).get('score')

            if quality_score is not None and quality_score < min_quality:
                return False

        # Date range filter (skipped when the result has no parseable date).
        date_range = options.get('date_range')
        if date_range:
            result_date = self._extract_date(result)
            if result_date:
                start = date_range.get('start')
                end = date_range.get('end')

                try:
                    if start:
                        start_date = datetime.strptime(start, '%Y-%m-%d')
                        if result_date < start_date:
                            return False

                    if end:
                        end_date = datetime.strptime(end, '%Y-%m-%d')
                        if result_date > end_date:
                            return False
                except ValueError:
                    # Malformed range: log and let the result pass.
                    logger.warning(f"Invalid date range format: {date_range}")

        return True

    def _extract_date(self, result: Dict[str, Any]) -> Optional[datetime]:
        """Extract the first parseable date from the result (naive datetime)."""
        date_fields = ['date', 'observation_end', 'last_updated', 'publishedAt']

        for field in date_fields:
            if field in result:
                try:
                    date_str = str(result[field])
                    if 'T' in date_str:
                        date_obj = datetime.fromisoformat(date_str.replace('Z', '+00:00'))
                        # Normalize to naive so comparisons against the naive
                        # strptime dates in _passes_filters cannot raise
                        # TypeError (aware vs naive).
                        if date_obj.tzinfo is not None:
                            date_obj = date_obj.replace(tzinfo=None)
                        return date_obj
                    else:
                        return datetime.strptime(date_str[:10], '%Y-%m-%d')
                except (ValueError, TypeError):
                    continue

        return None

    def _sort_results(
        self,
        results: List[Dict[str, Any]],
        sort_by: str
    ) -> List[Dict[str, Any]]:
        """
        Sort results by the requested score, descending.

        Args:
            results: Results with _search_metadata
            sort_by: Sort method ('relevance', 'popularity', 'recency',
                anything else falls back to 'composite')

        Returns:
            Sorted results
        """
        # Dispatch table replaces the former copy-pasted if/elif chain;
        # unknown values fall back to the composite score, as before.
        score_key = {
            'relevance': 'relevance_score',
            'popularity': 'popularity_score',
            'recency': 'recency_score',
        }.get(sort_by, 'composite_score')

        return sorted(
            results,
            key=lambda x: x['_search_metadata'][score_key],
            reverse=True
        )
|
|
384
|
+
|
|
@@ -0,0 +1,308 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Detailed Metrics and Health Monitoring for API Providers
|
|
3
|
+
|
|
4
|
+
This module provides comprehensive performance tracking including:
|
|
5
|
+
- Response time percentiles
|
|
6
|
+
- Data volume statistics
|
|
7
|
+
- Error type distribution
|
|
8
|
+
- Rate limiting events
|
|
9
|
+
- Cache hit rates
|
|
10
|
+
- Overall health scoring
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import logging
|
|
14
|
+
import time
|
|
15
|
+
from collections import defaultdict
|
|
16
|
+
from datetime import datetime
|
|
17
|
+
from threading import Lock
|
|
18
|
+
from typing import Any, Dict, List, Optional
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class DetailedMetrics:
    """
    Tracks detailed performance metrics for API providers.

    Provides comprehensive monitoring including response times (with
    percentiles over a sliding window), data volumes, error patterns,
    rate-limiting events, and an overall health score. All public methods
    are thread-safe via a single non-reentrant lock.
    """

    def __init__(self, max_response_times: int = 100):
        """
        Initialize metrics tracker.

        Args:
            max_response_times: Maximum number of response times to keep in
                memory (sliding window used for percentile calculations)
        """
        self.max_response_times = max_response_times
        self.lock = Lock()
        self.metrics = self._fresh_metrics()

    @staticmethod
    def _fresh_metrics() -> Dict[str, Any]:
        """Return a zeroed metrics structure (shared by __init__ and reset)."""
        return {
            'requests': {
                'total': 0,
                'successful': 0,
                'failed': 0,
                'cached': 0
            },
            'performance': {
                'response_times': [],  # Last N response times (sliding window)
                'avg_response_time_ms': 0.0,
                'p50_response_time_ms': 0.0,
                'p95_response_time_ms': 0.0,
                'p99_response_time_ms': 0.0,
                'min_response_time_ms': 0.0,
                'max_response_time_ms': 0.0
            },
            'data_volume': {
                'total_records_fetched': 0,
                'total_bytes_transferred': 0,
                'avg_records_per_request': 0.0,
                'avg_bytes_per_request': 0.0
            },
            'errors': {
                'by_type': defaultdict(int),  # {error_type: count}
                'recent_errors': []  # Last 10 errors with details
            },
            'rate_limiting': {
                'throttled_requests': 0,
                'total_wait_time_ms': 0.0,
                'avg_wait_time_ms': 0.0
            },
            'timestamps': {
                'first_request': None,
                'last_request': None,
                'last_success': None,
                'last_failure': None
            }
        }

    def record_request(
        self,
        success: bool,
        response_time_ms: float,
        record_count: int = 0,
        bytes_transferred: int = 0,
        cached: bool = False,
        error_type: Optional[str] = None,
        error_message: Optional[str] = None
    ):
        """
        Record a request with its metrics.

        Args:
            success: Whether the request was successful
            response_time_ms: Response time in milliseconds
            record_count: Number of records returned
            bytes_transferred: Bytes transferred in the response
            cached: Whether the response was cached
            error_type: Type of error if failed (e.g., 'timeout', 'auth', 'rate_limit')
            error_message: Error message if failed
        """
        with self.lock:
            now = datetime.utcnow().isoformat()

            # Update request counts and success/failure timestamps.
            self.metrics['requests']['total'] += 1
            if success:
                self.metrics['requests']['successful'] += 1
                self.metrics['timestamps']['last_success'] = now
            else:
                self.metrics['requests']['failed'] += 1
                self.metrics['timestamps']['last_failure'] = now

            if cached:
                self.metrics['requests']['cached'] += 1

            if self.metrics['timestamps']['first_request'] is None:
                self.metrics['timestamps']['first_request'] = now
            self.metrics['timestamps']['last_request'] = now

            # Sliding window of response times, oldest dropped first.
            self.metrics['performance']['response_times'].append(response_time_ms)
            if len(self.metrics['performance']['response_times']) > self.max_response_times:
                self.metrics['performance']['response_times'].pop(0)

            self._calculate_percentiles()

            # Data-volume totals and per-request averages.
            self.metrics['data_volume']['total_records_fetched'] += record_count
            self.metrics['data_volume']['total_bytes_transferred'] += bytes_transferred

            total_requests = self.metrics['requests']['total']
            if total_requests > 0:
                self.metrics['data_volume']['avg_records_per_request'] = (
                    self.metrics['data_volume']['total_records_fetched'] / total_requests
                )
                self.metrics['data_volume']['avg_bytes_per_request'] = (
                    self.metrics['data_volume']['total_bytes_transferred'] / total_requests
                )

            # Record error details (bounded to the last 10 entries).
            if not success and error_type:
                self.metrics['errors']['by_type'][error_type] += 1

                error_entry = {
                    'type': error_type,
                    'message': error_message or 'Unknown error',
                    'timestamp': now,
                    'response_time_ms': response_time_ms
                }

                self.metrics['errors']['recent_errors'].append(error_entry)
                if len(self.metrics['errors']['recent_errors']) > 10:
                    self.metrics['errors']['recent_errors'].pop(0)

    def record_rate_limit_wait(self, wait_time_ms: float):
        """
        Record a rate limit wait event.

        Args:
            wait_time_ms: Time waited in milliseconds
        """
        with self.lock:
            self.metrics['rate_limiting']['throttled_requests'] += 1
            self.metrics['rate_limiting']['total_wait_time_ms'] += wait_time_ms

            throttled = self.metrics['rate_limiting']['throttled_requests']
            if throttled > 0:
                self.metrics['rate_limiting']['avg_wait_time_ms'] = (
                    self.metrics['rate_limiting']['total_wait_time_ms'] / throttled
                )

    def _calculate_percentiles(self):
        """Recompute response-time percentiles. Caller must hold self.lock."""
        times = sorted(self.metrics['performance']['response_times'])
        if not times:
            return

        n = len(times)
        self.metrics['performance']['avg_response_time_ms'] = sum(times) / n
        self.metrics['performance']['min_response_time_ms'] = times[0]
        self.metrics['performance']['max_response_time_ms'] = times[-1]
        self.metrics['performance']['p50_response_time_ms'] = times[n // 2]
        self.metrics['performance']['p95_response_time_ms'] = times[int(n * 0.95)]
        self.metrics['performance']['p99_response_time_ms'] = times[min(int(n * 0.99), n - 1)]

    def _calculate_health_score_unlocked(self) -> float:
        """
        Calculate health score without acquiring the lock (internal use only).
        Must be called while holding self.lock.
        """
        total = self.metrics['requests']['total']
        if total == 0:
            return 1.0

        # Success rate score (40%)
        success_rate = self.metrics['requests']['successful'] / total
        success_score = success_rate * 0.4

        # Performance score (30%): < 200ms avg is excellent, > 2000ms is poor.
        avg_time = self.metrics['performance']['avg_response_time_ms']
        if avg_time < 200:
            performance_score = 0.3
        elif avg_time > 2000:
            performance_score = 0.0
        else:
            performance_score = max(0, min(1, (2000 - avg_time) / 1800)) * 0.3

        # Cache hit rate score (20%)
        cache_rate = self.metrics['requests']['cached'] / total
        cache_score = cache_rate * 0.2

        # Error diversity score (10%) - fewer distinct error types is better.
        error_types = len(self.metrics['errors']['by_type'])
        error_score = max(0, (5 - error_types) / 5) * 0.1

        return success_score + performance_score + cache_score + error_score

    def get_health_score(self) -> float:
        """
        Calculate overall health score (0-1).

        The health score considers:
        - Success rate (40%)
        - Performance (30%)
        - Cache hit rate (20%)
        - Error diversity (10%)

        Returns:
            Health score between 0 and 1
        """
        with self.lock:
            return self._calculate_health_score_unlocked()

    def get_stats(self) -> Dict[str, Any]:
        """
        Get all metrics as a dictionary.

        Returns:
            Complete metrics dictionary (response_times window excluded)
        """
        with self.lock:
            # Convert defaultdict to regular dict for JSON serialization.
            # NOTE: use the unlocked health-score helper here — calling
            # get_health_score() would re-acquire the non-reentrant lock
            # we are already holding and deadlock.
            stats = {
                'requests': dict(self.metrics['requests']),
                'performance': dict(self.metrics['performance']),
                'data_volume': dict(self.metrics['data_volume']),
                'errors': {
                    'by_type': dict(self.metrics['errors']['by_type']),
                    'recent_errors': list(self.metrics['errors']['recent_errors'])
                },
                'rate_limiting': dict(self.metrics['rate_limiting']),
                'timestamps': dict(self.metrics['timestamps']),
                'health_score': self._calculate_health_score_unlocked()
            }

            # Remove response_times array to keep output clean.
            stats['performance'] = {
                k: v for k, v in stats['performance'].items()
                if k != 'response_times'
            }

            return stats

    def get_summary(self) -> Dict[str, Any]:
        """
        Get a concise summary of key metrics.

        Returns:
            Summary dictionary with key metrics
        """
        with self.lock:
            total = self.metrics['requests']['total']
            if total == 0:
                return {
                    'status': 'no_activity',
                    'health_score': 1.0
                }

            success_rate = self.metrics['requests']['successful'] / total
            cache_hit_rate = self.metrics['requests']['cached'] / total
            # Use unlocked version to avoid deadlock on the held lock.
            health_score = self._calculate_health_score_unlocked()

            return {
                'status': 'healthy' if health_score > 0.7 else 'degraded',
                'health_score': round(health_score, 3),
                'total_requests': total,
                'success_rate': round(success_rate, 3),
                'cache_hit_rate': round(cache_hit_rate, 3),
                'avg_response_time_ms': round(
                    self.metrics['performance']['avg_response_time_ms'], 2
                ),
                'p95_response_time_ms': round(
                    self.metrics['performance']['p95_response_time_ms'], 2
                ),
                'total_errors': self.metrics['requests']['failed'],
                'error_types': len(self.metrics['errors']['by_type'])
            }

    def reset(self):
        """Reset all metrics to their initial state."""
        with self.lock:
            # Rebuild the metrics structure in place. The previous
            # implementation called self.__init__(), which replaced
            # self.lock with a brand-new Lock while the old one was
            # still held — racy for any concurrent caller.
            self.metrics = self._fresh_metrics()
|
|
308
|
+
|