aiecs 1.2.1__py3-none-any.whl → 1.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiecs might be problematic. Click here for more details.

Files changed (56)
  1. aiecs/__init__.py +1 -1
  2. aiecs/config/config.py +2 -1
  3. aiecs/llm/clients/vertex_client.py +5 -0
  4. aiecs/main.py +2 -2
  5. aiecs/scripts/tools_develop/README.md +111 -2
  6. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  7. aiecs/scripts/tools_develop/validate_tool_schemas.py +80 -21
  8. aiecs/scripts/tools_develop/verify_tools.py +347 -0
  9. aiecs/tools/__init__.py +94 -30
  10. aiecs/tools/apisource/__init__.py +106 -0
  11. aiecs/tools/apisource/intelligence/__init__.py +20 -0
  12. aiecs/tools/apisource/intelligence/data_fusion.py +378 -0
  13. aiecs/tools/apisource/intelligence/query_analyzer.py +387 -0
  14. aiecs/tools/apisource/intelligence/search_enhancer.py +384 -0
  15. aiecs/tools/apisource/monitoring/__init__.py +12 -0
  16. aiecs/tools/apisource/monitoring/metrics.py +308 -0
  17. aiecs/tools/apisource/providers/__init__.py +114 -0
  18. aiecs/tools/apisource/providers/base.py +684 -0
  19. aiecs/tools/apisource/providers/census.py +412 -0
  20. aiecs/tools/apisource/providers/fred.py +575 -0
  21. aiecs/tools/apisource/providers/newsapi.py +402 -0
  22. aiecs/tools/apisource/providers/worldbank.py +346 -0
  23. aiecs/tools/apisource/reliability/__init__.py +14 -0
  24. aiecs/tools/apisource/reliability/error_handler.py +362 -0
  25. aiecs/tools/apisource/reliability/fallback_strategy.py +420 -0
  26. aiecs/tools/apisource/tool.py +814 -0
  27. aiecs/tools/apisource/utils/__init__.py +12 -0
  28. aiecs/tools/apisource/utils/validators.py +343 -0
  29. aiecs/tools/langchain_adapter.py +95 -17
  30. aiecs/tools/search_tool/__init__.py +102 -0
  31. aiecs/tools/search_tool/analyzers.py +583 -0
  32. aiecs/tools/search_tool/cache.py +280 -0
  33. aiecs/tools/search_tool/constants.py +127 -0
  34. aiecs/tools/search_tool/context.py +219 -0
  35. aiecs/tools/search_tool/core.py +773 -0
  36. aiecs/tools/search_tool/deduplicator.py +123 -0
  37. aiecs/tools/search_tool/error_handler.py +257 -0
  38. aiecs/tools/search_tool/metrics.py +375 -0
  39. aiecs/tools/search_tool/rate_limiter.py +177 -0
  40. aiecs/tools/search_tool/schemas.py +297 -0
  41. aiecs/tools/statistics/data_loader_tool.py +2 -2
  42. aiecs/tools/statistics/data_transformer_tool.py +1 -1
  43. aiecs/tools/task_tools/__init__.py +8 -8
  44. aiecs/tools/task_tools/report_tool.py +1 -1
  45. aiecs/tools/tool_executor/__init__.py +2 -0
  46. aiecs/tools/tool_executor/tool_executor.py +284 -14
  47. aiecs/utils/__init__.py +11 -0
  48. aiecs/utils/cache_provider.py +698 -0
  49. aiecs/utils/execution_utils.py +5 -5
  50. {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/METADATA +1 -1
  51. {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/RECORD +55 -23
  52. aiecs/tools/task_tools/search_tool.py +0 -1123
  53. {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/WHEEL +0 -0
  54. {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/entry_points.txt +0 -0
  55. {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/licenses/LICENSE +0 -0
  56. {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,583 @@
1
+ """
2
+ Search Result Analyzers
3
+
4
+ This module contains analyzers for assessing search result quality,
5
+ understanding query intent, and generating result summaries.
6
+ """
7
+
8
+ from datetime import datetime
9
+ from typing import Any, Dict, List, Optional
10
+
11
+ from .constants import QueryIntentType, CredibilityLevel
12
+
13
+
14
+ # ============================================================================
15
+ # Result Quality Analyzer
16
+ # ============================================================================
17
+
18
class ResultQualityAnalyzer:
    """Analyzer for assessing search result quality.

    Scores each result on domain authority, query relevance, freshness,
    and security signals, combines them into a weighted quality score,
    and can re-rank a result list by any of those dimensions.
    """

    # High authority domains with trust scores (0.0-1.0).
    # Keys starting with '.' are TLD/suffix rules ('.gov', '.edu');
    # all other keys match a registrable domain exactly or as a subdomain.
    AUTHORITATIVE_DOMAINS = {
        # Academic and research
        'scholar.google.com': 0.95,
        'arxiv.org': 0.95,
        'ieee.org': 0.95,
        'acm.org': 0.95,
        'nature.com': 0.95,
        'science.org': 0.95,

        # Government and official
        '.gov': 0.90,
        '.edu': 0.85,
        'who.int': 0.90,
        'un.org': 0.90,

        # Major media
        'nytimes.com': 0.80,
        'bbc.com': 0.80,
        'reuters.com': 0.85,
        'apnews.com': 0.85,

        # Technical documentation
        'docs.python.org': 0.90,
        'developer.mozilla.org': 0.90,
        'stackoverflow.com': 0.75,
        'github.com': 0.70,

        # Encyclopedia
        'wikipedia.org': 0.75,
    }

    # Substrings in the URL or title that flag spammy/low-value content.
    LOW_QUALITY_INDICATORS = [
        'clickbait', 'ads', 'spam', 'scam',
        'download-now', 'free-download',
        'xxx', 'adult', 'casino', 'pills'
    ]

    def analyze_result_quality(
        self,
        result: Dict[str, Any],
        query: str,
        position: int
    ) -> Dict[str, Any]:
        """
        Analyze quality of a single search result.

        Args:
            result: Search result dictionary (Google CSE-style keys:
                'displayLink', 'title', 'snippet', 'link', 'metadata')
            query: Original search query
            position: Position in search results (1-based)

        Returns:
            Quality analysis dictionary with scores and signals
        """
        quality_analysis = {
            'quality_score': 0.0,
            'authority_score': 0.0,
            'relevance_score': 0.0,
            'freshness_score': 0.0,
            'credibility_level': CredibilityLevel.MEDIUM.value,
            'quality_signals': {},
            'warnings': []
        }

        # 1. Evaluate domain authority
        domain = result.get('displayLink', '').lower()
        authority_score = self._calculate_authority_score(domain)
        quality_analysis['authority_score'] = authority_score
        # Label reflects the raw domain reputation (before any
        # low-quality penalty applied in step 8).
        quality_analysis['quality_signals']['domain_authority'] = (
            'high' if authority_score > 0.8 else
            'medium' if authority_score > 0.5 else 'low'
        )

        # 2. Evaluate relevance of title/snippet to the query terms
        relevance_score = self._calculate_relevance_score(
            query,
            result.get('title', ''),
            result.get('snippet', ''),
            position
        )
        quality_analysis['relevance_score'] = relevance_score

        # 3. Evaluate freshness from publish-date metadata
        freshness_score = self._calculate_freshness_score(result)
        quality_analysis['freshness_score'] = freshness_score

        # 4. Check HTTPS
        link = result.get('link', '')
        has_https = link.startswith('https://')
        quality_analysis['quality_signals']['has_https'] = has_https
        if not has_https:
            quality_analysis['warnings'].append('No HTTPS - security concern')

        # 5. Check content length
        snippet_length = len(result.get('snippet', ''))
        quality_analysis['quality_signals']['content_length'] = (
            'adequate' if snippet_length > 100 else 'short'
        )
        if snippet_length < 50:
            quality_analysis['warnings'].append('Very short snippet - may lack detail')

        # 6. Check metadata presence
        has_metadata = 'metadata' in result
        quality_analysis['quality_signals']['has_metadata'] = has_metadata

        # 7. Position ranking (the engine's own ranking is a quality signal);
        #    decays 0.05 per position, floored at 0.
        position_score = max(0, 1.0 - (position - 1) * 0.05)
        quality_analysis['quality_signals']['position_rank'] = position

        # 8. Detect low quality indicators; each hit halves the authority
        #    used in the composite score below.
        url_lower = link.lower()
        title_lower = result.get('title', '').lower()
        for indicator in self.LOW_QUALITY_INDICATORS:
            if indicator in url_lower or indicator in title_lower:
                quality_analysis['warnings'].append(
                    f'Low quality indicator detected: {indicator}'
                )
                authority_score *= 0.5  # Significantly reduce authority
        # Keep the reported authority consistent with the (possibly
        # penalized) value actually used in the composite score.
        quality_analysis['authority_score'] = authority_score

        # 9. Calculate comprehensive quality score (weights sum to 1.0)
        quality_analysis['quality_score'] = (
            authority_score * 0.35 +   # Authority 35%
            relevance_score * 0.30 +   # Relevance 30%
            position_score * 0.20 +    # Position 20%
            freshness_score * 0.10 +   # Freshness 10%
            (0.05 if has_https else 0) # HTTPS 5%
        )

        # 10. Determine credibility level from the composite score
        if quality_analysis['quality_score'] > 0.75:
            quality_analysis['credibility_level'] = CredibilityLevel.HIGH.value
        elif quality_analysis['quality_score'] > 0.5:
            quality_analysis['credibility_level'] = CredibilityLevel.MEDIUM.value
        else:
            quality_analysis['credibility_level'] = CredibilityLevel.LOW.value

        return quality_analysis

    def _calculate_authority_score(self, domain: str) -> float:
        """Calculate domain authority score.

        Exact matches win first. Suffix matching is boundary-safe:
        'www.github.com' matches the 'github.com' rule, but
        'evilgithub.com' does not. Keys starting with '.' ('.gov',
        '.edu') are plain suffix rules.
        """
        # Exact match
        if domain in self.AUTHORITATIVE_DOMAINS:
            return self.AUTHORITATIVE_DOMAINS[domain]

        # Suffix match with label-boundary enforcement
        for auth_domain, score in self.AUTHORITATIVE_DOMAINS.items():
            if auth_domain.startswith('.'):
                # TLD-style rule: '.gov' matches 'cdc.gov'
                if domain.endswith(auth_domain):
                    return score
            elif domain.endswith('.' + auth_domain):
                # Subdomain of a known site; the '.' prevents
                # 'evilgithub.com' from matching 'github.com'.
                return score

        # Default medium authority for unknown domains
        return 0.5

    def _calculate_relevance_score(
        self,
        query: str,
        title: str,
        snippet: str,
        position: int
    ) -> float:
        """Calculate relevance score based on query term matches.

        Title matches are weighted 0.6, snippet matches 0.3, plus a
        position bonus (0.2 for top 3, 0.1 for top 10). Capped at 1.0.
        """
        query_terms = set(query.lower().split())
        title_lower = title.lower()
        snippet_lower = snippet.lower()

        # Title matching (substring test per term)
        title_matches = sum(1 for term in query_terms if term in title_lower)
        title_score = title_matches / len(query_terms) if query_terms else 0

        # Snippet matching
        snippet_matches = sum(1 for term in query_terms if term in snippet_lower)
        snippet_score = snippet_matches / len(query_terms) if query_terms else 0

        # Position bonus (top 3 get extra credit)
        position_bonus = 0.2 if position <= 3 else 0.1 if position <= 10 else 0

        # Combined relevance
        relevance = (
            title_score * 0.6 +    # Title weighted higher
            snippet_score * 0.3 +  # Snippet secondary
            position_bonus         # Position bonus
        )

        return min(1.0, relevance)

    def _calculate_freshness_score(self, result: Dict[str, Any]) -> float:
        """Calculate freshness score from publish date metadata.

        Looks for an ISO-8601 date in 'metatags'/'article'/'newsarticle'
        metadata entries. Returns 0.5 (neutral) when no date is found or
        the date cannot be parsed.
        """
        metadata = result.get('metadata', {})

        # Look for date in various metadata fields
        date_fields = ['metatags', 'article', 'newsarticle']
        publish_date = None

        for field in date_fields:
            if field in metadata:
                items = metadata[field]
                if isinstance(items, list) and items:
                    item = items[0]
                    # Common date field names
                    for date_key in ['publishdate', 'datepublished', 'article:published_time']:
                        if date_key in item:
                            publish_date = item[date_key]
                            break
                if publish_date:
                    break

        if not publish_date:
            # No date info, return neutral score
            return 0.5

        try:
            # Parse date ('Z' suffix normalized for fromisoformat)
            pub_dt = datetime.fromisoformat(publish_date.replace('Z', '+00:00'))
            now = datetime.now(pub_dt.tzinfo)

            days_old = (now - pub_dt).days

            # Freshness scoring
            if days_old < 7:
                return 1.0   # < 1 week - very fresh
            elif days_old < 30:
                return 0.9   # < 1 month - fresh
            elif days_old < 90:
                return 0.7   # < 3 months - moderately fresh
            elif days_old < 365:
                return 0.5   # < 1 year - neutral
            elif days_old < 730:
                return 0.3   # < 2 years - dated
            else:
                return 0.1   # > 2 years - old
        except Exception:
            # Unparseable/unexpected date value: neutral score
            return 0.5

    def rank_results(
        self,
        results: List[Dict[str, Any]],
        ranking_strategy: str = 'balanced'
    ) -> List[Dict[str, Any]]:
        """
        Re-rank results by quality metrics.

        Args:
            results: List of results with quality analysis attached
                under the '_quality' key
            ranking_strategy: Ranking strategy ('balanced', 'authority',
                'relevance', 'freshness'); unknown values fall back to
                'balanced'

        Returns:
            New sorted list of results (descending by the chosen score)
        """
        if ranking_strategy == 'authority':
            return sorted(
                results,
                key=lambda x: x.get('_quality', {}).get('authority_score', 0),
                reverse=True
            )
        elif ranking_strategy == 'relevance':
            return sorted(
                results,
                key=lambda x: x.get('_quality', {}).get('relevance_score', 0),
                reverse=True
            )
        elif ranking_strategy == 'freshness':
            return sorted(
                results,
                key=lambda x: x.get('_quality', {}).get('freshness_score', 0),
                reverse=True
            )
        else:  # balanced
            return sorted(
                results,
                key=lambda x: x.get('_quality', {}).get('quality_score', 0),
                reverse=True
            )
294
+
295
+
296
+ # ============================================================================
297
+ # Query Intent Analyzer
298
+ # ============================================================================
299
+
300
class QueryIntentAnalyzer:
    """Analyzer that classifies query intent and proposes optimizations."""

    # Intent patterns: trigger keywords, an optional query-enhancement
    # suffix, and search parameters suited to that intent.
    INTENT_PATTERNS = {
        QueryIntentType.DEFINITION.value: {
            'keywords': ['what is', 'define', 'meaning of', 'definition'],
            'query_enhancement': 'definition OR meaning OR "what is"',
            'suggested_params': {'num_results': 5}
        },
        QueryIntentType.HOW_TO.value: {
            'keywords': ['how to', 'how do i', 'tutorial', 'guide', 'steps to'],
            'query_enhancement': 'tutorial OR guide OR "step by step"',
            'suggested_params': {'num_results': 10}
        },
        QueryIntentType.COMPARISON.value: {
            'keywords': ['vs', 'versus', 'compare', 'difference between', 'better than'],
            'query_enhancement': 'comparison OR versus OR "vs"',
            'suggested_params': {'num_results': 10}
        },
        QueryIntentType.FACTUAL.value: {
            'keywords': ['when', 'where', 'who', 'which', 'statistics', 'data'],
            'query_enhancement': '',
            'suggested_params': {'num_results': 5}
        },
        QueryIntentType.RECENT_NEWS.value: {
            'keywords': ['latest', 'recent', 'news', 'today', 'current'],
            'query_enhancement': 'news OR latest',
            'suggested_params': {'date_restrict': 'w1', 'sort_by': 'date'}
        },
        QueryIntentType.ACADEMIC.value: {
            'keywords': ['research', 'study', 'paper', 'journal', 'academic'],
            'query_enhancement': 'research OR study OR paper',
            'suggested_params': {'file_type': 'pdf', 'num_results': 10}
        },
        QueryIntentType.PRODUCT.value: {
            'keywords': ['buy', 'price', 'review', 'best', 'top rated'],
            'query_enhancement': 'review OR comparison',
            'suggested_params': {'num_results': 15}
        }
    }

    def analyze_query_intent(self, query: str) -> Dict[str, Any]:
        """
        Classify the query's intent and derive enhancements.

        Args:
            query: Search query string

        Returns:
            Intent analysis dict: detected intent type with confidence,
            an enhanced query, suggested search parameters, extracted
            entities/modifiers, and optimization suggestions.
        """
        lowered = query.lower()

        analysis: Dict[str, Any] = {
            'original_query': query,
            'intent_type': QueryIntentType.GENERAL.value,
            'confidence': 0.0,
            'enhanced_query': query,
            'suggested_params': {},
            'query_entities': [],
            'query_modifiers': [],
            'suggestions': []
        }

        # Score each intent by keyword hits; confidence scales with the
        # fraction of keywords matched (doubled, capped at 1.0).
        scored = []
        for intent_name, cfg in self.INTENT_PATTERNS.items():
            hits = sum(kw in lowered for kw in cfg['keywords'])
            if hits:
                scored.append((min(1.0, hits / len(cfg['keywords']) * 2), intent_name))

        if scored:
            # max() returns the first maximum, so ties resolve to the
            # earliest-listed intent, matching dict insertion order.
            best_conf, best_intent = max(scored, key=lambda pair: pair[0])
            analysis['intent_type'] = best_intent
            analysis['confidence'] = best_conf

            cfg = self.INTENT_PATTERNS[best_intent]
            if cfg['query_enhancement']:
                analysis['enhanced_query'] = f"{query} {cfg['query_enhancement']}"
            analysis['suggested_params'] = dict(cfg['suggested_params'])

        # Entities, modifiers, and optimization hints
        analysis['query_entities'] = self._extract_entities(query)
        analysis['query_modifiers'] = self._extract_modifiers(query)
        analysis['suggestions'] = self._generate_suggestions(
            query, analysis['intent_type']
        )

        return analysis

    def _extract_entities(self, query: str) -> List[str]:
        """Extract potential entities from the query (simplified heuristic).

        Capitalized tokens longer than two characters are treated as
        candidate entities.
        """
        return [tok for tok in query.split() if len(tok) > 2 and tok[0].isupper()]

    def _extract_modifiers(self, query: str) -> List[str]:
        """Return the known modifier words present in the query."""
        known_modifiers = [
            'best', 'top', 'latest', 'new', 'old', 'cheap', 'expensive',
            'free', 'open source', 'commercial', 'beginner', 'advanced'
        ]
        lowered = query.lower()
        return [mod for mod in known_modifiers if mod in lowered]

    def _generate_suggestions(self, query: str, intent_type: str) -> List[str]:
        """Generate query optimization suggestions for the detected intent."""
        tips: List[str] = []
        lowered = query.lower()
        word_count = len(query.split())

        # Intent-specific hints
        if intent_type == QueryIntentType.HOW_TO.value:
            if 'beginner' not in lowered and 'advanced' not in lowered:
                tips.append(
                    'Consider adding "beginner" or "advanced" to target skill level'
                )
        elif intent_type == QueryIntentType.COMPARISON.value:
            if ' vs ' not in lowered:
                tips.append(
                    'Use "vs" or "versus" for better comparison results'
                )
        elif intent_type == QueryIntentType.ACADEMIC.value:
            if 'pdf' not in lowered:
                tips.append(
                    'Consider adding "filetype:pdf" to find research papers'
                )
        elif intent_type == QueryIntentType.RECENT_NEWS.value:
            tips.append(
                'Results will be filtered to last week for freshness'
            )

        # Length-based hints apply regardless of intent
        if word_count < 3:
            tips.append(
                'Query is short - consider adding more specific terms'
            )
        if word_count > 10:
            tips.append(
                'Query is long - consider simplifying to key terms'
            )

        return tips
467
+
468
+
469
+ # ============================================================================
470
+ # Result Summarizer
471
+ # ============================================================================
472
+
473
class ResultSummarizer:
    """Builds structured summaries over a batch of analyzed search results."""

    def generate_summary(
        self,
        results: List[Dict[str, Any]],
        query: str
    ) -> Dict[str, Any]:
        """
        Generate a comprehensive summary of search results.

        Args:
            results: Search results, each optionally carrying a
                '_quality' dict from quality analysis
            query: Original search query

        Returns:
            Summary dict with quality/freshness distributions, top
            domains, recommended results, warnings and suggestions.
        """
        summary: Dict[str, Any] = {
            'query': query,
            'total_results': len(results),
            'quality_distribution': {'high': 0, 'medium': 0, 'low': 0},
            'top_domains': [],
            'content_types': {},
            'freshness_distribution': {
                'very_fresh': 0, 'fresh': 0, 'moderate': 0, 'old': 0
            },
            'recommended_results': [],
            'warnings': [],
            'suggestions': []
        }

        # Empty input short-circuits with a warning.
        if not results:
            summary['warnings'].append('No results found')
            return summary

        # Single pass: tally credibility levels, per-domain quality,
        # and freshness bands.
        per_domain: Dict[str, Dict[str, Any]] = {}
        for item in results:
            quality = item.get('_quality', {})

            level = quality.get('credibility_level', 'medium')
            summary['quality_distribution'][level] += 1

            host = item.get('displayLink', 'unknown')
            bucket = per_domain.setdefault(host, {'count': 0, 'total_quality': 0.0})
            bucket['count'] += 1
            bucket['total_quality'] += quality.get('quality_score', 0.5)

            fresh = quality.get('freshness_score', 0.5)
            if fresh > 0.9:
                band = 'very_fresh'
            elif fresh > 0.7:
                band = 'fresh'
            elif fresh > 0.5:
                band = 'moderate'
            else:
                band = 'old'
            summary['freshness_distribution'][band] += 1

        # Top 5 domains, ordered by result count then average quality.
        ranked_domains = sorted(
            (
                {
                    'domain': host,
                    'count': stats['count'],
                    'avg_quality': stats['total_quality'] / stats['count'],
                }
                for host, stats in per_domain.items()
            ),
            key=lambda entry: (entry['count'], entry['avg_quality']),
            reverse=True,
        )
        summary['top_domains'] = ranked_domains[:5]

        # Top 3 results by composite quality score.
        by_quality = sorted(
            results,
            key=lambda r: r.get('_quality', {}).get('quality_score', 0),
            reverse=True,
        )
        summary['recommended_results'] = by_quality[:3]

        # Warnings: low-quality results and missing HTTPS.
        low_count = summary['quality_distribution']['low']
        if low_count > 0:
            summary['warnings'].append(
                f"{low_count} low quality result(s) detected"
            )

        secure = sum(1 for r in results if r.get('link', '').startswith('https://'))
        if secure < len(results):
            summary['warnings'].append(
                f"{len(results) - secure} result(s) lack HTTPS"
            )

        # Suggestions: staleness and scarcity of high-quality hits.
        if summary['freshness_distribution']['old'] > len(results) / 2:
            summary['suggestions'].append(
                'Many results are outdated - consider adding date filter'
            )
        if summary['quality_distribution']['high'] < 3:
            summary['suggestions'].append(
                'Few high-quality results - try refining your query'
            )

        return summary
583
+