aiecs 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. aiecs/__init__.py +72 -0
  2. aiecs/__main__.py +41 -0
  3. aiecs/aiecs_client.py +469 -0
  4. aiecs/application/__init__.py +10 -0
  5. aiecs/application/executors/__init__.py +10 -0
  6. aiecs/application/executors/operation_executor.py +363 -0
  7. aiecs/application/knowledge_graph/__init__.py +7 -0
  8. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +375 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +356 -0
  11. aiecs/application/knowledge_graph/builder/schema_mapping.py +531 -0
  12. aiecs/application/knowledge_graph/builder/structured_pipeline.py +443 -0
  13. aiecs/application/knowledge_graph/builder/text_chunker.py +319 -0
  14. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  15. aiecs/application/knowledge_graph/extractors/base.py +100 -0
  16. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +327 -0
  17. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +349 -0
  18. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +244 -0
  19. aiecs/application/knowledge_graph/fusion/__init__.py +23 -0
  20. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +387 -0
  21. aiecs/application/knowledge_graph/fusion/entity_linker.py +343 -0
  22. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +580 -0
  23. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +189 -0
  24. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  25. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +344 -0
  26. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +378 -0
  27. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  28. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +199 -0
  29. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  30. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  31. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +347 -0
  32. aiecs/application/knowledge_graph/reasoning/inference_engine.py +504 -0
  33. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +167 -0
  34. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  35. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  36. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +630 -0
  37. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +654 -0
  38. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +477 -0
  39. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +390 -0
  40. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +217 -0
  41. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +169 -0
  42. aiecs/application/knowledge_graph/reasoning/query_planner.py +872 -0
  43. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +554 -0
  44. aiecs/application/knowledge_graph/retrieval/__init__.py +19 -0
  45. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +596 -0
  46. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  47. aiecs/application/knowledge_graph/search/hybrid_search.py +423 -0
  48. aiecs/application/knowledge_graph/search/reranker.py +295 -0
  49. aiecs/application/knowledge_graph/search/reranker_strategies.py +553 -0
  50. aiecs/application/knowledge_graph/search/text_similarity.py +398 -0
  51. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  52. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +329 -0
  53. aiecs/application/knowledge_graph/traversal/path_scorer.py +269 -0
  54. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  55. aiecs/application/knowledge_graph/validators/relation_validator.py +189 -0
  56. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  57. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +321 -0
  58. aiecs/common/__init__.py +9 -0
  59. aiecs/common/knowledge_graph/__init__.py +17 -0
  60. aiecs/common/knowledge_graph/runnable.py +484 -0
  61. aiecs/config/__init__.py +16 -0
  62. aiecs/config/config.py +498 -0
  63. aiecs/config/graph_config.py +137 -0
  64. aiecs/config/registry.py +23 -0
  65. aiecs/core/__init__.py +46 -0
  66. aiecs/core/interface/__init__.py +34 -0
  67. aiecs/core/interface/execution_interface.py +152 -0
  68. aiecs/core/interface/storage_interface.py +171 -0
  69. aiecs/domain/__init__.py +289 -0
  70. aiecs/domain/agent/__init__.py +189 -0
  71. aiecs/domain/agent/base_agent.py +697 -0
  72. aiecs/domain/agent/exceptions.py +103 -0
  73. aiecs/domain/agent/graph_aware_mixin.py +559 -0
  74. aiecs/domain/agent/hybrid_agent.py +490 -0
  75. aiecs/domain/agent/integration/__init__.py +26 -0
  76. aiecs/domain/agent/integration/context_compressor.py +222 -0
  77. aiecs/domain/agent/integration/context_engine_adapter.py +252 -0
  78. aiecs/domain/agent/integration/retry_policy.py +219 -0
  79. aiecs/domain/agent/integration/role_config.py +213 -0
  80. aiecs/domain/agent/knowledge_aware_agent.py +646 -0
  81. aiecs/domain/agent/lifecycle.py +296 -0
  82. aiecs/domain/agent/llm_agent.py +300 -0
  83. aiecs/domain/agent/memory/__init__.py +12 -0
  84. aiecs/domain/agent/memory/conversation.py +197 -0
  85. aiecs/domain/agent/migration/__init__.py +14 -0
  86. aiecs/domain/agent/migration/conversion.py +160 -0
  87. aiecs/domain/agent/migration/legacy_wrapper.py +90 -0
  88. aiecs/domain/agent/models.py +317 -0
  89. aiecs/domain/agent/observability.py +407 -0
  90. aiecs/domain/agent/persistence.py +289 -0
  91. aiecs/domain/agent/prompts/__init__.py +29 -0
  92. aiecs/domain/agent/prompts/builder.py +161 -0
  93. aiecs/domain/agent/prompts/formatters.py +189 -0
  94. aiecs/domain/agent/prompts/template.py +255 -0
  95. aiecs/domain/agent/registry.py +260 -0
  96. aiecs/domain/agent/tool_agent.py +257 -0
  97. aiecs/domain/agent/tools/__init__.py +12 -0
  98. aiecs/domain/agent/tools/schema_generator.py +221 -0
  99. aiecs/domain/community/__init__.py +155 -0
  100. aiecs/domain/community/agent_adapter.py +477 -0
  101. aiecs/domain/community/analytics.py +481 -0
  102. aiecs/domain/community/collaborative_workflow.py +642 -0
  103. aiecs/domain/community/communication_hub.py +645 -0
  104. aiecs/domain/community/community_builder.py +320 -0
  105. aiecs/domain/community/community_integration.py +800 -0
  106. aiecs/domain/community/community_manager.py +813 -0
  107. aiecs/domain/community/decision_engine.py +879 -0
  108. aiecs/domain/community/exceptions.py +225 -0
  109. aiecs/domain/community/models/__init__.py +33 -0
  110. aiecs/domain/community/models/community_models.py +268 -0
  111. aiecs/domain/community/resource_manager.py +457 -0
  112. aiecs/domain/community/shared_context_manager.py +603 -0
  113. aiecs/domain/context/__init__.py +58 -0
  114. aiecs/domain/context/context_engine.py +989 -0
  115. aiecs/domain/context/conversation_models.py +354 -0
  116. aiecs/domain/context/graph_memory.py +467 -0
  117. aiecs/domain/execution/__init__.py +12 -0
  118. aiecs/domain/execution/model.py +57 -0
  119. aiecs/domain/knowledge_graph/__init__.py +19 -0
  120. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  121. aiecs/domain/knowledge_graph/models/entity.py +130 -0
  122. aiecs/domain/knowledge_graph/models/evidence.py +194 -0
  123. aiecs/domain/knowledge_graph/models/inference_rule.py +186 -0
  124. aiecs/domain/knowledge_graph/models/path.py +179 -0
  125. aiecs/domain/knowledge_graph/models/path_pattern.py +173 -0
  126. aiecs/domain/knowledge_graph/models/query.py +272 -0
  127. aiecs/domain/knowledge_graph/models/query_plan.py +187 -0
  128. aiecs/domain/knowledge_graph/models/relation.py +136 -0
  129. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  130. aiecs/domain/knowledge_graph/schema/entity_type.py +135 -0
  131. aiecs/domain/knowledge_graph/schema/graph_schema.py +271 -0
  132. aiecs/domain/knowledge_graph/schema/property_schema.py +155 -0
  133. aiecs/domain/knowledge_graph/schema/relation_type.py +171 -0
  134. aiecs/domain/knowledge_graph/schema/schema_manager.py +496 -0
  135. aiecs/domain/knowledge_graph/schema/type_enums.py +205 -0
  136. aiecs/domain/task/__init__.py +13 -0
  137. aiecs/domain/task/dsl_processor.py +613 -0
  138. aiecs/domain/task/model.py +62 -0
  139. aiecs/domain/task/task_context.py +268 -0
  140. aiecs/infrastructure/__init__.py +24 -0
  141. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  142. aiecs/infrastructure/graph_storage/base.py +601 -0
  143. aiecs/infrastructure/graph_storage/batch_operations.py +449 -0
  144. aiecs/infrastructure/graph_storage/cache.py +429 -0
  145. aiecs/infrastructure/graph_storage/distributed.py +226 -0
  146. aiecs/infrastructure/graph_storage/error_handling.py +390 -0
  147. aiecs/infrastructure/graph_storage/graceful_degradation.py +306 -0
  148. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  149. aiecs/infrastructure/graph_storage/in_memory.py +514 -0
  150. aiecs/infrastructure/graph_storage/index_optimization.py +483 -0
  151. aiecs/infrastructure/graph_storage/lazy_loading.py +410 -0
  152. aiecs/infrastructure/graph_storage/metrics.py +357 -0
  153. aiecs/infrastructure/graph_storage/migration.py +413 -0
  154. aiecs/infrastructure/graph_storage/pagination.py +471 -0
  155. aiecs/infrastructure/graph_storage/performance_monitoring.py +466 -0
  156. aiecs/infrastructure/graph_storage/postgres.py +871 -0
  157. aiecs/infrastructure/graph_storage/query_optimizer.py +635 -0
  158. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  159. aiecs/infrastructure/graph_storage/sqlite.py +623 -0
  160. aiecs/infrastructure/graph_storage/streaming.py +495 -0
  161. aiecs/infrastructure/messaging/__init__.py +13 -0
  162. aiecs/infrastructure/messaging/celery_task_manager.py +383 -0
  163. aiecs/infrastructure/messaging/websocket_manager.py +298 -0
  164. aiecs/infrastructure/monitoring/__init__.py +34 -0
  165. aiecs/infrastructure/monitoring/executor_metrics.py +174 -0
  166. aiecs/infrastructure/monitoring/global_metrics_manager.py +213 -0
  167. aiecs/infrastructure/monitoring/structured_logger.py +48 -0
  168. aiecs/infrastructure/monitoring/tracing_manager.py +410 -0
  169. aiecs/infrastructure/persistence/__init__.py +24 -0
  170. aiecs/infrastructure/persistence/context_engine_client.py +187 -0
  171. aiecs/infrastructure/persistence/database_manager.py +333 -0
  172. aiecs/infrastructure/persistence/file_storage.py +754 -0
  173. aiecs/infrastructure/persistence/redis_client.py +220 -0
  174. aiecs/llm/__init__.py +86 -0
  175. aiecs/llm/callbacks/__init__.py +11 -0
  176. aiecs/llm/callbacks/custom_callbacks.py +264 -0
  177. aiecs/llm/client_factory.py +420 -0
  178. aiecs/llm/clients/__init__.py +33 -0
  179. aiecs/llm/clients/base_client.py +193 -0
  180. aiecs/llm/clients/googleai_client.py +181 -0
  181. aiecs/llm/clients/openai_client.py +131 -0
  182. aiecs/llm/clients/vertex_client.py +437 -0
  183. aiecs/llm/clients/xai_client.py +184 -0
  184. aiecs/llm/config/__init__.py +51 -0
  185. aiecs/llm/config/config_loader.py +275 -0
  186. aiecs/llm/config/config_validator.py +236 -0
  187. aiecs/llm/config/model_config.py +151 -0
  188. aiecs/llm/utils/__init__.py +10 -0
  189. aiecs/llm/utils/validate_config.py +91 -0
  190. aiecs/main.py +363 -0
  191. aiecs/scripts/__init__.py +3 -0
  192. aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
  193. aiecs/scripts/aid/__init__.py +19 -0
  194. aiecs/scripts/aid/version_manager.py +215 -0
  195. aiecs/scripts/dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md +242 -0
  196. aiecs/scripts/dependance_check/README_DEPENDENCY_CHECKER.md +310 -0
  197. aiecs/scripts/dependance_check/__init__.py +17 -0
  198. aiecs/scripts/dependance_check/dependency_checker.py +938 -0
  199. aiecs/scripts/dependance_check/dependency_fixer.py +391 -0
  200. aiecs/scripts/dependance_check/download_nlp_data.py +396 -0
  201. aiecs/scripts/dependance_check/quick_dependency_check.py +270 -0
  202. aiecs/scripts/dependance_check/setup_nlp_data.sh +217 -0
  203. aiecs/scripts/dependance_patch/__init__.py +7 -0
  204. aiecs/scripts/dependance_patch/fix_weasel/README_WEASEL_PATCH.md +126 -0
  205. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  206. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.py +128 -0
  207. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.sh +82 -0
  208. aiecs/scripts/dependance_patch/fix_weasel/patch_weasel_library.sh +188 -0
  209. aiecs/scripts/dependance_patch/fix_weasel/run_weasel_patch.sh +41 -0
  210. aiecs/scripts/tools_develop/README.md +449 -0
  211. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  212. aiecs/scripts/tools_develop/__init__.py +21 -0
  213. aiecs/scripts/tools_develop/check_type_annotations.py +259 -0
  214. aiecs/scripts/tools_develop/validate_tool_schemas.py +422 -0
  215. aiecs/scripts/tools_develop/verify_tools.py +356 -0
  216. aiecs/tasks/__init__.py +1 -0
  217. aiecs/tasks/worker.py +172 -0
  218. aiecs/tools/__init__.py +299 -0
  219. aiecs/tools/apisource/__init__.py +99 -0
  220. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  221. aiecs/tools/apisource/intelligence/data_fusion.py +381 -0
  222. aiecs/tools/apisource/intelligence/query_analyzer.py +413 -0
  223. aiecs/tools/apisource/intelligence/search_enhancer.py +388 -0
  224. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  225. aiecs/tools/apisource/monitoring/metrics.py +303 -0
  226. aiecs/tools/apisource/providers/__init__.py +115 -0
  227. aiecs/tools/apisource/providers/base.py +664 -0
  228. aiecs/tools/apisource/providers/census.py +401 -0
  229. aiecs/tools/apisource/providers/fred.py +564 -0
  230. aiecs/tools/apisource/providers/newsapi.py +412 -0
  231. aiecs/tools/apisource/providers/worldbank.py +357 -0
  232. aiecs/tools/apisource/reliability/__init__.py +12 -0
  233. aiecs/tools/apisource/reliability/error_handler.py +375 -0
  234. aiecs/tools/apisource/reliability/fallback_strategy.py +391 -0
  235. aiecs/tools/apisource/tool.py +850 -0
  236. aiecs/tools/apisource/utils/__init__.py +9 -0
  237. aiecs/tools/apisource/utils/validators.py +338 -0
  238. aiecs/tools/base_tool.py +201 -0
  239. aiecs/tools/docs/__init__.py +121 -0
  240. aiecs/tools/docs/ai_document_orchestrator.py +599 -0
  241. aiecs/tools/docs/ai_document_writer_orchestrator.py +2403 -0
  242. aiecs/tools/docs/content_insertion_tool.py +1333 -0
  243. aiecs/tools/docs/document_creator_tool.py +1317 -0
  244. aiecs/tools/docs/document_layout_tool.py +1166 -0
  245. aiecs/tools/docs/document_parser_tool.py +994 -0
  246. aiecs/tools/docs/document_writer_tool.py +1818 -0
  247. aiecs/tools/knowledge_graph/__init__.py +17 -0
  248. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +734 -0
  249. aiecs/tools/knowledge_graph/graph_search_tool.py +923 -0
  250. aiecs/tools/knowledge_graph/kg_builder_tool.py +476 -0
  251. aiecs/tools/langchain_adapter.py +542 -0
  252. aiecs/tools/schema_generator.py +275 -0
  253. aiecs/tools/search_tool/__init__.py +100 -0
  254. aiecs/tools/search_tool/analyzers.py +589 -0
  255. aiecs/tools/search_tool/cache.py +260 -0
  256. aiecs/tools/search_tool/constants.py +128 -0
  257. aiecs/tools/search_tool/context.py +216 -0
  258. aiecs/tools/search_tool/core.py +749 -0
  259. aiecs/tools/search_tool/deduplicator.py +123 -0
  260. aiecs/tools/search_tool/error_handler.py +271 -0
  261. aiecs/tools/search_tool/metrics.py +371 -0
  262. aiecs/tools/search_tool/rate_limiter.py +178 -0
  263. aiecs/tools/search_tool/schemas.py +277 -0
  264. aiecs/tools/statistics/__init__.py +80 -0
  265. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +643 -0
  266. aiecs/tools/statistics/ai_insight_generator_tool.py +505 -0
  267. aiecs/tools/statistics/ai_report_orchestrator_tool.py +694 -0
  268. aiecs/tools/statistics/data_loader_tool.py +564 -0
  269. aiecs/tools/statistics/data_profiler_tool.py +658 -0
  270. aiecs/tools/statistics/data_transformer_tool.py +573 -0
  271. aiecs/tools/statistics/data_visualizer_tool.py +495 -0
  272. aiecs/tools/statistics/model_trainer_tool.py +487 -0
  273. aiecs/tools/statistics/statistical_analyzer_tool.py +459 -0
  274. aiecs/tools/task_tools/__init__.py +86 -0
  275. aiecs/tools/task_tools/chart_tool.py +732 -0
  276. aiecs/tools/task_tools/classfire_tool.py +922 -0
  277. aiecs/tools/task_tools/image_tool.py +447 -0
  278. aiecs/tools/task_tools/office_tool.py +684 -0
  279. aiecs/tools/task_tools/pandas_tool.py +635 -0
  280. aiecs/tools/task_tools/report_tool.py +635 -0
  281. aiecs/tools/task_tools/research_tool.py +392 -0
  282. aiecs/tools/task_tools/scraper_tool.py +715 -0
  283. aiecs/tools/task_tools/stats_tool.py +688 -0
  284. aiecs/tools/temp_file_manager.py +130 -0
  285. aiecs/tools/tool_executor/__init__.py +37 -0
  286. aiecs/tools/tool_executor/tool_executor.py +881 -0
  287. aiecs/utils/LLM_output_structor.py +445 -0
  288. aiecs/utils/__init__.py +34 -0
  289. aiecs/utils/base_callback.py +47 -0
  290. aiecs/utils/cache_provider.py +695 -0
  291. aiecs/utils/execution_utils.py +184 -0
  292. aiecs/utils/logging.py +1 -0
  293. aiecs/utils/prompt_loader.py +14 -0
  294. aiecs/utils/token_usage_repository.py +323 -0
  295. aiecs/ws/__init__.py +0 -0
  296. aiecs/ws/socket_server.py +52 -0
  297. aiecs-1.5.1.dist-info/METADATA +608 -0
  298. aiecs-1.5.1.dist-info/RECORD +302 -0
  299. aiecs-1.5.1.dist-info/WHEEL +5 -0
  300. aiecs-1.5.1.dist-info/entry_points.txt +10 -0
  301. aiecs-1.5.1.dist-info/licenses/LICENSE +225 -0
  302. aiecs-1.5.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,589 @@
1
+ """
2
+ Search Result Analyzers
3
+
4
+ This module contains analyzers for assessing search result quality,
5
+ understanding query intent, and generating result summaries.
6
+ """
7
+
8
+ from datetime import datetime
9
+ from typing import Any, Dict, List
10
+
11
+ from .constants import QueryIntentType, CredibilityLevel
12
+
13
+
14
+ # ============================================================================
15
+ # Result Quality Analyzer
16
+ # ============================================================================
17
+
18
+
19
class ResultQualityAnalyzer:
    """Analyzer for assessing search result quality.

    Each result is scored on domain authority, query relevance, freshness,
    search position and HTTPS usage; the weighted sum becomes the overall
    ``quality_score`` from which a credibility level is derived.
    """

    # High authority domains with trust scores.  Keys starting with "." are
    # top-level-domain suffixes; all other keys are registrable host names.
    AUTHORITATIVE_DOMAINS = {
        # Academic and research
        "scholar.google.com": 0.95,
        "arxiv.org": 0.95,
        "ieee.org": 0.95,
        "acm.org": 0.95,
        "nature.com": 0.95,
        "science.org": 0.95,
        # Government and official
        ".gov": 0.90,
        ".edu": 0.85,
        "who.int": 0.90,
        "un.org": 0.90,
        # Major media
        "nytimes.com": 0.80,
        "bbc.com": 0.80,
        "reuters.com": 0.85,
        "apnews.com": 0.85,
        # Technical documentation
        "docs.python.org": 0.90,
        "developer.mozilla.org": 0.90,
        "stackoverflow.com": 0.75,
        "github.com": 0.70,
        # Encyclopedia
        "wikipedia.org": 0.75,
    }

    # Low quality indicators (matched as whole words in the URL and title)
    LOW_QUALITY_INDICATORS = [
        "clickbait",
        "ads",
        "spam",
        "scam",
        "download-now",
        "free-download",
        "xxx",
        "adult",
        "casino",
        "pills",
    ]

    # (exclusive upper bound on age in days, freshness score); anything older
    # than the last bound scores 0.1.
    _FRESHNESS_TIERS = (
        (7, 1.0),  # < 1 week - very fresh
        (30, 0.9),  # < 1 month - fresh
        (90, 0.7),  # < 3 months - moderately fresh
        (365, 0.5),  # < 1 year - neutral
        (730, 0.3),  # < 2 years - dated
    )

    # Ranking strategy name -> field of the ``_quality`` dict used as sort key.
    # Any other strategy (including "balanced") sorts by "quality_score".
    _RANKING_SCORE_KEYS = {
        "authority": "authority_score",
        "relevance": "relevance_score",
        "freshness": "freshness_score",
    }

    def analyze_result_quality(
        self, result: Dict[str, Any], query: str, position: int
    ) -> Dict[str, Any]:
        """
        Analyze quality of a single search result.

        Args:
            result: Search result dictionary; the keys read here are
                ``displayLink``, ``link``, ``title``, ``snippet`` and
                ``metadata``. Missing or ``None`` text fields are treated
                as empty strings.
            query: Original search query
            position: Position in search results (1-based)

        Returns:
            Quality analysis dictionary with scores and signals. Note that
            ``authority_score`` reports the raw domain authority, while the
            low-quality penalty is only applied to its contribution to
            ``quality_score``.
        """
        title = result.get("title") or ""
        snippet = result.get("snippet") or ""
        link = result.get("link") or ""
        domain = (result.get("displayLink") or "").lower()

        quality_analysis = {
            "quality_score": 0.0,
            "authority_score": 0.0,
            "relevance_score": 0.0,
            "freshness_score": 0.0,
            "credibility_level": CredibilityLevel.MEDIUM.value,
            "quality_signals": {},
            "warnings": [],
        }
        signals = quality_analysis["quality_signals"]
        warnings = quality_analysis["warnings"]

        # 1. Evaluate domain authority
        authority_score = self._calculate_authority_score(domain)
        quality_analysis["authority_score"] = authority_score
        signals["domain_authority"] = (
            "high" if authority_score > 0.8 else "medium" if authority_score > 0.5 else "low"
        )

        # 2. Evaluate relevance
        relevance_score = self._calculate_relevance_score(query, title, snippet, position)
        quality_analysis["relevance_score"] = relevance_score

        # 3. Evaluate freshness
        freshness_score = self._calculate_freshness_score(result)
        quality_analysis["freshness_score"] = freshness_score

        # 4. Check HTTPS
        has_https = link.startswith("https://")
        signals["has_https"] = has_https
        if not has_https:
            warnings.append("No HTTPS - security concern")

        # 5. Check content length
        snippet_length = len(snippet)
        signals["content_length"] = "adequate" if snippet_length > 100 else "short"
        if snippet_length < 50:
            warnings.append("Very short snippet - may lack detail")

        # 6. Check metadata
        signals["has_metadata"] = "metadata" in result

        # 7. Position ranking (the search engine's ranking is a quality signal).
        # Clamped to [0, 1] so an out-of-range position (< 1) cannot push the
        # weighted sum above its intended maximum.
        position_score = min(1.0, max(0.0, 1.0 - (position - 1) * 0.05))
        signals["position_rank"] = position

        # 8. Detect low quality indicators; each hit halves the authority
        # contribution to the overall score.
        for indicator in self._find_low_quality_indicators(link, title):
            warnings.append(f"Low quality indicator detected: {indicator}")
            authority_score *= 0.5

        # 9. Calculate comprehensive quality score
        quality_score = (
            authority_score * 0.35  # Authority 35%
            + relevance_score * 0.30  # Relevance 30%
            + position_score * 0.20  # Position 20%
            + freshness_score * 0.10  # Freshness 10%
            + (0.05 if has_https else 0)  # HTTPS 5%
        )
        quality_analysis["quality_score"] = quality_score

        # 10. Determine credibility level
        if quality_score > 0.75:
            quality_analysis["credibility_level"] = CredibilityLevel.HIGH.value
        elif quality_score > 0.5:
            quality_analysis["credibility_level"] = CredibilityLevel.MEDIUM.value
        else:
            quality_analysis["credibility_level"] = CredibilityLevel.LOW.value

        return quality_analysis

    @staticmethod
    def _contains_word(text: str, word: str) -> bool:
        """Return True if *word* occurs in *text* as a whole word or phrase.

        The match must start and end on a non-alphanumeric boundary, so
        "ads" does not match inside "downloads". One trailing "s" is
        tolerated so that simple plurals ("casinos") still match.
        """
        start = text.find(word)
        while start != -1:
            end = start + len(word)
            if start == 0 or not text[start - 1].isalnum():
                if text[end:end + 1] == "s":
                    end += 1
                if end == len(text) or not text[end].isalnum():
                    return True
            start = text.find(word, start + 1)
        return False

    def _find_low_quality_indicators(self, url: str, title: str) -> List[str]:
        """Return the low quality indicators present in *url* or *title*.

        Matching is case-insensitive and word-based; the result keeps the
        order of ``LOW_QUALITY_INDICATORS``.
        """
        url_lower = url.lower()
        title_lower = title.lower()
        return [
            indicator
            for indicator in self.LOW_QUALITY_INDICATORS
            if self._contains_word(url_lower, indicator)
            or self._contains_word(title_lower, indicator)
        ]

    def _calculate_authority_score(self, domain: str) -> float:
        """Calculate domain authority score.

        Args:
            domain: Lower-cased host name of the result.

        Returns:
            The trust score of the first matching authoritative domain,
            or 0.5 (medium authority) when nothing matches.
        """
        # Exact match
        if domain in self.AUTHORITATIVE_DOMAINS:
            return self.AUTHORITATIVE_DOMAINS[domain]

        # Sub-domain / TLD match. The suffix must begin at a label boundary,
        # otherwise look-alike hosts such as "notgithub.com" would inherit the
        # trust score of "github.com".
        for auth_domain, score in self.AUTHORITATIVE_DOMAINS.items():
            suffix = auth_domain if auth_domain.startswith(".") else "." + auth_domain
            if domain.endswith(suffix):
                return score

        # Default medium authority
        return 0.5

    def _calculate_relevance_score(
        self, query: str, title: str, snippet: str, position: int
    ) -> float:
        """Calculate relevance score based on query match.

        The score is the share of distinct query terms found in the title
        (weight 0.6) and in the snippet (weight 0.3) plus a position bonus,
        capped at 1.0.
        """
        query_terms = set(query.lower().split())
        title_lower = title.lower()
        snippet_lower = snippet.lower()

        # Title matching
        title_matches = sum(1 for term in query_terms if term in title_lower)
        title_score = title_matches / len(query_terms) if query_terms else 0

        # Snippet matching
        snippet_matches = sum(1 for term in query_terms if term in snippet_lower)
        snippet_score = snippet_matches / len(query_terms) if query_terms else 0

        # Position bonus (top 3 get extra credit)
        position_bonus = 0.2 if position <= 3 else 0.1 if position <= 10 else 0

        # Combined relevance
        relevance = (
            title_score * 0.6  # Title weighted higher
            + snippet_score * 0.3  # Snippet secondary
            + position_bonus  # Position bonus
        )

        return min(1.0, relevance)

    @staticmethod
    def _find_publish_date(metadata: Dict[str, Any]) -> Any:
        """Return the first non-empty publish date found in *metadata*, else None.

        Only the first entry of each of the "metatags", "article" and
        "newsarticle" lists is inspected.
        """
        date_keys = ("publishdate", "datepublished", "article:published_time")
        for field in ("metatags", "article", "newsarticle"):
            items = metadata.get(field)
            if not (isinstance(items, list) and items):
                continue
            item = items[0]
            if not isinstance(item, dict):
                continue
            for date_key in date_keys:
                value = item.get(date_key)
                if value:
                    return value
        return None

    def _calculate_freshness_score(self, result: Dict[str, Any]) -> float:
        """Calculate freshness score from publish date metadata.

        Returns:
            A score between 0.1 (older than two years) and 1.0 (younger than
            a week); 0.5 (neutral) when no date is available or it cannot be
            parsed as an ISO-8601 timestamp.
        """
        metadata = result.get("metadata")
        if not isinstance(metadata, dict):
            return 0.5

        publish_date = self._find_publish_date(metadata)
        if not publish_date:
            # No date info, return neutral score
            return 0.5

        try:
            pub_dt = datetime.fromisoformat(publish_date.replace("Z", "+00:00"))
            # Use the timestamp's own tzinfo so aware and naive values are
            # never mixed in the subtraction.
            days_old = (datetime.now(pub_dt.tzinfo) - pub_dt).days
        except (AttributeError, TypeError, ValueError, OverflowError):
            # Non-string or unparseable date: fall back to the neutral score.
            return 0.5

        for max_days, score in self._FRESHNESS_TIERS:
            if days_old < max_days:
                return score
        return 0.1  # > 2 years - old

    def rank_results(
        self, results: List[Dict[str, Any]], ranking_strategy: str = "balanced"
    ) -> List[Dict[str, Any]]:
        """
        Re-rank results by quality metrics.

        Args:
            results: List of results with quality analysis stored under the
                ``_quality`` key; results without it are scored as 0
            ranking_strategy: Ranking strategy ('balanced', 'authority',
                'relevance', 'freshness'); unknown values behave like
                'balanced'

        Returns:
            New list sorted by the selected score, highest first
        """
        score_key = self._RANKING_SCORE_KEYS.get(ranking_strategy, "quality_score")
        return sorted(
            results,
            key=lambda item: (item.get("_quality") or {}).get(score_key, 0),
            reverse=True,
        )
287
+
288
+
289
+ # ============================================================================
290
+ # Query Intent Analyzer
291
+ # ============================================================================
292
+
293
+
294
+ class QueryIntentAnalyzer:
295
+ """Analyzer for understanding query intent and optimizing queries"""
296
+
297
+ # Intent patterns with keywords and enhancements
298
+ INTENT_PATTERNS = {
299
+ QueryIntentType.DEFINITION.value: {
300
+ "keywords": ["what is", "define", "meaning of", "definition"],
301
+ "query_enhancement": 'definition OR meaning OR "what is"',
302
+ "suggested_params": {"num_results": 5},
303
+ },
304
+ QueryIntentType.HOW_TO.value: {
305
+ "keywords": [
306
+ "how to",
307
+ "how do i",
308
+ "tutorial",
309
+ "guide",
310
+ "steps to",
311
+ ],
312
+ "query_enhancement": 'tutorial OR guide OR "step by step"',
313
+ "suggested_params": {"num_results": 10},
314
+ },
315
+ QueryIntentType.COMPARISON.value: {
316
+ "keywords": [
317
+ "vs",
318
+ "versus",
319
+ "compare",
320
+ "difference between",
321
+ "better than",
322
+ ],
323
+ "query_enhancement": 'comparison OR versus OR "vs"',
324
+ "suggested_params": {"num_results": 10},
325
+ },
326
+ QueryIntentType.FACTUAL.value: {
327
+ "keywords": [
328
+ "when",
329
+ "where",
330
+ "who",
331
+ "which",
332
+ "statistics",
333
+ "data",
334
+ ],
335
+ "query_enhancement": "",
336
+ "suggested_params": {"num_results": 5},
337
+ },
338
+ QueryIntentType.RECENT_NEWS.value: {
339
+ "keywords": ["latest", "recent", "news", "today", "current"],
340
+ "query_enhancement": "news OR latest",
341
+ "suggested_params": {"date_restrict": "w1", "sort_by": "date"},
342
+ },
343
+ QueryIntentType.ACADEMIC.value: {
344
+ "keywords": ["research", "study", "paper", "journal", "academic"],
345
+ "query_enhancement": "research OR study OR paper",
346
+ "suggested_params": {"file_type": "pdf", "num_results": 10},
347
+ },
348
+ QueryIntentType.PRODUCT.value: {
349
+ "keywords": ["buy", "price", "review", "best", "top rated"],
350
+ "query_enhancement": "review OR comparison",
351
+ "suggested_params": {"num_results": 15},
352
+ },
353
+ }
354
+
355
+ def analyze_query_intent(self, query: str) -> Dict[str, Any]:
356
+ """
357
+ Analyze query to determine intent and generate enhancements.
358
+
359
+ Args:
360
+ query: Search query string
361
+
362
+ Returns:
363
+ Intent analysis with enhanced query and suggestions
364
+ """
365
+ query_lower = query.lower()
366
+
367
+ analysis = {
368
+ "original_query": query,
369
+ "intent_type": QueryIntentType.GENERAL.value,
370
+ "confidence": 0.0,
371
+ "enhanced_query": query,
372
+ "suggested_params": {},
373
+ "query_entities": [],
374
+ "query_modifiers": [],
375
+ "suggestions": [],
376
+ }
377
+
378
+ # Detect intent type
379
+ max_confidence = 0.0
380
+ detected_intent = QueryIntentType.GENERAL.value
381
+
382
+ for intent_type, intent_config in self.INTENT_PATTERNS.items():
383
+ keywords = intent_config["keywords"]
384
+ matches = sum(1 for kw in keywords if kw in query_lower)
385
+
386
+ if matches > 0:
387
+ confidence = min(1.0, matches / len(keywords) * 2)
388
+ if confidence > max_confidence:
389
+ max_confidence = confidence
390
+ detected_intent = intent_type
391
+
392
+ analysis["intent_type"] = detected_intent
393
+ analysis["confidence"] = max_confidence
394
+
395
+ # Enhance query if intent detected
396
+ if detected_intent != QueryIntentType.GENERAL.value:
397
+ intent_config = self.INTENT_PATTERNS[detected_intent]
398
+ enhancement = intent_config["query_enhancement"]
399
+
400
+ if enhancement:
401
+ analysis["enhanced_query"] = f"{query} {enhancement}"
402
+
403
+ analysis["suggested_params"] = intent_config["suggested_params"].copy()
404
+
405
+ # Extract entities and modifiers
406
+ analysis["query_entities"] = self._extract_entities(query)
407
+ analysis["query_modifiers"] = self._extract_modifiers(query)
408
+
409
+ # Generate suggestions
410
+ analysis["suggestions"] = self._generate_suggestions(query, detected_intent)
411
+
412
+ return analysis
413
+
414
+ def _extract_entities(self, query: str) -> List[str]:
415
+ """Extract potential entities from query (simplified)"""
416
+ words = query.split()
417
+ entities = []
418
+
419
+ for word in words:
420
+ # Simple rule: capitalized words might be entities
421
+ if word and word[0].isupper() and len(word) > 2:
422
+ entities.append(word)
423
+
424
+ return entities
425
+
426
+ def _extract_modifiers(self, query: str) -> List[str]:
427
+ """Extract query modifiers"""
428
+ modifiers = []
429
+ modifier_words = [
430
+ "best",
431
+ "top",
432
+ "latest",
433
+ "new",
434
+ "old",
435
+ "cheap",
436
+ "expensive",
437
+ "free",
438
+ "open source",
439
+ "commercial",
440
+ "beginner",
441
+ "advanced",
442
+ ]
443
+
444
+ query_lower = query.lower()
445
+ for modifier in modifier_words:
446
+ if modifier in query_lower:
447
+ modifiers.append(modifier)
448
+
449
+ return modifiers
450
+
451
def _generate_suggestions(self, query: str, intent_type: str) -> List[str]:
    """Generate query optimization suggestions.

    At most one intent-specific hint is produced, followed by generic hints
    based on the number of whitespace-separated words in the query.

    Args:
        query: Raw search query text.
        intent_type: Detected intent, compared against the ``.value`` of
            the ``QueryIntentType`` members.

    Returns:
        Human-readable suggestion strings; empty when nothing applies.
    """
    suggestions: List[str] = []
    query_lower = query.lower()

    if intent_type == QueryIntentType.HOW_TO.value:
        if "beginner" not in query_lower and "advanced" not in query_lower:
            suggestions.append('Consider adding "beginner" or "advanced" to target skill level')

    elif intent_type == QueryIntentType.COMPARISON.value:
        # The hint recommends both "vs" and "versus", so either form (also
        # "vs." or a trailing "vs") must suppress it. Checking only for the
        # literal " vs " kept nagging users who had already written "versus".
        comparison_words = {"vs", "versus"}
        has_comparison_word = any(
            word.strip(".,;:!?()\"'") in comparison_words
            for word in query_lower.split()
        )
        if not has_comparison_word:
            suggestions.append('Use "vs" or "versus" for better comparison results')

    elif intent_type == QueryIntentType.ACADEMIC.value:
        if "pdf" not in query_lower:
            suggestions.append('Consider adding "filetype:pdf" to find research papers')

    elif intent_type == QueryIntentType.RECENT_NEWS.value:
        suggestions.append("Results will be filtered to last week for freshness")

    # General suggestions, independent of the detected intent.
    word_count = len(query.split())

    if word_count < 3:
        suggestions.append("Query is short - consider adding more specific terms")

    if word_count > 10:
        suggestions.append("Query is long - consider simplifying to key terms")

    return suggestions
478
+
479
+
480
+ # ============================================================================
481
+ # Result Summarizer
482
+ # ============================================================================
483
+
484
+
485
class ResultSummarizer:
    """Generator of structured summaries from search results"""

    def generate_summary(self, results: List[Dict[str, Any]], query: str) -> Dict[str, Any]:
        """
        Generate comprehensive summary of search results.

        Each result is read through these optional keys: ``_quality`` (a
        dict with ``credibility_level``, ``quality_score`` and
        ``freshness_score``), ``displayLink`` and ``link``. Missing or
        ``None`` values fall back to neutral defaults instead of raising.

        Args:
            results: List of search results with quality analysis
            query: Original search query

        Returns:
            Summary dictionary with statistics and recommendations.
            ``content_types`` is part of the returned structure but is not
            populated by this method.
        """
        summary: Dict[str, Any] = {
            "query": query,
            "total_results": len(results),
            "quality_distribution": {"high": 0, "medium": 0, "low": 0},
            "top_domains": [],
            "content_types": {},
            "freshness_distribution": {
                "very_fresh": 0,
                "fresh": 0,
                "moderate": 0,
                "old": 0,
            },
            "recommended_results": [],
            "warnings": [],
            "suggestions": [],
        }

        if not results:
            summary["warnings"].append("No results found")
            return summary

        quality_counts = summary["quality_distribution"]
        freshness_counts = summary["freshness_distribution"]

        # Gather statistics
        domain_stats: Dict[Any, Dict[str, Any]] = {}

        for result in results:
            # "_quality" may be absent or explicitly None.
            quality = result.get("_quality") or {}

            # An unrecognised credibility level is counted as "medium" (the
            # same default used when the key is missing) rather than
            # raising KeyError, so the three buckets always sum to the
            # number of results.
            quality_level = quality.get("credibility_level", "medium")
            if quality_level not in quality_counts:
                quality_level = "medium"
            quality_counts[quality_level] += 1

            # Domain statistics
            domain = result.get("displayLink", "unknown")
            stats = domain_stats.setdefault(domain, {"count": 0, "total_quality": 0.0})
            stats["count"] += 1
            stats["total_quality"] += quality.get("quality_score", 0.5)

            # Freshness distribution
            freshness = quality.get("freshness_score", 0.5)
            if freshness > 0.9:
                freshness_counts["very_fresh"] += 1
            elif freshness > 0.7:
                freshness_counts["fresh"] += 1
            elif freshness > 0.5:
                freshness_counts["moderate"] += 1
            else:
                freshness_counts["old"] += 1

        # Top domains: most frequent first, ties broken by average quality.
        top_domains = [
            {
                "domain": domain,
                "count": stats["count"],
                "avg_quality": stats["total_quality"] / stats["count"],
            }
            for domain, stats in domain_stats.items()
        ]
        summary["top_domains"] = sorted(
            top_domains,
            key=lambda x: (x["count"], x["avg_quality"]),
            reverse=True,
        )[:5]

        # Recommended results (top 3 by quality)
        sorted_results = sorted(
            results,
            key=lambda x: (x.get("_quality") or {}).get("quality_score", 0),
            reverse=True,
        )
        summary["recommended_results"] = sorted_results[:3]

        # Generate warnings
        if quality_counts["low"] > 0:
            summary["warnings"].append(
                f"{quality_counts['low']} low quality result(s) detected"
            )

        # A missing or None link counts as "not HTTPS".
        https_count = sum(1 for r in results if (r.get("link") or "").startswith("https://"))
        if https_count < len(results):
            summary["warnings"].append(f"{len(results) - https_count} result(s) lack HTTPS")

        # Generate suggestions
        if freshness_counts["old"] > len(results) / 2:
            summary["suggestions"].append("Many results are outdated - consider adding date filter")

        if quality_counts["high"] < 3:
            summary["suggestions"].append("Few high-quality results - try refining your query")

        return summary