realtimex-deeptutor 0.5.0.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276) hide show
  1. realtimex_deeptutor/__init__.py +67 -0
  2. realtimex_deeptutor-0.5.0.post1.dist-info/METADATA +1612 -0
  3. realtimex_deeptutor-0.5.0.post1.dist-info/RECORD +276 -0
  4. realtimex_deeptutor-0.5.0.post1.dist-info/WHEEL +5 -0
  5. realtimex_deeptutor-0.5.0.post1.dist-info/entry_points.txt +2 -0
  6. realtimex_deeptutor-0.5.0.post1.dist-info/licenses/LICENSE +661 -0
  7. realtimex_deeptutor-0.5.0.post1.dist-info/top_level.txt +2 -0
  8. src/__init__.py +40 -0
  9. src/agents/__init__.py +24 -0
  10. src/agents/base_agent.py +657 -0
  11. src/agents/chat/__init__.py +24 -0
  12. src/agents/chat/chat_agent.py +435 -0
  13. src/agents/chat/prompts/en/chat_agent.yaml +35 -0
  14. src/agents/chat/prompts/zh/chat_agent.yaml +35 -0
  15. src/agents/chat/session_manager.py +311 -0
  16. src/agents/co_writer/__init__.py +0 -0
  17. src/agents/co_writer/edit_agent.py +260 -0
  18. src/agents/co_writer/narrator_agent.py +423 -0
  19. src/agents/co_writer/prompts/en/edit_agent.yaml +113 -0
  20. src/agents/co_writer/prompts/en/narrator_agent.yaml +88 -0
  21. src/agents/co_writer/prompts/zh/edit_agent.yaml +113 -0
  22. src/agents/co_writer/prompts/zh/narrator_agent.yaml +88 -0
  23. src/agents/guide/__init__.py +16 -0
  24. src/agents/guide/agents/__init__.py +11 -0
  25. src/agents/guide/agents/chat_agent.py +104 -0
  26. src/agents/guide/agents/interactive_agent.py +223 -0
  27. src/agents/guide/agents/locate_agent.py +149 -0
  28. src/agents/guide/agents/summary_agent.py +150 -0
  29. src/agents/guide/guide_manager.py +500 -0
  30. src/agents/guide/prompts/en/chat_agent.yaml +41 -0
  31. src/agents/guide/prompts/en/interactive_agent.yaml +202 -0
  32. src/agents/guide/prompts/en/locate_agent.yaml +68 -0
  33. src/agents/guide/prompts/en/summary_agent.yaml +157 -0
  34. src/agents/guide/prompts/zh/chat_agent.yaml +41 -0
  35. src/agents/guide/prompts/zh/interactive_agent.yaml +626 -0
  36. src/agents/guide/prompts/zh/locate_agent.yaml +68 -0
  37. src/agents/guide/prompts/zh/summary_agent.yaml +157 -0
  38. src/agents/ideagen/__init__.py +12 -0
  39. src/agents/ideagen/idea_generation_workflow.py +426 -0
  40. src/agents/ideagen/material_organizer_agent.py +173 -0
  41. src/agents/ideagen/prompts/en/idea_generation.yaml +187 -0
  42. src/agents/ideagen/prompts/en/material_organizer.yaml +69 -0
  43. src/agents/ideagen/prompts/zh/idea_generation.yaml +187 -0
  44. src/agents/ideagen/prompts/zh/material_organizer.yaml +69 -0
  45. src/agents/question/__init__.py +24 -0
  46. src/agents/question/agents/__init__.py +18 -0
  47. src/agents/question/agents/generate_agent.py +381 -0
  48. src/agents/question/agents/relevance_analyzer.py +207 -0
  49. src/agents/question/agents/retrieve_agent.py +239 -0
  50. src/agents/question/coordinator.py +718 -0
  51. src/agents/question/example.py +109 -0
  52. src/agents/question/prompts/en/coordinator.yaml +75 -0
  53. src/agents/question/prompts/en/generate_agent.yaml +77 -0
  54. src/agents/question/prompts/en/relevance_analyzer.yaml +41 -0
  55. src/agents/question/prompts/en/retrieve_agent.yaml +32 -0
  56. src/agents/question/prompts/zh/coordinator.yaml +75 -0
  57. src/agents/question/prompts/zh/generate_agent.yaml +77 -0
  58. src/agents/question/prompts/zh/relevance_analyzer.yaml +39 -0
  59. src/agents/question/prompts/zh/retrieve_agent.yaml +30 -0
  60. src/agents/research/agents/__init__.py +23 -0
  61. src/agents/research/agents/decompose_agent.py +507 -0
  62. src/agents/research/agents/manager_agent.py +228 -0
  63. src/agents/research/agents/note_agent.py +180 -0
  64. src/agents/research/agents/rephrase_agent.py +263 -0
  65. src/agents/research/agents/reporting_agent.py +1333 -0
  66. src/agents/research/agents/research_agent.py +714 -0
  67. src/agents/research/data_structures.py +451 -0
  68. src/agents/research/main.py +188 -0
  69. src/agents/research/prompts/en/decompose_agent.yaml +89 -0
  70. src/agents/research/prompts/en/manager_agent.yaml +24 -0
  71. src/agents/research/prompts/en/note_agent.yaml +121 -0
  72. src/agents/research/prompts/en/rephrase_agent.yaml +58 -0
  73. src/agents/research/prompts/en/reporting_agent.yaml +380 -0
  74. src/agents/research/prompts/en/research_agent.yaml +173 -0
  75. src/agents/research/prompts/zh/decompose_agent.yaml +89 -0
  76. src/agents/research/prompts/zh/manager_agent.yaml +24 -0
  77. src/agents/research/prompts/zh/note_agent.yaml +121 -0
  78. src/agents/research/prompts/zh/rephrase_agent.yaml +58 -0
  79. src/agents/research/prompts/zh/reporting_agent.yaml +380 -0
  80. src/agents/research/prompts/zh/research_agent.yaml +173 -0
  81. src/agents/research/research_pipeline.py +1309 -0
  82. src/agents/research/utils/__init__.py +60 -0
  83. src/agents/research/utils/citation_manager.py +799 -0
  84. src/agents/research/utils/json_utils.py +98 -0
  85. src/agents/research/utils/token_tracker.py +297 -0
  86. src/agents/solve/__init__.py +80 -0
  87. src/agents/solve/analysis_loop/__init__.py +14 -0
  88. src/agents/solve/analysis_loop/investigate_agent.py +414 -0
  89. src/agents/solve/analysis_loop/note_agent.py +190 -0
  90. src/agents/solve/main_solver.py +862 -0
  91. src/agents/solve/memory/__init__.py +34 -0
  92. src/agents/solve/memory/citation_memory.py +353 -0
  93. src/agents/solve/memory/investigate_memory.py +226 -0
  94. src/agents/solve/memory/solve_memory.py +340 -0
  95. src/agents/solve/prompts/en/analysis_loop/investigate_agent.yaml +55 -0
  96. src/agents/solve/prompts/en/analysis_loop/note_agent.yaml +54 -0
  97. src/agents/solve/prompts/en/solve_loop/manager_agent.yaml +67 -0
  98. src/agents/solve/prompts/en/solve_loop/precision_answer_agent.yaml +62 -0
  99. src/agents/solve/prompts/en/solve_loop/response_agent.yaml +90 -0
  100. src/agents/solve/prompts/en/solve_loop/solve_agent.yaml +75 -0
  101. src/agents/solve/prompts/en/solve_loop/tool_agent.yaml +38 -0
  102. src/agents/solve/prompts/zh/analysis_loop/investigate_agent.yaml +53 -0
  103. src/agents/solve/prompts/zh/analysis_loop/note_agent.yaml +54 -0
  104. src/agents/solve/prompts/zh/solve_loop/manager_agent.yaml +66 -0
  105. src/agents/solve/prompts/zh/solve_loop/precision_answer_agent.yaml +62 -0
  106. src/agents/solve/prompts/zh/solve_loop/response_agent.yaml +90 -0
  107. src/agents/solve/prompts/zh/solve_loop/solve_agent.yaml +76 -0
  108. src/agents/solve/prompts/zh/solve_loop/tool_agent.yaml +41 -0
  109. src/agents/solve/solve_loop/__init__.py +22 -0
  110. src/agents/solve/solve_loop/citation_manager.py +74 -0
  111. src/agents/solve/solve_loop/manager_agent.py +274 -0
  112. src/agents/solve/solve_loop/precision_answer_agent.py +96 -0
  113. src/agents/solve/solve_loop/response_agent.py +301 -0
  114. src/agents/solve/solve_loop/solve_agent.py +325 -0
  115. src/agents/solve/solve_loop/tool_agent.py +470 -0
  116. src/agents/solve/utils/__init__.py +64 -0
  117. src/agents/solve/utils/config_validator.py +313 -0
  118. src/agents/solve/utils/display_manager.py +223 -0
  119. src/agents/solve/utils/error_handler.py +363 -0
  120. src/agents/solve/utils/json_utils.py +98 -0
  121. src/agents/solve/utils/performance_monitor.py +407 -0
  122. src/agents/solve/utils/token_tracker.py +541 -0
  123. src/api/__init__.py +0 -0
  124. src/api/main.py +240 -0
  125. src/api/routers/__init__.py +1 -0
  126. src/api/routers/agent_config.py +69 -0
  127. src/api/routers/chat.py +296 -0
  128. src/api/routers/co_writer.py +337 -0
  129. src/api/routers/config.py +627 -0
  130. src/api/routers/dashboard.py +18 -0
  131. src/api/routers/guide.py +337 -0
  132. src/api/routers/ideagen.py +436 -0
  133. src/api/routers/knowledge.py +821 -0
  134. src/api/routers/notebook.py +247 -0
  135. src/api/routers/question.py +537 -0
  136. src/api/routers/research.py +394 -0
  137. src/api/routers/settings.py +164 -0
  138. src/api/routers/solve.py +305 -0
  139. src/api/routers/system.py +252 -0
  140. src/api/run_server.py +61 -0
  141. src/api/utils/history.py +172 -0
  142. src/api/utils/log_interceptor.py +21 -0
  143. src/api/utils/notebook_manager.py +415 -0
  144. src/api/utils/progress_broadcaster.py +72 -0
  145. src/api/utils/task_id_manager.py +100 -0
  146. src/config/__init__.py +0 -0
  147. src/config/accessors.py +18 -0
  148. src/config/constants.py +34 -0
  149. src/config/defaults.py +18 -0
  150. src/config/schema.py +38 -0
  151. src/config/settings.py +50 -0
  152. src/core/errors.py +62 -0
  153. src/knowledge/__init__.py +23 -0
  154. src/knowledge/add_documents.py +606 -0
  155. src/knowledge/config.py +65 -0
  156. src/knowledge/example_add_documents.py +236 -0
  157. src/knowledge/extract_numbered_items.py +1039 -0
  158. src/knowledge/initializer.py +621 -0
  159. src/knowledge/kb.py +22 -0
  160. src/knowledge/manager.py +782 -0
  161. src/knowledge/progress_tracker.py +182 -0
  162. src/knowledge/start_kb.py +535 -0
  163. src/logging/__init__.py +103 -0
  164. src/logging/adapters/__init__.py +17 -0
  165. src/logging/adapters/lightrag.py +184 -0
  166. src/logging/adapters/llamaindex.py +141 -0
  167. src/logging/config.py +80 -0
  168. src/logging/handlers/__init__.py +20 -0
  169. src/logging/handlers/console.py +75 -0
  170. src/logging/handlers/file.py +201 -0
  171. src/logging/handlers/websocket.py +127 -0
  172. src/logging/logger.py +709 -0
  173. src/logging/stats/__init__.py +16 -0
  174. src/logging/stats/llm_stats.py +179 -0
  175. src/services/__init__.py +56 -0
  176. src/services/config/__init__.py +61 -0
  177. src/services/config/knowledge_base_config.py +210 -0
  178. src/services/config/loader.py +260 -0
  179. src/services/config/unified_config.py +603 -0
  180. src/services/embedding/__init__.py +45 -0
  181. src/services/embedding/adapters/__init__.py +22 -0
  182. src/services/embedding/adapters/base.py +106 -0
  183. src/services/embedding/adapters/cohere.py +127 -0
  184. src/services/embedding/adapters/jina.py +99 -0
  185. src/services/embedding/adapters/ollama.py +116 -0
  186. src/services/embedding/adapters/openai_compatible.py +96 -0
  187. src/services/embedding/client.py +159 -0
  188. src/services/embedding/config.py +156 -0
  189. src/services/embedding/provider.py +119 -0
  190. src/services/llm/__init__.py +152 -0
  191. src/services/llm/capabilities.py +313 -0
  192. src/services/llm/client.py +302 -0
  193. src/services/llm/cloud_provider.py +530 -0
  194. src/services/llm/config.py +200 -0
  195. src/services/llm/error_mapping.py +103 -0
  196. src/services/llm/exceptions.py +152 -0
  197. src/services/llm/factory.py +450 -0
  198. src/services/llm/local_provider.py +347 -0
  199. src/services/llm/providers/anthropic.py +95 -0
  200. src/services/llm/providers/base_provider.py +93 -0
  201. src/services/llm/providers/open_ai.py +83 -0
  202. src/services/llm/registry.py +71 -0
  203. src/services/llm/telemetry.py +40 -0
  204. src/services/llm/types.py +27 -0
  205. src/services/llm/utils.py +333 -0
  206. src/services/prompt/__init__.py +25 -0
  207. src/services/prompt/manager.py +206 -0
  208. src/services/rag/__init__.py +64 -0
  209. src/services/rag/components/__init__.py +29 -0
  210. src/services/rag/components/base.py +59 -0
  211. src/services/rag/components/chunkers/__init__.py +18 -0
  212. src/services/rag/components/chunkers/base.py +34 -0
  213. src/services/rag/components/chunkers/fixed.py +71 -0
  214. src/services/rag/components/chunkers/numbered_item.py +94 -0
  215. src/services/rag/components/chunkers/semantic.py +97 -0
  216. src/services/rag/components/embedders/__init__.py +14 -0
  217. src/services/rag/components/embedders/base.py +32 -0
  218. src/services/rag/components/embedders/openai.py +63 -0
  219. src/services/rag/components/indexers/__init__.py +18 -0
  220. src/services/rag/components/indexers/base.py +35 -0
  221. src/services/rag/components/indexers/graph.py +172 -0
  222. src/services/rag/components/indexers/lightrag.py +156 -0
  223. src/services/rag/components/indexers/vector.py +146 -0
  224. src/services/rag/components/parsers/__init__.py +18 -0
  225. src/services/rag/components/parsers/base.py +35 -0
  226. src/services/rag/components/parsers/markdown.py +52 -0
  227. src/services/rag/components/parsers/pdf.py +115 -0
  228. src/services/rag/components/parsers/text.py +86 -0
  229. src/services/rag/components/retrievers/__init__.py +18 -0
  230. src/services/rag/components/retrievers/base.py +34 -0
  231. src/services/rag/components/retrievers/dense.py +200 -0
  232. src/services/rag/components/retrievers/hybrid.py +164 -0
  233. src/services/rag/components/retrievers/lightrag.py +169 -0
  234. src/services/rag/components/routing.py +286 -0
  235. src/services/rag/factory.py +234 -0
  236. src/services/rag/pipeline.py +215 -0
  237. src/services/rag/pipelines/__init__.py +32 -0
  238. src/services/rag/pipelines/academic.py +44 -0
  239. src/services/rag/pipelines/lightrag.py +43 -0
  240. src/services/rag/pipelines/llamaindex.py +313 -0
  241. src/services/rag/pipelines/raganything.py +384 -0
  242. src/services/rag/service.py +244 -0
  243. src/services/rag/types.py +73 -0
  244. src/services/search/__init__.py +284 -0
  245. src/services/search/base.py +87 -0
  246. src/services/search/consolidation.py +398 -0
  247. src/services/search/providers/__init__.py +128 -0
  248. src/services/search/providers/baidu.py +188 -0
  249. src/services/search/providers/exa.py +194 -0
  250. src/services/search/providers/jina.py +161 -0
  251. src/services/search/providers/perplexity.py +153 -0
  252. src/services/search/providers/serper.py +209 -0
  253. src/services/search/providers/tavily.py +161 -0
  254. src/services/search/types.py +114 -0
  255. src/services/setup/__init__.py +34 -0
  256. src/services/setup/init.py +285 -0
  257. src/services/tts/__init__.py +16 -0
  258. src/services/tts/config.py +99 -0
  259. src/tools/__init__.py +91 -0
  260. src/tools/code_executor.py +536 -0
  261. src/tools/paper_search_tool.py +171 -0
  262. src/tools/query_item_tool.py +310 -0
  263. src/tools/question/__init__.py +15 -0
  264. src/tools/question/exam_mimic.py +616 -0
  265. src/tools/question/pdf_parser.py +211 -0
  266. src/tools/question/question_extractor.py +397 -0
  267. src/tools/rag_tool.py +173 -0
  268. src/tools/tex_chunker.py +339 -0
  269. src/tools/tex_downloader.py +253 -0
  270. src/tools/web_search.py +71 -0
  271. src/utils/config_manager.py +206 -0
  272. src/utils/document_validator.py +168 -0
  273. src/utils/error_rate_tracker.py +111 -0
  274. src/utils/error_utils.py +82 -0
  275. src/utils/json_parser.py +110 -0
  276. src/utils/network/circuit_breaker.py +79 -0
@@ -0,0 +1,398 @@
1
+ """
2
+ Answer Consolidation - Generate answers from raw search results
3
+
4
+ Supports two strategies:
5
+ 1. template: Fast Jinja2 template rendering
6
+ 2. llm: LLM-based answer synthesis (uses project's LLM config from env vars)
7
+ """
8
+
9
+ from typing import Any
10
+
11
+ from jinja2 import BaseLoader, Environment
12
+
13
+ from src.logging import get_logger
14
+ from src.services.llm import get_llm_client
15
+
16
+ from .types import WebSearchResponse
17
+
18
+ # Module logger
19
+ _logger = get_logger("Search.Consolidation", level="INFO")
20
+
21
+
22
+ # Available consolidation types
23
+ CONSOLIDATION_TYPES = ["none", "template", "llm"]
24
+
25
+
26
+ # =============================================================================
27
+ # PROVIDER-SPECIFIC TEMPLATES
28
+ # =============================================================================
29
+ # Only providers that return raw SERP results (supports_answer=False) need templates.
30
+ # AI providers (Perplexity, Tavily, Baidu, Exa) already generate answers.
31
# Jinja2 template source per provider key. Every template receives the base
# context (query, results, max_results, ...); the "serper" and "jina"
# templates additionally read provider-specific metadata fields.
PROVIDER_TEMPLATES: dict[str, str] = {
    # -------------------------------------------------------------------------
    # SERPER TEMPLATE
    # -------------------------------------------------------------------------
    "serper": """{% if knowledge_graph %}
## {{ knowledge_graph.title }}{% if knowledge_graph.type %} ({{ knowledge_graph.type }}){% endif %}

{{ knowledge_graph.description }}
{% if knowledge_graph.attributes %}
{% for key, value in knowledge_graph.attributes.items() %}
- **{{ key }}**: {{ value }}
{% endfor %}
{% endif %}
{% if knowledge_graph.website %}🔗 [{{ knowledge_graph.website }}]({{ knowledge_graph.website }}){% endif %}

---
{% endif %}
{% if answer_box %}
### Direct Answer
{{ answer_box.answer or answer_box.snippet }}
{% if answer_box.title %}*Source: [{{ answer_box.title }}]({{ answer_box.link }})*{% endif %}

---
{% endif %}
### Search Results for "{{ query }}"

{% for result in results[:max_results] %}
**[{{ loop.index }}] {{ result.title }}**
{{ result.snippet }}
{% if result.date %}📅 {{ result.date }}{% endif %}
🔗 {{ result.url }}
{% if result.sitelinks %}
└ Related: {% for link in result.sitelinks[:3] %}[{{ link.title }}]({{ link.link }}){% if not loop.last %} | {% endif %}{% endfor %}
{% endif %}

{% endfor %}
{% if people_also_ask %}
---
### People Also Ask
{% for qa in people_also_ask[:3] %}
**Q: {{ qa.question }}**
{{ qa.snippet }}
*[{{ qa.title }}]({{ qa.link }})*

{% endfor %}
{% endif %}
{% if related_searches %}
---
*Related searches: {% for rs in related_searches[:5] %}{{ rs.query }}{% if not loop.last %}, {% endif %}{% endfor %}*
{% endif %}""",
    # -------------------------------------------------------------------------
    # JINA TEMPLATE
    # -------------------------------------------------------------------------
    # NOTE: `links` / `images` are iterated via `.items()`. A dict view cannot
    # be sliced, so it is materialized with the `list` filter before the
    # `[:10]` / `[:5]` limit is applied.
    "jina": """### Search Results for "{{ query }}"

{% for result in results[:max_results] %}
---
## [{{ loop.index }}] {{ result.title }}

{% if result.attributes.date %}📅 *{{ result.attributes.date }}*{% endif %}

{% if result.content %}
{% if result.snippet %}*{{ result.snippet }}*{% endif %}

### Content Preview
{{ result.content[:2000] }}{% if result.content|length > 2000 %}

*[Content truncated - {{ result.attributes.tokens|default('many') }} tokens total]*{% endif %}
{% else %}
*{{ result.snippet }}*
{% endif %}

🔗 [{{ result.url }}]({{ result.url }})

{% endfor %}
---
*{{ results|length }} results via Jina Reader{% if results and results|length > 0 and not results[0].content %} (no-content mode){% endif %}*

{% if links %}
### Extracted Links
{% for name, url in (links.items()|list)[:10] %}
- [{{ name }}]({{ url }})
{% endfor %}
{% endif %}
{% if images %}
### Images Found
{% for alt, src in (images.items()|list)[:5] %}
- ![{{ alt }}]({{ src }})
{% endfor %}
{% endif %}""",
    # -------------------------------------------------------------------------
    # SERPER SCHOLAR TEMPLATE
    # -------------------------------------------------------------------------
    "serper_scholar": """### Academic Results for "{{ query }}"

{% for result in results[:max_results] %}
**[{{ loop.index }}] {{ result.title }}**{% if result.attributes.year %} ({{ result.attributes.year }}){% endif %}

{% if result.attributes.publicationInfo %}*{{ result.attributes.publicationInfo }}*{% endif %}

{{ result.snippet }}

{% if result.attributes.pdfUrl %}📄 [PDF]({{ result.attributes.pdfUrl }}) | {% endif %}🔗 [Link]({{ result.url }})
{% if result.attributes.citedBy %}📚 Cited by: {{ result.attributes.citedBy }}{% endif %}

{% endfor %}
---
*{{ results|length }} academic papers found via Google Scholar*""",
}
140
+
141
+
142
+ class AnswerConsolidator:
143
+ """
144
+ Consolidate raw SERP results into formatted answers.
145
+
146
+ IMPORTANT: Template consolidation only works for providers that have
147
+ specific templates defined (serper, jina, serper_scholar).
148
+
149
+ For other providers, use:
150
+ - consolidation_type="llm" for LLM-based synthesis
151
+ - custom_template for a custom Jinja2 template
152
+ """
153
+
154
+ # Map provider names to their specific templates
155
+ # Only these providers support template consolidation
156
+ PROVIDER_TEMPLATE_MAP = {
157
+ "serper": "serper",
158
+ "jina": "jina",
159
+ "serper_scholar": "serper_scholar",
160
+ }
161
+
162
+ def __init__(
163
+ self,
164
+ consolidation_type: str = "template",
165
+ custom_template: str | None = None,
166
+ llm_config: dict[str, Any] | None = None,
167
+ max_results: int = 5,
168
+ autoescape: bool = True,
169
+ ):
170
+ """
171
+ Initialize consolidator.
172
+
173
+ Args:
174
+ consolidation_type: "none", "template", or "llm"
175
+ custom_template: Custom Jinja2 template string
176
+ llm_config: Optional overrides (system_prompt, max_tokens, temperature)
177
+ max_results: Maximum results to include in answer
178
+ autoescape: Whether to enable Jinja2 autoescape for security (default: True)
179
+ """
180
+ self.consolidation_type = consolidation_type
181
+ self.custom_template = custom_template
182
+ self.llm_config = llm_config or {}
183
+ self.max_results = max_results
184
+ # Security: autoescape defaults to True (set in function signature above).
185
+ # When True, Jinja2 auto-escapes HTML to prevent XSS.
186
+ self.jinja_env = Environment(loader=BaseLoader(), autoescape=autoescape) # nosec B701
187
+
188
+ if self.custom_template is not None and autoescape:
189
+ _logger.warning(
190
+ "Custom Jinja2 templates are rendered with autoescape=True. "
191
+ "HTML in rendered variables will be escaped by default; use the "
192
+ "'safe' filter in your template if you intentionally need raw HTML."
193
+ )
194
+
195
+ def consolidate(self, response: WebSearchResponse) -> WebSearchResponse:
196
+ """
197
+ Consolidate search results into an answer.
198
+
199
+ Args:
200
+ response: WebSearchResponse with search_results populated
201
+
202
+ Returns:
203
+ WebSearchResponse with answer field populated
204
+ """
205
+ if self.consolidation_type == "none":
206
+ _logger.debug("Consolidation disabled, returning raw response")
207
+ return response
208
+
209
+ results_count = len(response.search_results)
210
+ _logger.info(
211
+ f"Consolidating {results_count} results from {response.provider} using {self.consolidation_type}"
212
+ )
213
+
214
+ if self.consolidation_type == "template":
215
+ response.answer = self._consolidate_with_template(response)
216
+ _logger.success(f"Template consolidation completed ({len(response.answer)} chars)")
217
+ elif self.consolidation_type == "llm":
218
+ response.answer = self._consolidate_with_llm(response)
219
+ _logger.success(f"LLM consolidation completed ({len(response.answer)} chars)")
220
+ else:
221
+ _logger.error(f"Unknown consolidation type: {self.consolidation_type}")
222
+ raise ValueError(f"Unknown consolidation type: {self.consolidation_type}")
223
+
224
+ return response
225
+
226
+ def _get_template_for_provider(self, provider: str) -> str:
227
+ """
228
+ Get the template for a specific provider.
229
+
230
+ Only provider-specific templates exist because each provider has
231
+ different response schemas and metadata. No universal templates.
232
+
233
+ Args:
234
+ provider: Provider name (e.g., "serper", "jina")
235
+
236
+ Returns:
237
+ Template string for this provider
238
+
239
+ Raises:
240
+ ValueError: If no template exists for this provider
241
+ """
242
+ # 1. Custom template takes highest priority
243
+ if self.custom_template:
244
+ _logger.debug(f"Using custom template ({len(self.custom_template)} chars)")
245
+ return self.custom_template
246
+
247
+ # 2. Get provider-specific template
248
+ template_key = self.PROVIDER_TEMPLATE_MAP.get(provider.lower())
249
+ if template_key and template_key in PROVIDER_TEMPLATES:
250
+ _logger.debug(f"Using provider-specific template: {template_key}")
251
+ return PROVIDER_TEMPLATES[template_key]
252
+
253
+ # 3. No template exists for this provider - fail explicitly
254
+ available = list(PROVIDER_TEMPLATES.keys())
255
+ _logger.error(f"No template for provider '{provider}'. Available: {available}")
256
+ raise ValueError(
257
+ f"No template consolidation available for provider '{provider}'. "
258
+ f"Template consolidation only works with: {available}. "
259
+ f"Use consolidation='llm' or provide a custom_template for other providers."
260
+ )
261
+
262
+ def _build_provider_context(self, response: WebSearchResponse) -> dict[str, Any]:
263
+ """
264
+ Build template context with provider-specific fields.
265
+
266
+ Each provider has unique response fields that we extract from metadata.
267
+ """
268
+ # Base context (common to all providers)
269
+ context: dict[str, Any] = {
270
+ "query": response.query,
271
+ "provider": response.provider,
272
+ "model": response.model,
273
+ "max_results": self.max_results,
274
+ "results": [
275
+ {
276
+ "title": r.title,
277
+ "url": r.url,
278
+ "snippet": r.snippet,
279
+ "date": r.date,
280
+ "source": r.source,
281
+ "content": r.content,
282
+ "sitelinks": r.sitelinks,
283
+ "attributes": r.attributes,
284
+ }
285
+ for r in response.search_results
286
+ ],
287
+ "citations": [
288
+ {
289
+ "id": c.id,
290
+ "reference": c.reference,
291
+ "url": c.url,
292
+ "title": c.title,
293
+ "snippet": c.snippet,
294
+ }
295
+ for c in response.citations
296
+ ],
297
+ "timestamp": response.timestamp,
298
+ }
299
+
300
+ # Extract provider-specific metadata
301
+ metadata = response.metadata or {}
302
+ provider_lower = response.provider.lower()
303
+
304
+ # -----------------------------------------------------------------
305
+ # SERPER-specific context
306
+ # -----------------------------------------------------------------
307
+ if provider_lower == "serper":
308
+ context["knowledge_graph"] = metadata.get("knowledgeGraph")
309
+ context["answer_box"] = metadata.get("answerBox")
310
+ context["people_also_ask"] = metadata.get("peopleAlsoAsk")
311
+ context["related_searches"] = metadata.get("relatedSearches")
312
+
313
+ # -----------------------------------------------------------------
314
+ # JINA-specific context
315
+ # -----------------------------------------------------------------
316
+ elif provider_lower == "jina":
317
+ context["links"] = metadata.get("links", {})
318
+ context["images"] = metadata.get("images", {})
319
+
320
+ return context
321
+
322
+ def _consolidate_with_template(self, response: WebSearchResponse) -> str:
323
+ """Render results using Jinja2 template"""
324
+ _logger.debug(f"Building template context for {response.provider}")
325
+
326
+ # Get template (auto-detect provider-specific if not explicitly set)
327
+ template_str = self._get_template_for_provider(response.provider)
328
+ template = self.jinja_env.from_string(template_str)
329
+
330
+ # Build context with provider-specific fields
331
+ context = self._build_provider_context(response)
332
+ _logger.debug(
333
+ f"Context has {len(context.get('results', []))} results, {len(context.get('citations', []))} citations"
334
+ )
335
+
336
+ try:
337
+ rendered = template.render(**context)
338
+ _logger.debug("Template rendered successfully")
339
+ return rendered
340
+ except Exception as e:
341
+ _logger.error(f"Template rendering failed: {e}")
342
+ raise
343
+
344
+ def _consolidate_with_llm(self, response: WebSearchResponse) -> str:
345
+ """Generate answer using LLM."""
346
+ system_prompt, user_prompt = self._build_prompts(response)
347
+
348
+ llm = get_llm_client()
349
+ max_tokens = self.llm_config.get("max_tokens", 1000)
350
+ temperature = self.llm_config.get("temperature", 0.3)
351
+
352
+ return llm.complete_sync(
353
+ prompt=user_prompt,
354
+ system_prompt=system_prompt,
355
+ max_tokens=max_tokens,
356
+ temperature=temperature,
357
+ )
358
+
359
+ def _build_prompts(self, response: WebSearchResponse) -> tuple[str, str]:
360
+ """Build system and user prompts for LLM consolidation."""
361
+ results_text = []
362
+ for i, r in enumerate(response.search_results[: self.max_results], 1):
363
+ text = f"[{i}] {r.title}\nURL: {r.url}\n"
364
+ if r.snippet:
365
+ text += f"{r.snippet}\n"
366
+ if r.content:
367
+ text += f"{r.content[:5000]}{'...' if len(r.content) > 5000 else ''}"
368
+ results_text.append(text)
369
+
370
+ system_prompt = self.llm_config.get(
371
+ "system_prompt",
372
+ """You are a search result consolidator. Your output will be used as grounding context for another LLM.
373
+
374
+ Task: Extract and structure relevant information from web search results.
375
+
376
+ Output format:
377
+ - Start with a brief factual summary (2-3 sentences)
378
+ - List key facts as bullet points with citation numbers [1], [2], etc.
379
+ - Include specific data: numbers, dates, names, definitions
380
+ - Note any conflicting information between sources
381
+ - End with a "Sources:" section listing [n] URL pairs
382
+
383
+ Be factual and dense. Omit filler words. Prioritize information diversity.""",
384
+ )
385
+
386
+ user_prompt = f"""Query: {response.query}
387
+
388
+ Search Results:
389
+ ---
390
+ {chr(10).join(results_text)}
391
+ ---
392
+
393
+ Consolidate these results into structured grounding context."""
394
+
395
+ return system_prompt, user_prompt
396
+
397
+
398
+ __all__ = ["AnswerConsolidator", "CONSOLIDATION_TYPES", "PROVIDER_TEMPLATES"]
@@ -0,0 +1,128 @@
1
+ """
2
+ Web Search Provider Registry
3
+
4
+ This module manages the registration and retrieval of search providers.
5
+ """
6
+
7
+ import os
8
+ from typing import Type
9
+
10
+ from ..base import BaseSearchProvider
11
+
12
+ _PROVIDERS: dict[str, Type[BaseSearchProvider]] = {}
13
+
14
+
15
def register_provider(name: str):
    """
    Build a class decorator that records a provider in the registry.

    The decorated class is stored in ``_PROVIDERS`` under the lower-cased
    ``name`` and its ``name`` attribute is set to that same lower-cased value.

    Args:
        name: Name to register the provider under.

    Returns:
        Decorator function that returns the class it was given.
    """

    def _register(provider_cls: Type[BaseSearchProvider]):
        registry_key = name.lower()
        _PROVIDERS[registry_key] = provider_cls
        provider_cls.name = registry_key
        return provider_cls

    return _register
32
+
33
+
34
def get_provider(name: str, **kwargs) -> BaseSearchProvider:
    """
    Instantiate the registered provider called ``name``.

    Args:
        name: Provider name (case-insensitive).
        **kwargs: Arguments to pass to provider constructor.

    Returns:
        BaseSearchProvider: Provider instance.

    Raises:
        ValueError: If provider is not found.
    """
    key = name.lower()
    if key in _PROVIDERS:
        provider_cls = _PROVIDERS[key]
        return provider_cls(**kwargs)

    # Unknown name: report what is registered to help the caller.
    known = ", ".join(sorted(_PROVIDERS.keys()))
    raise ValueError(f"Unknown provider: {key}. Available: {known}")
53
+
54
+
55
def list_providers() -> list[str]:
    """
    Return the names of every registered provider.

    Returns:
        list[str]: Sorted list of provider names.
    """
    names = list(_PROVIDERS)
    names.sort()
    return names
63
+
64
+
65
def get_available_providers() -> list[str]:
    """
    List providers that are currently available.

    Each registered provider class is instantiated with no arguments and
    asked ``is_available()``. The probe is best-effort: a provider whose
    construction or check raises is treated as unavailable, and the failure
    is logged at debug level instead of being silently discarded.

    Returns:
        list[str]: Sorted list of available provider names.
    """
    # Local import: this module does not otherwise need a logger.
    import logging

    log = logging.getLogger(__name__)

    available = []
    for name, cls in _PROVIDERS.items():
        try:
            usable = cls().is_available()
        except Exception as exc:  # deliberate best-effort probe
            log.debug("Search provider %r skipped during availability check: %s", name, exc)
            continue
        if usable:
            available.append(name)
    return sorted(available)
81
+
82
+
83
def get_providers_info() -> list[dict]:
    """
    Collect display information for every registered provider.

    Values are read from class attributes of each provider class; entries are
    ordered by provider id.

    Returns:
        list[dict]: One dict per provider with the keys id, name, description,
        supports_answer and requires_api_key.
    """
    return [
        {
            "id": provider_id,
            "name": provider_cls.display_name,
            "description": provider_cls.description,
            "supports_answer": provider_cls.supports_answer,
            "requires_api_key": provider_cls.requires_api_key,
        }
        for provider_id, provider_cls in sorted(_PROVIDERS.items())
    ]
102
+
103
+
104
def get_default_provider(**kwargs) -> BaseSearchProvider:
    """
    Get the default provider based on SEARCH_PROVIDER env var.

    Falls back to "perplexity" when the variable is unset. A blank or
    whitespace-only value (e.g. ``SEARCH_PROVIDER=`` in an env file) is
    treated the same as unset rather than being looked up as the provider
    named "".

    Args:
        **kwargs: Arguments to pass to provider constructor.

    Returns:
        BaseSearchProvider: Default provider instance.

    Raises:
        ValueError: If the configured provider name is not registered.
    """
    configured = os.environ.get("SEARCH_PROVIDER", "").strip()
    provider_name = (configured or "perplexity").lower()
    return get_provider(provider_name, **kwargs)
116
+
117
+
118
+ # Auto-import all providers to trigger registration
119
+ from . import baidu, exa, jina, perplexity, serper, tavily
120
+
121
+ __all__ = [
122
+ "register_provider",
123
+ "get_provider",
124
+ "list_providers",
125
+ "get_available_providers",
126
+ "get_providers_info",
127
+ "get_default_provider",
128
+ ]