realtimex-deeptutor 0.5.0.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276) hide show
  1. realtimex_deeptutor/__init__.py +67 -0
  2. realtimex_deeptutor-0.5.0.post1.dist-info/METADATA +1612 -0
  3. realtimex_deeptutor-0.5.0.post1.dist-info/RECORD +276 -0
  4. realtimex_deeptutor-0.5.0.post1.dist-info/WHEEL +5 -0
  5. realtimex_deeptutor-0.5.0.post1.dist-info/entry_points.txt +2 -0
  6. realtimex_deeptutor-0.5.0.post1.dist-info/licenses/LICENSE +661 -0
  7. realtimex_deeptutor-0.5.0.post1.dist-info/top_level.txt +2 -0
  8. src/__init__.py +40 -0
  9. src/agents/__init__.py +24 -0
  10. src/agents/base_agent.py +657 -0
  11. src/agents/chat/__init__.py +24 -0
  12. src/agents/chat/chat_agent.py +435 -0
  13. src/agents/chat/prompts/en/chat_agent.yaml +35 -0
  14. src/agents/chat/prompts/zh/chat_agent.yaml +35 -0
  15. src/agents/chat/session_manager.py +311 -0
  16. src/agents/co_writer/__init__.py +0 -0
  17. src/agents/co_writer/edit_agent.py +260 -0
  18. src/agents/co_writer/narrator_agent.py +423 -0
  19. src/agents/co_writer/prompts/en/edit_agent.yaml +113 -0
  20. src/agents/co_writer/prompts/en/narrator_agent.yaml +88 -0
  21. src/agents/co_writer/prompts/zh/edit_agent.yaml +113 -0
  22. src/agents/co_writer/prompts/zh/narrator_agent.yaml +88 -0
  23. src/agents/guide/__init__.py +16 -0
  24. src/agents/guide/agents/__init__.py +11 -0
  25. src/agents/guide/agents/chat_agent.py +104 -0
  26. src/agents/guide/agents/interactive_agent.py +223 -0
  27. src/agents/guide/agents/locate_agent.py +149 -0
  28. src/agents/guide/agents/summary_agent.py +150 -0
  29. src/agents/guide/guide_manager.py +500 -0
  30. src/agents/guide/prompts/en/chat_agent.yaml +41 -0
  31. src/agents/guide/prompts/en/interactive_agent.yaml +202 -0
  32. src/agents/guide/prompts/en/locate_agent.yaml +68 -0
  33. src/agents/guide/prompts/en/summary_agent.yaml +157 -0
  34. src/agents/guide/prompts/zh/chat_agent.yaml +41 -0
  35. src/agents/guide/prompts/zh/interactive_agent.yaml +626 -0
  36. src/agents/guide/prompts/zh/locate_agent.yaml +68 -0
  37. src/agents/guide/prompts/zh/summary_agent.yaml +157 -0
  38. src/agents/ideagen/__init__.py +12 -0
  39. src/agents/ideagen/idea_generation_workflow.py +426 -0
  40. src/agents/ideagen/material_organizer_agent.py +173 -0
  41. src/agents/ideagen/prompts/en/idea_generation.yaml +187 -0
  42. src/agents/ideagen/prompts/en/material_organizer.yaml +69 -0
  43. src/agents/ideagen/prompts/zh/idea_generation.yaml +187 -0
  44. src/agents/ideagen/prompts/zh/material_organizer.yaml +69 -0
  45. src/agents/question/__init__.py +24 -0
  46. src/agents/question/agents/__init__.py +18 -0
  47. src/agents/question/agents/generate_agent.py +381 -0
  48. src/agents/question/agents/relevance_analyzer.py +207 -0
  49. src/agents/question/agents/retrieve_agent.py +239 -0
  50. src/agents/question/coordinator.py +718 -0
  51. src/agents/question/example.py +109 -0
  52. src/agents/question/prompts/en/coordinator.yaml +75 -0
  53. src/agents/question/prompts/en/generate_agent.yaml +77 -0
  54. src/agents/question/prompts/en/relevance_analyzer.yaml +41 -0
  55. src/agents/question/prompts/en/retrieve_agent.yaml +32 -0
  56. src/agents/question/prompts/zh/coordinator.yaml +75 -0
  57. src/agents/question/prompts/zh/generate_agent.yaml +77 -0
  58. src/agents/question/prompts/zh/relevance_analyzer.yaml +39 -0
  59. src/agents/question/prompts/zh/retrieve_agent.yaml +30 -0
  60. src/agents/research/agents/__init__.py +23 -0
  61. src/agents/research/agents/decompose_agent.py +507 -0
  62. src/agents/research/agents/manager_agent.py +228 -0
  63. src/agents/research/agents/note_agent.py +180 -0
  64. src/agents/research/agents/rephrase_agent.py +263 -0
  65. src/agents/research/agents/reporting_agent.py +1333 -0
  66. src/agents/research/agents/research_agent.py +714 -0
  67. src/agents/research/data_structures.py +451 -0
  68. src/agents/research/main.py +188 -0
  69. src/agents/research/prompts/en/decompose_agent.yaml +89 -0
  70. src/agents/research/prompts/en/manager_agent.yaml +24 -0
  71. src/agents/research/prompts/en/note_agent.yaml +121 -0
  72. src/agents/research/prompts/en/rephrase_agent.yaml +58 -0
  73. src/agents/research/prompts/en/reporting_agent.yaml +380 -0
  74. src/agents/research/prompts/en/research_agent.yaml +173 -0
  75. src/agents/research/prompts/zh/decompose_agent.yaml +89 -0
  76. src/agents/research/prompts/zh/manager_agent.yaml +24 -0
  77. src/agents/research/prompts/zh/note_agent.yaml +121 -0
  78. src/agents/research/prompts/zh/rephrase_agent.yaml +58 -0
  79. src/agents/research/prompts/zh/reporting_agent.yaml +380 -0
  80. src/agents/research/prompts/zh/research_agent.yaml +173 -0
  81. src/agents/research/research_pipeline.py +1309 -0
  82. src/agents/research/utils/__init__.py +60 -0
  83. src/agents/research/utils/citation_manager.py +799 -0
  84. src/agents/research/utils/json_utils.py +98 -0
  85. src/agents/research/utils/token_tracker.py +297 -0
  86. src/agents/solve/__init__.py +80 -0
  87. src/agents/solve/analysis_loop/__init__.py +14 -0
  88. src/agents/solve/analysis_loop/investigate_agent.py +414 -0
  89. src/agents/solve/analysis_loop/note_agent.py +190 -0
  90. src/agents/solve/main_solver.py +862 -0
  91. src/agents/solve/memory/__init__.py +34 -0
  92. src/agents/solve/memory/citation_memory.py +353 -0
  93. src/agents/solve/memory/investigate_memory.py +226 -0
  94. src/agents/solve/memory/solve_memory.py +340 -0
  95. src/agents/solve/prompts/en/analysis_loop/investigate_agent.yaml +55 -0
  96. src/agents/solve/prompts/en/analysis_loop/note_agent.yaml +54 -0
  97. src/agents/solve/prompts/en/solve_loop/manager_agent.yaml +67 -0
  98. src/agents/solve/prompts/en/solve_loop/precision_answer_agent.yaml +62 -0
  99. src/agents/solve/prompts/en/solve_loop/response_agent.yaml +90 -0
  100. src/agents/solve/prompts/en/solve_loop/solve_agent.yaml +75 -0
  101. src/agents/solve/prompts/en/solve_loop/tool_agent.yaml +38 -0
  102. src/agents/solve/prompts/zh/analysis_loop/investigate_agent.yaml +53 -0
  103. src/agents/solve/prompts/zh/analysis_loop/note_agent.yaml +54 -0
  104. src/agents/solve/prompts/zh/solve_loop/manager_agent.yaml +66 -0
  105. src/agents/solve/prompts/zh/solve_loop/precision_answer_agent.yaml +62 -0
  106. src/agents/solve/prompts/zh/solve_loop/response_agent.yaml +90 -0
  107. src/agents/solve/prompts/zh/solve_loop/solve_agent.yaml +76 -0
  108. src/agents/solve/prompts/zh/solve_loop/tool_agent.yaml +41 -0
  109. src/agents/solve/solve_loop/__init__.py +22 -0
  110. src/agents/solve/solve_loop/citation_manager.py +74 -0
  111. src/agents/solve/solve_loop/manager_agent.py +274 -0
  112. src/agents/solve/solve_loop/precision_answer_agent.py +96 -0
  113. src/agents/solve/solve_loop/response_agent.py +301 -0
  114. src/agents/solve/solve_loop/solve_agent.py +325 -0
  115. src/agents/solve/solve_loop/tool_agent.py +470 -0
  116. src/agents/solve/utils/__init__.py +64 -0
  117. src/agents/solve/utils/config_validator.py +313 -0
  118. src/agents/solve/utils/display_manager.py +223 -0
  119. src/agents/solve/utils/error_handler.py +363 -0
  120. src/agents/solve/utils/json_utils.py +98 -0
  121. src/agents/solve/utils/performance_monitor.py +407 -0
  122. src/agents/solve/utils/token_tracker.py +541 -0
  123. src/api/__init__.py +0 -0
  124. src/api/main.py +240 -0
  125. src/api/routers/__init__.py +1 -0
  126. src/api/routers/agent_config.py +69 -0
  127. src/api/routers/chat.py +296 -0
  128. src/api/routers/co_writer.py +337 -0
  129. src/api/routers/config.py +627 -0
  130. src/api/routers/dashboard.py +18 -0
  131. src/api/routers/guide.py +337 -0
  132. src/api/routers/ideagen.py +436 -0
  133. src/api/routers/knowledge.py +821 -0
  134. src/api/routers/notebook.py +247 -0
  135. src/api/routers/question.py +537 -0
  136. src/api/routers/research.py +394 -0
  137. src/api/routers/settings.py +164 -0
  138. src/api/routers/solve.py +305 -0
  139. src/api/routers/system.py +252 -0
  140. src/api/run_server.py +61 -0
  141. src/api/utils/history.py +172 -0
  142. src/api/utils/log_interceptor.py +21 -0
  143. src/api/utils/notebook_manager.py +415 -0
  144. src/api/utils/progress_broadcaster.py +72 -0
  145. src/api/utils/task_id_manager.py +100 -0
  146. src/config/__init__.py +0 -0
  147. src/config/accessors.py +18 -0
  148. src/config/constants.py +34 -0
  149. src/config/defaults.py +18 -0
  150. src/config/schema.py +38 -0
  151. src/config/settings.py +50 -0
  152. src/core/errors.py +62 -0
  153. src/knowledge/__init__.py +23 -0
  154. src/knowledge/add_documents.py +606 -0
  155. src/knowledge/config.py +65 -0
  156. src/knowledge/example_add_documents.py +236 -0
  157. src/knowledge/extract_numbered_items.py +1039 -0
  158. src/knowledge/initializer.py +621 -0
  159. src/knowledge/kb.py +22 -0
  160. src/knowledge/manager.py +782 -0
  161. src/knowledge/progress_tracker.py +182 -0
  162. src/knowledge/start_kb.py +535 -0
  163. src/logging/__init__.py +103 -0
  164. src/logging/adapters/__init__.py +17 -0
  165. src/logging/adapters/lightrag.py +184 -0
  166. src/logging/adapters/llamaindex.py +141 -0
  167. src/logging/config.py +80 -0
  168. src/logging/handlers/__init__.py +20 -0
  169. src/logging/handlers/console.py +75 -0
  170. src/logging/handlers/file.py +201 -0
  171. src/logging/handlers/websocket.py +127 -0
  172. src/logging/logger.py +709 -0
  173. src/logging/stats/__init__.py +16 -0
  174. src/logging/stats/llm_stats.py +179 -0
  175. src/services/__init__.py +56 -0
  176. src/services/config/__init__.py +61 -0
  177. src/services/config/knowledge_base_config.py +210 -0
  178. src/services/config/loader.py +260 -0
  179. src/services/config/unified_config.py +603 -0
  180. src/services/embedding/__init__.py +45 -0
  181. src/services/embedding/adapters/__init__.py +22 -0
  182. src/services/embedding/adapters/base.py +106 -0
  183. src/services/embedding/adapters/cohere.py +127 -0
  184. src/services/embedding/adapters/jina.py +99 -0
  185. src/services/embedding/adapters/ollama.py +116 -0
  186. src/services/embedding/adapters/openai_compatible.py +96 -0
  187. src/services/embedding/client.py +159 -0
  188. src/services/embedding/config.py +156 -0
  189. src/services/embedding/provider.py +119 -0
  190. src/services/llm/__init__.py +152 -0
  191. src/services/llm/capabilities.py +313 -0
  192. src/services/llm/client.py +302 -0
  193. src/services/llm/cloud_provider.py +530 -0
  194. src/services/llm/config.py +200 -0
  195. src/services/llm/error_mapping.py +103 -0
  196. src/services/llm/exceptions.py +152 -0
  197. src/services/llm/factory.py +450 -0
  198. src/services/llm/local_provider.py +347 -0
  199. src/services/llm/providers/anthropic.py +95 -0
  200. src/services/llm/providers/base_provider.py +93 -0
  201. src/services/llm/providers/open_ai.py +83 -0
  202. src/services/llm/registry.py +71 -0
  203. src/services/llm/telemetry.py +40 -0
  204. src/services/llm/types.py +27 -0
  205. src/services/llm/utils.py +333 -0
  206. src/services/prompt/__init__.py +25 -0
  207. src/services/prompt/manager.py +206 -0
  208. src/services/rag/__init__.py +64 -0
  209. src/services/rag/components/__init__.py +29 -0
  210. src/services/rag/components/base.py +59 -0
  211. src/services/rag/components/chunkers/__init__.py +18 -0
  212. src/services/rag/components/chunkers/base.py +34 -0
  213. src/services/rag/components/chunkers/fixed.py +71 -0
  214. src/services/rag/components/chunkers/numbered_item.py +94 -0
  215. src/services/rag/components/chunkers/semantic.py +97 -0
  216. src/services/rag/components/embedders/__init__.py +14 -0
  217. src/services/rag/components/embedders/base.py +32 -0
  218. src/services/rag/components/embedders/openai.py +63 -0
  219. src/services/rag/components/indexers/__init__.py +18 -0
  220. src/services/rag/components/indexers/base.py +35 -0
  221. src/services/rag/components/indexers/graph.py +172 -0
  222. src/services/rag/components/indexers/lightrag.py +156 -0
  223. src/services/rag/components/indexers/vector.py +146 -0
  224. src/services/rag/components/parsers/__init__.py +18 -0
  225. src/services/rag/components/parsers/base.py +35 -0
  226. src/services/rag/components/parsers/markdown.py +52 -0
  227. src/services/rag/components/parsers/pdf.py +115 -0
  228. src/services/rag/components/parsers/text.py +86 -0
  229. src/services/rag/components/retrievers/__init__.py +18 -0
  230. src/services/rag/components/retrievers/base.py +34 -0
  231. src/services/rag/components/retrievers/dense.py +200 -0
  232. src/services/rag/components/retrievers/hybrid.py +164 -0
  233. src/services/rag/components/retrievers/lightrag.py +169 -0
  234. src/services/rag/components/routing.py +286 -0
  235. src/services/rag/factory.py +234 -0
  236. src/services/rag/pipeline.py +215 -0
  237. src/services/rag/pipelines/__init__.py +32 -0
  238. src/services/rag/pipelines/academic.py +44 -0
  239. src/services/rag/pipelines/lightrag.py +43 -0
  240. src/services/rag/pipelines/llamaindex.py +313 -0
  241. src/services/rag/pipelines/raganything.py +384 -0
  242. src/services/rag/service.py +244 -0
  243. src/services/rag/types.py +73 -0
  244. src/services/search/__init__.py +284 -0
  245. src/services/search/base.py +87 -0
  246. src/services/search/consolidation.py +398 -0
  247. src/services/search/providers/__init__.py +128 -0
  248. src/services/search/providers/baidu.py +188 -0
  249. src/services/search/providers/exa.py +194 -0
  250. src/services/search/providers/jina.py +161 -0
  251. src/services/search/providers/perplexity.py +153 -0
  252. src/services/search/providers/serper.py +209 -0
  253. src/services/search/providers/tavily.py +161 -0
  254. src/services/search/types.py +114 -0
  255. src/services/setup/__init__.py +34 -0
  256. src/services/setup/init.py +285 -0
  257. src/services/tts/__init__.py +16 -0
  258. src/services/tts/config.py +99 -0
  259. src/tools/__init__.py +91 -0
  260. src/tools/code_executor.py +536 -0
  261. src/tools/paper_search_tool.py +171 -0
  262. src/tools/query_item_tool.py +310 -0
  263. src/tools/question/__init__.py +15 -0
  264. src/tools/question/exam_mimic.py +616 -0
  265. src/tools/question/pdf_parser.py +211 -0
  266. src/tools/question/question_extractor.py +397 -0
  267. src/tools/rag_tool.py +173 -0
  268. src/tools/tex_chunker.py +339 -0
  269. src/tools/tex_downloader.py +253 -0
  270. src/tools/web_search.py +71 -0
  271. src/utils/config_manager.py +206 -0
  272. src/utils/document_validator.py +168 -0
  273. src/utils/error_rate_tracker.py +111 -0
  274. src/utils/error_utils.py +82 -0
  275. src/utils/json_parser.py +110 -0
  276. src/utils/network/circuit_breaker.py +79 -0
@@ -0,0 +1,714 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ ResearchAgent - Research Agent
5
+ Responsible for executing research logic and tool call decisions
6
+ """
7
+
8
+ from collections.abc import Awaitable, Callable
9
+ from pathlib import Path
10
+ import re
11
+ from string import Template
12
+ import sys
13
+ from typing import Any
14
+
15
+ project_root = Path(__file__).parent.parent.parent.parent
16
+ sys.path.insert(0, str(project_root))
17
+
18
+ from src.agents.base_agent import BaseAgent
19
+ from src.agents.research.data_structures import DynamicTopicQueue, TopicBlock
20
+
21
+ from ..utils.json_utils import extract_json_from_text
22
+
23
+
24
+ class ResearchAgent(BaseAgent):
25
+ """Research Agent"""
26
+
27
def __init__(
    self,
    config: dict[str, Any],
    api_key: str | None = None,
    base_url: str | None = None,
    api_version: str | None = None,
):
    """
    Set up the agent and read its research settings from ``config``.

    Args:
        config: Full configuration dictionary; the "system", "researching"
            and "tools" sections are read here
        api_key: Forwarded unchanged to BaseAgent
        base_url: Forwarded unchanged to BaseAgent
        api_version: Forwarded unchanged to BaseAgent
    """
    super().__init__(
        module_name="research",
        agent_name="research_agent",
        api_key=api_key,
        base_url=base_url,
        api_version=api_version,
        language=config.get("system", {}).get("language", "zh"),
        config=config,
    )

    research_cfg = config.get("researching", {})
    self.researching_config = research_cfg
    self.max_iterations = research_cfg.get("max_iterations", 5)

    # "fixed": explore every iteration and be conservative about declaring
    # knowledge sufficient; "flexible" (auto): may stop early once knowledge
    # is truly sufficient.
    self.iteration_mode = research_cfg.get("iteration_mode", "fixed")

    # RAG counts as enabled when either RAG flavour is switched on
    # (both flags default to True).
    self.enable_rag = research_cfg.get("enable_rag_hybrid", True) or research_cfg.get(
        "enable_rag_naive", True
    )

    # Web search needs BOTH the global switch (tools.web_search.enabled)
    # and the research module switch to be on.
    global_web_switch = config.get("tools", {}).get("web_search", {}).get("enabled", True)
    module_web_switch = research_cfg.get("enable_web_search", False)
    self.enable_web_search = global_web_switch and module_web_switch

    self.enable_paper_search = research_cfg.get("enable_paper_search", False)
    self.enable_run_code = research_cfg.get("enable_run_code", True)

    # Configured tool list, kept for prompt generation
    self.enabled_tools = research_cfg.get("enabled_tools", ["RAG"])
+
66
+ @staticmethod
67
+ def _convert_to_template_format(template_str: str) -> str:
68
+ """
69
+ Convert {var} style placeholders to $var style for string.Template.
70
+ This avoids conflicts with LaTeX braces like {\rho}.
71
+ """
72
+ # Only convert simple {var_name} patterns, not nested or complex ones
73
+ return re.sub(r"\{(\w+)\}", r"$\1", template_str)
74
+
75
+ def _safe_format(self, template_str: str, **kwargs) -> str:
76
+ """
77
+ Safe string formatting using string.Template to avoid LaTeX brace conflicts.
78
+ """
79
+ converted = self._convert_to_template_format(template_str)
80
+ return Template(converted).safe_substitute(**kwargs)
81
+
82
+ def _generate_available_tools_text(self) -> str:
83
+ """
84
+ Generate available tools list based on enabled_tools configuration
85
+
86
+ Returns:
87
+ Available tools text for prompt
88
+ """
89
+ tools = []
90
+ if self.enable_rag:
91
+ tools.append(
92
+ "- rag_hybrid: Hybrid RAG retrieval (knowledge base) | Query format: Natural language"
93
+ )
94
+ tools.append(
95
+ "- rag_naive: Basic RAG retrieval (knowledge base) | Query format: Natural language"
96
+ )
97
+ tools.append(
98
+ "- query_item: Entity/item query (e.g., Theorem 3.1, Fig 2.1) | Query format: Entry number"
99
+ )
100
+ if self.enable_paper_search:
101
+ tools.append(
102
+ "- paper_search: Academic paper search | Query format: 3-5 English keywords, space-separated"
103
+ )
104
+ if self.enable_web_search:
105
+ tools.append(
106
+ "- web_search: Web search for latest information | Query format: Natural language"
107
+ )
108
+ if self.enable_run_code:
109
+ tools.append(
110
+ "- run_code: Code execution for calculation/visualization | Query format: Python code"
111
+ )
112
+
113
+ if not tools:
114
+ tools.append(
115
+ "- rag_hybrid: Hybrid RAG retrieval (default) | Query format: Natural language"
116
+ )
117
+
118
+ return "\n".join(tools)
119
+
120
+ def _generate_tool_phase_guidance(self) -> str:
121
+ """
122
+ Generate phased tool selection guidance based on enabled tools.
123
+ Only includes guidance for tools that are actually enabled.
124
+
125
+ Returns:
126
+ Tool phase guidance text for prompt
127
+ """
128
+ # Determine which tool categories are enabled
129
+ has_rag = self.enable_rag
130
+ has_paper = self.enable_paper_search
131
+ has_web = self.enable_web_search
132
+ has_code = self.enable_run_code
133
+
134
+ # Build phase guidance dynamically based on enabled tools
135
+ guidance_parts = []
136
+
137
+ # Phase 1: Basic exploration (always includes RAG if enabled)
138
+ phase1_tools = []
139
+ if has_rag:
140
+ phase1_tools.append(
141
+ "- `rag_hybrid`: Get comprehensive information, core concepts, mechanism principles"
142
+ )
143
+ phase1_tools.append("- `rag_naive`: Query specific definitions, precise formulas")
144
+ phase1_tools.append(
145
+ "- `query_item`: Get content with specific entry numbers (if known)"
146
+ )
147
+
148
+ if phase1_tools:
149
+ guidance_parts.append(f"""**Phase 1: Basic Exploration (early iterations)**
150
+ Focus on building foundational knowledge:
151
+ {chr(10).join(phase1_tools)}""")
152
+
153
+ # Phase 2: Deep mining (introduce external tools if enabled)
154
+ phase2_tools = []
155
+ if has_rag:
156
+ phase2_tools.append(
157
+ "- Continue using `rag_hybrid` to explore different angles (applications, relationships, comparisons)"
158
+ )
159
+ if has_paper:
160
+ phase2_tools.append(
161
+ "- `paper_search`: Get cutting-edge academic research (if topic involves academic fields)"
162
+ )
163
+ if has_web:
164
+ phase2_tools.append("- `web_search`: Get practical application cases, industry trends")
165
+
166
+ if phase2_tools:
167
+ guidance_parts.append(f"""**Phase 2: Deep Mining (middle iterations)**
168
+ Deep dive and expand knowledge:
169
+ {chr(10).join(phase2_tools)}""")
170
+
171
+ # Phase 3: Completion (all available external tools)
172
+ phase3_tools = []
173
+ if has_paper:
174
+ phase3_tools.append(
175
+ "- `paper_search`: Cutting-edge research, specific methods, experimental results"
176
+ )
177
+ if has_web:
178
+ phase3_tools.append(
179
+ "- `web_search`: Latest developments, practical cases, industry applications"
180
+ )
181
+ if has_code:
182
+ phase3_tools.append(
183
+ "- `run_code`: Algorithm verification, numerical calculation, visualization"
184
+ )
185
+
186
+ if phase3_tools:
187
+ guidance_parts.append(f"""**Phase 3: Completion and Supplement (late iterations)**
188
+ Fill gaps and expand horizons:
189
+ {chr(10).join(phase3_tools)}""")
190
+
191
+ # If no external tools enabled, add a note
192
+ if not has_paper and not has_web:
193
+ guidance_parts.append("""**Note**: Only knowledge base tools (RAG) are available.
194
+ Focus on thoroughly exploring the knowledge base from multiple angles.""")
195
+
196
+ return "\n\n".join(guidance_parts)
197
+
198
+ def _generate_research_depth_guidance(self, iteration: int, used_tools: list[str]) -> str:
199
+ """
200
+ Generate research depth guidance based on iteration, used tools, and iteration_mode
201
+
202
+ Args:
203
+ iteration: Current iteration number
204
+ used_tools: List of tools already used
205
+
206
+ Returns:
207
+ Research depth guidance text
208
+ """
209
+ # Determine research phase based on max_iterations
210
+ early_threshold = max(2, self.max_iterations // 3)
211
+ middle_threshold = max(4, self.max_iterations * 2 // 3)
212
+
213
+ if iteration <= early_threshold:
214
+ phase = "early"
215
+ phase_desc = f"Early Stage (Iteration 1-{early_threshold})"
216
+ guidance = "Focus on building foundational knowledge using RAG/knowledge base tools."
217
+ elif iteration <= middle_threshold:
218
+ phase = "middle"
219
+ phase_desc = f"Middle Stage (Iteration {early_threshold + 1}-{middle_threshold})"
220
+ if self.enable_paper_search or self.enable_web_search:
221
+ guidance = "Consider using Paper/Web search to add academic depth and real-time information."
222
+ else:
223
+ guidance = "Deepen knowledge coverage, explore different angles of the topic."
224
+ else:
225
+ phase = "late"
226
+ phase_desc = f"Late Stage (Iteration {middle_threshold + 1}+)"
227
+ guidance = "Fill knowledge gaps, ensure completeness before concluding."
228
+
229
+ # Tool diversity analysis
230
+ unique_tools = set(used_tools)
231
+ available_tools = []
232
+ if self.enable_rag and not any(
233
+ t in unique_tools for t in ["rag_hybrid", "rag_naive", "query_item"]
234
+ ):
235
+ available_tools.append("RAG tools (rag_hybrid/rag_naive/query_item)")
236
+ if self.enable_paper_search and "paper_search" not in unique_tools:
237
+ available_tools.append("paper_search")
238
+ if self.enable_web_search and "web_search" not in unique_tools:
239
+ available_tools.append("web_search")
240
+
241
+ diversity_hint = ""
242
+ if available_tools and phase != "early":
243
+ diversity_hint = f"\n**Tool Diversity Suggestion**: Consider using unexplored tools: {', '.join(available_tools)}"
244
+
245
+ # Iteration mode specific guidance
246
+ if self.iteration_mode == "flexible":
247
+ # Auto/flexible mode: agent can decide when to stop
248
+ mode_guidance = """
249
+ **Iteration Mode: FLEXIBLE (Auto)**
250
+ You have autonomy to decide when knowledge is sufficient. You may stop early if:
251
+ - Core concepts are well covered from multiple angles
252
+ - Key questions about the topic have been addressed
253
+ - Further iterations would only add marginal value
254
+ However, ensure you have made meaningful exploration before concluding."""
255
+ else:
256
+ # Fixed mode: more conservative about stopping
257
+ mode_guidance = """
258
+ **Iteration Mode: FIXED**
259
+ This mode requires thorough exploration. Be CONSERVATIVE about declaring knowledge sufficient:
260
+ - In early iterations (first third), rarely conclude sufficiency
261
+ - In middle iterations, require strong evidence of comprehensive coverage
262
+ - Only in late iterations, conclude if truly comprehensive"""
263
+
264
+ return f"""
265
+ **Research Phase Guidance** ({phase_desc}):
266
+ {guidance}
267
+
268
+ Current iteration: {iteration}/{self.max_iterations}
269
+ Tools already used: {", ".join(used_tools) if used_tools else "None"}
270
+ {diversity_hint}
271
+ {mode_guidance}
272
+ """
273
+
274
+ def _generate_online_search_instruction(self) -> str:
275
+ """
276
+ Generate online search guidance instructions from YAML config
277
+
278
+ Returns:
279
+ Online search guidance text, returns empty string if not enabled
280
+ """
281
+ if not self.enable_web_search and not self.enable_paper_search:
282
+ return ""
283
+
284
+ if self.enable_web_search and self.enable_paper_search:
285
+ instruction = self.get_prompt("guidance", "online_search_both")
286
+ if instruction:
287
+ return instruction
288
+ elif self.enable_web_search:
289
+ instruction = self.get_prompt("guidance", "online_search_web_only")
290
+ if instruction:
291
+ return instruction
292
+ elif self.enable_paper_search:
293
+ instruction = self.get_prompt("guidance", "online_search_paper_only")
294
+ if instruction:
295
+ return instruction
296
+
297
+ return ""
298
+
299
+ def _generate_iteration_mode_criteria(self, iteration: int) -> str:
300
+ """
301
+ Generate iteration mode specific criteria for sufficiency check from YAML config
302
+
303
+ Args:
304
+ iteration: Current iteration number
305
+
306
+ Returns:
307
+ Iteration mode criteria text
308
+ """
309
+ # Calculate early threshold
310
+ early_threshold = max(2, self.max_iterations // 3)
311
+
312
+ if self.iteration_mode == "flexible":
313
+ criteria = self.get_prompt("guidance", "iteration_mode_flexible")
314
+ if criteria:
315
+ return criteria
316
+ # Fallback if YAML not configured
317
+ return "- **FLEXIBLE mode (Auto)**: You have autonomy to decide sufficiency."
318
+ else:
319
+ criteria = self.get_prompt("guidance", "iteration_mode_fixed")
320
+ if criteria:
321
+ return criteria.format(early_threshold=early_threshold)
322
+ # Fallback if YAML not configured
323
+ return f"- **FIXED mode**: Be CONSERVATIVE about declaring sufficiency. Early threshold: {early_threshold}"
324
+
325
+ async def check_sufficiency(
326
+ self,
327
+ topic: str,
328
+ overview: str,
329
+ current_knowledge: str,
330
+ iteration: int,
331
+ used_tools: list[str] | None = None,
332
+ ) -> dict[str, Any]:
333
+ system_prompt = self.get_prompt("system", "role")
334
+ if not system_prompt:
335
+ raise ValueError(
336
+ "ResearchAgent missing system prompt, please configure system.role in prompts/{lang}/research_agent.yaml"
337
+ )
338
+ user_prompt_template = self.get_prompt("process", "check_sufficiency")
339
+ if not user_prompt_template:
340
+ raise ValueError(
341
+ "ResearchAgent missing check_sufficiency prompt, please configure process.check_sufficiency in prompts/{lang}/research_agent.yaml"
342
+ )
343
+
344
+ # Generate online search guidance (if web_search or paper_search is enabled)
345
+ online_search_instruction = self._generate_online_search_instruction()
346
+
347
+ # Generate research depth guidance
348
+ research_depth_guidance = self._generate_research_depth_guidance(
349
+ iteration, used_tools or []
350
+ )
351
+
352
+ # Generate iteration mode specific criteria
353
+ iteration_mode_criteria = self._generate_iteration_mode_criteria(iteration)
354
+
355
+ # Use safe_format to avoid conflicts with LaTeX braces like {\rho}
356
+ user_prompt = self._safe_format(
357
+ user_prompt_template,
358
+ topic=topic,
359
+ overview=overview,
360
+ current_knowledge=current_knowledge if current_knowledge else "(None)",
361
+ iteration=iteration,
362
+ max_iterations=self.max_iterations,
363
+ online_search_instruction=online_search_instruction,
364
+ research_depth_guidance=research_depth_guidance,
365
+ iteration_mode_criteria=iteration_mode_criteria,
366
+ )
367
+ response = await self.call_llm(
368
+ user_prompt=user_prompt,
369
+ system_prompt=system_prompt,
370
+ stage="check_sufficiency",
371
+ verbose=False,
372
+ )
373
+ from ..utils.json_utils import ensure_json_dict, ensure_keys
374
+
375
+ data = extract_json_from_text(response)
376
+ obj = ensure_json_dict(data)
377
+ ensure_keys(obj, ["is_sufficient", "reason"])
378
+ return obj
379
+
380
+ async def generate_query_plan(
381
+ self,
382
+ topic: str,
383
+ overview: str,
384
+ current_knowledge: str,
385
+ iteration: int,
386
+ existing_topics: list[str] | None = None,
387
+ used_tools: list[str] | None = None,
388
+ ) -> dict[str, Any]:
389
+ system_prompt = self.get_prompt("system", "role")
390
+ if not system_prompt:
391
+ raise ValueError(
392
+ "ResearchAgent missing system prompt, please configure system.role in prompts/{lang}/research_agent.yaml"
393
+ )
394
+ user_prompt_template = self.get_prompt("process", "generate_query_plan")
395
+ if not user_prompt_template:
396
+ raise ValueError(
397
+ "ResearchAgent missing generate_query_plan prompt, please configure process.generate_query_plan in prompts/{lang}/research_agent.yaml"
398
+ )
399
+ topics_text = "(No other topics)"
400
+ if existing_topics:
401
+ topics_text = "\n".join([f"- {t}" for t in existing_topics])
402
+
403
+ # Generate available tools list based on configuration (only enabled tools)
404
+ available_tools_text = self._generate_available_tools_text()
405
+
406
+ # Generate tool phase guidance based on enabled tools
407
+ tool_phase_guidance = self._generate_tool_phase_guidance()
408
+
409
+ # Generate research depth guidance
410
+ research_depth_guidance = self._generate_research_depth_guidance(
411
+ iteration, used_tools or []
412
+ )
413
+
414
+ # Use safe_format to avoid conflicts with LaTeX braces like {\rho}
415
+ user_prompt = self._safe_format(
416
+ user_prompt_template,
417
+ topic=topic,
418
+ overview=overview,
419
+ current_knowledge=current_knowledge[:2000] if current_knowledge else "(None)",
420
+ iteration=iteration,
421
+ max_iterations=self.max_iterations,
422
+ existing_topics=topics_text,
423
+ available_tools=available_tools_text,
424
+ tool_phase_guidance=tool_phase_guidance,
425
+ research_depth_guidance=research_depth_guidance,
426
+ )
427
+ response = await self.call_llm(
428
+ user_prompt=user_prompt,
429
+ system_prompt=system_prompt,
430
+ stage="generate_query_plan",
431
+ verbose=False,
432
+ )
433
+ from ..utils.json_utils import ensure_json_dict, ensure_keys
434
+
435
+ data = extract_json_from_text(response)
436
+ obj = ensure_json_dict(data)
437
+ ensure_keys(obj, ["query", "tool_type", "rationale"])
438
+ return obj
439
+
440
+ async def process(
441
+ self,
442
+ topic_block: TopicBlock,
443
+ call_tool_callback: Callable[[str, str], Awaitable[str]],
444
+ note_agent,
445
+ citation_manager,
446
+ queue: DynamicTopicQueue,
447
+ manager_agent,
448
+ config: dict[str, Any],
449
+ progress_callback: Callable[[str, Any], None] | None = None,
450
+ ) -> dict[str, Any]:
451
+ """
452
+ Execute research for a single topic block (complete multi-round retrieval loop)
453
+
454
+ Args:
455
+ topic_block: Topic block to research
456
+ call_tool_callback: Tool call callback function (tool_type, query) -> raw_answer
457
+ note_agent: NoteAgent instance for generating summaries
458
+ citation_manager: CitationManager instance for managing citations
459
+ queue: DynamicTopicQueue instance for getting existing topic list
460
+ manager_agent: ManagerAgent instance for adding new topics
461
+ config: Configuration dictionary for getting parameters
462
+ progress_callback: Optional callback for iteration progress (event_type, **data)
463
+
464
+ Returns:
465
+ Research result
466
+ {
467
+ "block_id": str,
468
+ "iterations": int,
469
+ "final_knowledge": str,
470
+ "tools_used": List[str],
471
+ "queries_used": List[dict],
472
+ "status": str
473
+ }
474
+ """
475
+ block_id_prefix = f"[{topic_block.block_id}]"
476
+ print(f"\n{block_id_prefix} {'=' * 70}")
477
+ print(f"{block_id_prefix} 🔬 ResearchAgent - Executing Research")
478
+ print(f"{block_id_prefix} {'=' * 70}")
479
+ print(f"{block_id_prefix} Topic: {topic_block.sub_topic}")
480
+ print(f"{block_id_prefix} Overview: {topic_block.overview}")
481
+ print(
482
+ f"{block_id_prefix} Max iterations: {self.max_iterations}, Mode: {self.iteration_mode}\n"
483
+ )
484
+
485
+ iteration = 0
486
+ current_knowledge = ""
487
+ tools_used = []
488
+ queries_used = [] # Track all queries for progress display
489
+
490
+ # Helper to send progress updates
491
+ def send_progress(event_type: str, **data):
492
+ if progress_callback:
493
+ try:
494
+ progress_callback(event_type, **data)
495
+ except Exception:
496
+ pass # Ignore callback errors
497
+
498
+ while iteration < self.max_iterations:
499
+ iteration += 1
500
+ print(f"{block_id_prefix} \n【Iteration {iteration}/{self.max_iterations}】")
501
+
502
+ # Send iteration started progress
503
+ send_progress(
504
+ "iteration_started",
505
+ iteration=iteration,
506
+ max_iterations=self.max_iterations,
507
+ tools_used=tools_used.copy(),
508
+ )
509
+
510
+ # Step 1: Check if knowledge is sufficient
511
+ send_progress(
512
+ "checking_sufficiency", iteration=iteration, max_iterations=self.max_iterations
513
+ )
514
+ suff = await self.check_sufficiency(
515
+ topic=topic_block.sub_topic,
516
+ overview=topic_block.overview,
517
+ current_knowledge=current_knowledge,
518
+ iteration=iteration,
519
+ used_tools=tools_used,
520
+ )
521
+
522
+ if suff.get("is_sufficient", False):
523
+ print(
524
+ f"{block_id_prefix} ✓ Current topic is sufficient, ending research for this topic"
525
+ )
526
+ send_progress(
527
+ "knowledge_sufficient",
528
+ iteration=iteration,
529
+ max_iterations=self.max_iterations,
530
+ reason=suff.get("reason", ""),
531
+ )
532
+ break
533
+
534
+ # Step 2: Generate query plan
535
+ send_progress(
536
+ "generating_query", iteration=iteration, max_iterations=self.max_iterations
537
+ )
538
+ plan = await self.generate_query_plan(
539
+ topic=topic_block.sub_topic,
540
+ overview=topic_block.overview,
541
+ current_knowledge=current_knowledge,
542
+ iteration=iteration,
543
+ existing_topics=queue.list_topics(),
544
+ used_tools=tools_used,
545
+ )
546
+
547
+ # Dynamic splitting: if new topic is discovered, add to queue tail
548
+ new_topic = plan.get("new_sub_topic")
549
+ new_overview = plan.get("new_overview")
550
+ new_topic_score = float(plan.get("new_topic_score") or 0)
551
+ should_add_new_topic = plan.get("should_add_new_topic")
552
+ min_score = config.get("researching", {}).get("new_topic_min_score", 0.75)
553
+ new_topic_reason = plan.get("new_topic_reason")
554
+
555
+ if isinstance(new_topic, str) and new_topic.strip():
556
+ trimmed_topic = new_topic.strip()
557
+ if should_add_new_topic is False:
558
+ print(
559
+ f"{block_id_prefix} ↩️ LLM determined not to add new topic《{trimmed_topic}》, skipping"
560
+ )
561
+ elif new_topic_score < min_score:
562
+ print(
563
+ f"{block_id_prefix} ↩️ New topic《{trimmed_topic}》score {new_topic_score:.2f} below threshold {min_score:.2f}, skipping"
564
+ )
565
+ else:
566
+ # Support both sync and async manager_agent
567
+ import inspect
568
+
569
+ add_topic_method = getattr(manager_agent, "add_new_topic")
570
+ if inspect.iscoroutinefunction(add_topic_method):
571
+ added = await add_topic_method(trimmed_topic, new_overview or "")
572
+ else:
573
+ added = manager_agent.add_new_topic(trimmed_topic, new_overview or "")
574
+ if added:
575
+ print(f"{block_id_prefix} ✓ Added new topic《{trimmed_topic}》to queue")
576
+ send_progress(
577
+ "new_topic_added",
578
+ iteration=iteration,
579
+ max_iterations=self.max_iterations,
580
+ new_topic=trimmed_topic,
581
+ new_overview=new_overview or "",
582
+ )
583
+ if new_topic_reason:
584
+ print(f"{block_id_prefix} Reason: {new_topic_reason}")
585
+
586
+ query = plan.get("query", "").strip()
587
+ tool_type = plan.get("tool_type", "rag_hybrid")
588
+ rationale = plan.get("rationale", "")
589
+
590
+ if not query:
591
+ print(f"{block_id_prefix} ⚠️ Generated query is empty, skipping this iteration")
592
+ send_progress(
593
+ "query_empty", iteration=iteration, max_iterations=self.max_iterations
594
+ )
595
+ continue
596
+
597
+ # Track this query
598
+ query_info = {
599
+ "query": query,
600
+ "tool_type": tool_type,
601
+ "rationale": rationale,
602
+ "iteration": iteration,
603
+ }
604
+ queries_used.append(query_info)
605
+
606
+ # Send progress before tool call
607
+ send_progress(
608
+ "tool_calling",
609
+ iteration=iteration,
610
+ max_iterations=self.max_iterations,
611
+ tool_type=tool_type,
612
+ query=query,
613
+ rationale=rationale,
614
+ )
615
+
616
+ # Step 3: Call tool
617
+ raw_answer = await call_tool_callback(tool_type, query)
618
+
619
+ # Send progress after tool call
620
+ send_progress(
621
+ "tool_completed",
622
+ iteration=iteration,
623
+ max_iterations=self.max_iterations,
624
+ tool_type=tool_type,
625
+ query=query,
626
+ )
627
+
628
+ # Step 4: Get citation ID from CitationManager (unified ID generation)
629
+ send_progress(
630
+ "processing_notes", iteration=iteration, max_iterations=self.max_iterations
631
+ )
632
+
633
+ # Get citation_id from CitationManager - support both sync and async
634
+ import inspect
635
+
636
+ if hasattr(
637
+ citation_manager, "get_next_citation_id_async"
638
+ ) and inspect.iscoroutinefunction(
639
+ getattr(citation_manager, "get_next_citation_id_async", None)
640
+ ):
641
+ citation_id = await citation_manager.get_next_citation_id_async(
642
+ stage="research", block_id=topic_block.block_id
643
+ )
644
+ else:
645
+ citation_id = citation_manager.get_next_citation_id(
646
+ stage="research", block_id=topic_block.block_id
647
+ )
648
+
649
+ # Step 5: NoteAgent records summary with the citation ID
650
+ trace = await note_agent.process(
651
+ tool_type=tool_type,
652
+ query=query,
653
+ raw_answer=raw_answer,
654
+ citation_id=citation_id,
655
+ topic=topic_block.sub_topic,
656
+ context=current_knowledge,
657
+ )
658
+ topic_block.add_tool_trace(trace)
659
+
660
+ # Step 6: Add citation information to citation manager
661
+ # Support both sync and async citation_manager
662
+ if hasattr(citation_manager, "add_citation") and callable(
663
+ getattr(citation_manager, "add_citation", None)
664
+ ):
665
+ add_citation_method = getattr(citation_manager, "add_citation")
666
+ if inspect.iscoroutinefunction(add_citation_method):
667
+ await add_citation_method(
668
+ citation_id=citation_id,
669
+ tool_type=tool_type,
670
+ tool_trace=trace,
671
+ raw_answer=raw_answer,
672
+ )
673
+ else:
674
+ citation_manager.add_citation(
675
+ citation_id=citation_id,
676
+ tool_type=tool_type,
677
+ tool_trace=trace,
678
+ raw_answer=raw_answer,
679
+ )
680
+ else:
681
+ # Fallback to sync version
682
+ citation_manager.add_citation(
683
+ citation_id=citation_id,
684
+ tool_type=tool_type,
685
+ tool_trace=trace,
686
+ raw_answer=raw_answer,
687
+ )
688
+
689
+ # Step 7: Update knowledge (accumulate summaries)
690
+ current_knowledge = (current_knowledge + "\n" + trace.summary).strip()
691
+ topic_block.iteration_count = iteration
692
+ tools_used.append(tool_type)
693
+
694
+ # Send iteration completed progress
695
+ send_progress(
696
+ "iteration_completed",
697
+ iteration=iteration,
698
+ max_iterations=self.max_iterations,
699
+ tool_type=tool_type,
700
+ query=query,
701
+ tools_used=tools_used.copy(),
702
+ )
703
+
704
+ return {
705
+ "block_id": topic_block.block_id,
706
+ "iterations": iteration,
707
+ "final_knowledge": current_knowledge,
708
+ "tools_used": tools_used,
709
+ "queries_used": queries_used,
710
+ "status": "completed" if iteration < self.max_iterations else "max_iterations_reached",
711
+ }
712
+
713
+
714
+ __all__ = ["ResearchAgent"]